From 81b1e2bb3b82fcd9f47c309883ef5cd0d718ba05 Mon Sep 17 00:00:00 2001 From: thestackdev Date: Tue, 18 Feb 2025 16:51:30 +0530 Subject: [PATCH 1/5] feat: Add endpoint to retrieve documents by folder name --- server/endpoints/api/document/index.js | 54 ++++++++++++++++++++++++++ server/swagger/openapi.json | 24 ++++++++++++ server/utils/files/index.js | 42 ++++++++++++++++++++ 3 files changed, 120 insertions(+) diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js index f49cf0dd436..014784fe464 100644 --- a/server/endpoints/api/document/index.js +++ b/server/endpoints/api/document/index.js @@ -4,6 +4,7 @@ const { handleAPIFileUpload } = require("../../../utils/files/multer"); const { viewLocalFiles, findDocumentInDocuments, + getDocumentsByFolder, normalizePath, isWithin, } = require("../../../utils/files"); @@ -395,6 +396,59 @@ function apiDocumentEndpoints(app) { } }); + app.get( + "/v1/documents/folder/:folderName", + [validApiKey], + async (request, response) => { + /* + #swagger.tags = ['Documents'] + #swagger.description = 'Get all documents stored in a specific folder.' + #swagger.parameters['folderName'] = { + in: 'path', + description: 'Name of the folder to retrieve documents from', + required: true, + type: 'string' + } + #swagger.responses[200] = { + content: { + "application/json": { + schema: { + type: 'object', + example: { + folder: "custom-documents", + documents: [ + { + name: "document1.json", + type: "file", + cached: false, + pinnedWorkspaces: [], + watched: false, + // ... other document metadata + }, + // more documents + ] + } + } + } + } + } + #swagger.responses[403] = { + schema: { + "$ref": "#/definitions/InvalidAPIKey" + } + } + */ + try { + const { folderName } = request.params; + const result = await getDocumentsByFolder(folderName); + response.status(200).json(result); + } catch (e) { + console.error(e.message, e); + response.sendStatus(500).end(); + } + } + ); + app.get( "/v1/document/accepted-file-types", [validApiKey], diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index 46a63809a5e..69474fb2073 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1124,6 +1124,30 @@ } } }, + "/v1/documents/folder/{folderName}": { + "get": { + "tags": [ + "Documents" + ], + "description": "Get all documents stored in a specific folder.", + "parameters": [ + { + "name": "folderName", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "Name of the folder to retrieve documents from" + } + ], + "responses": { + "403": { + "description": "Forbidden" + } + } + } + }, "/v1/document/accepted-file-types": { "get": { "tags": [ diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 625d8582cde..6dbafeac113 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -91,6 +91,47 @@ async function viewLocalFiles() { return directory; } +async function getDocumentsByFolder(folderName = "") { + if (!folderName) throw new Error("Folder name must be provided."); + const folderPath = path.resolve(documentsPath, normalizePath(folderName)); + if (!fs.existsSync(folderPath) || !fs.lstatSync(folderPath).isDirectory()) { + throw new Error(`Folder "${folderName}" does not exist.`); + } + + const documents = []; + const filenames = {}; + const files = fs.readdirSync(folderPath); + for (const file of files) { + if (path.extname(file) !== ".json") continue; + const filePath = path.join(folderPath, file); + const rawData = fs.readFileSync(filePath, "utf8"); + const cachefilename = `${folderName}/${file}`; + const { pageContent, ...metadata } = JSON.parse(rawData); + documents.push({ + name: file, + type: "file", + ...metadata, + cached: await cachedVectorInformation(cachefilename, true), + }); + filenames[cachefilename] = file; + } + + // Get pinned and watched information for each document in the folder + const pinnedWorkspacesByDocument = + await getPinnedWorkspacesByDocument(filenames); + const watchedDocumentsFilenames = + await getWatchedDocumentFilenames(filenames); + for (let doc of documents) { + doc.pinnedWorkspaces = pinnedWorkspacesByDocument[doc.name] || []; + doc.watched = Object.prototype.hasOwnProperty.call( + watchedDocumentsFilenames, + doc.name + ); + } + + return { folder: folderName, documents }; +} + /** * Searches the vector-cache folder for existing information so we dont have to re-embed a * document and can instead push directly to vector db. @@ -304,4 +345,5 @@ module.exports = { documentsPath, hasVectorCachedFiles, purgeEntireVectorCache, + getDocumentsByFolder, }; From 8f7426171acc813b7ad79246844e1daf0c144896 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Tue, 18 Feb 2025 14:00:32 -0800 Subject: [PATCH 2/5] isWithin Check on path to prevent path traversal --- server/utils/files/index.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 6dbafeac113..4b33fbc0c82 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -94,9 +94,12 @@ async function viewLocalFiles() { async function getDocumentsByFolder(folderName = "") { if (!folderName) throw new Error("Folder name must be provided."); const folderPath = path.resolve(documentsPath, normalizePath(folderName)); - if (!fs.existsSync(folderPath) || !fs.lstatSync(folderPath).isDirectory()) { + if ( + !isWithin(documentsPath, folderPath) || + !fs.existsSync(folderPath) || + !fs.lstatSync(folderPath).isDirectory() + ) throw new Error(`Folder "${folderName}" does not exist.`); - } const documents = []; const filenames = {}; From 989c20b26b0ab6926a8d33ca74d0abdcf1897aef Mon Sep 17 00:00:00 2001 From: thestackdev Date: Wed, 19 Feb 2025 14:56:48 +0530 Subject: [PATCH 3/5] feat: Add endpoint to upload documents to a specified folder --- server/endpoints/api/document/index.js | 144 +++++++++++++++++++++++++ server/swagger/openapi.json | 76 +++++++++++++ 2 files changed, 220 insertions(+) diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js index 014784fe464..32d58393b17 100644 --- a/server/endpoints/api/document/index.js +++ b/server/endpoints/api/document/index.js @@ -122,6 +122,150 @@ function apiDocumentEndpoints(app) { } ); + app.post( + "/v1/document/upload-folder", + [validApiKey, handleAPIFileUpload], + async (request, response) => { + /* + #swagger.tags = ['Documents'] + #swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding.' + #swagger.requestBody = { + description: 'File to be uploaded.', + required: true, + content: { + "multipart/form-data": { + schema: { + type: 'string', + format: 'binary', + properties: { + file: { + type: 'string', + format: 'binary', + } + } + }, + } + } + } + #swagger.responses[200] = { + content: { + "application/json": { + schema: { + type: 'object', + example: { + success: true, + error: null, + documents: [ + { + "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "url": "file:///Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt", + "title": "anythingllm.txt", + "docAuthor": "Unknown", + "description": "Unknown", + "docSource": "a text file uploaded by the user.", + "chunkSource": "anythingllm.txt", + "published": "1/16/2024, 3:07:00 PM", + "wordCount": 93, + "token_count_estimate": 115, + } + ] + } + } + } + } + } + #swagger.responses[403] = { + schema: { + "$ref": "#/definitions/InvalidAPIKey" + } + } + */ + try { + const { originalname } = request.file; + let folder = request.body.folder || "custom-documents"; + folder = normalizePath(folder); + const targetFolderPath = path.join(documentsPath, folder); + + if ( + !isWithin(path.resolve(documentsPath), path.resolve(targetFolderPath)) + ) { + throw new Error("Invalid folder name"); + } + + // Create the folder if it does not exist + if (!fs.existsSync(targetFolderPath)) { + fs.mkdirSync(targetFolderPath, { recursive: true }); + } + + const Collector = new CollectorApi(); + const processingOnline = await Collector.online(); + if (!processingOnline) { + response + .status(500) + .json({ + success: false, + error: `Document processing API is not online. Document ${originalname} will not be processed automatically.`, + }) + .end(); + return; + } + + // Process the uploaded document + const { success, reason, documents } = + await Collector.processDocument(originalname); + if (!success) { + response + .status(500) + .json({ success: false, error: reason, documents }) + .end(); + return; + } + + // For each processed document, check if it is already in the desired folder. + // If not, move it using similar logic as in the move-files endpoint. + for (const doc of documents) { + const currentFolder = path.dirname(doc.location); + if (currentFolder !== folder) { + const sourcePath = path.join( + documentsPath, + normalizePath(doc.location) + ); + const destinationPath = path.join( + targetFolderPath, + path.basename(doc.location) + ); + + if ( + !isWithin(documentsPath, sourcePath) || + !isWithin(documentsPath, destinationPath) + ) { + throw new Error("Invalid file location"); + } + + await fs.promises.rename(sourcePath, destinationPath); + + doc.location = path.join(folder, path.basename(doc.location)); + doc.name = path.basename(doc.location); + } + } + + Collector.log( + `Document ${originalname} uploaded, processed, and moved to folder ${folder} successfully.` + ); + await Telemetry.sendTelemetry("document_uploaded_to_folder"); + await EventLogs.logEvent("api_document_uploaded_to_folder", { + documentName: originalname, + folder, + }); + response.status(200).json({ success: true, error: null, documents }); + } catch (e) { + console.error(e.message, e); + response.sendStatus(500).end(); + } + } + ); + app.post( "/v1/document/upload-link", [validApiKey], diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index 3c52dc16444..6cecbb38e02 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -914,6 +914,82 @@ } } }, + "/v1/document/upload-folder": { + "post": { + "tags": [ + "Documents" + ], + "description": "Upload a new file to AnythingLLM to be parsed and prepared for embedding.", + "parameters": [], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "example": { + "success": true, + "error": null, + "documents": [ + { + "location": "custom-documents/anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "name": "anythingllm.txt-6e8be64c-c162-4b43-9997-b068c0071e8b.json", + "url": "file://Users/tim/Documents/anything-llm/collector/hotdir/anythingllm.txt", + "title": "anythingllm.txt", + "docAuthor": "Unknown", + "description": "Unknown", + "docSource": "a text file uploaded by the user.", + "chunkSource": "anythingllm.txt", + "published": "1/16/2024, 3:07:00 PM", + "wordCount": 93, + "token_count_estimate": 115 + } + ] + } + } + } + } + }, + "403": { + "description": "Forbidden", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InvalidAPIKey" + } + }, + "application/xml": { + "schema": { + "$ref": "#/components/schemas/InvalidAPIKey" + } + } + } + }, + "500": { + "description": "Internal Server Error" + } + }, + "requestBody": { + "description": "File to be uploaded.", + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "type": "string", + "format": "binary", + "properties": { + "file": { + "type": "string", + "format": "binary" + } + } + } + } + } + } + } + }, "/v1/document/upload-link": { "post": { "tags": [ From 7de5a58ae68513aa619abb1a128c57ee976c5168 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Wed, 19 Feb 2025 16:12:10 -0800 Subject: [PATCH 4/5] refactor upload to folder endpoint + update jsdoc for swagger --- server/endpoints/api/document/index.js | 49 ++++++++++++++++------- server/swagger/openapi.json | 54 +++++++++++++++++++------- 2 files changed, 76 insertions(+), 27 deletions(-) diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js index 65309cfe59f..96954222960 100644 --- a/server/endpoints/api/document/index.js +++ b/server/endpoints/api/document/index.js @@ -35,12 +35,13 @@ function apiDocumentEndpoints(app) { content: { "multipart/form-data": { schema: { - type: 'string', - format: 'binary', + type: 'object', + required: ['file'], properties: { file: { type: 'string', format: 'binary', + description: 'The file to upload' } } } @@ -65,7 +66,7 @@ function apiDocumentEndpoints(app) { "description": "Unknown", "docSource": "a text file uploaded by the user.", "chunkSource": "anythingllm.txt", - "published": "1/16/2024, 3:07:00 PM", + "published": "1/16/2024, 3:07:00 PM", "wordCount": 93, "token_count_estimate": 115, } @@ -123,27 +124,35 @@ function apiDocumentEndpoints(app) { ); app.post( - "/v1/document/upload-folder", + "/v1/document/upload/:folderName", [validApiKey, handleAPIFileUpload], async (request, response) => { /* #swagger.tags = ['Documents'] - #swagger.description = 'Upload a new file to AnythingLLM to be parsed and prepared for embedding.' + #swagger.description = 'Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.' + #swagger.parameters['folderName'] = { + in: 'path', + description: 'Target folder path (defaults to "custom-documents" if not provided)', + required: true, + type: 'string', + example: 'my-folder' + } #swagger.requestBody = { description: 'File to be uploaded.', required: true, content: { "multipart/form-data": { schema: { - type: 'string', - format: 'binary', + type: 'object', + required: ['file'], properties: { file: { type: 'string', format: 'binary', + description: 'The file to upload' } } - }, + } } } } @@ -165,9 +174,9 @@ function apiDocumentEndpoints(app) { "description": "Unknown", "docSource": "a text file uploaded by the user.", "chunkSource": "anythingllm.txt", - "published": "1/16/2024, 3:07:00 PM", + "published": "1/16/2024, 3:07:00 PM", "wordCount": 93, - "token_count_estimate": 115, + "token_count_estimate": 115 } ] } @@ -180,10 +189,24 @@ function apiDocumentEndpoints(app) { "$ref": "#/definitions/InvalidAPIKey" } } + #swagger.responses[500] = { + description: "Internal Server Error", + content: { + "application/json": { + schema: { + type: 'object', + example: { + success: false, + error: "Document processing API is not online. Document will not be processed automatically." + } + } + } + } + } */ try { const { originalname } = request.file; - let folder = request.body.folder || "custom-documents"; + let folder = request.params.folderName || "custom-documents"; folder = normalizePath(folder); const targetFolderPath = path.join(documentsPath, folder); @@ -304,7 +327,7 @@ function apiDocumentEndpoints(app) { "description": "No description found.", "docSource": "URL link uploaded by the user.", "chunkSource": "https:anythingllm.com.html", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447, @@ -407,7 +430,7 @@ function apiDocumentEndpoints(app) { "description": "No description found.", "docSource": "My custom description set during upload", "chunkSource": "no chunk source specified", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447, diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index dfda2afae9c..e7b3582d361 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -865,7 +865,7 @@ "description": "Unknown", "docSource": "a text file uploaded by the user.", "chunkSource": "anythingllm.txt", - "published": "1/16/2024, 3:07:00 PM", + "published": "1/16/2024, 3:07:00 PM", "wordCount": 93, "token_count_estimate": 115 } @@ -900,12 +900,15 @@ "content": { "multipart/form-data": { "schema": { - "type": "string", - "format": "binary", + "type": "object", + "required": [ + "file" + ], "properties": { "file": { "type": "string", - "format": "binary" + "format": "binary", + "description": "The file to upload" } } } @@ -914,13 +917,22 @@ } } }, - "/v1/document/upload-folder": { + "/v1/document/upload/{folderName}": { "post": { "tags": [ "Documents" ], - "description": "Upload a new file to AnythingLLM to be parsed and prepared for embedding.", - "parameters": [], + "description": "Upload a new file to a specific folder in AnythingLLM to be parsed and prepared for embedding. If the folder does not exist, it will be created.", + "parameters": [ + { + "name": "folderName", + "in": "path", + "required": true, + "schema": { + "type": "string" + } + } + ], "responses": { "200": { "description": "OK", @@ -941,7 +953,7 @@ "description": "Unknown", "docSource": "a text file uploaded by the user.", "chunkSource": "anythingllm.txt", - "published": "1/16/2024, 3:07:00 PM", + "published": "1/16/2024, 3:07:00 PM", "wordCount": 93, "token_count_estimate": 115 } @@ -967,7 +979,18 @@ } }, "500": { - "description": "Internal Server Error" + "description": "Internal Server Error", + "content": { + "application/json": { + "schema": { + "type": "object", + "example": { + "success": false, + "error": "Document processing API is not online. Document will not be processed automatically." + } + } + } + } } }, "requestBody": { @@ -976,12 +999,15 @@ "content": { "multipart/form-data": { "schema": { - "type": "string", - "format": "binary", + "type": "object", + "required": [ + "file" + ], "properties": { "file": { "type": "string", - "format": "binary" + "format": "binary", + "description": "The file to upload" } } } @@ -1016,7 +1042,7 @@ "description": "No description found.", "docSource": "URL link uploaded by the user.", "chunkSource": "https:anythingllm.com.html", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447, @@ -1089,7 +1115,7 @@ "description": "No description found.", "docSource": "My custom description set during upload", "chunkSource": "no chunk source specified", - "published": "1/16/2024, 3:46:33 PM", + "published": "1/16/2024, 3:46:33 PM", "wordCount": 252, "pageContent": "AnythingLLM is the best....", "token_count_estimate": 447, From d19c7943fd4f0e9060ca896563ed09022b58a66d Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 26 Feb 2025 12:25:14 -0800 Subject: [PATCH 5/5] linting --- server/endpoints/api/document/index.js | 21 ++++++++------------- server/swagger/openapi.json | 5 +---- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js index ed095a0b1b8..faa6b85e489 100644 --- a/server/endpoints/api/document/index.js +++ b/server/endpoints/api/document/index.js @@ -207,20 +207,16 @@ function apiDocumentEndpoints(app) { */ try { const { originalname } = request.file; - let folder = request.params.folderName || "custom-documents"; + let folder = request.params?.folderName || "custom-documents"; folder = normalizePath(folder); const targetFolderPath = path.join(documentsPath, folder); if ( !isWithin(path.resolve(documentsPath), path.resolve(targetFolderPath)) - ) { + ) throw new Error("Invalid folder name"); - } - - // Create the folder if it does not exist - if (!fs.existsSync(targetFolderPath)) { + if (!fs.existsSync(targetFolderPath)) fs.mkdirSync(targetFolderPath, { recursive: true }); - } const Collector = new CollectorApi(); const processingOnline = await Collector.online(); @@ -263,12 +259,10 @@ function apiDocumentEndpoints(app) { if ( !isWithin(documentsPath, sourcePath) || !isWithin(documentsPath, destinationPath) - ) { + ) throw new Error("Invalid file location"); - } - - await fs.promises.rename(sourcePath, destinationPath); + fs.renameSync(sourcePath, destinationPath); doc.location = path.join(folder, path.basename(doc.location)); doc.name = path.basename(doc.location); } @@ -277,8 +271,9 @@ function apiDocumentEndpoints(app) { Collector.log( `Document ${originalname} uploaded, processed, and moved to folder ${folder} successfully.` ); - await Telemetry.sendTelemetry("document_uploaded_to_folder"); - await EventLogs.logEvent("api_document_uploaded_to_folder", { + + await Telemetry.sendTelemetry("document_uploaded"); + await EventLogs.logEvent("api_document_uploaded", { documentName: originalname, folder, }); diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index 0db8ab41735..e7b3582d361 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -910,10 +910,7 @@ "format": "binary", "description": "The file to upload" } - }, - "required": [ - "file" - ] + } } } }