diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js
index efac6a22d3e..56175142052 100644
--- a/server/endpoints/api/document/index.js
+++ b/server/endpoints/api/document/index.js
@@ -43,6 +43,10 @@ function apiDocumentEndpoints(app) {
                 type: 'string',
                 format: 'binary',
                 description: 'The file to upload'
+              },
+              addToWorkspaces: {
+                type: 'string',
+                description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
               }
             },
             required: ['file']
@@ -87,6 +91,7 @@ function apiDocumentEndpoints(app) {
       try {
         const Collector = new CollectorApi();
         const { originalname } = request.file;
+        const { addToWorkspaces = "" } = reqBody(request);
         const processingOnline = await Collector.online();
 
         if (!processingOnline) {
@@ -117,6 +122,12 @@ function apiDocumentEndpoints(app) {
         await EventLogs.logEvent("api_document_uploaded", {
           documentName: originalname,
         });
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0]?.location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);
@@ -152,6 +163,10 @@ function apiDocumentEndpoints(app) {
                 type: 'string',
                 format: 'binary',
                 description: 'The file to upload'
+              },
+              addToWorkspaces: {
+                type: 'string',
+                description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
               }
             }
           }
@@ -206,6 +221,7 @@ function apiDocumentEndpoints(app) {
     */
       try {
         const { originalname } = request.file;
+        const { addToWorkspaces = "" } = reqBody(request);
         let folder = request.params?.folderName || "custom-documents";
         folder = normalizePath(folder);
         const targetFolderPath = path.join(documentsPath, folder);
@@ -276,6 +292,12 @@ function apiDocumentEndpoints(app) {
           documentName: originalname,
           folder,
         });
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0]?.location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);
@@ -290,16 +312,17 @@ function apiDocumentEndpoints(app) {
     async (request, response) => {
       /*
     #swagger.tags = ['Documents']
-    #swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding.'
+    #swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.'
     #swagger.requestBody = {
-      description: 'Link of web address to be scraped.',
+      description: 'Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.',
       required: true,
       content: {
         "application/json": {
           schema: {
             type: 'object',
             example: {
-              "link": "https://anythingllm.com"
+              "link": "https://anythingllm.com",
+              "addToWorkspaces": "workspace1,workspace2"
             }
           }
         }
@@ -342,7 +365,7 @@ function apiDocumentEndpoints(app) {
     */
       try {
         const Collector = new CollectorApi();
-        const { link } = reqBody(request);
+        const { link, addToWorkspaces = "" } = reqBody(request);
         const processingOnline = await Collector.online();
 
         if (!processingOnline) {
@@ -373,6 +396,12 @@ function apiDocumentEndpoints(app) {
         await EventLogs.logEvent("api_link_uploaded", {
           link,
         });
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0]?.location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);
@@ -397,11 +426,12 @@ function apiDocumentEndpoints(app) {
             type: 'object',
             example: {
               "textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
+              "addToWorkspaces": "workspace1,workspace2",
               "metadata": {
                 "title": "This key is required. See in /server/endpoints/api/document/index.js:287",
-                keyOne: "valueOne",
-                keyTwo: "valueTwo",
-                etc: "etc"
+                "keyOne": "valueOne",
+                "keyTwo": "valueTwo",
+                "etc": "etc"
               }
             }
           }
@@ -446,7 +476,11 @@ function apiDocumentEndpoints(app) {
       try {
         const Collector = new CollectorApi();
         const requiredMetadata = ["title"];
-        const { textContent, metadata = {} } = reqBody(request);
+        const {
+          textContent,
+          metadata = {},
+          addToWorkspaces = "",
+        } = reqBody(request);
         const processingOnline = await Collector.online();
 
         if (!processingOnline) {
@@ -506,6 +540,12 @@ function apiDocumentEndpoints(app) {
         );
         await Telemetry.sendTelemetry("raw_document_uploaded");
         await EventLogs.logEvent("api_raw_document_uploaded");
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0]?.location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);
diff --git a/server/models/documents.js b/server/models/documents.js
index 81c2dd9a79e..e937eb2b16d 100644
--- a/server/models/documents.js
+++ b/server/models/documents.js
@@ -255,6 +255,70 @@ const Document = {
 
     return sourceString;
   },
+
+  /**
+   * Functions for the backend API endpoints - not to be used by the frontend or elsewhere.
+   * @namespace api
+   */
+  api: {
+    /**
+     * Process a document upload from the API and upsert it into the database. This
+     * functionality should only be used by the backend /v1/documents/upload endpoints for post-upload embedding.
+     * Workspaces are embedded one at a time and processing stops at the first workspace that fails to embed.
+     * @param {string} wsSlugs - The slugs of the workspaces to embed the document into, will be comma-separated list of workspace slugs
+     * @param {string|null} docLocation - The location/path of the document that was uploaded
+     * @returns {Promise<boolean>} - True if the document was embedded into every matched workspace, false otherwise
+     */
+    uploadToWorkspace: async function (wsSlugs = "", docLocation = null) {
+      if (!docLocation) {
+        console.log(
+          "No document location provided for embedding",
+          docLocation
+        );
+        return false;
+      }
+
+      // String() keeps non-string input (eg: an array of slugs from a JSON body, which
+      // stringifies to "a,b") from throwing on .split(). Blank entries are dropped
+      // because "".split(",") yields [""] - never an empty array.
+      const slugs = String(wsSlugs ?? "")
+        .split(",")
+        .map((slug) => slug.trim().toLowerCase())
+        .filter((slug) => slug.length > 0);
+      if (slugs.length === 0) {
+        console.log(`No workspaces provided got: ${wsSlugs}`);
+        return false;
+      }
+
+      const { Workspace } = require("./workspace");
+      const workspaces = await Workspace.where({ slug: { in: slugs } });
+      if (workspaces.length === 0) {
+        console.log("No valid workspaces found for slugs: ", slugs);
+        return false;
+      }
+
+      // Upsert the document into each workspace - do this sequentially
+      // because the document may be large and we don't want to overwhelm the embedder, plus on the first
+      // upsert we will then have the cache of the document - making n+1 embeds faster. If we parallelize this
+      // we will have to do a lot of extra work to ensure that the document is not embedded more than once.
+      for (const workspace of workspaces) {
+        const { failedToEmbed = [], errors = [] } = await Document.addDocuments(
+          workspace,
+          [docLocation]
+        );
+        if (failedToEmbed.length > 0) {
+          console.log(
+            `Failed to embed document into workspace ${workspace.slug}`,
+            errors
+          );
+          return false;
+        }
+        console.log(`Document embedded into workspace ${workspace.slug}...`);
+      }
+
+      return true;
+    },
+  },
 };
 
 module.exports = { Document };
diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json
index 94a1d71e39a..3ae58b0e8af 100644
--- a/server/swagger/openapi.json
+++ b/server/swagger/openapi.json
@@ -909,6 +909,10 @@
                     "type": "string",
                     "format": "binary",
                     "description": "The file to upload"
+                  },
+                  "addToWorkspaces": {
+                    "type": "string",
+                    "description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
                   }
                 }
               }
@@ -1010,6 +1014,10 @@
                     "type": "string",
                     "format": "binary",
                     "description": "The file to upload"
+                  },
+                  "addToWorkspaces": {
+                    "type": "string",
+                    "description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
                   }
                 }
               }
@@ -1023,7 +1031,7 @@
         "tags": [
           "Documents"
         ],
-        "description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding.",
+        "description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.",
         "parameters": [],
         "responses": {
           "200": {
@@ -1076,14 +1084,15 @@
           }
         },
         "requestBody": {
-          "description": "Link of web address to be scraped.",
+          "description": "Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.",
           "required": true,
           "content": {
             "application/json": {
               "schema": {
                 "type": "object",
                 "example": {
-                  "link": "https://anythingllm.com"
+                  "link": "https://anythingllm.com",
+                  "addToWorkspaces": "workspace1,workspace2"
                 }
               }
             }
@@ -1160,6 +1169,7 @@
                 "type": "object",
                 "example": {
                   "textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
+                  "addToWorkspaces": "workspace1,workspace2",
                   "metadata": {
                     "title": "This key is required. See in /server/endpoints/api/document/index.js:287",
                     "keyOne": "valueOne",