Mintplex-Labs · timothycarambat · Apr 21, 2025 · Apr 21, 2025 · Apr 21, 2025
diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js
@@ -43,6 +43,10 @@ function apiDocumentEndpoints(app) {
                 type: 'string',
                 format: 'binary',
                 description: 'The file to upload'
+              },
+              addToWorkspaces: {
+                type: 'string',
+                description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
               }
             },
             required: ['file']
@@ -87,6 +91,7 @@ function apiDocumentEndpoints(app) {
       try {
         const Collector = new CollectorApi();
         const { originalname } = request.file;
+        const { addToWorkspaces = "" } = reqBody(request);
         const processingOnline = await Collector.online();
 
         if (!processingOnline) {
@@ -117,6 +122,12 @@ function apiDocumentEndpoints(app) {
         await EventLogs.logEvent("api_document_uploaded", {
           documentName: originalname,
         });
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0].location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);
@@ -152,6 +163,10 @@ function apiDocumentEndpoints(app) {
                   type: 'string',
                   format: 'binary',
                   description: 'The file to upload'
+                },
+                addToWorkspaces: {
+                  type: 'string',
+                  description: 'comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2',
                 }
               }
             }
@@ -206,6 +221,7 @@ function apiDocumentEndpoints(app) {
       */
       try {
         const { originalname } = request.file;
+        const { addToWorkspaces = "" } = reqBody(request);
         let folder = request.params?.folderName || "custom-documents";
         folder = normalizePath(folder);
         const targetFolderPath = path.join(documentsPath, folder);
@@ -276,6 +292,12 @@ function apiDocumentEndpoints(app) {
           documentName: originalname,
           folder,
         });
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0].location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);
@@ -290,16 +312,17 @@ function apiDocumentEndpoints(app) {
     async (request, response) => {
       /*
     #swagger.tags = ['Documents']
-    #swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding.'
+    #swagger.description = 'Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.'
     #swagger.requestBody = {
-      description: 'Link of web address to be scraped.',
+      description: 'Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.',
       required: true,
       content: {
           "application/json": {
             schema: {
               type: 'object',
               example: {
-                "link": "https://anythingllm.com"
+                "link": "https://anythingllm.com",
+                "addToWorkspaces": "workspace1,workspace2"
               }
             }
           }
@@ -342,7 +365,7 @@ function apiDocumentEndpoints(app) {
     */
       try {
         const Collector = new CollectorApi();
-        const { link } = reqBody(request);
+        const { link, addToWorkspaces = "" } = reqBody(request);
         const processingOnline = await Collector.online();
 
         if (!processingOnline) {
@@ -373,6 +396,12 @@ function apiDocumentEndpoints(app) {
         await EventLogs.logEvent("api_link_uploaded", {
           link,
         });
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0].location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);
@@ -397,11 +426,12 @@ function apiDocumentEndpoints(app) {
             type: 'object',
             example: {
               "textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
+              "addToWorkspaces": "workspace1,workspace2",
               "metadata": {
                 "title": "This key is required. See in /server/endpoints/api/document/index.js:287",
-                keyOne: "valueOne",
-                keyTwo: "valueTwo",
-                etc: "etc"
+                "keyOne": "valueOne",
+                "keyTwo": "valueTwo",
+                "etc": "etc"
               }
             }
           }
@@ -446,7 +476,11 @@ function apiDocumentEndpoints(app) {
       try {
         const Collector = new CollectorApi();
         const requiredMetadata = ["title"];
-        const { textContent, metadata = {} } = reqBody(request);
+        const {
+          textContent,
+          metadata = {},
+          addToWorkspaces = "",
+        } = reqBody(request);
         const processingOnline = await Collector.online();
 
         if (!processingOnline) {
@@ -506,6 +540,12 @@ function apiDocumentEndpoints(app) {
         );
         await Telemetry.sendTelemetry("raw_document_uploaded");
         await EventLogs.logEvent("api_raw_document_uploaded");
+
+        if (!!addToWorkspaces)
+          await Document.api.uploadToWorkspace(
+            addToWorkspaces,
+            documents?.[0].location
+          );
         response.status(200).json({ success: true, error: null, documents });
       } catch (e) {
         console.error(e.message, e);

diff --git a/server/models/documents.js b/server/models/documents.js
@@ -255,6 +255,57 @@ const Document = {
 
     return sourceString;
   },
+
+  /**
+   * Functions for the backend API endpoints - not to be used by the frontend or elsewhere.
+   * @namespace api
+   */
+  api: {
+    /**
+     * Embed a previously uploaded document into one or more workspaces. This
+     * functionality should only be used by the backend /v1/documents/upload endpoints for post-upload embedding.
+     * @param {string} wsSlugs - Comma-separated list of workspace slugs to embed the document into; blank entries are ignored
+     * @param {string} docLocation - The location/path of the document that was uploaded
+     * @returns {Promise<boolean>} - True if the document was embedded into every matched workspace, false otherwise
+     */
+    uploadToWorkspace: async function (wsSlugs = "", docLocation = null) {
+      if (!docLocation) {
+        console.log("No document location provided for embedding", docLocation);
+        return false;
+      }
+
+      // "".split(",") yields [""], so drop blank entries or the empty-list guard below can never fire.
+      const slugs = String(wsSlugs ?? "")
+        .split(",")
+        .map((slug) => slug.trim().toLowerCase())
+        .filter((slug) => slug.length > 0);
+      if (slugs.length === 0) {
+        console.log(`No workspaces provided got: ${wsSlugs}`);
+        return false;
+      }
+
+      const { Workspace } = require("./workspace");
+      const workspaces = await Workspace.where({ slug: { in: slugs } });
+      if (workspaces.length === 0) {
+        console.log("No valid workspaces found for slugs: ", slugs);
+        return false;
+      }
+
+      // Upsert sequentially: the document may be large and we don't want to overwhelm the embedder, plus
+      // after the first upsert the document is cached - making the remaining embeds faster.
+      for (const workspace of workspaces) {
+        const result = await Document.addDocuments(workspace, [docLocation]);
+        const { failedToEmbed = [], errors = [] } = result;
+        if (failedToEmbed.length > 0) {
+          console.log(`Failed to embed document into workspace ${workspace.slug}`, errors);
+          return false;
+        }
+        console.log(`Document embedded into workspace ${workspace.slug}...`);
+      }
+
+      return true;
+    },
+  },
 };
 
 module.exports = { Document };
diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json
@@ -909,6 +909,10 @@
                     "type": "string",
                     "format": "binary",
                     "description": "The file to upload"
+                  },
+                  "addToWorkspaces": {
+                    "type": "string",
+                    "description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
                   }
                 }
               }
@@ -1010,6 +1014,10 @@
                     "type": "string",
                     "format": "binary",
                     "description": "The file to upload"
+                  },
+                  "addToWorkspaces": {
+                    "type": "string",
+                    "description": "comma-separated text-string of workspace slugs to embed the document into post-upload. eg: workspace1,workspace2"
                   }
                 }
               }
@@ -1023,7 +1031,7 @@
         "tags": [
           "Documents"
         ],
-        "description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding.",
+        "description": "Upload a valid URL for AnythingLLM to scrape and prepare for embedding. Optionally, specify a comma-separated list of workspace slugs to embed the document into post-upload.",
         "parameters": [],
         "responses": {
           "200": {
@@ -1076,14 +1084,15 @@
           }
         },
         "requestBody": {
-          "description": "Link of web address to be scraped.",
+          "description": "Link of web address to be scraped and optionally a comma-separated list of workspace slugs to embed the document into post-upload.",
           "required": true,
           "content": {
             "application/json": {
               "schema": {
                 "type": "object",
                 "example": {
-                  "link": "https://anythingllm.com"
+                  "link": "https://anythingllm.com",
+                  "addToWorkspaces": "workspace1,workspace2"
                 }
               }
             }
@@ -1160,6 +1169,7 @@
                 "type": "object",
                 "example": {
                   "textContent": "This is the raw text that will be saved as a document in AnythingLLM.",
+                  "addToWorkspaces": "workspace1,workspace2",
                   "metadata": {
                     "title": "This key is required. See in /server/endpoints/api/document/index.js:287",
                     "keyOne": "valueOne",