From 4c373f30705deb005d88f4f1decc07c8b95a8278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82a=C5=BCej=20Owczarczyk?= Date: Mon, 23 Sep 2024 22:04:26 +0200 Subject: [PATCH 1/3] #2317 Fetch pinned documents once per folder to reduce the number of queries. --- server/models/documents.js | 4 +++- server/utils/files/index.js | 42 ++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/server/models/documents.js b/server/models/documents.js index 43ec5f9f453..7d768578c70 100644 --- a/server/models/documents.js +++ b/server/models/documents.js @@ -76,7 +76,8 @@ const Document = { clause = {}, limit = null, orderBy = null, - include = null + include = null, + select = null ) { try { const results = await prisma.workspace_documents.findMany({ @@ -84,6 +85,7 @@ const Document = { ...(limit !== null ? { take: limit } : {}), ...(orderBy !== null ? { orderBy } : {}), ...(include !== null ? { include } : {}), + ...(select !== null ? { select: { ...select } } : {}), }); return results; } catch (error) { diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 58bdf807a4e..f68375f3b8b 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -44,17 +44,17 @@ async function viewLocalFiles() { items: [], }; const subfiles = fs.readdirSync(folderPath); + const filenames = {}; for (const subfile of subfiles) { if (path.extname(subfile) !== ".json") continue; const filePath = path.join(folderPath, subfile); const rawData = fs.readFileSync(filePath, "utf8"); const cachefilename = `${file}/${subfile}`; + filenames[cachefilename] = subfile; + const { pageContent, ...metadata } = JSON.parse(rawData); - const pinnedInWorkspaces = await Document.getOnlyWorkspaceIds({ - docpath: cachefilename, - pinned: true, - }); + const watchedInWorkspaces = liveSyncAvailable ? 
await Document.getOnlyWorkspaceIds({ docpath: cachefilename, @@ -67,7 +67,6 @@ async function viewLocalFiles() { type: "file", ...metadata, cached: await cachedVectorInformation(cachefilename, true), - pinnedWorkspaces: pinnedInWorkspaces, canWatch: liveSyncAvailable ? DocumentSyncQueue.canWatch(metadata) : false, @@ -75,6 +74,39 @@ async function viewLocalFiles() { watched: watchedInWorkspaces.length !== 0, }); } + + // Get documents pinned to at least one workspace. + const pinnedWorkspacesByDocument = ( + await Document.where( + { + docpath: { + in: Object.keys(filenames), + }, + pinned: true, + }, + null, + null, + null, + { + workspaceId: true, + docpath: true, + } + ) + ).reduce((result, { workspaceId, docpath }) => { + const filename = filenames[docpath]; + if (!result[filename]) { + result[filename] = []; + } + if (!result[filename].includes(workspaceId)) { + result[filename].push(workspaceId); + } + return result; + }, {}); + + for (const item of subdocs.items) { + item.pinnedWorkspaces = pinnedWorkspacesByDocument[item.name] || []; + } + directory.items.push(subdocs); } } From 76f7e800793839e04fe9f74701189545d8f37b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C5=82a=C5=BCej=20Owczarczyk?= Date: Mon, 23 Sep 2024 22:47:15 +0200 Subject: [PATCH 2/3] Reorder the lines to keep const declarations together. --- server/utils/files/index.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/utils/files/index.js b/server/utils/files/index.js index f68375f3b8b..4ec16a76a11 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -51,10 +51,7 @@ async function viewLocalFiles() { const filePath = path.join(folderPath, subfile); const rawData = fs.readFileSync(filePath, "utf8"); const cachefilename = `${file}/${subfile}`; - filenames[cachefilename] = subfile; - const { pageContent, ...metadata } = JSON.parse(rawData); - const watchedInWorkspaces = liveSyncAvailable ? 
await Document.getOnlyWorkspaceIds({ docpath: cachefilename, @@ -73,6 +70,7 @@ async function viewLocalFiles() { // Is file watched in any workspace since sync updates all workspaces where file is referenced watched: watchedInWorkspaces.length !== 0, }); + filenames[cachefilename] = subfile; } // Get documents pinned to at least one workspace. From 1ea9d33d365f4adb447663cf4db91ddaae88ddf0 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Tue, 24 Sep 2024 15:51:46 -0700 Subject: [PATCH 3/3] Add some comments to functions move pinned document fetch for folder to function move watched documents per-folder to also function the same remove unused function in documents model --- server/models/documentSyncQueue.js | 7 ++ server/models/documents.js | 15 ---- server/utils/files/index.js | 111 +++++++++++++++++++---------- 3 files changed, 79 insertions(+), 54 deletions(-) diff --git a/server/models/documentSyncQueue.js b/server/models/documentSyncQueue.js index b034643ca94..0ebaa05294c 100644 --- a/server/models/documentSyncQueue.js +++ b/server/models/documentSyncQueue.js @@ -38,6 +38,13 @@ const DocumentSyncQueue = { return new Date(Number(new Date()) + queueRecord.staleAfterMs); }, + /** + * Check if the document can be watched based on the metadata fields + * @param {object} metadata - metadata to check + * @param {string} metadata.title - title of the document + * @param {string} metadata.chunkSource - chunk source of the document + * @returns {boolean} - true if the document can be watched, false otherwise + */ canWatch: function ({ title, chunkSource = null } = {}) { if (chunkSource.startsWith("link://") && title.endsWith(".html")) return true; // If is web-link material (prior to feature most chunkSources were links://) diff --git a/server/models/documents.js b/server/models/documents.js index 7d768578c70..81c2dd9a79e 100644 --- a/server/models/documents.js +++ b/server/models/documents.js @@ -57,21 +57,6 @@ const Document = { } }, - getOnlyWorkspaceIds: async 
function (clause = {}) { - try { - const workspaceIds = await prisma.workspace_documents.findMany({ - where: clause, - select: { - workspaceId: true, - }, - }); - return workspaceIds.map((record) => record.workspaceId) || []; - } catch (error) { - console.error(error.message); - return []; - } - }, - where: async function ( clause = {}, limit = null, diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 4ec16a76a11..598884f999d 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -52,13 +52,6 @@ async function viewLocalFiles() { const rawData = fs.readFileSync(filePath, "utf8"); const cachefilename = `${file}/${subfile}`; const { pageContent, ...metadata } = JSON.parse(rawData); - const watchedInWorkspaces = liveSyncAvailable - ? await Document.getOnlyWorkspaceIds({ - docpath: cachefilename, - watched: true, - }) - : []; - subdocs.items.push({ name: subfile, type: "file", @@ -67,42 +60,22 @@ async function viewLocalFiles() { canWatch: liveSyncAvailable ? DocumentSyncQueue.canWatch(metadata) : false, - // Is file watched in any workspace since sync updates all workspaces where file is referenced - watched: watchedInWorkspaces.length !== 0, + // pinnedWorkspaces: [], // This is the list of workspaceIds that have pinned this document + // watched: false, // boolean to indicate if this document is watched in ANY workspace }); filenames[cachefilename] = subfile; } - // Get documents pinned to at least one workspace. 
- const pinnedWorkspacesByDocument = ( - await Document.where( - { - docpath: { - in: Object.keys(filenames), - }, - pinned: true, - }, - null, - null, - null, - { - workspaceId: true, - docpath: true, - } - ) - ).reduce((result, { workspaceId, docpath }) => { - const filename = filenames[docpath]; - if (!result[filename]) { - result[filename] = []; - } - if (!result[filename].includes(workspaceId)) { - result[filename].push(workspaceId); - } - return result; - }, {}); - + // Grab the pinned workspaces and watched documents for this folder's documents + // at the time of the query so we don't have to re-query the database for each file + const pinnedWorkspacesByDocument = + await getPinnedWorkspacesByDocument(filenames); + const watchedDocumentsFilenames = + await getWatchedDocumentFilenames(filenames); for (const item of subdocs.items) { item.pinnedWorkspaces = pinnedWorkspacesByDocument[item.name] || []; + item.watched = + watchedDocumentsFilenames.hasOwnProperty(item.name) || false; } directory.items.push(subdocs); @@ -118,8 +91,13 @@ async function viewLocalFiles() { return directory; } -// Searches the vector-cache folder for existing information so we dont have to re-embed a -// document and can instead push directly to vector db. +/** + * Searches the vector-cache folder for existing information so we dont have to re-embed a + * document and can instead push directly to vector db. + * @param {string} filename - the filename to check for cached vector information + * @param {boolean} checkOnly - if true, only check if the file exists, do not return the cached data + * @returns {Promise<{exists: boolean, chunks: any[]}>} - a promise that resolves to an object containing the existence of the file and its cached chunks + */ async function cachedVectorInformation(filename = null, checkOnly = false) { if (!filename) return checkOnly ? 
false : { exists: false, chunks: [] }; @@ -248,6 +226,61 @@ function hasVectorCachedFiles() { return false; } +/** + * @param {string[]} filenames - array of filenames to check for pinned workspaces + * @returns {Promise<Record<string, any[]>>} - a record of filenames and their corresponding workspaceIds + */ +async function getPinnedWorkspacesByDocument(filenames = []) { + return ( + await Document.where( + { + docpath: { + in: Object.keys(filenames), + }, + pinned: true, + }, + null, + null, + null, + { + workspaceId: true, + docpath: true, + } + ) + ).reduce((result, { workspaceId, docpath }) => { + const filename = filenames[docpath]; + if (!result[filename]) result[filename] = []; + if (!result[filename].includes(workspaceId)) + result[filename].push(workspaceId); + return result; + }, {}); +} + +/** + * Get a record of filenames and their corresponding workspaceIds that have watched a document + * that will be used to determine if a document should be displayed in the watched documents sidebar + * @param {string[]} filenames - array of filenames to check for watched workspaces + * @returns {Promise<Record<string, any>>} - a record of filenames and their corresponding workspaceIds + */ +async function getWatchedDocumentFilenames(filenames = []) { + return ( + await Document.where( + { + docpath: { in: Object.keys(filenames) }, + watched: true, + }, + null, + null, + null, + { workspaceId: true, docpath: true } + ) + ).reduce((result, { workspaceId, docpath }) => { + const filename = filenames[docpath]; + result[filename] = workspaceId; + return result; + }, {}); +} + module.exports = { findDocumentInDocuments, cachedVectorInformation,