这是indexloc提供的服务，不要输入任何密码
Skip to content

PR#2355 Continued + expanded scope #2365

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions server/models/documentSyncQueue.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ const DocumentSyncQueue = {
return new Date(Number(new Date()) + queueRecord.staleAfterMs);
},

/**
* Check if the document can be watched based on the metadata fields
* @param {object} metadata - metadata to check
* @param {string} metadata.title - title of the document
* @param {string} metadata.chunkSource - chunk source of the document
* @returns {boolean} - true if the document can be watched, false otherwise
*/
canWatch: function ({ title, chunkSource = null } = {}) {
if (chunkSource.startsWith("link://") && title.endsWith(".html"))
return true; // If is web-link material (prior to feature most chunkSources were links://)
Expand Down
19 changes: 3 additions & 16 deletions server/models/documents.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,33 +57,20 @@ const Document = {
}
},

getOnlyWorkspaceIds: async function (clause = {}) {
try {
const workspaceIds = await prisma.workspace_documents.findMany({
where: clause,
select: {
workspaceId: true,
},
});
return workspaceIds.map((record) => record.workspaceId) || [];
} catch (error) {
console.error(error.message);
return [];
}
},

where: async function (
clause = {},
limit = null,
orderBy = null,
include = null
include = null,
select = null
) {
try {
const results = await prisma.workspace_documents.findMany({
where: clause,
...(limit !== null ? { take: limit } : {}),
...(orderBy !== null ? { orderBy } : {}),
...(include !== null ? { include } : {}),
...(select !== null ? { select: { ...select } } : {}),
});
return results;
} catch (error) {
Expand Down
95 changes: 79 additions & 16 deletions server/utils/files/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,37 +44,40 @@ async function viewLocalFiles() {
items: [],
};
const subfiles = fs.readdirSync(folderPath);
const filenames = {};

for (const subfile of subfiles) {
if (path.extname(subfile) !== ".json") continue;
const filePath = path.join(folderPath, subfile);
const rawData = fs.readFileSync(filePath, "utf8");
const cachefilename = `${file}/${subfile}`;
const { pageContent, ...metadata } = JSON.parse(rawData);
const pinnedInWorkspaces = await Document.getOnlyWorkspaceIds({
docpath: cachefilename,
pinned: true,
});
const watchedInWorkspaces = liveSyncAvailable
? await Document.getOnlyWorkspaceIds({
docpath: cachefilename,
watched: true,
})
: [];

subdocs.items.push({
name: subfile,
type: "file",
...metadata,
cached: await cachedVectorInformation(cachefilename, true),
pinnedWorkspaces: pinnedInWorkspaces,
canWatch: liveSyncAvailable
? DocumentSyncQueue.canWatch(metadata)
: false,
// Is file watched in any workspace since sync updates all workspaces where file is referenced
watched: watchedInWorkspaces.length !== 0,
// pinnedWorkspaces: [], // This is the list of workspaceIds that have pinned this document
// watched: false, // boolean to indicate if this document is watched in ANY workspace
});
filenames[cachefilename] = subfile;
}

// Grab the pinned workspaces and watched documents for this folder's documents
// at the time of the query so we don't have to re-query the database for each file
const pinnedWorkspacesByDocument =
await getPinnedWorkspacesByDocument(filenames);
const watchedDocumentsFilenames =
await getWatchedDocumentFilenames(filenames);
for (const item of subdocs.items) {
item.pinnedWorkspaces = pinnedWorkspacesByDocument[item.name] || [];
item.watched =
watchedDocumentsFilenames.hasOwnProperty(item.name) || false;
}

directory.items.push(subdocs);
}
}
Expand All @@ -88,8 +91,13 @@ async function viewLocalFiles() {
return directory;
}

// Searches the vector-cache folder for existing information so we dont have to re-embed a
// document and can instead push directly to vector db.
/**
* Searches the vector-cache folder for existing information so we dont have to re-embed a
* document and can instead push directly to vector db.
* @param {string} filename - the filename to check for cached vector information
* @param {boolean} checkOnly - if true, only check if the file exists, do not return the cached data
* @returns {Promise<{exists: boolean, chunks: any[]}>} - a promise that resolves to an object containing the existence of the file and its cached chunks
*/
async function cachedVectorInformation(filename = null, checkOnly = false) {
if (!filename) return checkOnly ? false : { exists: false, chunks: [] };

Expand Down Expand Up @@ -218,6 +226,61 @@ function hasVectorCachedFiles() {
return false;
}

/**
 * Resolve, with a single query, which workspaces have pinned each of the given documents.
 * @param {Record<string, string>} filenames - map of document `docpath` => filename. The keys are
 * the docpaths that get queried; the values become the keys of the returned record.
 * @returns {Promise<Record<string, Array>>} - filename => de-duplicated list of workspaceIds that
 * have the document pinned. Filenames without any pinned row are absent from the record.
 */
async function getPinnedWorkspacesByDocument(filenames = {}) {
  const docpaths = Object.keys(filenames);
  // Nothing to look up (e.g. a folder without documents) - skip the database round trip.
  if (docpaths.length === 0) return {};

  const pinnedRows = await Document.where(
    {
      docpath: { in: docpaths },
      pinned: true,
    },
    null, // limit
    null, // orderBy
    null, // include
    { workspaceId: true, docpath: true } // select only the columns needed for grouping
  );

  const workspacesByFilename = {};
  for (const { workspaceId, docpath } of pinnedRows) {
    // Translate the stored docpath back to the filename the caller indexes by.
    const filename = filenames[docpath];
    if (!workspacesByFilename[filename]) workspacesByFilename[filename] = [];
    if (!workspacesByFilename[filename].includes(workspaceId))
      workspacesByFilename[filename].push(workspaceId);
  }
  return workspacesByFilename;
}

/**
 * Resolve, with a single query, which of the given documents are watched in at least one workspace.
 * The presence of a filename key in the result is what marks a document as watched.
 * @param {Record<string, string>} filenames - map of document `docpath` => filename. The keys are
 * the docpaths that get queried; the values become the keys of the returned record.
 * @returns {Promise<Record<string, *>>} - filename => workspaceId of one workspace watching the
 * document (when several rows match, the last row wins). Unwatched filenames are absent.
 */
async function getWatchedDocumentFilenames(filenames = {}) {
  const docpaths = Object.keys(filenames);
  // Nothing to look up (e.g. a folder without documents) - skip the database round trip.
  if (docpaths.length === 0) return {};

  const watchedRows = await Document.where(
    {
      docpath: { in: docpaths },
      watched: true,
    },
    null, // limit
    null, // orderBy
    null, // include
    { workspaceId: true, docpath: true } // select only the columns needed below
  );

  const watchedByFilename = {};
  for (const { workspaceId, docpath } of watchedRows) {
    // Translate the stored docpath back to the filename the caller indexes by.
    watchedByFilename[filenames[docpath]] = workspaceId;
  }
  return watchedByFilename;
}

module.exports = {
findDocumentInDocuments,
cachedVectorInformation,
Expand Down