From 5ee9be2c2e8977be04f5bf6e73426eedce545098 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Mon, 19 May 2025 13:04:59 -0700 Subject: [PATCH] Filter malformed post-processed files from UI --- frontend/src/locales/zh/common.js | 3 ++- server/utils/files/index.js | 31 ++++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/frontend/src/locales/zh/common.js b/frontend/src/locales/zh/common.js index 39161ce8448..9e764937f35 100644 --- a/frontend/src/locales/zh/common.js +++ b/frontend/src/locales/zh/common.js @@ -808,7 +808,8 @@ const TRANSLATIONS = { name: "Obsidian", description: "一键导入 Obsidian 仓库。", vault_location: "仓库位置", - vault_description: "选择你的 Obsidian 仓库文件夹,以导入所有笔记及其关联。", + vault_description: + "选择你的 Obsidian 仓库文件夹,以导入所有笔记及其关联。", selected_files: "找到 {{count}} 个 Markdown 文件", importing: "正在导入保险库…", import_vault: "导入保险库", diff --git a/server/utils/files/index.js b/server/utils/files/index.js index ba6c8230d4c..3f186f356fe 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -61,9 +61,9 @@ async function viewLocalFiles() { ); filenames[cachefilename] = subfile; } - const results = await Promise.all(filePromises).then((results) => - results.filter((i) => !!i) - ); + const results = await Promise.all(filePromises) + .then((results) => results.filter((i) => !!i)) // Remove null results + .then((results) => results.filter((i) => hasRequiredMetadata(i))); // Remove invalid file structures subdocs.items.push(...results); // Grab the pinned workspaces and watched documents for this folder's documents @@ -433,6 +433,31 @@ async function fileToPickerData({ }; } +const REQUIRED_FILE_OBJECT_FIELDS = [ + "name", + "type", + "url", + "title", + "docAuthor", + "description", + "docSource", + "chunkSource", + "published", + "wordCount", + "token_count_estimate", +]; + +/** + * Checks if a given metadata object has all the required fields + * @param {{name: string, type: string, url: string, title: string, 
docAuthor: string, description: string, docSource: string, chunkSource: string, published: string, wordCount: number, token_count_estimate: number}} metadata - The metadata object to check (fileToPickerData)
+ * @returns {boolean} - Returns true if the metadata object has every required field as an own property, false otherwise (including when metadata is null)
+ */
+function hasRequiredMetadata(metadata = {}) {
+  // The `= {}` default only replaces `undefined`; an explicit `null` would
+  // otherwise throw on the property check below.
+  if (metadata === null) return false;
+  return REQUIRED_FILE_OBJECT_FIELDS.every((field) =>
+    // Borrow the method from Object.prototype so the check still works when
+    // the object has no prototype or defines its own "hasOwnProperty" key.
+    Object.prototype.hasOwnProperty.call(metadata, field)
+  );
+}
+
 module.exports = {
   findDocumentInDocuments,
   cachedVectorInformation,