From 2e6a4313dd4750a62748ed217df27d470b74fccc Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Thu, 25 Sep 2025 17:48:24 -0700 Subject: [PATCH 1/7] wip support reranker for all vector dbs --- .../VectorDatabase/VectorSearchMode/index.jsx | 6 +- server/utils/vectorDbProviders/astra/index.js | 57 +++++++++++-- .../utils/vectorDbProviders/chroma/index.js | 79 +++++++++++++++--- server/utils/vectorDbProviders/lance/index.js | 80 +++++-------------- .../utils/vectorDbProviders/milvus/index.js | 53 ++++++++++-- .../utils/vectorDbProviders/pinecone/index.js | 53 ++++++++++-- .../utils/vectorDbProviders/qdrant/index.js | 57 +++++++++++-- server/utils/vectorDbProviders/rerank.js | 65 +++++++++++++++ .../utils/vectorDbProviders/weaviate/index.js | 53 ++++++++++-- .../utils/vectorDbProviders/zilliz/index.js | 53 ++++++++++-- 10 files changed, 432 insertions(+), 124 deletions(-) create mode 100644 server/utils/vectorDbProviders/rerank.js diff --git a/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx b/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx index 5e5816cda8d..f257156af96 100644 --- a/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx @@ -1,8 +1,5 @@ import { useState } from "react"; -// We dont support all vectorDBs yet for reranking due to complexities of how each provider -// returns information. We need to normalize the response data so Reranker can be used for each provider. -const supportedVectorDBs = ["lancedb"]; const hint = { default: { title: "Default", @@ -20,8 +17,7 @@ export default function VectorSearchMode({ workspace, setHasChanges }) { const [selection, setSelection] = useState( workspace?.vectorSearchMode ?? "default" ); - if (!workspace?.vectorDB || !supportedVectorDBs.includes(workspace?.vectorDB)) - return null; + if (!workspace?.vectorDB) return null; return (
diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index b34a8d83afa..8e0331df3be 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -5,6 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const sanitizeNamespace = (namespace) => { // If namespace already starts with ns_, don't add it again @@ -301,6 +302,7 @@ const AstraDB = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -319,14 +321,24 @@ const AstraDB = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace: sanitizedNamespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? await this.rerankedSimilarityResponse({ + client, + namespace: sanitizedNamespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace: sanitizedNamespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((metadata, i) => { return { ...metadata, text: contextTexts[i] }; @@ -378,6 +390,35 @@ const AstraDB = { }); return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments( + query, + sourceDocuments.map((doc) => ({ ...doc.metadata, score: null })), + { + topN, + similarityThreshold, + filterIdentifiers, + } + ); + }, allNamespaces: async function (client) { try { let header = new Headers(); diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index bc12818fd18..1f4468d0e2c 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -6,6 +6,7 @@ const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { parseAuthHeader } = require("../../http"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const COLLECTION_REGEX = new RegExp( /^(?!\d+\.\d+\.\d+\.\d+$)(?!.*\.\.)(?=^[a-zA-Z0-9][a-zA-Z0-9_-]{1,61}[a-zA-Z0-9]$).{3,63}$/ ); @@ -150,6 +151,52 @@ const Chroma = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments, contextTexts } = 
await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + const documentsForReranking = sourceDocuments.map((metadata, i) => ({ + ...metadata, + text: contextTexts[i], + })); + + const rerankedDocs = await rerankDocuments(query, documentsForReranking, { + topN, + similarityThreshold, + filterIdentifiers, + }); + + // Post-process to fix scores and contextTexts from the generic reranker. + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedDocs.sourceDocuments.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + result.sourceDocuments.push({ ...rest, score: rerank_score }); + result.contextTexts.push(item.text); + result.scores.push(rerank_score); + }); + return result; + }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const collection = await client @@ -348,12 +395,14 @@ const Chroma = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); const { client } = await this.connect(); - if (!(await this.namespaceExists(client, this.normalize(namespace)))) { + const collectionName = this.normalize(namespace); + if (!(await this.namespaceExists(client, collectionName))) { return { contextTexts: [], sources: [], @@ -362,16 +411,26 @@ const Chroma = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments, scores } = - await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const result = rerank + ? await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); + const { contextTexts, sourceDocuments, scores } = result; const sources = sourceDocuments.map((metadata, i) => ({ metadata: { ...metadata, diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js index 563095fe5db..b8d50e9a032 100644 --- a/server/utils/vectorDbProviders/lance/index.js +++ b/server/utils/vectorDbProviders/lance/index.js @@ -5,7 +5,7 @@ const { SystemSettings } = require("../../../models/systemSettings"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { sourceIdentifier } = require("../../chats"); -const { NativeEmbeddingReranker } = require("../../EmbeddingRerankers/native"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); /** * LancedDB Client connection object @@ -79,68 +79,24 @@ const LanceDb = { similarityThreshold = 0.25, filterIdentifiers = [], }) { - const reranker = new NativeEmbeddingReranker(); - const collection = await client.openTable(namespace); const totalEmbeddings = await this.namespaceCount(namespace); - const result = { - contextTexts: [], - sourceDocuments: [], - scores: [], - }; - - /** - * For reranking, we want to work with a larger number of results than the topN. - * This is because the reranker can only rerank the results it it given and we dont auto-expand the results. 
- * We want to give the reranker a larger number of results to work with. - * - * However, we cannot make this boundless as reranking is expensive and time consuming. - * So we limit the number of results to a maximum of 50 and a minimum of 10. - * This is a good balance between the number of results to rerank and the cost of reranking - * and ensures workspaces with 10K embeddings will still rerank within a reasonable timeframe on base level hardware. - * - * Benchmarks: - * On Intel Mac: 2.6 GHz 6-Core Intel Core i7 - 20 docs reranked in ~5.2 sec - */ - const searchLimit = Math.max( - 10, - Math.min(50, Math.ceil(totalEmbeddings * 0.1)) - ); - const vectorSearchResults = await collection - .vectorSearch(queryVector) - .distanceType("cosine") - .limit(searchLimit) - .toArray(); - - await reranker - .rerank(query, vectorSearchResults, { topK: topN }) - .then((rerankResults) => { - rerankResults.forEach((item) => { - if (this.distanceToSimilarity(item._distance) < similarityThreshold) - return; - const { vector: _, ...rest } = item; - if (filterIdentifiers.includes(sourceIdentifier(rest))) { - console.log( - "LanceDB: A source was filtered from context as it's parent document is pinned." - ); - return; - } - const score = - item?.rerank_score || this.distanceToSimilarity(item._distance); - - result.contextTexts.push(rest.text); - result.sourceDocuments.push({ - ...rest, - score, - }); - result.scores.push(score); - }); - }) - .catch((e) => { - console.error(e); - console.error("LanceDB::rerankedSimilarityResponse", e.message); - }); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const vectorSearchResults = await client + .openTable(namespace) + .then((tbl) => + tbl + .vectorSearch(queryVector) + .distanceType("cosine") + .limit(searchLimit) + .toArray() + ); - return result; + const reranked = await rerankDocuments(query, vectorSearchResults, { + topN, + similarityThreshold, + filterIdentifiers, + }); + return reranked; }, /** @@ -421,6 +377,8 @@ const LanceDb = { filterIdentifiers, }); + console.log("result", result); + const { contextTexts, sourceDocuments } = result; const sources = sourceDocuments.map((metadata, i) => { return { metadata: { ...metadata, text: contextTexts[i] } }; diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js index 2ddaad567bb..f8e4ec183f8 100644 --- a/server/utils/vectorDbProviders/milvus/index.js +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -10,6 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const Milvus = { name: "Milvus", @@ -299,6 +300,7 @@ const Milvus = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -313,14 +315,24 @@ const Milvus = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; @@ -368,6 +380,31 @@ const Milvus = { }); return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; if (!namespace) throw new Error("namespace required"); diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index c5c55acb58c..6329753d433 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -5,6 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const PineconeDB = { name: "Pinecone", @@ -76,6 +77,31 @@ const PineconeDB = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, namespace: async function (index, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const { namespaces } = await index.describeIndexStats(); @@ -247,6 +273,7 @@ const PineconeDB = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -258,14 +285,24 @@ const PineconeDB = { ); const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client: pineconeIndex, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client: pineconeIndex, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client: pineconeIndex, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js index 50fe5fab36e..b086c863f14 100644 --- a/server/utils/vectorDbProviders/qdrant/index.js +++ b/server/utils/vectorDbProviders/qdrant/index.js @@ -5,6 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const QDrant = { name: "QDrant", @@ -86,6 +87,35 @@ const QDrant = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments( + query, + sourceDocuments.map((doc) => ({ ...doc, score: null })), + { + topN, + similarityThreshold, + filterIdentifiers, + } + ); + }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const collection = await client.getCollection(namespace).catch(() => null); @@ -324,6 +354,7 @@ const QDrant = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -338,14 +369,24 @@ const QDrant = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((metadata, i) => { return { ...metadata, text: contextTexts[i] }; diff --git a/server/utils/vectorDbProviders/rerank.js b/server/utils/vectorDbProviders/rerank.js new file mode 100644 index 00000000000..dacee5db72a --- /dev/null +++ b/server/utils/vectorDbProviders/rerank.js @@ -0,0 +1,65 @@ +const { NativeEmbeddingReranker } = require("../EmbeddingRerankers/native"); +const { sourceIdentifier } = require("../chats"); + +async function rerankDocuments( + query, + documents, + options = { topN: 4, similarityThreshold: 0.25, filterIdentifiers: [] } +) { + const { topN, similarityThreshold, filterIdentifiers } = options; + const reranker = new NativeEmbeddingReranker(); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + await reranker + .rerank(query, documents, { topK: topN }) + .then((rerankResults) => { + rerankResults.forEach((item) => { + if (item.score < similarityThreshold) return; + + const { vector: _, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) { + console.log( + "A source was filtered from context as it's parent document is pinned." + ); + return; + } + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + }); + result.scores.push(item.score); + }); + }) + .catch((e) => { + console.error(e); + console.error("rerankDocuments", e.message); + }); + + return result; +} +/** + * For reranking, we want to work with a larger number of results than the topN. + * This is because the reranker can only rerank the results it it given and we dont auto-expand the results. + * We want to give the reranker a larger number of results to work with. + * + * However, we cannot make this boundless as reranking is expensive and time consuming. + * So we limit the number of results to a maximum of 50 and a minimum of 10. + * This is a good balance between the number of results to rerank and the cost of reranking + * and ensures workspaces with 10K embeddings will still rerank within a reasonable timeframe on base level hardware. 
+ * + * Benchmarks: + * On Intel Mac: 2.6 GHz 6-Core Intel Core i7 - 20 docs reranked in ~5.2 sec + */ +function getSearchLimit(totalEmbeddings = 0, topN = 4) { + return Math.max(10, Math.min(50, Math.ceil(totalEmbeddings * 0.1) || topN)); +} + +module.exports = { + rerankDocuments, + getSearchLimit, +}; diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js index 2385c5e8ef1..431ba7ed2c1 100644 --- a/server/utils/vectorDbProviders/weaviate/index.js +++ b/server/utils/vectorDbProviders/weaviate/index.js @@ -6,6 +6,7 @@ const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { camelCase } = require("../../helpers/camelcase"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const Weaviate = { name: "Weaviate", @@ -121,6 +122,31 @@ const Weaviate = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, allNamespaces: async function (client) { try { const { classes = [] } = await client.schema.getter().do(); @@ -368,6 +394,7 @@ const Weaviate = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -382,14 +409,24 @@ const Weaviate = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((metadata, i) => { return { ...metadata, text: contextTexts[i] }; diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js index ab866f4edd5..0aef7c2ec35 100644 --- a/server/utils/vectorDbProviders/zilliz/index.js +++ b/server/utils/vectorDbProviders/zilliz/index.js @@ -10,6 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); // Zilliz is basically a copy of Milvus DB class with a different constructor // to connect to the cloud @@ -292,6 +293,7 @@ const Zilliz = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -306,14 +308,24 @@ const Zilliz = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; @@ -359,6 +371,31 @@ const Zilliz = { }); return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; if (!namespace) throw new Error("namespace required"); From 9b717b2fc64e02f04bc6098da0f33d4c6df0659f Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Thu, 25 Sep 2025 18:34:15 -0700 Subject: [PATCH 2/7] simplify rerank.js + fix chroma/lance db reranking --- .../utils/vectorDbProviders/chroma/index.js | 23 +++++----- server/utils/vectorDbProviders/lance/index.js | 36 +++++++++++---- server/utils/vectorDbProviders/rerank.js | 45 ++----------------- 3 files changed, 43 insertions(+), 61 deletions(-) diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index 1f4468d0e2c..6adc42c42d1 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -6,7 +6,7 @@ const { v4: uuidv4 } = 
require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { parseAuthHeader } = require("../../http"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const COLLECTION_REGEX = new RegExp( /^(?!\d+\.\d+\.\d+\.\d+$)(?!.*\.\.)(?=^[a-zA-Z0-9][a-zA-Z0-9_-]{1,61}[a-zA-Z0-9]$).{3,63}$/ ); @@ -175,24 +175,23 @@ const Chroma = { text: contextTexts[i], })); - const rerankedDocs = await rerankDocuments(query, documentsForReranking, { - topN, - similarityThreshold, - filterIdentifiers, - }); - - // Post-process to fix scores and contextTexts from the generic reranker. + const rerankedResults = await rerank(query, documentsForReranking, topN); const result = { contextTexts: [], sourceDocuments: [], scores: [], }; - rerankedDocs.sourceDocuments.forEach((item) => { + rerankedResults.forEach((item) => { if (item.rerank_score < similarityThreshold) return; - const { rerank_score, ...rest } = item; - result.sourceDocuments.push({ ...rest, score: rerank_score }); - result.contextTexts.push(item.text); + const { vector: _, rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); result.scores.push(rerank_score); }); return result; diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js index b8d50e9a032..68ff7f21b67 100644 --- a/server/utils/vectorDbProviders/lance/index.js +++ b/server/utils/vectorDbProviders/lance/index.js @@ -5,7 +5,7 @@ const { SystemSettings } = require("../../../models/systemSettings"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); /** * LancedDB Client connection object @@ -91,12 +91,34 @@ const LanceDb = { .toArray() ); - const reranked = await rerankDocuments(query, vectorSearchResults, { - topN, - similarityThreshold, - filterIdentifiers, + const rerankedResults = await rerank(query, vectorSearchResults, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (this.distanceToSimilarity(item._distance) < similarityThreshold) + return; + const { vector: _, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) { + console.log( + "LanceDB: A source was filtered from context as it's parent document is pinned." 
+ ); + return; + } + const score = + item?.rerank_score || this.distanceToSimilarity(item._distance); + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score, + }); + result.scores.push(score); }); - return reranked; + return result; }, /** @@ -377,8 +399,6 @@ const LanceDb = { filterIdentifiers, }); - console.log("result", result); - const { contextTexts, sourceDocuments } = result; const sources = sourceDocuments.map((metadata, i) => { return { metadata: { ...metadata, text: contextTexts[i] } }; diff --git a/server/utils/vectorDbProviders/rerank.js b/server/utils/vectorDbProviders/rerank.js index dacee5db72a..98d1e810322 100644 --- a/server/utils/vectorDbProviders/rerank.js +++ b/server/utils/vectorDbProviders/rerank.js @@ -1,47 +1,10 @@ const { NativeEmbeddingReranker } = require("../EmbeddingRerankers/native"); -const { sourceIdentifier } = require("../chats"); -async function rerankDocuments( - query, - documents, - options = { topN: 4, similarityThreshold: 0.25, filterIdentifiers: [] } -) { - const { topN, similarityThreshold, filterIdentifiers } = options; +async function rerank(query, documents, topN = 4) { const reranker = new NativeEmbeddingReranker(); - const result = { - contextTexts: [], - sourceDocuments: [], - scores: [], - }; - - await reranker - .rerank(query, documents, { topK: topN }) - .then((rerankResults) => { - rerankResults.forEach((item) => { - if (item.score < similarityThreshold) return; - - const { vector: _, ...rest } = item; - if (filterIdentifiers.includes(sourceIdentifier(rest))) { - console.log( - "A source was filtered from context as it's parent document is pinned." - ); - return; - } - - result.contextTexts.push(rest.text); - result.sourceDocuments.push({ - ...rest, - }); - result.scores.push(item.score); - }); - }) - .catch((e) => { - console.error(e); - console.error("rerankDocuments", e.message); - }); - - return result; + return await reranker.rerank(query, documents, { topK: topN }); } + /** * For reranking, we want to work with a larger number of results than the topN. * This is because the reranker can only rerank the results it it given and we dont auto-expand the results. 
@@ -60,6 +23,6 @@ function getSearchLimit(totalEmbeddings = 0, topN = 4) { } module.exports = { - rerankDocuments, + rerank, getSearchLimit, }; From 72e1aec0f17f9d18c650044a00e4046260d11caa Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 16:38:44 -0700 Subject: [PATCH 3/7] fix scores + reranking for milvus, zilliz, astra providers --- server/utils/vectorDbProviders/astra/index.js | 49 +++++++---- .../utils/vectorDbProviders/milvus/index.js | 26 ++++-- .../utils/vectorDbProviders/zilliz/index.js | 81 ++++++++++++------- 3 files changed, 106 insertions(+), 50 deletions(-) diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index 8e0331df3be..2d25d2d7038 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const sanitizeNamespace = (namespace) => { // If namespace already starts with ns_, don't add it again @@ -340,9 +340,10 @@ const AstraDB = { filterIdentifiers, }); - const sources = sourceDocuments.map((metadata, i) => { - return { ...metadata, text: contextTexts[i] }; + const sources = sourceDocuments.map((doc, i) => { + return { metadata: doc, text: contextTexts[i] }; }); + console.log("sources", this.curateSources(sources)); return { contextTexts, sources: this.curateSources(sources), @@ -385,7 +386,10 @@ const AstraDB = { return; } result.contextTexts.push(response.metadata.text); - result.sourceDocuments.push(response); + result.sourceDocuments.push({ + ...response.metadata, + score: response.$similarity, + }); result.scores.push(response.$similarity); }); return result; @@ -409,15 +413,27 @@ const AstraDB = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments( - query, - sourceDocuments.map((doc) => ({ ...doc.metadata, score: null })), - { - topN, - similarityThreshold, - filterIdentifiers, - } - ); + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); + }); + return result; }, allNamespaces: async function (client) { try { @@ -473,12 +489,11 @@ const AstraDB = { curateSources: function (sources = []) { const documents = []; for (const source of sources) { - if (Object.keys(source).length > 0) { - const metadata = source.hasOwnProperty("metadata") - ? source.metadata - : source; + const { metadata = {} } = source; + if (Object.keys(metadata).length > 0) { documents.push({ ...metadata, + ...(source.text ? 
{ text: source.text } : {}), }); } } diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js index f8e4ec183f8..fe0c9a3e6e0 100644 --- a/server/utils/vectorDbProviders/milvus/index.js +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const Milvus = { name: "Milvus", @@ -399,11 +399,27 @@ const Milvus = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js index 0aef7c2ec35..dff29e17a82 100644 --- a/server/utils/vectorDbProviders/zilliz/index.js +++ b/server/utils/vectorDbProviders/zilliz/index.js @@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); // Zilliz is basically a copy of Milvus DB class with a different constructor // to connect to the cloud @@ -158,30 +158,38 @@ const Zilliz = { vectorDimension = chunks[0][0].values.length || null; await this.getOrCreateCollection(client, namespace, vectorDimension); - for (const chunk of chunks) { - // Before sending to Pinecone and saving the records to our db - // we need to assign the id of each chunk that is stored in the cached file. - const newChunks = chunk.map((chunk) => { - const id = uuidv4(); - documentVectors.push({ docId, vectorId: id }); - return { id, vector: chunk.values, metadata: chunk.metadata }; - }); - const insertResult = await client.insert({ - collection_name: this.normalize(namespace), - data: newChunks, - }); + try { + for (const chunk of chunks) { + // Before sending to Zilliz and saving the records to our db + // we need to assign the id of each chunk that is stored in the cached file. + const newChunks = chunk.map((chunk) => { + const id = uuidv4(); + documentVectors.push({ docId, vectorId: id }); + return { id, vector: chunk.values, metadata: chunk.metadata }; + }); + const insertResult = await client.insert({ + collection_name: this.normalize(namespace), + data: newChunks, + }); - if (insertResult?.status.error_code !== "Success") { - throw new Error( - `Error embedding into Zilliz! 
Reason:${insertResult?.status.reason}` - ); + if (insertResult?.status.error_code !== "Success") { + throw new Error( + `Error embedding into Milvus! Reason:${insertResult?.status.reason}` + ); + } } + await DocumentVectors.bulkInsert(documentVectors); + await client.flushSync({ + collection_names: [this.normalize(namespace)], + }); + return { vectorized: true, error: null }; + } catch (insertError) { + console.error( + "Error inserting cached chunks:", + insertError.message + ); + return { vectorized: false, error: insertError.message }; } - await DocumentVectors.bulkInsert(documentVectors); - await client.flushSync({ - collection_names: [this.normalize(namespace)], - }); - return { vectorized: true, error: null }; } } @@ -240,7 +248,7 @@ const Zilliz = { data: chunk.map((item) => ({ id: item.id, vector: item.values, - metadata: chunk.metadata, + metadata: item.metadata, })), }); @@ -330,6 +338,7 @@ const Zilliz = { const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; }); + return { contextTexts, sources: this.curateSources(sources), @@ -362,6 +371,7 @@ const Zilliz = { ); return; } + result.contextTexts.push(match.metadata.text); result.sourceDocuments.push({ ...match.metadata, @@ -390,11 +400,27 @@ const Zilliz = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; @@ -431,7 +457,6 @@ const Zilliz = { }); } } - return documents; }, }; From 0f227ce283800bf5d4803fd0512f1db7d93df8a8 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 17:17:00 -0700 Subject: [PATCH 4/7] pgvector reranking support --- .../utils/vectorDbProviders/pgvector/index.js | 115 +++++++++++++----- 1 file changed, 85 insertions(+), 30 deletions(-) diff --git a/server/utils/vectorDbProviders/pgvector/index.js b/server/utils/vectorDbProviders/pgvector/index.js index d5c86907566..53c23b8bc1b 100644 --- a/server/utils/vectorDbProviders/pgvector/index.js +++ b/server/utils/vectorDbProviders/pgvector/index.js @@ -3,6 +3,7 @@ const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { TextSplitter } = require("../../TextSplitter"); const { v4: uuidv4 } = require("uuid"); const { sourceIdentifier } = require("../../chats"); +const { rerank, getSearchLimit } = require("../rerank"); /* Embedding Table Schema (table name defined by user) @@ -158,29 +159,31 @@ const PGVector = { }, PGVector.connectionTimeout); }); - const connectionPromise = new Promise(async (resolve) => { - let pgClient = null; - try { - pgClient = this.client(connectionString); - await pgClient.connect(); - const result = await pgClient.query(this.getTablesSql); - - if (result.rows.length !== 0 && !!tableName) { - const tableExists = result.rows.some( - (row) => row.tablename === tableName - ); - if (tableExists) - await this.validateExistingEmbeddingTableSchema( - pgClient, 
- tableName + const connectionPromise = new Promise((resolve) => { + (async () => { + let pgClient = null; + try { + pgClient = this.client(connectionString); + await pgClient.connect(); + const result = await pgClient.query(this.getTablesSql); + + if (result.rows.length !== 0 && !!tableName) { + const tableExists = result.rows.some( + (row) => row.tablename === tableName ); + if (tableExists) + await this.validateExistingEmbeddingTableSchema( + pgClient, + tableName + ); + } + resolve({ error: null, success: true }); + } catch (err) { + resolve({ error: err.message, success: false }); + } finally { + if (pgClient) await pgClient.end(); } - resolve({ error: null, success: true }); - } catch (err) { - resolve({ error: err.message, success: false }); - } finally { - if (pgClient) await pgClient.end(); - } + })(); }); // Race the connection attempt against the timeout @@ -352,6 +355,48 @@ const PGVector = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); + }); + return result; + }, + normalizeVector: function (vector) { const magnitude = Math.sqrt( vector.reduce((sum, val) => sum + val * val, 0) @@ -655,6 +700,7 @@ const PGVector = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { let connection = null; if (!namespace || !input || !LLMConnector) @@ -675,16 +721,25 @@ const PGVector = { } const queryVector = await LLMConnector.embedTextInput(input); - const result = await this.similarityResponse({ - client: connection, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client: connection, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client: connection, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); - const { contextTexts, sourceDocuments } = result; const sources = sourceDocuments.map((metadata, i) => { return { metadata: { ...metadata, text: contextTexts[i] } }; }); From bbfc7b965c204164da27741e3090a239c64d4134 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 17:38:14 -0700 Subject: [PATCH 5/7] support reranking for pinecone, qdrant, weaviate providers --- .../utils/vectorDbProviders/pinecone/index.js | 26 ++++++++--- .../utils/vectorDbProviders/qdrant/index.js | 44 ++++++++++++------- .../utils/vectorDbProviders/weaviate/index.js | 43 ++++++++++++------ 3 files changed, 79 insertions(+), 34 deletions(-) diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index 6329753d433..67a08b34ea0 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const PineconeDB = { name: "Pinecone", @@ -96,11 +96,27 @@ const PineconeDB = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, namespace: async function (index, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js index b086c863f14..285884e199f 100644 --- a/server/utils/vectorDbProviders/qdrant/index.js +++ b/server/utils/vectorDbProviders/qdrant/index.js @@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const QDrant = { name: "QDrant", @@ -81,6 +81,7 @@ const QDrant = { result.sourceDocuments.push({ ...(response?.payload || {}), id: response.id, + score: response.score, }); result.scores.push(response.score); }); @@ -106,15 +107,27 @@ const QDrant = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments( - query, - sourceDocuments.map((doc) => ({ ...doc, score: null })), - { - topN, - similarityThreshold, - 
filterIdentifiers, - } - ); + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); + }); + return result; }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); @@ -388,8 +401,8 @@ const QDrant = { filterIdentifiers, }); - const sources = sourceDocuments.map((metadata, i) => { - return { ...metadata, text: contextTexts[i] }; + const sources = sourceDocuments.map((doc, i) => { + return { metadata: doc, text: contextTexts[i] }; }); return { contextTexts, @@ -431,12 +444,11 @@ const QDrant = { curateSources: function (sources = []) { const documents = []; for (const source of sources) { - if (Object.keys(source).length > 0) { - const metadata = source.hasOwnProperty("metadata") - ? source.metadata - : source; + const { metadata = {} } = source; + if (Object.keys(metadata).length > 0) { documents.push({ ...metadata, + ...(source.text ? { text: source.text } : {}), }); } } diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js index 431ba7ed2c1..8363fa5c8d4 100644 --- a/server/utils/vectorDbProviders/weaviate/index.js +++ b/server/utils/vectorDbProviders/weaviate/index.js @@ -6,7 +6,7 @@ const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { camelCase } = require("../../helpers/camelcase"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const Weaviate = { name: "Weaviate", @@ -116,7 +116,7 @@ const Weaviate = { return; } result.contextTexts.push(rest.text); - result.sourceDocuments.push({ ...rest, id }); + result.sourceDocuments.push({ ...rest, id, score: certainty }); result.scores.push(certainty); }); @@ -141,11 +141,27 @@ const Weaviate = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, allNamespaces: async function (client) { try { @@ -428,8 +444,8 @@ const Weaviate = { filterIdentifiers, }); - const sources = sourceDocuments.map((metadata, i) => { - return { ...metadata, text: contextTexts[i] }; + const sources = sourceDocuments.map((doc, i) => { + return { metadata: doc, text: contextTexts[i] }; }); return { contextTexts, @@ -468,11 +484,12 @@ const Weaviate = { curateSources: function (sources = []) { const documents = []; for (const source of sources) { - if (Object.keys(source).length > 0) { - const 
metadata = source.hasOwnProperty("metadata") - ? source.metadata - : source; - documents.push({ ...metadata }); + const { metadata = {} } = source; + if (Object.keys(metadata).length > 0) { + documents.push({ + ...metadata, + ...(source.text ? { text: source.text } : {}), + }); } } From 6e47ad8378ff1c548d1acc0da6e7cf02467d050c Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 17:39:34 -0700 Subject: [PATCH 6/7] unneeded console log --- server/utils/vectorDbProviders/astra/index.js | 1 - 1 file changed, 1 deletion(-) diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index 2d25d2d7038..c4f8d087299 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -343,7 +343,6 @@ const AstraDB = { const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; }); - console.log("sources", this.curateSources(sources)); return { contextTexts, sources: this.curateSources(sources), From b03ec4cd6237dce36caa3aae79da533ef2d28d76 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Mon, 29 Sep 2025 16:36:04 -0700 Subject: [PATCH 7/7] refactor structure to prep for other external embedding rerankers --- .../rerank.js | 9 +++--- server/utils/helpers/index.js | 29 +++++++++++++++++++ server/utils/vectorDbProviders/astra/index.js | 4 +-- .../utils/vectorDbProviders/chroma/index.js | 4 +-- server/utils/vectorDbProviders/lance/index.js | 10 +++---- .../utils/vectorDbProviders/milvus/index.js | 4 +-- .../utils/vectorDbProviders/pgvector/index.js | 4 +-- .../utils/vectorDbProviders/pinecone/index.js | 4 +-- .../utils/vectorDbProviders/qdrant/index.js | 4 +-- .../utils/vectorDbProviders/weaviate/index.js | 4 +-- .../utils/vectorDbProviders/zilliz/index.js | 4 +-- 11 files changed, 54 insertions(+), 26 deletions(-) rename server/utils/{vectorDbProviders => EmbeddingRerankers}/rerank.js (83%) diff --git a/server/utils/vectorDbProviders/rerank.js b/server/utils/EmbeddingRerankers/rerank.js similarity index 83% rename from server/utils/vectorDbProviders/rerank.js rename to server/utils/EmbeddingRerankers/rerank.js index 98d1e810322..8bbe588d54e 100644 --- a/server/utils/vectorDbProviders/rerank.js +++ b/server/utils/EmbeddingRerankers/rerank.js @@ -1,7 +1,7 @@ -const { NativeEmbeddingReranker } = require("../EmbeddingRerankers/native"); +const { getRerankerProvider } = require("../helpers"); async function rerank(query, documents, topN = 4) { - const reranker = new NativeEmbeddingReranker(); + const reranker = getRerankerProvider(); return await reranker.rerank(query, documents, { topK: topN }); } @@ -18,8 +18,9 @@ async function rerank(query, documents, topN = 4) { * Benchmarks: * On Intel Mac: 2.6 GHz 6-Core Intel Core i7 - 20 docs reranked in ~5.2 sec */ -function getSearchLimit(totalEmbeddings = 0, topN = 4) { - return Math.max(10, Math.min(50, Math.ceil(totalEmbeddings * 0.1) || topN)); + +function getSearchLimit(totalEmbeddings = 0) { + return Math.max(10, Math.min(50, Math.ceil(totalEmbeddings * 0.1))); } module.exports = { diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 12327698954..a6fc4230451 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -75,6 +75,11 @@ * @property {Function} embedChunks - Embeds multiple chunks of text. */ +/** + * @typedef {Object} BaseRerankerProvider + * @property {function(string, {text: string}[], {topK: number}): Promise} rerank - Reranks a list of documents. 
+ */
+
 /**
  * Gets the systems current vector database provider.
  * @param {('pinecone' | 'chroma' | 'chromacloud' | 'lancedb' | 'weaviate' | 'qdrant' | 'milvus' | 'zilliz' | 'astra') | null} getExactly - If provided, this will return an explit provider.
@@ -463,6 +468,29 @@ function toChunks(arr, size) {
   );
 }
 
+/**
+ * Returns the Reranker provider.
+ * @returns {BaseRerankerProvider}
+ */
+function getRerankerProvider() {
+  const rerankerSelection = process.env.RERANKING_PROVIDER ?? "native";
+  switch (rerankerSelection) {
+    case "native":
+      const {
+        NativeEmbeddingReranker,
+      } = require("../EmbeddingRerankers/native");
+      return new NativeEmbeddingReranker();
+    default:
+      console.log(
+        `[RERANKING] Reranker provider ${rerankerSelection} is not supported. Using native reranker as fallback.`
+      );
+      const {
+        NativeEmbeddingReranker: Native,
+      } = require("../EmbeddingRerankers/native");
+      return new Native();
+  }
+}
+
 module.exports = {
   getEmbeddingEngineSelection,
   maximumChunkLength,
@@ -471,4 +499,5 @@
   getBaseLLMProviderModel,
   getLLMProvider,
   toChunks,
+  getRerankerProvider,
 };
diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js
index c4f8d087299..783e6340733 100644
--- a/server/utils/vectorDbProviders/astra/index.js
+++ b/server/utils/vectorDbProviders/astra/index.js
@@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const sanitizeNamespace = (namespace) => {
   // If namespace already starts with ns_, don't add it again
@@ -403,7 +403,7 @@ const AstraDB = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index 6adc42c42d1..a8aa09018b0 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -6,7 +6,7 @@ const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { parseAuthHeader } = require("../../http");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 const COLLECTION_REGEX = new RegExp(
   /^(?!\d+\.\d+\.\d+\.\d+$)(?!.*\.\.)(?=^[a-zA-Z0-9][a-zA-Z0-9_-]{1,61}[a-zA-Z0-9]$).{3,63}$/
 );
@@ -161,7 +161,7 @@ const Chroma = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments, contextTexts } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js
index 68ff7f21b67..adda0cefc44 100644
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@@ -5,7 +5,7 @@ const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 /**
  * LancedDB Client connection object
@@ -80,7 +80,7 @@ const LanceDb = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const vectorSearchResults = await client
       .openTable(namespace)
       .then((tbl) =>
@@ -99,8 +99,7 @@
     };
 
     rerankedResults.forEach((item) => {
-      if (this.distanceToSimilarity(item._distance) < similarityThreshold)
-        return;
+      if (item.rerank_score < similarityThreshold) return;
       const { vector: _, ...rest } = item;
       if (filterIdentifiers.includes(sourceIdentifier(rest))) {
         console.log(
@@ -108,8 +107,7 @@
         );
         return;
       }
-      const score =
-        item?.rerank_score || this.distanceToSimilarity(item._distance);
+      const score = item.rerank_score;
 
       result.contextTexts.push(rest.text);
       result.sourceDocuments.push({
diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js
index fe0c9a3e6e0..b1f6800c23f 100644
--- a/server/utils/vectorDbProviders/milvus/index.js
+++ b/server/utils/vectorDbProviders/milvus/index.js
@@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const Milvus = {
   name: "Milvus",
@@ -390,7 +390,7 @@ const Milvus = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/pgvector/index.js b/server/utils/vectorDbProviders/pgvector/index.js
index 53c23b8bc1b..21775ecdb25 100644
--- a/server/utils/vectorDbProviders/pgvector/index.js
+++ b/server/utils/vectorDbProviders/pgvector/index.js
@@ -3,7 +3,7 @@ const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { TextSplitter } = require("../../TextSplitter");
 const { v4: uuidv4 } = require("uuid");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 /*
 Embedding Table Schema (table name defined by user)
@@ -365,7 +365,7 @@ const PGVector = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js
index 67a08b34ea0..9335c90dbcd 100644
--- a/server/utils/vectorDbProviders/pinecone/index.js
+++ b/server/utils/vectorDbProviders/pinecone/index.js
@@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const PineconeDB = {
   name: "Pinecone",
@@ -87,7 +87,7 @@ const PineconeDB = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js
index 285884e199f..5721c3f77a8 100644
--- a/server/utils/vectorDbProviders/qdrant/index.js
+++ b/server/utils/vectorDbProviders/qdrant/index.js
@@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const QDrant = {
   name: "QDrant",
@@ -98,7 +98,7 @@ const QDrant = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js
index 8363fa5c8d4..6a475380f7d 100644
--- a/server/utils/vectorDbProviders/weaviate/index.js
+++ b/server/utils/vectorDbProviders/weaviate/index.js
@@ -6,7 +6,7 @@ const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { camelCase } = require("../../helpers/camelcase");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const Weaviate = {
   name: "Weaviate",
@@ -132,7 +132,7 @@ const Weaviate = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js
index dff29e17a82..189d2eb85d6 100644
--- a/server/utils/vectorDbProviders/zilliz/index.js
+++ b/server/utils/vectorDbProviders/zilliz/index.js
@@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 // Zilliz is basically a copy of Milvus DB class with a different constructor
 // to connect to the cloud
@@ -391,7 +391,7 @@ const Zilliz = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
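
For anyone reviewing the series, the getRerankerProvider() helper added to server/utils/helpers/index.js above is the piece the provider changes hang off of. A minimal usage sketch from a vector DB provider's point of view, assuming only what the hunk itself shows (the RERANKING_PROVIDER environment variable and the native fallback):

// Illustrative only, not part of this patch. RERANKING_PROVIDER defaults to
// "native" when unset; any unrecognized value logs a warning and falls back
// to the NativeEmbeddingReranker, per the switch statement added above.
const { getRerankerProvider } = require("../../helpers");

const reranker = getRerankerProvider();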
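
The per-provider hunks all repeat the same two changes: getSearchLimit() now sizes the candidate pool from the namespace's embedding count alone (topN is applied later, when the candidates are reranked), and LanceDB thresholds on rerank_score instead of the distance-derived similarity. A sketch of that shared flow, with the helper signatures inferred from the call sites above; the real implementations live in server/utils/EmbeddingRerankers/rerank.js and may differ:

// Illustrative only, not part of this patch. "provider" stands in for any of
// the vector DB providers touched above (Astra, Chroma, Milvus, PGVector, ...).
const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");

async function rerankedSimilaritySearch(provider, {
  client,
  namespace,
  query,
  queryVector,
  topN = 4,
  similarityThreshold = 0.25,
  filterIdentifiers = [],
}) {
  // Over-fetch: the candidate pool depends only on how many vectors the namespace holds.
  const totalEmbeddings = await provider.namespaceCount(namespace);
  const searchLimit = getSearchLimit(totalEmbeddings);

  const { sourceDocuments } = await provider.similarityResponse({
    client,
    namespace,
    queryVector,
    similarityThreshold,
    topN: searchLimit,
    filterIdentifiers,
  });

  // The reranker re-scores candidates against the raw query text and trims the
  // result back down to topN; downstream code filters on rerank_score.
  return await rerank(query, sourceDocuments, {
    topN,
    similarityThreshold,
    filterIdentifiers,
  });
}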