From 2e6a4313dd4750a62748ed217df27d470b74fccc Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Thu, 25 Sep 2025 17:48:24 -0700 Subject: [PATCH 1/7] wip support reranker for all vector dbs --- .../VectorDatabase/VectorSearchMode/index.jsx | 6 +- server/utils/vectorDbProviders/astra/index.js | 57 +++++++++++-- .../utils/vectorDbProviders/chroma/index.js | 79 +++++++++++++++--- server/utils/vectorDbProviders/lance/index.js | 80 +++++-------------- .../utils/vectorDbProviders/milvus/index.js | 53 ++++++++++-- .../utils/vectorDbProviders/pinecone/index.js | 53 ++++++++++-- .../utils/vectorDbProviders/qdrant/index.js | 57 +++++++++++-- server/utils/vectorDbProviders/rerank.js | 65 +++++++++++++++ .../utils/vectorDbProviders/weaviate/index.js | 53 ++++++++++-- .../utils/vectorDbProviders/zilliz/index.js | 53 ++++++++++-- 10 files changed, 432 insertions(+), 124 deletions(-) create mode 100644 server/utils/vectorDbProviders/rerank.js diff --git a/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx b/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx index 5e5816cda8d..f257156af96 100644 --- a/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/VectorDatabase/VectorSearchMode/index.jsx @@ -1,8 +1,5 @@ import { useState } from "react"; -// We dont support all vectorDBs yet for reranking due to complexities of how each provider -// returns information. We need to normalize the response data so Reranker can be used for each provider. -const supportedVectorDBs = ["lancedb"]; const hint = { default: { title: "Default", @@ -20,8 +17,7 @@ export default function VectorSearchMode({ workspace, setHasChanges }) { const [selection, setSelection] = useState( workspace?.vectorSearchMode ?? "default" ); - if (!workspace?.vectorDB || !supportedVectorDBs.includes(workspace?.vectorDB)) - return null; + if (!workspace?.vectorDB) return null; return (
diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index b34a8d83afa..8e0331df3be 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -5,6 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const sanitizeNamespace = (namespace) => { // If namespace already starts with ns_, don't add it again @@ -301,6 +302,7 @@ const AstraDB = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -319,14 +321,24 @@ const AstraDB = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace: sanitizedNamespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? await this.rerankedSimilarityResponse({ + client, + namespace: sanitizedNamespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace: sanitizedNamespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((metadata, i) => { return { ...metadata, text: contextTexts[i] }; @@ -378,6 +390,35 @@ const AstraDB = { }); return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments( + query, + sourceDocuments.map((doc) => ({ ...doc.metadata, score: null })), + { + topN, + similarityThreshold, + filterIdentifiers, + } + ); + }, allNamespaces: async function (client) { try { let header = new Headers(); diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index bc12818fd18..1f4468d0e2c 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -6,6 +6,7 @@ const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { parseAuthHeader } = require("../../http"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const COLLECTION_REGEX = new RegExp( /^(?!\d+\.\d+\.\d+\.\d+$)(?!.*\.\.)(?=^[a-zA-Z0-9][a-zA-Z0-9_-]{1,61}[a-zA-Z0-9]$).{3,63}$/ ); @@ -150,6 +151,52 @@ const Chroma = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments, contextTexts } = 
await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + const documentsForReranking = sourceDocuments.map((metadata, i) => ({ + ...metadata, + text: contextTexts[i], + })); + + const rerankedDocs = await rerankDocuments(query, documentsForReranking, { + topN, + similarityThreshold, + filterIdentifiers, + }); + + // Post-process to fix scores and contextTexts from the generic reranker. + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedDocs.sourceDocuments.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + result.sourceDocuments.push({ ...rest, score: rerank_score }); + result.contextTexts.push(item.text); + result.scores.push(rerank_score); + }); + return result; + }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const collection = await client @@ -348,12 +395,14 @@ const Chroma = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); const { client } = await this.connect(); - if (!(await this.namespaceExists(client, this.normalize(namespace)))) { + const collectionName = this.normalize(namespace); + if (!(await this.namespaceExists(client, collectionName))) { return { contextTexts: [], sources: [], @@ -362,16 +411,26 @@ const Chroma = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments, scores } = - await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const result = rerank + ? await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); + const { contextTexts, sourceDocuments, scores } = result; const sources = sourceDocuments.map((metadata, i) => ({ metadata: { ...metadata, diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js index 563095fe5db..b8d50e9a032 100644 --- a/server/utils/vectorDbProviders/lance/index.js +++ b/server/utils/vectorDbProviders/lance/index.js @@ -5,7 +5,7 @@ const { SystemSettings } = require("../../../models/systemSettings"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { sourceIdentifier } = require("../../chats"); -const { NativeEmbeddingReranker } = require("../../EmbeddingRerankers/native"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); /** * LancedDB Client connection object @@ -79,68 +79,24 @@ const LanceDb = { similarityThreshold = 0.25, filterIdentifiers = [], }) { - const reranker = new NativeEmbeddingReranker(); - const collection = await client.openTable(namespace); const totalEmbeddings = await this.namespaceCount(namespace); - const result = { - contextTexts: [], - sourceDocuments: [], - scores: [], - }; - - /** - * For reranking, we want to work with a larger number of results than the topN. - * This is because the reranker can only rerank the results it it given and we dont auto-expand the results. 
- * We want to give the reranker a larger number of results to work with. - * - * However, we cannot make this boundless as reranking is expensive and time consuming. - * So we limit the number of results to a maximum of 50 and a minimum of 10. - * This is a good balance between the number of results to rerank and the cost of reranking - * and ensures workspaces with 10K embeddings will still rerank within a reasonable timeframe on base level hardware. - * - * Benchmarks: - * On Intel Mac: 2.6 GHz 6-Core Intel Core i7 - 20 docs reranked in ~5.2 sec - */ - const searchLimit = Math.max( - 10, - Math.min(50, Math.ceil(totalEmbeddings * 0.1)) - ); - const vectorSearchResults = await collection - .vectorSearch(queryVector) - .distanceType("cosine") - .limit(searchLimit) - .toArray(); - - await reranker - .rerank(query, vectorSearchResults, { topK: topN }) - .then((rerankResults) => { - rerankResults.forEach((item) => { - if (this.distanceToSimilarity(item._distance) < similarityThreshold) - return; - const { vector: _, ...rest } = item; - if (filterIdentifiers.includes(sourceIdentifier(rest))) { - console.log( - "LanceDB: A source was filtered from context as it's parent document is pinned." - ); - return; - } - const score = - item?.rerank_score || this.distanceToSimilarity(item._distance); - - result.contextTexts.push(rest.text); - result.sourceDocuments.push({ - ...rest, - score, - }); - result.scores.push(score); - }); - }) - .catch((e) => { - console.error(e); - console.error("LanceDB::rerankedSimilarityResponse", e.message); - }); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const vectorSearchResults = await client + .openTable(namespace) + .then((tbl) => + tbl + .vectorSearch(queryVector) + .distanceType("cosine") + .limit(searchLimit) + .toArray() + ); - return result; + const reranked = await rerankDocuments(query, vectorSearchResults, { + topN, + similarityThreshold, + filterIdentifiers, + }); + return reranked; }, /** @@ -421,6 +377,8 @@ const LanceDb = { filterIdentifiers, }); + console.log("result", result); + const { contextTexts, sourceDocuments } = result; const sources = sourceDocuments.map((metadata, i) => { return { metadata: { ...metadata, text: contextTexts[i] } }; diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js index 2ddaad567bb..f8e4ec183f8 100644 --- a/server/utils/vectorDbProviders/milvus/index.js +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -10,6 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const Milvus = { name: "Milvus", @@ -299,6 +300,7 @@ const Milvus = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -313,14 +315,24 @@ const Milvus = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; @@ -368,6 +380,31 @@ const Milvus = { }); return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; if (!namespace) throw new Error("namespace required"); diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index c5c55acb58c..6329753d433 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -5,6 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const PineconeDB = { name: "Pinecone", @@ -76,6 +77,31 @@ const PineconeDB = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, namespace: async function (index, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const { namespaces } = await index.describeIndexStats(); @@ -247,6 +273,7 @@ const PineconeDB = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -258,14 +285,24 @@ const PineconeDB = { ); const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client: pineconeIndex, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client: pineconeIndex, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client: pineconeIndex, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js index 50fe5fab36e..b086c863f14 100644 --- a/server/utils/vectorDbProviders/qdrant/index.js +++ b/server/utils/vectorDbProviders/qdrant/index.js @@ -5,6 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const QDrant = { name: "QDrant", @@ -86,6 +87,35 @@ const QDrant = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments( + query, + sourceDocuments.map((doc) => ({ ...doc, score: null })), + { + topN, + similarityThreshold, + filterIdentifiers, + } + ); + }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); const collection = await client.getCollection(namespace).catch(() => null); @@ -324,6 +354,7 @@ const QDrant = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -338,14 +369,24 @@ const QDrant = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((metadata, i) => { return { ...metadata, text: contextTexts[i] }; diff --git a/server/utils/vectorDbProviders/rerank.js b/server/utils/vectorDbProviders/rerank.js new file mode 100644 index 00000000000..dacee5db72a --- /dev/null +++ b/server/utils/vectorDbProviders/rerank.js @@ -0,0 +1,65 @@ +const { NativeEmbeddingReranker } = require("../EmbeddingRerankers/native"); +const { sourceIdentifier } = require("../chats"); + +async function rerankDocuments( + query, + documents, + options = { topN: 4, similarityThreshold: 0.25, filterIdentifiers: [] } +) { + const { topN, similarityThreshold, filterIdentifiers } = options; + const reranker = new NativeEmbeddingReranker(); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + await reranker + .rerank(query, documents, { topK: topN }) + .then((rerankResults) => { + rerankResults.forEach((item) => { + if (item.score < similarityThreshold) return; + + const { vector: _, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) { + console.log( + "A source was filtered from context as it's parent document is pinned." + ); + return; + } + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + }); + result.scores.push(item.score); + }); + }) + .catch((e) => { + console.error(e); + console.error("rerankDocuments", e.message); + }); + + return result; +} +/** + * For reranking, we want to work with a larger number of results than the topN. + * This is because the reranker can only rerank the results it it given and we dont auto-expand the results. + * We want to give the reranker a larger number of results to work with. + * + * However, we cannot make this boundless as reranking is expensive and time consuming. + * So we limit the number of results to a maximum of 50 and a minimum of 10. + * This is a good balance between the number of results to rerank and the cost of reranking + * and ensures workspaces with 10K embeddings will still rerank within a reasonable timeframe on base level hardware. 
+ * + * Benchmarks: + * On Intel Mac: 2.6 GHz 6-Core Intel Core i7 - 20 docs reranked in ~5.2 sec + */ +function getSearchLimit(totalEmbeddings = 0, topN = 4) { + return Math.max(10, Math.min(50, Math.ceil(totalEmbeddings * 0.1) || topN)); +} + +module.exports = { + rerankDocuments, + getSearchLimit, +}; diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js index 2385c5e8ef1..431ba7ed2c1 100644 --- a/server/utils/vectorDbProviders/weaviate/index.js +++ b/server/utils/vectorDbProviders/weaviate/index.js @@ -6,6 +6,7 @@ const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { camelCase } = require("../../helpers/camelcase"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); const Weaviate = { name: "Weaviate", @@ -121,6 +122,31 @@ const Weaviate = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, allNamespaces: async function (client) { try { const { classes = [] } = await client.schema.getter().do(); @@ -368,6 +394,7 @@ const Weaviate = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -382,14 +409,24 @@ const Weaviate = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((metadata, i) => { return { ...metadata, text: contextTexts[i] }; diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js index ab866f4edd5..0aef7c2ec35 100644 --- a/server/utils/vectorDbProviders/zilliz/index.js +++ b/server/utils/vectorDbProviders/zilliz/index.js @@ -10,6 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); +const { rerankDocuments, getSearchLimit } = require("../rerank"); // Zilliz is basically a copy of Milvus DB class with a different constructor // to connect to the cloud @@ -292,6 +293,7 @@ const Zilliz = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { if (!namespace || !input || !LLMConnector) throw new Error("Invalid request to performSimilaritySearch."); @@ -306,14 +308,24 @@ const Zilliz = { } const queryVector = await LLMConnector.embedTextInput(input); - const { contextTexts, sourceDocuments } = await this.similarityResponse({ - client, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? await this.rerankedSimilarityResponse({ + client, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; @@ -359,6 +371,31 @@ const Zilliz = { }); return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + return await rerankDocuments(query, sourceDocuments, { + topN, + similarityThreshold, + filterIdentifiers, + }); + }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; if (!namespace) throw new Error("namespace required"); From 9b717b2fc64e02f04bc6098da0f33d4c6df0659f Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Thu, 25 Sep 2025 18:34:15 -0700 Subject: [PATCH 2/7] simplify rerank.js + fix chroma/lance db reranking --- .../utils/vectorDbProviders/chroma/index.js | 23 +++++----- server/utils/vectorDbProviders/lance/index.js | 36 +++++++++++---- server/utils/vectorDbProviders/rerank.js | 45 ++----------------- 3 files changed, 43 insertions(+), 61 deletions(-) diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index 1f4468d0e2c..6adc42c42d1 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -6,7 +6,7 @@ const { v4: uuidv4 } = 
require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { parseAuthHeader } = require("../../http"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const COLLECTION_REGEX = new RegExp( /^(?!\d+\.\d+\.\d+\.\d+$)(?!.*\.\.)(?=^[a-zA-Z0-9][a-zA-Z0-9_-]{1,61}[a-zA-Z0-9]$).{3,63}$/ ); @@ -175,24 +175,23 @@ const Chroma = { text: contextTexts[i], })); - const rerankedDocs = await rerankDocuments(query, documentsForReranking, { - topN, - similarityThreshold, - filterIdentifiers, - }); - - // Post-process to fix scores and contextTexts from the generic reranker. + const rerankedResults = await rerank(query, documentsForReranking, topN); const result = { contextTexts: [], sourceDocuments: [], scores: [], }; - rerankedDocs.sourceDocuments.forEach((item) => { + rerankedResults.forEach((item) => { if (item.rerank_score < similarityThreshold) return; - const { rerank_score, ...rest } = item; - result.sourceDocuments.push({ ...rest, score: rerank_score }); - result.contextTexts.push(item.text); + const { vector: _, rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); result.scores.push(rerank_score); }); return result; diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js index b8d50e9a032..68ff7f21b67 100644 --- a/server/utils/vectorDbProviders/lance/index.js +++ b/server/utils/vectorDbProviders/lance/index.js @@ -5,7 +5,7 @@ const { SystemSettings } = require("../../../models/systemSettings"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); /** * LancedDB Client connection object @@ -91,12 +91,34 @@ const LanceDb = { .toArray() ); - const reranked = await rerankDocuments(query, vectorSearchResults, { - topN, - similarityThreshold, - filterIdentifiers, + const rerankedResults = await rerank(query, vectorSearchResults, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (this.distanceToSimilarity(item._distance) < similarityThreshold) + return; + const { vector: _, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) { + console.log( + "LanceDB: A source was filtered from context as it's parent document is pinned." 
+ ); + return; + } + const score = + item?.rerank_score || this.distanceToSimilarity(item._distance); + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score, + }); + result.scores.push(score); }); - return reranked; + return result; }, /** @@ -377,8 +399,6 @@ const LanceDb = { filterIdentifiers, }); - console.log("result", result); - const { contextTexts, sourceDocuments } = result; const sources = sourceDocuments.map((metadata, i) => { return { metadata: { ...metadata, text: contextTexts[i] } }; diff --git a/server/utils/vectorDbProviders/rerank.js b/server/utils/vectorDbProviders/rerank.js index dacee5db72a..98d1e810322 100644 --- a/server/utils/vectorDbProviders/rerank.js +++ b/server/utils/vectorDbProviders/rerank.js @@ -1,47 +1,10 @@ const { NativeEmbeddingReranker } = require("../EmbeddingRerankers/native"); -const { sourceIdentifier } = require("../chats"); -async function rerankDocuments( - query, - documents, - options = { topN: 4, similarityThreshold: 0.25, filterIdentifiers: [] } -) { - const { topN, similarityThreshold, filterIdentifiers } = options; +async function rerank(query, documents, topN = 4) { const reranker = new NativeEmbeddingReranker(); - const result = { - contextTexts: [], - sourceDocuments: [], - scores: [], - }; - - await reranker - .rerank(query, documents, { topK: topN }) - .then((rerankResults) => { - rerankResults.forEach((item) => { - if (item.score < similarityThreshold) return; - - const { vector: _, ...rest } = item; - if (filterIdentifiers.includes(sourceIdentifier(rest))) { - console.log( - "A source was filtered from context as it's parent document is pinned." - ); - return; - } - - result.contextTexts.push(rest.text); - result.sourceDocuments.push({ - ...rest, - }); - result.scores.push(item.score); - }); - }) - .catch((e) => { - console.error(e); - console.error("rerankDocuments", e.message); - }); - - return result; + return await reranker.rerank(query, documents, { topK: topN }); } + /** * For reranking, we want to work with a larger number of results than the topN. * This is because the reranker can only rerank the results it it given and we dont auto-expand the results. 
@@ -60,6 +23,6 @@ function getSearchLimit(totalEmbeddings = 0, topN = 4) { } module.exports = { - rerankDocuments, + rerank, getSearchLimit, }; From 72e1aec0f17f9d18c650044a00e4046260d11caa Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 16:38:44 -0700 Subject: [PATCH 3/7] fix scores + reranking for milvus, zilliz, astra providers --- server/utils/vectorDbProviders/astra/index.js | 49 +++++++---- .../utils/vectorDbProviders/milvus/index.js | 26 ++++-- .../utils/vectorDbProviders/zilliz/index.js | 81 ++++++++++++------- 3 files changed, 106 insertions(+), 50 deletions(-) diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index 8e0331df3be..2d25d2d7038 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const sanitizeNamespace = (namespace) => { // If namespace already starts with ns_, don't add it again @@ -340,9 +340,10 @@ const AstraDB = { filterIdentifiers, }); - const sources = sourceDocuments.map((metadata, i) => { - return { ...metadata, text: contextTexts[i] }; + const sources = sourceDocuments.map((doc, i) => { + return { metadata: doc, text: contextTexts[i] }; }); + console.log("sources", this.curateSources(sources)); return { contextTexts, sources: this.curateSources(sources), @@ -385,7 +386,10 @@ const AstraDB = { return; } result.contextTexts.push(response.metadata.text); - result.sourceDocuments.push(response); + result.sourceDocuments.push({ + ...response.metadata, + score: response.$similarity, + }); result.scores.push(response.$similarity); }); return result; @@ -409,15 +413,27 @@ const AstraDB = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments( - query, - sourceDocuments.map((doc) => ({ ...doc.metadata, score: null })), - { - topN, - similarityThreshold, - filterIdentifiers, - } - ); + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); + }); + return result; }, allNamespaces: async function (client) { try { @@ -473,12 +489,11 @@ const AstraDB = { curateSources: function (sources = []) { const documents = []; for (const source of sources) { - if (Object.keys(source).length > 0) { - const metadata = source.hasOwnProperty("metadata") - ? source.metadata - : source; + const { metadata = {} } = source; + if (Object.keys(metadata).length > 0) { documents.push({ ...metadata, + ...(source.text ? 
{ text: source.text } : {}), }); } } diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js index f8e4ec183f8..fe0c9a3e6e0 100644 --- a/server/utils/vectorDbProviders/milvus/index.js +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const Milvus = { name: "Milvus", @@ -399,11 +399,27 @@ const Milvus = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js index 0aef7c2ec35..dff29e17a82 100644 --- a/server/utils/vectorDbProviders/zilliz/index.js +++ b/server/utils/vectorDbProviders/zilliz/index.js @@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid"); const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); // Zilliz is basically a copy of Milvus DB class with a different constructor // to connect to the cloud @@ -158,30 +158,38 @@ const Zilliz = { vectorDimension = chunks[0][0].values.length || null; await this.getOrCreateCollection(client, namespace, vectorDimension); - for (const chunk of chunks) { - // Before sending to Pinecone and saving the records to our db - // we need to assign the id of each chunk that is stored in the cached file. - const newChunks = chunk.map((chunk) => { - const id = uuidv4(); - documentVectors.push({ docId, vectorId: id }); - return { id, vector: chunk.values, metadata: chunk.metadata }; - }); - const insertResult = await client.insert({ - collection_name: this.normalize(namespace), - data: newChunks, - }); + try { + for (const chunk of chunks) { + // Before sending to Zilliz and saving the records to our db + // we need to assign the id of each chunk that is stored in the cached file. + const newChunks = chunk.map((chunk) => { + const id = uuidv4(); + documentVectors.push({ docId, vectorId: id }); + return { id, vector: chunk.values, metadata: chunk.metadata }; + }); + const insertResult = await client.insert({ + collection_name: this.normalize(namespace), + data: newChunks, + }); - if (insertResult?.status.error_code !== "Success") { - throw new Error( - `Error embedding into Zilliz! 
Reason:${insertResult?.status.reason}` - ); + if (insertResult?.status.error_code !== "Success") { + throw new Error( + `Error embedding into Milvus! Reason:${insertResult?.status.reason}` + ); + } } + await DocumentVectors.bulkInsert(documentVectors); + await client.flushSync({ + collection_names: [this.normalize(namespace)], + }); + return { vectorized: true, error: null }; + } catch (insertError) { + console.error( + "Error inserting cached chunks:", + insertError.message + ); + return { vectorized: false, error: insertError.message }; } - await DocumentVectors.bulkInsert(documentVectors); - await client.flushSync({ - collection_names: [this.normalize(namespace)], - }); - return { vectorized: true, error: null }; } } @@ -240,7 +248,7 @@ const Zilliz = { data: chunk.map((item) => ({ id: item.id, vector: item.values, - metadata: chunk.metadata, + metadata: item.metadata, })), }); @@ -330,6 +338,7 @@ const Zilliz = { const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; }); + return { contextTexts, sources: this.curateSources(sources), @@ -362,6 +371,7 @@ const Zilliz = { ); return; } + result.contextTexts.push(match.metadata.text); result.sourceDocuments.push({ ...match.metadata, @@ -390,11 +400,27 @@ const Zilliz = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, "namespace-stats": async function (reqBody = {}) { const { namespace = null } = reqBody; @@ -431,7 +457,6 @@ const Zilliz = { }); } } - return documents; }, }; From 0f227ce283800bf5d4803fd0512f1db7d93df8a8 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 17:17:00 -0700 Subject: [PATCH 4/7] pgvector reranking support --- .../utils/vectorDbProviders/pgvector/index.js | 115 +++++++++++++----- 1 file changed, 85 insertions(+), 30 deletions(-) diff --git a/server/utils/vectorDbProviders/pgvector/index.js b/server/utils/vectorDbProviders/pgvector/index.js index d5c86907566..53c23b8bc1b 100644 --- a/server/utils/vectorDbProviders/pgvector/index.js +++ b/server/utils/vectorDbProviders/pgvector/index.js @@ -3,6 +3,7 @@ const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { TextSplitter } = require("../../TextSplitter"); const { v4: uuidv4 } = require("uuid"); const { sourceIdentifier } = require("../../chats"); +const { rerank, getSearchLimit } = require("../rerank"); /* Embedding Table Schema (table name defined by user) @@ -158,29 +159,31 @@ const PGVector = { }, PGVector.connectionTimeout); }); - const connectionPromise = new Promise(async (resolve) => { - let pgClient = null; - try { - pgClient = this.client(connectionString); - await pgClient.connect(); - const result = await pgClient.query(this.getTablesSql); - - if (result.rows.length !== 0 && !!tableName) { - const tableExists = result.rows.some( - (row) => row.tablename === tableName - ); - if (tableExists) - await this.validateExistingEmbeddingTableSchema( - pgClient, 
- tableName + const connectionPromise = new Promise((resolve) => { + (async () => { + let pgClient = null; + try { + pgClient = this.client(connectionString); + await pgClient.connect(); + const result = await pgClient.query(this.getTablesSql); + + if (result.rows.length !== 0 && !!tableName) { + const tableExists = result.rows.some( + (row) => row.tablename === tableName ); + if (tableExists) + await this.validateExistingEmbeddingTableSchema( + pgClient, + tableName + ); + } + resolve({ error: null, success: true }); + } catch (err) { + resolve({ error: err.message, success: false }); + } finally { + if (pgClient) await pgClient.end(); } - resolve({ error: null, success: true }); - } catch (err) { - resolve({ error: err.message, success: false }); - } finally { - if (pgClient) await pgClient.end(); - } + })(); }); // Race the connection attempt against the timeout @@ -352,6 +355,48 @@ const PGVector = { return result; }, + rerankedSimilarityResponse: async function ({ + client, + namespace, + query, + queryVector, + topN = 4, + similarityThreshold = 0.25, + filterIdentifiers = [], + }) { + const totalEmbeddings = await this.namespaceCount(namespace); + const searchLimit = getSearchLimit(totalEmbeddings, topN); + const { sourceDocuments } = await this.similarityResponse({ + client, + namespace, + queryVector, + similarityThreshold, + topN: searchLimit, + filterIdentifiers, + }); + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); + }); + return result; + }, + normalizeVector: function (vector) { const magnitude = Math.sqrt( vector.reduce((sum, val) => sum + val * val, 0) @@ -655,6 +700,7 @@ const PGVector = { similarityThreshold = 0.25, topN = 4, filterIdentifiers = [], + rerank = false, }) { let connection = null; if (!namespace || !input || !LLMConnector) @@ -675,16 +721,25 @@ const PGVector = { } const queryVector = await LLMConnector.embedTextInput(input); - const result = await this.similarityResponse({ - client: connection, - namespace, - queryVector, - similarityThreshold, - topN, - filterIdentifiers, - }); + const { contextTexts, sourceDocuments } = rerank + ? 
await this.rerankedSimilarityResponse({ + client: connection, + namespace, + query: input, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }) + : await this.similarityResponse({ + client: connection, + namespace, + queryVector, + similarityThreshold, + topN, + filterIdentifiers, + }); - const { contextTexts, sourceDocuments } = result; const sources = sourceDocuments.map((metadata, i) => { return { metadata: { ...metadata, text: contextTexts[i] } }; }); From bbfc7b965c204164da27741e3090a239c64d4134 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 17:38:14 -0700 Subject: [PATCH 5/7] support reranking for pinecone, qdrant, weaviate providers --- .../utils/vectorDbProviders/pinecone/index.js | 26 ++++++++--- .../utils/vectorDbProviders/qdrant/index.js | 44 ++++++++++++------- .../utils/vectorDbProviders/weaviate/index.js | 43 ++++++++++++------ 3 files changed, 79 insertions(+), 34 deletions(-) diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index 6329753d433..67a08b34ea0 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const PineconeDB = { name: "Pinecone", @@ -96,11 +96,27 @@ const PineconeDB = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, namespace: async function (index, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js index b086c863f14..285884e199f 100644 --- a/server/utils/vectorDbProviders/qdrant/index.js +++ b/server/utils/vectorDbProviders/qdrant/index.js @@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const QDrant = { name: "QDrant", @@ -81,6 +81,7 @@ const QDrant = { result.sourceDocuments.push({ ...(response?.payload || {}), id: response.id, + score: response.score, }); result.scores.push(response.score); }); @@ -106,15 +107,27 @@ const QDrant = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments( - query, - sourceDocuments.map((doc) => ({ ...doc, score: null })), - { - topN, - similarityThreshold, - 
filterIdentifiers, - } - ); + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); + }); + return result; }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); @@ -388,8 +401,8 @@ const QDrant = { filterIdentifiers, }); - const sources = sourceDocuments.map((metadata, i) => { - return { ...metadata, text: contextTexts[i] }; + const sources = sourceDocuments.map((doc, i) => { + return { metadata: doc, text: contextTexts[i] }; }); return { contextTexts, @@ -431,12 +444,11 @@ const QDrant = { curateSources: function (sources = []) { const documents = []; for (const source of sources) { - if (Object.keys(source).length > 0) { - const metadata = source.hasOwnProperty("metadata") - ? source.metadata - : source; + const { metadata = {} } = source; + if (Object.keys(metadata).length > 0) { documents.push({ ...metadata, + ...(source.text ? { text: source.text } : {}), }); } } diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js index 431ba7ed2c1..8363fa5c8d4 100644 --- a/server/utils/vectorDbProviders/weaviate/index.js +++ b/server/utils/vectorDbProviders/weaviate/index.js @@ -6,7 +6,7 @@ const { v4: uuidv4 } = require("uuid"); const { toChunks, getEmbeddingEngineSelection } = require("../../helpers"); const { camelCase } = require("../../helpers/camelcase"); const { sourceIdentifier } = require("../../chats"); -const { rerankDocuments, getSearchLimit } = require("../rerank"); +const { rerank, getSearchLimit } = require("../rerank"); const Weaviate = { name: "Weaviate", @@ -116,7 +116,7 @@ const Weaviate = { return; } result.contextTexts.push(rest.text); - result.sourceDocuments.push({ ...rest, id }); + result.sourceDocuments.push({ ...rest, id, score: certainty }); result.scores.push(certainty); }); @@ -141,11 +141,27 @@ const Weaviate = { topN: searchLimit, filterIdentifiers, }); - return await rerankDocuments(query, sourceDocuments, { - topN, - similarityThreshold, - filterIdentifiers, + + const rerankedResults = await rerank(query, sourceDocuments, topN); + const result = { + contextTexts: [], + sourceDocuments: [], + scores: [], + }; + + rerankedResults.forEach((item) => { + if (item.rerank_score < similarityThreshold) return; + const { rerank_score, ...rest } = item; + if (filterIdentifiers.includes(sourceIdentifier(rest))) return; + + result.contextTexts.push(rest.text); + result.sourceDocuments.push({ + ...rest, + score: rerank_score, + }); + result.scores.push(rerank_score); }); + return result; }, allNamespaces: async function (client) { try { @@ -428,8 +444,8 @@ const Weaviate = { filterIdentifiers, }); - const sources = sourceDocuments.map((metadata, i) => { - return { ...metadata, text: contextTexts[i] }; + const sources = sourceDocuments.map((doc, i) => { + return { metadata: doc, text: contextTexts[i] }; }); return { contextTexts, @@ -468,11 +484,12 @@ const Weaviate = { curateSources: function (sources = []) { const documents = []; for (const source of sources) { - if (Object.keys(source).length > 0) { - const 
metadata = source.hasOwnProperty("metadata") - ? source.metadata - : source; - documents.push({ ...metadata }); + const { metadata = {} } = source; + if (Object.keys(metadata).length > 0) { + documents.push({ + ...metadata, + ...(source.text ? { text: source.text } : {}), + }); } } From 6e47ad8378ff1c548d1acc0da6e7cf02467d050c Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Fri, 26 Sep 2025 17:39:34 -0700 Subject: [PATCH 6/7] unneeded console log --- server/utils/vectorDbProviders/astra/index.js | 1 - 1 file changed, 1 deletion(-) diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index 2d25d2d7038..c4f8d087299 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -343,7 +343,6 @@ const AstraDB = { const sources = sourceDocuments.map((doc, i) => { return { metadata: doc, text: contextTexts[i] }; }); - console.log("sources", this.curateSources(sources)); return { contextTexts, sources: this.curateSources(sources), From b03ec4cd6237dce36caa3aae79da533ef2d28d76 Mon Sep 17 00:00:00 2001 From: shatfield4 Date: Mon, 29 Sep 2025 16:36:04 -0700 Subject: [PATCH 7/7] refactor structure to prep for other external embedding rerankers --- .../rerank.js | 9 +++--- server/utils/helpers/index.js | 29 +++++++++++++++++++ server/utils/vectorDbProviders/astra/index.js | 4 +-- .../utils/vectorDbProviders/chroma/index.js | 4 +-- server/utils/vectorDbProviders/lance/index.js | 10 +++---- .../utils/vectorDbProviders/milvus/index.js | 4 +-- .../utils/vectorDbProviders/pgvector/index.js | 4 +-- .../utils/vectorDbProviders/pinecone/index.js | 4 +-- .../utils/vectorDbProviders/qdrant/index.js | 4 +-- .../utils/vectorDbProviders/weaviate/index.js | 4 +-- .../utils/vectorDbProviders/zilliz/index.js | 4 +-- 11 files changed, 54 insertions(+), 26 deletions(-) rename server/utils/{vectorDbProviders => EmbeddingRerankers}/rerank.js (83%) diff --git a/server/utils/vectorDbProviders/rerank.js b/server/utils/EmbeddingRerankers/rerank.js similarity index 83% rename from server/utils/vectorDbProviders/rerank.js rename to server/utils/EmbeddingRerankers/rerank.js index 98d1e810322..8bbe588d54e 100644 --- a/server/utils/vectorDbProviders/rerank.js +++ b/server/utils/EmbeddingRerankers/rerank.js @@ -1,7 +1,7 @@ -const { NativeEmbeddingReranker } = require("../EmbeddingRerankers/native"); +const { getRerankerProvider } = require("../helpers"); async function rerank(query, documents, topN = 4) { - const reranker = new NativeEmbeddingReranker(); + const reranker = getRerankerProvider(); return await reranker.rerank(query, documents, { topK: topN }); } @@ -18,8 +18,9 @@ async function rerank(query, documents, topN = 4) { * Benchmarks: * On Intel Mac: 2.6 GHz 6-Core Intel Core i7 - 20 docs reranked in ~5.2 sec */ -function getSearchLimit(totalEmbeddings = 0, topN = 4) { - return Math.max(10, Math.min(50, Math.ceil(totalEmbeddings * 0.1) || topN)); + +function getSearchLimit(totalEmbeddings = 0) { + return Math.max(10, Math.min(50, Math.ceil(totalEmbeddings * 0.1))); } module.exports = { diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 12327698954..a6fc4230451 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -75,6 +75,11 @@ * @property {Function} embedChunks - Embeds multiple chunks of text. */ +/** + * @typedef {Object} BaseRerankerProvider + * @property {function(string, {text: string}[], {topK: number}): Promise} rerank - Reranks a list of documents. 
+ */
+
 /**
  * Gets the systems current vector database provider.
  * @param {('pinecone' | 'chroma' | 'chromacloud' | 'lancedb' | 'weaviate' | 'qdrant' | 'milvus' | 'zilliz' | 'astra') | null} getExactly - If provided, this will return an explit provider.
@@ -463,6 +468,29 @@ function toChunks(arr, size) {
   );
 }
 
+/**
+ * Returns the Reranker provider.
+ * @returns {BaseRerankerProvider}
+ */
+function getRerankerProvider() {
+  const rerankerSelection = process.env.RERANKING_PROVIDER ?? "native";
+  switch (rerankerSelection) {
+    case "native":
+      const {
+        NativeEmbeddingReranker,
+      } = require("../EmbeddingRerankers/native");
+      return new NativeEmbeddingReranker();
+    default:
+      console.log(
+        `[RERANKING] Reranker provider ${rerankerSelection} is not supported. Using native reranker as fallback.`
+      );
+      const {
+        NativeEmbeddingReranker: Native,
+      } = require("../EmbeddingRerankers/native");
+      return new Native();
+  }
+}
+
 module.exports = {
   getEmbeddingEngineSelection,
   maximumChunkLength,
@@ -471,4 +499,5 @@
   getBaseLLMProviderModel,
   getLLMProvider,
   toChunks,
+  getRerankerProvider,
 };
diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js
index c4f8d087299..783e6340733 100644
--- a/server/utils/vectorDbProviders/astra/index.js
+++ b/server/utils/vectorDbProviders/astra/index.js
@@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const sanitizeNamespace = (namespace) => {
   // If namespace already starts with ns_, don't add it again
@@ -403,7 +403,7 @@ const AstraDB = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index 6adc42c42d1..a8aa09018b0 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -6,7 +6,7 @@ const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { parseAuthHeader } = require("../../http");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 const COLLECTION_REGEX = new RegExp(
   /^(?!\d+\.\d+\.\d+\.\d+$)(?!.*\.\.)(?=^[a-zA-Z0-9][a-zA-Z0-9_-]{1,61}[a-zA-Z0-9]$).{3,63}$/
 );
@@ -161,7 +161,7 @@ const Chroma = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments, contextTexts } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js
index 68ff7f21b67..adda0cefc44 100644
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@@ -5,7 +5,7 @@ const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 /**
  * LancedDB Client connection object
@@ -80,7 +80,7 @@ const LanceDb = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const vectorSearchResults = await client
       .openTable(namespace)
       .then((tbl) =>
@@ -99,8 +99,7 @@
     };
 
     rerankedResults.forEach((item) => {
-      if (this.distanceToSimilarity(item._distance) < similarityThreshold)
-        return;
+      if (item.rerank_score < similarityThreshold) return;
       const { vector: _, ...rest } = item;
       if (filterIdentifiers.includes(sourceIdentifier(rest))) {
         console.log(
@@ -108,8 +107,7 @@
         );
         return;
       }
-      const score =
-        item?.rerank_score || this.distanceToSimilarity(item._distance);
+      const score = item.rerank_score;
 
       result.contextTexts.push(rest.text);
       result.sourceDocuments.push({
diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js
index fe0c9a3e6e0..b1f6800c23f 100644
--- a/server/utils/vectorDbProviders/milvus/index.js
+++ b/server/utils/vectorDbProviders/milvus/index.js
@@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const Milvus = {
   name: "Milvus",
@@ -390,7 +390,7 @@ const Milvus = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/pgvector/index.js b/server/utils/vectorDbProviders/pgvector/index.js
index 53c23b8bc1b..21775ecdb25 100644
--- a/server/utils/vectorDbProviders/pgvector/index.js
+++ b/server/utils/vectorDbProviders/pgvector/index.js
@@ -3,7 +3,7 @@ const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { TextSplitter } = require("../../TextSplitter");
 const { v4: uuidv4 } = require("uuid");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 /*
 Embedding Table Schema (table name defined by user)
@@ -365,7 +365,7 @@ const PGVector = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js
index 67a08b34ea0..9335c90dbcd 100644
--- a/server/utils/vectorDbProviders/pinecone/index.js
+++ b/server/utils/vectorDbProviders/pinecone/index.js
@@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const PineconeDB = {
   name: "Pinecone",
@@ -87,7 +87,7 @@ const PineconeDB = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js
index 285884e199f..5721c3f77a8 100644
--- a/server/utils/vectorDbProviders/qdrant/index.js
+++ b/server/utils/vectorDbProviders/qdrant/index.js
@@ -5,7 +5,7 @@ const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const QDrant = {
   name: "QDrant",
@@ -98,7 +98,7 @@ const QDrant = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js
index 8363fa5c8d4..6a475380f7d 100644
--- a/server/utils/vectorDbProviders/weaviate/index.js
+++ b/server/utils/vectorDbProviders/weaviate/index.js
@@ -6,7 +6,7 @@ const { v4: uuidv4 } = require("uuid");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { camelCase } = require("../../helpers/camelcase");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 const Weaviate = {
   name: "Weaviate",
@@ -132,7 +132,7 @@ const Weaviate = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js
index dff29e17a82..189d2eb85d6 100644
--- a/server/utils/vectorDbProviders/zilliz/index.js
+++ b/server/utils/vectorDbProviders/zilliz/index.js
@@ -10,7 +10,7 @@ const { v4: uuidv4 } = require("uuid");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { toChunks, getEmbeddingEngineSelection } = require("../../helpers");
 const { sourceIdentifier } = require("../../chats");
-const { rerank, getSearchLimit } = require("../rerank");
+const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");
 
 // Zilliz is basically a copy of Milvus DB class with a different constructor
 // to connect to the cloud
@@ -391,7 +391,7 @@ const Zilliz = {
     filterIdentifiers = [],
   }) {
     const totalEmbeddings = await this.namespaceCount(namespace);
-    const searchLimit = getSearchLimit(totalEmbeddings, topN);
+    const searchLimit = getSearchLimit(totalEmbeddings);
     const { sourceDocuments } = await this.similarityResponse({
       client,
       namespace,
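
For anyone reviewing the series, the getRerankerProvider() helper added to server/utils/helpers/index.js above is the piece the provider changes hang off of. A minimal usage sketch from a vector DB provider's point of view, assuming only what the hunk itself shows (the RERANKING_PROVIDER environment variable and the native fallback):

// Illustrative only, not part of this patch. RERANKING_PROVIDER defaults to
// "native" when unset; any unrecognized value logs a warning and falls back
// to the NativeEmbeddingReranker, per the switch statement added above.
const { getRerankerProvider } = require("../../helpers");

const reranker = getRerankerProvider();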
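
The per-provider hunks all repeat the same two changes: getSearchLimit() now sizes the candidate pool from the namespace's embedding count alone (topN is applied later, when the candidates are reranked), and LanceDB thresholds on rerank_score instead of the distance-derived similarity. A sketch of that shared flow, with the helper signatures inferred from the call sites above; the real implementations live in server/utils/EmbeddingRerankers/rerank.js and may differ:

// Illustrative only, not part of this patch. "provider" stands in for any of
// the vector DB providers touched above (Astra, Chroma, Milvus, PGVector, ...).
const { rerank, getSearchLimit } = require("../../EmbeddingRerankers/rerank");

async function rerankedSimilaritySearch(provider, {
  client,
  namespace,
  query,
  queryVector,
  topN = 4,
  similarityThreshold = 0.25,
  filterIdentifiers = [],
}) {
  // Over-fetch: the candidate pool depends only on how many vectors the namespace holds.
  const totalEmbeddings = await provider.namespaceCount(namespace);
  const searchLimit = getSearchLimit(totalEmbeddings);

  const { sourceDocuments } = await provider.similarityResponse({
    client,
    namespace,
    queryVector,
    similarityThreshold,
    topN: searchLimit,
    filterIdentifiers,
  });

  // The reranker re-scores candidates against the raw query text and trims the
  // result back down to topN; downstream code filters on rerank_score.
  return await rerank(query, sourceDocuments, {
    topN,
    similarityThreshold,
    filterIdentifiers,
  });
}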