这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion server/utils/chats/embed.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const { v4: uuidv4 } = require("uuid");
const { getVectorDbClass, getLLMProvider } = require("../helpers");
const { chatPrompt } = require("./index");
const { chatPrompt, sourceIdentifier } = require("./index");
const { EmbedChats } = require("../../models/embedChats");
const {
convertToPromptHistory,
Expand Down Expand Up @@ -69,6 +69,7 @@ async function streamChatWithForEmbed(
let completeText;
let contextTexts = [];
let sources = [];
let pinnedDocIdentifiers = [];
const { rawHistory, chatHistory } = await recentEmbedChatHistory(
sessionId,
embed,
Expand All @@ -86,6 +87,7 @@ async function streamChatWithForEmbed(
.then((pinnedDocs) => {
pinnedDocs.forEach((doc) => {
const { pageContent, ...metadata } = doc;
pinnedDocIdentifiers.push(sourceIdentifier(doc));
contextTexts.push(doc.pageContent);
sources.push({
text:
Expand All @@ -104,6 +106,7 @@ async function streamChatWithForEmbed(
LLMConnector,
similarityThreshold: embed.workspace?.similarityThreshold,
topN: embed.workspace?.topN,
filterIdentifiers: pinnedDocIdentifiers,
})
: {
contextTexts: [],
Expand Down
14 changes: 14 additions & 0 deletions server/utils/chats/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ async function chatWithWorkspace(
// 2. Chatting in "query" mode and has at least 1 embedding
let contextTexts = [];
let sources = [];
let pinnedDocIdentifiers = [];
const { rawHistory, chatHistory } = await recentChatHistory({
user,
workspace,
Expand All @@ -97,6 +98,7 @@ async function chatWithWorkspace(
.then((pinnedDocs) => {
pinnedDocs.forEach((doc) => {
const { pageContent, ...metadata } = doc;
pinnedDocIdentifiers.push(sourceIdentifier(doc));
contextTexts.push(doc.pageContent);
sources.push({
text:
Expand All @@ -115,6 +117,7 @@ async function chatWithWorkspace(
LLMConnector,
similarityThreshold: workspace?.similarityThreshold,
topN: workspace?.topN,
filterIdentifiers: pinnedDocIdentifiers,
})
: {
contextTexts: [],
Expand Down Expand Up @@ -227,7 +230,18 @@ function chatPrompt(workspace) {
);
}

// Builds the key used to recognise that a similarity-search result belongs to
// a document that is already pinned, so the same content is not sent twice.
// Example: with a pinned CSV the full text is already in the context; if RAG
// also returns snippets from that CSV the data points are duplicated, which
// degrades the answer even when the LLM would not otherwise hallucinate.
//
// The key is derived from the document's `title` and `published` fields.
// When either one is missing (or the document itself is null/undefined) a
// fresh random UUID is returned instead of a derived key.
function sourceIdentifier(sourceDocument) {
  const hasIdentity = Boolean(
    sourceDocument?.title && sourceDocument?.published
  );
  return hasIdentity
    ? `title:${sourceDocument.title}-timestamp:${sourceDocument.published}`
    : uuidv4();
}

module.exports = {
sourceIdentifier,
recentChatHistory,
chatWithWorkspace,
chatPrompt,
Expand Down
4 changes: 4 additions & 0 deletions server/utils/chats/stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const {
VALID_COMMANDS,
chatPrompt,
recentChatHistory,
sourceIdentifier,
} = require("./index");

const VALID_CHAT_MODE = ["chat", "query"];
Expand Down Expand Up @@ -92,6 +93,7 @@ async function streamChatWithWorkspace(
let completeText;
let contextTexts = [];
let sources = [];
let pinnedDocIdentifiers = [];
const { rawHistory, chatHistory } = await recentChatHistory({
user,
workspace,
Expand All @@ -110,6 +112,7 @@ async function streamChatWithWorkspace(
.then((pinnedDocs) => {
pinnedDocs.forEach((doc) => {
const { pageContent, ...metadata } = doc;
pinnedDocIdentifiers.push(sourceIdentifier(doc));
contextTexts.push(doc.pageContent);
sources.push({
text:
Expand All @@ -128,6 +131,7 @@ async function streamChatWithWorkspace(
LLMConnector,
similarityThreshold: workspace?.similarityThreshold,
topN: workspace?.topN,
filterIdentifiers: pinnedDocIdentifiers,
})
: {
contextTexts: [],
Expand Down
14 changes: 12 additions & 2 deletions server/utils/vectorDbProviders/astra/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { sourceIdentifier } = require("../../chats");

const AstraDB = {
name: "AstraDB",
Expand Down Expand Up @@ -252,6 +253,7 @@ const AstraDB = {
LLMConnector = null,
similarityThreshold = 0.25,
topN = 4,
filterIdentifiers = [],
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
Expand All @@ -272,7 +274,8 @@ const AstraDB = {
namespace,
queryVector,
similarityThreshold,
topN
topN,
filterIdentifiers
);

const sources = sourceDocuments.map((metadata, i) => {
Expand All @@ -289,7 +292,8 @@ const AstraDB = {
namespace,
queryVector,
similarityThreshold = 0.25,
topN = 4
topN = 4,
filterIdentifiers = []
) {
const result = {
contextTexts: [],
Expand All @@ -311,6 +315,12 @@ const AstraDB = {

responses.forEach((response) => {
if (response.$similarity < similarityThreshold) return;
if (filterIdentifiers.includes(sourceIdentifier(response.metadata))) {
console.log(
"AstraDB: A source was filtered from context as it's parent document is pinned."
);
return;
}
result.contextTexts.push(response.metadata.text);
result.sourceDocuments.push(response);
result.scores.push(response.$similarity);
Expand Down
17 changes: 15 additions & 2 deletions server/utils/vectorDbProviders/chroma/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const {
getEmbeddingEngineSelection,
} = require("../../helpers");
const { parseAuthHeader } = require("../../http");
const { sourceIdentifier } = require("../../chats");

const Chroma = {
name: "Chroma",
Expand Down Expand Up @@ -70,7 +71,8 @@ const Chroma = {
namespace,
queryVector,
similarityThreshold = 0.25,
topN = 4
topN = 4,
filterIdentifiers = []
) {
const collection = await client.getCollection({ name: namespace });
const result = {
Expand All @@ -89,6 +91,15 @@ const Chroma = {
similarityThreshold
)
return;

if (
filterIdentifiers.includes(sourceIdentifier(response.metadatas[0][i]))
) {
console.log(
"Chroma: A source was filtered from context as it's parent document is pinned."
);
return;
}
result.contextTexts.push(response.documents[0][i]);
result.sourceDocuments.push(response.metadatas[0][i]);
result.scores.push(this.distanceToSimilarity(response.distances[0][i]));
Expand Down Expand Up @@ -282,6 +293,7 @@ const Chroma = {
LLMConnector = null,
similarityThreshold = 0.25,
topN = 4,
filterIdentifiers = [],
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
Expand All @@ -301,7 +313,8 @@ const Chroma = {
namespace,
queryVector,
similarityThreshold,
topN
topN,
filterIdentifiers
);

const sources = sourceDocuments.map((metadata, i) => {
Expand Down
15 changes: 13 additions & 2 deletions server/utils/vectorDbProviders/lance/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const { TextSplitter } = require("../../TextSplitter");
const { SystemSettings } = require("../../../models/systemSettings");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid");
const { sourceIdentifier } = require("../../chats");

const LanceDb = {
uri: `${
Expand Down Expand Up @@ -64,7 +65,8 @@ const LanceDb = {
namespace,
queryVector,
similarityThreshold = 0.25,
topN = 4
topN = 4,
filterIdentifiers = []
) {
const collection = await client.openTable(namespace);
const result = {
Expand All @@ -82,6 +84,13 @@ const LanceDb = {
response.forEach((item) => {
if (this.distanceToSimilarity(item.score) < similarityThreshold) return;
const { vector: _, ...rest } = item;
if (filterIdentifiers.includes(sourceIdentifier(rest))) {
console.log(
"LanceDB: A source was filtered from context as it's parent document is pinned."
);
return;
}

result.contextTexts.push(rest.text);
result.sourceDocuments.push(rest);
result.scores.push(this.distanceToSimilarity(item.score));
Expand Down Expand Up @@ -250,6 +259,7 @@ const LanceDb = {
LLMConnector = null,
similarityThreshold = 0.25,
topN = 4,
filterIdentifiers = [],
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
Expand All @@ -269,7 +279,8 @@ const LanceDb = {
namespace,
queryVector,
similarityThreshold,
topN
topN,
filterIdentifiers
);

const sources = sourceDocuments.map((metadata, i) => {
Expand Down
15 changes: 13 additions & 2 deletions server/utils/vectorDbProviders/milvus/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { sourceIdentifier } = require("../../chats");

const Milvus = {
name: "Milvus",
Expand Down Expand Up @@ -288,6 +289,7 @@ const Milvus = {
LLMConnector = null,
similarityThreshold = 0.25,
topN = 4,
filterIdentifiers = [],
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
Expand All @@ -307,7 +309,8 @@ const Milvus = {
namespace,
queryVector,
similarityThreshold,
topN
topN,
filterIdentifiers
);

const sources = sourceDocuments.map((metadata, i) => {
Expand All @@ -324,7 +327,8 @@ const Milvus = {
namespace,
queryVector,
similarityThreshold = 0.25,
topN = 4
topN = 4,
filterIdentifiers = []
) {
const result = {
contextTexts: [],
Expand All @@ -338,6 +342,13 @@ const Milvus = {
});
response.results.forEach((match) => {
if (match.score < similarityThreshold) return;
if (filterIdentifiers.includes(sourceIdentifier(match.metadata))) {
console.log(
"Milvus: A source was filtered from context as it's parent document is pinned."
);
return;
}

result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
Expand Down
15 changes: 13 additions & 2 deletions server/utils/vectorDbProviders/pinecone/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { sourceIdentifier } = require("../../chats");

const PineconeDB = {
name: "Pinecone",
Expand Down Expand Up @@ -44,7 +45,8 @@ const PineconeDB = {
namespace,
queryVector,
similarityThreshold = 0.25,
topN = 4
topN = 4,
filterIdentifiers = []
) {
const result = {
contextTexts: [],
Expand All @@ -61,6 +63,13 @@ const PineconeDB = {

response.matches.forEach((match) => {
if (match.score < similarityThreshold) return;
if (filterIdentifiers.includes(sourceIdentifier(match.metadata))) {
console.log(
"Pinecone: A source was filtered from context as it's parent document is pinned."
);
return;
}

result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
Expand Down Expand Up @@ -233,6 +242,7 @@ const PineconeDB = {
LLMConnector = null,
similarityThreshold = 0.25,
topN = 4,
filterIdentifiers = [],
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
Expand All @@ -249,7 +259,8 @@ const PineconeDB = {
namespace,
queryVector,
similarityThreshold,
topN
topN,
filterIdentifiers
);

const sources = sourceDocuments.map((metadata, i) => {
Expand Down
Loading