Merged
server/utils/chats/index.js: 27 changes (18 additions, 9 deletions)
@@ -151,16 +151,27 @@ async function chatWithWorkspace(
     };
   }
 
-  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get the current search?
+  // This is to give the LLM the ability to "comprehend" a contextual response without
+  // populating the Citations under a response with documents the user "thinks" are irrelevant
+  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+  // If a past citation was used to answer the question, that is visible in the history, so it logically makes sense
+  // and it does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+  // TLDR: reduces GitHub issues for "LLM citing a document that has no answer in it" while keeping answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
   sources = [...sources, ...vectorSearchResults.sources];
 
-  // If in query mode and no sources are found from the vector search and no pinned documents, do not
+  // If in query mode and no context chunks are found from search, backfill, or pins - do not
   // let the LLM try to hallucinate a response or use general knowledge and exit early
-  if (
-    chatMode === "query" &&
-    vectorSearchResults.sources.length === 0 &&
-    pinnedDocIdentifiers.length === 0
-  ) {
+  if (chatMode === "query" && contextTexts.length === 0) {
     return {
       id: uuid,
       type: "textResponse",
@@ -224,9 +235,7 @@ async function recentChatHistory({
   workspace,
   thread = null,
   messageLimit = 20,
-  chatMode = null,
 }) {
-  if (chatMode === "query") return { rawHistory: [], chatHistory: [] };
   const rawHistory = (
     await WorkspaceChats.where(
       {
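
The comment block in the hunk above asks why contextTexts receives every backfilled chunk while sources only reflects the current search. A minimal sketch of that split, not part of the diff: the require path and the vectorSearchResults/rawHistory stand-ins are invented and only mimic the shapes used in chatWithWorkspace.

// Hedged illustration only; data shapes are assumptions, not values from the PR.
const { fillSourceWindow } = require("./server/utils/helpers/chat"); // path assumed from repo root

const vectorSearchResults = {
  sources: [{ id: "a", text: "Only fresh hit for this prompt.", score: 0.8 }],
};
const rawHistory = [
  {
    response: JSON.stringify({
      sources: [
        { id: "b", text: "Chunk cited in the previous answer.", score: 0.75 },
        { id: "c", text: "Another previously cited chunk.", score: 0.72 },
      ],
    }),
  },
];
const pinnedDocIdentifiers = [];

const filledSources = fillSourceWindow({
  nDocs: 4,
  searchResults: vectorSearchResults.sources,
  history: rawHistory,
  filterIdentifiers: pinnedDocIdentifiers,
});

// The prompt context gets the fresh hit plus the backfilled chunks (three texts here)...
const contextTexts = [...filledSources.contextTexts];
// ...while the citations rendered under the response stay limited to the current search (one source).
const sources = [...vectorSearchResults.sources];
console.log(contextTexts.length, sources.length); // 3 1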
server/utils/chats/stream.js: 26 changes (18 additions, 8 deletions)
@@ -100,7 +100,6 @@ async function streamChatWithWorkspace(
     workspace,
     thread,
     messageLimit,
-    chatMode,
   });
 
   // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
@@ -157,16 +156,27 @@ async function streamChatWithWorkspace(
     return;
   }
 
-  contextTexts = [...contextTexts, ...vectorSearchResults.contextTexts];
+  const { fillSourceWindow } = require("../helpers/chat");
+  const filledSources = fillSourceWindow({
+    nDocs: workspace?.topN || 4,
+    searchResults: vectorSearchResults.sources,
+    history: rawHistory,
+    filterIdentifiers: pinnedDocIdentifiers,
+  });
+
+  // Why does contextTexts get all the info, but sources only get the current search?
+  // This is to give the LLM the ability to "comprehend" a contextual response without
+  // populating the Citations under a response with documents the user "thinks" are irrelevant
+  // due to how we manage backfilling of the context to keep chats with the LLM more correct in responses.
+  // If a past citation was used to answer the question, that is visible in the history, so it logically makes sense
+  // and it does not appear to the user that a new response used information that is otherwise irrelevant for a given prompt.
+  // TLDR: reduces GitHub issues for "LLM citing a document that has no answer in it" while keeping answers highly accurate.
+  contextTexts = [...contextTexts, ...filledSources.contextTexts];
   sources = [...sources, ...vectorSearchResults.sources];
 
-  // If in query mode and no sources are found from the vector search and no pinned documents, do not
+  // If in query mode and no context chunks are found from search, backfill, or pins - do not
   // let the LLM try to hallucinate a response or use general knowledge and exit early
-  if (
-    chatMode === "query" &&
-    sources.length === 0 &&
-    pinnedDocIdentifiers.length === 0
-  ) {
+  if (chatMode === "query" && contextTexts.length === 0) {
     writeResponseChunk(response, {
       id: uuid,
       type: "textResponse",
server/utils/helpers/chat/index.js: 99 changes (99 additions, 0 deletions)
@@ -1,3 +1,5 @@
+const { sourceIdentifier } = require("../../chats");
+const { safeJsonParse } = require("../../http");
 const { TokenManager } = require("../tiktoken");
 const { convertToPromptHistory } = require("./responses");
 
@@ -343,7 +345,104 @@ function cannonball({
   return truncatedText;
 }
 
+/**
+ * Fill the source window with the following priority:
+ * 1. Pinned documents (handled prior to this function)
+ * 2. Vector search results
+ * 3. prevSources in chat history, starting from the most recent.
+ *
+ * This ensures the window always has the desired number of sources so that follow-up questions
+ * in any chat mode have relevant sources, but not infinite sources. This function is used during chatting
+ * and allows follow-up questions within a query chat that would otherwise have zero sources and fail.
+ * The added benefit is that during regular RAG chat we get better coherence of citations that would otherwise
+ * also yield no results, with no need for a re-ranker to run and take much longer to return a response.
+ *
+ * The side effect is that unrelated follow-up questions now have citations that can look totally irrelevant; however,
+ * we would rather optimize for the correctness of a response vs showing extraneous sources during a response. Since search
+ * results always take priority, an unrelated question that produces RAG results will still function as desired, and because
+ * backfilled sources come from previous history, "changing context" mid-chat is handled appropriately.
+ * Example:
+ * ---previous implementation---
+ * prompt 1: "What is anythingllm?" -> possibly get 4 good sources
+ * prompt 2: "Tell me some features" -> possibly get 0-1 maybe-relevant sources + previous answer response -> bad response due to bad context management
+ * ---next implementation---
+ * prompt 1: "What is anythingllm?" -> possibly get 4 good sources
+ * prompt 2: "Tell me some features" -> possibly get 0-1 maybe-relevant sources + previous answer response -> backfill with 3 good sources from previous -> much better response
+ *
+ * @param {Object} config - params to call
+ * @param {number} config.nDocs - Fill size of the window
+ * @param {object[]} config.searchResults - Vector similarityResponse results for .sources
+ * @param {object[]} config.history - rawHistory of chat containing sources
+ * @param {string[]} config.filterIdentifiers - Pinned document identifiers to prevent duplicate context
+ * @returns {{
+ *  contextTexts: string[],
+ *  sources: object[],
+ * }} - Sources that should fill the window and their text content
+ */
+function fillSourceWindow({
+  nDocs = 4, // Number of documents
+  searchResults = [], // Sources from similarity search
+  history = [], // Raw history
+  filterIdentifiers = [], // Pinned document sources
+} = {}) {
+  const sources = [...searchResults];
+
+  if (sources.length >= nDocs || history.length === 0) {
+    return {
+      sources,
+      contextTexts: sources.map((src) => src.text),
+    };
+  }
+
+  const log = (text, ...args) => {
+    console.log(`\x1b[36m[fillSourceWindow]\x1b[0m ${text}`, ...args);
+  };
+
+  log(
+    `Need to backfill ${nDocs - searchResults.length} chunks to fill in the source window for RAG!`
+  );
+  const seenChunks = new Set(searchResults.map((source) => source.id));
+
+  // We need to reverse again because we need to iterate from the bottom of the array (most recent chats).
+  // Looking at this function by itself you may think this loop could be extreme for long chat histories,
+  // but that was already handled where `history` was derived. It comes from `recentChatHistory`, which
+  // enforces a limit on history (default: 20). So this loop is not as extreme as it looks at first glance.
+  for (const chat of history.reverse()) {
+    if (sources.length >= nDocs) {
+      log(
+        `Citations backfilled to ${nDocs} references from ${searchResults.length} original citations.`
+      );
+      break;
+    }
+
+    const chatSources =
+      safeJsonParse(chat.response, { sources: [] })?.sources || [];
+    if (!chatSources?.length || !Array.isArray(chatSources)) continue;
+
+    const validSources = chatSources.filter((source) => {
+      return (
+        filterIdentifiers.includes(sourceIdentifier(source)) == false && // source cannot be in current pins
+        source.hasOwnProperty("score") && // source cannot have come from a pinned document that was previously pinned
+        source.hasOwnProperty("text") && // source has a valid text property we can use
+        seenChunks.has(source.id) == false // is unique
+      );
+    });
+
+    for (const validSource of validSources) {
+      if (sources.length >= nDocs) break;
+      sources.push(validSource);
+      seenChunks.add(validSource.id);
+    }
+  }
+
+  return {
+    sources,
+    contextTexts: sources.map((src) => src.text),
+  };
+}
+
 module.exports = {
   messageArrayCompressor,
   messageStringCompressor,
+  fillSourceWindow,
 };
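
For readers skimming the new helper, a hedged usage sketch separate from the PR: the chunk ids, texts, and scores below are invented, and the history rows only mimic what recentChatHistory returns, i.e. chronologically ordered rows whose response field is a JSON string carrying the previous answer's sources.

// Hedged sketch with made-up data; it exercises the backfill rules documented in the JSDoc above.
const { fillSourceWindow } = require("./server/utils/helpers/chat"); // path assumed from repo root

const searchResults = [
  { id: "chunk-1", text: "Fresh hit from the current vector search.", score: 0.81 },
];

// Oldest chat first, as recentChatHistory returns it; fillSourceWindow walks it newest-first.
const history = [
  {
    response: JSON.stringify({
      sources: [
        { id: "chunk-2", text: "Cited two prompts ago.", score: 0.74 },
        { id: "chunk-1", text: "Duplicate of the fresh hit.", score: 0.7 }, // skipped: id already seen
        { id: "chunk-3", text: "Previously pinned content." }, // skipped: no score, so it came from a pin
      ],
    }),
  },
  {
    response: JSON.stringify({
      sources: [{ id: "chunk-4", text: "Cited in the last answer.", score: 0.69 }],
    }),
  },
];

const { sources, contextTexts } = fillSourceWindow({
  nDocs: 4,
  searchResults,
  history,
  filterIdentifiers: [], // no pinned documents in this example
});

// Under these assumptions the window ends up with chunk-1 (search), then chunk-4 and chunk-2
// backfilled from history, and contextTexts mirrors their text fields in the same order.
console.log(sources.map((s) => s.id)); // [ 'chunk-1', 'chunk-4', 'chunk-2' ]
console.log(contextTexts.length); // 3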