Mintplex-Labs · timothycarambat · May 1, 2024 · May 1, 2024 · May 1, 2024
diff --git a/server/utils/chats/embed.js b/server/utils/chats/embed.js
@@ -77,11 +77,10 @@ async function streamChatWithForEmbed(
     chatMode
   );
 
-  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
-  // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // See stream.js comment for more information on this implementation.
   await new DocumentManager({
     workspace: embed.workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {

diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js
@@ -88,11 +88,10 @@ async function chatWithWorkspace(
     chatMode,
   });
 
-  // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
-  // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // See stream.js comment for more information on this implementation.
   await new DocumentManager({
     workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {

diff --git a/server/utils/chats/stream.js b/server/utils/chats/stream.js
@@ -104,9 +104,13 @@ async function streamChatWithWorkspace(
 
   // Look for pinned documents and see if the user decided to use this feature. We will also do a vector search
   // as pinning is a supplemental tool but it should be used with caution since it can easily blow up a context window.
+  // However, we limit the appended pinned context to 80% of the context window's overall size, mostly because if it expands
+  // beyond this it will undergo prompt compression anyway to make it fit. If so much is pinned that the context here is bigger
+  // than what the model can support, it would get compressed anyway, and that really is not the point of pinning. Pinning is
+  // really best suited for high-context models.
   await new DocumentManager({
     workspace,
-    maxTokens: LLMConnector.limits.system,
+    maxTokens: LLMConnector.promptWindowLimit(),
   })
     .pinnedDocs()
     .then((pinnedDocs) => {