24 changes: 6 additions & 18 deletions server/utils/AiProviders/anthropic/index.js
@@ -4,6 +4,7 @@ const {
clientAbortedHandler,
} = require("../../helpers/chat/responses");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { MODEL_MAP } = require("../modelMap");

class AnthropicLLM {
constructor(embedder = null, modelPreference = null) {
@@ -32,25 +33,12 @@ class AnthropicLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.anthropic[modelName] ?? 100_000;
}

promptWindowLimit() {
switch (this.model) {
case "claude-instant-1.2":
return 100_000;
case "claude-2.0":
return 100_000;
case "claude-2.1":
return 200_000;
case "claude-3-opus-20240229":
return 200_000;
case "claude-3-sonnet-20240229":
return 200_000;
case "claude-3-haiku-20240307":
return 200_000;
case "claude-3-5-sonnet-20240620":
return 200_000;
default:
return 100_000; // assume a claude-instant-1.2 model
}
return MODEL_MAP.anthropic[this.model] ?? 100_000;
}

isValidChatCompletionModel(modelName = "") {
6 changes: 6 additions & 0 deletions server/utils/AiProviders/azureOpenAi/index.js
@@ -43,6 +43,12 @@ class AzureOpenAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
return !!process.env.AZURE_OPENAI_TOKEN_LIMIT
? Number(process.env.AZURE_OPENAI_TOKEN_LIMIT)
: 4096;
}

// Ensure the user selected a proper value for the token limit
// could be any of these https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-models
// and if undefined - assume it is the lowest end.
7 changes: 7 additions & 0 deletions server/utils/AiProviders/bedrock/index.js
@@ -82,6 +82,13 @@ class AWSBedrockLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.AWS_BEDROCK_LLM_MODEL_TOKEN_LIMIT || 8191;
if (!limit || isNaN(Number(limit)))
throw new Error("No valid token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
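The static method added to Bedrock above shows the shape shared by every environment-driven provider in this diff: read the provider-specific env var, fall back to a default window (4096 for most, 8191 for Bedrock), and reject non-numeric values. A minimal standalone sketch of that parse-and-validate step, using a hypothetical helper name that is not part of the PR:

// Hypothetical helper illustrating the env-driven limit pattern used above (not PR code).
function envTokenLimit(rawValue, fallback = 4096) {
  const limit = rawValue || fallback; // unset env var -> provider default
  if (!limit || isNaN(Number(limit)))
    throw new Error("No valid token context limit was set.");
  return Number(limit); // always hand back a number, never a string
}

// e.g. envTokenLimit(process.env.AWS_BEDROCK_LLM_MODEL_TOKEN_LIMIT, 8191) -> 8191 when unset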
22 changes: 6 additions & 16 deletions server/utils/AiProviders/cohere/index.js
@@ -1,6 +1,7 @@
const { v4 } = require("uuid");
const { writeResponseChunk } = require("../../helpers/chat/responses");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { MODEL_MAP } = require("../modelMap");

class CohereLLM {
constructor(embedder = null) {
@@ -58,23 +59,12 @@ class CohereLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.cohere[modelName] ?? 4_096;
}

promptWindowLimit() {
switch (this.model) {
case "command-r":
return 128_000;
case "command-r-plus":
return 128_000;
case "command":
return 4_096;
case "command-light":
return 4_096;
case "command-nightly":
return 8_192;
case "command-light-nightly":
return 8_192;
default:
return 4_096;
}
return MODEL_MAP.cohere[this.model] ?? 4_096;
}

async isValidChatCompletionModel(model = "") {
20 changes: 6 additions & 14 deletions server/utils/AiProviders/gemini/index.js
@@ -3,6 +3,7 @@ const {
writeResponseChunk,
clientAbortedHandler,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");

class GeminiLLM {
constructor(embedder = null, modelPreference = null) {
@@ -89,21 +90,12 @@ class GeminiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.gemini[modelName] ?? 30_720;
}

promptWindowLimit() {
switch (this.model) {
case "gemini-pro":
return 30_720;
case "gemini-1.0-pro":
return 30_720;
case "gemini-1.5-flash-latest":
return 1_048_576;
case "gemini-1.5-pro-latest":
return 2_097_152;
case "gemini-1.5-pro-exp-0801":
return 2_097_152;
default:
return 30_720; // assume a gemini-pro model
}
return MODEL_MAP.gemini[this.model] ?? 30_720;
}

isValidChatCompletionModel(modelName = "") {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/genericOpenAi/index.js
@@ -55,6 +55,13 @@ class GenericOpenAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
20 changes: 6 additions & 14 deletions server/utils/AiProviders/groq/index.js
@@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");

class GroqLLM {
constructor(embedder = null, modelPreference = null) {
@@ -40,21 +41,12 @@ class GroqLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.groq[modelName] ?? 8192;
}

promptWindowLimit() {
switch (this.model) {
case "gemma2-9b-it":
case "gemma-7b-it":
case "llama3-70b-8192":
case "llama3-8b-8192":
return 8192;
case "llama-3.1-70b-versatile":
case "llama-3.1-8b-instant":
return 8000;
case "mixtral-8x7b-32768":
return 32768;
default:
return 8192;
}
return MODEL_MAP.groq[this.model] ?? 8192;
}

async isValidChatCompletionModel(modelName = "") {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/huggingface/index.js
@@ -45,6 +45,13 @@ class HuggingFaceLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No HuggingFace token context limit was set.");
return Number(limit);
}

promptWindowLimit() {
const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
7 changes: 7 additions & 0 deletions server/utils/AiProviders/koboldCPP/index.js
@@ -51,6 +51,13 @@ class KoboldCPPLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/liteLLM/index.js
@@ -50,6 +50,13 @@ class LiteLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.LITE_LLM_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/lmStudio/index.js
@@ -48,6 +48,13 @@ class LMStudioLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No LMStudio token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/localAi/index.js
@@ -40,6 +40,13 @@ class LocalAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.LOCAL_AI_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No LocalAi token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
4 changes: 4 additions & 0 deletions server/utils/AiProviders/mistral/index.js
@@ -41,6 +41,10 @@ class MistralLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit() {
return 32000;
}

promptWindowLimit() {
return 32000;
}
55 changes: 55 additions & 0 deletions server/utils/AiProviders/modelMap.js
@@ -0,0 +1,55 @@
/**
* The model name and context window for all known models
* available through providers that have discrete model options.
*/
const MODEL_MAP = {
anthropic: {
"claude-instant-1.2": 100_000,
"claude-2.0": 100_000,
"claude-2.1": 200_000,
"claude-3-opus-20240229": 200_000,
"claude-3-sonnet-20240229": 200_000,
"claude-3-haiku-20240307": 200_000,
"claude-3-5-sonnet-20240620": 200_000,
},
cohere: {
"command-r": 128_000,
"command-r-plus": 128_000,
command: 4_096,
"command-light": 4_096,
"command-nightly": 8_192,
"command-light-nightly": 8_192,
},
gemini: {
"gemini-pro": 30_720,
"gemini-1.0-pro": 30_720,
"gemini-1.5-flash-latest": 1_048_576,
"gemini-1.5-pro-latest": 2_097_152,
"gemini-1.5-pro-exp-0801": 2_097_152,
},
groq: {
"gemma2-9b-it": 8192,
"gemma-7b-it": 8192,
"llama3-70b-8192": 8192,
"llama3-8b-8192": 8192,
"llama-3.1-70b-versatile": 8000,
"llama-3.1-8b-instant": 8000,
"mixtral-8x7b-32768": 32768,
},
openai: {
"gpt-3.5-turbo": 16_385,
"gpt-3.5-turbo-1106": 16_385,
"gpt-4o": 128_000,
"gpt-4o-2024-08-06": 128_000,
"gpt-4o-2024-05-13": 128_000,
"gpt-4o-mini": 128_000,
"gpt-4o-mini-2024-07-18": 128_000,
"gpt-4-turbo": 128_000,
"gpt-4-1106-preview": 128_000,
"gpt-4-turbo-preview": 128_000,
"gpt-4": 8_192,
"gpt-4-32k": 32_000,
},
};

module.exports = { MODEL_MAP };
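With the table in place, every provider that has discrete model options resolves its context window the same way: look up MODEL_MAP[provider][model] and fall back to a provider default via ??. A minimal sketch of that lookup behavior, assuming a relative require path and an illustrative helper name (neither comes from the PR):

// Illustrative only: demonstrates how the shared MODEL_MAP fallback behaves.
const { MODEL_MAP } = require("./server/utils/AiProviders/modelMap");

function resolveContextWindow(provider, modelName, fallback) {
  // Known models come from the table; unknown models use the provider default.
  return MODEL_MAP[provider]?.[modelName] ?? fallback;
}

console.log(resolveContextWindow("openai", "gpt-4o", 4_096)); // 128000
console.log(resolveContextWindow("anthropic", "claude-2.1", 100_000)); // 200000
console.log(resolveContextWindow("groq", "unknown-model", 8192)); // 8192 (fallback)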
7 changes: 7 additions & 0 deletions server/utils/AiProviders/native/index.js
@@ -96,6 +96,13 @@ class NativeLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No NativeAI token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
promptWindowLimit() {
const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
7 changes: 7 additions & 0 deletions server/utils/AiProviders/ollama/index.js
@@ -82,6 +82,13 @@ class OllamaAILLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.OLLAMA_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No Ollama token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
26 changes: 6 additions & 20 deletions server/utils/AiProviders/openAi/index.js
@@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");

class OpenAiLLM {
constructor(embedder = null, modelPreference = null) {
@@ -38,27 +39,12 @@ class OpenAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.openai[modelName] ?? 4_096;
}

promptWindowLimit() {
switch (this.model) {
case "gpt-3.5-turbo":
case "gpt-3.5-turbo-1106":
return 16_385;
case "gpt-4o":
case "gpt-4o-2024-08-06":
case "gpt-4o-2024-05-13":
case "gpt-4o-mini":
case "gpt-4o-mini-2024-07-18":
case "gpt-4-turbo":
case "gpt-4-1106-preview":
case "gpt-4-turbo-preview":
return 128_000;
case "gpt-4":
return 8_192;
case "gpt-4-32k":
return 32_000;
default:
return 4_096; // assume a fine-tune 3.5?
}
return MODEL_MAP.openai[this.model] ?? 4_096;
}

// Short circuit if name has 'gpt' since we now fetch models from OpenAI API