24 changes: 6 additions & 18 deletions server/utils/AiProviders/anthropic/index.js
@@ -4,6 +4,7 @@ const {
clientAbortedHandler,
} = require("../../helpers/chat/responses");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { MODEL_MAP } = require("../modelMap");

class AnthropicLLM {
constructor(embedder = null, modelPreference = null) {
@@ -32,25 +33,12 @@ class AnthropicLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.anthropic[modelName] ?? 100_000;
}

promptWindowLimit() {
switch (this.model) {
case "claude-instant-1.2":
return 100_000;
case "claude-2.0":
return 100_000;
case "claude-2.1":
return 200_000;
case "claude-3-opus-20240229":
return 200_000;
case "claude-3-sonnet-20240229":
return 200_000;
case "claude-3-haiku-20240307":
return 200_000;
case "claude-3-5-sonnet-20240620":
return 200_000;
default:
return 100_000; // assume a claude-instant-1.2 model
}
return MODEL_MAP.anthropic[this.model] ?? 100_000;
}

isValidChatCompletionModel(modelName = "") {
6 changes: 6 additions & 0 deletions server/utils/AiProviders/azureOpenAi/index.js
@@ -43,6 +43,12 @@ class AzureOpenAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
return !!process.env.AZURE_OPENAI_TOKEN_LIMIT
? Number(process.env.AZURE_OPENAI_TOKEN_LIMIT)
: 4096;
}

// Ensure the user selected a proper value for the token limit
// could be any of these https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-models
// and if undefined - assume it is the lowest end.
7 changes: 7 additions & 0 deletions server/utils/AiProviders/bedrock/index.js
@@ -82,6 +82,13 @@ class AWSBedrockLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.AWS_BEDROCK_LLM_MODEL_TOKEN_LIMIT || 8191;
if (!limit || isNaN(Number(limit)))
throw new Error("No valid token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
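The static method added to Bedrock above shows the shape shared by every environment-driven provider in this diff: read the provider-specific env var, fall back to a default window (4096 for most, 8191 for Bedrock), and reject non-numeric values. A minimal standalone sketch of that parse-and-validate step, using a hypothetical helper name that is not part of the PR:

// Hypothetical helper illustrating the env-driven limit pattern used above (not PR code).
function envTokenLimit(rawValue, fallback = 4096) {
  const limit = rawValue || fallback; // unset env var -> provider default
  if (!limit || isNaN(Number(limit)))
    throw new Error("No valid token context limit was set.");
  return Number(limit); // always hand back a number, never a string
}

// e.g. envTokenLimit(process.env.AWS_BEDROCK_LLM_MODEL_TOKEN_LIMIT, 8191) -> 8191 when unset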
22 changes: 6 additions & 16 deletions server/utils/AiProviders/cohere/index.js
@@ -1,6 +1,7 @@
const { v4 } = require("uuid");
const { writeResponseChunk } = require("../../helpers/chat/responses");
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { MODEL_MAP } = require("../modelMap");

class CohereLLM {
constructor(embedder = null) {
@@ -58,23 +59,12 @@ class CohereLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.cohere[modelName] ?? 4_096;
}

promptWindowLimit() {
switch (this.model) {
case "command-r":
return 128_000;
case "command-r-plus":
return 128_000;
case "command":
return 4_096;
case "command-light":
return 4_096;
case "command-nightly":
return 8_192;
case "command-light-nightly":
return 8_192;
default:
return 4_096;
}
return MODEL_MAP.cohere[this.model] ?? 4_096;
}

async isValidChatCompletionModel(model = "") {
20 changes: 6 additions & 14 deletions server/utils/AiProviders/gemini/index.js
@@ -3,6 +3,7 @@ const {
writeResponseChunk,
clientAbortedHandler,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");

class GeminiLLM {
constructor(embedder = null, modelPreference = null) {
@@ -89,21 +90,12 @@ class GeminiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.gemini[modelName] ?? 30_720;
}

promptWindowLimit() {
switch (this.model) {
case "gemini-pro":
return 30_720;
case "gemini-1.0-pro":
return 30_720;
case "gemini-1.5-flash-latest":
return 1_048_576;
case "gemini-1.5-pro-latest":
return 2_097_152;
case "gemini-1.5-pro-exp-0801":
return 2_097_152;
default:
return 30_720; // assume a gemini-pro model
}
return MODEL_MAP.gemini[this.model] ?? 30_720;
}

isValidChatCompletionModel(modelName = "") {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/genericOpenAi/index.js
@@ -55,6 +55,13 @@ class GenericOpenAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
20 changes: 6 additions & 14 deletions server/utils/AiProviders/groq/index.js
@@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");

class GroqLLM {
constructor(embedder = null, modelPreference = null) {
@@ -40,21 +41,12 @@ class GroqLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.groq[modelName] ?? 8192;
}

promptWindowLimit() {
switch (this.model) {
case "gemma2-9b-it":
case "gemma-7b-it":
case "llama3-70b-8192":
case "llama3-8b-8192":
return 8192;
case "llama-3.1-70b-versatile":
case "llama-3.1-8b-instant":
return 8000;
case "mixtral-8x7b-32768":
return 32768;
default:
return 8192;
}
return MODEL_MAP.groq[this.model] ?? 8192;
}

async isValidChatCompletionModel(modelName = "") {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/huggingface/index.js
@@ -45,6 +45,13 @@ class HuggingFaceLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No HuggingFace token context limit was set.");
return Number(limit);
}

promptWindowLimit() {
const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
7 changes: 7 additions & 0 deletions server/utils/AiProviders/koboldCPP/index.js
@@ -51,6 +51,13 @@ class KoboldCPPLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.KOBOLD_CPP_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/liteLLM/index.js
@@ -50,6 +50,13 @@ class LiteLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.LITE_LLM_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/lmStudio/index.js
@@ -48,6 +48,13 @@ class LMStudioLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.LMSTUDIO_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No LMStudio token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
7 changes: 7 additions & 0 deletions server/utils/AiProviders/localAi/index.js
@@ -40,6 +40,13 @@ class LocalAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.LOCAL_AI_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No LocalAi token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
4 changes: 4 additions & 0 deletions server/utils/AiProviders/mistral/index.js
@@ -41,6 +41,10 @@ class MistralLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit() {
return 32000;
}

promptWindowLimit() {
return 32000;
}
55 changes: 55 additions & 0 deletions server/utils/AiProviders/modelMap.js
@@ -0,0 +1,55 @@
/**
* The model name and context window for all known models
* available through providers that have discrete model options.
*/
const MODEL_MAP = {
anthropic: {
"claude-instant-1.2": 100_000,
"claude-2.0": 100_000,
"claude-2.1": 200_000,
"claude-3-opus-20240229": 200_000,
"claude-3-sonnet-20240229": 200_000,
"claude-3-haiku-20240307": 200_000,
"claude-3-5-sonnet-20240620": 200_000,
},
cohere: {
"command-r": 128_000,
"command-r-plus": 128_000,
command: 4_096,
"command-light": 4_096,
"command-nightly": 8_192,
"command-light-nightly": 8_192,
},
gemini: {
"gemini-pro": 30_720,
"gemini-1.0-pro": 30_720,
"gemini-1.5-flash-latest": 1_048_576,
"gemini-1.5-pro-latest": 2_097_152,
"gemini-1.5-pro-exp-0801": 2_097_152,
},
groq: {
"gemma2-9b-it": 8192,
"gemma-7b-it": 8192,
"llama3-70b-8192": 8192,
"llama3-8b-8192": 8192,
"llama-3.1-70b-versatile": 8000,
"llama-3.1-8b-instant": 8000,
"mixtral-8x7b-32768": 32768,
},
openai: {
"gpt-3.5-turbo": 16_385,
"gpt-3.5-turbo-1106": 16_385,
"gpt-4o": 128_000,
"gpt-4o-2024-08-06": 128_000,
"gpt-4o-2024-05-13": 128_000,
"gpt-4o-mini": 128_000,
"gpt-4o-mini-2024-07-18": 128_000,
"gpt-4-turbo": 128_000,
"gpt-4-1106-preview": 128_000,
"gpt-4-turbo-preview": 128_000,
"gpt-4": 8_192,
"gpt-4-32k": 32_000,
},
};

module.exports = { MODEL_MAP };
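With the table in place, every provider that has discrete model options resolves its context window the same way: look up MODEL_MAP[provider][model] and fall back to a provider default via ??. A minimal sketch of that lookup behavior, assuming a relative require path and an illustrative helper name (neither comes from the PR):

// Illustrative only: demonstrates how the shared MODEL_MAP fallback behaves.
const { MODEL_MAP } = require("./server/utils/AiProviders/modelMap");

function resolveContextWindow(provider, modelName, fallback) {
  // Known models come from the table; unknown models use the provider default.
  return MODEL_MAP[provider]?.[modelName] ?? fallback;
}

console.log(resolveContextWindow("openai", "gpt-4o", 4_096)); // 128000
console.log(resolveContextWindow("anthropic", "claude-2.1", 100_000)); // 200000
console.log(resolveContextWindow("groq", "unknown-model", 8192)); // 8192 (fallback)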
7 changes: 7 additions & 0 deletions server/utils/AiProviders/native/index.js
@@ -96,6 +96,13 @@ class NativeLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No NativeAI token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
promptWindowLimit() {
const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
7 changes: 7 additions & 0 deletions server/utils/AiProviders/ollama/index.js
@@ -82,6 +82,13 @@ class OllamaAILLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(_modelName) {
const limit = process.env.OLLAMA_MODEL_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No Ollama token context limit was set.");
return Number(limit);
}

// Ensure the user set a value for the token limit
// and if undefined - assume 4096 window.
promptWindowLimit() {
26 changes: 6 additions & 20 deletions server/utils/AiProviders/openAi/index.js
@@ -2,6 +2,7 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
const { MODEL_MAP } = require("../modelMap");

class OpenAiLLM {
constructor(embedder = null, modelPreference = null) {
@@ -38,27 +39,12 @@ class OpenAiLLM {
return "streamGetChatCompletion" in this;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.openai[modelName] ?? 4_096;
}

promptWindowLimit() {
switch (this.model) {
case "gpt-3.5-turbo":
case "gpt-3.5-turbo-1106":
return 16_385;
case "gpt-4o":
case "gpt-4o-2024-08-06":
case "gpt-4o-2024-05-13":
case "gpt-4o-mini":
case "gpt-4o-mini-2024-07-18":
case "gpt-4-turbo":
case "gpt-4-1106-preview":
case "gpt-4-turbo-preview":
return 128_000;
case "gpt-4":
return 8_192;
case "gpt-4-32k":
return 32_000;
default:
return 4_096; // assume a fine-tune 3.5?
}
return MODEL_MAP.openai[this.model] ?? 4_096;
}

// Short circuit if name has 'gpt' since we now fetch models from OpenAI API