From 076cb844fc0b13e2797dfd309cdb756bdca6dc66 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Tue, 13 May 2025 16:23:23 -0700 Subject: [PATCH 1/5] Add context window finder from litellm maintained list apply to all cloud providers, have client cache for 3 days --- server/storage/models/.gitignore | 1 + server/utils/AiProviders/anthropic/index.js | 4 +- server/utils/AiProviders/cohere/index.js | 4 +- server/utils/AiProviders/deepseek/index.js | 6 +- .../utils/AiProviders/gemini/defaultModels.js | 15 +- server/utils/AiProviders/gemini/index.js | 12 +- .../AiProviders/gemini/syncStaticLists.mjs | 31 ---- server/utils/AiProviders/groq/index.js | 4 +- server/utils/AiProviders/modelMap/index.js | 140 ++++++++++++++++++ .../{modelMap.js => modelMap/legacy.js} | 13 +- server/utils/AiProviders/openAi/index.js | 9 +- server/utils/AiProviders/xai/index.js | 6 +- 12 files changed, 177 insertions(+), 68 deletions(-) create mode 100644 server/utils/AiProviders/modelMap/index.js rename server/utils/AiProviders/{modelMap.js => modelMap/legacy.js} (90%) diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore index f5c78ac877c..5e83df7bcc4 100644 --- a/server/storage/models/.gitignore +++ b/server/storage/models/.gitignore @@ -9,3 +9,4 @@ gemini togetherAi tesseract ppio +context-windows/* \ No newline at end of file diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js index c4f53acf639..450b376b3e0 100644 --- a/server/utils/AiProviders/anthropic/index.js +++ b/server/utils/AiProviders/anthropic/index.js @@ -45,11 +45,11 @@ class AnthropicLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.anthropic[modelName] ?? 100_000; + return MODEL_MAP.get("anthropic", modelName) ?? 100_000; } promptWindowLimit() { - return MODEL_MAP.anthropic[this.model] ?? 100_000; + return MODEL_MAP.get("anthropic", this.model) ?? 100_000; } isValidChatCompletionModel(_modelName = "") { diff --git a/server/utils/AiProviders/cohere/index.js b/server/utils/AiProviders/cohere/index.js index 33b65df21c0..a6647f5b658 100644 --- a/server/utils/AiProviders/cohere/index.js +++ b/server/utils/AiProviders/cohere/index.js @@ -63,11 +63,11 @@ class CohereLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.cohere[modelName] ?? 4_096; + return MODEL_MAP.get("cohere", modelName) ?? 4_096; } promptWindowLimit() { - return MODEL_MAP.cohere[this.model] ?? 4_096; + return MODEL_MAP.get("cohere", this.model) ?? 4_096; } async isValidChatCompletionModel(model = "") { diff --git a/server/utils/AiProviders/deepseek/index.js b/server/utils/AiProviders/deepseek/index.js index b91332a84a1..ddb2de317c7 100644 --- a/server/utils/AiProviders/deepseek/index.js +++ b/server/utils/AiProviders/deepseek/index.js @@ -29,7 +29,7 @@ class DeepSeekLLM { this.embedder = embedder ?? new NativeEmbedder(); this.defaultTemp = 0.7; - this.log("Initialized with model:", this.model); + this.log(`Initialized ${this.model} with context window ${this.promptWindowLimit()}`); } log(text, ...args) { @@ -53,11 +53,11 @@ class DeepSeekLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.deepseek[modelName] ?? 8192; + return MODEL_MAP.get("deepseek", modelName) ?? 8192; } promptWindowLimit() { - return MODEL_MAP.deepseek[this.model] ?? 8192; + return MODEL_MAP.get("deepseek", this.model) ?? 8192; } async isValidChatCompletionModel(modelName = "") { diff --git a/server/utils/AiProviders/gemini/defaultModels.js b/server/utils/AiProviders/gemini/defaultModels.js index fde63341d2a..4a52dc99c65 100644 --- a/server/utils/AiProviders/gemini/defaultModels.js +++ b/server/utils/AiProviders/gemini/defaultModels.js @@ -1,7 +1,7 @@ const { MODEL_MAP } = require("../modelMap"); const stableModels = [ - // %STABLE_MODELS% - updated 2025-04-07T20:29:49.276Z + // %STABLE_MODELS% - updated 2025-05-13T23:13:58.920Z "gemini-1.5-pro-001", "gemini-1.5-pro-002", "gemini-1.5-pro", @@ -14,6 +14,7 @@ const stableModels = [ "gemini-2.0-flash-001", "gemini-2.0-flash-lite-001", "gemini-2.0-flash-lite", + "gemini-2.0-flash-preview-image-generation", // %EOC_STABLE_MODELS% ]; @@ -22,7 +23,7 @@ const stableModels = [ // generally, v1beta models have `exp` in the name, but not always // so we check for both against a static list as well via API. const v1BetaModels = [ - // %V1BETA_MODELS% - updated 2025-04-07T20:29:49.276Z + // %V1BETA_MODELS% - updated 2025-05-13T23:13:58.920Z "gemini-1.5-pro-latest", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b-latest", @@ -30,6 +31,9 @@ const v1BetaModels = [ "gemini-1.5-flash-8b-exp-0924", "gemini-2.5-pro-exp-03-25", "gemini-2.5-pro-preview-03-25", + "gemini-2.5-flash-preview-04-17", + "gemini-2.5-flash-preview-04-17-thinking", + "gemini-2.5-pro-preview-05-06", "gemini-2.0-flash-exp", "gemini-2.0-flash-exp-image-generation", "gemini-2.0-flash-lite-preview-02-05", @@ -41,6 +45,7 @@ const v1BetaModels = [ "gemini-2.0-flash-thinking-exp", "gemini-2.0-flash-thinking-exp-1219", "learnlm-1.5-pro-experimental", + "learnlm-2.0-flash-experimental", "gemma-3-1b-it", "gemma-3-4b-it", "gemma-3-12b-it", @@ -48,17 +53,17 @@ const v1BetaModels = [ // %EOC_V1BETA_MODELS% ]; -const defaultGeminiModels = [ +const defaultGeminiModels = () => [ ...stableModels.map((model) => ({ id: model, name: model, - contextWindow: MODEL_MAP.gemini[model], + contextWindow: MODEL_MAP.get("gemini", model), experimental: false, })), ...v1BetaModels.map((model) => ({ id: model, name: model, - contextWindow: MODEL_MAP.gemini[model], + contextWindow: MODEL_MAP.get("gemini", model), experimental: true, })), ]; diff --git a/server/utils/AiProviders/gemini/index.js b/server/utils/AiProviders/gemini/index.js index bd2268e5174..393f0cd5227 100644 --- a/server/utils/AiProviders/gemini/index.js +++ b/server/utils/AiProviders/gemini/index.js @@ -107,7 +107,7 @@ class GeminiLLM { try { const cacheModelPath = path.resolve(cacheFolder, "models.json"); if (!fs.existsSync(cacheModelPath)) - return MODEL_MAP.gemini[modelName] ?? 30_720; + return MODEL_MAP.get("gemini", modelName) ?? 30_720; const models = safeJsonParse(fs.readFileSync(cacheModelPath)); const model = models.find((model) => model.id === modelName); @@ -118,15 +118,13 @@ class GeminiLLM { return model.contextWindow; } catch (e) { console.error(`GeminiLLM:promptWindowLimit`, e.message); - return MODEL_MAP.gemini[modelName] ?? 30_720; + return MODEL_MAP.get("gemini", modelName) ?? 30_720; } } promptWindowLimit() { try { - if (!fs.existsSync(this.cacheModelPath)) - return MODEL_MAP.gemini[this.model] ?? 30_720; - + if (!fs.existsSync(this.cacheModelPath)) return MODEL_MAP.get("gemini", this.model) ?? 30_720; const models = safeJsonParse(fs.readFileSync(this.cacheModelPath)); const model = models.find((model) => model.id === this.model); if (!model) @@ -136,7 +134,7 @@ class GeminiLLM { return model.contextWindow; } catch (e) { console.error(`GeminiLLM:promptWindowLimit`, e.message); - return MODEL_MAP.gemini[this.model] ?? 30_720; + return MODEL_MAP.get("gemini", this.model) ?? 30_720; } } @@ -281,7 +279,7 @@ class GeminiLLM { if (allModels.length === 0) { console.error(`Gemini:getGeminiModels - No models found`); - return defaultGeminiModels; + return defaultGeminiModels(); } console.log( diff --git a/server/utils/AiProviders/gemini/syncStaticLists.mjs b/server/utils/AiProviders/gemini/syncStaticLists.mjs index 42382fae388..b276995acdf 100644 --- a/server/utils/AiProviders/gemini/syncStaticLists.mjs +++ b/server/utils/AiProviders/gemini/syncStaticLists.mjs @@ -9,7 +9,6 @@ import fs from "fs"; import path from "path"; import dotenv from "dotenv"; -import { MODEL_MAP } from "../modelMap.js"; dotenv.config({ path: `../../../.env.development` }); const existingCachePath = path.resolve('../../../storage/models/gemini') @@ -46,34 +45,4 @@ function updateDefaultModelsFile(models) { fs.writeFileSync(path.join("./defaultModels.js"), defaultModelFileContents); console.log("Updated defaultModels.js. Dont forget to `yarn lint` and commit!"); } - -function updateModelMap(models) { - const existingModelMap = MODEL_MAP; - console.log('Updating modelMap.js `gemini` object...') - console.log(`Removed existing gemini object (${Object.keys(existingModelMap.gemini).length} models) from modelMap.js`); - existingModelMap.gemini = {}; - - for (const model of models) existingModelMap.gemini[model.id] = model.contextWindow; - console.log(`Updated modelMap.js 'gemini' object with ${Object.keys(existingModelMap.gemini).length} models from API`); - - // Update the modelMap.js file - const contents = `/** - * The model name and context window for all know model windows - * that are available through providers which has discrete model options. - * This file is automatically generated by syncStaticLists.mjs - * and should not be edited manually. - * - * Last updated: ${new Date().toISOString()} - */ -const MODEL_MAP = { - ${Object.entries(existingModelMap).map(([key, value]) => `${key}: ${JSON.stringify(value, null, 2)}`).join(',\n')} -}; - -module.exports = { MODEL_MAP }; -`; - fs.writeFileSync(path.resolve("../modelMap.js"), contents); - console.log('Updated modelMap.js `gemini` object. Dont forget to `yarn lint` and commit!'); -} - updateDefaultModelsFile(models); -updateModelMap(models); diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js index 9e7e77fa16b..8eddefab0c5 100644 --- a/server/utils/AiProviders/groq/index.js +++ b/server/utils/AiProviders/groq/index.js @@ -49,11 +49,11 @@ class GroqLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.groq[modelName] ?? 8192; + return MODEL_MAP.get("groq", modelName) ?? 8192; } promptWindowLimit() { - return MODEL_MAP.groq[this.model] ?? 8192; + return MODEL_MAP.get("groq", this.model) ?? 8192; } async isValidChatCompletionModel(modelName = "") { diff --git a/server/utils/AiProviders/modelMap/index.js b/server/utils/AiProviders/modelMap/index.js new file mode 100644 index 00000000000..dbf0aff6a60 --- /dev/null +++ b/server/utils/AiProviders/modelMap/index.js @@ -0,0 +1,140 @@ +const path = require("path"); +const fs = require("fs"); +const LEGACY_MODEL_MAP = require("./legacy"); + +class ContextWindowFinder { + static instance = null; + static modelMap = LEGACY_MODEL_MAP; + + /** + * Mapping for AnythingLLM provider <> LiteLLM provider + * @type {Record} + */ + static trackedProviders = { + anthropic: "anthropic", + openai: "openai", + cohere: "cohere_chat", + gemini: "vertex_ai-language-models", + groq: "groq", + xai: "xai", + deepseek: "deepseek", + }; + static expiryMs = 1000 * 60 * 60 * 24 * 3; // 3 days + static remoteUrl = + "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"; + + cacheLocation = path.resolve( + process.env.STORAGE_DIR + ? path.resolve(process.env.STORAGE_DIR, "models", "context-windows") + : path.resolve(__dirname, `../../../storage/models/context-windows`) + ); + cacheFilePath = path.resolve(this.cacheLocation, "context-windows.json"); + cacheFileExpiryPath = path.resolve(this.cacheLocation, ".cached_at"); + + constructor() { + if (ContextWindowFinder.instance) return ContextWindowFinder.instance; + ContextWindowFinder.instance = this; + if (!fs.existsSync(this.cacheLocation)) + fs.mkdirSync(this.cacheLocation, { recursive: true }); + this.#pullRemoteModelMap(); + } + + log(text, ...args) { + console.log(`\x1b[33m[ContextWindowFinder]\x1b[0m ${text}`, ...args); + } + + /** + * Checks if the cache is stale by checking if the cache file exists and if the cache file is older than the expiry time. + * @returns {boolean} + */ + get isCacheStale() { + if (!fs.existsSync(this.cacheFileExpiryPath)) return true; + const cachedAt = fs.readFileSync(this.cacheFileExpiryPath, "utf8"); + return Date.now() - cachedAt > ContextWindowFinder.expiryMs; + } + + get cache() { + if (!fs.existsSync(this.cacheFileExpiryPath)) return null; + if (!this.isCacheStale) + return JSON.parse( + fs.readFileSync(this.cacheFilePath, { encoding: "utf8" }) + ); + return null; + } + + /** + * Pulls the remote model map from the remote URL, formats it and caches it. + * @returns {Record>} - The formatted model map + */ + async #pullRemoteModelMap() { + const remoteContexWindowMap = await fetch(ContextWindowFinder.remoteUrl) + .then((res) => res.json()) + .then((data) => { + fs.writeFileSync(this.cacheFilePath, JSON.stringify(data, null, 2)); + fs.writeFileSync(this.cacheFileExpiryPath, Date.now().toString()); + this.log("Remote model map synced and cached"); + return data; + }) + .catch((error) => { + this.log("Error syncing remote model map", error); + return null; + }); + if (!remoteContexWindowMap) return null; + + const modelMap = this.#formatModelMap(remoteContexWindowMap); + fs.writeFileSync(this.cacheFilePath, JSON.stringify(modelMap, null, 2)); + fs.writeFileSync(this.cacheFileExpiryPath, Date.now().toString()); + return modelMap; + } + + /** + * Formats the remote model map to a format that is compatible with how we store the model map + * for all providers who use it. + * @param {Record} modelMap - The remote model map + * @returns {Record>} - The formatted model map + */ + #formatModelMap(modelMap = {}) { + const formattedModelMap = {}; + + for (const [provider, liteLLMProviderTag] of Object.entries( + ContextWindowFinder.trackedProviders + )) { + formattedModelMap[provider] = {}; + const matches = Object.entries(modelMap).filter( + ([_key, config]) => config.litellm_provider === liteLLMProviderTag + ); + for (const [key, config] of matches) { + const contextWindow = Number(config.max_input_tokens); + if (isNaN(contextWindow)) continue; + + // Some models have a provider/model-tag format, so we need to get the last part since we dont do paths + // for names with the exception of some router-providers like OpenRouter or Together. + const modelName = key.split("/").pop(); + formattedModelMap[provider][modelName] = contextWindow; + } + } + return formattedModelMap; + } + + /** + * Gets the context window for a given provider and model. + * @param {string} provider - The provider to get the context window for + * @param {string} model - The model to get the context window for + * @returns {number} - The context window for the given provider and model + */ + get(provider = null, model = null) { + if (!provider || !this.cache || !this.cache[provider]) return null; + if (!model) return this.cache[provider]; + const modelContextWindow = this.cache[provider][model]; + if (!modelContextWindow) { + this.log("Invalid access to model context window - not found in cache", { + provider, + model, + }); + return null; + } + return Number(modelContextWindow); + } +} + +module.exports = { MODEL_MAP: new ContextWindowFinder() }; diff --git a/server/utils/AiProviders/modelMap.js b/server/utils/AiProviders/modelMap/legacy.js similarity index 90% rename from server/utils/AiProviders/modelMap.js rename to server/utils/AiProviders/modelMap/legacy.js index a80400c30b4..2faf99dc237 100644 --- a/server/utils/AiProviders/modelMap.js +++ b/server/utils/AiProviders/modelMap/legacy.js @@ -1,12 +1,4 @@ -/** - * The model name and context window for all know model windows - * that are available through providers which has discrete model options. - * This file is automatically generated by syncStaticLists.mjs - * and should not be edited manually. - * - * Last updated: 2025-04-07T20:29:49.277Z - */ -const MODEL_MAP = { +const LEGACY_MODEL_MAP = { anthropic: { "claude-instant-1.2": 100000, "claude-2.0": 100000, @@ -117,5 +109,4 @@ const MODEL_MAP = { "grok-beta": 131072, }, }; - -module.exports = { MODEL_MAP }; +module.exports = LEGACY_MODEL_MAP; diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js index 2e5996c939a..586532ce8dd 100644 --- a/server/utils/AiProviders/openAi/index.js +++ b/server/utils/AiProviders/openAi/index.js @@ -25,6 +25,11 @@ class OpenAiLLM { this.embedder = embedder ?? new NativeEmbedder(); this.defaultTemp = 0.7; + this.log(`Initialized ${this.model} with context window ${this.promptWindowLimit()}`); + } + + log(text, ...args) { + console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args); } /** @@ -54,11 +59,11 @@ class OpenAiLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.openai[modelName] ?? 4_096; + return MODEL_MAP.get("openai", modelName) ?? 4_096; } promptWindowLimit() { - return MODEL_MAP.openai[this.model] ?? 4_096; + return MODEL_MAP.get("openai", this.model) ?? 4_096; } // Short circuit if name has 'gpt' since we now fetch models from OpenAI API diff --git a/server/utils/AiProviders/xai/index.js b/server/utils/AiProviders/xai/index.js index 2319e72206e..ad0a7e5eecf 100644 --- a/server/utils/AiProviders/xai/index.js +++ b/server/utils/AiProviders/xai/index.js @@ -28,7 +28,7 @@ class XAiLLM { this.embedder = embedder ?? new NativeEmbedder(); this.defaultTemp = 0.7; - this.log("Initialized with model:", this.model); + this.log(`Initialized ${this.model} with context window ${this.promptWindowLimit()}`); } log(text, ...args) { @@ -52,11 +52,11 @@ class XAiLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.xai[modelName] ?? 131_072; + return MODEL_MAP.get("xai", modelName) ?? 131_072; } promptWindowLimit() { - return MODEL_MAP.xai[this.model] ?? 131_072; + return MODEL_MAP.get("xai", this.model) ?? 131_072; } isValidChatCompletionModel(_modelName = "") { From 36722ee9fd3dac8c41c651a8f4fd78af310e10e8 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 14 May 2025 10:27:53 -0700 Subject: [PATCH 2/5] docker container bootup warning --- .github/workflows/dev-build.yaml | 2 +- docker/docker-entrypoint.sh | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/dev-build.yaml b/.github/workflows/dev-build.yaml index e4ff3689ce1..0e2e15c33b6 100644 --- a/.github/workflows/dev-build.yaml +++ b/.github/workflows/dev-build.yaml @@ -6,7 +6,7 @@ concurrency: on: push: - branches: ['873-pgvector-support'] # put your current branch to create a build. Core team only. + branches: ['warn-bad-docker-command'] # put your current branch to create a build. Core team only. paths-ignore: - '**.md' - 'cloud-deployments/*' diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 1ac69e5baf4..dc2faa59173 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -1,4 +1,17 @@ #!/bin/bash + +# Check if STORAGE_DIR is set +if [ -z "$STORAGE_DIR" ]; then + echo "================================================================" + echo "⚠️ ⚠️ ⚠️ WARNING: STORAGE_DIR environment variable is not set! ⚠️ ⚠️ ⚠️" + echo "Not setting this will result in data loss on container restart since" + echo "the application will not have a persistent storage location." + echo "Please run the container with the official docker command at" + echo "https://docs.anythingllm.com/installation-docker/quickstart" + echo "⚠️ ⚠️ ⚠️ WARNING: STORAGE_DIR environment variable is not set! ⚠️ ⚠️ ⚠️" + echo "================================================================" +fi + { cd /app/server/ && npx prisma generate --schema=./prisma/schema.prisma && @@ -7,4 +20,4 @@ } & { node /app/collector/index.js; } & wait -n -exit $? +exit $? \ No newline at end of file From 5006031a2a34be5cf6a8d8f0ce24ed60ed0c824b Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 14 May 2025 10:40:05 -0700 Subject: [PATCH 3/5] update invalid ENV warning --- docker/docker-entrypoint.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index dc2faa59173..9c47de25c4e 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -6,6 +6,8 @@ if [ -z "$STORAGE_DIR" ]; then echo "⚠️ ⚠️ ⚠️ WARNING: STORAGE_DIR environment variable is not set! ⚠️ ⚠️ ⚠️" echo "Not setting this will result in data loss on container restart since" echo "the application will not have a persistent storage location." + echo "It can also result in weird errors in various parts of the application." + echo "\n\n" echo "Please run the container with the official docker command at" echo "https://docs.anythingllm.com/installation-docker/quickstart" echo "⚠️ ⚠️ ⚠️ WARNING: STORAGE_DIR environment variable is not set! ⚠️ ⚠️ ⚠️" From da3fd0990f8bdd1d17744db48de843459f630bb6 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 14 May 2025 10:43:13 -0700 Subject: [PATCH 4/5] rebased with current HEAD --- server/storage/models/.gitignore | 1 - server/utils/AiProviders/anthropic/index.js | 4 +- server/utils/AiProviders/cohere/index.js | 4 +- server/utils/AiProviders/deepseek/index.js | 6 +- .../utils/AiProviders/gemini/defaultModels.js | 15 +- server/utils/AiProviders/gemini/index.js | 12 +- .../AiProviders/gemini/syncStaticLists.mjs | 31 ++++ server/utils/AiProviders/groq/index.js | 4 +- .../{modelMap/legacy.js => modelMap.js} | 13 +- server/utils/AiProviders/modelMap/index.js | 140 ------------------ server/utils/AiProviders/openAi/index.js | 9 +- server/utils/AiProviders/xai/index.js | 6 +- 12 files changed, 68 insertions(+), 177 deletions(-) rename server/utils/AiProviders/{modelMap/legacy.js => modelMap.js} (90%) delete mode 100644 server/utils/AiProviders/modelMap/index.js diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore index 5e83df7bcc4..f5c78ac877c 100644 --- a/server/storage/models/.gitignore +++ b/server/storage/models/.gitignore @@ -9,4 +9,3 @@ gemini togetherAi tesseract ppio -context-windows/* \ No newline at end of file diff --git a/server/utils/AiProviders/anthropic/index.js b/server/utils/AiProviders/anthropic/index.js index 450b376b3e0..c4f53acf639 100644 --- a/server/utils/AiProviders/anthropic/index.js +++ b/server/utils/AiProviders/anthropic/index.js @@ -45,11 +45,11 @@ class AnthropicLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.get("anthropic", modelName) ?? 100_000; + return MODEL_MAP.anthropic[modelName] ?? 100_000; } promptWindowLimit() { - return MODEL_MAP.get("anthropic", this.model) ?? 100_000; + return MODEL_MAP.anthropic[this.model] ?? 100_000; } isValidChatCompletionModel(_modelName = "") { diff --git a/server/utils/AiProviders/cohere/index.js b/server/utils/AiProviders/cohere/index.js index a6647f5b658..33b65df21c0 100644 --- a/server/utils/AiProviders/cohere/index.js +++ b/server/utils/AiProviders/cohere/index.js @@ -63,11 +63,11 @@ class CohereLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.get("cohere", modelName) ?? 4_096; + return MODEL_MAP.cohere[modelName] ?? 4_096; } promptWindowLimit() { - return MODEL_MAP.get("cohere", this.model) ?? 4_096; + return MODEL_MAP.cohere[this.model] ?? 4_096; } async isValidChatCompletionModel(model = "") { diff --git a/server/utils/AiProviders/deepseek/index.js b/server/utils/AiProviders/deepseek/index.js index ddb2de317c7..b91332a84a1 100644 --- a/server/utils/AiProviders/deepseek/index.js +++ b/server/utils/AiProviders/deepseek/index.js @@ -29,7 +29,7 @@ class DeepSeekLLM { this.embedder = embedder ?? new NativeEmbedder(); this.defaultTemp = 0.7; - this.log(`Initialized ${this.model} with context window ${this.promptWindowLimit()}`); + this.log("Initialized with model:", this.model); } log(text, ...args) { @@ -53,11 +53,11 @@ class DeepSeekLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.get("deepseek", modelName) ?? 8192; + return MODEL_MAP.deepseek[modelName] ?? 8192; } promptWindowLimit() { - return MODEL_MAP.get("deepseek", this.model) ?? 8192; + return MODEL_MAP.deepseek[this.model] ?? 8192; } async isValidChatCompletionModel(modelName = "") { diff --git a/server/utils/AiProviders/gemini/defaultModels.js b/server/utils/AiProviders/gemini/defaultModels.js index 4a52dc99c65..fde63341d2a 100644 --- a/server/utils/AiProviders/gemini/defaultModels.js +++ b/server/utils/AiProviders/gemini/defaultModels.js @@ -1,7 +1,7 @@ const { MODEL_MAP } = require("../modelMap"); const stableModels = [ - // %STABLE_MODELS% - updated 2025-05-13T23:13:58.920Z + // %STABLE_MODELS% - updated 2025-04-07T20:29:49.276Z "gemini-1.5-pro-001", "gemini-1.5-pro-002", "gemini-1.5-pro", @@ -14,7 +14,6 @@ const stableModels = [ "gemini-2.0-flash-001", "gemini-2.0-flash-lite-001", "gemini-2.0-flash-lite", - "gemini-2.0-flash-preview-image-generation", // %EOC_STABLE_MODELS% ]; @@ -23,7 +22,7 @@ const stableModels = [ // generally, v1beta models have `exp` in the name, but not always // so we check for both against a static list as well via API. const v1BetaModels = [ - // %V1BETA_MODELS% - updated 2025-05-13T23:13:58.920Z + // %V1BETA_MODELS% - updated 2025-04-07T20:29:49.276Z "gemini-1.5-pro-latest", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b-latest", @@ -31,9 +30,6 @@ const v1BetaModels = [ "gemini-1.5-flash-8b-exp-0924", "gemini-2.5-pro-exp-03-25", "gemini-2.5-pro-preview-03-25", - "gemini-2.5-flash-preview-04-17", - "gemini-2.5-flash-preview-04-17-thinking", - "gemini-2.5-pro-preview-05-06", "gemini-2.0-flash-exp", "gemini-2.0-flash-exp-image-generation", "gemini-2.0-flash-lite-preview-02-05", @@ -45,7 +41,6 @@ const v1BetaModels = [ "gemini-2.0-flash-thinking-exp", "gemini-2.0-flash-thinking-exp-1219", "learnlm-1.5-pro-experimental", - "learnlm-2.0-flash-experimental", "gemma-3-1b-it", "gemma-3-4b-it", "gemma-3-12b-it", @@ -53,17 +48,17 @@ const v1BetaModels = [ // %EOC_V1BETA_MODELS% ]; -const defaultGeminiModels = () => [ +const defaultGeminiModels = [ ...stableModels.map((model) => ({ id: model, name: model, - contextWindow: MODEL_MAP.get("gemini", model), + contextWindow: MODEL_MAP.gemini[model], experimental: false, })), ...v1BetaModels.map((model) => ({ id: model, name: model, - contextWindow: MODEL_MAP.get("gemini", model), + contextWindow: MODEL_MAP.gemini[model], experimental: true, })), ]; diff --git a/server/utils/AiProviders/gemini/index.js b/server/utils/AiProviders/gemini/index.js index 393f0cd5227..bd2268e5174 100644 --- a/server/utils/AiProviders/gemini/index.js +++ b/server/utils/AiProviders/gemini/index.js @@ -107,7 +107,7 @@ class GeminiLLM { try { const cacheModelPath = path.resolve(cacheFolder, "models.json"); if (!fs.existsSync(cacheModelPath)) - return MODEL_MAP.get("gemini", modelName) ?? 30_720; + return MODEL_MAP.gemini[modelName] ?? 30_720; const models = safeJsonParse(fs.readFileSync(cacheModelPath)); const model = models.find((model) => model.id === modelName); @@ -118,13 +118,15 @@ class GeminiLLM { return model.contextWindow; } catch (e) { console.error(`GeminiLLM:promptWindowLimit`, e.message); - return MODEL_MAP.get("gemini", modelName) ?? 30_720; + return MODEL_MAP.gemini[modelName] ?? 30_720; } } promptWindowLimit() { try { - if (!fs.existsSync(this.cacheModelPath)) return MODEL_MAP.get("gemini", this.model) ?? 30_720; + if (!fs.existsSync(this.cacheModelPath)) + return MODEL_MAP.gemini[this.model] ?? 30_720; + const models = safeJsonParse(fs.readFileSync(this.cacheModelPath)); const model = models.find((model) => model.id === this.model); if (!model) @@ -134,7 +136,7 @@ class GeminiLLM { return model.contextWindow; } catch (e) { console.error(`GeminiLLM:promptWindowLimit`, e.message); - return MODEL_MAP.get("gemini", this.model) ?? 30_720; + return MODEL_MAP.gemini[this.model] ?? 30_720; } } @@ -279,7 +281,7 @@ class GeminiLLM { if (allModels.length === 0) { console.error(`Gemini:getGeminiModels - No models found`); - return defaultGeminiModels(); + return defaultGeminiModels; } console.log( diff --git a/server/utils/AiProviders/gemini/syncStaticLists.mjs b/server/utils/AiProviders/gemini/syncStaticLists.mjs index b276995acdf..42382fae388 100644 --- a/server/utils/AiProviders/gemini/syncStaticLists.mjs +++ b/server/utils/AiProviders/gemini/syncStaticLists.mjs @@ -9,6 +9,7 @@ import fs from "fs"; import path from "path"; import dotenv from "dotenv"; +import { MODEL_MAP } from "../modelMap.js"; dotenv.config({ path: `../../../.env.development` }); const existingCachePath = path.resolve('../../../storage/models/gemini') @@ -45,4 +46,34 @@ function updateDefaultModelsFile(models) { fs.writeFileSync(path.join("./defaultModels.js"), defaultModelFileContents); console.log("Updated defaultModels.js. Dont forget to `yarn lint` and commit!"); } + +function updateModelMap(models) { + const existingModelMap = MODEL_MAP; + console.log('Updating modelMap.js `gemini` object...') + console.log(`Removed existing gemini object (${Object.keys(existingModelMap.gemini).length} models) from modelMap.js`); + existingModelMap.gemini = {}; + + for (const model of models) existingModelMap.gemini[model.id] = model.contextWindow; + console.log(`Updated modelMap.js 'gemini' object with ${Object.keys(existingModelMap.gemini).length} models from API`); + + // Update the modelMap.js file + const contents = `/** + * The model name and context window for all know model windows + * that are available through providers which has discrete model options. + * This file is automatically generated by syncStaticLists.mjs + * and should not be edited manually. + * + * Last updated: ${new Date().toISOString()} + */ +const MODEL_MAP = { + ${Object.entries(existingModelMap).map(([key, value]) => `${key}: ${JSON.stringify(value, null, 2)}`).join(',\n')} +}; + +module.exports = { MODEL_MAP }; +`; + fs.writeFileSync(path.resolve("../modelMap.js"), contents); + console.log('Updated modelMap.js `gemini` object. Dont forget to `yarn lint` and commit!'); +} + updateDefaultModelsFile(models); +updateModelMap(models); diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js index 8eddefab0c5..9e7e77fa16b 100644 --- a/server/utils/AiProviders/groq/index.js +++ b/server/utils/AiProviders/groq/index.js @@ -49,11 +49,11 @@ class GroqLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.get("groq", modelName) ?? 8192; + return MODEL_MAP.groq[modelName] ?? 8192; } promptWindowLimit() { - return MODEL_MAP.get("groq", this.model) ?? 8192; + return MODEL_MAP.groq[this.model] ?? 8192; } async isValidChatCompletionModel(modelName = "") { diff --git a/server/utils/AiProviders/modelMap/legacy.js b/server/utils/AiProviders/modelMap.js similarity index 90% rename from server/utils/AiProviders/modelMap/legacy.js rename to server/utils/AiProviders/modelMap.js index 2faf99dc237..a80400c30b4 100644 --- a/server/utils/AiProviders/modelMap/legacy.js +++ b/server/utils/AiProviders/modelMap.js @@ -1,4 +1,12 @@ -const LEGACY_MODEL_MAP = { +/** + * The model name and context window for all know model windows + * that are available through providers which has discrete model options. + * This file is automatically generated by syncStaticLists.mjs + * and should not be edited manually. + * + * Last updated: 2025-04-07T20:29:49.277Z + */ +const MODEL_MAP = { anthropic: { "claude-instant-1.2": 100000, "claude-2.0": 100000, @@ -109,4 +117,5 @@ const LEGACY_MODEL_MAP = { "grok-beta": 131072, }, }; -module.exports = LEGACY_MODEL_MAP; + +module.exports = { MODEL_MAP }; diff --git a/server/utils/AiProviders/modelMap/index.js b/server/utils/AiProviders/modelMap/index.js deleted file mode 100644 index dbf0aff6a60..00000000000 --- a/server/utils/AiProviders/modelMap/index.js +++ /dev/null @@ -1,140 +0,0 @@ -const path = require("path"); -const fs = require("fs"); -const LEGACY_MODEL_MAP = require("./legacy"); - -class ContextWindowFinder { - static instance = null; - static modelMap = LEGACY_MODEL_MAP; - - /** - * Mapping for AnythingLLM provider <> LiteLLM provider - * @type {Record} - */ - static trackedProviders = { - anthropic: "anthropic", - openai: "openai", - cohere: "cohere_chat", - gemini: "vertex_ai-language-models", - groq: "groq", - xai: "xai", - deepseek: "deepseek", - }; - static expiryMs = 1000 * 60 * 60 * 24 * 3; // 3 days - static remoteUrl = - "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"; - - cacheLocation = path.resolve( - process.env.STORAGE_DIR - ? path.resolve(process.env.STORAGE_DIR, "models", "context-windows") - : path.resolve(__dirname, `../../../storage/models/context-windows`) - ); - cacheFilePath = path.resolve(this.cacheLocation, "context-windows.json"); - cacheFileExpiryPath = path.resolve(this.cacheLocation, ".cached_at"); - - constructor() { - if (ContextWindowFinder.instance) return ContextWindowFinder.instance; - ContextWindowFinder.instance = this; - if (!fs.existsSync(this.cacheLocation)) - fs.mkdirSync(this.cacheLocation, { recursive: true }); - this.#pullRemoteModelMap(); - } - - log(text, ...args) { - console.log(`\x1b[33m[ContextWindowFinder]\x1b[0m ${text}`, ...args); - } - - /** - * Checks if the cache is stale by checking if the cache file exists and if the cache file is older than the expiry time. - * @returns {boolean} - */ - get isCacheStale() { - if (!fs.existsSync(this.cacheFileExpiryPath)) return true; - const cachedAt = fs.readFileSync(this.cacheFileExpiryPath, "utf8"); - return Date.now() - cachedAt > ContextWindowFinder.expiryMs; - } - - get cache() { - if (!fs.existsSync(this.cacheFileExpiryPath)) return null; - if (!this.isCacheStale) - return JSON.parse( - fs.readFileSync(this.cacheFilePath, { encoding: "utf8" }) - ); - return null; - } - - /** - * Pulls the remote model map from the remote URL, formats it and caches it. - * @returns {Record>} - The formatted model map - */ - async #pullRemoteModelMap() { - const remoteContexWindowMap = await fetch(ContextWindowFinder.remoteUrl) - .then((res) => res.json()) - .then((data) => { - fs.writeFileSync(this.cacheFilePath, JSON.stringify(data, null, 2)); - fs.writeFileSync(this.cacheFileExpiryPath, Date.now().toString()); - this.log("Remote model map synced and cached"); - return data; - }) - .catch((error) => { - this.log("Error syncing remote model map", error); - return null; - }); - if (!remoteContexWindowMap) return null; - - const modelMap = this.#formatModelMap(remoteContexWindowMap); - fs.writeFileSync(this.cacheFilePath, JSON.stringify(modelMap, null, 2)); - fs.writeFileSync(this.cacheFileExpiryPath, Date.now().toString()); - return modelMap; - } - - /** - * Formats the remote model map to a format that is compatible with how we store the model map - * for all providers who use it. - * @param {Record} modelMap - The remote model map - * @returns {Record>} - The formatted model map - */ - #formatModelMap(modelMap = {}) { - const formattedModelMap = {}; - - for (const [provider, liteLLMProviderTag] of Object.entries( - ContextWindowFinder.trackedProviders - )) { - formattedModelMap[provider] = {}; - const matches = Object.entries(modelMap).filter( - ([_key, config]) => config.litellm_provider === liteLLMProviderTag - ); - for (const [key, config] of matches) { - const contextWindow = Number(config.max_input_tokens); - if (isNaN(contextWindow)) continue; - - // Some models have a provider/model-tag format, so we need to get the last part since we dont do paths - // for names with the exception of some router-providers like OpenRouter or Together. - const modelName = key.split("/").pop(); - formattedModelMap[provider][modelName] = contextWindow; - } - } - return formattedModelMap; - } - - /** - * Gets the context window for a given provider and model. - * @param {string} provider - The provider to get the context window for - * @param {string} model - The model to get the context window for - * @returns {number} - The context window for the given provider and model - */ - get(provider = null, model = null) { - if (!provider || !this.cache || !this.cache[provider]) return null; - if (!model) return this.cache[provider]; - const modelContextWindow = this.cache[provider][model]; - if (!modelContextWindow) { - this.log("Invalid access to model context window - not found in cache", { - provider, - model, - }); - return null; - } - return Number(modelContextWindow); - } -} - -module.exports = { MODEL_MAP: new ContextWindowFinder() }; diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js index 586532ce8dd..2e5996c939a 100644 --- a/server/utils/AiProviders/openAi/index.js +++ b/server/utils/AiProviders/openAi/index.js @@ -25,11 +25,6 @@ class OpenAiLLM { this.embedder = embedder ?? new NativeEmbedder(); this.defaultTemp = 0.7; - this.log(`Initialized ${this.model} with context window ${this.promptWindowLimit()}`); - } - - log(text, ...args) { - console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args); } /** @@ -59,11 +54,11 @@ class OpenAiLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.get("openai", modelName) ?? 4_096; + return MODEL_MAP.openai[modelName] ?? 4_096; } promptWindowLimit() { - return MODEL_MAP.get("openai", this.model) ?? 4_096; + return MODEL_MAP.openai[this.model] ?? 4_096; } // Short circuit if name has 'gpt' since we now fetch models from OpenAI API diff --git a/server/utils/AiProviders/xai/index.js b/server/utils/AiProviders/xai/index.js index ad0a7e5eecf..2319e72206e 100644 --- a/server/utils/AiProviders/xai/index.js +++ b/server/utils/AiProviders/xai/index.js @@ -28,7 +28,7 @@ class XAiLLM { this.embedder = embedder ?? new NativeEmbedder(); this.defaultTemp = 0.7; - this.log(`Initialized ${this.model} with context window ${this.promptWindowLimit()}`); + this.log("Initialized with model:", this.model); } log(text, ...args) { @@ -52,11 +52,11 @@ class XAiLLM { } static promptWindowLimit(modelName) { - return MODEL_MAP.get("xai", modelName) ?? 131_072; + return MODEL_MAP.xai[modelName] ?? 131_072; } promptWindowLimit() { - return MODEL_MAP.get("xai", this.model) ?? 131_072; + return MODEL_MAP.xai[this.model] ?? 131_072; } isValidChatCompletionModel(_modelName = "") { From 00e1c54b4b60ad79b8d8dc3ce27e8e70c4e05262 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 14 May 2025 11:00:50 -0700 Subject: [PATCH 5/5] update newline printing --- docker/docker-entrypoint.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 9c47de25c4e..80296accc2f 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -4,12 +4,14 @@ if [ -z "$STORAGE_DIR" ]; then echo "================================================================" echo "⚠️ ⚠️ ⚠️ WARNING: STORAGE_DIR environment variable is not set! ⚠️ ⚠️ ⚠️" + echo "" echo "Not setting this will result in data loss on container restart since" echo "the application will not have a persistent storage location." echo "It can also result in weird errors in various parts of the application." - echo "\n\n" + echo "" echo "Please run the container with the official docker command at" echo "https://docs.anythingllm.com/installation-docker/quickstart" + echo "" echo "⚠️ ⚠️ ⚠️ WARNING: STORAGE_DIR environment variable is not set! ⚠️ ⚠️ ⚠️" echo "================================================================" fi