diff --git a/.prettierignore b/.prettierignore index e3b0c14e0e9..1c4725e585b 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,6 +10,7 @@ frontend/bundleinspector.html #server server/swagger/openapi.json +server/**/*.mjs #embed **/static/** diff --git a/server/utils/AiProviders/gemini/defaultModels.js b/server/utils/AiProviders/gemini/defaultModels.js index 303a0aafff3..fde63341d2a 100644 --- a/server/utils/AiProviders/gemini/defaultModels.js +++ b/server/utils/AiProviders/gemini/defaultModels.js @@ -1,30 +1,53 @@ const { MODEL_MAP } = require("../modelMap"); const stableModels = [ - "gemini-pro", - "gemini-1.0-pro", - "gemini-1.5-pro-latest", - "gemini-1.5-flash-latest", + // %STABLE_MODELS% - updated 2025-04-07T20:29:49.276Z + "gemini-1.5-pro-001", + "gemini-1.5-pro-002", + "gemini-1.5-pro", + "gemini-1.5-flash-001", + "gemini-1.5-flash", + "gemini-1.5-flash-002", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-001", + "gemini-2.0-flash", + "gemini-2.0-flash-001", + "gemini-2.0-flash-lite-001", + "gemini-2.0-flash-lite", + // %EOC_STABLE_MODELS% ]; -const experimentalModels = [ - "gemini-1.5-pro-exp-0801", - "gemini-1.5-pro-exp-0827", - "gemini-1.5-flash-exp-0827", +// There are some models that are only available in the v1beta API +// and some models that are only available in the v1 API +// generally, v1beta models have `exp` in the name, but not always +// so we check for both against a static list as well via API. 
+const v1BetaModels = [ + // %V1BETA_MODELS% - updated 2025-04-07T20:29:49.276Z + "gemini-1.5-pro-latest", + "gemini-1.5-flash-latest", + "gemini-1.5-flash-8b-latest", "gemini-1.5-flash-8b-exp-0827", - "gemini-exp-1114", - "gemini-exp-1121", + "gemini-1.5-flash-8b-exp-0924", + "gemini-2.5-pro-exp-03-25", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash-exp", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-pro-exp-02-05", "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp", + "gemini-2.0-flash-thinking-exp-1219", "learnlm-1.5-pro-experimental", - "gemini-2.0-flash-exp", + "gemma-3-1b-it", + "gemma-3-4b-it", + "gemma-3-12b-it", + "gemma-3-27b-it", + // %EOC_V1BETA_MODELS% ]; -// There are some models that are only available in the v1beta API -// and some models that are only available in the v1 API -// generally, v1beta models have `exp` in the name, but not always -// so we check for both against a static list as well. 
-const v1BetaModels = ["gemini-1.5-pro-latest", "gemini-1.5-flash-latest"]; - const defaultGeminiModels = [ ...stableModels.map((model) => ({ id: model, @@ -32,7 +55,7 @@ const defaultGeminiModels = [ contextWindow: MODEL_MAP.gemini[model], experimental: false, })), - ...experimentalModels.map((model) => ({ + ...v1BetaModels.map((model) => ({ id: model, name: model, contextWindow: MODEL_MAP.gemini[model], diff --git a/server/utils/AiProviders/gemini/index.js b/server/utils/AiProviders/gemini/index.js index fd7929f4b32..311d39d3b07 100644 --- a/server/utils/AiProviders/gemini/index.js +++ b/server/utils/AiProviders/gemini/index.js @@ -28,21 +28,11 @@ class GeminiLLM { const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY); this.model = modelPreference || process.env.GEMINI_LLM_MODEL_PREF || "gemini-pro"; + + const isExperimental = this.isExperimentalModel(this.model); this.gemini = genAI.getGenerativeModel( { model: this.model }, - { - apiVersion: - /** - * There are some models that are only available in the v1beta API - * and some models that are only available in the v1 API - * generally, v1beta models have `exp` in the name, but not always - * so we check for both against a static list as well. - * @see {v1BetaModels} - */ - this.model.includes("exp") || v1BetaModels.includes(this.model) - ? "v1beta" - : "v1", - } + { apiVersion: isExperimental ? "v1beta" : "v1" } ); this.limits = { history: this.promptWindowLimit() * 0.15, @@ -59,7 +49,7 @@ class GeminiLLM { this.cacheModelPath = path.resolve(cacheFolder, "models.json"); this.cacheAtPath = path.resolve(cacheFolder, ".cached_at"); this.#log( - `Initialized with model: ${this.model} (${this.promptWindowLimit()})` + `Initialized with model: ${this.model} ${isExperimental ? "[Experimental v1beta]" : "[Stable v1]"} - ctx: ${this.promptWindowLimit()}` ); } @@ -71,7 +61,7 @@ class GeminiLLM { // from the current date. If it is, then we will refetch the API so that all the models are up // to date. 
static cacheIsStale() {
-    const MAX_STALE = 6.048e8; // 1 Week in MS
+    const MAX_STALE = 8.64e7; // 1 day in MS
     if (!fs.existsSync(path.resolve(cacheFolder, ".cached_at"))) return true;
     const now = Number(new Date());
     const timestampMs = Number(
@@ -168,6 +158,29 @@ class GeminiLLM {
     }
   }
 
+  /**
+   * Checks if a model is experimental, i.e. it must be requested through the v1beta API version.
+   * Reads the cached model list from disk when available. If the cache is missing, is not
+   * a valid model array, or does not list the model, it will perform a blind check against
+   * the `exp` naming convention and the v1BetaModels list - which is manually maintained and updated.
+   * @param {string} modelName - The name of the model to check
+   * @returns {boolean} A boolean indicating if the model is experimental
+   */
+  isExperimentalModel(modelName) {
+    const cachedModelsPath = path.resolve(cacheFolder, "models.json");
+    if (fs.existsSync(cachedModelsPath)) {
+      // A corrupt or partially written cache must not crash the constructor, so
+      // anything that is not an array of models is treated like a missing cache.
+      const models = safeJsonParse(fs.readFileSync(cachedModelsPath, "utf8"));
+      const model = Array.isArray(models)
+        ? models.find((model) => model?.id === modelName)
+        : null;
+      if (model) return Boolean(model.experimental);
+    }
+
+    return modelName.includes("exp") || v1BetaModels.includes(modelName);
+  }
+
   /**
    * Fetches Gemini models from the Google Generative AI API
    * @param {string} apiKey - The API key to use for the request
@@ -186,63 +199,125 @@ class GeminiLLM {
       );
     }
 
-    const url = new URL(
-      "https://generativelanguage.googleapis.com/v1beta/models"
-    );
-    url.searchParams.set("pageSize", limit);
-    url.searchParams.set("key", apiKey);
-    if (pageToken) url.searchParams.set("pageToken", pageToken);
-    let success = false;
-
-    const models = await fetch(url.toString(), {
-      method: "GET",
-      headers: { "Content-Type": "application/json" },
-    })
-      .then((res) => res.json())
-      .then((data) => {
-        if (data.error) throw new Error(data.error.message);
-        return data.models ??
[]; - }) - .then((models) => { - success = true; - return models - .filter( - (model) => !model.displayName.toLowerCase().includes("tuning") - ) - .filter((model) => - model.supportedGenerationMethods.includes("generateContent") - ) // Only generateContent is supported - .map((model) => { - return { - id: model.name.split("/").pop(), - name: model.displayName, - contextWindow: model.inputTokenLimit, - experimental: model.name.includes("exp"), - }; - }); - }) - .catch((e) => { - console.error(`Gemini:getGeminiModels`, e.message); - success = false; - return defaultGeminiModels; - }); + const stableModels = []; + const allModels = []; - if (success) { - console.log( - `\x1b[32m[GeminiLLM]\x1b[0m Writing cached models API response to disk.` - ); - if (!fs.existsSync(cacheFolder)) - fs.mkdirSync(cacheFolder, { recursive: true }); - fs.writeFileSync( - path.resolve(cacheFolder, "models.json"), - JSON.stringify(models) + // Fetch from v1 + try { + const url = new URL( + "https://generativelanguage.googleapis.com/v1/models" ); - fs.writeFileSync( - path.resolve(cacheFolder, ".cached_at"), - new Date().getTime().toString() + url.searchParams.set("pageSize", limit); + url.searchParams.set("key", apiKey); + if (pageToken) url.searchParams.set("pageToken", pageToken); + await fetch(url.toString(), { + method: "GET", + headers: { "Content-Type": "application/json" }, + }) + .then((res) => res.json()) + .then((data) => { + if (data.error) throw new Error(data.error.message); + return data.models ?? 
[]; + }) + .then((models) => { + return models + .filter( + (model) => !model.displayName?.toLowerCase()?.includes("tuning") + ) // remove tuning models + .filter( + (model) => + !model.description?.toLowerCase()?.includes("deprecated") + ) // remove deprecated models (in comment) + .filter((model) => + // Only generateContent is supported + model.supportedGenerationMethods.includes("generateContent") + ) + .map((model) => { + stableModels.push(model.name); + allModels.push({ + id: model.name.split("/").pop(), + name: model.displayName, + contextWindow: model.inputTokenLimit, + experimental: false, + }); + }); + }) + .catch((e) => { + console.error(`Gemini:getGeminiModelsV1`, e.message); + return; + }); + } catch (e) { + console.error(`Gemini:getGeminiModelsV1`, e.message); + } + + // Fetch from v1beta + try { + const url = new URL( + "https://generativelanguage.googleapis.com/v1beta/models" ); + url.searchParams.set("pageSize", limit); + url.searchParams.set("key", apiKey); + if (pageToken) url.searchParams.set("pageToken", pageToken); + await fetch(url.toString(), { + method: "GET", + headers: { "Content-Type": "application/json" }, + }) + .then((res) => res.json()) + .then((data) => { + if (data.error) throw new Error(data.error.message); + return data.models ?? 
[];
+        })
+        .then((models) => {
+          return models
+            .filter((model) => !stableModels.includes(model.name)) // remove stable models that are already in the v1 list
+            .filter(
+              (model) => !model.displayName?.toLowerCase()?.includes("tuning")
+            ) // remove tuning models
+            .filter(
+              (model) =>
+                !model.description?.toLowerCase()?.includes("deprecated")
+            ) // remove deprecated models (in comment)
+            .filter((model) =>
+              // Only generateContent is supported
+              model.supportedGenerationMethods.includes("generateContent")
+            )
+            .map((model) => {
+              allModels.push({
+                id: model.name.split("/").pop(),
+                name: model.displayName,
+                contextWindow: model.inputTokenLimit,
+                experimental: true,
+              });
+            });
+        })
+        .catch((e) => {
+          console.error(`Gemini:getGeminiModelsV1beta`, e.message);
+          return;
+        });
+    } catch (e) {
+      console.error(`Gemini:getGeminiModelsV1beta`, e.message);
+    }
+
+    if (allModels.length === 0) {
+      console.error(`Gemini:getGeminiModels - No models found`);
+      return defaultGeminiModels;
     }
-    return models;
+
+    console.log(
+      `\x1b[32m[GeminiLLM]\x1b[0m Writing cached models API response to disk.`
+    );
+    if (!fs.existsSync(cacheFolder))
+      fs.mkdirSync(cacheFolder, { recursive: true });
+    fs.writeFileSync(
+      path.resolve(cacheFolder, "models.json"),
+      JSON.stringify(allModels)
+    );
+    fs.writeFileSync(
+      path.resolve(cacheFolder, ".cached_at"),
+      new Date().getTime().toString()
+    );
+
+    return allModels;
   }
 
   /**
diff --git a/server/utils/AiProviders/gemini/syncStaticLists.mjs b/server/utils/AiProviders/gemini/syncStaticLists.mjs
new file mode 100644
index 00000000000..42382fae388
--- /dev/null
+++ b/server/utils/AiProviders/gemini/syncStaticLists.mjs
@@ -0,0 +1,108 @@
+/**
+ * This is a script that syncs the static lists of models from the Gemini API
+ * so that maintainers can keep the fallback lists up to date.
+ *
+ * To run, cd into this directory and run:
+ * node syncStaticLists.mjs
+ */
+
+import fs from "fs";
+import path from "path";
+import dotenv from "dotenv";
+import { MODEL_MAP } from "../modelMap.js";
+// NOTE(review): ES module imports are hoisted, so ./index.js is evaluated before
+// dotenv.config() below runs - confirm it reads nothing from process.env at load time.
+import { GeminiLLM } from "./index.js";
+
+dotenv.config({ path: `../../../.env.development` });
+const existingCachePath = path.resolve('../../../storage/models/gemini')
+
+// Without an API key there is nothing to sync, so stop before the cache is removed.
+if (!process.env.GEMINI_API_KEY) {
+  console.error("GEMINI_API_KEY is not set. Add it to .env.development and run again.");
+  process.exit(1);
+}
+
+if (fs.existsSync(existingCachePath)) {
+  console.log("Removing existing cache so we can fetch fresh models from Gemini endpoints...");
+  fs.rmSync(existingCachePath, { recursive: true, force: true });
+}
+
+// This will fetch all of the models from the Gemini API as well as post-process them:
+// tuning and deprecated models are removed and models that are only listed by the
+// v1beta endpoint are flagged as experimental.
+const models = await GeminiLLM.fetchModels(process.env.GEMINI_API_KEY);
+
+/**
+ * Renders model ids as array-literal entries - one quoted, comma-terminated id per line.
+ * An empty list renders as an empty string so that no `""` entry is written.
+ * @param {string[]} modelIds - The model ids to render
+ * @returns {string} The rendered entries
+ */
+function toArrayEntries(modelIds) {
+  return modelIds.map((id) => `"${id}",\n`).join("");
+}
+
+/**
+ * Rewrites the model id lists in ./defaultModels.js that sit between the
+ * %STABLE_MODELS% / %EOC_STABLE_MODELS% and %V1BETA_MODELS% / %EOC_V1BETA_MODELS% comments.
+ * @param {{id: string, experimental: boolean}[]} models - The models returned by GeminiLLM.fetchModels
+ */
+function updateDefaultModelsFile(models) {
+  const stableModelKeys = models.filter((model) => !model.experimental).map((model) => model.id);
+  const v1BetaModelKeys = models.filter((model) => model.experimental).map((model) => model.id);
+  const updatedAt = new Date().toISOString();
+
+  let defaultModelFileContents = fs.readFileSync(path.join("./defaultModels.js"), "utf8");
+
+  // Update the stable models between %STABLE_MODELS% and %EOC_STABLE_MODELS% comments
+  defaultModelFileContents = defaultModelFileContents.replace(
+    /%STABLE_MODELS%[\s\S]*?%EOC_STABLE_MODELS%/,
+    `%STABLE_MODELS% - updated ${updatedAt}\n${toArrayEntries(stableModelKeys)}// %EOC_STABLE_MODELS%`
+  );
+
+  // Update the v1beta models between %V1BETA_MODELS% and %EOC_V1BETA_MODELS% comments
+  defaultModelFileContents = defaultModelFileContents.replace(
+    /%V1BETA_MODELS%[\s\S]*?%EOC_V1BETA_MODELS%/,
+    `%V1BETA_MODELS% - updated ${updatedAt}\n${toArrayEntries(v1BetaModelKeys)}// %EOC_V1BETA_MODELS%`
+  );
+
+  fs.writeFileSync(path.join("./defaultModels.js"), defaultModelFileContents);
+  console.log("Updated defaultModels.js. Dont forget to `yarn lint` and commit!");
+}
+
+/**
+ * Regenerates ../modelMap.js: the `gemini` entry is rebuilt from the given models and
+ * every other provider entry is written back as it was imported.
+ * @param {{id: string, contextWindow: number}[]} models - The models returned by GeminiLLM.fetchModels
+ */
+function updateModelMap(models) {
+  const existingModelMap = MODEL_MAP;
+  console.log('Updating modelMap.js `gemini` object...')
+  console.log(`Removed existing gemini object (${Object.keys(existingModelMap.gemini).length} models) from modelMap.js`);
+  existingModelMap.gemini = {};
+
+  for (const model of models) existingModelMap.gemini[model.id] = model.contextWindow;
+  console.log(`Updated modelMap.js 'gemini' object with ${Object.keys(existingModelMap.gemini).length} models from API`);
+
+  // Update the modelMap.js file
+  const contents = `/**
+ * The model name and context window for all know model windows
+ * that are available through providers which has discrete model options.
+ * This file is automatically generated by syncStaticLists.mjs
+ * and should not be edited manually.
+ *
+ * Last updated: ${new Date().toISOString()}
+ */
+const MODEL_MAP = {
+  ${Object.entries(existingModelMap).map(([key, value]) => `${key}: ${JSON.stringify(value, null, 2)}`).join(',\n')}
+};
+
+module.exports = { MODEL_MAP };
+`;
+  fs.writeFileSync(path.resolve("../modelMap.js"), contents);
+  console.log('Updated modelMap.js `gemini` object. Dont forget to `yarn lint` and commit!');
+}
+
+updateDefaultModelsFile(models);
+updateModelMap(models);
diff --git a/server/utils/AiProviders/modelMap.js b/server/utils/AiProviders/modelMap.js
index 9c59c6beb7a..5482fcbd3dd 100644
--- a/server/utils/AiProviders/modelMap.js
+++ b/server/utils/AiProviders/modelMap.js
@@ -1,46 +1,71 @@
 /**
  * The model name and context window for all know model windows
  * that are available through providers which has discrete model options.
+ * This file is automatically generated by syncStaticLists.mjs
+ * and should not be edited manually.
+ * + * Last updated: 2025-04-07T20:29:49.277Z */ const MODEL_MAP = { anthropic: { - "claude-instant-1.2": 100_000, - "claude-2.0": 100_000, - "claude-2.1": 200_000, - "claude-3-haiku-20240307": 200_000, - "claude-3-sonnet-20240229": 200_000, - "claude-3-opus-20240229": 200_000, - "claude-3-opus-latest": 200_000, - "claude-3-5-haiku-latest": 200_000, - "claude-3-5-haiku-20241022": 200_000, - "claude-3-5-sonnet-latest": 200_000, - "claude-3-5-sonnet-20241022": 200_000, - "claude-3-5-sonnet-20240620": 200_000, - "claude-3-7-sonnet-20250219": 200_000, - "claude-3-7-sonnet-latest": 200_000, + "claude-instant-1.2": 100000, + "claude-2.0": 100000, + "claude-2.1": 200000, + "claude-3-haiku-20240307": 200000, + "claude-3-sonnet-20240229": 200000, + "claude-3-opus-20240229": 200000, + "claude-3-opus-latest": 200000, + "claude-3-5-haiku-latest": 200000, + "claude-3-5-haiku-20241022": 200000, + "claude-3-5-sonnet-latest": 200000, + "claude-3-5-sonnet-20241022": 200000, + "claude-3-5-sonnet-20240620": 200000, + "claude-3-7-sonnet-20250219": 200000, + "claude-3-7-sonnet-latest": 200000, }, cohere: { - "command-r": 128_000, - "command-r-plus": 128_000, - command: 4_096, - "command-light": 4_096, - "command-nightly": 8_192, - "command-light-nightly": 8_192, + "command-r": 128000, + "command-r-plus": 128000, + command: 4096, + "command-light": 4096, + "command-nightly": 8192, + "command-light-nightly": 8192, }, gemini: { - "gemini-pro": 30_720, - "gemini-1.0-pro": 30_720, - "gemini-1.5-flash-latest": 1_048_576, - "gemini-1.5-pro-latest": 2_097_152, - "gemini-1.5-pro-exp-0801": 2_097_152, - "gemini-1.5-pro-exp-0827": 2_097_152, - "gemini-1.5-flash-exp-0827": 1_048_576, - "gemini-1.5-flash-8b-exp-0827": 1_048_576, - "gemini-exp-1114": 32_767, - "gemini-exp-1121": 32_767, - "gemini-exp-1206": 32_767, - "learnlm-1.5-pro-experimental": 32_767, - "gemini-2.0-flash-exp": 1_048_576, + "gemini-1.5-pro-001": 2000000, + "gemini-1.5-pro-002": 2000000, + "gemini-1.5-pro": 2000000, + 
"gemini-1.5-flash-001": 1000000, + "gemini-1.5-flash": 1000000, + "gemini-1.5-flash-002": 1000000, + "gemini-1.5-flash-8b": 1000000, + "gemini-1.5-flash-8b-001": 1000000, + "gemini-2.0-flash": 1048576, + "gemini-2.0-flash-001": 1048576, + "gemini-2.0-flash-lite-001": 1048576, + "gemini-2.0-flash-lite": 1048576, + "gemini-1.5-pro-latest": 2000000, + "gemini-1.5-flash-latest": 1000000, + "gemini-1.5-flash-8b-latest": 1000000, + "gemini-1.5-flash-8b-exp-0827": 1000000, + "gemini-1.5-flash-8b-exp-0924": 1000000, + "gemini-2.5-pro-exp-03-25": 1048576, + "gemini-2.5-pro-preview-03-25": 1048576, + "gemini-2.0-flash-exp": 1048576, + "gemini-2.0-flash-exp-image-generation": 1048576, + "gemini-2.0-flash-lite-preview-02-05": 1048576, + "gemini-2.0-flash-lite-preview": 1048576, + "gemini-2.0-pro-exp": 1048576, + "gemini-2.0-pro-exp-02-05": 1048576, + "gemini-exp-1206": 1048576, + "gemini-2.0-flash-thinking-exp-01-21": 1048576, + "gemini-2.0-flash-thinking-exp": 1048576, + "gemini-2.0-flash-thinking-exp-1219": 1048576, + "learnlm-1.5-pro-experimental": 32767, + "gemma-3-1b-it": 32768, + "gemma-3-4b-it": 32768, + "gemma-3-12b-it": 32768, + "gemma-3-27b-it": 131072, }, groq: { "gemma2-9b-it": 8192, @@ -52,32 +77,32 @@ const MODEL_MAP = { "mixtral-8x7b-32768": 32768, }, openai: { - "gpt-3.5-turbo": 16_385, - "gpt-3.5-turbo-1106": 16_385, - "gpt-4o": 128_000, - "gpt-4o-2024-08-06": 128_000, - "gpt-4o-2024-05-13": 128_000, - "gpt-4o-mini": 128_000, - "gpt-4o-mini-2024-07-18": 128_000, - "gpt-4-turbo": 128_000, - "gpt-4-1106-preview": 128_000, - "gpt-4-turbo-preview": 128_000, - "gpt-4": 8_192, - "gpt-4-32k": 32_000, - "o1-preview": 128_000, - "o1-preview-2024-09-12": 128_000, - "o1-mini": 128_000, - "o1-mini-2024-09-12": 128_000, - "o3-mini": 200_000, - "o3-mini-2025-01-31": 200_000, + "gpt-3.5-turbo": 16385, + "gpt-3.5-turbo-1106": 16385, + "gpt-4o": 128000, + "gpt-4o-2024-08-06": 128000, + "gpt-4o-2024-05-13": 128000, + "gpt-4o-mini": 128000, + "gpt-4o-mini-2024-07-18": 128000, + 
"gpt-4-turbo": 128000, + "gpt-4-1106-preview": 128000, + "gpt-4-turbo-preview": 128000, + "gpt-4": 8192, + "gpt-4-32k": 32000, + "o1-preview": 128000, + "o1-preview-2024-09-12": 128000, + "o1-mini": 128000, + "o1-mini-2024-09-12": 128000, + "o3-mini": 200000, + "o3-mini-2025-01-31": 200000, }, deepseek: { - "deepseek-chat": 128_000, - "deepseek-coder": 128_000, - "deepseek-reasoner": 128_000, + "deepseek-chat": 128000, + "deepseek-coder": 128000, + "deepseek-reasoner": 128000, }, xai: { - "grok-beta": 131_072, + "grok-beta": 131072, }, };