diff --git a/docker/.env.example b/docker/.env.example
index dca22fa0493..c0a1b32ab10 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -211,6 +211,8 @@ GID='1000'
 # CHROMA_ENDPOINT='http://host.docker.internal:8000'
 # CHROMA_API_HEADER="X-Api-Key"
 # CHROMA_API_KEY="sk-123abc"
+# CHROMA_TENANT="default_tenant" # Optional: for Chroma Cloud multi-tenancy
+# CHROMA_DATABASE_NAME="default_database" # Optional: for Chroma Cloud database selection
 
 # Enable all below if you are using vector database: Chroma Cloud.
 # VECTOR_DB="chromacloud"
diff --git a/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx b/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx
index cdcda5cca72..98ecebe1531 100644
--- a/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx
+++ b/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx
@@ -46,6 +46,42 @@ export default function ChromaDBOptions({ settings }) {
           />
         </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            Chroma Tenant
+          </label>
+          <p className="text-xs text-white/60 mb-2">
+            Optional: tenant for Chroma Cloud multi-tenancy.
+          </p>
+          <input
+            type="text"
+            name="ChromaTenant"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="default_tenant"
+            defaultValue={settings?.ChromaTenant}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            Chroma Database Name
+          </label>
+          <p className="text-xs text-white/60 mb-2">
+            Optional: database within the tenant for Chroma Cloud.
+          </p>
+          <input
+            type="text"
+            name="ChromaDatabaseName"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="default_database"
+            defaultValue={settings?.ChromaDatabaseName}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
); } diff --git a/server/.env.example b/server/.env.example index 0d3d1ecd0e0..ebfa4179529 100644 --- a/server/.env.example +++ b/server/.env.example @@ -201,6 +201,8 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. # CHROMA_ENDPOINT='http://localhost:8000' # CHROMA_API_HEADER="X-Api-Key" # CHROMA_API_KEY="sk-123abc" +# CHROMA_TENANT="default_tenant" # Optional: for Chroma Cloud multi-tenancy +# CHROMA_DATABASE_NAME="default_database" # Optional: for Chroma Cloud database selection # Enable all below if you are using vector database: Chroma Cloud. # VECTOR_DB="chromacloud" diff --git a/server/__tests__/utils/AiProviders/openAi/gpt5-fix.test.js b/server/__tests__/utils/AiProviders/openAi/gpt5-fix.test.js new file mode 100644 index 00000000000..9dd8b586b7b --- /dev/null +++ b/server/__tests__/utils/AiProviders/openAi/gpt5-fix.test.js @@ -0,0 +1,107 @@ +/** + * Test for GPT-5 max_completion_tokens parameter fix + * GitHub Issue: https://github.com/Mintplex-Labs/anything-llm/issues/4304 + */ + +describe("OpenAI GPT-5 Parameter Fix", () => { + describe("Parameter Construction Logic", () => { + // Mock the logic from the OpenAiLLM class + function constructRequestParams(baseParams, maxTokens, model) { + const isGpt5Model = model.startsWith("gpt-5"); + const params = { ...baseParams }; + + if (maxTokens && isGpt5Model) { + params.max_completion_tokens = maxTokens; + } else if (maxTokens) { + params.max_tokens = maxTokens; + } + + return params; + } + + test("should use max_completion_tokens for gpt-5 model", () => { + const baseParams = { model: "gpt-5", messages: [] }; + const result = constructRequestParams(baseParams, 1024, "gpt-5"); + + expect(result).toHaveProperty("max_completion_tokens", 1024); + expect(result).not.toHaveProperty("max_tokens"); + }); + + test("should use max_completion_tokens for gpt-5-turbo model", () => { + const baseParams = { model: "gpt-5-turbo", messages: [] }; + const result = constructRequestParams(baseParams, 2048, "gpt-5-turbo"); + + expect(result).toHaveProperty("max_completion_tokens", 2048); + expect(result).not.toHaveProperty("max_tokens"); + }); + + test("should use max_tokens for gpt-4o model", () => { + const baseParams = { model: "gpt-4o", messages: [] }; + const result = constructRequestParams(baseParams, 4096, "gpt-4o"); + + expect(result).toHaveProperty("max_tokens", 4096); + expect(result).not.toHaveProperty("max_completion_tokens"); + }); + + test("should use max_tokens for gpt-3.5-turbo model", () => { + const baseParams = { model: "gpt-3.5-turbo", messages: [] }; + const result = constructRequestParams(baseParams, 1024, "gpt-3.5-turbo"); + + expect(result).toHaveProperty("max_tokens", 1024); + expect(result).not.toHaveProperty("max_completion_tokens"); + }); + + test("should not add any token parameter when maxTokens is null", () => { + const baseParams = { model: "gpt-5", messages: [] }; + const result = constructRequestParams(baseParams, null, "gpt-5"); + + expect(result).not.toHaveProperty("max_tokens"); + expect(result).not.toHaveProperty("max_completion_tokens"); + }); + + test("should not add any token parameter when maxTokens is undefined", () => { + const baseParams = { model: "gpt-5", messages: [] }; + const result = constructRequestParams(baseParams, undefined, "gpt-5"); + + expect(result).not.toHaveProperty("max_tokens"); + expect(result).not.toHaveProperty("max_completion_tokens"); + }); + + test("should preserve other parameters in baseParams", () => { + const baseParams = { + model: "gpt-5", + messages: [{ 
+        messages: [{ role: "user", content: "test" }],
+        temperature: 0.7,
+        stream: true,
+      };
+      const result = constructRequestParams(baseParams, 1024, "gpt-5");
+
+      expect(result).toHaveProperty("model", "gpt-5");
+      expect(result).toHaveProperty("messages");
+      expect(result).toHaveProperty("temperature", 0.7);
+      expect(result).toHaveProperty("stream", true);
+      expect(result).toHaveProperty("max_completion_tokens", 1024);
+    });
+  });
+
+  describe("Model Detection", () => {
+    function isGpt5Model(model) {
+      return model.startsWith("gpt-5");
+    }
+
+    test("should correctly identify gpt-5 models", () => {
+      expect(isGpt5Model("gpt-5")).toBe(true);
+      expect(isGpt5Model("gpt-5-turbo")).toBe(true);
+      expect(isGpt5Model("gpt-5-32k")).toBe(true);
+      expect(isGpt5Model("gpt-5-preview")).toBe(true);
+    });
+
+    test("should correctly identify non-gpt-5 models", () => {
+      expect(isGpt5Model("gpt-4o")).toBe(false);
+      expect(isGpt5Model("gpt-4")).toBe(false);
+      expect(isGpt5Model("gpt-3.5-turbo")).toBe(false);
+      expect(isGpt5Model("o1-preview")).toBe(false);
+      expect(isGpt5Model("claude-3")).toBe(false);
+    });
+  });
+});
\ No newline at end of file
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index f0796be0431..db660d19e3e 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -427,6 +427,8 @@ const SystemSettings = {
       ChromaEndpoint: process.env.CHROMA_ENDPOINT,
       ChromaApiHeader: process.env.CHROMA_API_HEADER,
       ChromaApiKey: !!process.env.CHROMA_API_KEY,
+      ChromaTenant: process.env.CHROMA_TENANT,
+      ChromaDatabaseName: process.env.CHROMA_DATABASE_NAME,
 
       // ChromaCloud DB Keys
       ChromaCloudApiKey: !!process.env.CHROMACLOUD_API_KEY,
@@ -465,6 +467,7 @@ const SystemSettings = {
       // OpenAI Keys
       OpenAiKey: !!process.env.OPEN_AI_KEY,
       OpenAiModelPref: process.env.OPEN_MODEL_PREF || "gpt-4o",
+      OpenAiMaxTokens: process.env.OPEN_AI_MAX_TOKENS,
 
       // Azure + OpenAI Keys
       AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js
index c371a1d47d1..4ebfeaf92f5 100644
--- a/server/utils/AiProviders/openAi/index.js
+++ b/server/utils/AiProviders/openAi/index.js
@@ -23,6 +23,11 @@ class OpenAiLLM {
       user: this.promptWindowLimit() * 0.7,
     };
 
+    // Set max tokens if specified in environment
+    this.maxTokens = process.env.OPEN_AI_MAX_TOKENS
+      ? parseInt(process.env.OPEN_AI_MAX_TOKENS, 10)
+      : null;
+
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
     this.log(
@@ -42,6 +47,33 @@ class OpenAiLLM {
     return this.model.startsWith("o");
   }
 
+  /**
+   * Check if the model is a gpt-5 model that requires max_completion_tokens.
+   * @returns {boolean}
+   */
+  get isGpt5Model() {
+    return this.model.startsWith("gpt-5");
+  }
+
+  /**
+   * Construct the appropriate parameters for the API request based on model type.
+   * @param {Object} baseParams - Base parameters for the request
+   * @param {number} maxTokens - Maximum tokens for response
+   * @returns {Object} Parameters with correct token limit key
+   */
+  #constructRequestParams(baseParams, maxTokens = null) {
+    const params = { ...baseParams };
+
+    // gpt-5 models use max_completion_tokens instead of max_tokens
+    if (maxTokens && this.isGpt5Model) {
+      params.max_completion_tokens = maxTokens;
+    } else if (maxTokens) {
+      params.max_tokens = maxTokens;
+    }
+
+    return params;
+  }
+
   #appendContext(contextTexts = []) {
     if (!contextTexts || !contextTexts.length) return "";
     return (
@@ -144,16 +176,21 @@
         `OpenAI chat: ${this.model} is not valid for chat completion!`
       );
 
+    const baseParams = {
+      model: this.model,
+      messages,
+      temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
+    };
+
+    const requestParams = this.#constructRequestParams(
+      baseParams,
+      this.maxTokens
+    );
+
     const result = await LLMPerformanceMonitor.measureAsyncFunction(
-      this.openai.chat.completions
-        .create({
-          model: this.model,
-          messages,
-          temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
-        })
-        .catch((e) => {
-          throw new Error(e.message);
-        })
+      this.openai.chat.completions.create(requestParams).catch((e) => {
+        throw new Error(e.message);
+      })
     );
 
     if (
@@ -180,13 +217,20 @@
         `OpenAI chat: ${this.model} is not valid for chat completion!`
       );
 
+    const baseParams = {
+      model: this.model,
+      stream: true,
+      messages,
+      temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
+    };
+
+    const requestParams = this.#constructRequestParams(
+      baseParams,
+      this.maxTokens
+    );
+
     const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
-      this.openai.chat.completions.create({
-        model: this.model,
-        stream: true,
-        messages,
-        temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
-      }),
+      this.openai.chat.completions.create(requestParams),
       messages
       // runPromptTokenCalculation: true - We manually count the tokens because OpenAI does not provide them in the stream
       // since we are not using the OpenAI API version that supports this `stream_options` param.
diff --git a/server/utils/agents/aibitat/providers/openai.js b/server/utils/agents/aibitat/providers/openai.js
index 73976bcb48e..48fec463bf9 100644
--- a/server/utils/agents/aibitat/providers/openai.js
+++ b/server/utils/agents/aibitat/providers/openai.js
@@ -53,6 +53,36 @@ class OpenAIProvider extends Provider {
     super(client);
     this.model = model;
+    this.maxTokens = process.env.OPEN_AI_MAX_TOKENS
+      ? parseInt(process.env.OPEN_AI_MAX_TOKENS, 10)
+      : null;
+  }
+
+  /**
+   * Check if the model is a gpt-5 model that requires max_completion_tokens.
+   * @returns {boolean}
+   */
+  get isGpt5Model() {
+    return this.model.startsWith("gpt-5");
+  }
+
+  /**
+   * Construct the appropriate parameters for the API request based on model type.
+   * @param {Object} baseParams - Base parameters for the request
+   * @param {number} maxTokens - Maximum tokens for response
+   * @returns {Object} Parameters with correct token limit key
+   */
+  #constructRequestParams(baseParams, maxTokens = null) {
+    const params = { ...baseParams };
+
+    // gpt-5 models use max_completion_tokens instead of max_tokens
+    if (maxTokens && this.isGpt5Model) {
+      params.max_completion_tokens = maxTokens;
+    } else if (maxTokens) {
+      params.max_tokens = maxTokens;
+    }
+
+    return params;
   }
 
   /**
@@ -64,14 +94,21 @@
    */
   async complete(messages, functions = []) {
     try {
-      const response = await this.client.chat.completions.create({
+      const baseParams = {
         model: this.model,
         // stream: true,
         messages,
         ...(Array.isArray(functions) && functions?.length > 0
           ? { functions }
           : {}),
-      });
+      };
+
+      const requestParams = this.#constructRequestParams(
+        baseParams,
+        this.maxTokens
+      );
+
+      const response = await this.client.chat.completions.create(requestParams);
 
       // Right now, we only support one completion,
       // so we just take the first one in the list
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 124b4b4e77f..d219ce8ae12 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -18,6 +18,10 @@ const KEY_MAPPING = {
     envKey: "OPEN_MODEL_PREF",
     checks: [isNotEmpty],
   },
+  OpenAiMaxTokens: {
+    envKey: "OPEN_AI_MAX_TOKENS",
+    checks: [],
+  },
   // Azure OpenAI Settings
   AzureOpenAiEndpoint: {
     envKey: "AZURE_OPENAI_ENDPOINT",
@@ -331,6 +335,14 @@ const KEY_MAPPING = {
     envKey: "CHROMA_API_KEY",
     checks: [],
   },
+  ChromaTenant: {
+    envKey: "CHROMA_TENANT",
+    checks: [],
+  },
+  ChromaDatabaseName: {
+    envKey: "CHROMA_DATABASE_NAME",
+    checks: [],
+  },
 
   // ChromaCloud Options
   ChromaCloudApiKey: {
diff --git a/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md b/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md
index e6d6fee9e10..293b8b953c2 100644
--- a/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md
+++ b/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md
@@ -25,4 +25,15 @@ VECTOR_DB="chroma"
 CHROMA_ENDPOINT='http://localhost:8000'
 # CHROMA_API_HEADER="X-Api-Key" // If you have an Auth middleware on your instance.
 # CHROMA_API_KEY="sk-123abc" // If you have an Auth middleware on your instance.
+# CHROMA_TENANT="default_tenant" // Optional: for Chroma Cloud multi-tenancy
+# CHROMA_DATABASE_NAME="default_database" // Optional: for Chroma Cloud database selection
 ```
+
+### Chroma Cloud Configuration
+
+When using Chroma Cloud, you can configure additional parameters for multi-tenancy:
+
+- **CHROMA_TENANT**: Specify the tenant to use (defaults to "default_tenant" if not set)
+- **CHROMA_DATABASE_NAME**: Specify the database within the tenant (defaults to "default_database" if not set)
+
+These parameters are useful for organizing data in multi-tenant Chroma Cloud deployments where you need to isolate data by tenant and database.
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index bc12818fd18..c6112d02607 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -71,6 +71,12 @@ const Chroma = {
             },
           }
         : {}),
+      ...(process.env.CHROMA_TENANT
+        ? { tenant: process.env.CHROMA_TENANT }
+        : {}),
+      ...(process.env.CHROMA_DATABASE_NAME
+        ? { database: process.env.CHROMA_DATABASE_NAME }
+        : {}),
     });
 
     const isAlive = await client.heartbeat();
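
A note on the OpenAI-side change, with a minimal runnable sketch: the provider now reads `OPEN_AI_MAX_TOKENS` once at construction and routes it to `max_completion_tokens` for gpt-5 models (which reject the legacy key) or to `max_tokens` for everything else. The helper below mirrors the private `#constructRequestParams` from the diff in standalone form; the model names and token values are illustrative only.

```js
// Standalone mirror of #constructRequestParams from the patch, for illustration.
function constructRequestParams(baseParams, maxTokens, model) {
  const params = { ...baseParams };
  if (maxTokens && model.startsWith("gpt-5")) {
    // gpt-5 family rejects max_tokens and expects max_completion_tokens.
    params.max_completion_tokens = maxTokens;
  } else if (maxTokens) {
    // Older chat models keep the legacy max_tokens parameter.
    params.max_tokens = maxTokens;
  }
  return params;
}

// e.g. with OPEN_AI_MAX_TOKENS=1024 in the environment:
const maxTokens = parseInt(process.env.OPEN_AI_MAX_TOKENS ?? "1024", 10);
console.log(constructRequestParams({ model: "gpt-5" }, maxTokens, "gpt-5"));
// => { model: 'gpt-5', max_completion_tokens: 1024 }
console.log(constructRequestParams({ model: "gpt-4o" }, maxTokens, "gpt-4o"));
// => { model: 'gpt-4o', max_tokens: 1024 }
```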
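Likewise, a hedged sketch of the Chroma change: the new env vars are spread into the `ChromaClient` constructor only when they are set, so self-hosted instances keep Chroma's server-side defaults (`default_tenant` / `default_database`). This assumes the `chromadb` JS client accepts `tenant` and `database` constructor options, as the diff implies; the endpoint fallback is illustrative.

```js
const { ChromaClient } = require("chromadb");

// Build the client the way the patched provider does: optional keys are
// spread in only when the env vars exist, so unset vars change nothing.
const client = new ChromaClient({
  path: process.env.CHROMA_ENDPOINT || "http://localhost:8000",
  ...(process.env.CHROMA_TENANT
    ? { tenant: process.env.CHROMA_TENANT }
    : {}),
  ...(process.env.CHROMA_DATABASE_NAME
    ? { database: process.env.CHROMA_DATABASE_NAME }
    : {}),
});

// Same liveness check the provider performs after connecting.
client.heartbeat().then((ts) => console.log("Chroma heartbeat:", ts));
```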