diff --git a/docker/.env.example b/docker/.env.example
index dca22fa0493..c0a1b32ab10 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -211,6 +211,8 @@ GID='1000'
# CHROMA_ENDPOINT='http://host.docker.internal:8000'
# CHROMA_API_HEADER="X-Api-Key"
# CHROMA_API_KEY="sk-123abc"
+# CHROMA_TENANT="default_tenant" # Optional: tenant to connect to on multi-tenant Chroma instances (e.g. Chroma Cloud)
+# CHROMA_DATABASE_NAME="default_database" # Optional: database to use within the selected tenant
# Enable all below if you are using vector database: Chroma Cloud.
# VECTOR_DB="chromacloud"
diff --git a/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx b/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx
index cdcda5cca72..98ecebe1531 100644
--- a/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx
+++ b/frontend/src/components/VectorDBSelection/ChromaDBOptions/index.jsx
@@ -46,6 +46,42 @@ export default function ChromaDBOptions({ settings }) {
/>
+
+
);
}
diff --git a/server/.env.example b/server/.env.example
index 0d3d1ecd0e0..ebfa4179529 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -201,6 +201,8 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# CHROMA_ENDPOINT='http://localhost:8000'
# CHROMA_API_HEADER="X-Api-Key"
# CHROMA_API_KEY="sk-123abc"
+# CHROMA_TENANT="default_tenant" # Optional: tenant to connect to on multi-tenant Chroma instances (e.g. Chroma Cloud)
+# CHROMA_DATABASE_NAME="default_database" # Optional: database to use within the selected tenant
# Enable all below if you are using vector database: Chroma Cloud.
# VECTOR_DB="chromacloud"
diff --git a/server/__tests__/utils/AiProviders/openAi/gpt5-fix.test.js b/server/__tests__/utils/AiProviders/openAi/gpt5-fix.test.js
new file mode 100644
index 00000000000..9dd8b586b7b
--- /dev/null
+++ b/server/__tests__/utils/AiProviders/openAi/gpt5-fix.test.js
@@ -0,0 +1,107 @@
+/**
+ * Test for GPT-5 max_completion_tokens parameter fix
+ * GitHub Issue: https://github.com/Mintplex-Labs/anything-llm/issues/4304
+ */
+
+describe("OpenAI GPT-5 Parameter Fix", () => {
+  describe("Parameter Construction Logic", () => {
+    // Mock the logic from the OpenAiLLM class
+    function constructRequestParams(baseParams, maxTokens, model) {
+      const isGpt5Model = model.startsWith("gpt-5");
+      const params = { ...baseParams };
+
+      if (maxTokens && isGpt5Model) {
+        params.max_completion_tokens = maxTokens;
+      } else if (maxTokens) {
+        params.max_tokens = maxTokens;
+      }
+
+      return params;
+    }
+
+    test("should use max_completion_tokens for gpt-5 model", () => {
+      const baseParams = { model: "gpt-5", messages: [] };
+      const result = constructRequestParams(baseParams, 1024, "gpt-5");
+
+      expect(result).toHaveProperty("max_completion_tokens", 1024);
+      expect(result).not.toHaveProperty("max_tokens");
+    });
+
+    test("should use max_completion_tokens for gpt-5-turbo model", () => {
+      const baseParams = { model: "gpt-5-turbo", messages: [] };
+      const result = constructRequestParams(baseParams, 2048, "gpt-5-turbo");
+
+      expect(result).toHaveProperty("max_completion_tokens", 2048);
+      expect(result).not.toHaveProperty("max_tokens");
+    });
+
+    test("should use max_tokens for gpt-4o model", () => {
+      const baseParams = { model: "gpt-4o", messages: [] };
+      const result = constructRequestParams(baseParams, 4096, "gpt-4o");
+
+      expect(result).toHaveProperty("max_tokens", 4096);
+      expect(result).not.toHaveProperty("max_completion_tokens");
+    });
+
+    test("should use max_tokens for gpt-3.5-turbo model", () => {
+      const baseParams = { model: "gpt-3.5-turbo", messages: [] };
+      const result = constructRequestParams(baseParams, 1024, "gpt-3.5-turbo");
+
+      expect(result).toHaveProperty("max_tokens", 1024);
+      expect(result).not.toHaveProperty("max_completion_tokens");
+    });
+
+    test("should not add any token parameter when maxTokens is null", () => {
+      const baseParams = { model: "gpt-5", messages: [] };
+      const result = constructRequestParams(baseParams, null, "gpt-5");
+
+      expect(result).not.toHaveProperty("max_tokens");
+      expect(result).not.toHaveProperty("max_completion_tokens");
+    });
+
+    test("should not add any token parameter when maxTokens is undefined", () => {
+      const baseParams = { model: "gpt-5", messages: [] };
+      const result = constructRequestParams(baseParams, undefined, "gpt-5");
+
+      expect(result).not.toHaveProperty("max_tokens");
+      expect(result).not.toHaveProperty("max_completion_tokens");
+    });
+
+    test("should preserve other parameters in baseParams", () => {
+      const baseParams = {
+        model: "gpt-5",
+        messages: [{ role: "user", content: "test" }],
+        temperature: 0.7,
+        stream: true,
+      };
+      const result = constructRequestParams(baseParams, 1024, "gpt-5");
+
+      expect(result).toHaveProperty("model", "gpt-5");
+      expect(result).toHaveProperty("messages");
+      expect(result).toHaveProperty("temperature", 0.7);
+      expect(result).toHaveProperty("stream", true);
+      expect(result).toHaveProperty("max_completion_tokens", 1024);
+    });
+  });
+
+  describe("Model Detection", () => {
+    function isGpt5Model(model) {
+      return model.startsWith("gpt-5");
+    }
+
+    test("should correctly identify gpt-5 models", () => {
+      expect(isGpt5Model("gpt-5")).toBe(true);
+      expect(isGpt5Model("gpt-5-turbo")).toBe(true);
+      expect(isGpt5Model("gpt-5-32k")).toBe(true);
+      expect(isGpt5Model("gpt-5-preview")).toBe(true);
+    });
+
+    test("should correctly identify non-gpt-5 models", () => {
+      expect(isGpt5Model("gpt-4o")).toBe(false);
+      expect(isGpt5Model("gpt-4")).toBe(false);
+      expect(isGpt5Model("gpt-3.5-turbo")).toBe(false);
+      expect(isGpt5Model("o1-preview")).toBe(false);
+      expect(isGpt5Model("claude-3")).toBe(false);
+    });
+  });
+});
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index f0796be0431..db660d19e3e 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -427,6 +427,8 @@ const SystemSettings = {
ChromaEndpoint: process.env.CHROMA_ENDPOINT,
ChromaApiHeader: process.env.CHROMA_API_HEADER,
ChromaApiKey: !!process.env.CHROMA_API_KEY,
+ ChromaTenant: process.env.CHROMA_TENANT,
+ ChromaDatabaseName: process.env.CHROMA_DATABASE_NAME,
// ChromaCloud DB Keys
ChromaCloudApiKey: !!process.env.CHROMACLOUD_API_KEY,
@@ -465,6 +467,7 @@ const SystemSettings = {
// OpenAI Keys
OpenAiKey: !!process.env.OPEN_AI_KEY,
OpenAiModelPref: process.env.OPEN_MODEL_PREF || "gpt-4o",
+ OpenAiMaxTokens: process.env.OPEN_AI_MAX_TOKENS,
// Azure + OpenAI Keys
AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
diff --git a/server/utils/AiProviders/openAi/index.js b/server/utils/AiProviders/openAi/index.js
index c371a1d47d1..4ebfeaf92f5 100644
--- a/server/utils/AiProviders/openAi/index.js
+++ b/server/utils/AiProviders/openAi/index.js
@@ -23,6 +23,11 @@ class OpenAiLLM {
       user: this.promptWindowLimit() * 0.7,
     };
+    // Set max tokens if specified in environment (explicit radix 10;
+    // a non-numeric value yields NaN, which is falsy and thus ignored downstream)
+    this.maxTokens = process.env.OPEN_AI_MAX_TOKENS
+      ? parseInt(process.env.OPEN_AI_MAX_TOKENS, 10)
+      : null;
+
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
     this.log(
@@ -42,6 +47,33 @@
     return this.model.startsWith("o");
   }
+  /**
+   * Check if the model is a gpt-5 model that requires max_completion_tokens.
+   * @returns {boolean}
+   */
+  get isGpt5Model() {
+    return this.model.startsWith("gpt-5");
+  }
+
+  /**
+   * Construct the appropriate parameters for the API request based on model type.
+   * @param {Object} baseParams - Base parameters for the request
+   * @param {number} maxTokens - Maximum tokens for response
+   * @returns {Object} Parameters with correct token limit key
+   */
+  #constructRequestParams(baseParams, maxTokens = null) {
+    const params = { ...baseParams };
+
+    // gpt-5 and o-series reasoning models reject max_tokens; use max_completion_tokens
+    if (maxTokens && (this.isGpt5Model || this.isOTypeModel)) {
+      params.max_completion_tokens = maxTokens;
+    } else if (maxTokens) {
+      params.max_tokens = maxTokens;
+    }
+
+    return params;
+  }
+
   #appendContext(contextTexts = []) {
     if (!contextTexts || !contextTexts.length) return "";
     return (
@@ -144,16 +176,21 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
+ const baseParams = {
+ model: this.model,
+ messages,
+ temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
+ };
+
+ const requestParams = this.#constructRequestParams(
+ baseParams,
+ this.maxTokens
+ );
+
const result = await LLMPerformanceMonitor.measureAsyncFunction(
- this.openai.chat.completions
- .create({
- model: this.model,
- messages,
- temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
- })
- .catch((e) => {
- throw new Error(e.message);
- })
+ this.openai.chat.completions.create(requestParams).catch((e) => {
+ throw new Error(e.message);
+ })
);
if (
@@ -180,13 +217,20 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
+ const baseParams = {
+ model: this.model,
+ stream: true,
+ messages,
+ temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
+ };
+
+ const requestParams = this.#constructRequestParams(
+ baseParams,
+ this.maxTokens
+ );
+
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
- this.openai.chat.completions.create({
- model: this.model,
- stream: true,
- messages,
- temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
- }),
+ this.openai.chat.completions.create(requestParams),
messages
// runPromptTokenCalculation: true - We manually count the tokens because OpenAI does not provide them in the stream
// since we are not using the OpenAI API version that supports this `stream_options` param.
diff --git a/server/utils/agents/aibitat/providers/openai.js b/server/utils/agents/aibitat/providers/openai.js
index 73976bcb48e..48fec463bf9 100644
--- a/server/utils/agents/aibitat/providers/openai.js
+++ b/server/utils/agents/aibitat/providers/openai.js
@@ -53,6 +53,36 @@ class OpenAIProvider extends Provider {
     super(client);
     this.model = model;
+    this.maxTokens = process.env.OPEN_AI_MAX_TOKENS
+      ? parseInt(process.env.OPEN_AI_MAX_TOKENS, 10)
+      : null;
+  }
+
+  /**
+   * Check if the model is a gpt-5 model that requires max_completion_tokens.
+   * @returns {boolean}
+   */
+  get isGpt5Model() {
+    return this.model.startsWith("gpt-5");
+  }
+
+  /**
+   * Construct the appropriate parameters for the API request based on model type.
+   * @param {Object} baseParams - Base parameters for the request
+   * @param {number} maxTokens - Maximum tokens for response
+   * @returns {Object} Parameters with correct token limit key
+   */
+  #constructRequestParams(baseParams, maxTokens = null) {
+    const params = { ...baseParams };
+
+    // gpt-5 and o-series reasoning models reject max_tokens; use max_completion_tokens
+    // (same heuristic as OpenAiLLM.isOTypeModel — model names starting with "o")
+    if (maxTokens && (this.isGpt5Model || this.model.startsWith("o"))) {
+      params.max_completion_tokens = maxTokens;
+    } else if (maxTokens) {
+      params.max_tokens = maxTokens;
+    }
+
+    return params;
+  }
/**
@@ -64,14 +94,21 @@ class OpenAIProvider extends Provider {
*/
async complete(messages, functions = []) {
try {
- const response = await this.client.chat.completions.create({
+ const baseParams = {
model: this.model,
// stream: true,
messages,
...(Array.isArray(functions) && functions?.length > 0
? { functions }
: {}),
- });
+ };
+
+ const requestParams = this.#constructRequestParams(
+ baseParams,
+ this.maxTokens
+ );
+
+ const response = await this.client.chat.completions.create(requestParams);
// Right now, we only support one completion,
// so we just take the first one in the list
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 124b4b4e77f..d219ce8ae12 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -18,6 +18,10 @@ const KEY_MAPPING = {
envKey: "OPEN_MODEL_PREF",
checks: [isNotEmpty],
},
+ OpenAiMaxTokens: {
+ envKey: "OPEN_AI_MAX_TOKENS",
+ checks: [],
+ },
// Azure OpenAI Settings
AzureOpenAiEndpoint: {
envKey: "AZURE_OPENAI_ENDPOINT",
@@ -331,6 +335,14 @@ const KEY_MAPPING = {
envKey: "CHROMA_API_KEY",
checks: [],
},
+ ChromaTenant: {
+ envKey: "CHROMA_TENANT",
+ checks: [],
+ },
+ ChromaDatabaseName: {
+ envKey: "CHROMA_DATABASE_NAME",
+ checks: [],
+ },
// ChromaCloud Options
ChromaCloudApiKey: {
diff --git a/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md b/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md
index e6d6fee9e10..293b8b953c2 100644
--- a/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md
+++ b/server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md
@@ -25,4 +25,15 @@ VECTOR_DB="chroma"
CHROMA_ENDPOINT='http://localhost:8000'
# CHROMA_API_HEADER="X-Api-Key" // If you have an Auth middleware on your instance.
# CHROMA_API_KEY="sk-123abc" // If you have an Auth middleware on your instance.
+# CHROMA_TENANT="default_tenant" // Optional: tenant for multi-tenant Chroma instances (e.g. Chroma Cloud).
+# CHROMA_DATABASE_NAME="default_database" // Optional: database within the selected tenant.
```
+
+### Tenant & Database Configuration
+
+When connecting to a multi-tenant Chroma deployment (such as Chroma Cloud), you can configure which tenant and database AnythingLLM uses:
+
+- **CHROMA_TENANT**: Specify the tenant to use (the Chroma client falls back to "default_tenant" when not set)
+- **CHROMA_DATABASE_NAME**: Specify the database within the tenant (the Chroma client falls back to "default_database" when not set)
+
+These parameters let you isolate data by tenant and database in multi-tenant deployments; single-tenant local instances can leave both unset.
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index bc12818fd18..c6112d02607 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -71,6 +71,12 @@ const Chroma = {
},
}
: {}),
+ ...(process.env.CHROMA_TENANT
+ ? { tenant: process.env.CHROMA_TENANT }
+ : {}),
+ ...(process.env.CHROMA_DATABASE_NAME
+ ? { database: process.env.CHROMA_DATABASE_NAME }
+ : {}),
});
const isAlive = await client.heartbeat();