2 changes: 2 additions & 0 deletions docker/.env.example
@@ -211,6 +211,8 @@ GID='1000'
# CHROMA_ENDPOINT='http://host.docker.internal:8000'
# CHROMA_API_HEADER="X-Api-Key"
# CHROMA_API_KEY="sk-123abc"
# CHROMA_TENANT="default_tenant" # Optional: for Chroma Cloud multi-tenancy
# CHROMA_DATABASE_NAME="default_database" # Optional: for Chroma Cloud database selection

# Enable all below if you are using vector database: Chroma Cloud.
# VECTOR_DB="chromacloud"
@@ -46,6 +46,42 @@ export default function ChromaDBOptions({ settings }) {
/>
</div>
</div>

<div className="w-full flex items-center gap-[36px] mt-1.5">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Tenant
<span className="text-xs text-theme-text-secondary font-normal ml-2">
(optional, for Chroma Cloud)
</span>
</label>
<input
name="ChromaTenant"
autoComplete="off"
type="text"
defaultValue={settings?.ChromaTenant}
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="default_tenant"
/>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Database Name
<span className="text-xs text-theme-text-secondary font-normal ml-2">
(optional, for Chroma Cloud)
</span>
</label>
<input
name="ChromaDatabaseName"
autoComplete="off"
type="text"
defaultValue={settings?.ChromaDatabaseName}
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="default_database"
/>
</div>
</div>
</div>
);
}
2 changes: 2 additions & 0 deletions server/.env.example
@@ -201,6 +201,8 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# CHROMA_ENDPOINT='http://localhost:8000'
# CHROMA_API_HEADER="X-Api-Key"
# CHROMA_API_KEY="sk-123abc"
# CHROMA_TENANT="default_tenant" # Optional: for Chroma Cloud multi-tenancy
# CHROMA_DATABASE_NAME="default_database" # Optional: for Chroma Cloud database selection

# Enable all below if you are using vector database: Chroma Cloud.
# VECTOR_DB="chromacloud"
107 changes: 107 additions & 0 deletions server/__tests__/utils/AiProviders/openAi/gpt5-fix.test.js
@@ -0,0 +1,107 @@
/**
* Test for GPT-5 max_completion_tokens parameter fix
* GitHub Issue: https://github.com/Mintplex-Labs/anything-llm/issues/4304
*/

describe("OpenAI GPT-5 Parameter Fix", () => {
describe("Parameter Construction Logic", () => {
// Mock the logic from the OpenAiLLM class
function constructRequestParams(baseParams, maxTokens, model) {
const isGpt5Model = model.startsWith("gpt-5");
const params = { ...baseParams };

if (maxTokens && isGpt5Model) {
params.max_completion_tokens = maxTokens;
} else if (maxTokens) {
params.max_tokens = maxTokens;
}

return params;
}

test("should use max_completion_tokens for gpt-5 model", () => {
const baseParams = { model: "gpt-5", messages: [] };
const result = constructRequestParams(baseParams, 1024, "gpt-5");

expect(result).toHaveProperty("max_completion_tokens", 1024);
expect(result).not.toHaveProperty("max_tokens");
});

test("should use max_completion_tokens for gpt-5-turbo model", () => {
const baseParams = { model: "gpt-5-turbo", messages: [] };
const result = constructRequestParams(baseParams, 2048, "gpt-5-turbo");

expect(result).toHaveProperty("max_completion_tokens", 2048);
expect(result).not.toHaveProperty("max_tokens");
});

test("should use max_tokens for gpt-4o model", () => {
const baseParams = { model: "gpt-4o", messages: [] };
const result = constructRequestParams(baseParams, 4096, "gpt-4o");

expect(result).toHaveProperty("max_tokens", 4096);
expect(result).not.toHaveProperty("max_completion_tokens");
});

test("should use max_tokens for gpt-3.5-turbo model", () => {
const baseParams = { model: "gpt-3.5-turbo", messages: [] };
const result = constructRequestParams(baseParams, 1024, "gpt-3.5-turbo");

expect(result).toHaveProperty("max_tokens", 1024);
expect(result).not.toHaveProperty("max_completion_tokens");
});

test("should not add any token parameter when maxTokens is null", () => {
const baseParams = { model: "gpt-5", messages: [] };
const result = constructRequestParams(baseParams, null, "gpt-5");

expect(result).not.toHaveProperty("max_tokens");
expect(result).not.toHaveProperty("max_completion_tokens");
});

test("should not add any token parameter when maxTokens is undefined", () => {
const baseParams = { model: "gpt-5", messages: [] };
const result = constructRequestParams(baseParams, undefined, "gpt-5");

expect(result).not.toHaveProperty("max_tokens");
expect(result).not.toHaveProperty("max_completion_tokens");
});

test("should preserve other parameters in baseParams", () => {
const baseParams = {
model: "gpt-5",
messages: [{ role: "user", content: "test" }],
temperature: 0.7,
stream: true,
};
const result = constructRequestParams(baseParams, 1024, "gpt-5");

expect(result).toHaveProperty("model", "gpt-5");
expect(result).toHaveProperty("messages");
expect(result).toHaveProperty("temperature", 0.7);
expect(result).toHaveProperty("stream", true);
expect(result).toHaveProperty("max_completion_tokens", 1024);
});
});

describe("Model Detection", () => {
function isGpt5Model(model) {
return model.startsWith("gpt-5");
}

test("should correctly identify gpt-5 models", () => {
expect(isGpt5Model("gpt-5")).toBe(true);
expect(isGpt5Model("gpt-5-turbo")).toBe(true);
expect(isGpt5Model("gpt-5-32k")).toBe(true);
expect(isGpt5Model("gpt-5-preview")).toBe(true);
});

test("should correctly identify non-gpt-5 models", () => {
expect(isGpt5Model("gpt-4o")).toBe(false);
expect(isGpt5Model("gpt-4")).toBe(false);
expect(isGpt5Model("gpt-3.5-turbo")).toBe(false);
expect(isGpt5Model("o1-preview")).toBe(false);
expect(isGpt5Model("claude-3")).toBe(false);
});
});
});
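
For reference, with `OPEN_AI_MAX_TOKENS=1024` the logic these tests exercise produces request bodies along the following lines (illustrative messages and values, not taken from the PR):

```js
// Illustrative only: shapes produced by constructRequestParams(baseParams, 1024, model).
// gpt-5 family models get max_completion_tokens; all other models keep max_tokens.
const gpt5Request = {
  model: "gpt-5",
  messages: [{ role: "user", content: "Hello" }],
  max_completion_tokens: 1024,
};

const gpt4oRequest = {
  model: "gpt-4o",
  messages: [{ role: "user", content: "Hello" }],
  max_tokens: 1024,
};
```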
3 changes: 3 additions & 0 deletions server/models/systemSettings.js
@@ -427,6 +427,8 @@ const SystemSettings = {
ChromaEndpoint: process.env.CHROMA_ENDPOINT,
ChromaApiHeader: process.env.CHROMA_API_HEADER,
ChromaApiKey: !!process.env.CHROMA_API_KEY,
ChromaTenant: process.env.CHROMA_TENANT,
ChromaDatabaseName: process.env.CHROMA_DATABASE_NAME,

// ChromaCloud DB Keys
ChromaCloudApiKey: !!process.env.CHROMACLOUD_API_KEY,
@@ -465,6 +467,7 @@ const SystemSettings = {
// OpenAI Keys
OpenAiKey: !!process.env.OPEN_AI_KEY,
OpenAiModelPref: process.env.OPEN_MODEL_PREF || "gpt-4o",
OpenAiMaxTokens: process.env.OPEN_AI_MAX_TOKENS,

// Azure + OpenAI Keys
AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
74 changes: 59 additions & 15 deletions server/utils/AiProviders/openAi/index.js
@@ -23,6 +23,11 @@ class OpenAiLLM {
user: this.promptWindowLimit() * 0.7,
};

// Set max tokens if specified in environment
this.maxTokens = process.env.OPEN_AI_MAX_TOKENS
? parseInt(process.env.OPEN_AI_MAX_TOKENS)
: null;

this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.log(
@@ -42,6 +47,33 @@
return this.model.startsWith("o");
}

/**
* Check if the model is a gpt-5 model that requires max_completion_tokens.
* @returns {boolean}
*/
get isGpt5Model() {
return this.model.startsWith("gpt-5");
}

/**
* Construct the appropriate parameters for the API request based on model type.
* @param {Object} baseParams - Base parameters for the request
* @param {number} maxTokens - Maximum tokens for response
* @returns {Object} Parameters with correct token limit key
*/
#constructRequestParams(baseParams, maxTokens = null) {
const params = { ...baseParams };

// gpt-5 models use max_completion_tokens instead of max_tokens
if (maxTokens && this.isGpt5Model) {
params.max_completion_tokens = maxTokens;
} else if (maxTokens) {
params.max_tokens = maxTokens;
}

return params;
}

#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
@@ -144,16 +176,21 @@
`OpenAI chat: ${this.model} is not valid for chat completion!`
);

const baseParams = {
model: this.model,
messages,
temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
};

const requestParams = this.#constructRequestParams(
baseParams,
this.maxTokens
);

  const result = await LLMPerformanceMonitor.measureAsyncFunction(
-   this.openai.chat.completions
-     .create({
-       model: this.model,
-       messages,
-       temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
-     })
-     .catch((e) => {
-       throw new Error(e.message);
-     })
+   this.openai.chat.completions.create(requestParams).catch((e) => {
+     throw new Error(e.message);
+   })
  );

if (
@@ -180,13 +217,20 @@
`OpenAI chat: ${this.model} is not valid for chat completion!`
);

const baseParams = {
model: this.model,
stream: true,
messages,
temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
};

const requestParams = this.#constructRequestParams(
baseParams,
this.maxTokens
);

  const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
-   this.openai.chat.completions.create({
-     model: this.model,
-     stream: true,
-     messages,
-     temperature: this.isOTypeModel ? 1 : temperature, // o1 models only accept temperature 1
-   }),
+   this.openai.chat.completions.create(requestParams),
messages
// runPromptTokenCalculation: true - We manually count the tokens because OpenAI does not provide them in the stream
// since we are not using the OpenAI API version that supports this `stream_options` param.
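
Taken together, the cap is opt-in: a deployment that wants to bound response length would set something like the entry below in the server environment. This is a hypothetical sample (the diffs shown add the `OPEN_AI_MAX_TOKENS` mapping but no `.env.example` line for it):

```
# Hypothetical server .env entry — not part of this diff.
# Caps OpenAI responses at 1024 tokens; sent as max_completion_tokens
# for gpt-5 models and as max_tokens for everything else.
OPEN_AI_MAX_TOKENS=1024
```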
41 changes: 39 additions & 2 deletions server/utils/agents/aibitat/providers/openai.js
@@ -53,6 +53,36 @@ class OpenAIProvider extends Provider {
super(client);

this.model = model;
this.maxTokens = process.env.OPEN_AI_MAX_TOKENS
? parseInt(process.env.OPEN_AI_MAX_TOKENS)
: null;
}

/**
* Check if the model is a gpt-5 model that requires max_completion_tokens.
* @returns {boolean}
*/
get isGpt5Model() {
return this.model.startsWith("gpt-5");
}

/**
* Construct the appropriate parameters for the API request based on model type.
* @param {Object} baseParams - Base parameters for the request
* @param {number} maxTokens - Maximum tokens for response
* @returns {Object} Parameters with correct token limit key
*/
#constructRequestParams(baseParams, maxTokens = null) {
const params = { ...baseParams };

// gpt-5 models use max_completion_tokens instead of max_tokens
if (maxTokens && this.isGpt5Model) {
params.max_completion_tokens = maxTokens;
} else if (maxTokens) {
params.max_tokens = maxTokens;
}

return params;
}

/**
@@ -64,14 +94,21 @@
*/
async complete(messages, functions = []) {
try {
-     const response = await this.client.chat.completions.create({
+     const baseParams = {
        model: this.model,
        // stream: true,
        messages,
        ...(Array.isArray(functions) && functions?.length > 0
          ? { functions }
          : {}),
-     });
+     };
+
+     const requestParams = this.#constructRequestParams(
+       baseParams,
+       this.maxTokens
+     );
+
+     const response = await this.client.chat.completions.create(requestParams);

// Right now, we only support one completion,
// so we just take the first one in the list
12 changes: 12 additions & 0 deletions server/utils/helpers/updateENV.js
@@ -18,6 +18,10 @@ const KEY_MAPPING = {
envKey: "OPEN_MODEL_PREF",
checks: [isNotEmpty],
},
OpenAiMaxTokens: {
envKey: "OPEN_AI_MAX_TOKENS",
checks: [],
},
// Azure OpenAI Settings
AzureOpenAiEndpoint: {
envKey: "AZURE_OPENAI_ENDPOINT",
@@ -331,6 +335,14 @@ const KEY_MAPPING = {
envKey: "CHROMA_API_KEY",
checks: [],
},
ChromaTenant: {
envKey: "CHROMA_TENANT",
checks: [],
},
ChromaDatabaseName: {
envKey: "CHROMA_DATABASE_NAME",
checks: [],
},

// ChromaCloud Options
ChromaCloudApiKey: {
11 changes: 11 additions & 0 deletions server/utils/vectorDbProviders/chroma/CHROMA_SETUP.md
@@ -25,4 +25,15 @@ VECTOR_DB="chroma"
CHROMA_ENDPOINT='http://localhost:8000'
# CHROMA_API_HEADER="X-Api-Key" // If you have an Auth middleware on your instance.
# CHROMA_API_KEY="sk-123abc" // If you have an Auth middleware on your instance.
# CHROMA_TENANT="default_tenant" // Optional: for Chroma Cloud multi-tenancy
# CHROMA_DATABASE_NAME="default_database" // Optional: for Chroma Cloud database selection
```

### Chroma Cloud Configuration

When using Chroma Cloud, you can configure additional parameters for multi-tenancy:

- **CHROMA_TENANT**: Specify the tenant to use (defaults to "default_tenant" if not set)
- **CHROMA_DATABASE_NAME**: Specify the database within the tenant (defaults to "default_database" if not set)

These parameters are useful in multi-tenant Chroma Cloud deployments where data must be isolated by tenant, and by database within a tenant.
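
As a rough illustration, the server could wire these variables into the Chroma client as below. This is a minimal sketch, not this PR's actual connection code: it assumes the `chromadb` JS client's `ChromaClient` constructor accepts `tenant` and `database` options (available in recent client versions; verify against yours).

```js
const { ChromaClient } = require("chromadb");

// Sketch: pass tenant/database through only when explicitly configured,
// so self-hosted instances keep the client's built-in defaults.
const client = new ChromaClient({
  path: process.env.CHROMA_ENDPOINT || "http://localhost:8000",
  ...(process.env.CHROMA_TENANT ? { tenant: process.env.CHROMA_TENANT } : {}),
  ...(process.env.CHROMA_DATABASE_NAME
    ? { database: process.env.CHROMA_DATABASE_NAME }
    : {}),
});
```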