This is a service provided by indexloc; do not enter any passwords.
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ GID='1000'
# OLLAMA_MODEL_PREF='llama2'
# OLLAMA_MODEL_TOKEN_LIMIT=4096
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, max timeout in milliseconds for ollama response to conclude. Default is 5min before aborting)

# LLM_PROVIDER='togetherai'
# TOGETHER_AI_API_KEY='my-together-ai-key'
Expand Down
1 change: 1 addition & 0 deletions server/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# OLLAMA_MODEL_PREF='llama2'
# OLLAMA_MODEL_TOKEN_LIMIT=4096
# OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, max timeout in milliseconds for ollama response to conclude. Default is 5min before aborting)

# LLM_PROVIDER='togetherai'
# TOGETHER_AI_API_KEY='my-together-ai-key'
Expand Down
43 changes: 42 additions & 1 deletion server/utils/AiProviders/ollama/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ class OllamaAILLM {
const headers = this.authToken
? { Authorization: `Bearer ${this.authToken}` }
: {};
this.client = new Ollama({ host: this.basePath, headers: headers });
this.client = new Ollama({
host: this.basePath,
headers: headers,
fetch: this.#applyFetch(),
});
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
this.#log(
Expand All @@ -55,6 +59,43 @@ class OllamaAILLM {
);
}

/**
* Apply a custom fetch function to the Ollama client.
* This is useful when we want to bypass the default 5m timeout for global fetch
* for machines which run responses very slowly.
* @returns {Function} The custom fetch function.
*/
#applyFetch() {
try {
if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
const { Agent } = require("undici");
const moment = require("moment");
let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;

if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
this.#log(
"Timeout option was not set, is not a number, or is less than 5 minutes in ms - falling back to default",
{ timeout }
);
return fetch;
} else timeout = Number(timeout);

const noTimeoutFetch = (input, init = {}) => {
return fetch(input, {
...init,
dispatcher: new Agent({ headersTimeout: timeout }),
});
};

const humanDiff = moment.duration(timeout).humanize();
this.#log(`Applying custom fetch w/timeout of ${humanDiff}.`);
return noTimeoutFetch;
} catch (error) {
this.#log("Error applying custom fetch - using default fetch", error);
return fetch;
}
}

streamingEnabled() {
return "streamGetChatCompletion" in this;
}
Expand Down
3 changes: 3 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,9 @@ function dumpENV() {

// Specify Chromium args for collector
"ANYTHINGLLM_CHROMIUM_ARGS",

// Allow setting a custom response timeout for Ollama
"OLLAMA_RESPONSE_TIMEOUT",
];

// Simple sanitization of each value to prevent ENV injection via newline or quote escaping.
Expand Down