From 8998300f74c01400701d59f898ced39746762285 Mon Sep 17 00:00:00 2001
From: timothycarambat
Date: Mon, 29 Sep 2025 12:29:50 -0700
Subject: [PATCH] Enable custom HTTP response timeout for ollama

---
 docker/.env.example                      |  1 +
 server/.env.example                      |  1 +
 server/utils/AiProviders/ollama/index.js | 43 +++++++++++++++++++++++-
 server/utils/helpers/updateENV.js        |  3 ++
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/docker/.env.example b/docker/.env.example
index f0fe46d1365..8e8ec73ce8e 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -44,6 +44,7 @@ GID='1000'
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 # OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
+# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, maximum time in milliseconds to wait for an Ollama response to conclude. Default is 5 minutes before aborting)
 
 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
diff --git a/server/.env.example b/server/.env.example
index e1f5ebfdd94..196fcf0cfbd 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -41,6 +41,7 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # OLLAMA_MODEL_PREF='llama2'
 # OLLAMA_MODEL_TOKEN_LIMIT=4096
 # OLLAMA_AUTH_TOKEN='your-ollama-auth-token-here (optional, only for ollama running behind auth - Bearer token)'
+# OLLAMA_RESPONSE_TIMEOUT=7200000 (optional, maximum time in milliseconds to wait for an Ollama response to conclude. Default is 5 minutes before aborting)
 
 # LLM_PROVIDER='togetherai'
 # TOGETHER_AI_API_KEY='my-together-ai-key'
diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index d7c8b15e98c..470a91fd08a 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -31,7 +31,11 @@ class OllamaAILLM {
     const headers = this.authToken
       ? { Authorization: `Bearer ${this.authToken}` }
       : {};
-    this.client = new Ollama({ host: this.basePath, headers: headers });
+    this.client = new Ollama({
+      host: this.basePath,
+      headers: headers,
+      fetch: this.#applyFetch(),
+    });
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
     this.#log(
@@ -55,6 +59,43 @@ class OllamaAILLM {
     );
   }
 
+  /**
+   * Apply a custom fetch function to the Ollama client.
+   * This is useful when we want to bypass the default 5m timeout of the global
+   * fetch for machines which produce responses very slowly.
+   * @returns {Function} The custom fetch function.
+   */
+  #applyFetch() {
+    try {
+      if (!("OLLAMA_RESPONSE_TIMEOUT" in process.env)) return fetch;
+      const { Agent } = require("undici");
+      const moment = require("moment");
+      let timeout = process.env.OLLAMA_RESPONSE_TIMEOUT;
+
+      if (!timeout || isNaN(Number(timeout)) || Number(timeout) <= 5 * 60_000) {
+        this.#log(
+          "Timeout option was not set, is not a number, or is not greater than 5 minutes in ms - falling back to default",
+          { timeout }
+        );
+        return fetch;
+      } else timeout = Number(timeout);
+
+      const noTimeoutFetch = (input, init = {}) => {
+        return fetch(input, {
+          ...init,
+          dispatcher: new Agent({ headersTimeout: timeout }),
+        });
+      };
+
+      const humanDiff = moment.duration(timeout).humanize();
+      this.#log(`Applying custom fetch w/timeout of ${humanDiff}.`);
+      return noTimeoutFetch;
+    } catch (error) {
+      this.#log("Error applying custom fetch - using default fetch", error);
+      return fetch;
+    }
+  }
+
   streamingEnabled() {
     return "streamGetChatCompletion" in this;
   }
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 9032237833e..04484d09b91 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -1170,6 +1170,9 @@ function dumpENV() {
 
     // Specify Chromium args for collector
     "ANYTHINGLLM_CHROMIUM_ARGS",
+
+    // Allow setting a custom response timeout for Ollama
+    "OLLAMA_RESPONSE_TIMEOUT",
   ];
 
   // Simple sanitization of each value to prevent ENV injection via newline or quote escaping.
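
Note (not part of the commit): the patch works because Node 18+'s global fetch
is undici under the hood, and undici's Agent accepts a `headersTimeout` option
whose default is 300000 ms - the 5-minute ceiling this change lifts. The sketch
below is a minimal standalone version of the same technique so it can be tried
outside the class; the endpoint, model name, and fallback value are
illustrative assumptions, not part of the change.

    // sketch.js - standalone version of the patched fetch wrapper.
    // Assumes Node 18+ (global fetch backed by undici) and `npm i undici`.
    const { Agent } = require("undici");

    // The 2h fallback mirrors the 7200000 example from .env.example; everything
    // besides the env var name is an assumption for illustration.
    const timeoutMs = Number(process.env.OLLAMA_RESPONSE_TIMEOUT ?? 7_200_000);

    // headersTimeout bounds how long undici waits for the response *headers* to
    // arrive - the phase a slow generation stalls on - without capping the
    // streamed body that follows.
    const patientFetch = (input, init = {}) =>
      fetch(input, {
        ...init,
        dispatcher: new Agent({ headersTimeout: timeoutMs }),
      });

    // Hypothetical call against a locally running Ollama instance:
    patientFetch("http://127.0.0.1:11434/api/generate", {
      method: "POST",
      body: JSON.stringify({ model: "llama2", prompt: "Hello", stream: false }),
    })
      .then((res) => res.json())
      .then((json) => console.log(json.response))
      .catch((err) => console.error("Request failed or timed out:", err));

Raising headersTimeout rather than passing an AbortSignal leaves the streamed
response body unbounded once headers arrive, which matches the PR's goal of
tolerating slow time-to-first-token on underpowered machines.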