这是indexloc提供的服务，不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 52 additions & 8 deletions frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import React, { useEffect, useState } from "react";
import System from "@/models/system";
import PreLoader from "@/components/Preloader";
import { OLLAMA_COMMON_URLS } from "@/utils/constants";
import { CaretDown, CaretUp } from "@phosphor-icons/react";
import { CaretDown, CaretUp, Info } from "@phosphor-icons/react";
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
import { Tooltip } from "react-tooltip";

export default function OllamaLLMOptions({ settings }) {
const {
Expand All @@ -18,15 +19,13 @@ export default function OllamaLLMOptions({ settings }) {
initialBasePath: settings?.OllamaLLMBasePath,
ENDPOINTS: OLLAMA_COMMON_URLS,
});

const [performanceMode, setPerformanceMode] = useState(
settings?.OllamaLLMPerformanceMode || "base"
);
const [maxTokens, setMaxTokens] = useState(
settings?.OllamaLLMTokenLimit || 4096
);

const handleMaxTokensChange = (e) => {
setMaxTokens(Number(e.target.value));
};

return (
<div className="w-full flex flex-col gap-y-7">
<div className="w-full flex items-start gap-[36px] mt-1.5">
Expand All @@ -46,7 +45,7 @@ export default function OllamaLLMOptions({ settings }) {
defaultChecked="4096"
min={1}
value={maxTokens}
onChange={handleMaxTokensChange}
onChange={(e) => setMaxTokens(Number(e.target.value))}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
Expand All @@ -64,7 +63,7 @@ export default function OllamaLLMOptions({ settings }) {
}}
className="text-white hover:text-white/70 flex items-center text-sm"
>
{showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
{showAdvancedControls ? "Hide" : "Show"} advanced settings
{showAdvancedControls ? (
<CaretUp size={14} className="ml-1" />
) : (
Expand Down Expand Up @@ -134,12 +133,57 @@ export default function OllamaLLMOptions({ settings }) {
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
rel="noreferrer"
>
{" "}
Learn more &rarr;
</a>
</p>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold mb-2 flex items-center">
Performance Mode
<Info
size={16}
className="ml-2 text-white"
data-tooltip-id="performance-mode-tooltip"
/>
</label>
<select
name="OllamaLLMPerformanceMode"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
value={performanceMode}
onChange={(e) => setPerformanceMode(e.target.value)}
>
<option value="base">Base (Default)</option>
<option value="maximum">Maximum</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose the performance mode for the Ollama model.
</p>
<Tooltip
id="performance-mode-tooltip"
place="bottom"
className="tooltip !text-xs max-w-xs"
>
<p className="text-red-500">
<strong>Note:</strong> Only change this setting if you
understand its implications on performance and resource usage.
</p>
<br />
<p>
<strong>Base:</strong> Ollama automatically limits the context
to 2048 tokens, reducing VRAM usage. Suitable for most users.
</p>
<br />
<p>
<strong>Maximum:</strong> Uses the full context window (up to
Max Tokens). May increase VRAM usage significantly.
</p>
</Tooltip>
</div>
</div>
</div>
</div>
Expand Down
1 change: 1 addition & 0 deletions server/models/systemSettings.js
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ const SystemSettings = {
OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",

// TogetherAI Keys
TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
Expand Down
5 changes: 5 additions & 0 deletions server/utils/AiProviders/ollama/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class OllamaAILLM {

this.basePath = process.env.OLLAMA_BASE_PATH;
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
: 300; // Default 5-minute timeout for Ollama model loading.
Expand All @@ -33,6 +34,10 @@ class OllamaAILLM {
model: this.model,
keepAlive: this.keepAlive,
useMLock: true,
// There are currently only two performance settings, so if it's not "base" - it's max context.
...(this.performanceMode === "base"
? {}
: { numCtx: this.promptWindowLimit() }),
temperature,
});
}
Expand Down
4 changes: 4 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ const KEY_MAPPING = {
envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
OllamaLLMPerformanceMode: {
envKey: "OLLAMA_PERFORMANCE_MODE",
checks: [],
},
OllamaLLMKeepAliveSeconds: {
envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
checks: [isInteger],
Expand Down