
Add Kluster as a Supported Provider #3680

Open · wants to merge 3 commits into base: master
1 change: 1 addition & 0 deletions README.md
@@ -101,6 +101,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
- [xAI](https://x.ai/)
- [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
- [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
- [kluster.ai](https://kluster.ai)

**Embedder models:**

56 changes: 56 additions & 0 deletions frontend/src/components/LLMSelection/KlusterLLMOptions/index.jsx
@@ -0,0 +1,56 @@
import React, { useState } from "react";
import { Info } from "@phosphor-icons/react";
import { Tooltip } from "react-tooltip";

export default function KlusterLLMOptions({ settings }) {
const [apiKey, setApiKey] = useState(settings?.KlusterApiKey || "");
const [maxTokens, setMaxTokens] = useState(
settings?.KlusterMaxTokens || 1024
);

return (
<div className="flex flex-col gap-y-1">
<div className="flex flex-col gap-y-2">
<label className="text-white text-sm font-semibold block">
API Key
<span className="text-red-400 ml-1">*</span>
</label>
<input
type="password"
name="KlusterApiKey"
className="bg-theme-settings-input-bg text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5"
placeholder="Enter your Kluster.ai API key"
value={apiKey}
onChange={(e) => setApiKey(e.target.value)}
required
/>
<div className="flex items-center gap-x-2 text-white/60 text-xs">
<Info size={14} />
<span>
You can find your API key in the Kluster.ai developer console
</span>
</div>
</div>

<div className="flex flex-col gap-y-2">
<label className="text-white text-sm font-semibold block">
Max Tokens
</label>
<input
type="number"
name="KlusterMaxTokens"
className="bg-theme-settings-input-bg text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5"
placeholder="Enter max tokens (default: 1024)"
value={maxTokens}
onChange={(e) => setMaxTokens(Number(e.target.value))}
/>
<div className="flex items-center gap-x-2 text-white/60 text-xs">
<Info size={14} />
<span>
Maximum number of tokens to generate in the response (default: 1024)
</span>
</div>
</div>
</div>
);
}
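
Note: the two inputs above rely on their `name` attributes being picked up by the settings form. A minimal sketch of the server-side env mapping this implies is below; the `KlusterKeyMapping` shape and the validator style are assumptions, but the env key names come straight from this PR's agent provider (`process.env.KLUSTER_API_KEY` and `KLUSTER_MAX_TOKENS`):

```js
// Hypothetical sketch (not in this diff): mapping the form field names to
// the env vars the server code reads. Validator style is assumed.
const KlusterKeyMapping = {
  KlusterApiKey: {
    envKey: "KLUSTER_API_KEY",
    checks: [(value) => (value ? null : "API key cannot be empty")],
  },
  KlusterMaxTokens: {
    envKey: "KLUSTER_MAX_TOKENS",
    checks: [(value) => (Number(value) > 0 ? null : "Must be a positive number")],
  },
};
```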
Binary file added frontend/src/media/llmprovider/kluster.png
10 changes: 10 additions & 0 deletions frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -31,6 +31,7 @@ import APIPieLogo from "@/media/llmprovider/apipie.png";
import XAILogo from "@/media/llmprovider/xai.png";
import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
import PPIOLogo from "@/media/llmprovider/ppio.png";
import KlusterLogo from "@/media/llmprovider/kluster.png";

import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -59,6 +60,7 @@ import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions";
import KlusterLLMOptions from "@/components/LLMSelection/KlusterLLMOptions";

import LLMItem from "@/components/LLMSelection/LLMItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@@ -300,6 +302,14 @@ export const AVAILABLE_LLM_PROVIDERS = [
description: "Run xAI's powerful LLMs like Grok-2 and more.",
requiredConfig: ["XAIApiKey", "XAIModelPref"],
},
{
name: "Kluster.ai",
value: "kluster",
logo: KlusterLogo,
options: (settings) => <KlusterLLMOptions settings={settings} />,
description: "Run powerful open-source models from Kluster.ai",
requiredConfig: ["KlusterApiKey"],
},
];

export default function GeneralLLMPreference() {
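
Note on the `requiredConfig` field added above: in this registry it lists the settings keys that must be present before the provider counts as configured. A hedged sketch of that kind of check (the helper name is invented for illustration):

```js
// Hypothetical helper (not in this diff): a provider is "configured" once
// every key it lists in requiredConfig has a value in the saved settings.
function isProviderConfigured(provider, settings = {}) {
  return (provider.requiredConfig || []).every((key) => Boolean(settings[key]));
}

// isProviderConfigured({ requiredConfig: ["KlusterApiKey"] }, { KlusterApiKey: "sk-..." }) === true
```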
@@ -31,6 +31,7 @@ const ENABLED_PROVIDERS = [
"xai",
"nvidia-nim",
"gemini",
"kluster",
// TODO: More agent support.
// "cohere", // Has tool calling and will need to build explicit support
// "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
27 changes: 27 additions & 0 deletions server/utils/AiProviders/base.js
@@ -0,0 +1,27 @@
class BaseLLM {
constructor(embedder, model = null) {
if (!embedder) throw new Error("No embedder provided to LLM!");
this.embedder = embedder;
this.model = model;
}

async embedTextInput(textInput) {
throw new Error("Method not implemented!");
}

async embedChunks(textChunks = []) {
throw new Error("Method not implemented!");
}

async getChatCompletion(messages = [], options = {}) {
throw new Error("Method not implemented!");
}

async getEmbedding(text) {
throw new Error("Method not implemented!");
}
}

module.exports = {
BaseLLM,
};
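
To illustrate the contract `base.js` defines: a subclass overrides the stubs it needs, typically delegating embeddings to the injected embedder. A minimal sketch (the `EchoLLM` class is a made-up example, not part of this PR):

```js
const { BaseLLM } = require("./base");

// Minimal BaseLLM subclass sketch: embeddings delegate to the embedder
// passed into the constructor; chat just echoes the last user message.
class EchoLLM extends BaseLLM {
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }

  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async getChatCompletion(messages = [], _options = {}) {
    const last = messages[messages.length - 1];
    return { textResponse: `echo: ${last?.content ?? ""}` };
  }
}
```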
137 changes: 137 additions & 0 deletions server/utils/AiProviders/kluster.js
@@ -0,0 +1,137 @@
const KlusterProvider = require("../agents/aibitat/providers/kluster");
const { BaseLLM } = require("./base");
const { MODEL_MAP } = require("./modelMap");
const {
handleDefaultStreamResponseV2,
formatChatHistory,
} = require("../helpers/chat/responses");
const {
LLMPerformanceMonitor,
} = require("../helpers/chat/LLMPerformanceMonitor");
const { messageArrayCompressor } = require("../helpers/chat");

class KlusterLLM extends BaseLLM {
static provider = "kluster";
static models = [
"klusterai/Meta-Llama-3.3-70B-Instruct-Turbo",
"klusterai/Meta-Llama-3.1-8B-Instruct-Turbo",
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
"deepseek-ai/DeepSeek-R1",
"deepseek-ai/DeepSeek-V3-0324",
"google/gemma-3-27b-it",
"Qwen/Qwen2.5-VL-7B-Instruct",
];

constructor(embedder, model = null) {
super(embedder, model);
this.model = model || KlusterLLM.models[0];
this.provider = new KlusterProvider({
model: this.model,
});
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
this.defaultTemp = 0.7;
}

static promptWindowLimit(modelName) {
return MODEL_MAP.kluster?.[modelName] ?? 8192; // Default to 8k tokens if not specified
}

promptWindowLimit() {
return KlusterLLM.promptWindowLimit(this.model);
}

streamingEnabled() {
return true;
}

#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}

constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
attachments = [],
}) {
return [
{
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
},
...formatChatHistory(chatHistory),
{
role: "user",
content: userPrompt,
},
];
}

async compressMessages(promptArgs = {}, rawHistory = []) {
const messages = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messages, rawHistory);
}

async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}

async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}

async getChatCompletion(messages = [], { temperature = 0.7 } = {}) {
const result = await LLMPerformanceMonitor.measureAsyncFunction(
this.provider.complete(messages, [], { temperature })
);

return {
textResponse: result.output,
metrics: {
prompt_tokens: 0, // token usage is not surfaced by the provider wrapper
completion_tokens: 0,
total_tokens: 0,
outputTps: 0,
duration: result.duration,
},
};
}

async streamGetChatCompletion(messages = [], { temperature = 0.7 } = {}) {
const stream = await this.provider.complete(messages, [], {
stream: true,
temperature,
});
const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
stream,
messages
);
return measuredStreamRequest;
}

handleStream(response, stream, responseProps) {
return handleDefaultStreamResponseV2(response, stream, responseProps);
}

async getEmbedding(text) {
return await this.embedder.getEmbedding(text);
}
}

module.exports = {
KlusterLLM,
};
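
A hedged usage sketch for the class above (not part of the diff; assumes `KLUSTER_API_KEY` is set and that `getEmbeddingEngineSelection` resolves the configured embedder, as elsewhere in the server helpers):

```js
const { KlusterLLM } = require("./kluster");
const { getEmbeddingEngineSelection } = require("../helpers");

async function demo() {
  // Model name comes from the static list above; any other name would
  // fall back to the 8192-token window in promptWindowLimit().
  const llm = new KlusterLLM(
    getEmbeddingEngineSelection(),
    "deepseek-ai/DeepSeek-R1"
  );
  const { textResponse, metrics } = await llm.getChatCompletion(
    [{ role: "user", content: "Hello!" }],
    { temperature: 0.7 }
  );
  console.log(textResponse, metrics.duration);
}
```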
10 changes: 10 additions & 0 deletions server/utils/AiProviders/modelMap.js
@@ -116,6 +116,16 @@ const MODEL_MAP = {
xai: {
"grok-beta": 131072,
},
kluster: {
"klusterai/Meta-Llama-3.3-70B-Instruct-Turbo": 8192,
"klusterai/Meta-Llama-3.1-8B-Instruct-Turbo": 8192,
"meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 8192,
"meta-llama/Llama-4-Scout-17B-16E-Instruct": 8192,
"deepseek-ai/DeepSeek-R1": 8192,
"deepseek-ai/DeepSeek-V3-0324": 8192,
"google/gemma-3-27b-it": 8192,
"Qwen/Qwen2.5-VL-7B-Instruct": 8192,
},
};

module.exports = { MODEL_MAP };
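
Worth noting: every Kluster entry is pinned to a conservative 8192-token window, and `KlusterLLM.promptWindowLimit` falls back to the same value for unlisted models, e.g.:

```js
const { MODEL_MAP } = require("./modelMap");

// Mirrors KlusterLLM.promptWindowLimit(modelName):
MODEL_MAP.kluster?.["deepseek-ai/DeepSeek-R1"] ?? 8192; // 8192 (listed)
MODEL_MAP.kluster?.["some/unlisted-model"] ?? 8192; // 8192 (fallback)
```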
86 changes: 86 additions & 0 deletions server/utils/agents/aibitat/providers/kluster.js
@@ -0,0 +1,86 @@
const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const { toValidNumber } = require("../../../http/index.js");

/**
 * The agent provider for the Kluster.ai API.
 * Kluster.ai is OpenAI-compatible, so this provider drives the OpenAI SDK
 * against a custom baseURL rather than extending GenericOpenAiProvider.
 */
class KlusterProvider extends InheritMultiple([Provider, UnTooled]) {
model;

constructor(config = {}) {
super();
const { model = "klusterai/Meta-Llama-3.3-70B-Instruct-Turbo" } = config;
const client = new OpenAI({
baseURL: "https://api.kluster.ai/v1",
apiKey: process.env.KLUSTER_API_KEY ?? null,
maxRetries: 3,
});

this._client = client;
this.model = model;
this.verbose = true;
this.maxTokens = process.env.KLUSTER_MAX_TOKENS
? toValidNumber(process.env.KLUSTER_MAX_TOKENS, 1024)
: 1024;
}

get client() {
return this._client;
}

async #handleFunctionCallChat({ messages = [] }) {
return await this.client.chat.completions
.create({
model: this.model,
temperature: 0,
messages,
max_tokens: this.maxTokens,
})
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Kluster.ai chat: No results!");
if (result.choices.length === 0)
throw new Error("Kluster.ai chat: No results length!");
return result.choices[0].message.content;
});
}

async complete(messages, functions = [], options = {}) {
try {
if (functions.length > 0) {
return await this.#handleFunctionCallChat({ messages });
}

const response = await this.client.chat.completions.create({
model: this.model,
messages,
max_tokens: this.maxTokens,
stream: options.stream || false,
temperature: options.temperature || 0.7,
});

if (options.stream) {
return response;
}

if (!response.hasOwnProperty("choices"))
throw new Error("Kluster.ai chat: No results!");
if (response.choices.length === 0)
throw new Error("Kluster.ai chat: No results length!");

return response.choices[0].message.content;
} catch (error) {
throw new Error(`Kluster.ai chat failed: ${error.message}`);
}
}

getCost(_usage) {
return 0; // cost tracking is not implemented for this provider
}
}

module.exports = KlusterProvider;
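
A hedged sketch of exercising the agent provider directly (not in the diff; assumes `KLUSTER_API_KEY` is exported in the environment):

```js
const KlusterProvider = require("./kluster");

async function run() {
  const provider = new KlusterProvider({
    model: "klusterai/Meta-Llama-3.1-8B-Instruct-Turbo",
  });
  // With an empty functions array, complete() takes the plain
  // chat-completions path and resolves to the message text.
  const text = await provider.complete(
    [{ role: "user", content: "Summarize Kluster.ai in one sentence." }],
    []
  );
  console.log(text);
}
```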
6 changes: 6 additions & 0 deletions server/utils/helpers/index.js
@@ -200,6 +200,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
case "ppio":
const { PPIOLLM } = require("../AiProviders/ppio");
return new PPIOLLM(embedder, model);
case "kluster":
const { KlusterLLM } = require("../AiProviders/kluster");
return new KlusterLLM(embedder, model);
default:
throw new Error(
`ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
@@ -344,6 +347,9 @@ function getLLMProviderClass({ provider = null } = {}) {
case "ppio":
const { PPIOLLM } = require("../AiProviders/ppio");
return PPIOLLM;
case "kluster":
const { KlusterLLM } = require("../AiProviders/kluster");
return KlusterLLM;
default:
return null;
}
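
With both switch branches in place, the factory resolves the new provider end to end. A minimal sketch (assumes `LLM_PROVIDER=kluster` and `KLUSTER_API_KEY` in the server env; the require path depends on the caller's location):

```js
const { getLLMProvider, getLLMProviderClass } = require("../utils/helpers");

// Returns a ready KlusterLLM instance wired to the configured embedder.
const llm = getLLMProvider({
  provider: "kluster",
  model: "klusterai/Meta-Llama-3.3-70B-Instruct-Turbo",
});

// Returns the class itself, e.g. for static promptWindowLimit lookups.
const KlusterClass = getLLMProviderClass({ provider: "kluster" });
```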