diff --git a/README.md b/README.md
index a8199403b2..6187d570aa 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
- [LocalAI (all models)](https://localai.io/)
- [Together AI (chat models)](https://www.together.ai/)
- [Fireworks AI (chat models)](https://fireworks.ai/)
+- [AI/ML API (chat models)](https://aimlapi.com/models/?utm_source=anythingllm&utm_medium=github&utm_campaign=integration)
- [Perplexity (chat models)](https://www.perplexity.ai/)
- [OpenRouter (chat models)](https://openrouter.ai/)
- [DeepSeek (chat models)](https://deepseek.com/)
diff --git a/docker/.env.example b/docker/.env.example
index d0b6dbeb11..b8e6676959 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -129,6 +129,10 @@ GID='1000'
# DEEPSEEK_API_KEY='your-deepseek-api-key-here'
# DEEPSEEK_MODEL_PREF='deepseek-chat'
+# LLM_PROVIDER='aimlapi'
+# AIML_LLM_API_KEY='your-aimlapi-key'
+# AIML_MODEL_PREF='gpt-3.5-turbo'
+
# LLM_PROVIDER='ppio'
# PPIO_API_KEY='your-ppio-api-key-here'
# PPIO_MODEL_PREF=deepseek/deepseek-v3/community
@@ -182,6 +186,10 @@ GID='1000'
# GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc'
# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500
+# EMBEDDING_ENGINE='aimlapi'
+# AIML_EMBEDDER_API_KEY='your-aimlapi-key'
+# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
+
# EMBEDDING_ENGINE='gemini'
# GEMINI_EMBEDDING_API_KEY=
# EMBEDDING_MODEL_PREF='text-embedding-004'
@@ -339,4 +347,4 @@ GID='1000'
# Specify the target languages for when using OCR to parse images and PDFs.
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
-# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
\ No newline at end of file
+# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol
diff --git a/frontend/src/components/EmbeddingSelection/AimlApiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/AimlApiOptions/index.jsx
new file mode 100644
index 0000000000..18ed62be56
--- /dev/null
+++ b/frontend/src/components/EmbeddingSelection/AimlApiOptions/index.jsx
@@ -0,0 +1,110 @@
+import { useState, useEffect } from "react";
+import System from "@/models/system";
+
+export default function AimlApiOptions({ settings }) {
+ const [inputValue, setInputValue] = useState(settings?.AimlEmbedderApiKey);
+ const [apiKey, setApiKey] = useState(settings?.AimlEmbedderApiKey);
+
+  return (
+    <div className="w-full flex gap-[36px] mt-1.5">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">
+          AI/ML API Key
+        </label>
+        <input
+          type="password"
+          name="AimlEmbedderApiKey"
+          className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+          placeholder="AI/ML API Key"
+          defaultValue={settings?.AimlEmbedderApiKey ? "*".repeat(20) : ""}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+          onChange={(e) => setInputValue(e.target.value)}
+          onBlur={() => setApiKey(inputValue)}
+        />
+      </div>
+      <AimlApiEmbeddingModelSelection settings={settings} apiKey={apiKey} />
+    </div>
+  );
+}
+
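+// Loads embedding models from the `aimlapi-embed` custom-models endpoint once an
+// API key is present, grouped by developer/organization for display.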
+function AimlApiEmbeddingModelSelection({ apiKey, settings }) {
+ const [groupedModels, setGroupedModels] = useState({});
+ const [loading, setLoading] = useState(true);
+
+ useEffect(() => {
+ async function findModels() {
+ if (!apiKey) {
+ setGroupedModels({});
+ setLoading(true);
+ return;
+ }
+ setLoading(true);
+ const { models } = await System.customModels(
+ "aimlapi-embed",
+ typeof apiKey === "boolean" ? null : apiKey
+ );
+ if (models?.length > 0) {
+ const byDev = models.reduce((acc, model) => {
+ acc[model.organization] = acc[model.organization] || [];
+ acc[model.organization].push(model);
+ return acc;
+ }, {});
+ setGroupedModels(byDev);
+ }
+ setLoading(false);
+ }
+ findModels();
+ }, [apiKey]);
+
+ if (loading || Object.keys(groupedModels).length === 0) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">Embedding Model Selection</label>
+        <select name="EmbeddingModelPref" disabled={true}>
+          <option disabled={true} selected={true}>
+            {apiKey ? "-- loading available models --" : "-- waiting for API key --"}
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-3">Embedding Model Selection</label>
+      <select name="EmbeddingModelPref" required={true} defaultValue={settings?.EmbeddingModelPref}>
+        {Object.entries(groupedModels).map(([organization, models]) => (
+          <optgroup key={organization} label={organization}>
+            {models.map((model) => (
+              <option key={model.id} value={model.id}>{model.name}</option>
+            ))}
+          </optgroup>
+        ))}
+      </select>
+    </div>
+  );
+}
diff --git a/frontend/src/components/LLMSelection/AimlApiOptions/index.jsx b/frontend/src/components/LLMSelection/AimlApiOptions/index.jsx
new file mode 100644
index 0000000000..86c400c91f
--- /dev/null
+++ b/frontend/src/components/LLMSelection/AimlApiOptions/index.jsx
@@ -0,0 +1,111 @@
+import { useState, useEffect } from "react";
+import System from "@/models/system";
+
+export default function AimlApiOptions({ settings }) {
+ const [inputValue, setInputValue] = useState(settings?.AimlLlmApiKey);
+ const [apiKey, setApiKey] = useState(settings?.AimlLlmApiKey);
+
+  return (
+    <div className="flex gap-[36px] mt-1.5">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">
+          AI/ML API Key
+        </label>
+        <input
+          type="password"
+          name="AimlLlmApiKey"
+          className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+          placeholder="AI/ML API Key"
+          defaultValue={settings?.AimlLlmApiKey ? "*".repeat(20) : ""}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+          onChange={(e) => setInputValue(e.target.value)}
+          onBlur={() => setApiKey(inputValue)}
+        />
+      </div>
+      {!settings?.credentialsOnly && (
+        <AimlApiModelSelection settings={settings} apiKey={apiKey} />
+      )}
+    </div>
+  );
+}
+
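+// Loads chat models from the `aimlapi` custom-models endpoint once an API key
+// is present, grouped by organization for the model <optgroup> list.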
+function AimlApiModelSelection({ apiKey, settings }) {
+ const [groupedModels, setGroupedModels] = useState({});
+ const [loading, setLoading] = useState(true);
+
+ useEffect(() => {
+ async function findCustomModels() {
+ if (!apiKey) {
+ setGroupedModels({});
+ setLoading(true);
+ return;
+ }
+
+ setLoading(true);
+ const { models } = await System.customModels(
+ "aimlapi",
+ typeof apiKey === "boolean" ? null : apiKey
+ );
+ if (models?.length > 0) {
+ const byDev = models.reduce((acc, model) => {
+ acc[model.organization] = acc[model.organization] || [];
+ acc[model.organization].push(model);
+ return acc;
+ }, {});
+ setGroupedModels(byDev);
+ }
+ setLoading(false);
+ }
+ findCustomModels();
+ }, [apiKey]);
+
+ if (loading || Object.keys(groupedModels).length === 0) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">Chat Model Selection</label>
+        <select name="AimlModelPref" disabled={true}>
+          <option disabled={true} selected={true}>
+            {apiKey ? "-- loading available models --" : "-- waiting for API key --"}
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-3">Chat Model Selection</label>
+      <select name="AimlModelPref" required={true} defaultValue={settings?.AimlModelPref}>
+        {Object.entries(groupedModels).map(([organization, models]) => (
+          <optgroup key={organization} label={organization}>
+            {models.map((model) => (
+              <option key={model.id} value={model.id}>{model.name}</option>
+            ))}
+          </optgroup>
+        ))}
+      </select>
+    </div>
+  );
+}
diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js
index 82ef427cfd..9cb964f1e7 100644
--- a/frontend/src/hooks/useGetProvidersModels.js
+++ b/frontend/src/hooks/useGetProvidersModels.js
@@ -52,6 +52,7 @@ const groupedProviders = [
"novita",
"openrouter",
"ppio",
+ "aimlapi",
];
export default function useGetProviderModels(provider = null) {
const [defaultModels, setDefaultModels] = useState([]);
diff --git a/frontend/src/media/llmprovider/aimlapi.png b/frontend/src/media/llmprovider/aimlapi.png
new file mode 100644
index 0000000000..b20cfbeff7
Binary files /dev/null and b/frontend/src/media/llmprovider/aimlapi.png differ
diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
index de27acb80a..33a9ccc1dc 100644
--- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
@@ -37,6 +37,8 @@ import ModalWrapper from "@/components/ModalWrapper";
import CTAButton from "@/components/lib/CTAButton";
import { useTranslation } from "react-i18next";
import MistralAiOptions from "@/components/EmbeddingSelection/MistralAiOptions";
+import AimlApiLogo from "@/media/llmprovider/aimlapi.png";
+import AimlApiOptions from "@/components/EmbeddingSelection/AimlApiOptions";
const EMBEDDERS = [
{
@@ -118,6 +120,13 @@ const EMBEDDERS = [
     options: (settings) => <MistralAiOptions settings={settings} />,
description: "Run powerful embedding models from Mistral AI.",
},
+ {
+ name: "AI/ML API",
+ value: "aimlapi",
+ logo: AimlApiLogo,
+    options: (settings) => <AimlApiOptions settings={settings} />,
+ description: "Use embedding models hosted on AI/ML API.",
+ },
{
name: "Generic OpenAI",
value: "generic-openai",
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index f9aceec189..7e0013f89f 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -32,6 +32,7 @@ import XAILogo from "@/media/llmprovider/xai.png";
import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
import PPIOLogo from "@/media/llmprovider/ppio.png";
import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
+import AimlApiLogo from "@/media/llmprovider/aimlapi.png";
import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
@@ -61,6 +62,7 @@ import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions";
import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
+import AimlApiOptions from "@/components/LLMSelection/AimlApiOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@@ -263,6 +265,14 @@ export const AVAILABLE_LLM_PROVIDERS = [
description: "Run DeepSeek's powerful LLMs.",
requiredConfig: ["DeepSeekApiKey"],
},
+ {
+ name: "AI/ML API",
+ value: "aimlapi",
+ logo: AimlApiLogo,
+    options: (settings) => <AimlApiOptions settings={settings} />,
+ description: "Access 300+ AI models with enterprise uptime.",
+ requiredConfig: ["AimlLlmApiKey"],
+ },
{
name: "PPIO",
value: "ppio",
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index bc48209da9..4b6e819dd8 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -38,6 +38,7 @@ import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
import PPIOLogo from "@/media/llmprovider/ppio.png";
import PGVectorLogo from "@/media/vectordbs/pgvector.png";
import DPAISLogo from "@/media/llmprovider/dpais.png";
+import AimlApiLogo from "@/media/llmprovider/aimlapi.png";
import React, { useState, useEffect } from "react";
import paths from "@/utils/paths";
import { useNavigate } from "react-router-dom";
@@ -235,6 +236,14 @@ export const LLM_SELECTION_PRIVACY = {
],
logo: PPIOLogo,
},
+ aimlapi: {
+ name: "AI/ML API",
+ description: [
+ "Your chats will not be used for training",
+ "Your prompts and document text used in response creation are visible to AI/ML API",
+ ],
+ logo: AimlApiLogo,
+ },
dpais: {
name: "Dell Pro AI Studio",
description: [
@@ -379,6 +388,14 @@ export const EMBEDDING_ENGINE_PRIVACY = {
],
logo: MistralLogo,
},
+ aimlapi: {
+ name: "AI/ML API",
+ description: [
+ "Your document text is visible to AI/ML API",
+ "Your documents are not used for training",
+ ],
+ logo: AimlApiLogo,
+ },
litellm: {
name: "LiteLLM",
description: [
diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
index 02d97893a7..4b8546d4f2 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
@@ -27,6 +27,7 @@ import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
import CohereLogo from "@/media/llmprovider/cohere.png";
import PPIOLogo from "@/media/llmprovider/ppio.png";
import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
+import AimlApiLogo from "@/media/llmprovider/aimlapi.png";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -55,6 +56,7 @@ import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions";
import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
+import AimlApiOptions from "@/components/LLMSelection/AimlApiOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import System from "@/models/system";
@@ -226,6 +228,13 @@ const LLMS = [
     options: (settings) => <DeepSeekOptions settings={settings} />,
description: "Run DeepSeek's powerful LLMs.",
},
+ {
+ name: "AI/ML API",
+ value: "aimlapi",
+ logo: AimlApiLogo,
+    options: (settings) => <AimlApiOptions settings={settings} />,
+ description: "Access 300+ AI models with enterprise uptime.",
+ },
{
name: "PPIO",
value: "ppio",
diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx
index 6baae1ddee..fabf8449df 100644
--- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx
+++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx
@@ -31,6 +31,7 @@ const ENABLED_PROVIDERS = [
"xai",
"nvidia-nim",
"gemini",
+ "aimlapi",
// TODO: More agent support.
// "cohere", // Has tool calling and will need to build explicit support
// "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
diff --git a/server/.env.example b/server/.env.example
index 12fa5ec226..19d4756076 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -57,6 +57,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# DEEPSEEK_API_KEY=YOUR_API_KEY
# DEEPSEEK_MODEL_PREF='deepseek-chat'
+# LLM_PROVIDER='aimlapi'
+# AIML_LLM_API_KEY='your-aimlapi-key'
+# AIML_MODEL_PREF='gpt-3.5-turbo'
+
# LLM_PROVIDER='openrouter'
# OPENROUTER_API_KEY='my-openrouter-key'
# OPENROUTER_MODEL_PREF='openrouter/auto'
@@ -180,6 +184,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc'
# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500
+# EMBEDDING_ENGINE='aimlapi'
+# AIML_EMBEDDER_API_KEY='your-aimlapi-key'
+# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
+
# EMBEDDING_ENGINE='gemini'
# GEMINI_EMBEDDING_API_KEY=
# EMBEDDING_MODEL_PREF='text-embedding-004'
@@ -336,4 +344,4 @@ TTS_PROVIDER="native"
# Specify the target languages for when using OCR to parse images and PDFs.
# This is a comma separated list of language codes as a string. Unsupported languages will be ignored.
# Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes.
-# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol
\ No newline at end of file
+# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 8de54f6c9c..73ac6674d8 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -221,6 +221,7 @@ const SystemSettings = {
GenericOpenAiEmbeddingMaxConcurrentChunks:
process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS || 500,
GeminiEmbeddingApiKey: !!process.env.GEMINI_EMBEDDING_API_KEY,
+ AimlEmbedderApiKey: !!process.env.AIML_EMBEDDER_API_KEY,
// --------------------------------------------------------
// VectorDB Provider Selection Settings & Configs
@@ -588,6 +589,10 @@ const SystemSettings = {
PPIOApiKey: !!process.env.PPIO_API_KEY,
PPIOModelPref: process.env.PPIO_MODEL_PREF,
+ // AI/ML API Keys
+ AimlLlmApiKey: !!process.env.AIML_LLM_API_KEY,
+ AimlModelPref: process.env.AIML_MODEL_PREF,
+
// Dell Pro AI Studio Keys
DellProAiStudioBasePath: process.env.DPAIS_LLM_BASE_PATH,
DellProAiStudioModelPref: process.env.DPAIS_LLM_MODEL_PREF,
diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore
index 5e83df7bcc..b3c9a7a50d 100644
--- a/server/storage/models/.gitignore
+++ b/server/storage/models/.gitignore
@@ -9,4 +9,5 @@ gemini
togetherAi
tesseract
ppio
+aimlapi
context-windows/*
\ No newline at end of file
diff --git a/server/utils/AiProviders/aimlapi/index.js b/server/utils/AiProviders/aimlapi/index.js
new file mode 100644
index 0000000000..29a4e98a40
--- /dev/null
+++ b/server/utils/AiProviders/aimlapi/index.js
@@ -0,0 +1,328 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const {
+ LLMPerformanceMonitor,
+} = require("../../helpers/chat/LLMPerformanceMonitor");
+const {
+ handleDefaultStreamResponseV2,
+ formatChatHistory,
+} = require("../../helpers/chat/responses");
+const fs = require("fs");
+const path = require("path");
+const { safeJsonParse } = require("../../http");
+
+const cacheFolder = path.resolve(
+ process.env.STORAGE_DIR
+ ? path.resolve(process.env.STORAGE_DIR, "models", "aimlapi")
+ : path.resolve(__dirname, `../../../storage/models/aimlapi`)
+);
+const embedCacheFolder = path.resolve(cacheFolder, "embeddings");
+
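+// Minimal usage sketch (assumed, mirroring the other OpenAI-compatible providers):
+//   const llm = new AimlApiLLM(null, "gpt-3.5-turbo");
+//   const { textResponse, metrics } = await llm.getChatCompletion(messages, { temperature: 0.7 });
+// AIML_LLM_API_KEY must be set or the constructor throws.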
+class AimlApiLLM {
+ static BASE_URL = "https://api.aimlapi.com/v1";
+ static HEADERS = {
+ "HTTP-Referer": "https://anythingllm.com/",
+ "X-Title": "anything",
+ };
+ constructor(embedder = null, modelPreference = null) {
+ if (!process.env.AIML_LLM_API_KEY)
+ throw new Error("No AI/ML API key was set.");
+ const { OpenAI: OpenAIApi } = require("openai");
+ this.openai = new OpenAIApi({
+ apiKey: process.env.AIML_LLM_API_KEY,
+ baseURL: AimlApiLLM.BASE_URL,
+ defaultHeaders: AimlApiLLM.HEADERS,
+ });
+ this.model =
+ modelPreference || process.env.AIML_MODEL_PREF || "gpt-3.5-turbo";
+ this.limits = {
+ history: this.promptWindowLimit() * 0.15,
+ system: this.promptWindowLimit() * 0.15,
+ user: this.promptWindowLimit() * 0.7,
+ };
+
+ if (!fs.existsSync(cacheFolder))
+ fs.mkdirSync(cacheFolder, { recursive: true });
+ this.cacheModelPath = path.resolve(cacheFolder, "models.json");
+ this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
+
+ this.embedder = embedder ?? new NativeEmbedder();
+ this.defaultTemp = 0.7;
+ this.log(
+ `Initialized ${this.model} with context window ${this.promptWindowLimit()}`
+ );
+ }
+
+ log(text, ...args) {
+ console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+ }
+
+ #appendContext(contextTexts = []) {
+ if (!contextTexts || !contextTexts.length) return "";
+ return (
+ "\nContext:\n" +
+ contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")
+ );
+ }
+
+ async #syncModels() {
+ if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
+ return false;
+ this.log("Model cache is not present or stale. Fetching from AimlApi API.");
+ await fetchAimlApiModels();
+ return;
+ }
+
+ #cacheIsStale() {
+ const MAX_STALE = 6.048e8; // 1 Week in MS
+ if (!fs.existsSync(this.cacheAtPath)) return true;
+ const now = Number(new Date());
+ const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
+ return now - timestampMs > MAX_STALE;
+ }
+
+ models() {
+ if (!fs.existsSync(this.cacheModelPath)) return {};
+ return safeJsonParse(
+ fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }),
+ {}
+ );
+ }
+
+ streamingEnabled() {
+ return "streamGetChatCompletion" in this;
+ }
+
+ static promptWindowLimit(modelName) {
+ const cacheModelPath = path.resolve(cacheFolder, "models.json");
+ const availableModels = fs.existsSync(cacheModelPath)
+ ? safeJsonParse(
+ fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
+ {}
+ )
+ : {};
+ return availableModels[modelName]?.maxLength || 4096;
+ }
+
+ promptWindowLimit() {
+ const availableModels = this.models();
+ return availableModels[this.model]?.maxLength || 4096;
+ }
+
+ async isValidChatCompletionModel(modelName = "") {
+ await this.#syncModels();
+ const availableModels = this.models();
+ return Object.prototype.hasOwnProperty.call(availableModels, modelName);
+ }
+
+ #generateContent({ userPrompt, attachments = [] }) {
+ if (!attachments.length) return userPrompt;
+
+ const content = [{ type: "text", text: userPrompt }];
+ for (let attachment of attachments) {
+ content.push({
+ type: "image_url",
+ image_url: { url: attachment.contentString, detail: "high" },
+ });
+ }
+ return content.flat();
+ }
+
+ constructPrompt({
+ systemPrompt = "",
+ contextTexts = [],
+ chatHistory = [],
+ userPrompt = "",
+ attachments = [],
+ }) {
+ const prompt = {
+ role: "system",
+ content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+ };
+ return [
+ prompt,
+ ...formatChatHistory(chatHistory, this.#generateContent),
+ {
+ role: "user",
+ content: this.#generateContent({ userPrompt, attachments }),
+ },
+ ];
+ }
+
+ async getChatCompletion(messages = null, { temperature = 0.7 }) {
+ if (!(await this.isValidChatCompletionModel(this.model)))
+ throw new Error(
+ `AI/ML API chat: ${this.model} is not valid for chat completion!`
+ );
+
+ const result = await LLMPerformanceMonitor.measureAsyncFunction(
+ this.openai.chat.completions.create({
+ model: this.model,
+ messages,
+ temperature,
+ })
+ );
+
+ if (
+ !result.output.hasOwnProperty("choices") ||
+ result.output.choices.length === 0
+ )
+ return null;
+
+ return {
+ textResponse: result.output.choices[0].message.content,
+ metrics: {
+ prompt_tokens: result.output.usage.prompt_tokens || 0,
+ completion_tokens: result.output.usage.completion_tokens || 0,
+ total_tokens: result.output.usage.total_tokens || 0,
+ outputTps: result.output.usage.completion_tokens / result.duration,
+ duration: result.duration,
+ },
+ };
+ }
+
+ async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+ if (!(await this.isValidChatCompletionModel(this.model)))
+ throw new Error(
+ `AI/ML API chat: ${this.model} is not valid for chat completion!`
+ );
+
+ const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
+ this.openai.chat.completions.create({
+ model: this.model,
+ stream: true,
+ messages,
+ temperature,
+ }),
+ messages,
+ false
+ );
+ return measuredStreamRequest;
+ }
+
+ handleStream(response, stream, responseProps) {
+ return handleDefaultStreamResponseV2(response, stream, responseProps);
+ }
+
+ async embedTextInput(textInput) {
+ return await this.embedder.embedTextInput(textInput);
+ }
+ async embedChunks(textChunks = []) {
+ return await this.embedder.embedChunks(textChunks);
+ }
+
+ async compressMessages(promptArgs = {}, rawHistory = []) {
+ const { messageArrayCompressor } = require("../../helpers/chat");
+ const messageArray = this.constructPrompt(promptArgs);
+ return await messageArrayCompressor(this, messageArray, rawHistory);
+ }
+}
+
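+// Pulls the full model list from AI/ML API, keeps only chat-completion models,
+// and caches them to models.json keyed by model id as
+// { id, name, developer, maxLength } alongside a .cached_at timestamp.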
+async function fetchAimlApiModels(providedApiKey = null) {
+ const apiKey = providedApiKey || process.env.AIML_LLM_API_KEY || null;
+ return await fetch(`${AimlApiLLM.BASE_URL}/models`, {
+ method: "GET",
+ headers: {
+ "Content-Type": "application/json",
+ ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
+ ...AimlApiLLM.HEADERS,
+ },
+ })
+ .then((res) => res.json())
+ .then(({ data = [] }) => {
+ const models = {};
+ data
+ .filter((m) => m.type === "chat-completion")
+ .forEach((model) => {
+ const developer =
+ model.info?.developer ||
+ model.provider ||
+ model.id?.split("/")[0] ||
+ "AimlApi";
+ models[model.id] = {
+ id: model.id,
+ name: model.name || model.id,
+ developer: developer.charAt(0).toUpperCase() + developer.slice(1),
+ maxLength: model.context_length || model.max_tokens || 4096,
+ };
+ });
+
+ if (!fs.existsSync(cacheFolder))
+ fs.mkdirSync(cacheFolder, { recursive: true });
+ fs.writeFileSync(
+ path.resolve(cacheFolder, "models.json"),
+ JSON.stringify(models),
+ { encoding: "utf-8" }
+ );
+ fs.writeFileSync(
+ path.resolve(cacheFolder, ".cached_at"),
+ String(Number(new Date())),
+ { encoding: "utf-8" }
+ );
+
+ return models;
+ })
+ .catch((e) => {
+ console.error(e);
+ return {};
+ });
+}
+
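+// Same flow as fetchAimlApiModels, but filters for embedding models and caches
+// them under the embeddings/ subfolder so the embedder can validate its model.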
+async function fetchAimlApiEmbeddingModels(providedApiKey = null) {
+ const apiKey = providedApiKey || process.env.AIML_EMBEDDER_API_KEY || null;
+ return await fetch(`${AimlApiLLM.BASE_URL}/models`, {
+ method: "GET",
+ headers: {
+ "Content-Type": "application/json",
+ ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
+ ...AimlApiLLM.HEADERS,
+ },
+ })
+ .then((res) => res.json())
+ .then(({ data = [] }) => {
+ const models = {};
+ data
+ .filter((m) => m.type === "embedding")
+ .forEach((model) => {
+ const developer =
+ model.info?.developer ||
+ model.provider ||
+ model.id?.split("/")[0] ||
+ "AimlApi";
+ models[model.id] = {
+ id: model.id,
+ name: model.name || model.id,
+ developer: developer.charAt(0).toUpperCase() + developer.slice(1),
+ maxLength: model.context_length || model.max_tokens || 4096,
+ };
+ });
+
+ if (!fs.existsSync(embedCacheFolder))
+ fs.mkdirSync(embedCacheFolder, { recursive: true });
+ fs.writeFileSync(
+ path.resolve(embedCacheFolder, "models.json"),
+ JSON.stringify(models),
+ { encoding: "utf-8" }
+ );
+ fs.writeFileSync(
+ path.resolve(embedCacheFolder, ".cached_at"),
+ String(Number(new Date())),
+ { encoding: "utf-8" }
+ );
+
+ return models;
+ })
+ .catch((e) => {
+ console.error(e);
+ return {};
+ });
+}
+
+module.exports = {
+ AimlApiLLM,
+ fetchAimlApiModels,
+ fetchAimlApiEmbeddingModels,
+};
diff --git a/server/utils/EmbeddingEngines/aimlapi/index.js b/server/utils/EmbeddingEngines/aimlapi/index.js
new file mode 100644
index 0000000000..4fb3f73d47
--- /dev/null
+++ b/server/utils/EmbeddingEngines/aimlapi/index.js
@@ -0,0 +1,122 @@
+const { toChunks, maximumChunkLength } = require("../../helpers");
+const {
+ AimlApiLLM,
+ fetchAimlApiEmbeddingModels,
+} = require("../../AiProviders/aimlapi");
+const fs = require("fs");
+const path = require("path");
+const { safeJsonParse } = require("../../http");
+
+const cacheFolder = path.resolve(
+ process.env.STORAGE_DIR
+ ? path.resolve(process.env.STORAGE_DIR, "models", "aimlapi", "embeddings")
+ : path.resolve(__dirname, `../../../storage/models/aimlapi/embeddings`)
+);
+
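+// Minimal usage sketch (assumed; requires AIML_EMBEDDER_API_KEY to be set):
+//   const embedder = new AimlApiEmbedder();
+//   const vectors = await embedder.embedChunks(["chunk one", "chunk two"]);
+//   const single = await embedder.embedTextInput("a single string");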
+class AimlApiEmbedder {
+ constructor() {
+ if (!process.env.AIML_EMBEDDER_API_KEY)
+ throw new Error("No AI/ML API key was set.");
+ const { OpenAI: OpenAIApi } = require("openai");
+ this.openai = new OpenAIApi({
+ apiKey: process.env.AIML_EMBEDDER_API_KEY,
+ baseURL: AimlApiLLM.BASE_URL,
+ defaultHeaders: AimlApiLLM.HEADERS,
+ });
+ this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";
+ if (!fs.existsSync(cacheFolder))
+ fs.mkdirSync(cacheFolder, { recursive: true });
+ this.cacheModelPath = path.resolve(cacheFolder, "models.json");
+ this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
+ this.maxConcurrentChunks = 500;
+ this.embeddingMaxChunkLength = maximumChunkLength();
+ this.log(`Initialized ${this.model}`);
+ this.#syncModels().catch((e) =>
+ this.log(`Failed to sync models: ${e.message}`)
+ );
+ }
+
+ log(text, ...args) {
+ console.log(`\x1b[36m[AimlApiEmbedder]\x1b[0m ${text}`, ...args);
+ }
+
+ #cacheIsStale() {
+ const MAX_STALE = 6.048e8; // 1 Week in MS
+ if (!fs.existsSync(this.cacheAtPath)) return true;
+ const now = Number(new Date());
+ const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
+ return now - timestampMs > MAX_STALE;
+ }
+
+ async #syncModels() {
+ if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
+ return false;
+ this.log("Model cache is not present or stale. Fetching from AimlApi API.");
+ await fetchAimlApiEmbeddingModels();
+ return;
+ }
+
+ models() {
+ if (!fs.existsSync(this.cacheModelPath)) return {};
+ return safeJsonParse(
+ fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }),
+ {}
+ );
+ }
+
+ async isValidEmbeddingModel(modelName = "") {
+ await this.#syncModels();
+ const availableModels = this.models();
+ return Object.prototype.hasOwnProperty.call(availableModels, modelName);
+ }
+
+ async embedTextInput(textInput) {
+ const result = await this.embedChunks(
+ Array.isArray(textInput) ? textInput : [textInput]
+ );
+ return result?.[0] || [];
+ }
+
+ async embedChunks(textChunks = []) {
+ this.log(`Embedding ${textChunks.length} chunks...`);
+ const embeddingRequests = [];
+ for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
+ embeddingRequests.push(
+ new Promise((resolve) => {
+ this.openai.embeddings
+ .create({ model: this.model, input: chunk })
+ .then((result) => resolve({ data: result?.data, error: null }))
+ .catch((e) => {
+ e.type =
+ e?.response?.data?.error?.code ||
+ e?.response?.status ||
+ "failed_to_embed";
+ e.message = e?.response?.data?.error?.message || e.message;
+ resolve({ data: [], error: e });
+ });
+ })
+ );
+ }
+
+ const { data = [], error = null } = await Promise.all(
+ embeddingRequests
+ ).then((results) => {
+ const errors = results
+ .filter((res) => !!res.error)
+ .map((res) => res.error);
+ if (errors.length > 0) {
+ const unique = new Set();
+ errors.forEach((err) => unique.add(`[${err.type}]: ${err.message}`));
+ return { data: [], error: Array.from(unique).join(", ") };
+ }
+ return { data: results.map((r) => r.data || []).flat(), error: null };
+ });
+
+ if (error) throw new Error(`AimlApi Failed to embed: ${error}`);
+ return data.length > 0 && data.every((d) => d.hasOwnProperty("embedding"))
+ ? data.map((d) => d.embedding)
+ : null;
+ }
+}
+
+module.exports = { AimlApiEmbedder };
diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js
index 6e069defd3..50e873743e 100644
--- a/server/utils/agents/aibitat/index.js
+++ b/server/utils/agents/aibitat/index.js
@@ -826,6 +826,8 @@ ${this.getHistory({ to: route.to })
return new Providers.PPIOProvider({ model: config.model });
case "gemini":
return new Providers.GeminiProvider({ model: config.model });
+ case "aimlapi":
+ return new Providers.AimlApiProvider({ model: config.model });
case "dpais":
return new Providers.DellProAiStudioProvider({ model: config.model });
default:
diff --git a/server/utils/agents/aibitat/providers/aimlapi.js b/server/utils/agents/aibitat/providers/aimlapi.js
new file mode 100644
index 0000000000..8aeee5ca2b
--- /dev/null
+++ b/server/utils/agents/aibitat/providers/aimlapi.js
@@ -0,0 +1,90 @@
+const OpenAI = require("openai");
+const { AimlApiLLM } = require("../../../AiProviders/aimlapi");
+const Provider = require("./ai-provider.js");
+const { RetryError } = require("../error.js");
+
+
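+/**
+ * Agent provider for AI/ML API. Reuses the OpenAI-compatible chat completions
+ * client pointed at AimlApiLLM.BASE_URL and passes `functions` through for
+ * native tool calling, retrying on rate-limit and server errors.
+ */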
+class AimlApiProvider extends Provider {
+ model;
+
+ constructor(config = {}) {
+ const { model = "gpt-3.5-turbo" } = config;
+ const client = new OpenAI({
+ baseURL: AimlApiLLM.BASE_URL,
+ apiKey: process.env.AIML_LLM_API_KEY ?? null,
+ maxRetries: 3,
+ defaultHeaders: AimlApiLLM.HEADERS,
+ });
+ super(client);
+
+ this.model = model;
+ this.verbose = true;
+ }
+
+ async complete(messages, functions = []) {
+ try {
+ const response = await this.client.chat.completions.create({
+ model: this.model,
+ messages,
+ ...(Array.isArray(functions) && functions.length > 0
+ ? { functions }
+ : {}),
+ });
+
+ const completion = response.choices[0].message;
+ const cost = this.getCost(response.usage);
+
+ if (completion.function_call) {
+ let functionArgs = {};
+ try {
+ functionArgs = JSON.parse(completion.function_call.arguments);
+ } catch (error) {
+ return this.complete(
+ [
+ ...messages,
+ {
+ role: "function",
+ name: completion.function_call.name,
+ function_call: completion.function_call,
+ content: error?.message,
+ },
+ ],
+ functions
+ );
+ }
+
+ return {
+ result: null,
+ functionCall: {
+ name: completion.function_call.name,
+ arguments: functionArgs,
+ },
+ cost,
+ };
+ }
+
+ return {
+ result: completion.content,
+ cost,
+ };
+ } catch (error) {
+ if (error instanceof OpenAI.AuthenticationError) throw error;
+
+ if (
+ error instanceof OpenAI.RateLimitError ||
+ error instanceof OpenAI.InternalServerError ||
+ error instanceof OpenAI.APIError
+ ) {
+ throw new RetryError(error.message);
+ }
+
+ throw error;
+ }
+ }
+
+ getCost(_usage) {
+ return 0;
+ }
+}
+
+module.exports = AimlApiProvider;
diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js
index d8c174862e..8ea5814e9c 100644
--- a/server/utils/agents/aibitat/providers/index.js
+++ b/server/utils/agents/aibitat/providers/index.js
@@ -23,6 +23,7 @@ const NvidiaNimProvider = require("./nvidiaNim.js");
const PPIOProvider = require("./ppio.js");
const GeminiProvider = require("./gemini.js");
const DellProAiStudioProvider = require("./dellProAiStudio.js");
+const AimlApiProvider = require("./aimlapi.js");
module.exports = {
OpenAIProvider,
@@ -50,4 +51,5 @@ module.exports = {
PPIOProvider,
GeminiProvider,
DellProAiStudioProvider,
+ AimlApiProvider,
};
diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js
index 915e5a59be..6884a93ef5 100644
--- a/server/utils/agents/index.js
+++ b/server/utils/agents/index.js
@@ -185,6 +185,10 @@ class AgentHandler {
if (!process.env.PPIO_API_KEY)
throw new Error("PPIO API Key must be provided to use agents.");
break;
+ case "aimlapi":
+ if (!process.env.AIML_LLM_API_KEY)
+ throw new Error("AI/ML API key must be provided to use agents.");
+ break;
case "gemini":
if (!process.env.GEMINI_API_KEY)
throw new Error("Gemini API key must be provided to use agents.");
@@ -266,6 +270,8 @@ class AgentHandler {
return process.env.PPIO_MODEL_PREF ?? "qwen/qwen2.5-32b-instruct";
case "gemini":
return process.env.GEMINI_LLM_MODEL_PREF ?? "gemini-2.0-flash-lite";
+ case "aimlapi":
+ return process.env.AIML_MODEL_PREF ?? "gpt-3.5-turbo";
case "dpais":
return process.env.DPAIS_LLM_MODEL_PREF;
default:
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index cff97ff888..01b54b9e37 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -9,6 +9,10 @@ const { parseLMStudioBasePath } = require("../AiProviders/lmStudio");
const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim");
const { fetchPPIOModels } = require("../AiProviders/ppio");
const { GeminiLLM } = require("../AiProviders/gemini");
+const {
+ fetchAimlApiModels,
+ fetchAimlApiEmbeddingModels,
+} = require("../AiProviders/aimlapi");
const SUPPORT_CUSTOM_MODELS = [
"openai",
@@ -33,6 +37,8 @@ const SUPPORT_CUSTOM_MODELS = [
"gemini",
"ppio",
"dpais",
+ "aimlapi",
+ "aimlapi-embed",
];
async function getCustomModels(provider = "", apiKey = null, basePath = null) {
@@ -84,6 +90,10 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
return await getPPIOModels(apiKey);
case "dpais":
return await getDellProAiStudioModels(basePath);
+ case "aimlapi":
+ return await getAimlApiModels(apiKey);
+ case "aimlapi-embed":
+ return await getAimlApiEmbeddingModels(apiKey);
default:
return { models: [], error: "Invalid provider for custom models" };
}
@@ -675,6 +685,44 @@ async function getDellProAiStudioModels(basePath = null) {
}
}
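+// Lists AI/ML API chat models for the custom-model picker. When models load
+// with a user-provided key, that key is written back to process.env so later
+// requests can reuse it.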
+async function getAimlApiModels(apiKey = null) {
+ const knownModels = await fetchAimlApiModels(apiKey);
+  if (Object.keys(knownModels).length === 0)
+ return { models: [], error: null };
+
+ if (Object.keys(knownModels).length > 0 && !!apiKey)
+ process.env.AIML_LLM_API_KEY = apiKey;
+
+ const models = Object.values(knownModels).map((model) => {
+ return {
+ id: model.id,
+ organization: model.developer,
+ name: model.name,
+ };
+ });
+ return { models, error: null };
+}
+
+async function getAimlApiEmbeddingModels(apiKey = null) {
+ const knownModels = await fetchAimlApiEmbeddingModels(apiKey);
+  if (Object.keys(knownModels).length === 0)
+ return { models: [], error: null };
+
+ if (Object.keys(knownModels).length > 0 && !!apiKey)
+ process.env.AIML_EMBEDDER_API_KEY = apiKey;
+
+ const models = Object.values(knownModels).map((model) => {
+ return {
+ id: model.id,
+ organization: model.developer,
+ name: model.name,
+ };
+ });
+ return { models, error: null };
+}
+
module.exports = {
getCustomModels,
+ getAimlApiModels,
+ getAimlApiEmbeddingModels,
};
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 2017c618fa..18daba32bc 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -203,6 +203,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
case "ppio":
const { PPIOLLM } = require("../AiProviders/ppio");
return new PPIOLLM(embedder, model);
+ case "aimlapi":
+ const { AimlApiLLM } = require("../AiProviders/aimlapi");
+ return new AimlApiLLM(embedder, model);
case "dpais":
const { DellProAiStudioLLM } = require("../AiProviders/dellProAiStudio");
return new DellProAiStudioLLM(embedder, model);
@@ -260,6 +263,9 @@ function getEmbeddingEngineSelection() {
case "gemini":
const { GeminiEmbedder } = require("../EmbeddingEngines/gemini");
return new GeminiEmbedder();
+ case "aimlapi":
+ const { AimlApiEmbedder } = require("../EmbeddingEngines/aimlapi");
+ return new AimlApiEmbedder();
default:
return new NativeEmbedder();
}
@@ -350,6 +356,9 @@ function getLLMProviderClass({ provider = null } = {}) {
case "ppio":
const { PPIOLLM } = require("../AiProviders/ppio");
return PPIOLLM;
+ case "aimlapi":
+ const { AimlApiLLM } = require("../AiProviders/aimlapi");
+ return AimlApiLLM;
case "dpais":
const { DellProAiStudioLLM } = require("../AiProviders/dellProAiStudio");
return DellProAiStudioLLM;
@@ -417,6 +426,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
return process.env.NVIDIA_NIM_LLM_MODEL_PREF;
case "ppio":
return process.env.PPIO_API_KEY;
+ case "aimlapi":
+ return process.env.AIML_MODEL_PREF;
case "dpais":
return process.env.DPAIS_LLM_MODEL_PREF;
default:
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 87670830d9..b34cb3f9c4 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -203,6 +203,16 @@ const KEY_MAPPING = {
checks: [],
},
+ // AI/ML API Options
+ AimlLlmApiKey: {
+ envKey: "AIML_LLM_API_KEY",
+ checks: [isNotEmpty],
+ },
+ AimlModelPref: {
+ envKey: "AIML_MODEL_PREF",
+ checks: [isNotEmpty],
+ },
+
// Generic OpenAI InferenceSettings
GenericOpenAiBasePath: {
envKey: "GENERIC_OPEN_AI_BASE_PATH",
@@ -311,6 +321,11 @@ const KEY_MAPPING = {
checks: [nonZero],
},
+ AimlEmbedderApiKey: {
+ envKey: "AIML_EMBEDDER_API_KEY",
+ checks: [isNotEmpty],
+ },
+
// Vector Database Selection Settings
VectorDB: {
envKey: "VECTOR_DB",
@@ -784,6 +799,7 @@ function supportedLLM(input = "") {
"nvidia-nim",
"ppio",
"dpais",
+ "aimlapi",
].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`;
}
@@ -821,6 +837,7 @@ function supportedEmbeddingModel(input = "") {
"litellm",
"generic-openai",
"mistral",
+ "aimlapi",
];
return supported.includes(input)
? null