diff --git a/README.md b/README.md index a8199403b2..6187d570aa 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace - [LocalAI (all models)](https://localai.io/) - [Together AI (chat models)](https://www.together.ai/) - [Fireworks AI (chat models)](https://fireworks.ai/) +- [AI/ML API (chat models)](https://aimlapi.com/models/?utm_source=anythingllm&utm_medium=github&utm_campaign=integration) - [Perplexity (chat models)](https://www.perplexity.ai/) - [OpenRouter (chat models)](https://openrouter.ai/) - [DeepSeek (chat models)](https://deepseek.com/) diff --git a/docker/.env.example b/docker/.env.example index d0b6dbeb11..b8e6676959 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -129,6 +129,10 @@ GID='1000' # DEEPSEEK_API_KEY='your-deepseek-api-key-here' # DEEPSEEK_MODEL_PREF='deepseek-chat' +# LLM_PROVIDER='aimlapi' +# AIML_LLM_API_KEY='your-aimlapi-key' +# AIML_MODEL_PREF='gpt-3.5-turbo' + # LLM_PROVIDER='ppio' # PPIO_API_KEY='your-ppio-api-key-here' # PPIO_MODEL_PREF=deepseek/deepseek-v3/community @@ -182,6 +186,10 @@ GID='1000' # GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc' # GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500 +# EMBEDDING_ENGINE='aimlapi' +# AIML_EMBEDDER_API_KEY='your-aimlapi-key' +# EMBEDDING_MODEL_PREF='text-embedding-ada-002' + # EMBEDDING_ENGINE='gemini' # GEMINI_EMBEDDING_API_KEY= # EMBEDDING_MODEL_PREF='text-embedding-004' @@ -339,4 +347,4 @@ GID='1000' # Specify the target languages for when using OCR to parse images and PDFs. # This is a comma separated list of language codes as a string. Unsupported languages will be ignored. # Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes. -# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol \ No newline at end of file +# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol diff --git a/frontend/src/components/EmbeddingSelection/AimlApiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/AimlApiOptions/index.jsx new file mode 100644 index 0000000000..18ed62be56 --- /dev/null +++ b/frontend/src/components/EmbeddingSelection/AimlApiOptions/index.jsx @@ -0,0 +1,110 @@ +import { useState, useEffect } from "react"; +import System from "@/models/system"; + +export default function AimlApiOptions({ settings }) { + const [inputValue, setInputValue] = useState(settings?.AimlEmbedderApiKey); + const [apiKey, setApiKey] = useState(settings?.AimlEmbedderApiKey); + + return ( +
+
+
+ + setInputValue(e.target.value)} + onBlur={() => setApiKey(inputValue)} + /> +
+ +
+
+ ); +} + +function AimlApiEmbeddingModelSelection({ apiKey, settings }) { + const [groupedModels, setGroupedModels] = useState({}); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findModels() { + if (!apiKey) { + setGroupedModels({}); + setLoading(true); + return; + } + setLoading(true); + const { models } = await System.customModels( + "aimlapi-embed", + typeof apiKey === "boolean" ? null : apiKey + ); + if (models?.length > 0) { + const byDev = models.reduce((acc, model) => { + acc[model.organization] = acc[model.organization] || []; + acc[model.organization].push(model); + return acc; + }, {}); + setGroupedModels(byDev); + } + setLoading(false); + } + findModels(); + }, [apiKey]); + + if (loading || Object.keys(groupedModels).length === 0) { + return ( +
+ + +
+ ); + } + + return ( +
+ + +
+ ); +} diff --git a/frontend/src/components/LLMSelection/AimlApiOptions/index.jsx b/frontend/src/components/LLMSelection/AimlApiOptions/index.jsx new file mode 100644 index 0000000000..86c400c91f --- /dev/null +++ b/frontend/src/components/LLMSelection/AimlApiOptions/index.jsx @@ -0,0 +1,111 @@ +import { useState, useEffect } from "react"; +import System from "@/models/system"; + +export default function AimlApiOptions({ settings }) { + const [inputValue, setInputValue] = useState(settings?.AimlLlmApiKey); + const [apiKey, setApiKey] = useState(settings?.AimlLlmApiKey); + + return ( +
+
+ + setInputValue(e.target.value)} + onBlur={() => setApiKey(inputValue)} + /> +
+ {!settings?.credentialsOnly && ( + + )} +
+ ); +} + +function AimlApiModelSelection({ apiKey, settings }) { + const [groupedModels, setGroupedModels] = useState({}); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + if (!apiKey) { + setGroupedModels({}); + setLoading(true); + return; + } + + setLoading(true); + const { models } = await System.customModels( + "aimlapi", + typeof apiKey === "boolean" ? null : apiKey + ); + if (models?.length > 0) { + const byDev = models.reduce((acc, model) => { + acc[model.organization] = acc[model.organization] || []; + acc[model.organization].push(model); + return acc; + }, {}); + setGroupedModels(byDev); + } + setLoading(false); + } + findCustomModels(); + }, [apiKey]); + + if (loading || Object.keys(groupedModels).length === 0) { + return ( +
+ + +
+ ); + } + + return ( +
+ + +
+ ); +} diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js index 82ef427cfd..9cb964f1e7 100644 --- a/frontend/src/hooks/useGetProvidersModels.js +++ b/frontend/src/hooks/useGetProvidersModels.js @@ -52,6 +52,7 @@ const groupedProviders = [ "novita", "openrouter", "ppio", + "aimlapi", ]; export default function useGetProviderModels(provider = null) { const [defaultModels, setDefaultModels] = useState([]); diff --git a/frontend/src/media/llmprovider/aimlapi.png b/frontend/src/media/llmprovider/aimlapi.png new file mode 100644 index 0000000000..b20cfbeff7 Binary files /dev/null and b/frontend/src/media/llmprovider/aimlapi.png differ diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx index de27acb80a..33a9ccc1dc 100644 --- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx @@ -37,6 +37,8 @@ import ModalWrapper from "@/components/ModalWrapper"; import CTAButton from "@/components/lib/CTAButton"; import { useTranslation } from "react-i18next"; import MistralAiOptions from "@/components/EmbeddingSelection/MistralAiOptions"; +import AimlApiLogo from "@/media/llmprovider/aimlapi.png"; +import AimlApiOptions from "@/components/EmbeddingSelection/AimlApiOptions"; const EMBEDDERS = [ { @@ -118,6 +120,13 @@ const EMBEDDERS = [ options: (settings) => , description: "Run powerful embedding models from Mistral AI.", }, + { + name: "AI/ML API", + value: "aimlapi", + logo: AimlApiLogo, + options: (settings) => , + description: "Use embedding models hosted on AI/ML API.", + }, { name: "Generic OpenAI", value: "generic-openai", diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index f9aceec189..7e0013f89f 100644 --- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -32,6 +32,7 @@ import XAILogo from "@/media/llmprovider/xai.png"; import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png"; import PPIOLogo from "@/media/llmprovider/ppio.png"; import DellProAiStudioLogo from "@/media/llmprovider/dpais.png"; +import AimlApiLogo from "@/media/llmprovider/aimlapi.png"; import PreLoader from "@/components/Preloader"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; @@ -61,6 +62,7 @@ import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions"; import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions"; import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions"; import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions"; +import AimlApiOptions from "@/components/LLMSelection/AimlApiOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; @@ -263,6 +265,14 @@ export const AVAILABLE_LLM_PROVIDERS = [ description: "Run DeepSeek's powerful LLMs.", requiredConfig: ["DeepSeekApiKey"], }, + { + name: "AI/ML API", + value: "aimlapi", + logo: AimlApiLogo, + options: (settings) => , + description: "Access 300+ AI models with enterprise uptime.", + requiredConfig: ["AimlLlmApiKey"], + }, { name: "PPIO", value: "ppio", diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index bc48209da9..4b6e819dd8 
100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -38,6 +38,7 @@ import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; import PPIOLogo from "@/media/llmprovider/ppio.png"; import PGVectorLogo from "@/media/vectordbs/pgvector.png"; import DPAISLogo from "@/media/llmprovider/dpais.png"; +import AimlApiLogo from "@/media/llmprovider/aimlapi.png"; import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; import { useNavigate } from "react-router-dom"; @@ -235,6 +236,14 @@ export const LLM_SELECTION_PRIVACY = { ], logo: PPIOLogo, }, + aimlapi: { + name: "AI/ML API", + description: [ + "Your chats will not be used for training", + "Your prompts and document text used in response creation are visible to AI/ML API", + ], + logo: AimlApiLogo, + }, dpais: { name: "Dell Pro AI Studio", description: [ @@ -379,6 +388,14 @@ export const EMBEDDING_ENGINE_PRIVACY = { ], logo: MistralLogo, }, + aimlapi: { + name: "AI/ML API", + description: [ + "Your document text is visible to AI/ML API", + "Your documents are not used for training", + ], + logo: AimlApiLogo, + }, litellm: { name: "LiteLLM", description: [ diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 02d97893a7..4b8546d4f2 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -27,6 +27,7 @@ import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; import PPIOLogo from "@/media/llmprovider/ppio.png"; import DellProAiStudioLogo from "@/media/llmprovider/dpais.png"; +import AimlApiLogo from "@/media/llmprovider/aimlapi.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -55,6 +56,7 @@ import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions"; import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions"; import PPIOLLMOptions from "@/components/LLMSelection/PPIOLLMOptions"; import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions"; +import AimlApiOptions from "@/components/LLMSelection/AimlApiOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; @@ -226,6 +228,13 @@ const LLMS = [ options: (settings) => , description: "Run DeepSeek's powerful LLMs.", }, + { + name: "AI/ML API", + value: "aimlapi", + logo: AimlApiLogo, + options: (settings) => , + description: "Access 300+ AI models with enterprise uptime.", + }, { name: "PPIO", value: "ppio", diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx index 6baae1ddee..fabf8449df 100644 --- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx @@ -31,6 +31,7 @@ const ENABLED_PROVIDERS = [ "xai", "nvidia-nim", "gemini", + "aimlapi", // TODO: More agent support. // "cohere", // Has tool calling and will need to build explicit support // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested. 
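The element markup inside the two new AimlApiOptions components above did not survive extraction, so only the state hooks and event handlers are visible. Below is a minimal sketch of the likely shape of the LLM-side component, reconstructed from those surviving fragments (the inputValue/apiKey state, the onBlur commit, the credentialsOnly guard, and the AimlApiModelSelection dropdown); the class names, labels, and input attributes are illustrative assumptions, not the original markup.

import { useState } from "react";

// Hypothetical reconstruction of the garbled component body.
// AimlApiModelSelection is the grouped-model dropdown defined further down in the
// same file; it calls System.customModels("aimlapi", apiKey) and groups models by developer.
export default function AimlApiOptions({ settings }) {
  const [inputValue, setInputValue] = useState(settings?.AimlLlmApiKey);
  const [apiKey, setApiKey] = useState(settings?.AimlLlmApiKey);

  return (
    <div className="flex gap-x-4">
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-3">
          AI/ML API Key
        </label>
        {/* The key is held in local state and only committed on blur so the
            model dropdown refetches once the user finishes typing. */}
        <input
          type="password"
          name="AimlLlmApiKey"
          placeholder="AI/ML API Key"
          defaultValue={settings?.AimlLlmApiKey ? "*".repeat(20) : ""}
          required
          autoComplete="off"
          onChange={(e) => setInputValue(e.target.value)}
          onBlur={() => setApiKey(inputValue)}
        />
      </div>
      {/* The model picker is hidden when the form only collects credentials. */}
      {!settings?.credentialsOnly && (
        <AimlApiModelSelection settings={settings} apiKey={apiKey} />
      )}
    </div>
  );
}

The EmbeddingSelection variant presumably mirrors this shape, keyed on settings?.AimlEmbedderApiKey and rendering the AimlApiEmbeddingModelSelection dropdown instead.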
diff --git a/server/.env.example b/server/.env.example index 12fa5ec226..19d4756076 100644 --- a/server/.env.example +++ b/server/.env.example @@ -57,6 +57,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. # DEEPSEEK_API_KEY=YOUR_API_KEY # DEEPSEEK_MODEL_PREF='deepseek-chat' +# LLM_PROVIDER='aimlapi' +# AIML_LLM_API_KEY='your-aimlapi-key' +# AIML_MODEL_PREF='gpt-3.5-turbo' + # LLM_PROVIDER='openrouter' # OPENROUTER_API_KEY='my-openrouter-key' # OPENROUTER_MODEL_PREF='openrouter/auto' @@ -180,6 +184,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. # GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc' # GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500 +# EMBEDDING_ENGINE='aimlapi' +# AIML_EMBEDDER_API_KEY='your-aimlapi-key' +# EMBEDDING_MODEL_PREF='text-embedding-ada-002' + # EMBEDDING_ENGINE='gemini' # GEMINI_EMBEDDING_API_KEY= # EMBEDDING_MODEL_PREF='text-embedding-004' @@ -336,4 +344,4 @@ TTS_PROVIDER="native" # Specify the target languages for when using OCR to parse images and PDFs. # This is a comma separated list of language codes as a string. Unsupported languages will be ignored. # Default is English. See https://tesseract-ocr.github.io/tessdoc/Data-Files-in-different-versions.html for a list of valid language codes. -# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol \ No newline at end of file +# TARGET_OCR_LANG=eng,deu,ita,spa,fra,por,rus,nld,tur,hun,pol,ita,spa,fra,por,rus,nld,tur,hun,pol diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 8de54f6c9c..73ac6674d8 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -221,6 +221,7 @@ const SystemSettings = { GenericOpenAiEmbeddingMaxConcurrentChunks: process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS || 500, GeminiEmbeddingApiKey: !!process.env.GEMINI_EMBEDDING_API_KEY, + AimlEmbedderApiKey: !!process.env.AIML_EMBEDDER_API_KEY, // -------------------------------------------------------- // VectorDB Provider Selection Settings & Configs @@ -588,6 +589,10 @@ const SystemSettings = { PPIOApiKey: !!process.env.PPIO_API_KEY, PPIOModelPref: process.env.PPIO_MODEL_PREF, + // AI/ML API Keys + AimlLlmApiKey: !!process.env.AIML_LLM_API_KEY, + AimlModelPref: process.env.AIML_MODEL_PREF, + // Dell Pro AI Studio Keys DellProAiStudioBasePath: process.env.DPAIS_LLM_BASE_PATH, DellProAiStudioModelPref: process.env.DPAIS_LLM_MODEL_PREF, diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore index 5e83df7bcc..b3c9a7a50d 100644 --- a/server/storage/models/.gitignore +++ b/server/storage/models/.gitignore @@ -9,4 +9,5 @@ gemini togetherAi tesseract ppio +aimlapi context-windows/* \ No newline at end of file diff --git a/server/utils/AiProviders/aimlapi/index.js b/server/utils/AiProviders/aimlapi/index.js new file mode 100644 index 0000000000..29a4e98a40 --- /dev/null +++ b/server/utils/AiProviders/aimlapi/index.js @@ -0,0 +1,328 @@ +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { + LLMPerformanceMonitor, +} = require("../../helpers/chat/LLMPerformanceMonitor"); +const { + handleDefaultStreamResponseV2, + formatChatHistory, +} = require("../../helpers/chat/responses"); +const fs = require("fs"); +const path = require("path"); +const { safeJsonParse } = require("../../http"); + +const cacheFolder = path.resolve( + process.env.STORAGE_DIR + ? 
path.resolve(process.env.STORAGE_DIR, "models", "aimlapi") + : path.resolve(__dirname, `../../../storage/models/aimlapi`) +); +const embedCacheFolder = path.resolve(cacheFolder, "embeddings"); + +class AimlApiLLM { + static BASE_URL = "https://api.aimlapi.com/v1"; + static HEADERS = { + "HTTP-Referer": "https://anythingllm.com/", + "X-Title": "anything", + }; + constructor(embedder = null, modelPreference = null) { + if (!process.env.AIML_LLM_API_KEY) + throw new Error("No AI/ML API key was set."); + const { OpenAI: OpenAIApi } = require("openai"); + this.openai = new OpenAIApi({ + apiKey: process.env.AIML_LLM_API_KEY, + baseURL: AimlApiLLM.BASE_URL, + defaultHeaders: AimlApiLLM.HEADERS, + }); + this.model = + modelPreference || process.env.AIML_MODEL_PREF || "gpt-3.5-turbo"; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + if (!fs.existsSync(cacheFolder)) + fs.mkdirSync(cacheFolder, { recursive: true }); + this.cacheModelPath = path.resolve(cacheFolder, "models.json"); + this.cacheAtPath = path.resolve(cacheFolder, ".cached_at"); + + this.embedder = embedder ?? new NativeEmbedder(); + this.defaultTemp = 0.7; + this.log( + `Initialized ${this.model} with context window ${this.promptWindowLimit()}` + ); + } + + log(text, ...args) { + console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args); + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + async #syncModels() { + if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale()) + return false; + this.log("Model cache is not present or stale. Fetching from AimlApi API."); + await fetchAimlApiModels(); + return; + } + + #cacheIsStale() { + const MAX_STALE = 6.048e8; // 1 Week in MS + if (!fs.existsSync(this.cacheAtPath)) return true; + const now = Number(new Date()); + const timestampMs = Number(fs.readFileSync(this.cacheAtPath)); + return now - timestampMs > MAX_STALE; + } + + models() { + if (!fs.existsSync(this.cacheModelPath)) return {}; + return safeJsonParse( + fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }), + {} + ); + } + + streamingEnabled() { + return "streamGetChatCompletion" in this; + } + + static promptWindowLimit(modelName) { + const cacheModelPath = path.resolve(cacheFolder, "models.json"); + const availableModels = fs.existsSync(cacheModelPath) + ? 
safeJsonParse( + fs.readFileSync(cacheModelPath, { encoding: "utf-8" }), + {} + ) + : {}; + return availableModels[modelName]?.maxLength || 4096; + } + + promptWindowLimit() { + const availableModels = this.models(); + return availableModels[this.model]?.maxLength || 4096; + } + + async isValidChatCompletionModel(modelName = "") { + await this.#syncModels(); + const availableModels = this.models(); + return Object.prototype.hasOwnProperty.call(availableModels, modelName); + } + + #generateContent({ userPrompt, attachments = [] }) { + if (!attachments.length) return userPrompt; + + const content = [{ type: "text", text: userPrompt }]; + for (let attachment of attachments) { + content.push({ + type: "image_url", + image_url: { url: attachment.contentString, detail: "high" }, + }); + } + return content.flat(); + } + + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + attachments = [], + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [ + prompt, + ...formatChatHistory(chatHistory, this.#generateContent), + { + role: "user", + content: this.#generateContent({ userPrompt, attachments }), + }, + ]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `AI/ML API chat: ${this.model} is not valid for chat completion!` + ); + + const result = await LLMPerformanceMonitor.measureAsyncFunction( + this.openai.chat.completions.create({ + model: this.model, + messages, + temperature, + }) + ); + + if ( + !result.output.hasOwnProperty("choices") || + result.output.choices.length === 0 + ) + return null; + + return { + textResponse: result.output.choices[0].message.content, + metrics: { + prompt_tokens: result.output.usage.prompt_tokens || 0, + completion_tokens: result.output.usage.completion_tokens || 0, + total_tokens: result.output.usage.total_tokens || 0, + outputTps: result.output.usage.completion_tokens / result.duration, + duration: result.duration, + }, + }; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `AI/ML API chat: ${this.model} is not valid for chat completion!` + ); + + const measuredStreamRequest = await LLMPerformanceMonitor.measureStream( + this.openai.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }), + messages, + false + ); + return measuredStreamRequest; + } + + handleStream(response, stream, responseProps) { + return handleDefaultStreamResponseV2(response, stream, responseProps); + } + + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +async function fetchAimlApiModels(providedApiKey = null) { + const apiKey = providedApiKey || process.env.AIML_LLM_API_KEY || null; + return await fetch(`${AimlApiLLM.BASE_URL}/models`, { + method: "GET", + headers: { + "Content-Type": "application/json", + ...(apiKey ? 
{ Authorization: `Bearer ${apiKey}` } : {}), + ...AimlApiLLM.HEADERS, + }, + }) + .then((res) => res.json()) + .then(({ data = [] }) => { + const models = {}; + data + .filter((m) => m.type === "chat-completion") + .forEach((model) => { + const developer = + model.info?.developer || + model.provider || + model.id?.split("/")[0] || + "AimlApi"; + models[model.id] = { + id: model.id, + name: model.name || model.id, + developer: developer.charAt(0).toUpperCase() + developer.slice(1), + maxLength: model.context_length || model.max_tokens || 4096, + }; + }); + + if (!fs.existsSync(cacheFolder)) + fs.mkdirSync(cacheFolder, { recursive: true }); + fs.writeFileSync( + path.resolve(cacheFolder, "models.json"), + JSON.stringify(models), + { encoding: "utf-8" } + ); + fs.writeFileSync( + path.resolve(cacheFolder, ".cached_at"), + String(Number(new Date())), + { encoding: "utf-8" } + ); + + return models; + }) + .catch((e) => { + console.error(e); + return {}; + }); +} + +async function fetchAimlApiEmbeddingModels(providedApiKey = null) { + const apiKey = providedApiKey || process.env.AIML_EMBEDDER_API_KEY || null; + return await fetch(`${AimlApiLLM.BASE_URL}/models`, { + method: "GET", + headers: { + "Content-Type": "application/json", + ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}), + ...AimlApiLLM.HEADERS, + }, + }) + .then((res) => res.json()) + .then(({ data = [] }) => { + const models = {}; + data + .filter((m) => m.type === "embedding") + .forEach((model) => { + const developer = + model.info?.developer || + model.provider || + model.id?.split("/")[0] || + "AimlApi"; + models[model.id] = { + id: model.id, + name: model.name || model.id, + developer: developer.charAt(0).toUpperCase() + developer.slice(1), + maxLength: model.context_length || model.max_tokens || 4096, + }; + }); + + if (!fs.existsSync(embedCacheFolder)) + fs.mkdirSync(embedCacheFolder, { recursive: true }); + fs.writeFileSync( + path.resolve(embedCacheFolder, "models.json"), + JSON.stringify(models), + { encoding: "utf-8" } + ); + fs.writeFileSync( + path.resolve(embedCacheFolder, ".cached_at"), + String(Number(new Date())), + { encoding: "utf-8" } + ); + + return models; + }) + .catch((e) => { + console.error(e); + return {}; + }); +} + +module.exports = { + AimlApiLLM, + fetchAimlApiModels, + fetchAimlApiEmbeddingModels, +}; diff --git a/server/utils/EmbeddingEngines/aimlapi/index.js b/server/utils/EmbeddingEngines/aimlapi/index.js new file mode 100644 index 0000000000..4fb3f73d47 --- /dev/null +++ b/server/utils/EmbeddingEngines/aimlapi/index.js @@ -0,0 +1,122 @@ +const { toChunks, maximumChunkLength } = require("../../helpers"); +const { + AimlApiLLM, + fetchAimlApiEmbeddingModels, +} = require("../../AiProviders/aimlapi"); +const fs = require("fs"); +const path = require("path"); +const { safeJsonParse } = require("../../http"); + +const cacheFolder = path.resolve( + process.env.STORAGE_DIR + ? 
path.resolve(process.env.STORAGE_DIR, "models", "aimlapi", "embeddings") + : path.resolve(__dirname, `../../../storage/models/aimlapi/embeddings`) +); + +class AimlApiEmbedder { + constructor() { + if (!process.env.AIML_EMBEDDER_API_KEY) + throw new Error("No AI/ML API key was set."); + const { OpenAI: OpenAIApi } = require("openai"); + this.openai = new OpenAIApi({ + apiKey: process.env.AIML_EMBEDDER_API_KEY, + baseURL: AimlApiLLM.BASE_URL, + defaultHeaders: AimlApiLLM.HEADERS, + }); + this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002"; + if (!fs.existsSync(cacheFolder)) + fs.mkdirSync(cacheFolder, { recursive: true }); + this.cacheModelPath = path.resolve(cacheFolder, "models.json"); + this.cacheAtPath = path.resolve(cacheFolder, ".cached_at"); + this.maxConcurrentChunks = 500; + this.embeddingMaxChunkLength = maximumChunkLength(); + this.log(`Initialized ${this.model}`); + this.#syncModels().catch((e) => + this.log(`Failed to sync models: ${e.message}`) + ); + } + + log(text, ...args) { + console.log(`\x1b[36m[AimlApiEmbedder]\x1b[0m ${text}`, ...args); + } + + #cacheIsStale() { + const MAX_STALE = 6.048e8; // 1 Week in MS + if (!fs.existsSync(this.cacheAtPath)) return true; + const now = Number(new Date()); + const timestampMs = Number(fs.readFileSync(this.cacheAtPath)); + return now - timestampMs > MAX_STALE; + } + + async #syncModels() { + if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale()) + return false; + this.log("Model cache is not present or stale. Fetching from AimlApi API."); + await fetchAimlApiEmbeddingModels(); + return; + } + + models() { + if (!fs.existsSync(this.cacheModelPath)) return {}; + return safeJsonParse( + fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }), + {} + ); + } + + async isValidEmbeddingModel(modelName = "") { + await this.#syncModels(); + const availableModels = this.models(); + return Object.prototype.hasOwnProperty.call(availableModels, modelName); + } + + async embedTextInput(textInput) { + const result = await this.embedChunks( + Array.isArray(textInput) ? textInput : [textInput] + ); + return result?.[0] || []; + } + + async embedChunks(textChunks = []) { + this.log(`Embedding ${textChunks.length} chunks...`); + const embeddingRequests = []; + for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) { + embeddingRequests.push( + new Promise((resolve) => { + this.openai.embeddings + .create({ model: this.model, input: chunk }) + .then((result) => resolve({ data: result?.data, error: null })) + .catch((e) => { + e.type = + e?.response?.data?.error?.code || + e?.response?.status || + "failed_to_embed"; + e.message = e?.response?.data?.error?.message || e.message; + resolve({ data: [], error: e }); + }); + }) + ); + } + + const { data = [], error = null } = await Promise.all( + embeddingRequests + ).then((results) => { + const errors = results + .filter((res) => !!res.error) + .map((res) => res.error); + if (errors.length > 0) { + const unique = new Set(); + errors.forEach((err) => unique.add(`[${err.type}]: ${err.message}`)); + return { data: [], error: Array.from(unique).join(", ") }; + } + return { data: results.map((r) => r.data || []).flat(), error: null }; + }); + + if (error) throw new Error(`AimlApi Failed to embed: ${error}`); + return data.length > 0 && data.every((d) => d.hasOwnProperty("embedding")) + ? 
data.map((d) => d.embedding) + : null; + } +} + +module.exports = { AimlApiEmbedder }; diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index 6e069defd3..50e873743e 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -826,6 +826,8 @@ ${this.getHistory({ to: route.to }) return new Providers.PPIOProvider({ model: config.model }); case "gemini": return new Providers.GeminiProvider({ model: config.model }); + case "aimlapi": + return new Providers.AimlApiProvider({ model: config.model }); case "dpais": return new Providers.DellProAiStudioProvider({ model: config.model }); default: diff --git a/server/utils/agents/aibitat/providers/aimlapi.js b/server/utils/agents/aibitat/providers/aimlapi.js new file mode 100644 index 0000000000..8aeee5ca2b --- /dev/null +++ b/server/utils/agents/aibitat/providers/aimlapi.js @@ -0,0 +1,90 @@ +const OpenAI = require("openai"); +const { AimlApiLLM } = require("../../../AiProviders/aimlapi"); +const Provider = require("./ai-provider.js"); +const { RetryError } = require("../error.js"); + + +class AimlApiProvider extends Provider { + model; + + constructor(config = {}) { + const { model = "gpt-3.5-turbo" } = config; + const client = new OpenAI({ + baseURL: AimlApiLLM.BASE_URL, + apiKey: process.env.AIML_LLM_API_KEY ?? null, + maxRetries: 3, + defaultHeaders: AimlApiLLM.HEADERS, + }); + super(client); + + this.model = model; + this.verbose = true; + } + + async complete(messages, functions = []) { + try { + const response = await this.client.chat.completions.create({ + model: this.model, + messages, + ...(Array.isArray(functions) && functions.length > 0 + ? { functions } + : {}), + }); + + const completion = response.choices[0].message; + const cost = this.getCost(response.usage); + + if (completion.function_call) { + let functionArgs = {}; + try { + functionArgs = JSON.parse(completion.function_call.arguments); + } catch (error) { + return this.complete( + [ + ...messages, + { + role: "function", + name: completion.function_call.name, + function_call: completion.function_call, + content: error?.message, + }, + ], + functions + ); + } + + return { + result: null, + functionCall: { + name: completion.function_call.name, + arguments: functionArgs, + }, + cost, + }; + } + + return { + result: completion.content, + cost, + }; + } catch (error) { + if (error instanceof OpenAI.AuthenticationError) throw error; + + if ( + error instanceof OpenAI.RateLimitError || + error instanceof OpenAI.InternalServerError || + error instanceof OpenAI.APIError + ) { + throw new RetryError(error.message); + } + + throw error; + } + } + + getCost(_usage) { + return 0; + } +} + +module.exports = AimlApiProvider; diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js index d8c174862e..8ea5814e9c 100644 --- a/server/utils/agents/aibitat/providers/index.js +++ b/server/utils/agents/aibitat/providers/index.js @@ -23,6 +23,7 @@ const NvidiaNimProvider = require("./nvidiaNim.js"); const PPIOProvider = require("./ppio.js"); const GeminiProvider = require("./gemini.js"); const DellProAiStudioProvider = require("./dellProAiStudio.js"); +const AimlApiProvider = require("./aimlapi.js"); module.exports = { OpenAIProvider, @@ -50,4 +51,5 @@ module.exports = { PPIOProvider, GeminiProvider, DellProAiStudioProvider, + AimlApiProvider, }; diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js index 915e5a59be..6884a93ef5 100644 --- 
a/server/utils/agents/index.js +++ b/server/utils/agents/index.js @@ -185,6 +185,10 @@ class AgentHandler { if (!process.env.PPIO_API_KEY) throw new Error("PPIO API Key must be provided to use agents."); break; + case "aimlapi": + if (!process.env.AIML_LLM_API_KEY) + throw new Error("AI/ML API key must be provided to use agents."); + break; case "gemini": if (!process.env.GEMINI_API_KEY) throw new Error("Gemini API key must be provided to use agents."); break; @@ -266,6 +270,8 @@ class AgentHandler { return process.env.PPIO_MODEL_PREF ?? "qwen/qwen2.5-32b-instruct"; case "gemini": return process.env.GEMINI_LLM_MODEL_PREF ?? "gemini-2.0-flash-lite"; + case "aimlapi": + return process.env.AIML_MODEL_PREF ?? "gpt-3.5-turbo"; case "dpais": return process.env.DPAIS_LLM_MODEL_PREF; default: diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js index cff97ff888..01b54b9e37 100644 --- a/server/utils/helpers/customModels.js +++ b/server/utils/helpers/customModels.js @@ -9,6 +9,10 @@ const { parseLMStudioBasePath } = require("../AiProviders/lmStudio"); const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim"); const { fetchPPIOModels } = require("../AiProviders/ppio"); const { GeminiLLM } = require("../AiProviders/gemini"); +const { + fetchAimlApiModels, + fetchAimlApiEmbeddingModels, +} = require("../AiProviders/aimlapi"); const SUPPORT_CUSTOM_MODELS = [ "openai", @@ -33,6 +37,8 @@ const SUPPORT_CUSTOM_MODELS = [ "gemini", "ppio", "dpais", + "aimlapi", + "aimlapi-embed", ]; async function getCustomModels(provider = "", apiKey = null, basePath = null) { @@ -84,6 +90,10 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) { return await getPPIOModels(apiKey); case "dpais": return await getDellProAiStudioModels(basePath); + case "aimlapi": + return await getAimlApiModels(apiKey); + case "aimlapi-embed": + return await getAimlApiEmbeddingModels(apiKey); default: return { models: [], error: "Invalid provider for custom models" }; } @@ -675,6 +685,44 @@ async function getDellProAiStudioModels(basePath = null) { } } +async function getAimlApiModels(apiKey = null) { + const knownModels = await fetchAimlApiModels(apiKey); + if (Object.keys(knownModels).length === 0) + return { models: [], error: null }; + + if (Object.keys(knownModels).length > 0 && !!apiKey) + process.env.AIML_LLM_API_KEY = apiKey; + + const models = Object.values(knownModels).map((model) => { + return { + id: model.id, + organization: model.developer, + name: model.name, + }; + }); + return { models, error: null }; +} + +async function getAimlApiEmbeddingModels(apiKey = null) { + const knownModels = await fetchAimlApiEmbeddingModels(apiKey); + if (Object.keys(knownModels).length === 0) + return { models: [], error: null }; + + if (Object.keys(knownModels).length > 0 && !!apiKey) + process.env.AIML_EMBEDDER_API_KEY = apiKey; + + const models = Object.values(knownModels).map((model) => { + return { + id: model.id, + organization: model.developer, + name: model.name, + }; + }); + return { models, error: null }; +} + module.exports = { getCustomModels, + getAimlApiModels, + getAimlApiEmbeddingModels, }; diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 2017c618fa..18daba32bc 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -203,6 +203,9 @@ function getLLMProvider({ provider = null, model = null } = {}) { case "ppio": const { PPIOLLM } = require("../AiProviders/ppio"); return new PPIOLLM(embedder, model); +
case "aimlapi": + const { AimlApiLLM } = require("../AiProviders/aimlapi"); + return new AimlApiLLM(embedder, model); case "dpais": const { DellProAiStudioLLM } = require("../AiProviders/dellProAiStudio"); return new DellProAiStudioLLM(embedder, model); @@ -260,6 +263,9 @@ function getEmbeddingEngineSelection() { case "gemini": const { GeminiEmbedder } = require("../EmbeddingEngines/gemini"); return new GeminiEmbedder(); + case "aimlapi": + const { AimlApiEmbedder } = require("../EmbeddingEngines/aimlapi"); + return new AimlApiEmbedder(); default: return new NativeEmbedder(); } @@ -350,6 +356,9 @@ function getLLMProviderClass({ provider = null } = {}) { case "ppio": const { PPIOLLM } = require("../AiProviders/ppio"); return PPIOLLM; + case "aimlapi": + const { AimlApiLLM } = require("../AiProviders/aimlapi"); + return AimlApiLLM; case "dpais": const { DellProAiStudioLLM } = require("../AiProviders/dellProAiStudio"); return DellProAiStudioLLM; @@ -417,6 +426,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) { return process.env.NVIDIA_NIM_LLM_MODEL_PREF; case "ppio": return process.env.PPIO_API_KEY; + case "aimlapi": + return process.env.AIML_MODEL_PREF; case "dpais": return process.env.DPAIS_LLM_MODEL_PREF; default: diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 87670830d9..b34cb3f9c4 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -203,6 +203,16 @@ const KEY_MAPPING = { checks: [], }, + // AI/ML API Options + AimlLlmApiKey: { + envKey: "AIML_LLM_API_KEY", + checks: [isNotEmpty], + }, + AimlModelPref: { + envKey: "AIML_MODEL_PREF", + checks: [isNotEmpty], + }, + // Generic OpenAI InferenceSettings GenericOpenAiBasePath: { envKey: "GENERIC_OPEN_AI_BASE_PATH", @@ -311,6 +321,11 @@ const KEY_MAPPING = { checks: [nonZero], }, + AimlEmbedderApiKey: { + envKey: "AIML_EMBEDDER_API_KEY", + checks: [isNotEmpty], + }, + // Vector Database Selection Settings VectorDB: { envKey: "VECTOR_DB", @@ -784,6 +799,7 @@ function supportedLLM(input = "") { "nvidia-nim", "ppio", "dpais", + "aimlapi", ].includes(input); return validSelection ? null : `${input} is not a valid LLM provider.`; } @@ -821,6 +837,7 @@ function supportedEmbeddingModel(input = "") { "litellm", "generic-openai", "mistral", + "aimlapi", ]; return supported.includes(input) ? null