这是indexloc提供的服务,不要输入任何密码
Skip to content

Add support for Google Generative AI (Gemini) embedder #2895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ GID='1000'
# GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc'
# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500

# EMBEDDING_ENGINE='gemini'
# GEMINI_EMBEDDING_API_KEY=
# EMBEDDING_MODEL_PREF='text-embedding-004'

###########################################
######## Vector Database Selection ########
###########################################
Expand Down
47 changes: 47 additions & 0 deletions frontend/src/components/EmbeddingSelection/GeminiOptions/index.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
export default function GeminiOptions({ settings }) {
return (
<div className="w-full flex flex-col gap-y-4">
<div className="w-full flex items-center gap-[36px] mt-1.5">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
API Key
</label>
<input
type="password"
name="GeminiEmbeddingApiKey"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="Gemini API Key"
defaultValue={settings?.GeminiEmbeddingApiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Model Preference
</label>
<select
name="EmbeddingModelPref"
required={true}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<optgroup label="Available embedding models">
{["text-embedding-004"].map((model) => {
return (
<option
key={model}
value={model}
selected={settings?.EmbeddingModelPref === model}
>
{model}
</option>
);
})}
</optgroup>
</select>
</div>
</div>
</div>
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import showToast from "@/utils/toast";
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import OpenAiLogo from "@/media/llmprovider/openai.png";
import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
import GemeniAiLogo from "@/media/llmprovider/gemini.png";
import LocalAiLogo from "@/media/llmprovider/localai.png";
import OllamaLogo from "@/media/llmprovider/ollama.png";
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
Expand All @@ -19,6 +20,7 @@ import PreLoader from "@/components/Preloader";
import ChangeWarningModal from "@/components/ChangeWarning";
import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
import AzureAiOptions from "@/components/EmbeddingSelection/AzureAiOptions";
import GeminiOptions from "@/components/EmbeddingSelection/GeminiOptions";
import LocalAiOptions from "@/components/EmbeddingSelection/LocalAiOptions";
import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbeddingOptions";
import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
Expand Down Expand Up @@ -59,6 +61,13 @@ const EMBEDDERS = [
options: (settings) => <AzureAiOptions settings={settings} />,
description: "The enterprise option of OpenAI hosted on Azure services.",
},
{
name: "Gemini",
value: "gemini",
logo: GemeniAiLogo,
options: (settings) => <GeminiOptions settings={settings} />,
description: "Run powerful embedding models from Google AI.",
},
{
name: "Local AI",
value: "localai",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,14 @@ export const EMBEDDING_ENGINE_PRIVACY = {
],
logo: GenericOpenAiLogo,
},
gemini: {
name: "Google Gemini",
description: [
"Your document text is sent to Google Gemini's servers for processing",
"Your document text is stored or managed according to the terms of service of Google Gemini API Terms of Service",
],
logo: GeminiLogo,
},
};

export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
Expand Down
4 changes: 4 additions & 0 deletions server/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc'
# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500

# EMBEDDING_ENGINE='gemini'
# GEMINI_EMBEDDING_API_KEY=
# EMBEDDING_MODEL_PREF='text-embedding-004'

###########################################
######## Vector Database Selection ########
###########################################
Expand Down
5 changes: 2 additions & 3 deletions server/models/systemSettings.js
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,12 @@ const SystemSettings = {
EmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
EmbeddingModelMaxChunkLength:
process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH,
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
GenericOpenAiEmbeddingApiKey:
!!process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY,
GenericOpenAiEmbeddingMaxConcurrentChunks:
process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS || 500,
GeminiEmbeddingApiKey: !!process.env.GEMINI_EMBEDDING_API_KEY,

// --------------------------------------------------------
// VectorDB Provider Selection Settings & Configs
Expand Down Expand Up @@ -533,9 +535,6 @@ const SystemSettings = {
CohereApiKey: !!process.env.COHERE_API_KEY,
CohereModelPref: process.env.COHERE_MODEL_PREF,

// VoyageAi API Keys
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,

// DeepSeek API Keys
DeepSeekApiKey: !!process.env.DEEPSEEK_API_KEY,
DeepSeekModelPref: process.env.DEEPSEEK_MODEL_PREF,
Expand Down
54 changes: 54 additions & 0 deletions server/utils/EmbeddingEngines/gemini/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
class GeminiEmbedder {
constructor() {
if (!process.env.GEMINI_API_KEY)
throw new Error("No Gemini API key was set.");
const { GoogleGenerativeAI } = require("@google/generative-ai");
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
this.gemini = genAI.getGenerativeModel({ model: this.model });

// This property is disabled as it causes issues when sending multiple chunks at once
// since when given 4 chunks at once, the gemini api returns 1 embedding for all 4 chunks
// instead of 4 embeddings - no idea why this is the case, but it is not how the results are
// expected to be returned.
// this.maxConcurrentChunks = 1;

// https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding
this.embeddingMaxChunkLength = 2_048;
this.log(`Initialized with ${this.model}`);
}

log(text, ...args) {
console.log(`\x1b[36m[GeminiEmbedder]\x1b[0m ${text}`, ...args);
}

/**
* Embeds a single text input
* @param {string} textInput - The text to embed
* @returns {Promise<Array<number>>} The embedding values
*/
async embedTextInput(textInput) {
const result = await this.gemini.embedContent(textInput);
return result.embedding.values || [];
}

/**
* Embeds a list of text inputs
* @param {Array<string>} textInputs - The list of text to embed
* @returns {Promise<Array<Array<number>>>} The embedding values
*/
async embedChunks(textChunks = []) {
let embeddings = [];
for (const chunk of textChunks) {
const results = await this.gemini.embedContent(chunk);
if (!results.embedding || !results.embedding.values)
throw new Error("No embedding values returned from gemini");
embeddings.push(results.embedding.values);
}
return embeddings;
}
}

// Public surface of this module: the Gemini embedding engine class.
module.exports = { GeminiEmbedder };
3 changes: 3 additions & 0 deletions server/utils/helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,9 @@ function getEmbeddingEngineSelection() {
GenericOpenAiEmbedder,
} = require("../EmbeddingEngines/genericOpenAi");
return new GenericOpenAiEmbedder();
case "gemini":
const { GeminiEmbedder } = require("../EmbeddingEngines/gemini");
return new GeminiEmbedder();
default:
return new NativeEmbedder();
}
Expand Down
7 changes: 7 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,12 @@ const KEY_MAPPING = {
checks: [nonZero],
},

// Gemini Embedding Settings
GeminiEmbeddingApiKey: {
envKey: "GEMINI_EMBEDDING_API_KEY",
checks: [isNotEmpty],
},

// Generic OpenAI Embedding Settings
GenericOpenAiEmbeddingApiKey: {
envKey: "GENERIC_OPEN_AI_EMBEDDING_API_KEY",
Expand Down Expand Up @@ -759,6 +765,7 @@ function supportedEmbeddingModel(input = "") {
const supported = [
"openai",
"azure",
"gemini",
"localai",
"native",
"ollama",
Expand Down