diff --git a/docker/.env.example b/docker/.env.example
index ee53c718bc6..19c04dfa855 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -170,6 +170,10 @@ GID='1000'
# GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc'
# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500
+# EMBEDDING_ENGINE='gemini'
+# GEMINI_EMBEDDING_API_KEY=
+# EMBEDDING_MODEL_PREF='text-embedding-004'
+
###########################################
######## Vector Database Selection ########
###########################################
diff --git a/frontend/src/components/EmbeddingSelection/GeminiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/GeminiOptions/index.jsx
new file mode 100644
index 00000000000..c25a2b13f8d
--- /dev/null
+++ b/frontend/src/components/EmbeddingSelection/GeminiOptions/index.jsx
@@ -0,0 +1,47 @@
+export default function GeminiOptions({ settings }) {
+  return (
+    <div className="w-full flex flex-col gap-y-4">
+      <div className="w-full flex items-center gap-[36px] mt-1.5">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            API Key
+          </label>
+          <input
+            type="password"
+            name="GeminiEmbeddingApiKey"
+            className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+            placeholder="Gemini API Key"
+            defaultValue={settings?.GeminiEmbeddingApiKey ? "*".repeat(20) : ""}
+            required={true}
+            autoComplete="off"
+            spellCheck={false}
+          />
+        </div>
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-3">
+            Model Preference
+          </label>
+          <select
+            name="EmbeddingModelPref"
+            required={true}
+            defaultValue={settings?.EmbeddingModelPref || "text-embedding-004"}
+            className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+          >
+            <optgroup label="Available embedding models">
+              {["text-embedding-004"].map((model) => {
+                return (
+                  <option
+                    key={model}
+                    value={model}
+                  >
+                    {model}
+                  </option>
+                );
+              })}
+            </optgroup>
+          </select>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
index 77853e0a999..a1833b0cc6f 100644
--- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
@@ -6,6 +6,7 @@ import showToast from "@/utils/toast";
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import OpenAiLogo from "@/media/llmprovider/openai.png";
import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
+import GemeniAiLogo from "@/media/llmprovider/gemini.png";
import LocalAiLogo from "@/media/llmprovider/localai.png";
import OllamaLogo from "@/media/llmprovider/ollama.png";
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
@@ -19,6 +20,7 @@ import PreLoader from "@/components/Preloader";
import ChangeWarningModal from "@/components/ChangeWarning";
import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
import AzureAiOptions from "@/components/EmbeddingSelection/AzureAiOptions";
+import GeminiOptions from "@/components/EmbeddingSelection/GeminiOptions";
import LocalAiOptions from "@/components/EmbeddingSelection/LocalAiOptions";
import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbeddingOptions";
import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
@@ -59,6 +61,13 @@ const EMBEDDERS = [
options: (settings) => ,
description: "The enterprise option of OpenAI hosted on Azure services.",
},
+  {
+    name: "Gemini",
+    value: "gemini",
+    logo: GemeniAiLogo,
+    options: (settings) => <GeminiOptions settings={settings} />,
+    description: "Run powerful embedding models from Google AI.",
+  },
{
name: "Local AI",
value: "localai",
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index 1fefca235b6..713891300dd 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -378,6 +378,14 @@ export const EMBEDDING_ENGINE_PRIVACY = {
],
logo: GenericOpenAiLogo,
},
+ gemini: {
+ name: "Google Gemini",
+ description: [
+ "Your document text is sent to Google Gemini's servers for processing",
+      "Your document text is stored or managed according to the Google Gemini API Terms of Service",
+ ],
+ logo: GeminiLogo,
+ },
};
export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
diff --git a/server/.env.example b/server/.env.example
index 3346fc397d5..bf528359d4d 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -164,6 +164,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc'
# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500
+# EMBEDDING_ENGINE='gemini'
+# GEMINI_EMBEDDING_API_KEY=
+# EMBEDDING_MODEL_PREF='text-embedding-004'
+
###########################################
######## Vector Database Selection ########
###########################################
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 3403c0824cd..1af5d8cdd9d 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -198,10 +198,12 @@ const SystemSettings = {
EmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
EmbeddingModelMaxChunkLength:
process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH,
+ VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
GenericOpenAiEmbeddingApiKey:
!!process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY,
GenericOpenAiEmbeddingMaxConcurrentChunks:
process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS || 500,
+ GeminiEmbeddingApiKey: !!process.env.GEMINI_EMBEDDING_API_KEY,
// --------------------------------------------------------
// VectorDB Provider Selection Settings & Configs
@@ -533,9 +535,6 @@ const SystemSettings = {
CohereApiKey: !!process.env.COHERE_API_KEY,
CohereModelPref: process.env.COHERE_MODEL_PREF,
- // VoyageAi API Keys
- VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
-
// DeepSeek API Keys
DeepSeekApiKey: !!process.env.DEEPSEEK_API_KEY,
DeepSeekModelPref: process.env.DEEPSEEK_MODEL_PREF,
diff --git a/server/utils/EmbeddingEngines/gemini/index.js b/server/utils/EmbeddingEngines/gemini/index.js
new file mode 100644
index 00000000000..7a1ba9684c3
--- /dev/null
+++ b/server/utils/EmbeddingEngines/gemini/index.js
@@ -0,0 +1,54 @@
+class GeminiEmbedder {
+  constructor() {
+    if (!process.env.GEMINI_EMBEDDING_API_KEY)
+      throw new Error("No Gemini API key was set.");
+    const { GoogleGenerativeAI } = require("@google/generative-ai");
+    const genAI = new GoogleGenerativeAI(process.env.GEMINI_EMBEDDING_API_KEY);
+    this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
+    this.gemini = genAI.getGenerativeModel({ model: this.model });
+
+    // This property is disabled as it causes issues when sending multiple chunks at once
+    // since when given 4 chunks at once, the gemini api returns 1 embedding for all 4 chunks
+    // instead of 4 embeddings - no idea why this is the case, but it is not how the results are
+    // expected to be returned.
+    // this.maxConcurrentChunks = 1;
+
+    // https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding
+    this.embeddingMaxChunkLength = 2_048;
+    this.log(`Initialized with ${this.model}`);
+  }
+
+  log(text, ...args) {
+    console.log(`\x1b[36m[GeminiEmbedder]\x1b[0m ${text}`, ...args);
+  }
+
+  /**
+   * Embeds a single text input
+   * @param {string} textInput - The text to embed
+   * @returns {Promise<Array<number>>} The embedding values
+   */
+  async embedTextInput(textInput) {
+    const result = await this.gemini.embedContent(textInput);
+    return result.embedding.values || [];
+  }
+
+  /**
+   * Embeds a list of text inputs
+   * @param {Array<string>} textChunks - The list of text chunks to embed
+   * @returns {Promise<Array<Array<number>>>} One embedding vector per chunk
+   */
+  async embedChunks(textChunks = []) {
+    let embeddings = [];
+    for (const chunk of textChunks) {
+      const results = await this.gemini.embedContent(chunk);
+      if (!results.embedding || !results.embedding.values)
+        throw new Error("No embedding values returned from gemini");
+      embeddings.push(results.embedding.values);
+    }
+    return embeddings;
+  }
+}
+
+module.exports = {
+  GeminiEmbedder,
+};
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 55d190f4fdb..fa47f9cf78e 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -250,6 +250,9 @@ function getEmbeddingEngineSelection() {
GenericOpenAiEmbedder,
} = require("../EmbeddingEngines/genericOpenAi");
return new GenericOpenAiEmbedder();
+ case "gemini":
+ const { GeminiEmbedder } = require("../EmbeddingEngines/gemini");
+ return new GeminiEmbedder();
default:
return new NativeEmbedder();
}
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index da30b6ee0dd..bdf4e63d68f 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -266,6 +266,12 @@ const KEY_MAPPING = {
checks: [nonZero],
},
+ // Gemini Embedding Settings
+ GeminiEmbeddingApiKey: {
+ envKey: "GEMINI_EMBEDDING_API_KEY",
+ checks: [isNotEmpty],
+ },
+
// Generic OpenAI Embedding Settings
GenericOpenAiEmbeddingApiKey: {
envKey: "GENERIC_OPEN_AI_EMBEDDING_API_KEY",
@@ -759,6 +765,7 @@ function supportedEmbeddingModel(input = "") {
const supported = [
"openai",
"azure",
+ "gemini",
"localai",
"native",
"ollama",