1 change: 1 addition & 0 deletions docker/.env.example
@@ -47,6 +47,7 @@ GID='1000'
# EMBEDDING_ENGINE='localai'
# EMBEDDING_BASE_PATH='https://localhost:8080/v1'
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=1000 # The maximum length, in characters, of a single chunk of text sent to be embedded

###########################################
######## Vector Database Selection ########
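For illustration only, here is how an explicit override could look in docker/.env once this lands; the 512 value is just an assumed example for a model with a tighter limit, and leaving the variable unset keeps the 1,000-character default applied in server/utils/helpers/index.js later in this diff:

EMBEDDING_ENGINE='localai'
EMBEDDING_BASE_PATH='https://localhost:8080/v1'
EMBEDDING_MODEL_PREF='text-embedding-ada-002'
EMBEDDING_MODEL_MAX_CHUNK_LENGTH=512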
@@ -30,6 +30,22 @@ export default function LocalAiOptions({ settings }) {
/>
</div>
<LocalAIModelSelection settings={settings} basePath={basePath} />
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Max embedding chunk length
</label>
<input
type="number"
name="EmbeddingModelMaxChunkLength"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="1000"
min={1}
onScroll={(e) => e.target.blur()}
defaultValue={settings?.EmbeddingModelMaxChunkLength}
required={false}
autoComplete="off"
/>
</div>
</>
);
}
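For context, a minimal sketch of how a named input like the one above could be collected and submitted; the /api/system/update-env endpoint and the handler shape are assumptions for illustration and are not part of this diff:

// Hypothetical submit handler: serializes the named number input into a flat
// object whose key matches the KEY_MAPPING entry added in updateENV.js below.
async function handleSubmit(e) {
  e.preventDefault();
  const form = new FormData(e.target);
  const payload = {
    EmbeddingModelMaxChunkLength:
      Number(form.get("EmbeddingModelMaxChunkLength")) || null,
  };
  await fetch("/api/system/update-env", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
}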
1 change: 1 addition & 0 deletions server/.env.example
@@ -44,6 +44,7 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# EMBEDDING_ENGINE='localai'
# EMBEDDING_BASE_PATH='https://localhost:8080/v1'
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=1000 # The maximum length, in characters, of a single chunk of text sent to be embedded

###########################################
######## Vector Database Selection ########
2 changes: 2 additions & 0 deletions server/models/systemSettings.js
@@ -27,6 +27,8 @@ const SystemSettings = {
EmbeddingEngine: process.env.EMBEDDING_ENGINE,
EmbeddingBasePath: process.env.EMBEDDING_BASE_PATH,
EmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
EmbeddingModelMaxChunkLength:
process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH,
...(vectorDB === "pinecone"
? {
PineConeEnvironment: process.env.PINECONE_ENVIRONMENT,
6 changes: 3 additions & 3 deletions server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -16,7 +16,7 @@ class AzureOpenAiEmbedder {

// The maximum amount of "inputs" that OpenAI API can process in a single call.
// https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request
this.embeddingChunkLimit = 16;
this.embeddingMaxChunkLength = 16;
}

async embedTextInput(textInput) {
@@ -34,9 +34,9 @@ class AzureOpenAiEmbedder {

// Because there is a limit on how many chunks can be sent at once to Azure OpenAI
// we concurrently execute each max batch of text chunks possible.
// Refer to constructor embeddingChunkLimit for more info.
// Refer to constructor embeddingMaxChunkLength for more info.
const embeddingRequests = [];
for (const chunk of toChunks(textChunks, this.embeddingChunkLimit)) {
for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
embeddingRequests.push(
new Promise((resolve) => {
this.openai
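Because the actual Azure client call is truncated in this hunk, the following is only a sketch of the batching pattern it relies on: toChunks splits the array of text chunks into groups of at most embeddingMaxChunkLength entries (16 for Azure), and each group is embedded concurrently. The sendBatch function is a hypothetical stand-in for the real SDK call:

const { toChunks } = require("../../helpers");

// Hypothetical stand-in for the real embeddings request; not the Azure SDK API.
async function sendBatch(batch) {
  return batch.map((text) => ({ text, embedding: [] }));
}

async function embedInBatches(textChunks, embeddingMaxChunkLength = 16) {
  // Split ["a", "b", "c", ...] into arrays of at most `embeddingMaxChunkLength`
  // strings, then fire one request per batch concurrently.
  const batches = toChunks(textChunks, embeddingMaxChunkLength);
  const results = await Promise.all(batches.map((batch) => sendBatch(batch)));
  return results.flat();
}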
8 changes: 4 additions & 4 deletions server/utils/EmbeddingEngines/localAi/index.js
@@ -1,4 +1,4 @@
const { toChunks } = require("../../helpers");
const { toChunks, maximumChunkLength } = require("../../helpers");

class LocalAiEmbedder {
constructor() {
@@ -12,8 +12,8 @@ class LocalAiEmbedder {
});
this.openai = new OpenAIApi(config);

// Arbitrary limit to ensure we stay within reasonable POST request size.
this.embeddingChunkLimit = 1_000;
// Arbitrary limit of string size in chars to ensure we stay within reasonable POST request size.
this.embeddingMaxChunkLength = maximumChunkLength();
}

async embedTextInput(textInput) {
@@ -23,7 +23,7 @@ class LocalAiEmbedder {

async embedChunks(textChunks = []) {
const embeddingRequests = [];
for (const chunk of toChunks(textChunks, this.embeddingChunkLimit)) {
for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
embeddingRequests.push(
new Promise((resolve) => {
this.openai
7 changes: 4 additions & 3 deletions server/utils/EmbeddingEngines/native/index.js
@@ -4,6 +4,7 @@ const { toChunks } = require("../../helpers");

class NativeEmbedder {
constructor() {
// Model Card: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
this.model = "Xenova/all-MiniLM-L6-v2";
this.cacheDir = path.resolve(
process.env.STORAGE_DIR
@@ -12,8 +13,8 @@ class NativeEmbedder {
);
this.modelPath = path.resolve(this.cacheDir, "Xenova", "all-MiniLM-L6-v2");

// Limit the number of chunks to send per loop to not overload compute.
this.embeddingChunkLimit = 16;
// Arbitrary limit of string size in chars to ensure we stay within reasonable POST request size.
this.embeddingMaxChunkLength = 1_000;

// Make directory when it does not exist in existing installations
if (!fs.existsSync(this.cacheDir)) fs.mkdirSync(this.cacheDir);
@@ -62,7 +63,7 @@
async embedChunks(textChunks = []) {
const Embedder = await this.embedderClient();
const embeddingResults = [];
for (const chunk of toChunks(textChunks, this.embeddingChunkLimit)) {
for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
const output = await Embedder(chunk, {
pooling: "mean",
normalize: true,
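For reference, a minimal sketch of what the Embedder call above likely corresponds to, assuming embedderClient() wraps a @xenova/transformers feature-extraction pipeline (the loader itself is not shown in this diff, so treat it as an assumption; only the pooling/normalize options appear above):

const { pipeline } = require("@xenova/transformers");

async function embedWithMiniLM(texts) {
  // Model card: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
  const extractor = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2");
  // Mean-pool token embeddings and L2-normalize, matching the options used above.
  const output = await extractor(texts, { pooling: "mean", normalize: true });
  return output.tolist(); // one embedding vector per input string
}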
8 changes: 4 additions & 4 deletions server/utils/EmbeddingEngines/openAi/index.js
@@ -10,8 +10,8 @@ class OpenAiEmbedder {
const openai = new OpenAIApi(config);
this.openai = openai;

// Arbitrary limit to ensure we stay within reasonable POST request size.
this.embeddingChunkLimit = 1_000;
// Arbitrary limit of string size in chars to ensure we stay within reasonable POST request size.
this.embeddingMaxChunkLength = 1_000;
}

async embedTextInput(textInput) {
@@ -22,9 +22,9 @@ class OpenAiEmbedder {
async embedChunks(textChunks = []) {
// Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
// we concurrently execute each max batch of text chunks possible.
// Refer to constructor embeddingChunkLimit for more info.
// Refer to constructor embeddingMaxChunkLength for more info.
const embeddingRequests = [];
for (const chunk of toChunks(textChunks, this.embeddingChunkLimit)) {
for (const chunk of toChunks(textChunks, this.embeddingMaxChunkLength)) {
embeddingRequests.push(
new Promise((resolve) => {
this.openai
15 changes: 15 additions & 0 deletions server/utils/helpers/index.js
@@ -70,6 +70,20 @@ function getEmbeddingEngineSelection() {
}
}

// Some models have lower limits on the number of chars that can be encoded in a single pass.
// By default we assume a model can handle 1,000 chars, but since some models work with smaller
// limits, this value can be overridden when embedding information.
function maximumChunkLength() {
if (
!!process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH &&
!isNaN(process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH) &&
Number(process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH) > 1
)
return Number(process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH);

return 1_000;
}

function toChunks(arr, size) {
return Array.from({ length: Math.ceil(arr.length / size) }, (_v, i) =>
arr.slice(i * size, i * size + size)
@@ -78,6 +92,7 @@ function toChunks(arr, size) {

module.exports = {
getEmbeddingEngineSelection,
maximumChunkLength,
getVectorDbClass,
getLLMProvider,
toChunks,
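A quick illustration of how the two helpers behave (the 512 value is just an example):

// Assuming the helpers module is on the require path, e.g. from the repo root:
const { maximumChunkLength, toChunks } = require("./server/utils/helpers");

delete process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH;
console.log(maximumChunkLength()); // 1000 (default when unset or invalid)

process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH = "512";
console.log(maximumChunkLength()); // 512 (valid numeric override > 1)

console.log(toChunks([1, 2, 3, 4, 5], 2)); // [ [ 1, 2 ], [ 3, 4 ], [ 5 ] ]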
4 changes: 4 additions & 0 deletions server/utils/helpers/updateENV.js
@@ -90,6 +90,10 @@ const KEY_MAPPING = {
envKey: "EMBEDDING_MODEL_PREF",
checks: [isNotEmpty],
},
EmbeddingModelMaxChunkLength: {
envKey: "EMBEDDING_MODEL_MAX_CHUNK_LENGTH",
checks: [nonZero],
},

// Vector Database Selection Settings
VectorDB: {
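The updateENV internals are not part of this diff, so the following is only a simplified sketch of how a KEY_MAPPING entry like the one above is typically applied: look up the mapping for the incoming key, run its checks, and write the value to the mapped env key only when every check passes (nonZero is assumed to return an error string for empty or zero values):

// Simplified, assumed flow; not the actual updateENV implementation.
function applySetting(key, value, keyMapping) {
  const mapping = keyMapping[key];
  if (!mapping) return { error: `${key} is not a supported setting.` };

  // Run each validator; keep the first failure message, if any.
  const error = mapping.checks
    .map((check) => check(value))
    .find((result) => typeof result === "string");
  if (error) return { error };

  process.env[mapping.envKey] = value; // e.g. EMBEDDING_MODEL_MAX_CHUNK_LENGTH
  return { error: null };
}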
9 changes: 7 additions & 2 deletions server/utils/vectorDbProviders/chroma/index.js
@@ -2,7 +2,11 @@ const { ChromaClient } = require("chromadb");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid");
const { toChunks, getLLMProvider } = require("../../helpers");
const {
toChunks,
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");

const Chroma = {
name: "Chroma",
@@ -175,7 +179,8 @@
// because we then cannot atomically control our namespace to granularly find/remove documents
// from vectordb.
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
chunkSize:
getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
chunkOverlap: 20,
});
const textChunks = await textSplitter.splitText(pageContent);
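To make the fallback explicit: the splitter's chunkSize now follows the active embedder's embeddingMaxChunkLength and only falls back to 1,000 characters when no embedder limit is available. The same change is repeated for the Lance, Pinecone, QDrant, and Weaviate providers below. A small standalone sketch of the behavior:

const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { getEmbeddingEngineSelection } = require("../../helpers");

async function splitForEmbedding(pageContent) {
  // Use the embedder's per-chunk character limit when one is configured,
  // otherwise keep the previous hard-coded 1,000-character chunks.
  const chunkSize =
    getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000;
  const textSplitter = new RecursiveCharacterTextSplitter({
    chunkSize,
    chunkOverlap: 20,
  });
  return await textSplitter.splitText(pageContent);
}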
9 changes: 7 additions & 2 deletions server/utils/vectorDbProviders/lance/index.js
@@ -1,5 +1,9 @@
const lancedb = require("vectordb");
const { toChunks, getLLMProvider } = require("../../helpers");
const {
toChunks,
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
@@ -176,7 +180,8 @@ const LanceDb = {
// because we then cannot atomically control our namespace to granularly find/remove documents
// from vectordb.
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
chunkSize:
getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
chunkOverlap: 20,
});
const textChunks = await textSplitter.splitText(pageContent);
9 changes: 7 additions & 2 deletions server/utils/vectorDbProviders/pinecone/index.js
@@ -2,7 +2,11 @@ const { PineconeClient } = require("@pinecone-database/pinecone");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid");
const { toChunks, getLLMProvider } = require("../../helpers");
const {
toChunks,
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");

const Pinecone = {
name: "Pinecone",
@@ -130,7 +134,8 @@ const Pinecone = {
// from vectordb.
// https://github.com/hwchase17/langchainjs/blob/2def486af734c0ca87285a48f1a04c057ab74bdf/langchain/src/vectorstores/pinecone.ts#L167
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
chunkSize:
getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
chunkOverlap: 20,
});
const textChunks = await textSplitter.splitText(pageContent);
9 changes: 7 additions & 2 deletions server/utils/vectorDbProviders/qdrant/index.js
@@ -2,7 +2,11 @@ const { QdrantClient } = require("@qdrant/js-client-rest");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid");
const { toChunks, getLLMProvider } = require("../../helpers");
const {
toChunks,
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");

const QDrant = {
name: "QDrant",
@@ -174,7 +178,8 @@ const QDrant = {
// because we then cannot atomically control our namespace to granularly find/remove documents
// from vectordb.
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
chunkSize:
getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
chunkOverlap: 20,
});
const textChunks = await textSplitter.splitText(pageContent);
9 changes: 7 additions & 2 deletions server/utils/vectorDbProviders/weaviate/index.js
@@ -2,7 +2,11 @@ const { default: weaviate } = require("weaviate-ts-client");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid");
const { toChunks, getLLMProvider } = require("../../helpers");
const {
toChunks,
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { camelCase } = require("../../helpers/camelcase");

const Weaviate = {
@@ -237,7 +241,8 @@ const Weaviate = {
// because we then cannot atomically control our namespace to granularly find/remove documents
// from vectordb.
const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: 1000,
chunkSize:
getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
chunkOverlap: 20,
});
const textChunks = await textSplitter.splitText(pageContent);