这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion server/utils/EmbeddingEngines/azureOpenAi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class AzureOpenAiEmbedder {
new AzureKeyCredential(process.env.AZURE_OPENAI_KEY)
);
this.openai = openai;
this.dimensions = 1536;

// Limit of how many strings we can process in a single pass to stay with resource or network limits
// https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request
Expand Down
4 changes: 0 additions & 4 deletions server/utils/EmbeddingEngines/localAi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ class LocalAiEmbedder {
: {}),
});
this.openai = new OpenAIApi(config);
// We don't know this for user's set model so for vectorDB integrations that requires dimensionality
// in schema, we will throw an error.
// Applies to QDrant and Milvus.
this.dimensions = null;

// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.maxConcurrentChunks = 50;
Expand Down
1 change: 0 additions & 1 deletion server/utils/EmbeddingEngines/native/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class NativeEmbedder {
: path.resolve(__dirname, `../../../storage/models`)
);
this.modelPath = path.resolve(this.cacheDir, "Xenova", "all-MiniLM-L6-v2");
this.dimensions = 384;

// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.maxConcurrentChunks = 25;
Expand Down
1 change: 0 additions & 1 deletion server/utils/EmbeddingEngines/openAi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ class OpenAiEmbedder {
});
const openai = new OpenAIApi(config);
this.openai = openai;
this.dimensions = 1536;

// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.maxConcurrentChunks = 500;
Expand Down
20 changes: 12 additions & 8 deletions server/utils/vectorDbProviders/milvus/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,15 @@ const Milvus = {
await client.dropCollection({ collection_name: namespace });
return true;
},
getOrCreateCollection: async function (client, namespace) {
// Milvus requires a dimension aspect for collection creation
// we pass this in from the first chunk to infer the dimensions like other
// providers do.
getOrCreateCollection: async function (client, namespace, dimensions = null) {
const isExists = await this.namespaceExists(client, namespace);
if (!isExists) {
const embedder = getEmbeddingEngineSelection();
if (!embedder.dimensions)
if (!dimensions)
throw new Error(
`Your embedder selection has unknown dimensions output. It should be defined when using ${this.name}. Open an issue on Github for support.`
`Milvus:getOrCreateCollection Unable to infer vector dimension from input. Open an issue on Github for support.`
);

await client.createCollection({
Expand All @@ -104,7 +106,7 @@ const Milvus = {
name: "vector",
description: "vector",
data_type: DataType.FloatVector,
dim: embedder.dimensions,
dim: dimensions,
},
{
name: "metadata",
Expand All @@ -131,18 +133,19 @@ const Milvus = {
) {
const { DocumentVectors } = require("../../../models/vectors");
try {
let vectorDimension = null;
const { pageContent, docId, ...metadata } = documentData;
if (!pageContent || pageContent.length == 0) return false;

console.log("Adding new vectorized document into namespace", namespace);
const cacheResult = await cachedVectorInformation(fullFilePath);
if (cacheResult.exists) {
const { client } = await this.connect();
await this.getOrCreateCollection(client, namespace);

const { chunks } = cacheResult;
const documentVectors = [];
vectorDimension = chunks[0][0].values.length || null;

await this.getOrCreateCollection(client, namespace, vectorDimension);
for (const chunk of chunks) {
// Before sending to Pinecone and saving the records to our db
// we need to assign the id of each chunk that is stored in the cached file.
Expand Down Expand Up @@ -182,6 +185,7 @@ const Milvus = {

if (!!vectorValues && vectorValues.length > 0) {
for (const [i, vector] of vectorValues.entries()) {
if (!vectorDimension) vectorDimension = vector.length;
const vectorRecord = {
id: uuidv4(),
values: vector,
Expand All @@ -202,7 +206,7 @@ const Milvus = {
if (vectors.length > 0) {
const chunks = [];
const { client } = await this.connect();
await this.getOrCreateCollection(client, namespace);
await this.getOrCreateCollection(client, namespace, vectorDimension);

console.log("Inserting vectorized chunks into Milvus.");
for (const chunk of toChunks(vectors, 100)) {
Expand Down
34 changes: 23 additions & 11 deletions server/utils/vectorDbProviders/qdrant/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,20 @@ const QDrant = {
await client.deleteCollection(namespace);
return true;
},
getOrCreateCollection: async function (client, namespace) {
// QDrant requires a dimension aspect for collection creation
// we pass this in from the first chunk to infer the dimensions like other
// providers do.
getOrCreateCollection: async function (client, namespace, dimensions = null) {
if (await this.namespaceExists(client, namespace)) {
return await client.getCollection(namespace);
}

const embedder = getEmbeddingEngineSelection();
if (!embedder.dimensions)
if (!dimensions)
throw new Error(
`Your embedder selection has unknown dimensions output. It should be defined when using ${this.name}. Open an issue on Github for support.`
`Qdrant:getOrCreateCollection Unable to infer vector dimension from input. Open an issue on Github for support.`
);
await client.createCollection(namespace, {
vectors: {
size: embedder.dimensions,
size: dimensions,
distance: "Cosine",
},
});
Expand All @@ -133,22 +134,28 @@ const QDrant = {
) {
const { DocumentVectors } = require("../../../models/vectors");
try {
let vectorDimension = null;
const { pageContent, docId, ...metadata } = documentData;
if (!pageContent || pageContent.length == 0) return false;

console.log("Adding new vectorized document into namespace", namespace);
const cacheResult = await cachedVectorInformation(fullFilePath);
if (cacheResult.exists) {
const { client } = await this.connect();
const collection = await this.getOrCreateCollection(client, namespace);
const { chunks } = cacheResult;
const documentVectors = [];
vectorDimension = chunks[0][0].vector.length || null;

const collection = await this.getOrCreateCollection(
client,
namespace,
vectorDimension
);
if (!collection)
throw new Error("Failed to create new QDrant collection!", {
namespace,
});

const { chunks } = cacheResult;
const documentVectors = [];

for (const chunk of chunks) {
const submission = {
ids: [],
Expand Down Expand Up @@ -210,6 +217,7 @@ const QDrant = {

if (!!vectorValues && vectorValues.length > 0) {
for (const [i, vector] of vectorValues.entries()) {
if (!vectorDimension) vectorDimension = vector.length;
const vectorRecord = {
id: uuidv4(),
vector: vector,
Expand All @@ -233,7 +241,11 @@ const QDrant = {
}

const { client } = await this.connect();
const collection = await this.getOrCreateCollection(client, namespace);
const collection = await this.getOrCreateCollection(
client,
namespace,
vectorDimension
);
if (!collection)
throw new Error("Failed to create new QDrant collection!", {
namespace,
Expand Down