这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions server/utils/vectorDbProviders/milvus/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ const {

const Milvus = {
name: "Milvus",
// Milvus/Zilliz only allows letters, numbers, and underscores in collection names
// so we need to enforce that by re-normalizing the names when communicating with
// the DB.
normalize: function (inputString) {
return inputString.replace(/[^a-zA-Z0-9_]/g, "_");
},
connect: async function () {
if (process.env.VECTOR_DB !== "milvus")
throw new Error("Milvus::Invalid ENV settings");
Expand Down Expand Up @@ -42,7 +48,7 @@ const Milvus = {
const { collection_names } = await client.listCollections();
const total = collection_names.reduce(async (acc, collection_name) => {
const statistics = await client.getCollectionStatistics({
collection_name,
collection_name: this.normalize(collection_name),
});
return Number(acc) + Number(statistics?.data?.row_count ?? 0);
}, 0);
Expand All @@ -51,14 +57,14 @@ const Milvus = {
namespaceCount: async function (_namespace = null) {
const { client } = await this.connect();
const statistics = await client.getCollectionStatistics({
collection_name: _namespace,
collection_name: this.normalize(_namespace),
});
return Number(statistics?.data?.row_count ?? 0);
},
namespace: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const collection = await client
.getCollectionStatistics({ collection_name: namespace })
.getCollectionStatistics({ collection_name: this.normalize(namespace) })
.catch(() => null);
return collection;
},
Expand All @@ -70,15 +76,15 @@ const Milvus = {
namespaceExists: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const { value } = await client
.hasCollection({ collection_name: namespace })
.hasCollection({ collection_name: this.normalize(namespace) })
.catch((e) => {
console.error("MilvusDB::namespaceExists", e.message);
return { value: false };
});
return value;
},
deleteVectorsInNamespace: async function (client, namespace = null) {
await client.dropCollection({ collection_name: namespace });
await client.dropCollection({ collection_name: this.normalize(namespace) });
return true;
},
// Milvus requires a dimension aspect for collection creation
Expand All @@ -93,7 +99,7 @@ const Milvus = {
);

await client.createCollection({
collection_name: namespace,
collection_name: this.normalize(namespace),
fields: [
{
name: "id",
Expand All @@ -116,13 +122,13 @@ const Milvus = {
],
});
await client.createIndex({
collection_name: namespace,
collection_name: this.normalize(namespace),
field_name: "vector",
index_type: IndexType.AUTOINDEX,
metric_type: MetricType.COSINE,
});
await client.loadCollectionSync({
collection_name: namespace,
collection_name: this.normalize(namespace),
});
}
},
Expand Down Expand Up @@ -155,7 +161,7 @@ const Milvus = {
return { id, vector: chunk.values, metadata: chunk.metadata };
});
const insertResult = await client.insert({
collection_name: namespace,
collection_name: this.normalize(namespace),
data: newChunks,
});

Expand All @@ -166,7 +172,9 @@ const Milvus = {
}
}
await DocumentVectors.bulkInsert(documentVectors);
await client.flushSync({ collection_names: [namespace] });
await client.flushSync({
collection_names: [this.normalize(namespace)],
});
return { vectorized: true, error: null };
}

Expand Down Expand Up @@ -212,7 +220,7 @@ const Milvus = {
for (const chunk of toChunks(vectors, 100)) {
chunks.push(chunk);
const insertResult = await client.insert({
collection_name: namespace,
collection_name: this.normalize(namespace),
data: chunk.map((item) => ({
id: item.id,
vector: item.values,
Expand All @@ -227,7 +235,9 @@ const Milvus = {
}
}
await storeVectorResult(chunks, fullFilePath);
await client.flushSync({ collection_names: [namespace] });
await client.flushSync({
collection_names: [this.normalize(namespace)],
});
}

await DocumentVectors.bulkInsert(documentVectors);
Expand All @@ -247,7 +257,7 @@ const Milvus = {
const vectorIds = knownDocuments.map((doc) => doc.vectorId);
const queryIn = vectorIds.map((v) => `'${v}'`).join(",");
await client.deleteEntities({
collection_name: namespace,
collection_name: this.normalize(namespace),
expr: `id in [${queryIn}]`,
});

Expand All @@ -257,7 +267,7 @@ const Milvus = {
// Even after flushing Milvus can take some time to re-calc the count
// so all we can hope to do is flushSync so that the count can be correct
// on a later call.
await client.flushSync({ collection_names: [namespace] });
await client.flushSync({ collection_names: [this.normalize(namespace)] });
return true;
},
performSimilaritySearch: async function ({
Expand Down Expand Up @@ -310,7 +320,7 @@ const Milvus = {
scores: [],
};
const response = await client.search({
collection_name: namespace,
collection_name: this.normalize(namespace),
vectors: queryVector,
limit: topN,
});
Expand Down
40 changes: 25 additions & 15 deletions server/utils/vectorDbProviders/zilliz/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ const {
// to connect to the cloud
const Zilliz = {
name: "Zilliz",
// Milvus/Zilliz only allows letters, numbers, and underscores in collection names
// so we need to enforce that by re-normalizing the names when communicating with
// the DB.
normalize: function (inputString) {
return inputString.replace(/[^a-zA-Z0-9_]/g, "_");
},
connect: async function () {
if (process.env.VECTOR_DB !== "zilliz")
throw new Error("Zilliz::Invalid ENV settings");
Expand All @@ -43,7 +49,7 @@ const Zilliz = {
const { collection_names } = await client.listCollections();
const total = collection_names.reduce(async (acc, collection_name) => {
const statistics = await client.getCollectionStatistics({
collection_name,
collection_name: this.normalize(collection_name),
});
return Number(acc) + Number(statistics?.data?.row_count ?? 0);
}, 0);
Expand All @@ -52,14 +58,14 @@ const Zilliz = {
namespaceCount: async function (_namespace = null) {
const { client } = await this.connect();
const statistics = await client.getCollectionStatistics({
collection_name: _namespace,
collection_name: this.normalize(_namespace),
});
return Number(statistics?.data?.row_count ?? 0);
},
namespace: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const collection = await client
.getCollectionStatistics({ collection_name: namespace })
.getCollectionStatistics({ collection_name: this.normalize(namespace) })
.catch(() => null);
return collection;
},
Expand All @@ -71,15 +77,15 @@ const Zilliz = {
namespaceExists: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const { value } = await client
.hasCollection({ collection_name: namespace })
.hasCollection({ collection_name: this.normalize(namespace) })
.catch((e) => {
console.error("Zilliz::namespaceExists", e.message);
return { value: false };
});
return value;
},
deleteVectorsInNamespace: async function (client, namespace = null) {
await client.dropCollection({ collection_name: namespace });
await client.dropCollection({ collection_name: this.normalize(namespace) });
return true;
},
// Zilliz requires a dimension aspect for collection creation
Expand All @@ -94,7 +100,7 @@ const Zilliz = {
);

await client.createCollection({
collection_name: namespace,
collection_name: this.normalize(namespace),
fields: [
{
name: "id",
Expand All @@ -117,13 +123,13 @@ const Zilliz = {
],
});
await client.createIndex({
collection_name: namespace,
collection_name: this.normalize(namespace),
field_name: "vector",
index_type: IndexType.AUTOINDEX,
metric_type: MetricType.COSINE,
});
await client.loadCollectionSync({
collection_name: namespace,
collection_name: this.normalize(namespace),
});
}
},
Expand Down Expand Up @@ -156,7 +162,7 @@ const Zilliz = {
return { id, vector: chunk.values, metadata: chunk.metadata };
});
const insertResult = await client.insert({
collection_name: namespace,
collection_name: this.normalize(namespace),
data: newChunks,
});

Expand All @@ -167,7 +173,9 @@ const Zilliz = {
}
}
await DocumentVectors.bulkInsert(documentVectors);
await client.flushSync({ collection_names: [namespace] });
await client.flushSync({
collection_names: [this.normalize(namespace)],
});
return { vectorized: true, error: null };
}

Expand Down Expand Up @@ -213,7 +221,7 @@ const Zilliz = {
for (const chunk of toChunks(vectors, 100)) {
chunks.push(chunk);
const insertResult = await client.insert({
collection_name: namespace,
collection_name: this.normalize(namespace),
data: chunk.map((item) => ({
id: item.id,
vector: item.values,
Expand All @@ -228,7 +236,9 @@ const Zilliz = {
}
}
await storeVectorResult(chunks, fullFilePath);
await client.flushSync({ collection_names: [namespace] });
await client.flushSync({
collection_names: [this.normalize(namespace)],
});
}

await DocumentVectors.bulkInsert(documentVectors);
Expand All @@ -248,7 +258,7 @@ const Zilliz = {
const vectorIds = knownDocuments.map((doc) => doc.vectorId);
const queryIn = vectorIds.map((v) => `'${v}'`).join(",");
await client.deleteEntities({
collection_name: namespace,
collection_name: this.normalize(namespace),
expr: `id in [${queryIn}]`,
});

Expand All @@ -258,7 +268,7 @@ const Zilliz = {
// Even after flushing Zilliz can take some time to re-calc the count
// so all we can hope to do is flushSync so that the count can be correct
// on a later call.
await client.flushSync({ collection_names: [namespace] });
await client.flushSync({ collection_names: [this.normalize(namespace)] });
return true;
},
performSimilaritySearch: async function ({
Expand Down Expand Up @@ -311,7 +321,7 @@ const Zilliz = {
scores: [],
};
const response = await client.search({
collection_name: namespace,
collection_name: this.normalize(namespace),
vectors: queryVector,
limit: topN,
});
Expand Down