From 7090f42ad922af68cf8c3d14cdb17c60b1d071be Mon Sep 17 00:00:00 2001 From: Shuyou Date: Tue, 9 Jan 2024 19:33:07 +0800 Subject: [PATCH 1/3] issue #543 support milvus vector db --- README.md | 1 + .../MilvusDBOptions/index.jsx | 38 ++ frontend/src/media/vectordbs/milvus.png | Bin 0 -> 4250 bytes .../GeneralSettings/VectorDatabase/index.jsx | 9 + server/.env.example | 5 + server/models/systemSettings.js | 6 + server/package.json | 3 +- server/utils/helpers/index.js | 3 + server/utils/helpers/updateENV.js | 19 +- .../vectorDbProviders/milvus/MILVUS_SETUP.md | 39 ++ .../utils/vectorDbProviders/milvus/index.js | 336 ++++++++++++++++++ server/yarn.lock | 223 +++++++++++- 12 files changed, 678 insertions(+), 4 deletions(-) create mode 100644 frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx create mode 100644 frontend/src/media/vectordbs/milvus.png create mode 100644 server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md create mode 100644 server/utils/vectorDbProviders/milvus/index.js diff --git a/README.md b/README.md index 5af9278b48d..4249c42bcf8 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ Some cool features of AnythingLLM - [Chroma](https://trychroma.com) - [Weaviate](https://weaviate.io) - [QDrant](https://qdrant.tech) +- [Milvus](https://milvus.io) ### Technical Overview diff --git a/frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx b/frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx new file mode 100644 index 00000000000..d2e6431f724 --- /dev/null +++ b/frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx @@ -0,0 +1,38 @@ +export default function MilvusDBOptions({ settings }) { + return ( +
+
+
+ + +
+ +
+ + +
+
+
+ ); +} diff --git a/frontend/src/media/vectordbs/milvus.png b/frontend/src/media/vectordbs/milvus.png new file mode 100644 index 0000000000000000000000000000000000000000..e1bcbbd105480ebe9bb6d111823f9e5add9a466e GIT binary patch literal 4250 zcmds5`#;m|8%HRo6rr#biky0+;h~Wt=Cm4?93pc*mo_n!3Q>fSIgHB0WHgLyjCw?p z&0)^RBx6n$Lo~iy&-eRJe1Ev_`*poO_v^av>w3Sh!~IFVY;P$cDlf{%$0q@{GKX^4 z@4T^3m^+&|&1Z5Kk#MVP5qx~ICwPM&3{~ar;E#Y>Uf`>wDb8~RK|eD)Gd@1zQ?cJS zg!uRloCcelIo{!48jq@Q>X&K%A>k*s z8$R|rUz?IQk~fq&a=0S(ChU+#mVl^+fauF(_wV_nN2P!HQPJVjnWzwtqJfyV24W}a zrka3z8_TGLo#hEXNhj*gCB7+WKVOTs{NeB?!orHh@%-)no-MlncOr2`$oqX82+9o2 zFhZdjktCf79mG;7-gw=5+<~|<=|D(f$ZrjdZHD8Qf~=QdR|8jsp{M{*b@aW9s((#6 zXIC{JynNQRv&7#0G`qtIS{jy4j`mY8S1AXB8mJ79+N|g}Jv@8!$+H0N_QN6t)!824 zrUhxd^o}ONGV0@5fOS$to>ddEiLoQG=gdPvVMUdA={o;>hNTI5Yx5Rf=3Tvpd;c&yX)O*UxT)eENFeYOf z?37>7$&tevq;(R`1lh>wOA}WYkm8~$m^RmifbOMdxUk^kW#z1)J8p9Lt2tnTRJ!n= z@3g+56VR}2XIA0GwesP`F!(r#G4~gq%{F!I9S$u)kWm3m152ojS2%UHETyMnR{SF3 zhL;4CTCLpwiK!p(B9s{s3mN>Wxc!U~r;Szpo>(ey|4&)B7pCquQzwjZBhGQNz67#@ z20@o2LntvRuVM5bIiz;~j|(>7i>1({yN94o4jW&0suLKl#?~QDGctAyNHB-842ko{ zGqOciAUpVFy}0ruMxg3VGb=7Zr3~9+jYeZumGR(aL!>|(B+sU)=>Y>ty6|eTcbL_F z+n`^i6Q?tJP}GZ_yuw)7PD|q#I>z&sC(s4GF_4wHff<1+4PJB3cVG7+rDdEhQe*d% zUL4b3i>;22f|+O4*j=L)tZpP!$xt8n9%&Z)iVj~oWSf6tg7BEMbV&$B4X&9= zb&!A1{37Tzy+E6!JS(=n2!_(btLt*FA$VwS)12E#aAr{6f8W@H@*?FbISpU&3=vKGcJM&7UkSp))> zeT|Osl+H*SF1cF_W`Sc3Ur4K&40<=g{Mw5=0MLH7NRWi73rmK88z_?%y)`a?m%J^`&t-dhF~>EsWrYI+MBqDDkr zTJq8_#6sS2Sz~_Ot}R}yKEcXpzT#Iw95hIFGx@n=M$0|;iJTTm|DXFIr=pT(Y5H<^ zUbcdaj7`pz4F7nWH=j3?OTN^g}cx;2OfhAikiTIlbE{@*dHb6 zmPpsk`DF|a^0cv%z#x*}mv_$qifg{sI;>>LO_G1z(?YOHwBYrYizWnMvy z^!;T%c;Inr6U8<=(6l{?{4kTpj0VWm56zODz)&lTUP{XKL9`YP18|!!hPTffolM>0 z_wLPsLQux$y_c^(A{s6EO4^Ax|wNc@Qb}5h{kE`b5g64K5}i17z`Z!QUS+m(XRJHnc%}hD9AFS1 zvVM*`eYk#+V-jrp8>H5AiDwgd>L&TMkGVA^rINu+Ef#YdEeHx~*lDu403O0CMQJr$ zviOaYGB7JPF{Swgy;dV^FYk`q51w;V3BjO;1GtRQsu2ke!nA9nje^arK4?c<(wjt{ zMs=Na7i%mzmz9;newdySU4MmIFFx{jw;e+1C+l38X+iPE;ZYTI8U4kmqPKQEr>QP+ 
zg4RIQ`x2ZEh7lQ4pT*iVG$$dYY(6#XN2zcl{0zF~ z>s70!s4G?psZo}tXSr^o(7fUbnTP4G%lzDPVY-mbH-e!GFYyYiw%U~W_ps-P9%cvg ztVk}8J3c4lm$Y8v*bMZ}<5+=zWI<2ncQ^J;sVGKj>^*-~sb= zCqmw5wdbi6F`J*bKRyIcy^B^_^ng7yw;N<;Av--L6JMw=GOQ_Ze@HSRK_3C%5ssBv z<-eOc(vvy3IIIvU!=};#G6|pmYw*TGu6abgRcu&<)k6ZdX$pkjHM|nIn}r^Ni2{2a zWa^*!3*_{R5-Y|5UdbSo`xR#x;I(FZb)9iCH`9083K(>^MiWaJHEUxtQLY!1VGFCD zl$uo#DHD8?1a1@|VuNad%?*CK*(pca7qo1=3MB@fM?dn3_zXFZ%xrH(m()FJxUZ$n zOPaGtmEqJMRddD@Q;{XKkwa29f&!YW>c4@il8Xi~n`>=Q4_9gHlyOWqCGJt{?0wTm zcXVpI00t?)6ci92oaHz(XPr#(z_i@o0e`GRDO-x!O%I}MK(6hrdQu}ELEzu3e?xci zHGQ#CR8!>n;>($|0a6P|dHE-?;ax06(_5X^SE(W z=QfBVS~P#0?S{1|M0ZDcxju-!PFr02fw-V--+gV_%t*HIw~x9=PVGLTR_{zIIgyNN zB_b;!`!hE~fL}cN8ghJr_r-YGAwT~}Fmmez7P#|83ooWZ^IfJ#yC3nc=?j}XtiBvU zb5mifrsJdSZ#VS0Td#xP2RcFsqQruOtCM<i$Vm-^bMhc zs2b-7x5^@H<`#rYO+IJ3nPPfHR0D|2H#Tv{Nbaq`ZV#4P0SsNT{5irPlS9!I28SGs z2zCj3=}0+VoEpb*V4t!a1CA%j^ODbUoF|-jpNcPh016G_zqEGAPCTDo_`GH+p>n7? zB4M|rrUy)5+mCXhUpZ4SV4r%uI+(bu~nE1SzA>pu#=b|Kd>X70ELD|&4 zLcC_3iIw4Sutge3&7`u0+1iQl>2Aq|=6q{_mJJ2(F1YJ%yc=1JOF9J&5ptup6&*uZ zbsl)kLDh+c#2Db0lqXa(oOoR6P<=zxr9YLrPV~`AE;0tQ@Iqu;{d)H!pz4PvH^XN_6Ai61LsdRRm)pCFEc zMe4G3!sYsOu-GPKJVS)+6=0J=>#ZiW++ET@c*biMog|>6$<&=IcRLbmGXvqU7z18G z^_CtQ!VG0@{_}9aUO_>6bYy;6gfQe%LGfFR8%aw6kZYC`1q!GD!lfw@kHNI<~um5j}9&(nF99Vtfr`CwvvdYH}j6-9kO@`qK zaU666EIbxh#Eo-13b{Sh*3bpLerG&q%>8v&q0+(Ci7_{Qre|p~E#=qNer_?x*z-sI zX<+OG6kLyZQ!o>d&A*8!Mruu<=#dXuPT%r2pEaVr3JtxO_fN~K_lIV;#`BU(woZK4 zNk9#Dy^awnWk3BY@0jEq79!Vv?xM!);H5h3C?N7fKH+y>w>wLyOW=P!l0yDCN%eZI zwrY7E)ncO5&r718)6y{}=MmGlW?R)c3wC#z;%d>)*{WTxU*94tYVhDJfq2#&XVP%L zPGPfd(BUCnKfpqTkp*=C^3Nd5vPN`pl+{Bl1w<)g$eV+;8iVh_Rm0OrIOZr?@k!`f7g3nhe#1Tyih>+FWvGx<6Iwba8v*4!cjF#;i z@M#l0TwKBT#kHNa!BO$kJ;TGRdDbDB-#@7uBl=8k{0=E`w7Kp|e3s?A#cr+$`hfC| zSzHm~vYFQr$bQ{w!df7X@;q3Zn2N1mSR{@=@=V^so79tl zQ?FGf+?29oZ@uC4uCrB+a4qJ(B}xg=`u+5^wU1W^g%!`s*^T6^tz|xWJoegz~Klbqa szbCDSd*`!UH%f5DJ^xSQ&@SJYucTO~g+l`OR{|f{!rr{{g6IAJ04Mq>`v3p{ literal 0 HcmV?d00001 diff --git a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx 
b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx index 9ef9cff2dd9..f49054b900a 100644 --- a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx +++ b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx @@ -8,6 +8,7 @@ import PineconeLogo from "@/media/vectordbs/pinecone.png"; import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; +import MilvusLogo from "@/media/vectordbs/milvus.png"; import PreLoader from "@/components/Preloader"; import ChangeWarningModal from "@/components/ChangeWarning"; import { MagnifyingGlass } from "@phosphor-icons/react"; @@ -17,6 +18,7 @@ import PineconeDBOptions from "@/components/VectorDBSelection/PineconeDBOptions" import QDrantDBOptions from "@/components/VectorDBSelection/QDrantDBOptions"; import WeaviateDBOptions from "@/components/VectorDBSelection/WeaviateDBOptions"; import VectorDBItem from "@/components/VectorDBSelection/VectorDBItem"; +import MilvusDBOptions from "@/components/VectorDBSelection/MilvusDBOptions"; export default function GeneralVectorDatabase() { const [saving, setSaving] = useState(false); @@ -79,6 +81,13 @@ export default function GeneralVectorDatabase() { description: "Open source local and cloud hosted multi-modal vector database.", }, + { + name: "Milvus", + value: "milvus", + logo: MilvusLogo, + options: , + description: "Open-source, highly scalable, and blazing fast.", + }, ]; const updateVectorChoice = (selection) => { diff --git a/server/.env.example b/server/.env.example index e41ab63d077..c92c31903c8 100644 --- a/server/.env.example +++ b/server/.env.example @@ -86,6 +86,11 @@ VECTOR_DB="lancedb" # QDRANT_ENDPOINT="http://localhost:6333" # QDRANT_API_KEY= +# Enable all below if you are using vector database: Milvus. 
+# VECTOR_DB="milvus" +# MILVUS_ENDPOINT="http://localhost:19530" +# MILVUS_USERNAME= +# MILVUS_PASSWORD= # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 29c2238ff1b..3c4608f9da6 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -56,6 +56,12 @@ const SystemSettings = { QdrantApiKey: process.env.QDRANT_API_KEY, } : {}), + ...(vectorDB === "milvus" + ? { + MilvusAddress: process.env.MILVUS_ADDRESS, + MilvusToken: process.env.MILVUS_TOKEN, + } + : {}), LLMProvider: llmProvider, ...(llmProvider === "openai" ? { diff --git a/server/package.json b/server/package.json index 0e2d909c8c3..69cb790c38d 100644 --- a/server/package.json +++ b/server/package.json @@ -28,6 +28,7 @@ "@prisma/client": "5.3.0", "@qdrant/js-client-rest": "^1.4.0", "@xenova/transformers": "^2.10.0", + "@zilliz/milvus2-sdk-node": "^2.3.5", "archiver": "^5.3.1", "bcrypt": "^5.1.0", "body-parser": "^1.20.2", @@ -77,4 +78,4 @@ "nodemon": "^2.0.22", "prettier": "^3.0.3" } -} \ No newline at end of file +} diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index ac702936214..1685acc1a0e 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -16,6 +16,9 @@ function getVectorDbClass() { case "qdrant": const { QDrant } = require("../vectorDbProviders/qdrant"); return QDrant; + case "milvus": + const { Milvus } = require("../vectorDbProviders/milvus"); + return Milvus; default: throw new Error("ENV: No VECTOR_DB value found in environment!"); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index e6e97df5f2f..2d6aa545709 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -170,6 +170,16 @@ const KEY_MAPPING = { checks: [], }, + // Milvus Options + MilvusAddress: { + envKey: "MILVUS_ADDRESS", + 
checks: [isValidURL, validDockerizedUrl], + }, + MilvusToken: { + envKey: "MILVUS_TOKEN", + checks: [], + }, + // Together Ai Options TogetherAiApiKey: { envKey: "TOGETHER_AI_API_KEY", @@ -279,7 +289,14 @@ function supportedEmbeddingModel(input = "") { } function supportedVectorDB(input = "") { - const supported = ["chroma", "pinecone", "lancedb", "weaviate", "qdrant"]; + const supported = [ + "chroma", + "pinecone", + "lancedb", + "weaviate", + "qdrant", + "milvus", + ]; return supported.includes(input) ? null : `Invalid VectorDB type. Must be one of ${supported.join(", ")}.`; diff --git a/server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md b/server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md new file mode 100644 index 00000000000..b7fb43b433c --- /dev/null +++ b/server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md @@ -0,0 +1,39 @@ +# How to set up a local (or remote) Milvus Vector Database + +[Official Milvus Docs](https://milvus.io/docs/example_code.md) for reference. + +### How to get started + +**Requirements** + +Choose one of the following: + +- Cloud + + - [Cloud account](https://cloud.zilliz.com/) + +- Local + - Docker + - `git` available in your CLI/terminal + +**Instructions** + +- Cloud + + - Create a Cluster on your cloud account + - Get the cluster's Public Endpoint and Token + - Set the variables shown below in `server/.env.development` + +- Local + - Download the docker-compose YAML file: `wget https://github.com/milvus-io/milvus/releases/download/v2.3.4/milvus-standalone-docker-compose.yml -O docker-compose.yml` + - Start Milvus: `sudo docker compose up -d` + - Check that the containers are up and running: `sudo docker compose ps` + - Note the port number and set the variables shown below in `server/.env.development` + +e.g. `server/.env.development`: + +``` +VECTOR_DB="milvus" +MILVUS_ENDPOINT="http://localhost:19530" +MILVUS_TOKEN="{username}:{password}" +``` diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js new file mode 100644 index 00000000000..0fc6d4210f5
// Patch framing carried over verbatim from the original text (the hunk header predates this rewrite): --- /dev/null +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -0,0 +1,336 @@
const {
  DataType,
  MetricType,
  IndexType,
  OrmClient,
} = require("@zilliz/milvus2-sdk-node");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { v4: uuidv4 } = require("uuid");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const {
  toChunks,
  getLLMProvider,
  getEmbeddingEngineSelection,
} = require("../../helpers");

// Vector width used for new collections when the caller cannot supply the
// real embedding dimension (this was previously hard-coded in the schema).
const DEFAULT_VECTOR_DIMENSION = 1536;

// Module-level client so every call shares one connection object.
let client = null;

/**
 * Milvus vector database provider.
 * A "namespace" in this provider is a Milvus collection of the same name.
 */
const Milvus = {
  name: "Milvus",

  /**
   * Lazily create the shared OrmClient and verify it with a health check.
   * @returns {Promise<{client: object}>} the connected client.
   * @throws {Error} when VECTOR_DB is not "milvus" or the health check fails.
   */
  connect: async function () {
    if (process.env.VECTOR_DB !== "milvus")
      throw new Error("Milvus::Invalid ENV settings");

    if (!client) {
      client = new OrmClient({
        // The setup guide documents MILVUS_ENDPOINT while the settings key
        // mapping persists MILVUS_ADDRESS, so accept whichever is present.
        address: process.env.MILVUS_ENDPOINT || process.env.MILVUS_ADDRESS,
        token: process.env.MILVUS_TOKEN,
      });
    }

    const { isHealthy } = await client.checkHealth();
    if (!isHealthy)
      throw new Error(
        "MilvusDB::Invalid Heartbeat received - is the instance online?"
      );

    return { client };
  },

  /**
   * Confirm the database is reachable.
   * @returns {Promise<{heartbeat: number}>} current time in epoch milliseconds.
   */
  heartbeat: async function () {
    await this.connect();
    return { heartbeat: Number(new Date()) };
  },

  /**
   * Sum the entity counts of every collection.
   * @returns {Promise<number>} total number of stored vectors.
   */
  totalVectors: async function () {
    const { client } = await this.connect();
    const collections = await client.collections();
    // An async reducer would hand a Promise back as the accumulator, so the
    // counts are awaited one at a time in a plain loop instead.
    let total = 0;
    for (const collection of collections) {
      total += Number(await collection.count());
    }
    return total;
  },

  /**
   * Count the vectors in one namespace.
   * @param {string|null} _namespace - collection name.
   * @returns {Promise<number>} entity count, or 0 when the collection is not found.
   * @throws {Error} when no namespace is given.
   */
  namespaceCount: async function (_namespace = null) {
    const { client } = await this.connect();
    const collection = await this.namespace(client, _namespace);
    return (await collection?.count()) ?? 0;
  },

  /**
   * Look up the collection handle for a namespace.
   * @param {object} client - connected client from `connect`.
   * @param {string|null} namespace - collection name.
   * @returns {Promise<object|undefined>} first matching collection, or undefined
   *   when nothing matches or the lookup fails.
   * @throws {Error} when no namespace is given.
   */
  namespace: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const collections = await client
      .collections({ collection_names: [namespace] })
      .catch(() => []);
    return collections?.[0];
  },

  /**
   * Connect and report whether a namespace exists.
   * @param {string|null} namespace - collection name.
   * @returns {Promise<boolean>} false when no namespace is given.
   */
  hasNamespace: async function (namespace = null) {
    if (!namespace) return false;
    const { client } = await this.connect();
    return await this.namespaceExists(client, namespace);
  },

  /**
   * Report whether a collection exists, using an already-connected client.
   * A failed lookup is logged and treated as "does not exist".
   * @param {object} client - connected client from `connect`.
   * @param {string|null} namespace - collection name.
   * @returns {Promise<boolean>}
   * @throws {Error} when no namespace is given.
   */
  namespaceExists: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const { value } = await client
      .hasCollection({ collection_name: namespace })
      .catch((e) => {
        console.error("MilvusDB::namespaceExists", e.message);
        return { value: null };
      });
    return !!value;
  },

  /**
   * Drop the whole collection backing a namespace.
   * @param {object} client - connected client from `connect`.
   * @param {string|null} namespace - collection name.
   * @returns {Promise<boolean>} always true once the drop call resolves.
   */
  deleteVectorsInNamespace: async function (client, namespace = null) {
    await client.dropCollection({ collection_name: namespace });
    return true;
  },

  /**
   * Create, index and load the collection for a namespace if it is missing.
   * Does nothing when the collection already exists.
   * @param {object} client - connected client from `connect`.
   * @param {string} namespace - collection name.
   * @param {number} [dimensions=1536] - width of the vectors to be stored.
   * @returns {Promise<void>}
   */
  getOrCreateCollection: async function (
    client,
    namespace,
    dimensions = DEFAULT_VECTOR_DIMENSION
  ) {
    const alreadyExists = await this.namespaceExists(client, namespace);
    if (alreadyExists) return;

    await client.createCollection({
      collection_name: namespace,
      fields: [
        {
          name: "id",
          description: "id",
          data_type: DataType.VarChar,
          max_length: 255,
          is_primary_key: true,
        },
        {
          name: "vector",
          description: "vector",
          data_type: DataType.FloatVector,
          dim: dimensions,
        },
        {
          name: "metadata",
          description: "metadata",
          data_type: DataType.JSON,
        },
      ],
    });
    await client.createIndex({
      collection_name: namespace,
      field_name: "vector",
      index_type: IndexType.AUTOINDEX,
      metric_type: MetricType.COSINE,
    });
    await client.loadCollectionSync({
      collection_name: namespace,
    });
  },

  /**
   * Embed a document (or reuse its cached vectors) and store it in a namespace.
   * Every failure is logged and reported as `false` rather than thrown.
   * @param {string} namespace - collection name.
   * @param {object} documentData - must carry `pageContent` and `docId`; all
   *   remaining keys are stored as metadata.
   * @param {string|null} fullFilePath - key used for the vector cache.
   * @returns {Promise<boolean>} true when the document was stored and recorded.
   */
  addDocumentToNamespace: async function (
    namespace,
    documentData = {},
    fullFilePath = null
  ) {
    const { DocumentVectors } = require("../../../models/vectors");
    try {
      const { pageContent, docId, ...metadata } = documentData;
      if (!pageContent || pageContent.length === 0) return false;

      console.log("Adding new vectorized document into namespace", namespace);
      const cacheResult = await cachedVectorInformation(fullFilePath);
      if (cacheResult.exists) {
        const { client } = await this.connect();
        const { chunks } = cacheResult;
        await this.getOrCreateCollection(
          client,
          namespace,
          chunks?.[0]?.[0]?.values?.length || DEFAULT_VECTOR_DIMENSION
        );

        const documentVectors = [];
        for (const chunk of chunks) {
          // Before sending to Milvus and saving the records to our db
          // we need to assign a fresh id to each cached vector.
          const records = chunk.map((cached) => {
            const id = uuidv4();
            documentVectors.push({ docId, vectorId: id });
            return { id, vector: cached.values, metadata: cached.metadata };
          });
          // A failed insert must abort the whole document (handled by the
          // outer catch); otherwise ids of vectors that were never stored
          // would still be recorded below.
          await client.insert({
            collection_name: namespace,
            data: records,
          });
        }
        await DocumentVectors.bulkInsert(documentVectors);
        return true;
      }

      // If we are here then we are going to embed and store a novel document.
      // We have to do this manually as opposed to using LangChain's
      // `fromDocuments` helpers because we then cannot atomically control our
      // namespace to granularly find/remove documents from the vector db.
      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize:
          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
        chunkOverlap: 20,
      });
      const textChunks = await textSplitter.splitText(pageContent);

      console.log("Chunks created from document:", textChunks.length);
      const LLMConnector = getLLMProvider();
      const vectorValues = await LLMConnector.embedChunks(textChunks);
      if (!vectorValues || vectorValues.length === 0) {
        throw new Error(
          "Could not embed document chunks! This document will not be recorded."
        );
      }

      const documentVectors = [];
      const vectors = [];
      for (const [i, vector] of vectorValues.entries()) {
        const vectorRecord = {
          id: uuidv4(),
          values: vector,
          // [DO NOT REMOVE]
          // LangChain will be unable to find your text if you embed manually and dont include the `text` key.
          metadata: { ...metadata, text: textChunks[i] },
        };

        vectors.push(vectorRecord);
        documentVectors.push({ docId, vectorId: vectorRecord.id });
      }

      const { client } = await this.connect();
      // The collection has to exist before the first upsert; size it from the
      // vectors the embedder actually produced.
      await this.getOrCreateCollection(
        client,
        namespace,
        vectors[0].values.length
      );

      const chunks = [];
      console.log("Inserting vectorized chunks into Milvus.");
      for (const chunk of toChunks(vectors, 100)) {
        chunks.push(chunk);
        await client.upsert({
          collection_name: namespace,
          data: chunk.map((item) => ({
            id: item.id,
            vector: item.values,
            // Per-record metadata; `chunk` is the batch array and has none.
            metadata: item.metadata,
          })),
        });
      }
      await storeVectorResult(chunks, fullFilePath);

      await DocumentVectors.bulkInsert(documentVectors);
      return true;
    } catch (e) {
      console.error(e);
      console.error("addDocumentToNamespace", e.message);
      return false;
    }
  },

  /**
   * Remove every vector recorded for a document from a namespace.
   * @param {string} namespace - collection name.
   * @param {string} docId - document whose vectors should be removed.
   * @returns {Promise<boolean|undefined>} true after deletion; undefined when
   *   the namespace does not exist or no vectors are recorded for the document.
   */
  deleteDocumentFromNamespace: async function (namespace, docId) {
    const { DocumentVectors } = require("../../../models/vectors");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) return;
    const knownDocuments = await DocumentVectors.where({ docId });
    if (knownDocuments.length === 0) return;

    const vectorIds = knownDocuments.map((doc) => doc.vectorId);
    await client.delete({ collection_name: namespace, ids: vectorIds });

    const indexes = knownDocuments.map((doc) => doc.id);
    await DocumentVectors.deleteIds(indexes);
    return true;
  },

  /**
   * Embed the input text and return the closest stored chunks.
   * @param {object} params
   * @param {string|null} params.namespace - collection name.
   * @param {string} params.input - query text.
   * @param {object|null} params.LLMConnector - must provide `embedTextInput`.
   * @param {number} [params.similarityThreshold=0.25] - minimum score to keep.
   * @returns {Promise<{contextTexts: string[], sources: object[], message: (string|false)}>}
   * @throws {Error} when namespace, input or LLMConnector is missing.
   */
  performSimilaritySearch: async function ({
    namespace = null,
    input = "",
    LLMConnector = null,
    similarityThreshold = 0.25,
  }) {
    if (!namespace || !input || !LLMConnector)
      throw new Error("Invalid request to performSimilaritySearch.");

    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) {
      return {
        contextTexts: [],
        sources: [],
        message: "Invalid query - no documents found for workspace!",
      };
    }

    const queryVector = await LLMConnector.embedTextInput(input);
    const { contextTexts, sourceDocuments } = await this.similarityResponse(
      client,
      namespace,
      queryVector,
      similarityThreshold
    );

    const sources = sourceDocuments.map((metadata, i) => {
      return { ...metadata, text: contextTexts[i] };
    });
    return {
      contextTexts,
      sources: this.curateSources(sources),
      message: false,
    };
  },

  /**
   * Run the vector search and keep matches at or above the threshold.
   * @param {object} client - connected client from `connect`.
   * @param {string} namespace - collection name.
   * @param {number[]} queryVector - embedded query.
   * @param {number} [similarityThreshold=0.25] - minimum score to keep.
   * @returns {Promise<{contextTexts: string[], sourceDocuments: object[], scores: number[]}>}
   */
  similarityResponse: async function (
    client,
    namespace,
    queryVector,
    similarityThreshold = 0.25
  ) {
    const result = {
      contextTexts: [],
      sourceDocuments: [],
      scores: [],
    };
    const response = await client.search({
      collection_name: namespace,
      vectors: queryVector,
    });
    response.results.forEach((match) => {
      if (match.score < similarityThreshold) return;
      // Skip rows stored without text metadata instead of throwing on them.
      if (!match.metadata?.text) return;
      result.contextTexts.push(match.metadata.text);
      result.sourceDocuments.push(match);
      result.scores.push(match.score);
    });
    return result;
  },

  /**
   * Return the collection handle for a namespace as its "stats".
   * @param {object} reqBody - expects `{ namespace }`.
   * @returns {Promise<object>} the collection, or a message object when it
   *   could not be fetched.
   * @throws {Error} when the namespace is missing or does not exist.
   */
  "namespace-stats": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    if (!namespace) throw new Error("namespace required");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");
    const stats = await this.namespace(client, namespace);
    return stats
      ? stats
      : { message: "No stats were able to be fetched from DB for namespace" };
  },

  /**
   * Drop a namespace and report how many vectors it held.
   * @param {object} reqBody - expects `{ namespace }`.
   * @returns {Promise<{message: string}>}
   * @throws {Error} when the namespace is missing or does not exist.
   */
  "delete-namespace": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");

    const details = await this.namespace(client, namespace);
    // Count BEFORE dropping: counting afterwards would query a collection
    // that no longer exists. The count only feeds the message, so a failure
    // here is logged and must not block the deletion itself.
    let vectorCount = 0;
    try {
      vectorCount = (await details?.count()) ?? 0;
    } catch (e) {
      console.error("MilvusDB::delete-namespace", e.message);
    }
    await this.deleteVectorsInNamespace(client, namespace);
    return {
      message: `Namespace ${namespace} was deleted along with ${vectorCount} vectors.`,
    };
  },

  /**
   * Flatten search sources into plain metadata documents.
   * Sources without any metadata are dropped; a `pageContent` property, when
   * present on the source, is exposed as `text`.
   * @param {object[]} sources
   * @returns {object[]}
   */
  curateSources: function (sources = []) {
    const documents = [];
    for (const source of sources) {
      const { metadata = {} } = source;
      if (Object.keys(metadata).length === 0) continue;

      documents.push({
        ...metadata,
        ...(Object.prototype.hasOwnProperty.call(source, "pageContent")
          ? { text: source.pageContent }
          : {}),
      });
    }

    return documents;
  },
};

module.exports.Milvus = Milvus;
// Patch framing carried over verbatim from the original text (start of the yarn.lock diff, continued on the following line): diff --git a/server/yarn.lock b/server/yarn.lock index 6215bf01fd7..175a67947d1 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -160,6 +160,20 @@ "@azure/logger" "^1.0.3" tslib "^2.4.0" +"@colors/colors@1.6.0", "@colors/colors@^1.6.0": + version "1.6.0" + resolved "https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz#ec6cd237440700bc23ca23087f513c75508958b0" + integrity sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA== + +"@dabh/diagnostics@^2.0.2": + version "2.0.3" + resolved "https://registry.yarnpkg.com/@dabh/diagnostics/-/diagnostics-2.0.3.tgz#7f7e97ee9a725dffc7808d93668cc984e1dc477a" + integrity sha512-hrlQOIi7hAfzsMqlGSFyVucrx38O+j6wiGOf//H2ecvIEqYN4ADBSS2iLMh5UFyDunCNniUIPk/q3riFv45xRA== + dependencies: + colorspace "1.1.x" + enabled "2.0.x" + kuler "^2.0.0" + "@eslint-community/eslint-utils@^4.2.0": version "4.4.0" resolved "https://registry.yarnpkg.com/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz#a23514e8fb9af1269d5f7788aa556798d61c6b59" @@ -214,6 +228,35 @@ resolved "https://registry.yarnpkg.com/@graphql-typed-document-node/core/-/core-3.2.0.tgz#5f3d96ec6b2354ad6d8a28bf216a1d97b5426861" integrity sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ== +"@grpc/grpc-js@1.8.17": + version "1.8.17" + resolved "https://registry.yarnpkg.com/@grpc/grpc-js/-/grpc-js-1.8.17.tgz#a3a2f826fc033eae7d2f5ee41e0ab39cee948838" + integrity sha512-DGuSbtMFbaRsyffMf+VEkVu8HkSXEUfO3UyGJNtqxW9ABdtTIA+2UXAJpwbJS+xfQxuwqLUeELmL6FuZkOqPxw== + dependencies: + "@grpc/proto-loader" "^0.7.0" + "@types/node" ">=12.12.47" + +"@grpc/proto-loader@0.7.7": + version "0.7.7" + resolved "https://registry.yarnpkg.com/@grpc/proto-loader/-/proto-loader-0.7.7.tgz#d33677a77eea8407f7c66e2abd97589b60eb4b21" + integrity
sha512-1TIeXOi8TuSCQprPItwoMymZXxWT0CPxUhkrkeCUH+D8U7QDwQ6b7SUz2MaLuWM2llT+J/TVFLmQI5KtML3BhQ== + dependencies: + "@types/long" "^4.0.1" + lodash.camelcase "^4.3.0" + long "^4.0.0" + protobufjs "^7.0.0" + yargs "^17.7.2" + +"@grpc/proto-loader@^0.7.0": + version "0.7.10" + resolved "https://registry.yarnpkg.com/@grpc/proto-loader/-/proto-loader-0.7.10.tgz#6bf26742b1b54d0a473067743da5d3189d06d720" + integrity sha512-CAqDfoaQ8ykFd9zqBDn4k6iWT9loLAlc2ETmDFS9JCD70gDcnA4L3AFEo2iV7KyAtAAHFW9ftq1Fz+Vsgq80RQ== + dependencies: + lodash.camelcase "^4.3.0" + long "^5.0.0" + protobufjs "^7.2.4" + yargs "^17.7.2" + "@hapi/hoek@^9.0.0": version "9.3.0" resolved "https://registry.yarnpkg.com/@hapi/hoek/-/hoek-9.3.0.tgz#8368869dcb735be2e7f5cb7647de78e167a251fb" @@ -755,6 +798,13 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-18.14.5.tgz#4a13a6445862159303fc38586598a9396fc408b3" integrity sha512-CRT4tMK/DHYhw1fcCEBwME9CSaZNclxfzVMe7GsO6ULSwsttbj70wSiX6rZdIjGblu93sTJxLdhNIT85KKI7Qw== +"@types/node@>=12.12.47": + version "20.10.8" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.10.8.tgz#f1e223cbde9e25696661d167a5b93a9b2a5d57c7" + integrity sha512-f8nQs3cLxbAFc00vEU59yf9UyGUftkPaLGfvbVOIDdx2i1b8epBqj2aNGyP19fiyXWvlmZ7qC1XLjAzw/OKIeA== + dependencies: + undici-types "~5.26.4" + "@types/node@>=13.7.0": version "20.10.3" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.10.3.tgz#4900adcc7fc189d5af5bb41da8f543cea6962030" @@ -779,6 +829,11 @@ resolved "https://registry.yarnpkg.com/@types/retry/-/retry-0.12.0.tgz#2b35eccfcee7d38cd72ad99232fbd58bffb3c84d" integrity sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA== +"@types/triple-beam@^1.3.2": + version "1.3.5" + resolved "https://registry.yarnpkg.com/@types/triple-beam/-/triple-beam-1.3.5.tgz#74fef9ffbaa198eb8b588be029f38b00299caa2c" + integrity sha512-6WaYesThRMCl19iryMYP7/x2OVgCtbIVflDGFpWnb9irXI3UjYE4AzmYuiUKY1AJstGijoY+MgUszMgRxIYTYw== + 
"@types/uuid@^9.0.1": version "9.0.7" resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-9.0.7.tgz#b14cebc75455eeeb160d5fe23c2fcc0c64f724d8" @@ -806,6 +861,18 @@ optionalDependencies: onnxruntime-node "1.14.0" +"@zilliz/milvus2-sdk-node@^2.3.5": + version "2.3.5" + resolved "https://registry.yarnpkg.com/@zilliz/milvus2-sdk-node/-/milvus2-sdk-node-2.3.5.tgz#6540bc03ebb99ab35f63e4eca7a1fd3ede2cf38c" + integrity sha512-bWbQnhvu+7jZXoqI+qySycwph3vloy0LDV54TBY4wRmu6HhMlqIqyIiI8sQNeSJFs8M1jHg1PlmhE/dvckA1bA== + dependencies: + "@grpc/grpc-js" "1.8.17" + "@grpc/proto-loader" "0.7.7" + dayjs "^1.11.7" + lru-cache "^9.1.2" + protobufjs "7.2.4" + winston "^3.9.0" + abbrev@1: version "1.1.1" resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8" @@ -1487,7 +1554,7 @@ cmake-js@^7.2.1: which "^2.0.2" yargs "^17.6.0" -color-convert@^1.9.0: +color-convert@^1.9.0, color-convert@^1.9.3: version "1.9.3" resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8" integrity sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg== @@ -1511,7 +1578,7 @@ color-name@^1.0.0, color-name@~1.1.4: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== -color-string@^1.9.0: +color-string@^1.6.0, color-string@^1.9.0: version "1.9.1" resolved "https://registry.yarnpkg.com/color-string/-/color-string-1.9.1.tgz#4467f9146f036f855b764dfb5bf8582bf342c7a4" integrity sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg== @@ -1524,6 +1591,14 @@ color-support@^1.1.2, color-support@^1.1.3: resolved "https://registry.yarnpkg.com/color-support/-/color-support-1.1.3.tgz#93834379a1cc9a0c61f82f52f0d04322251bd5a2" integrity 
sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg== +color@^3.1.3: + version "3.2.1" + resolved "https://registry.yarnpkg.com/color/-/color-3.2.1.tgz#3544dc198caf4490c3ecc9a790b54fe9ff45e164" + integrity sha512-aBl7dZI9ENN6fUGC7mWpMTPNHmWUSNan9tuWN6ahh5ZLNk9baLJOnSMlrQkHcrfFgz2/RigjUVAjdx36VcemKA== + dependencies: + color-convert "^1.9.3" + color-string "^1.6.0" + color@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/color/-/color-4.2.3.tgz#d781ecb5e57224ee43ea9627560107c0e0c6463a" @@ -1537,6 +1612,14 @@ colors@^1.4.0: resolved "https://registry.yarnpkg.com/colors/-/colors-1.4.0.tgz#c50491479d4c1bdaed2c9ced32cf7c7dc2360f78" integrity sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA== +colorspace@1.1.x: + version "1.1.4" + resolved "https://registry.yarnpkg.com/colorspace/-/colorspace-1.1.4.tgz#8d442d1186152f60453bf8070cd66eb364e59243" + integrity sha512-BgvKJiuVu1igBUF2kEjRCZXol6wiiGbY5ipL/oVPwm0BL9sIpMIzM8IK7vwuxIIzOXMV3Ey5w+vxhm0rR/TN8w== + dependencies: + color "^3.1.3" + text-hex "1.0.x" + combined-stream@^1.0.8: version "1.0.8" resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" @@ -1680,6 +1763,11 @@ crypt@0.0.2: resolved "https://registry.yarnpkg.com/crypt/-/crypt-0.0.2.tgz#88d7ff7ec0dfb86f713dc87bbb42d044d3e6c41b" integrity sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow== +dayjs@^1.11.7: + version "1.11.10" + resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.11.10.tgz#68acea85317a6e164457d6d6947564029a6a16a0" + integrity sha512-vjAczensTgRcqDERK0SR2XMwsF/tSvnvlv6VcF2GIhg6Sx4yOIt/irsr1RDJsKiIyBzJDpCoXiWWq28MqH2cnQ== + debug@2.6.9: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" @@ -1835,6 +1923,11 @@ emoji-regex@^8.0.0: resolved 
"https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-8.0.0.tgz#e818fd69ce5ccfcb404594f842963bf53164cc37" integrity sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A== +enabled@2.0.x: + version "2.0.0" + resolved "https://registry.yarnpkg.com/enabled/-/enabled-2.0.0.tgz#f9dd92ec2d6f4bbc0d5d1e64e21d61cd4665e7c2" + integrity sha512-AKrN98kuwOzMIdAizXGI86UFBoo26CL21UM763y1h/GMSJ4/OHU9k2YlsmBpyScFo/wbLzWQJBMCW4+IO3/+OQ== + encode32@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/encode32/-/encode32-1.1.0.tgz#0c54b45fb314ad5502e3c230cb95acdc5e5cd1dd" @@ -2254,6 +2347,11 @@ fd-slicer@~1.1.0: dependencies: pend "~1.2.0" +fecha@^4.2.0: + version "4.2.3" + resolved "https://registry.yarnpkg.com/fecha/-/fecha-4.2.3.tgz#4d9ccdbc61e8629b259fdca67e65891448d569fd" + integrity sha512-OP2IUU6HeYKJi3i0z4A19kHMQoLVs4Hc+DPqqxI2h/DPZHTm/vjsfC6P0b4jCMy14XizLBqvndQ+UilD7707Jw== + file-entry-cache@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027" @@ -2339,6 +2437,11 @@ flow-remove-types@^2.217.1: pirates "^3.0.2" vlq "^0.2.1" +fn.name@1.x.x: + version "1.1.0" + resolved "https://registry.yarnpkg.com/fn.name/-/fn.name-1.1.0.tgz#26cad8017967aea8731bc42961d04a3d5988accc" + integrity sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw== + follow-redirects@^1.14.8, follow-redirects@^1.14.9: version "1.15.2" resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.2.tgz#b460864144ba63f2681096f274c4e57026da2c13" @@ -3344,6 +3447,11 @@ keyv@^4.5.3: dependencies: json-buffer "3.0.1" +kuler@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/kuler/-/kuler-2.0.0.tgz#e2c570a3800388fb44407e851531c1d670b061b3" + integrity sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A== + ky@^0.33.1: version "0.33.3" resolved 
"https://registry.yarnpkg.com/ky/-/ky-0.33.3.tgz#bf1ad322a3f2c3428c13cfa4b3af95e6c4a2f543" @@ -3500,11 +3608,28 @@ log-symbols@^5.1.0: chalk "^5.0.0" is-unicode-supported "^1.1.0" +logform@^2.3.2, logform@^2.4.0: + version "2.6.0" + resolved "https://registry.yarnpkg.com/logform/-/logform-2.6.0.tgz#8c82a983f05d6eaeb2d75e3decae7a768b2bf9b5" + integrity sha512-1ulHeNPp6k/LD8H91o7VYFBng5i1BDE7HoKxVbZiGFidS1Rj65qcywLxX+pVfAPoQJEjRdvKcusKwOupHCVOVQ== + dependencies: + "@colors/colors" "1.6.0" + "@types/triple-beam" "^1.3.2" + fecha "^4.2.0" + ms "^2.1.1" + safe-stable-stringify "^2.3.1" + triple-beam "^1.3.0" + long@^4.0.0: version "4.0.0" resolved "https://registry.yarnpkg.com/long/-/long-4.0.0.tgz#9a7b71cfb7d361a194ea555241c92f7468d5bf28" integrity sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA== +long@^5.0.0: + version "5.2.3" + resolved "https://registry.yarnpkg.com/long/-/long-5.2.3.tgz#a3ba97f3877cf1d778eccbcb048525ebb77499e1" + integrity sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q== + loose-envify@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf" @@ -3524,6 +3649,11 @@ lru-cache@^6.0.0: dependencies: yallist "^4.0.0" +lru-cache@^9.1.2: + version "9.1.2" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-9.1.2.tgz#255fdbc14b75589d6d0e73644ca167a8db506835" + integrity sha512-ERJq3FOzJTxBbFjZ7iDs+NiK4VI9Wz+RdrrAB8dio1oV+YvdPzUEE4QNiT2VD51DkIbCYRUUzCRkssXCHqSnKQ== + make-dir@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f" @@ -4042,6 +4172,13 @@ once@^1.3.0, once@^1.3.1, once@^1.4.0: dependencies: wrappy "1" +one-time@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/one-time/-/one-time-1.0.0.tgz#e06bc174aed214ed58edede573b433bbf827cb45" + integrity 
sha512-5DXOiRKwuSEcQ/l0kGCF6Q3jcADFv5tSmRaJck/OqkVFcOzutB134KRSfF0xDrL39MNnqxbHBbUUcjZIhTgb2g== + dependencies: + fn.name "1.x.x" + onetime@^5.1.0: version "5.1.2" resolved "https://registry.yarnpkg.com/onetime/-/onetime-5.1.2.tgz#d0e96ebb56b07476df1dd9c4806e5237985ca45e" @@ -4334,6 +4471,24 @@ prop-types@^15.8.1: object-assign "^4.1.1" react-is "^16.13.1" +protobufjs@7.2.4: + version "7.2.4" + resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-7.2.4.tgz#3fc1ec0cdc89dd91aef9ba6037ba07408485c3ae" + integrity sha512-AT+RJgD2sH8phPmCf7OUZR8xGdcJRga4+1cOaXJ64hvcSkVhNcRHOwIxUatPH15+nj59WAGTDv3LSGZPEQbJaQ== + dependencies: + "@protobufjs/aspromise" "^1.1.2" + "@protobufjs/base64" "^1.1.2" + "@protobufjs/codegen" "^2.0.4" + "@protobufjs/eventemitter" "^1.1.0" + "@protobufjs/fetch" "^1.1.0" + "@protobufjs/float" "^1.0.2" + "@protobufjs/inquire" "^1.1.0" + "@protobufjs/path" "^1.1.2" + "@protobufjs/pool" "^1.1.0" + "@protobufjs/utf8" "^1.1.0" + "@types/node" ">=13.7.0" + long "^5.0.0" + protobufjs@^6.8.8: version "6.11.4" resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-6.11.4.tgz#29a412c38bf70d89e537b6d02d904a6f448173aa" @@ -4353,6 +4508,24 @@ protobufjs@^6.8.8: "@types/node" ">=13.7.0" long "^4.0.0" +protobufjs@^7.0.0, protobufjs@^7.2.4: + version "7.2.5" + resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-7.2.5.tgz#45d5c57387a6d29a17aab6846dcc283f9b8e7f2d" + integrity sha512-gGXRSXvxQ7UiPgfw8gevrfRWcTlSbOFg+p/N+JVJEK5VhueL2miT6qTymqAmjr1Q5WbOCyJbyrk6JfWKwlFn6A== + dependencies: + "@protobufjs/aspromise" "^1.1.2" + "@protobufjs/base64" "^1.1.2" + "@protobufjs/codegen" "^2.0.4" + "@protobufjs/eventemitter" "^1.1.0" + "@protobufjs/fetch" "^1.1.0" + "@protobufjs/float" "^1.0.2" + "@protobufjs/inquire" "^1.1.0" + "@protobufjs/path" "^1.1.2" + "@protobufjs/pool" "^1.1.0" + "@protobufjs/utf8" "^1.1.0" + "@types/node" ">=13.7.0" + long "^5.0.0" + proxy-addr@~2.0.7: version "2.0.7" resolved 
"https://registry.yarnpkg.com/proxy-addr/-/proxy-addr-2.0.7.tgz#f19fe69ceab311eeb94b42e70e8c2070f9ba1025" @@ -4605,6 +4778,11 @@ safe-regex-test@^1.0.0: get-intrinsic "^1.1.3" is-regex "^1.1.4" +safe-stable-stringify@^2.3.1: + version "2.4.3" + resolved "https://registry.yarnpkg.com/safe-stable-stringify/-/safe-stable-stringify-2.4.3.tgz#138c84b6f6edb3db5f8ef3ef7115b8f55ccbf886" + integrity sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g== + "safer-buffer@>= 2.1.2 < 3", "safer-buffer@>= 2.1.2 < 3.0.0": version "2.1.2" resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" @@ -4835,6 +5013,11 @@ ssri@^8.0.0, ssri@^8.0.1: dependencies: minipass "^3.1.1" +stack-trace@0.0.x: + version "0.0.10" + resolved "https://registry.yarnpkg.com/stack-trace/-/stack-trace-0.0.10.tgz#547c70b347e8d32b4e108ea1a2a159e5fdde19c0" + integrity sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg== + statuses@2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/statuses/-/statuses-2.0.1.tgz#55cb000ccf1d48728bd23c685a063998cf1a1b63" @@ -5078,6 +5261,11 @@ tar@^6.0.2, tar@^6.1.11, tar@^6.1.2: mkdirp "^1.0.3" yallist "^4.0.0" +text-hex@1.0.x: + version "1.0.0" + resolved "https://registry.yarnpkg.com/text-hex/-/text-hex-1.0.0.tgz#69dc9c1b17446ee79a92bf5b884bb4b9127506f5" + integrity sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg== + text-table@^0.2.0: version "0.2.0" resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4" @@ -5107,6 +5295,11 @@ tr46@~0.0.3: resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== +triple-beam@^1.3.0: + version "1.4.1" + resolved 
"https://registry.yarnpkg.com/triple-beam/-/triple-beam-1.4.1.tgz#6fde70271dc6e5d73ca0c3b24e2d92afb7441984" + integrity sha512-aZbgViZrg1QNcG+LULa7nhZpJTZSLm/mXnHXnbAbjmN5aSa0y7V+wvv6+4WaBtpISJzThKy+PIPxc1Nq1EJ9mg== + tslib@^2.2.0, tslib@^2.4.0: version "2.6.1" resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.1.tgz#fd8c9a0ff42590b25703c0acb3de3d3f4ede0410" @@ -5448,6 +5641,32 @@ wide-align@^1.1.2, wide-align@^1.1.5: dependencies: string-width "^1.0.2 || 2 || 3 || 4" +winston-transport@^4.5.0: + version "4.6.0" + resolved "https://registry.yarnpkg.com/winston-transport/-/winston-transport-4.6.0.tgz#f1c1a665ad1b366df72199e27892721832a19e1b" + integrity sha512-wbBA9PbPAHxKiygo7ub7BYRiKxms0tpfU2ljtWzb3SjRjv5yl6Ozuy/TkXf00HTAt+Uylo3gSkNwzc4ME0wiIg== + dependencies: + logform "^2.3.2" + readable-stream "^3.6.0" + triple-beam "^1.3.0" + +winston@^3.9.0: + version "3.11.0" + resolved "https://registry.yarnpkg.com/winston/-/winston-3.11.0.tgz#2d50b0a695a2758bb1c95279f0a88e858163ed91" + integrity sha512-L3yR6/MzZAOl0DsysUXHVjOwv8mKZ71TrA/41EIduGpOOV5LQVodqN+QdQ6BS6PJ/RdIshZhq84P/fStEZkk7g== + dependencies: + "@colors/colors" "^1.6.0" + "@dabh/diagnostics" "^2.0.2" + async "^3.2.3" + is-stream "^2.0.0" + logform "^2.4.0" + one-time "^1.0.0" + readable-stream "^3.4.0" + safe-stable-stringify "^2.3.1" + stack-trace "0.0.x" + triple-beam "^1.3.0" + winston-transport "^4.5.0" + wordwrapjs@^4.0.0: version "4.0.1" resolved "https://registry.yarnpkg.com/wordwrapjs/-/wordwrapjs-4.0.1.tgz#d9790bccfb110a0fc7836b5ebce0937b37a8b98f" From dff0e7cc92c5f996ece5ea1a695e7d5cef1e7107 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Fri, 12 Jan 2024 13:16:40 -0800 Subject: [PATCH 2/3] migrate Milvus to use MilvusClient instead of ORM normalize env setup for docs/implementation feat: embedder model dimension added --- docker/.env.example | 6 ++ .../MilvusDBOptions/index.jsx | 24 ++++- .../Steps/DataHandling/index.jsx | 8 ++ .../Steps/VectorDatabaseConnection/index.jsx | 9 ++ 
server/.env.example | 2 +- server/models/systemSettings.js | 3 +- .../EmbeddingEngines/azureOpenAi/index.js | 1 + .../utils/EmbeddingEngines/localAi/index.js | 4 + server/utils/EmbeddingEngines/native/index.js | 1 + server/utils/EmbeddingEngines/openAi/index.js | 1 + server/utils/helpers/updateENV.js | 10 +- .../vectorDbProviders/milvus/MILVUS_SETUP.md | 5 +- .../utils/vectorDbProviders/milvus/index.js | 96 ++++++++++++------- .../utils/vectorDbProviders/qdrant/index.js | 8 +- 14 files changed, 131 insertions(+), 47 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 2f8b2ff35cd..5bd909af66b 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -89,6 +89,12 @@ GID='1000' # QDRANT_ENDPOINT="http://localhost:6333" # QDRANT_API_KEY= +# Enable all below if you are using vector database: Milvus. +# VECTOR_DB="milvus" +# MILVUS_ADDRESS="http://localhost:19530" +# MILVUS_USERNAME= +# MILVUS_PASSWORD= + # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. diff --git a/frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx b/frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx index d2e6431f724..07a0ef2f57e 100644 --- a/frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx +++ b/frontend/src/components/VectorDBSelection/MilvusDBOptions/index.jsx @@ -10,7 +10,7 @@ export default function MilvusDBOptions({ settings }) { type="text" name="MilvusAddress" className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5" - placeholder="http://localhost:19539" + placeholder="http://localhost:19530" defaultValue={settings?.MilvusAddress} required={true} autoComplete="off" @@ -20,14 +20,28 @@ export default function MilvusDBOptions({ settings }) {
+ +
+
+ diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index d9fea4c6219..281f1e8cdd9 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -14,6 +14,7 @@ import PineconeLogo from "@/media/vectordbs/pinecone.png"; import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; +import MilvusLogo from "@/media/vectordbs/milvus.png"; import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; import { useNavigate } from "react-router-dom"; @@ -123,6 +124,13 @@ const VECTOR_DB_PRIVACY = { ], logo: WeaviateLogo, }, + milvus: { + name: "Milvus", + description: [ + "Your vectors and document text are stored on your Milvus instance (cloud or self-hosted)", + ], + logo: MilvusLogo, + }, lancedb: { name: "LanceDB", description: [ diff --git a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx index f451fc3e247..37e0e5b7364 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx @@ -5,6 +5,7 @@ import PineconeLogo from "@/media/vectordbs/pinecone.png"; import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; +import MilvusLogo from "@/media/vectordbs/milvus.png"; import System from "@/models/system"; import paths from "@/utils/paths"; import PineconeDBOptions from "@/components/VectorDBSelection/PineconeDBOptions"; @@ -12,6 +13,7 @@ import ChromaDBOptions from "@/components/VectorDBSelection/ChromaDBOptions"; import QDrantDBOptions from 
"@/components/VectorDBSelection/QDrantDBOptions"; import WeaviateDBOptions from "@/components/VectorDBSelection/WeaviateDBOptions"; import LanceDBOptions from "@/components/VectorDBSelection/LanceDBOptions"; +import MilvusOptions from "@/components/VectorDBSelection/MilvusDBOptions"; import showToast from "@/utils/toast"; import { useNavigate } from "react-router-dom"; import VectorDBItem from "@/components/VectorDBSelection/VectorDBItem"; @@ -81,6 +83,13 @@ export default function VectorDatabaseConnection({ description: "Open source local and cloud hosted multi-modal vector database.", }, + { + name: "Milvus", + value: "milvus", + logo: MilvusLogo, + options: , + description: "Open-source, highly scalable, and blazing fast.", + }, ]; function handleForward() { diff --git a/server/.env.example b/server/.env.example index c92c31903c8..d060e0ab501 100644 --- a/server/.env.example +++ b/server/.env.example @@ -88,7 +88,7 @@ VECTOR_DB="lancedb" # Enable all below if you are using vector database: Milvus. # VECTOR_DB="milvus" -# MILVUS_ENDPOINT="http://localhost:19530" +# MILVUS_ADDRESS="http://localhost:19530" # MILVUS_USERNAME= # MILVUS_PASSWORD= diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 3c4608f9da6..cd008d420f0 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -59,7 +59,8 @@ const SystemSettings = { ...(vectorDB === "milvus" ? 
{ MilvusAddress: process.env.MILVUS_ADDRESS, - MilvusToken: process.env.MILVUS_TOKEN, + MilvusUsername: process.env.MILVUS_USERNAME, + MilvusPassword: !!process.env.MILVUS_PASSWORD, } : {}), LLMProvider: llmProvider, diff --git a/server/utils/EmbeddingEngines/azureOpenAi/index.js b/server/utils/EmbeddingEngines/azureOpenAi/index.js index e80b4b734b3..8cde1fc7cac 100644 --- a/server/utils/EmbeddingEngines/azureOpenAi/index.js +++ b/server/utils/EmbeddingEngines/azureOpenAi/index.js @@ -13,6 +13,7 @@ class AzureOpenAiEmbedder { new AzureKeyCredential(process.env.AZURE_OPENAI_KEY) ); this.openai = openai; + this.dimensions = 1536; // Limit of how many strings we can process in a single pass to stay with resource or network limits // https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request diff --git a/server/utils/EmbeddingEngines/localAi/index.js b/server/utils/EmbeddingEngines/localAi/index.js index 1480755d76a..8e0dd4ede3a 100644 --- a/server/utils/EmbeddingEngines/localAi/index.js +++ b/server/utils/EmbeddingEngines/localAi/index.js @@ -16,6 +16,10 @@ class LocalAiEmbedder { : {}), }); this.openai = new OpenAIApi(config); + // We don't know this for user's set model so for vectorDB integrations that requires dimensionality + // in schema, we will throw an error. + // Applies to Weaviate and Milvus. 
+ this.dimensions = null; // Limit of how many strings we can process in a single pass to stay with resource or network limits this.maxConcurrentChunks = 50; diff --git a/server/utils/EmbeddingEngines/native/index.js b/server/utils/EmbeddingEngines/native/index.js index 69e13a9e3ab..d2acde32aea 100644 --- a/server/utils/EmbeddingEngines/native/index.js +++ b/server/utils/EmbeddingEngines/native/index.js @@ -12,6 +12,7 @@ class NativeEmbedder { : path.resolve(__dirname, `../../../storage/models`) ); this.modelPath = path.resolve(this.cacheDir, "Xenova", "all-MiniLM-L6-v2"); + this.dimensions = 384; // Limit of how many strings we can process in a single pass to stay with resource or network limits this.maxConcurrentChunks = 50; diff --git a/server/utils/EmbeddingEngines/openAi/index.js b/server/utils/EmbeddingEngines/openAi/index.js index 105be9d73ab..31f556e8997 100644 --- a/server/utils/EmbeddingEngines/openAi/index.js +++ b/server/utils/EmbeddingEngines/openAi/index.js @@ -9,6 +9,7 @@ class OpenAiEmbedder { }); const openai = new OpenAIApi(config); this.openai = openai; + this.dimensions = 1536; // Limit of how many strings we can process in a single pass to stay with resource or network limits this.maxConcurrentChunks = 500; diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 2d6aa545709..c699cf2df33 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -175,9 +175,13 @@ const KEY_MAPPING = { envKey: "MILVUS_ADDRESS", checks: [isValidURL, validDockerizedUrl], }, - MilvusToken: { - envKey: "MILVUS_TOKEN", - checks: [], + MilvusUsername: { + envKey: "MILVUS_USERNAME", + checks: [isNotEmpty], + }, + MilvusPassword: { + envKey: "MILVUS_PASSWORD", + checks: [isNotEmpty], }, // Together Ai Options diff --git a/server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md b/server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md index b7fb43b433c..6bd9b815029 100644 --- 
a/server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md +++ b/server/utils/vectorDbProviders/milvus/MILVUS_SETUP.md @@ -34,6 +34,7 @@ eg: `server/.env.development` ``` VECTOR_DB="milvus" -MILVUS_ENDPOINT="http://localhost:19530" -MILVUS_TOKEN="{username}:{password}" +MILVUS_ADDRESS="http://localhost:19530" +MILVUS_USERNAME=minioadmin # Whatever your username and password are +MILVUS_PASSWORD=minioadmin ``` diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js index 0fc6d4210f5..198e0936ac8 100644 --- a/server/utils/vectorDbProviders/milvus/index.js +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -2,7 +2,7 @@ const { DataType, MetricType, IndexType, - OrmClient, + MilvusClient, } = require("@zilliz/milvus2-sdk-node"); const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter"); const { v4: uuidv4 } = require("uuid"); @@ -13,20 +13,17 @@ const { getEmbeddingEngineSelection, } = require("../../helpers"); -let client = null; - const Milvus = { name: "Milvus", connect: async function () { if (process.env.VECTOR_DB !== "milvus") throw new Error("Milvus::Invalid ENV settings"); - if (!client) { - client = new OrmClient({ - address: process.env.MILVUS_ENDPOINT, - token: process.env.MILVUS_TOKEN, - }); - } + const client = new MilvusClient({ + address: process.env.MILVUS_ADDRESS, + username: process.env.MILVUS_USERNAME, + password: process.env.MILVUS_PASSWORD, + }); const { isHealthy } = await client.checkHealth(); if (!isHealthy) @@ -42,24 +39,28 @@ const Milvus = { }, totalVectors: async function () { const { client } = await this.connect(); - const collections = await client.collections(); - const total = collections.reduce(async (acc, collection) => { - const count = await collection.count(); - return acc + count; + const { collection_names } = await client.listCollections(); + const total = collection_names.reduce(async (acc, collection_name) => { + const statistics = await 
client.getCollectionStatistics({ + collection_name, + }); + return Number(acc) + Number(statistics?.data?.row_count ?? 0); }, 0); return total; }, namespaceCount: async function (_namespace = null) { const { client } = await this.connect(); - const namespace = await this.namespace(client, _namespace); - return namespace?.count() ?? 0; + const statistics = await client.getCollectionStatistics({ + collection_name: _namespace, + }); + return Number(statistics?.data?.row_count ?? 0); }, namespace: async function (client, namespace = null) { if (!namespace) throw new Error("No namespace value provided."); - const collections = await client - .collections({ collection_names: [namespace] }) - .catch(() => []); - return collections?.[0]; + const collection = await client + .getCollectionStatistics({ collection_name: namespace }) + .catch(() => null); + return collection; }, hasNamespace: async function (namespace = null) { if (!namespace) return false; @@ -72,9 +73,9 @@ const Milvus = { .hasCollection({ collection_name: namespace }) .catch((e) => { console.error("MilvusDB::namespaceExists", e.message); - return { value: null }; + return { value: false }; }); - return !!value; + return value; }, deleteVectorsInNamespace: async function (client, namespace = null) { await client.dropCollection({ collection_name: namespace }); @@ -83,6 +84,12 @@ const Milvus = { getOrCreateCollection: async function (client, namespace) { const isExists = await this.namespaceExists(client, namespace); if (!isExists) { + const embedder = getEmbeddingEngineSelection(); + if (!embedder.dimensions) + throw new Error( + `Your embedder selection has unknown dimensions output. It should be defined when using ${this.name}. 
Open an issue on Github for support.` + ); + await client.createCollection({ collection_name: namespace, fields: [ @@ -97,7 +104,7 @@ const Milvus = { name: "vector", description: "vector", data_type: DataType.FloatVector, - dim: 1536, + dim: embedder.dimensions, }, { name: "metadata", @@ -144,16 +151,19 @@ const Milvus = { documentVectors.push({ docId, vectorId: id }); return { id, vector: chunk.values, metadata: chunk.metadata }; }); - await client - .insert({ - collection_name: namespace, - data: newChunks, - }) - .catch((e) => { - console.error("MilvusDB::addDocumentToNamespace", e.message); - }); + const insertResult = await client.insert({ + collection_name: namespace, + data: newChunks, + }); + + if (insertResult?.status.error_code !== "Success") { + throw new Error( + `Error embedding into Milvus! Reason:${insertResult?.status.reason}` + ); + } } await DocumentVectors.bulkInsert(documentVectors); + await client.flushSync({ collection_names: [namespace] }); return true; } @@ -196,10 +206,12 @@ const Milvus = { if (vectors.length > 0) { const chunks = []; const { client } = await this.connect(); + await this.getOrCreateCollection(client, namespace); + console.log("Inserting vectorized chunks into Milvus."); for (const chunk of toChunks(vectors, 100)) { chunks.push(chunk); - await client.upsert({ + const insertResult = await client.insert({ collection_name: namespace, data: chunk.map((item) => ({ id: item.id, @@ -207,8 +219,15 @@ const Milvus = { metadata: chunk.metadata, })), }); + + if (insertResult?.status.error_code !== "Success") { + throw new Error( + `Error embedding into Milvus! 
Reason:${insertResult?.status.reason}` + ); + } } await storeVectorResult(chunks, fullFilePath); + await client.flushSync({ collection_names: [namespace] }); } await DocumentVectors.bulkInsert(documentVectors); @@ -227,10 +246,19 @@ const Milvus = { if (knownDocuments.length === 0) return; const vectorIds = knownDocuments.map((doc) => doc.vectorId); - await client.delete({ collection_name: namespace, ids: vectorIds }); + const queryIn = vectorIds.map((v) => `'${v}'`).join(","); + await client.deleteEntities({ + collection_name: namespace, + expr: `id in [${queryIn}]`, + }); const indexes = knownDocuments.map((doc) => doc.id); await DocumentVectors.deleteIds(indexes); + + // Even after flushing Milvus can take some time to re-calc the count + // so all we can hope to do is flushSync so that the count can be correct + // on a later call. + await client.flushSync({ collection_names: [namespace] }); return true; }, performSimilaritySearch: async function ({ @@ -308,9 +336,9 @@ const Milvus = { if (!(await this.namespaceExists(client, namespace))) throw new Error("Namespace by that name does not exist."); - const details = await this.namespace(client, namespace); + const statistics = await this.namespace(client, namespace); await this.deleteVectorsInNamespace(client, namespace); - const vectorCount = await details?.count(); + const vectorCount = Number(statistics?.data?.row_count ?? 
0); return { message: `Namespace ${namespace} was deleted along with ${vectorCount} vectors.`, }; diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js index 49b25a3d65b..ddc3408da8b 100644 --- a/server/utils/vectorDbProviders/qdrant/index.js +++ b/server/utils/vectorDbProviders/qdrant/index.js @@ -112,9 +112,15 @@ const QDrant = { if (await this.namespaceExists(client, namespace)) { return await client.getCollection(namespace); } + + const embedder = getEmbeddingEngineSelection(); + if (!embedder.dimensions) + throw new Error( + `Your embedder selection has unknown dimensions output. It should be defined when using ${this.name}. Open an issue on Github for support.` + ); await client.createCollection(namespace, { vectors: { - size: 1536, //TODO: Fixed to OpenAI models - when other embeddings exist make variable. + size: embedder.dimensions, distance: "Cosine", }, }); From b369abb9f6e21cfc32c48c39f9d901cbf17ac286 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Fri, 12 Jan 2024 13:21:25 -0800 Subject: [PATCH 3/3] update comments --- server/utils/EmbeddingEngines/localAi/index.js | 2 +- server/utils/vectorDbProviders/milvus/index.js | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/server/utils/EmbeddingEngines/localAi/index.js b/server/utils/EmbeddingEngines/localAi/index.js index 8e0dd4ede3a..6f9d721b9bc 100644 --- a/server/utils/EmbeddingEngines/localAi/index.js +++ b/server/utils/EmbeddingEngines/localAi/index.js @@ -18,7 +18,7 @@ class LocalAiEmbedder { this.openai = new OpenAIApi(config); // We don't know this for user's set model so for vectorDB integrations that requires dimensionality // in schema, we will throw an error. - // Applies to Weaviate and Milvus. + // Applies to QDrant and Milvus. 
this.dimensions = null; // Limit of how many strings we can process in a single pass to stay with resource or network limits diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js index 198e0936ac8..a9104784bb8 100644 --- a/server/utils/vectorDbProviders/milvus/index.js +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -167,10 +167,6 @@ const Milvus = { return true; } - // If we are here then we are going to embed and store a novel document. - // We have to do this manually as opposed to using LangChains `Chroma.fromDocuments` - // because we then cannot atomically control our namespace to granularly find/remove documents - // from vectordb. const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,