@@ -235,7 +235,7 @@ function WorkspaceDirectory({
}`}
</p>
<p className="mt-2 text-xs italic" hidden={embeddingCosts === 0}>
{t("new-workspace.costs")}
{t("connectors.directory.costs")}
</p>
</div>

2 changes: 1 addition & 1 deletion frontend/src/index.css
@@ -117,7 +117,7 @@
--theme-chat-input-border: #cccccc;
--theme-action-menu-bg: #eaeaea;
--theme-action-menu-item-hover: rgba(0, 0, 0, 0.1);
- --theme-settings-input-bg: #EDF2FA;
+ --theme-settings-input-bg: #edf2fa;
--theme-settings-input-placeholder: rgba(0, 0, 0, 0.5);
--theme-settings-input-active: rgb(0 0 0 / 0.2);
--theme-settings-input-text: #0e0f0f;
3 changes: 1 addition & 2 deletions server/package.json
@@ -21,7 +21,6 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.39.0",
"@aws-sdk/client-bedrock-runtime": "^3.775.0",
"@azure/openai": "1.0.0-beta.10",
"@datastax/astra-db-ts": "^0.1.3",
"@google/generative-ai": "^0.7.1",
"@ladjs/graceful": "^3.2.2",
@@ -67,7 +66,7 @@
"multer": "^1.4.5-lts.1",
"mysql2": "^3.9.8",
"ollama": "^0.5.10",
"openai": "4.38.5",
"openai": "4.95.1",
"pg": "^8.11.5",
"pinecone-client": "^1.1.0",
"pluralize": "^8.0.0",
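The dependency change above is the core of this PR: the @azure/openai beta client is removed and Azure requests move onto the AzureOpenAI class that ships with the official openai package. A minimal sketch of the client construction before and after (env var names mirror the ones used later in this diff; nothing here is repo code):

// Before: @azure/openai 1.0.0-beta.10
// const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
// const client = new OpenAIClient(endpoint, new AzureKeyCredential(apiKey));

// After: the AzureOpenAI class bundled with openai >= 4.x
const { AzureOpenAI } = require("openai");
const client = new AzureOpenAI({
  endpoint: process.env.AZURE_OPENAI_ENDPOINT,
  apiKey: process.env.AZURE_OPENAI_KEY,
  apiVersion: "2024-12-01-preview", // same version pinned later in this diff
});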
98 changes: 22 additions & 76 deletions server/utils/AiProviders/azureOpenAi/index.js
@@ -1,29 +1,26 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+ const {
+ LLMPerformanceMonitor,
+ } = require("../../helpers/chat/LLMPerformanceMonitor");
const {
- writeResponseChunk,
- clientAbortedHandler,
formatChatHistory,
+ handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
- const {
- LLMPerformanceMonitor,
- } = require("../../helpers/chat/LLMPerformanceMonitor");

class AzureOpenAiLLM {
constructor(embedder = null, modelPreference = null) {
- const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
+ const { AzureOpenAI } = require("openai");
if (!process.env.AZURE_OPENAI_ENDPOINT)
throw new Error("No Azure API endpoint was set.");
if (!process.env.AZURE_OPENAI_KEY)
throw new Error("No Azure API key was set.");

this.apiVersion = "2024-12-01-preview";
- this.openai = new OpenAIClient(
- process.env.AZURE_OPENAI_ENDPOINT,
- new AzureKeyCredential(process.env.AZURE_OPENAI_KEY),
- {
- apiVersion: this.apiVersion,
- }
- );
+ this.openai = new AzureOpenAI({
+ apiKey: process.env.AZURE_OPENAI_KEY,
+ apiVersion: this.apiVersion,
+ endpoint: process.env.AZURE_OPENAI_ENDPOINT,
+ });
this.model = modelPreference ?? process.env.OPEN_MODEL_PREF;
this.isOTypeModel =
process.env.AZURE_OPENAI_MODEL_TYPE === "reasoning" || false;
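Note on the constructor above: with the openai SDK, the model value sent on each request is interpreted as the Azure deployment name (still read from OPEN_MODEL_PREF here). The SDK also accepts a client-level deployment option; a hedged sketch of that alternative, which is not what this diff does:

const { AzureOpenAI } = require("openai");
// Pins the client to a single deployment so requests are routed to its URL.
const scopedClient = new AzureOpenAI({
  apiKey: process.env.AZURE_OPENAI_KEY,
  endpoint: process.env.AZURE_OPENAI_ENDPOINT,
  apiVersion: "2024-12-01-preview",
  deployment: process.env.OPEN_MODEL_PREF, // Azure deployment name
});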
@@ -139,7 +136,9 @@ class AzureOpenAiLLM {
);

const result = await LLMPerformanceMonitor.measureAsyncFunction(
- this.openai.getChatCompletions(this.model, messages, {
+ this.openai.chat.completions.create({
+ messages,
+ model: this.model,
...(this.isOTypeModel ? {} : { temperature }),
})
);
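For reference, the call above now resolves to a standard OpenAI chat completion object. An illustrative, self-contained sketch of the same request surface (not repo code; client is assumed to be an AzureOpenAI instance like the one built in the constructor):

async function askDeployment(client, prompt) {
  const completion = await client.chat.completions.create({
    model: process.env.OPEN_MODEL_PREF, // Azure deployment name
    messages: [{ role: "user", content: prompt }],
    temperature: 0.7,
  });
  // Same fields the metrics block below reads: choices plus snake_case usage.
  return {
    text: completion.choices[0].message.content,
    usage: completion.usage, // { prompt_tokens, completion_tokens, total_tokens }
  };
}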
@@ -153,10 +152,10 @@
return {
textResponse: result.output.choices[0].message.content,
metrics: {
- prompt_tokens: result.output.usage.promptTokens || 0,
- completion_tokens: result.output.usage.completionTokens || 0,
- total_tokens: result.output.usage.totalTokens || 0,
- outputTps: result.output.usage.completionTokens / result.duration,
+ prompt_tokens: result.output.usage.prompt_tokens || 0,
+ completion_tokens: result.output.usage.completion_tokens || 0,
+ total_tokens: result.output.usage.total_tokens || 0,
+ outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
},
};
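The rename above follows directly from the SDK swap: the old @azure/openai client reported usage in camelCase (promptTokens), while the openai v4 SDK returns the API's snake_case keys. A hypothetical helper, not part of this diff, that spells out the mapping:

function normalizeUsage(usage = {}) {
  return {
    prompt_tokens: usage.prompt_tokens ?? usage.promptTokens ?? 0,
    completion_tokens: usage.completion_tokens ?? usage.completionTokens ?? 0,
    total_tokens: usage.total_tokens ?? usage.totalTokens ?? 0,
  };
}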
@@ -169,74 +168,21 @@
);

const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
- await this.openai.streamChatCompletions(this.model, messages, {
+ await this.openai.chat.completions.create({
+ messages,
+ model: this.model,
...(this.isOTypeModel ? {} : { temperature }),
n: 1,
+ stream: true,
}),
messages
);

return measuredStreamRequest;
}

- /**
- * Handles the stream response from the AzureOpenAI API.
- * Azure does not return the usage metrics in the stream response, but 1msg = 1token
- * so we can estimate the completion tokens by counting the number of messages.
- * @param {Object} response - the response object
- * @param {import('../../helpers/chat/LLMPerformanceMonitor').MonitoredStream} stream - the stream response from the AzureOpenAI API w/tracking
- * @param {Object} responseProps - the response properties
- * @returns {Promise<string>}
- */
handleStream(response, stream, responseProps) {
- const { uuid = uuidv4(), sources = [] } = responseProps;
-
- return new Promise(async (resolve) => {
- let fullText = "";
- let usage = {
- completion_tokens: 0,
- };
-
- // Establish listener to early-abort a streaming response
- // in case things go sideways or the user does not like the response.
- // We preserve the generated text but continue as if chat was completed
- // to preserve previously generated content.
- const handleAbort = () => {
- stream?.endMeasurement(usage);
- clientAbortedHandler(resolve, fullText);
- };
- response.on("close", handleAbort);
-
- for await (const event of stream) {
- for (const choice of event.choices) {
- const delta = choice.delta?.content;
- if (!delta) continue;
- fullText += delta;
- usage.completion_tokens++;
-
- writeResponseChunk(response, {
- uuid,
- sources: [],
- type: "textResponseChunk",
- textResponse: delta,
- close: false,
- error: false,
- });
- }
- }
-
- writeResponseChunk(response, {
- uuid,
- sources,
- type: "textResponseChunk",
- textResponse: "",
- close: true,
- error: false,
- });
- response.removeListener("close", handleAbort);
- stream?.endMeasurement(usage);
- resolve(fullText);
- });
+ return handleDefaultStreamResponseV2(response, stream, responseProps);
}

// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
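The hand-rolled handleStream above (which estimated one completion token per delta) can go because, with stream: true, the openai v4 SDK returns an async iterable of standard OpenAI chunk objects, so the shared handleDefaultStreamResponseV2 helper used by the other providers now covers Azure as well. A rough sketch of that consumption pattern, which mirrors but is not the repo helper:

async function collectStream(stream) {
  let fullText = "";
  for await (const chunk of stream) {
    const delta = chunk.choices?.[0]?.delta?.content;
    if (delta) fullText += delta; // accumulate streamed text deltas
  }
  return fullText;
}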
1 change: 1 addition & 0 deletions server/utils/AiProviders/openAi/index.js
@@ -183,6 +183,7 @@ class OpenAiLLM {
messages
// runPromptTokenCalculation: true - We manually count the tokens because OpenAI does not provide them in the stream
// since we are not using the OpenAI API version that supports this `stream_options` param.
+ // TODO: implement this once we upgrade to the OpenAI API version that supports this param.
);

return measuredStreamRequest;
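The TODO added above appears to refer to the stream_options parameter that newer OpenAI API versions accept on streamed chat completions: with include_usage set, the final chunk carries a usage object, which would remove the need for manual token counting. Sketch only, not wired into this codebase (the model id is a placeholder):

async function streamWithUsage(openai, messages) {
  const stream = await openai.chat.completions.create({
    model: "gpt-4o", // placeholder model id
    messages,
    stream: true,
    stream_options: { include_usage: true }, // usage arrives on the final chunk
  });
  let usage = null;
  for await (const chunk of stream) {
    if (chunk.usage) usage = chunk.usage; // { prompt_tokens, completion_tokens, ... }
  }
  return usage;
}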
35 changes: 22 additions & 13 deletions server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -2,16 +2,22 @@ const { toChunks } = require("../../helpers");

class AzureOpenAiEmbedder {
constructor() {
- const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
+ const { AzureOpenAI } = require("openai");
if (!process.env.AZURE_OPENAI_ENDPOINT)
throw new Error("No Azure API endpoint was set.");
if (!process.env.AZURE_OPENAI_KEY)
throw new Error("No Azure API key was set.");

- const openai = new OpenAIClient(
- process.env.AZURE_OPENAI_ENDPOINT,
- new AzureKeyCredential(process.env.AZURE_OPENAI_KEY)
- );
+ this.apiVersion = "2024-12-01-preview";
+ const openai = new AzureOpenAI({
+ apiKey: process.env.AZURE_OPENAI_KEY,
+ endpoint: process.env.AZURE_OPENAI_ENDPOINT,
+ apiVersion: this.apiVersion,
+ });

+ // We cannot assume the model fallback since the model is based on the deployment name
+ // and not the model name - so this will throw on embedding if the model is not defined.
+ this.model = process.env.EMBEDDING_MODEL_PREF;
this.openai = openai;

// Limit of how many strings we can process in a single pass to stay with resource or network limits
@@ -22,6 +28,10 @@
this.embeddingMaxChunkLength = 2048;
}

+ log(text, ...args) {
+ console.log(`\x1b[36m[AzureOpenAiEmbedder]\x1b[0m ${text}`, ...args);
+ }

async embedTextInput(textInput) {
const result = await this.embedChunks(
Array.isArray(textInput) ? textInput : [textInput]
@@ -30,22 +40,21 @@
}

async embedChunks(textChunks = []) {
- const textEmbeddingModel =
- process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";
- if (!textEmbeddingModel)
- throw new Error(
- "No EMBEDDING_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an embedding model."
- );
+ if (!this.model) throw new Error("No Embedding Model preference defined.");

+ this.log(`Embedding ${textChunks.length} chunks...`);
// Because there is a limit on how many chunks can be sent at once to Azure OpenAI
// we concurrently execute each max batch of text chunks possible.
// Refer to constructor maxConcurrentChunks for more info.
const embeddingRequests = [];
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
embeddingRequests.push(
new Promise((resolve) => {
- this.openai
- .getEmbeddings(textEmbeddingModel, chunk)
+ this.openai.embeddings
+ .create({
+ model: this.model,
+ input: chunk,
+ })
.then((res) => {
resolve({ data: res.data, error: null });
})
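For context on the embedder change above, a hedged sketch of the per-batch call the new code makes: model is the Azure deployment name taken from EMBEDDING_MODEL_PREF, and the response's data is an array of { embedding, index } entries in input order (not repo code):

async function embedBatch(openai, chunk) {
  const res = await openai.embeddings.create({
    model: process.env.EMBEDDING_MODEL_PREF, // Azure deployment name
    input: chunk, // array of strings, at most maxConcurrentChunks per call
  });
  return res.data.map((d) => d.embedding);
}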
6 changes: 6 additions & 0 deletions server/utils/EmbeddingEngines/openAi/index.js
@@ -16,6 +16,10 @@ class OpenAiEmbedder {
this.embeddingMaxChunkLength = 8_191;
}

+ log(text, ...args) {
+ console.log(`\x1b[36m[OpenAiEmbedder]\x1b[0m ${text}`, ...args);
+ }

async embedTextInput(textInput) {
const result = await this.embedChunks(
Array.isArray(textInput) ? textInput : [textInput]
@@ -24,6 +28,8 @@
}

async embedChunks(textChunks = []) {
+ this.log(`Embedding ${textChunks.length} chunks...`);

// Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
// we concurrently execute each max batch of text chunks possible.
// Refer to constructor maxConcurrentChunks for more info.
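The batching comment above describes the same pattern the Azure embedder uses: split the inputs into fixed-size batches and embed them concurrently. An illustrative standalone version, where toBatches and batchSize stand in for the repo's toChunks helper and maxConcurrentChunks, and the model id is a placeholder:

function toBatches(items, batchSize) {
  const out = [];
  for (let i = 0; i < items.length; i += batchSize) out.push(items.slice(i, i + batchSize));
  return out;
}

async function embedAll(openai, textChunks, batchSize = 500) {
  const batches = toBatches(textChunks, batchSize);
  const results = await Promise.all(
    batches.map((batch) =>
      openai.embeddings.create({ model: "text-embedding-3-small", input: batch })
    )
  );
  // Flatten back to one embedding per original chunk, preserving order.
  return results.flatMap((r) => r.data.map((d) => d.embedding));
}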