@@ -235,7 +235,7 @@ function WorkspaceDirectory({
}`}
</p>
<p className="mt-2 text-xs italic" hidden={embeddingCosts === 0}>
{t("new-workspace.costs")}
{t("connectors.directory.costs")}
</p>
</div>

2 changes: 1 addition & 1 deletion frontend/src/index.css
@@ -117,7 +117,7 @@
--theme-chat-input-border: #cccccc;
--theme-action-menu-bg: #eaeaea;
--theme-action-menu-item-hover: rgba(0, 0, 0, 0.1);
- --theme-settings-input-bg: #EDF2FA;
+ --theme-settings-input-bg: #edf2fa;
--theme-settings-input-placeholder: rgba(0, 0, 0, 0.5);
--theme-settings-input-active: rgb(0 0 0 / 0.2);
--theme-settings-input-text: #0e0f0f;
3 changes: 1 addition & 2 deletions server/package.json
@@ -21,7 +21,6 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.39.0",
"@aws-sdk/client-bedrock-runtime": "^3.775.0",
"@azure/openai": "1.0.0-beta.10",
"@datastax/astra-db-ts": "^0.1.3",
"@google/generative-ai": "^0.7.1",
"@ladjs/graceful": "^3.2.2",
@@ -67,7 +66,7 @@
"multer": "^1.4.5-lts.1",
"mysql2": "^3.9.8",
"ollama": "^0.5.10",
"openai": "4.38.5",
"openai": "4.95.1",
"pg": "^8.11.5",
"pinecone-client": "^1.1.0",
"pluralize": "^8.0.0",
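The dependency change above is the core of this PR: the @azure/openai beta client is removed and Azure requests move onto the AzureOpenAI class that ships with the official openai package. A minimal sketch of the client construction before and after (env var names mirror the ones used later in this diff; nothing here is repo code):

// Before: @azure/openai 1.0.0-beta.10
// const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
// const client = new OpenAIClient(endpoint, new AzureKeyCredential(apiKey));

// After: the AzureOpenAI class bundled with openai >= 4.x
const { AzureOpenAI } = require("openai");
const client = new AzureOpenAI({
  endpoint: process.env.AZURE_OPENAI_ENDPOINT,
  apiKey: process.env.AZURE_OPENAI_KEY,
  apiVersion: "2024-12-01-preview", // same version pinned later in this diff
});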
98 changes: 22 additions & 76 deletions server/utils/AiProviders/azureOpenAi/index.js
@@ -1,29 +1,26 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+ const {
+ LLMPerformanceMonitor,
+ } = require("../../helpers/chat/LLMPerformanceMonitor");
const {
- writeResponseChunk,
- clientAbortedHandler,
formatChatHistory,
+ handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
- const {
- LLMPerformanceMonitor,
- } = require("../../helpers/chat/LLMPerformanceMonitor");

class AzureOpenAiLLM {
constructor(embedder = null, modelPreference = null) {
- const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
+ const { AzureOpenAI } = require("openai");
if (!process.env.AZURE_OPENAI_ENDPOINT)
throw new Error("No Azure API endpoint was set.");
if (!process.env.AZURE_OPENAI_KEY)
throw new Error("No Azure API key was set.");

this.apiVersion = "2024-12-01-preview";
- this.openai = new OpenAIClient(
- process.env.AZURE_OPENAI_ENDPOINT,
- new AzureKeyCredential(process.env.AZURE_OPENAI_KEY),
- {
- apiVersion: this.apiVersion,
- }
- );
+ this.openai = new AzureOpenAI({
+ apiKey: process.env.AZURE_OPENAI_KEY,
+ apiVersion: this.apiVersion,
+ endpoint: process.env.AZURE_OPENAI_ENDPOINT,
+ });
this.model = modelPreference ?? process.env.OPEN_MODEL_PREF;
this.isOTypeModel =
process.env.AZURE_OPENAI_MODEL_TYPE === "reasoning" || false;
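Note on the constructor above: with the openai SDK, the model value sent on each request is interpreted as the Azure deployment name (still read from OPEN_MODEL_PREF here). The SDK also accepts a client-level deployment option; a hedged sketch of that alternative, which is not what this diff does:

const { AzureOpenAI } = require("openai");
// Pins the client to a single deployment so requests are routed to its URL.
const scopedClient = new AzureOpenAI({
  apiKey: process.env.AZURE_OPENAI_KEY,
  endpoint: process.env.AZURE_OPENAI_ENDPOINT,
  apiVersion: "2024-12-01-preview",
  deployment: process.env.OPEN_MODEL_PREF, // Azure deployment name
});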
@@ -139,7 +136,9 @@ class AzureOpenAiLLM {
);

const result = await LLMPerformanceMonitor.measureAsyncFunction(
- this.openai.getChatCompletions(this.model, messages, {
+ this.openai.chat.completions.create({
+ messages,
+ model: this.model,
...(this.isOTypeModel ? {} : { temperature }),
})
);
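For reference, the call above now resolves to a standard OpenAI chat completion object. An illustrative, self-contained sketch of the same request surface (not repo code; client is assumed to be an AzureOpenAI instance like the one built in the constructor):

async function askDeployment(client, prompt) {
  const completion = await client.chat.completions.create({
    model: process.env.OPEN_MODEL_PREF, // Azure deployment name
    messages: [{ role: "user", content: prompt }],
    temperature: 0.7,
  });
  // Same fields the metrics block below reads: choices plus snake_case usage.
  return {
    text: completion.choices[0].message.content,
    usage: completion.usage, // { prompt_tokens, completion_tokens, total_tokens }
  };
}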
@@ -153,10 +152,10 @@
return {
textResponse: result.output.choices[0].message.content,
metrics: {
- prompt_tokens: result.output.usage.promptTokens || 0,
- completion_tokens: result.output.usage.completionTokens || 0,
- total_tokens: result.output.usage.totalTokens || 0,
- outputTps: result.output.usage.completionTokens / result.duration,
+ prompt_tokens: result.output.usage.prompt_tokens || 0,
+ completion_tokens: result.output.usage.completion_tokens || 0,
+ total_tokens: result.output.usage.total_tokens || 0,
+ outputTps: result.output.usage.completion_tokens / result.duration,
duration: result.duration,
},
};
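The rename above follows directly from the SDK swap: the old @azure/openai client reported usage in camelCase (promptTokens), while the openai v4 SDK returns the API's snake_case keys. A hypothetical helper, not part of this diff, that spells out the mapping:

function normalizeUsage(usage = {}) {
  return {
    prompt_tokens: usage.prompt_tokens ?? usage.promptTokens ?? 0,
    completion_tokens: usage.completion_tokens ?? usage.completionTokens ?? 0,
    total_tokens: usage.total_tokens ?? usage.totalTokens ?? 0,
  };
}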
@@ -169,74 +168,21 @@
);

const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
- await this.openai.streamChatCompletions(this.model, messages, {
+ await this.openai.chat.completions.create({
+ messages,
+ model: this.model,
...(this.isOTypeModel ? {} : { temperature }),
n: 1,
+ stream: true,
}),
messages
);

return measuredStreamRequest;
}

- /**
- * Handles the stream response from the AzureOpenAI API.
- * Azure does not return the usage metrics in the stream response, but 1msg = 1token
- * so we can estimate the completion tokens by counting the number of messages.
- * @param {Object} response - the response object
- * @param {import('../../helpers/chat/LLMPerformanceMonitor').MonitoredStream} stream - the stream response from the AzureOpenAI API w/tracking
- * @param {Object} responseProps - the response properties
- * @returns {Promise<string>}
- */
handleStream(response, stream, responseProps) {
- const { uuid = uuidv4(), sources = [] } = responseProps;
-
- return new Promise(async (resolve) => {
- let fullText = "";
- let usage = {
- completion_tokens: 0,
- };
-
- // Establish listener to early-abort a streaming response
- // in case things go sideways or the user does not like the response.
- // We preserve the generated text but continue as if chat was completed
- // to preserve previously generated content.
- const handleAbort = () => {
- stream?.endMeasurement(usage);
- clientAbortedHandler(resolve, fullText);
- };
- response.on("close", handleAbort);
-
- for await (const event of stream) {
- for (const choice of event.choices) {
- const delta = choice.delta?.content;
- if (!delta) continue;
- fullText += delta;
- usage.completion_tokens++;
-
- writeResponseChunk(response, {
- uuid,
- sources: [],
- type: "textResponseChunk",
- textResponse: delta,
- close: false,
- error: false,
- });
- }
- }
-
- writeResponseChunk(response, {
- uuid,
- sources,
- type: "textResponseChunk",
- textResponse: "",
- close: true,
- error: false,
- });
- response.removeListener("close", handleAbort);
- stream?.endMeasurement(usage);
- resolve(fullText);
- });
+ return handleDefaultStreamResponseV2(response, stream, responseProps);
}

// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
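The hand-rolled handleStream above (which estimated one completion token per delta) can go because, with stream: true, the openai v4 SDK returns an async iterable of standard OpenAI chunk objects, so the shared handleDefaultStreamResponseV2 helper used by the other providers now covers Azure as well. A rough sketch of that consumption pattern, which mirrors but is not the repo helper:

async function collectStream(stream) {
  let fullText = "";
  for await (const chunk of stream) {
    const delta = chunk.choices?.[0]?.delta?.content;
    if (delta) fullText += delta; // accumulate streamed text deltas
  }
  return fullText;
}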
1 change: 1 addition & 0 deletions server/utils/AiProviders/openAi/index.js
@@ -183,6 +183,7 @@ class OpenAiLLM {
messages
// runPromptTokenCalculation: true - We manually count the tokens because OpenAI does not provide them in the stream
// since we are not using the OpenAI API version that supports this `stream_options` param.
+ // TODO: implement this once we upgrade to the OpenAI API version that supports this param.
);

return measuredStreamRequest;
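The TODO added above appears to refer to the stream_options parameter that newer OpenAI API versions accept on streamed chat completions: with include_usage set, the final chunk carries a usage object, which would remove the need for manual token counting. Sketch only, not wired into this codebase (the model id is a placeholder):

async function streamWithUsage(openai, messages) {
  const stream = await openai.chat.completions.create({
    model: "gpt-4o", // placeholder model id
    messages,
    stream: true,
    stream_options: { include_usage: true }, // usage arrives on the final chunk
  });
  let usage = null;
  for await (const chunk of stream) {
    if (chunk.usage) usage = chunk.usage; // { prompt_tokens, completion_tokens, ... }
  }
  return usage;
}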
35 changes: 22 additions & 13 deletions server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -2,16 +2,22 @@ const { toChunks } = require("../../helpers");

class AzureOpenAiEmbedder {
constructor() {
- const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
+ const { AzureOpenAI } = require("openai");
if (!process.env.AZURE_OPENAI_ENDPOINT)
throw new Error("No Azure API endpoint was set.");
if (!process.env.AZURE_OPENAI_KEY)
throw new Error("No Azure API key was set.");

- const openai = new OpenAIClient(
- process.env.AZURE_OPENAI_ENDPOINT,
- new AzureKeyCredential(process.env.AZURE_OPENAI_KEY)
- );
+ this.apiVersion = "2024-12-01-preview";
+ const openai = new AzureOpenAI({
+ apiKey: process.env.AZURE_OPENAI_KEY,
+ endpoint: process.env.AZURE_OPENAI_ENDPOINT,
+ apiVersion: this.apiVersion,
+ });

+ // We cannot assume the model fallback since the model is based on the deployment name
+ // and not the model name - so this will throw on embedding if the model is not defined.
+ this.model = process.env.EMBEDDING_MODEL_PREF;
this.openai = openai;

// Limit of how many strings we can process in a single pass to stay with resource or network limits
@@ -22,6 +28,10 @@
this.embeddingMaxChunkLength = 2048;
}

+ log(text, ...args) {
+ console.log(`\x1b[36m[AzureOpenAiEmbedder]\x1b[0m ${text}`, ...args);
+ }

async embedTextInput(textInput) {
const result = await this.embedChunks(
Array.isArray(textInput) ? textInput : [textInput]
@@ -30,22 +40,21 @@
}

async embedChunks(textChunks = []) {
- const textEmbeddingModel =
- process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";
- if (!textEmbeddingModel)
- throw new Error(
- "No EMBEDDING_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an embedding model."
- );
+ if (!this.model) throw new Error("No Embedding Model preference defined.");

+ this.log(`Embedding ${textChunks.length} chunks...`);
// Because there is a limit on how many chunks can be sent at once to Azure OpenAI
// we concurrently execute each max batch of text chunks possible.
// Refer to constructor maxConcurrentChunks for more info.
const embeddingRequests = [];
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
embeddingRequests.push(
new Promise((resolve) => {
- this.openai
- .getEmbeddings(textEmbeddingModel, chunk)
+ this.openai.embeddings
+ .create({
+ model: this.model,
+ input: chunk,
+ })
.then((res) => {
resolve({ data: res.data, error: null });
})
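For context on the embedder change above, a hedged sketch of the per-batch call the new code makes: model is the Azure deployment name taken from EMBEDDING_MODEL_PREF, and the response's data is an array of { embedding, index } entries in input order (not repo code):

async function embedBatch(openai, chunk) {
  const res = await openai.embeddings.create({
    model: process.env.EMBEDDING_MODEL_PREF, // Azure deployment name
    input: chunk, // array of strings, at most maxConcurrentChunks per call
  });
  return res.data.map((d) => d.embedding);
}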
6 changes: 6 additions & 0 deletions server/utils/EmbeddingEngines/openAi/index.js
@@ -16,6 +16,10 @@ class OpenAiEmbedder {
this.embeddingMaxChunkLength = 8_191;
}

+ log(text, ...args) {
+ console.log(`\x1b[36m[OpenAiEmbedder]\x1b[0m ${text}`, ...args);
+ }

async embedTextInput(textInput) {
const result = await this.embedChunks(
Array.isArray(textInput) ? textInput : [textInput]
@@ -24,6 +28,8 @@
}

async embedChunks(textChunks = []) {
+ this.log(`Embedding ${textChunks.length} chunks...`);

// Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
// we concurrently execute each max batch of text chunks possible.
// Refer to constructor maxConcurrentChunks for more info.
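The batching comment above describes the same pattern the Azure embedder uses: split the inputs into fixed-size batches and embed them concurrently. An illustrative standalone version, where toBatches and batchSize stand in for the repo's toChunks helper and maxConcurrentChunks, and the model id is a placeholder:

function toBatches(items, batchSize) {
  const out = [];
  for (let i = 0; i < items.length; i += batchSize) out.push(items.slice(i, i + batchSize));
  return out;
}

async function embedAll(openai, textChunks, batchSize = 500) {
  const batches = toBatches(textChunks, batchSize);
  const results = await Promise.all(
    batches.map((batch) =>
      openai.embeddings.create({ model: "text-embedding-3-small", input: batch })
    )
  );
  // Flatten back to one embedding per original chunk, preserving order.
  return results.flatMap((r) => r.data.map((d) => d.embedding));
}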