θΏ™ζ˜―indexlocζδΎ›ηš„ζœεŠ‘οΌŒδΈθ¦θΎ“ε…₯任何密码
Skip to content

LLM performance metric tracking #2825

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 44 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
5bd5621
WIP performance metric tracking
timothycarambat Dec 13, 2024
5a4699a
fix: patch UI trying to .toFixed() null metric
timothycarambat Dec 13, 2024
caadbed
Apipie implementation, not tested
timothycarambat Dec 13, 2024
ae60d7c
Cleanup Anthropic notes, Add support for AzureOpenAI tracking
timothycarambat Dec 13, 2024
c6223f7
bedrock token metric tracking
timothycarambat Dec 13, 2024
43316db
Cohere support
timothycarambat Dec 13, 2024
b8d31d6
feat: improve default stream handler to track for provider who are ac…
timothycarambat Dec 13, 2024
ad51ee7
feat: Add FireworksAI tracking reporting
timothycarambat Dec 13, 2024
1a4f9d1
Add token reporting for GenericOpenAI
timothycarambat Dec 13, 2024
adb0f79
token reporting for koboldcpp + lmstudio
shatfield4 Dec 13, 2024
0da94ec
lint
shatfield4 Dec 13, 2024
4ac4e1d
support Groq token tracking
timothycarambat Dec 13, 2024
06e8d86
Merge branch 'performance-metrics' of github.com:Mintplex-Labs/anythi…
timothycarambat Dec 13, 2024
22ec82a
HF token tracking
timothycarambat Dec 13, 2024
b5c2f7e
token tracking for togetherai
shatfield4 Dec 13, 2024
c780726
Merge branch 'performance-metrics' of github.com:Mintplex-Labs/anythi…
shatfield4 Dec 13, 2024
c422db1
LiteLLM token tracking
timothycarambat Dec 13, 2024
75b402c
Merge branch 'performance-metrics' of github.com:Mintplex-Labs/anythi…
timothycarambat Dec 13, 2024
f05f9ab
linting + Mistral token tracking support
timothycarambat Dec 13, 2024
d7f5fec
XAI token metric reporting
timothycarambat Dec 13, 2024
de8a8c6
native provider runner
timothycarambat Dec 13, 2024
8470d55
LocalAI token tracking
timothycarambat Dec 13, 2024
df1ea1e
Novita token tracking
timothycarambat Dec 13, 2024
ed950f9
OpenRouter token tracking
timothycarambat Dec 13, 2024
8dd797c
Merge branch 'master' into performance-metrics
timothycarambat Dec 13, 2024
d20a1a5
Apipie stream metrics
timothycarambat Dec 13, 2024
02fb104
textwebgenui token tracking
shatfield4 Dec 13, 2024
8268f41
Merge branch 'performance-metrics' of github.com:Mintplex-Labs/anythi…
shatfield4 Dec 13, 2024
33a0f5d
perplexity token reporting
shatfield4 Dec 13, 2024
227cdeb
ollama token reporting
shatfield4 Dec 13, 2024
f889bb0
lint
shatfield4 Dec 13, 2024
323cfd3
put back comment
shatfield4 Dec 13, 2024
02a8bb5
Rip out LC ollama wrapper and use official library
timothycarambat Dec 14, 2024
50bd624
patch images with new ollama lib
timothycarambat Dec 14, 2024
0066641
improve ollama offline message
timothycarambat Dec 14, 2024
3507012
fix image handling in ollama llm provider
shatfield4 Dec 14, 2024
04ed6fc
Merge branch 'performance-metrics' of github.com:Mintplex-Labs/anythi…
shatfield4 Dec 14, 2024
63e21b4
lint
timothycarambat Dec 14, 2024
b43e04f
NVIDIA NIM token tracking
timothycarambat Dec 14, 2024
37567b9
update openai compatibility responses
timothycarambat Dec 14, 2024
1494501
Merge branch 'master' into performance-metrics
timothycarambat Dec 16, 2024
f8da123
Merge branch 'master' into performance-metrics
timothycarambat Dec 16, 2024
1b2d1f9
UI/UX show/hide metrics on click for user preference
timothycarambat Dec 16, 2024
c31dec1
update bedrock client
timothycarambat Dec 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { numberWithCommas } from "@/utils/numbers";
import React, { useEffect, useState, useContext } from "react";
const MetricsContext = React.createContext();
const SHOW_METRICS_KEY = "anythingllm_show_chat_metrics";
const SHOW_METRICS_EVENT = "anythingllm_show_metrics_change";

/**
 * Format a duration into a human-readable string.
 * Durations under one second render as whole milliseconds
 * (e.g. 0.5 -> "500ms"); otherwise as seconds with millisecond
 * precision (e.g. 2 -> "2.000s").
 * @param {number} duration - duration in seconds (NOT milliseconds)
 * @returns {string} formatted duration, or "" if it cannot be formatted
 */
function formatDuration(duration) {
  try {
    return duration < 1
      ? `${(duration * 1000).toFixed(0)}ms`
      : `${duration.toFixed(3)}s`;
  } catch {
    // e.g. duration is null/undefined and .toFixed throws
    return "";
  }
}

/**
 * Format the output tokens-per-second value for display.
 * Values below 1,000 keep two decimal places; larger values are
 * rounded to whole tokens and comma-separated.
 * @param {number} outputTps - output tokens per second
 * @returns {string} formatted TPS, or "" if it cannot be formatted
 */
function formatTps(outputTps) {
  try {
    if (outputTps < 1000) return outputTps.toFixed(2);
    return numberWithCommas(outputTps.toFixed(0));
  } catch {
    // e.g. outputTps is null/undefined and .toFixed throws
    return "";
  }
}

/**
 * Read the user's persisted preference for automatically showing chat
 * metrics from localStorage (`anythingllm_show_chat_metrics`).
 * @returns {boolean} true only when the stored value is the string "true"
 */
function getAutoShowMetrics() {
  const stored = window?.localStorage?.getItem(SHOW_METRICS_KEY);
  return stored === "true";
}

/**
* Toggle the show metrics setting in localStorage `anythingllm_show_chat_metrics` key
* @returns {void}
*/
function toggleAutoShowMetrics() {
const currentValue = getAutoShowMetrics() || false;
window?.localStorage?.setItem(SHOW_METRICS_KEY, !currentValue);
window.dispatchEvent(
new CustomEvent(SHOW_METRICS_EVENT, {
detail: { showMetricsAutomatically: !currentValue },
})
);
return !currentValue;
}

/**
* Provider for the metrics context that controls the visibility of the metrics
* per-chat based on the user's preference.
* @param {React.ReactNode} children
* @returns {React.ReactNode}
*/
export function MetricsProvider({ children }) {
const [showMetricsAutomatically, setShowMetricsAutomatically] =
useState(getAutoShowMetrics());

useEffect(() => {
function handleShowingMetricsEvent(e) {
if (!e?.detail?.hasOwnProperty("showMetricsAutomatically")) return;
setShowMetricsAutomatically(e.detail.showMetricsAutomatically);
}
console.log("Adding event listener for metrics visibility");
window.addEventListener(SHOW_METRICS_EVENT, handleShowingMetricsEvent);
return () =>
window.removeEventListener(SHOW_METRICS_EVENT, handleShowingMetricsEvent);
}, []);

return (
<MetricsContext.Provider
value={{ showMetricsAutomatically, setShowMetricsAutomatically }}
>
{children}
</MetricsContext.Provider>
);
}

/**
* Render the metrics for a given chat, if available
* @param {metrics: {duration:number, outputTps: number}} props
* @returns
*/
export default function RenderMetrics({ metrics = {} }) {
// Inherit the showMetricsAutomatically state from the MetricsProvider so the state is shared across all chats
const { showMetricsAutomatically, setShowMetricsAutomatically } =
useContext(MetricsContext);
if (!metrics?.duration || !metrics?.outputTps) return null;

return (
<button
type="button"
onClick={() => setShowMetricsAutomatically(toggleAutoShowMetrics())}
data-tooltip-id="metrics-visibility"
data-tooltip-content={
showMetricsAutomatically
? "Click to only show metrics when hovering"
: "Click to show metrics as soon as they are available"
}
className={`border-none flex justify-end items-center gap-x-[8px] ${showMetricsAutomatically ? "opacity-100" : "opacity-0"} md:group-hover:opacity-100 transition-all duration-300`}
>
<p className="cursor-pointer text-xs font-mono text-theme-text-secondary opacity-50">
{formatDuration(metrics.duration)} ({formatTps(metrics.outputTps)}{" "}
tok/s)
</p>
</button>
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import useCopyText from "@/hooks/useCopyText";
import { Check, ThumbsUp, ArrowsClockwise, Copy } from "@phosphor-icons/react";
import Workspace from "@/models/workspace";
import { EditMessageAction } from "./EditMessage";
import RenderMetrics from "./RenderMetrics";
import ActionMenu from "./ActionMenu";

const Actions = ({
Expand All @@ -15,6 +16,7 @@ const Actions = ({
forkThread,
isEditing,
role,
metrics = {},
}) => {
const [selectedFeedback, setSelectedFeedback] = useState(feedbackScore);
const handleFeedback = async (newFeedback) => {
Expand Down Expand Up @@ -58,6 +60,7 @@ const Actions = ({
/>
</div>
</div>
<RenderMetrics metrics={metrics} />
</div>
);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const HistoricalMessage = ({
regenerateMessage,
saveEditedMessage,
forkThread,
metrics = {},
}) => {
const { isEditing } = useEditMessage({ chatId, role });
const { isDeleted, completeDelete, onEndAnimation } = useWatchDeleteMessage({
Expand Down Expand Up @@ -117,6 +118,7 @@ const HistoricalMessage = ({
isEditing={isEditing}
role={role}
forkThread={forkThread}
metrics={metrics}
/>
</div>
{role === "assistant" && <Citations sources={sources} />}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ export default function ChatHistory({
isLastMessage={isLastBotReply}
saveEditedMessage={saveEditedMessage}
forkThread={forkThread}
metrics={props.metrics}
/>
);
})}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ export function ChatTooltips() {
// as the citation modal is z-indexed above the chat history
className="tooltip !text-xs z-[100]"
/>
<Tooltip
id="metrics-visibility"
place="bottom"
delayShow={300}
className="tooltip !text-xs"
/>
</>
);
}
19 changes: 11 additions & 8 deletions frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import SpeechRecognition, {
useSpeechRecognition,
} from "react-speech-recognition";
import { ChatTooltips } from "./ChatTooltips";
import { MetricsProvider } from "./ChatHistory/HistoricalMessage/Actions/RenderMetrics";

export default function ChatContainer({ workspace, knownHistory = [] }) {
const { threadSlug = null } = useParams();
Expand Down Expand Up @@ -268,14 +269,16 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
>
{isMobile && <SidebarMobileHeader />}
<DnDFileUploaderWrapper>
<ChatHistory
history={chatHistory}
workspace={workspace}
sendCommand={sendCommand}
updateHistory={setChatHistory}
regenerateAssistantMessage={regenerateAssistantMessage}
hasAttachments={files.length > 0}
/>
<MetricsProvider>
<ChatHistory
history={chatHistory}
workspace={workspace}
sendCommand={sendCommand}
updateHistory={setChatHistory}
regenerateAssistantMessage={regenerateAssistantMessage}
hasAttachments={files.length > 0}
/>
</MetricsProvider>
<PromptInput
submit={handleSubmit}
onChange={handleMessageChange}
Expand Down
49 changes: 38 additions & 11 deletions frontend/src/utils/chat/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export default function handleChat(
close,
chatId = null,
action = null,
metrics = {},
} = chatResult;

if (type === "abort" || type === "statusResponse") {
Expand All @@ -35,6 +36,7 @@ export default function handleChat(
error,
animate: false,
pending: false,
metrics,
},
]);
_chatHistory.push({
Expand All @@ -47,6 +49,7 @@ export default function handleChat(
error,
animate: false,
pending: false,
metrics,
});
} else if (type === "textResponse") {
setLoadingResponse(false);
Expand All @@ -62,6 +65,7 @@ export default function handleChat(
animate: !close,
pending: false,
chatId,
metrics,
},
]);
_chatHistory.push({
Expand All @@ -74,21 +78,42 @@ export default function handleChat(
animate: !close,
pending: false,
chatId,
metrics,
});
} else if (type === "textResponseChunk") {
} else if (
type === "textResponseChunk" ||
type === "finalizeResponseStream"
) {
const chatIdx = _chatHistory.findIndex((chat) => chat.uuid === uuid);
if (chatIdx !== -1) {
const existingHistory = { ..._chatHistory[chatIdx] };
const updatedHistory = {
...existingHistory,
content: existingHistory.content + textResponse,
sources,
error,
closed: close,
animate: !close,
pending: false,
chatId,
};
let updatedHistory;

// If the response is finalized, we can set the loading state to false.
// and append the metrics to the history.
if (type === "finalizeResponseStream") {
updatedHistory = {
...existingHistory,
closed: close,
animate: !close,
pending: false,
chatId,
metrics,
};
setLoadingResponse(false);
} else {
updatedHistory = {
...existingHistory,
content: existingHistory.content + textResponse,
sources,
error,
closed: close,
animate: !close,
pending: false,
chatId,
metrics,
};
}
_chatHistory[chatIdx] = updatedHistory;
} else {
_chatHistory.push({
Expand All @@ -101,6 +126,7 @@ export default function handleChat(
animate: !close,
pending: false,
chatId,
metrics,
});
}
setChatHistory([..._chatHistory]);
Expand All @@ -125,6 +151,7 @@ export default function handleChat(
error: null,
animate: false,
pending: false,
metrics,
};
_chatHistory[chatIdx] = updatedHistory;

Expand Down
4 changes: 4 additions & 0 deletions server/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# PERPLEXITY_API_KEY='my-perplexity-key'
# PERPLEXITY_MODEL_PREF='codellama-34b-instruct'

# LLM_PROVIDER='deepseek'
# DEEPSEEK_API_KEY=YOUR_API_KEY
# DEEPSEEK_MODEL_PREF='deepseek-chat'

# LLM_PROVIDER='openrouter'
# OPENROUTER_API_KEY='my-openrouter-key'
# OPENROUTER_MODEL_PREF='openrouter/auto'
Expand Down
48 changes: 20 additions & 28 deletions server/endpoints/api/admin/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -610,24 +610,20 @@ function apiAdminEndpoints(app) {
const workspaceUsers = await Workspace.workspaceUsers(workspace.id);

if (!workspace) {
response
.status(404)
.json({
success: false,
error: `Workspace ${workspaceSlug} not found`,
users: workspaceUsers,
});
response.status(404).json({
success: false,
error: `Workspace ${workspaceSlug} not found`,
users: workspaceUsers,
});
return;
}

if (userIds.length === 0) {
response
.status(404)
.json({
success: false,
error: `No valid user IDs provided.`,
users: workspaceUsers,
});
response.status(404).json({
success: false,
error: `No valid user IDs provided.`,
users: workspaceUsers,
});
return;
}

Expand All @@ -637,13 +633,11 @@ function apiAdminEndpoints(app) {
workspace.id,
userIds
);
return response
.status(200)
.json({
success,
error,
users: await Workspace.workspaceUsers(workspace.id),
});
return response.status(200).json({
success,
error,
users: await Workspace.workspaceUsers(workspace.id),
});
}

// Add new users to the workspace if they are not already in the workspace
Expand All @@ -653,13 +647,11 @@ function apiAdminEndpoints(app) {
);
if (usersToAdd.length > 0)
await WorkspaceUser.createManyUsers(usersToAdd, workspace.id);
response
.status(200)
.json({
success: true,
error: null,
users: await Workspace.workspaceUsers(workspace.id),
});
response.status(200).json({
success: true,
error: null,
users: await Workspace.workspaceUsers(workspace.id),
});
} catch (e) {
console.error(e);
response.sendStatus(500).end();
Expand Down
Loading