这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 238 additions & 0 deletions server/__tests__/utils/helpers/convertTo.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
/* eslint-env jest */
const { prepareChatsForExport } = require("../../../utils/helpers/chat/convertTo");

// Mock the database models
jest.mock("../../../models/workspaceChats");
jest.mock("../../../models/embedChats");

const { WorkspaceChats } = require("../../../models/workspaceChats");
const { EmbedChats } = require("../../../models/embedChats");

/**
 * Builds a workspace chat fixture for the export tests.
 *
 * The `response` field is a JSON string (not an object) holding the response
 * text plus `attachments`, `sources` and `metrics`.
 *
 * @param {boolean} [withImages=false] - When truthy, the serialized response
 *   carries two image attachments (one PNG, one JPEG); otherwise the
 *   attachments array is empty.
 * @returns {Object} A chat record with id, prompt, serialized response,
 *   creation date, workspace, user and feedback score.
 */
const mockChat = (withImages = false) => {
  const imageAttachments = [
    { mime: "image/png", name: "image.png", contentString: "data:image/png;base64,iVBORw0KGg....=" },
    { mime: "image/jpeg", name: "image2.jpeg", contentString: "data:image/jpeg;base64,iVBORw0KGg....=" },
  ];

  // Key order matters here because the payload is serialized to a string.
  const responsePayload = {
    text: "Test response",
    attachments: withImages ? imageAttachments : [],
    sources: [],
    metrics: {},
  };

  const workspace = { name: "Test Workspace", openAiPrompt: "Test OpenAI Prompt" };
  const user = { username: "testuser" };

  return {
    id: 1,
    prompt: "Test prompt",
    response: JSON.stringify(responsePayload),
    createdAt: new Date(),
    workspace,
    user,
    feedbackScore: 1,
  };
};

/**
 * Test suite for `prepareChatsForExport(format, chatType)`.
 *
 * Covers rejection of unknown chat/export types and the shape of the prepared
 * data for the json, csv, jsonAlpaca and jsonl formats, with and without image
 * attachments. Both chat models are mocked, so no database is touched.
 */
describe("prepareChatsForExport", () => {
  // Clear call history and re-stub both model queries so every test starts
  // from "no chats" and opts in to its own fixtures.
  beforeEach(() => {
    jest.clearAllMocks();
    WorkspaceChats.whereWithData = jest.fn().mockResolvedValue([]);
    EmbedChats.whereWithEmbedAndWorkspace = jest.fn().mockResolvedValue([]);
  });

  test("should throw error for invalid chat type", async () => {
    await expect(prepareChatsForExport("json", "invalid"))
      .rejects
      .toThrow("Invalid chat type: invalid");
  });

  test("should throw error for invalid export type", async () => {
    await expect(prepareChatsForExport("invalid", "workspace"))
      .rejects
      .toThrow("Invalid export type: invalid");
  });

  // CSV and JSON share the same base row; JSON additionally carries an
  // `attachments` array. This case exercises the json path without images;
  // the csv path is covered by its own test below.
  test("should return prepared data in csv and json format for workspace chat type", async () => {
    const chatExample = mockChat();
    WorkspaceChats.whereWithData.mockResolvedValue([chatExample]);
    const result = await prepareChatsForExport("json", "workspace");

    const responseJson = JSON.parse(chatExample.response);
    expect(result).toBeDefined();
    expect(result).toEqual([{
      id: chatExample.id,
      prompt: chatExample.prompt,
      response: responseJson.text,
      sent_at: chatExample.createdAt,
      rating: chatExample.feedbackScore ? "GOOD" : "BAD",
      username: chatExample.user.username,
      workspace: chatExample.workspace.name,
      attachments: [],
    }]);
  });

  test("Should handle attachments for workspace chat type when json format is selected", async () => {
    const chatExample = mockChat(true);
    WorkspaceChats.whereWithData.mockResolvedValue([chatExample]);
    const result = await prepareChatsForExport("json", "workspace");

    const responseJson = JSON.parse(chatExample.response);
    expect(result).toBeDefined();
    expect(result).toEqual([{
      id: chatExample.id,
      prompt: chatExample.prompt,
      response: responseJson.text,
      sent_at: chatExample.createdAt,
      rating: chatExample.feedbackScore ? "GOOD" : "BAD",
      username: chatExample.user.username,
      workspace: chatExample.workspace.name,
      attachments: [
        {
          type: "image",
          image: responseJson.attachments[0].contentString,
        },
        {
          type: "image",
          image: responseJson.attachments[1].contentString,
        },
      ],
    }]);
  });

  test("Should ignore attachments for workspace chat type when csv format is selected", async () => {
    const chatExample = mockChat(true);
    WorkspaceChats.whereWithData.mockResolvedValue([chatExample]);
    const result = await prepareChatsForExport("csv", "workspace");

    const responseJson = JSON.parse(chatExample.response);
    expect(result).toBeDefined();
    // `result` is an array of rows, so the attachments check must target the
    // row itself; checking `result.attachments` would pass for any array.
    expect(result[0].attachments).not.toBeDefined();
    expect(result).toEqual([{
      id: chatExample.id,
      prompt: chatExample.prompt,
      response: responseJson.text,
      sent_at: chatExample.createdAt,
      rating: chatExample.feedbackScore ? "GOOD" : "BAD",
      username: chatExample.user.username,
      workspace: chatExample.workspace.name,
    }]);
  });

  test("should return prepared data in jsonAlpaca format for workspace chat type", async () => {
    const chatExample = mockChat();
    const imageChatExample = mockChat(true);
    WorkspaceChats.whereWithData.mockResolvedValue([chatExample, imageChatExample]);
    const result = await prepareChatsForExport("jsonAlpaca", "workspace");

    const responseJson1 = JSON.parse(chatExample.response);
    const responseJson2 = JSON.parse(imageChatExample.response);
    expect(result).toBeDefined();

    // Alpaca format does not support attachments - so they are not included
    expect(result[0].attachments).not.toBeDefined();
    expect(result[1].attachments).not.toBeDefined();
    expect(result).toEqual([
      {
        instruction: chatExample.workspace.openAiPrompt,
        input: chatExample.prompt,
        output: responseJson1.text,
      },
      {
        instruction: chatExample.workspace.openAiPrompt,
        input: imageChatExample.prompt,
        output: responseJson2.text,
      },
    ]);
  });

  test("should return prepared data in jsonl format for workspace chat type", async () => {
    const chatExample = mockChat();
    const responseJson = JSON.parse(chatExample.response);
    WorkspaceChats.whereWithData.mockResolvedValue([chatExample]);
    const result = await prepareChatsForExport("jsonl", "workspace");
    expect(result).toBeDefined();
    // NOTE(review): the mockChat fixture defines no workspace id, so the
    // computed key below evaluates to "undefined" — confirm this is the
    // intended grouping key for the fixture.
    expect(result).toEqual(
      {
        [chatExample.workspace.id]: {
          messages: [
            {
              role: "system",
              content: [{
                type: "text",
                text: chatExample.workspace.openAiPrompt,
              }],
            },
            {
              role: "user",
              content: [{
                type: "text",
                text: chatExample.prompt,
              }],
            },
            {
              role: "assistant",
              content: [{
                type: "text",
                text: responseJson.text,
              }],
            },
          ],
        },
      },
    );
  });

  test("should return prepared data in jsonl format for workspace chat type with attachments", async () => {
    const chatExample = mockChat();
    const imageChatExample = mockChat(true);
    const responseJson = JSON.parse(chatExample.response);
    const imageResponseJson = JSON.parse(imageChatExample.response);

    WorkspaceChats.whereWithData.mockResolvedValue([chatExample, imageChatExample]);
    const result = await prepareChatsForExport("jsonl", "workspace");
    expect(result).toBeDefined();
    // Both chats are expected under one key with a single system message,
    // followed by a user/assistant pair per chat. Images are expected only in
    // the user message of the chat that carried attachments.
    expect(result).toEqual(
      {
        [chatExample.workspace.id]: {
          messages: [
            {
              role: "system",
              content: [{
                type: "text",
                text: chatExample.workspace.openAiPrompt,
              }],
            },
            {
              role: "user",
              content: [{
                type: "text",
                text: chatExample.prompt,
              }],
            },
            {
              role: "assistant",
              content: [{
                type: "text",
                text: responseJson.text,
              }],
            },
            {
              role: "user",
              content: [{
                type: "text",
                text: imageChatExample.prompt,
              }, {
                type: "image",
                image: imageResponseJson.attachments[0].contentString,
              }, {
                type: "image",
                image: imageResponseJson.attachments[1].contentString,
              }],
            },
            {
              role: "assistant",
              content: [{
                type: "text",
                text: imageResponseJson.text,
              }],
            },
          ],
        },
      },
    );
  });
});
64 changes: 56 additions & 8 deletions server/utils/helpers/chat/convertTo.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ async function convertToJSONAlpaca(preparedData) {
return JSON.stringify(preparedData, null, 4);
}

// You can validate JSONL outputs on https://jsonlines.org/validator/
async function convertToJSONL(workspaceChatsMap) {
return Object.values(workspaceChatsMap)
.map((workspaceChats) => JSON.stringify(workspaceChats))
Expand Down Expand Up @@ -64,12 +65,24 @@ async function prepareChatsForExport(format = "jsonl", chatType = "workspace") {

if (format === "csv" || format === "json") {
const preparedData = chats.map((chat) => {
const responseJson = JSON.parse(chat.response);
const responseJson = safeJsonParse(chat.response, {});
const baseData = {
id: chat.id,
prompt: chat.prompt,
response: responseJson.text,
sent_at: chat.createdAt,
// Only add attachments to the json format since we cannot arrange attachments in csv format
...(format === "json"
? {
attachments:
responseJson.attachments?.length > 0
? responseJson.attachments.map((attachment) => ({
type: "image",
image: attachmentToDataUrl(attachment),
}))
: [],
}
: {}),
};

if (chatType === "embed") {
Expand Down Expand Up @@ -101,9 +114,10 @@ async function prepareChatsForExport(format = "jsonl", chatType = "workspace") {
return preparedData;
}

// jsonAlpaca format does not support array outputs
if (format === "jsonAlpaca") {
const preparedData = chats.map((chat) => {
const responseJson = JSON.parse(chat.response);
const responseJson = safeJsonParse(chat.response, {});
return {
instruction: buildSystemPrompt(
chat,
Expand All @@ -117,18 +131,25 @@ async function prepareChatsForExport(format = "jsonl", chatType = "workspace") {
return preparedData;
}

// Export to JSONL format (recommended for fine-tuning)
const workspaceChatsMap = chats.reduce((acc, chat) => {
const { prompt, response, workspaceId } = chat;
const responseJson = JSON.parse(response);
const responseJson = safeJsonParse(response, { attachments: [] });
const attachments = responseJson.attachments;

if (!acc[workspaceId]) {
acc[workspaceId] = {
messages: [
{
role: "system",
content:
chat.workspace?.openAiPrompt ||
"Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.",
content: [
{
type: "text",
text:
chat.workspace?.openAiPrompt ||
"Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.",
},
],
},
],
};
Expand All @@ -137,11 +158,27 @@ async function prepareChatsForExport(format = "jsonl", chatType = "workspace") {
acc[workspaceId].messages.push(
{
role: "user",
content: prompt,
content: [
{
type: "text",
text: prompt,
},
...(attachments?.length > 0
? attachments.map((attachment) => ({
type: "image",
image: attachmentToDataUrl(attachment),
}))
: []),
],
},
{
role: "assistant",
content: responseJson.text,
content: [
{
type: "text",
text: responseJson.text,
},
],
}
);

Expand Down Expand Up @@ -203,6 +240,17 @@ function buildSystemPrompt(chat, prompt = null) {
return `${prompt ?? STANDARD_PROMPT}${context}`;
}

/**
 * Converts an attachment's content string to a data URL if it is not one already.
 *
 * - Content that already starts with "data:" is returned unchanged.
 * - Any other string is treated as a base64 payload and prefixed with
 *   `data:<mime>;base64,`. If the attachment has no mime type, the generic
 *   "application/octet-stream" is used rather than emitting "data:undefined".
 * - A missing attachment or a non-string `contentString` yields an empty
 *   string instead of throwing, so a single malformed attachment does not
 *   abort the caller.
 *
 * @param {Object} attachment - The attachment object containing contentString and mime type
 * @param {string} [attachment.contentString] - Data URL or raw base64 payload
 * @param {string} [attachment.mime] - MIME type used when building the data URL
 * @returns {string} The data URL, or "" when there is no usable content
 */
function attachmentToDataUrl(attachment) {
  const content = attachment?.contentString;
  if (typeof content !== "string") return "";
  if (content.startsWith("data:")) return content;

  const mime = attachment.mime || "application/octet-stream";
  return `data:${mime};base64,${content}`;
}

module.exports = {
prepareChatsForExport,
exportChatsAsType,
Expand Down