这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions frontend/src/App.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ const GeneralTranscriptionPreference = lazy(
const GeneralEmbeddingPreference = lazy(
() => import("@/pages/GeneralSettings/EmbeddingPreference")
);
const EmbeddingTextSplitterPreference = lazy(
() => import("@/pages/GeneralSettings/EmbeddingTextSplitterPreference")
);
const GeneralVectorDatabase = lazy(
() => import("@/pages/GeneralSettings/VectorDatabase")
);
Expand Down Expand Up @@ -86,6 +89,12 @@ export default function App() {
path="/settings/embedding-preference"
element={<AdminRoute Component={GeneralEmbeddingPreference} />}
/>
<Route
path="/settings/text-splitter-preference"
element={
<AdminRoute Component={EmbeddingTextSplitterPreference} />
}
/>
<Route
path="/settings/vector-database"
element={<AdminRoute Component={GeneralVectorDatabase} />}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
</p>
</div>
<a
href={paths.settings.embeddingPreference()}
href={paths.settings.embedder.modelPreference()}
className="text-sm md:text-base my-2 underline"
>
Manage embedding &rarr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export default function LocalAiOptions({ settings, showAlert = false }) {
</p>
</div>
<a
href={paths.settings.embeddingPreference()}
href={paths.settings.embedder.modelPreference()}
className="text-sm md:text-base my-2 underline"
>
Manage embedding &rarr;
Expand Down
18 changes: 16 additions & 2 deletions frontend/src/components/SettingsSidebar/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
Barcode,
ClosedCaptioning,
EyeSlash,
SplitVertical,
} from "@phosphor-icons/react";
import useUser from "@/hooks/useUser";
import { USER_BACKGROUND_COLOR } from "@/utils/constants";
Expand Down Expand Up @@ -288,12 +289,25 @@ const SidebarOptions = ({ user = null }) => (
allowedRole={["admin"]}
/>
<Option
href={paths.settings.embeddingPreference()}
btnText="Embedding Model"
href={paths.settings.embedder.modelPreference()}
childLinks={[paths.settings.embedder.chunkingPreference()]}
btnText="Embedder Preferences"
icon={<FileCode className="h-5 w-5 flex-shrink-0" />}
user={user}
flex={true}
allowedRole={["admin"]}
subOptions={
<>
<Option
href={paths.settings.embedder.chunkingPreference()}
btnText="Text Splitter & Chunking"
icon={<SplitVertical className="h-5 w-5 flex-shrink-0" />}
user={user}
flex={true}
allowedRole={["admin"]}
/>
</>
}
/>
<Option
href={paths.settings.vectorDatabase()}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import React, { useEffect, useState } from "react";
import Sidebar from "@/components/SettingsSidebar";
import { isMobile } from "react-device-detect";
import PreLoader from "@/components/Preloader";
import CTAButton from "@/components/lib/CTAButton";
import Admin from "@/models/admin";
import showToast from "@/utils/toast";
import { nFormatter, numberWithCommas } from "@/utils/numbers";

/**
 * Reports whether a value should be treated as "not a usable number".
 *
 * `null`, `undefined`, and empty or whitespace-only strings are considered
 * missing. The global `isNaN` coerces "" to 0, which would let a blank form
 * field or an unset setting pass as a valid number, so blanks are rejected
 * explicitly before the numeric check.
 *
 * @param {*} value - Raw value, e.g. a FormData entry or a stored setting.
 * @returns {boolean} true when the value is missing or does not coerce to a number.
 */
function isNullOrNaN(value) {
  if (value === null || value === undefined) return true;
  if (typeof value === "string" && value.trim() === "") return true;
  return Number.isNaN(Number(value));
}

export default function EmbeddingTextSplitterPreference() {
const [settings, setSettings] = useState({});
const [loading, setLoading] = useState(true);
const [saving, setSaving] = useState(false);
const [hasChanges, setHasChanges] = useState(false);

const handleSubmit = async (e) => {
e.preventDefault();
const form = new FormData(e.target);

if (
Number(form.get("text_splitter_chunk_overlap")) >=
Number(form.get("text_splitter_chunk_size"))
) {
showToast(
"Chunk overlap cannot be larger or equal to chunk size.",
"error"
);
return;
}

setSaving(true);
await Admin.updateSystemPreferences({
text_splitter_chunk_size: isNullOrNaN(
form.get("text_splitter_chunk_size")
)
? 1000
: Number(form.get("text_splitter_chunk_size")),
text_splitter_chunk_overlap: isNullOrNaN(
form.get("text_splitter_chunk_overlap")
)
? 1000
: Number(form.get("text_splitter_chunk_overlap")),
});
setSaving(false);
setHasChanges(false);
showToast("Text chunking strategy settings saved.", "success");
};

useEffect(() => {
async function fetchSettings() {
const _settings = (await Admin.systemPreferences())?.settings;
setSettings(_settings ?? {});
setLoading(false);
}
fetchSettings();
}, []);

return (
<div className="w-screen h-screen overflow-hidden bg-sidebar flex">
<Sidebar />
{loading ? (
<div
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
>
<div className="w-full h-full flex justify-center items-center">
<PreLoader />
</div>
</div>
) : (
<div
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
>
<form
onSubmit={handleSubmit}
onChange={() => setHasChanges(true)}
className="flex w-full"
>
<div className="flex flex-col w-full px-1 md:pl-6 md:pr-[50px] md:py-6 py-16">
<div className="w-full flex flex-col gap-y-1 pb-4 border-white border-b-2 border-opacity-10">
<div className="flex gap-x-4 items-center">
<p className="text-lg leading-6 font-bold text-white">
Text splitting & Chunking Preferences
</p>
</div>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60">
Sometimes, you may want to change the default way that new
documents are split and chunked before being inserted into
your vector database. <br />
You should only modify this setting if you understand how text
splitting works and it's side effects.
</p>
<p className="text-xs leading-[18px] font-semibold text-white/80">
Changes here will only apply to{" "}
<i>newly embedded documents</i>, not existing documents.
</p>
</div>
<div className="w-full justify-end flex">
{hasChanges && (
<CTAButton className="mt-3 mr-0 -mb-14 z-10">
{saving ? "Saving..." : "Save changes"}
</CTAButton>
)}
</div>

<div className="flex flex-col gap-y-4 mt-8">
<div className="flex flex-col max-w-[300px]">
<div className="flex flex-col gap-y-2 mb-4">
<label className="text-white text-sm font-semibold block">
Text Chunk Size
</label>
<p className="text-xs text-white/60">
This is the maximum length of characters that can be
present in a single vector.
</p>
</div>
<input
type="number"
name="text_splitter_chunk_size"
min={1}
max={settings?.max_embed_chunk_size || 1000}
onWheel={(e) => e?.currentTarget?.blur()}
className="border-none bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="maximum length of vectorized text"
defaultValue={
isNullOrNaN(settings?.text_splitter_chunk_size)
? 1000
: Number(settings?.text_splitter_chunk_size)
}
required={true}
autoComplete="off"
/>
<p className="text-xs text-white/40">
Embed model maximum length is{" "}
{numberWithCommas(settings?.max_embed_chunk_size || 1000)}.
</p>
</div>
</div>

<div className="flex flex-col gap-y-4 mt-8">
<div className="flex flex-col max-w-[300px]">
<div className="flex flex-col gap-y-2 mb-4">
<label className="text-white text-sm font-semibold block">
Text Chunk Overlap
</label>
<p className="text-xs text-white/60">
This is the maximum overlap of characters that occurs
during chunking between two adjacent text chunks.
</p>
</div>
<input
type="number"
name="text_splitter_chunk_overlap"
min={0}
onWheel={(e) => e?.currentTarget?.blur()}
className="border-none bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="maximum length of vectorized text"
defaultValue={
isNullOrNaN(settings?.text_splitter_chunk_overlap)
? 20
: Number(settings?.text_splitter_chunk_overlap)
}
required={true}
autoComplete="off"
/>
</div>
</div>
</div>
</form>
</div>
)}
</div>
);
}
4 changes: 4 additions & 0 deletions frontend/src/utils/paths.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ export default {
transcriptionPreference: () => {
return "/settings/transcription-preference";
},
embedder: {
modelPreference: () => "/settings/embedding-preference",
chunkingPreference: () => "/settings/text-splitter-preference",
},
embeddingPreference: () => {
return "/settings/embedding-preference";
},
Expand Down
16 changes: 15 additions & 1 deletion server/endpoints/admin.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ const { User } = require("../models/user");
const { DocumentVectors } = require("../models/vectors");
const { Workspace } = require("../models/workspace");
const { WorkspaceChats } = require("../models/workspaceChats");
const { getVectorDbClass } = require("../utils/helpers");
const {
getVectorDbClass,
getEmbeddingEngineSelection,
} = require("../utils/helpers");
const {
validRoleSelection,
canModifyAdmin,
Expand Down Expand Up @@ -311,6 +314,7 @@ function adminEndpoints(app) {
}
);

// TODO: Allow specification of which props to get instead of returning all of them all the time.
app.get(
"/admin/system-preferences",
[validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])],
Expand All @@ -333,6 +337,16 @@ function adminEndpoints(app) {
support_email:
(await SystemSettings.get({ label: "support_email" }))?.value ||
null,
text_splitter_chunk_size:
(await SystemSettings.get({ label: "text_splitter_chunk_size" }))
?.value ||
getEmbeddingEngineSelection()?.embeddingMaxChunkLength ||
null,
text_splitter_chunk_overlap:
(await SystemSettings.get({ label: "text_splitter_chunk_overlap" }))
?.value || null,
max_embed_chunk_size:
getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1000,
};
response.status(200).json({ settings });
} catch (e) {
Expand Down
42 changes: 42 additions & 0 deletions server/models/systemSettings.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ process.env.NODE_ENV === "development"
const { isValidUrl } = require("../utils/http");
const prisma = require("../utils/prisma");

/**
 * Reports whether an incoming setting value cannot be used as a number.
 *
 * `null`, `undefined`, and empty or whitespace-only strings are treated as
 * missing. The global `isNaN` coerces "" to 0, which would let a blank
 * submission pass validation as the number 0, so blanks are rejected before
 * the numeric check.
 *
 * @param {*} value - Raw value received for a numeric system setting.
 * @returns {boolean} true when the value is missing or does not coerce to a number.
 */
function isNullOrNaN(value) {
  if (value === null || value === undefined) return true;
  if (typeof value === "string" && value.trim() === "") return true;
  return Number.isNaN(Number(value));
}

const SystemSettings = {
protectedFields: ["multi_user_mode"],
supportedFields: [
Expand All @@ -15,6 +20,8 @@ const SystemSettings = {
"telemetry_id",
"footer_data",
"support_email",
"text_splitter_chunk_size",
"text_splitter_chunk_overlap",
],
validations: {
footer_data: (updates) => {
Expand All @@ -28,6 +35,32 @@ const SystemSettings = {
return JSON.stringify([]);
}
},
text_splitter_chunk_size: (update) => {
try {
if (isNullOrNaN(update)) throw new Error("Value is not a number.");
if (Number(update) <= 0) throw new Error("Value must be non-zero.");
return Number(update);
} catch (e) {
console.error(
`Failed to run validation function on text_splitter_chunk_size`,
e.message
);
return 1000;
}
},
text_splitter_chunk_overlap: (update) => {
try {
if (isNullOrNaN(update)) throw new Error("Value is not a number");
if (Number(update) < 0) throw new Error("Value cannot be less than 0.");
return Number(update);
} catch (e) {
console.error(
`Failed to run validation function on text_splitter_chunk_overlap`,
e.message
);
return 20;
}
},
},
currentSettings: async function () {
const llmProvider = process.env.LLM_PROVIDER;
Expand Down Expand Up @@ -84,6 +117,15 @@ const SystemSettings = {
}
},

getValueOrFallback: async function (clause = {}, fallback = null) {
try {
return (await this.get(clause))?.value ?? fallback;
} catch (error) {
console.error(error.message);
return fallback;
}
},

where: async function (clause = {}, limit) {
try {
const settings = await prisma.system_settings.findMany({
Expand Down
4 changes: 3 additions & 1 deletion server/utils/EmbeddingEngines/azureOpenAi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ class AzureOpenAiEmbedder {
// Limit of how many strings we can process in a single pass to stay with resource or network limits
// https://learn.microsoft.com/en-us/azure/ai-services/openai/faq#i-am-trying-to-use-embeddings-and-received-the-error--invalidrequesterror--too-many-inputs--the-max-number-of-inputs-is-1---how-do-i-fix-this-:~:text=consisting%20of%20up%20to%2016%20inputs%20per%20API%20request
this.maxConcurrentChunks = 16;
this.embeddingMaxChunkLength = 1_000;

// https://learn.microsoft.com/en-us/answers/questions/1188074/text-embedding-ada-002-token-context-length
this.embeddingMaxChunkLength = 2048;
}

async embedTextInput(textInput) {
Expand Down
4 changes: 3 additions & 1 deletion server/utils/EmbeddingEngines/openAi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ class OpenAiEmbedder {

// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.maxConcurrentChunks = 500;
this.embeddingMaxChunkLength = 1_000;

// https://platform.openai.com/docs/guides/embeddings/embedding-models
this.embeddingMaxChunkLength = 8_191;
}

async embedTextInput(textInput) {
Expand Down
Loading