这是indexloc提供的服务，不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"cooldowns",
"Deduplicator",
"Dockerized",
"elevenlabs",
"Embeddable",
"epub",
"GROQ",
Expand Down
13 changes: 13 additions & 0 deletions docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,19 @@ GID='1000'
# WHISPER_PROVIDER="openai"
# OPEN_AI_KEY=sk-xxxxxxxx

###########################################
######## TTS/STT Model Selection ##########
###########################################
# TTS_PROVIDER="native"

# TTS_PROVIDER="openai"
# TTS_OPEN_AI_KEY=sk-example
# TTS_OPEN_AI_VOICE_MODEL=nova

# TTS_PROVIDER="elevenlabs"
# TTS_ELEVEN_LABS_KEY=
# TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel

# CLOUD DEPLOYMENT VARIABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# DISABLE_TELEMETRY="false"
Expand Down
1 change: 1 addition & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"react-dropzone": "^14.2.3",
"react-loading-skeleton": "^3.1.0",
"react-router-dom": "^6.3.0",
"react-speech-recognition": "^3.10.0",
"react-tag-input-component": "^2.0.2",
"react-toastify": "^9.1.3",
"react-tooltip": "^5.25.2",
Expand Down
7 changes: 7 additions & 0 deletions frontend/src/App.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ const GeneralLLMPreference = lazy(
const GeneralTranscriptionPreference = lazy(
() => import("@/pages/GeneralSettings/TranscriptionPreference")
);
const GeneralAudioPreference = lazy(
() => import("@/pages/GeneralSettings/AudioPreference")
);
const GeneralEmbeddingPreference = lazy(
() => import("@/pages/GeneralSettings/EmbeddingPreference")
);
Expand Down Expand Up @@ -85,6 +88,10 @@ export default function App() {
<AdminRoute Component={GeneralTranscriptionPreference} />
}
/>
<Route
path="/settings/audio-preference"
element={<AdminRoute Component={GeneralAudioPreference} />}
/>
<Route
path="/settings/embedding-preference"
element={<AdminRoute Component={GeneralEmbeddingPreference} />}
Expand Down
9 changes: 9 additions & 0 deletions frontend/src/components/SettingsSidebar/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
ClosedCaptioning,
EyeSlash,
SplitVertical,
Microphone,
} from "@phosphor-icons/react";
import useUser from "@/hooks/useUser";
import { USER_BACKGROUND_COLOR } from "@/utils/constants";
Expand Down Expand Up @@ -280,6 +281,14 @@ const SidebarOptions = ({ user = null }) => (
flex={true}
allowedRole={["admin"]}
/>
<Option
href={paths.settings.audioPreference()}
btnText="Voice and Speech Support"
icon={<Microphone className="h-5 w-5 flex-shrink-0" />}
user={user}
flex={true}
allowedRole={["admin"]}
/>
<Option
href={paths.settings.transcriptionPreference()}
btnText="Transcription Model"
Expand Down
9 changes: 9 additions & 0 deletions frontend/src/components/SpeechToText/BrowserNative/index.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
export default function BrowserNative() {
return (
<div className="w-full h-10 items-center flex">
<p className="text-sm font-base text-white text-opacity-60">
There is no configuration needed for this provider.
</p>
</div>
);
}
9 changes: 9 additions & 0 deletions frontend/src/components/TextToSpeech/BrowserNative/index.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
export default function BrowserNative() {
return (
<div className="w-full h-10 items-center flex">
<p className="text-sm font-base text-white text-opacity-60">
There is no configuration needed for this provider.
</p>
</div>
);
}
107 changes: 107 additions & 0 deletions frontend/src/components/TextToSpeech/ElevenLabsOptions/index.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import { useState, useEffect } from "react";
import System from "@/models/system";

export default function ElevenLabsOptions({ settings }) {
const [inputValue, setInputValue] = useState(settings?.TTSElevenLabsKey);
const [openAIKey, setOpenAIKey] = useState(settings?.TTSElevenLabsKey);

return (
<div className="flex gap-x-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="password"
name="TTSElevenLabsKey"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="ElevenLabs API Key"
defaultValue={settings?.TTSElevenLabsKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
onChange={(e) => setInputValue(e.target.value)}
onBlur={() => setOpenAIKey(inputValue)}
/>
</div>
{!settings?.credentialsOnly && (
<ElevenLabsModelSelection settings={settings} apiKey={openAIKey} />
)}
</div>
);
}

function ElevenLabsModelSelection({ apiKey, settings }) {
const [groupedModels, setGroupedModels] = useState({});
const [loading, setLoading] = useState(true);

useEffect(() => {
async function findCustomModels() {
setLoading(true);
const { models } = await System.customModels(
"elevenlabs-tts",
typeof apiKey === "boolean" ? null : apiKey
);

if (models?.length > 0) {
const modelsByOrganization = models.reduce((acc, model) => {
acc[model.organization] = acc[model.organization] || [];
acc[model.organization].push(model);
return acc;
}, {});
setGroupedModels(modelsByOrganization);
}

setLoading(false);
}
findCustomModels();
}, [apiKey]);

if (loading) {
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Selection
</label>
<select
name="TTSElevenLabsVoiceModel"
disabled={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
-- loading available models --
</option>
</select>
</div>
);
}

return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Selection
</label>
<select
name="TTSElevenLabsVoiceModel"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{Object.keys(groupedModels)
.sort()
.map((organization) => (
<optgroup key={organization} label={organization}>
{groupedModels[organization].map((model) => (
<option
key={model.id}
value={model.id}
selected={settings?.OpenAiModelPref === model.id}
>
{model.name}
</option>
))}
</optgroup>
))}
</select>
</div>
);
}
45 changes: 45 additions & 0 deletions frontend/src/components/TextToSpeech/OpenAiOptions/index.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
 * Converts a string to proper case: every run that starts with a word
 * character and continues up to the next whitespace gets its first character
 * upper-cased and the remainder lower-cased.
 *
 * @param {string} string - Text to convert.
 * @returns {string} The converted text; characters outside matched runs are
 *   left untouched.
 */
function toProperCase(string) {
  const capitalizeWord = (word) =>
    `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`;

  return string.replace(/\w\S*/g, (word) => capitalizeWord(word));
}

export default function OpenAiTextToSpeechOptions({ settings }) {
const apiKey = settings?.TTSOpenAIKey;

return (
<div className="flex gap-x-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="password"
name="TTSOpenAIKey"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="OpenAI API Key"
defaultValue={apiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Voice Model
</label>
<select
name="TTSOpenAIVoiceModel"
defaultValue={settings?.TTSOpenAIVoiceModel ?? "alloy"}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{["alloy", "echo", "fable", "onyx", "nova", "shimmer"].map(
(voice) => {
return <option value={voice}>{toProperCase(voice)}</option>;
}
)}
</select>
</div>
</div>
);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { useEffect, useState, useRef } from "react";
import { SpeakerHigh, PauseCircle, CircleNotch } from "@phosphor-icons/react";
import { Tooltip } from "react-tooltip";
import Workspace from "@/models/workspace";
import showToast from "@/utils/toast";

export default function AsyncTTSMessage({ slug, chatId }) {
const playerRef = useRef(null);
const [speaking, setSpeaking] = useState(false);
const [loading, setLoading] = useState(false);
const [audioSrc, setAudioSrc] = useState(null);

function speakMessage() {
if (speaking) {
playerRef?.current?.pause();
return;
}

try {
if (!audioSrc) {
setLoading(true);
Workspace.ttsMessage(slug, chatId)
.then((audioBlob) => {
if (!audioBlob)
throw new Error("Failed to load or play TTS message response.");
setAudioSrc(audioBlob);
})
.catch((e) => showToast(e.message, "error", { clear: true }))
.finally(() => setLoading(false));
} else {
playerRef.current.play();
}
} catch (e) {
console.error(e);
setLoading(false);
setSpeaking(false);
}
}

useEffect(() => {
function setupPlayer() {
if (!playerRef?.current) return;
playerRef.current.addEventListener("play", () => {
setSpeaking(true);
});

playerRef.current.addEventListener("pause", () => {
playerRef.current.currentTime = 0;
setSpeaking(false);
});
}
setupPlayer();
}, []);

if (!chatId) return null;
return (
<div className="mt-3 relative">
<button
onClick={speakMessage}
data-tooltip-id="message-to-speech"
data-tooltip-content={
speaking ? "Pause TTS speech of message" : "TTS Speak message"
}
className="border-none text-zinc-300"
aria-label={speaking ? "Pause speech" : "Speak message"}
>
{speaking ? (
<PauseCircle size={18} className="mb-1" />
) : (
<>
{loading ? (
<CircleNotch size={18} className="mb-1 animate-spin" />
) : (
<SpeakerHigh size={18} className="mb-1" />
)}
</>
)}
<audio
ref={playerRef}
hidden={true}
src={audioSrc}
autoPlay={true}
controls={false}
/>
</button>
<Tooltip
id="message-to-speech"
place="bottom"
delayShow={300}
className="tooltip !text-xs"
/>
</div>
);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { useEffect, useState } from "react";
import NativeTTSMessage from "./native";
import AsyncTTSMessage from "./asyncTts";
import System from "@/models/system";

export default function TTSMessage({ slug, chatId, message }) {
const [provider, setProvider] = useState("native");
const [loading, setLoading] = useState(true);

useEffect(() => {
async function getSettings() {
const _settings = await System.keys();
setProvider(_settings?.TextToSpeechProvider ?? "native");
setLoading(false);
}
getSettings();
}, []);

if (loading) return null;
if (provider !== "native")
return <AsyncTTSMessage slug={slug} chatId={chatId} />;
return <NativeTTSMessage message={message} />;
}
Loading