From 40c214ae574c9da152826b5748322f9938d16001 Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Mon, 13 May 2024 22:59:28 -0700 Subject: [PATCH 1/4] Add Speech-to-text and Text-to-speech providers --- .vscode/settings.json | 1 + docker/.env.example | 13 ++++ frontend/package.json | 1 + frontend/src/App.jsx | 7 ++ .../src/components/SettingsSidebar/index.jsx | 9 +++ .../HistoricalMessage/Actions/index.jsx | 65 +------------------ .../ChatContainer/PromptInput/index.jsx | 5 ++ frontend/src/models/system.js | 2 +- frontend/src/models/workspace.js | 17 ++++- frontend/src/utils/paths.js | 3 + frontend/yarn.lock | 5 ++ server/.env.example | 13 ++++ server/endpoints/workspaces.js | 50 +++++++++++++- server/models/systemSettings.js | 11 ++++ server/package.json | 1 + server/utils/helpers/customModels.js | 30 +++++++++ server/utils/helpers/updateENV.js | 31 +++++++++ server/yarn.lock | 65 ++++++++++++++++++- 18 files changed, 261 insertions(+), 68 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 110c4fa6ed5..4930aa2d153 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ "cooldowns", "Deduplicator", "Dockerized", + "elevenlabs", "Embeddable", "epub", "GROQ", diff --git a/docker/.env.example b/docker/.env.example index 8cfa2aea872..70059ea5135 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -171,6 +171,19 @@ GID='1000' # WHISPER_PROVIDER="openai" # OPEN_AI_KEY=sk-xxxxxxxx +########################################### +######## TTS/STT Model Selection ########## +########################################### +# TTS_PROVIDER="native" + +# TTS_PROVIDER="openai" +# TTS_OPEN_AI_KEY=sk-example +# TTS_OPEN_AI_VOICE_MODEL=nova + +# TTS_PROVIDER="elevenlabs" +# TTS_ELEVEN_LABS_KEY= +# TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel + # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. # DISABLE_TELEMETRY="false" diff --git a/frontend/package.json b/frontend/package.json index ded06aa9c77..11e612fcdfd 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -28,6 +28,7 @@ "react-dropzone": "^14.2.3", "react-loading-skeleton": "^3.1.0", "react-router-dom": "^6.3.0", + "react-speech-recognition": "^3.10.0", "react-tag-input-component": "^2.0.2", "react-toastify": "^9.1.3", "react-tooltip": "^5.25.2", diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 0a5ed65fc85..b29e6eea925 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -32,6 +32,9 @@ const GeneralLLMPreference = lazy( const GeneralTranscriptionPreference = lazy( () => import("@/pages/GeneralSettings/TranscriptionPreference") ); +const GeneralAudioPreference = lazy( + () => import("@/pages/GeneralSettings/AudioPreference") +); const GeneralEmbeddingPreference = lazy( () => import("@/pages/GeneralSettings/EmbeddingPreference") ); @@ -85,6 +88,10 @@ export default function App() { } /> + } + /> } diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx index 67797d26619..6b8f79e5edd 100644 --- a/frontend/src/components/SettingsSidebar/index.jsx +++ b/frontend/src/components/SettingsSidebar/index.jsx @@ -21,6 +21,7 @@ import { ClosedCaptioning, EyeSlash, SplitVertical, + Microphone, } from "@phosphor-icons/react"; import useUser from "@/hooks/useUser"; import { USER_BACKGROUND_COLOR } from "@/utils/constants"; @@ -280,6 +281,14 @@ const SidebarOptions = ({ user = null }) => ( flex={true} allowedRole={["admin"]} /> +