diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx index ed4d0ac8505..a08e0561d8d 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx @@ -10,7 +10,7 @@ import { useTranslation } from "react-i18next"; let timeout; const SILENCE_INTERVAL = 3_200; // wait in milliseconds of silence before closing. -export default function SpeechToText({ sendCommand }) { +export default function SpeechToText({ sendSTTCommand }) { const { transcript, listening, @@ -40,7 +40,7 @@ export default function SpeechToText({ sendCommand }) { function endSTTSession() { SpeechRecognition.stopListening(); if (transcript.length > 0) { - sendCommand(transcript, true); + sendSTTCommand(transcript); } resetTranscript(); @@ -83,7 +83,6 @@ export default function SpeechToText({ sendCommand }) { useEffect(() => { if (transcript?.length > 0 && listening) { - sendCommand(transcript, false); clearTimeout(timeout); timeout = setTimeout(() => { endSTTSession(); diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx index f4711c4ab9c..57db3970535 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx @@ -17,6 +17,7 @@ import AttachmentManager from "./Attachments"; import AttachItem from "./AttachItem"; import { PASTE_ATTACHMENT_EVENT } from "../DnDWrapper"; import useTextSize from "@/hooks/useTextSize"; +import System from "@/models/system"; import { useTranslation } from "react-i18next"; export const PROMPT_INPUT_EVENT = "set_prompt_input"; @@ -39,6 +40,7 @@ export default function PromptInput({ const undoStack = useRef([]); 
const redoStack = useRef([]); const { textSizeClass } = useTextSize(); + const [speechToTextAutoSubmit, setSpeechToTextAutoSubmit] = useState(true); /** * To prevent too many re-renders we remotely listen for updates from the parent @@ -62,6 +64,14 @@ export default function PromptInput({ resetTextAreaHeight(); }, [isStreaming]); + useEffect(() => { + async function getSettings() { + const _settings = await System.keys(); + setSpeechToTextAutoSubmit(_settings?.SpeechToTextAutoSubmit === "true"); + } + getSettings(); + }, []); + /** * Save the current state before changes * @param {number} adjustment @@ -202,26 +212,36 @@ export default function PromptInput({ const pasteText = e.clipboardData.getData("text/plain"); if (pasteText) { - const textarea = textareaRef.current; - const start = textarea.selectionStart; - const end = textarea.selectionEnd; - const newPromptInput = - promptInput.substring(0, start) + - pasteText + - promptInput.substring(end); - setPromptInput(newPromptInput); - onChange({ target: { value: newPromptInput } }); - - // Set the cursor position after the pasted text - // we need to use setTimeout to prevent the cursor from being set to the end of the text - setTimeout(() => { - textarea.selectionStart = textarea.selectionEnd = - start + pasteText.length; - }, 0); + addToInputPrompt(pasteText); } return; } + function addToInputPrompt(text) { + const textarea = textareaRef.current; + const start = textarea.selectionStart; + const end = textarea.selectionEnd; + const newPromptInput = + promptInput.substring(0, start) + text + promptInput.substring(end); + setPromptInput(newPromptInput); + onChange({ target: { value: newPromptInput } }); + + // Set the cursor position after the pasted text + // we need to use setTimeout to prevent the cursor from being set to the end of the text + setTimeout(() => { + textarea.selectionStart = textarea.selectionEnd = + start + text.length; + }, 0); + } + + function sendSTTCommand(text) { + if (speechToTextAutoSubmit) 
{ + sendCommand(text, true); + } else { + addToInputPrompt(text); + } + } + function handleChange(e) { debouncedSaveState(-1); onChange(e); @@ -312,7 +332,7 @@ export default function PromptInput({
- +
diff --git a/frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx b/frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx index 5d5b8995fdd..441c25dbb27 100644 --- a/frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx +++ b/frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx @@ -27,11 +27,14 @@ export default function SpeechToTextProvider({ settings }) { ); const [searchMenuOpen, setSearchMenuOpen] = useState(false); const searchInputRef = useRef(null); + const [autoSubmit, setAutoSubmit] = useState( + settings?.SpeechToTextAutoSubmit === "true" + ); const handleSubmit = async (e) => { e.preventDefault(); const form = e.target; - const data = { SpeechToTextProvider: selectedProvider }; + const data = { SpeechToTextProvider: selectedProvider, SpeechToTextAutoSubmit: String(autoSubmit) }; const formData = new FormData(form); for (var [key, value] of formData.entries()) data[key] = value; @@ -54,6 +57,11 @@ export default function SpeechToTextProvider({ settings }) { setHasChanges(true); }; + const updateAutoSubmitChoice = () => { + setAutoSubmit(!autoSubmit); + setHasChanges(true); + } + const handleXButton = () => { if (searchQuery.length > 0) { setSearchQuery(""); @@ -185,6 +193,18 @@ export default function SpeechToTextProvider({ settings }) { (provider) => provider.value === selectedProvider )?.options(settings)} +
Autosubmit
+
+

+   + Here you can specify whether you want your speech to text prompt to be submitted automatically after 3s of silence + or if it should be submitted manually +

+
); diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index bd811af1ca3..e04ee7d51e7 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -241,6 +241,8 @@ const SystemSettings = { // TTS/STT Selection Settings & Configs // - Currently the only 3rd party is OpenAI or the native browser-built in // -------------------------------------------------------- + SpeechToTextAutoSubmit: process.env.STT_AUTO_SUBMIT || "true", + TextToSpeechProvider: process.env.TTS_PROVIDER || "native", TTSOpenAIKey: !!process.env.TTS_OPEN_AI_KEY, TTSOpenAIVoiceModel: process.env.TTS_OPEN_AI_VOICE_MODEL, diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 8d5825f64bd..90126293b43 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -518,6 +518,11 @@ const KEY_MAPPING = { }, // TTS/STT Integration ENVS + SpeechToTextAutoSubmit: { + envKey: "STT_AUTO_SUBMIT", + checks: [validBooleanString], + }, + TextToSpeechProvider: { envKey: "TTS_PROVIDER", checks: [supportedTTSProvider], @@ -683,6 +688,14 @@ function validOllamaLLMBasePath(input = "") { } } +function validBooleanString(input = "") { + const validSelection = [ + "true", + "false", + ].includes(input); + return validSelection ? null : `${input} is not a valid boolean flag.`; +} + function supportedTTSProvider(input = "") { const validSelection = [ "native",