Mintplex-Labs · yashschandra · Feb 19, 2025 · Feb 19, 2025 · Feb 24, 2025 · Feb 26, 2025
diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/SpeechToText/index.jsx
@@ -10,7 +10,7 @@ import { useTranslation } from "react-i18next";
 
 let timeout;
 const SILENCE_INTERVAL = 3_200; // wait in seconds of silence before closing.
-export default function SpeechToText({ sendCommand }) {
+export default function SpeechToText({ sendSTTCommand }) {
   const {
     transcript,
     listening,
@@ -40,7 +40,7 @@ export default function SpeechToText({ sendCommand }) {
   function endSTTSession() {
     SpeechRecognition.stopListening();
     if (transcript.length > 0) {
-      sendCommand(transcript, true);
+      sendSTTCommand(transcript);
     }
 
     resetTranscript();
@@ -83,7 +83,6 @@ export default function SpeechToText({ sendCommand }) {
 
   useEffect(() => {
     if (transcript?.length > 0 && listening) {
-      sendCommand(transcript, false);
       clearTimeout(timeout);
       timeout = setTimeout(() => {
         endSTTSession();

diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx
@@ -17,6 +17,7 @@ import AttachmentManager from "./Attachments";
 import AttachItem from "./AttachItem";
 import { PASTE_ATTACHMENT_EVENT } from "../DnDWrapper";
 import useTextSize from "@/hooks/useTextSize";
+import System from "@/models/system";
 import { useTranslation } from "react-i18next";
 
 export const PROMPT_INPUT_EVENT = "set_prompt_input";
@@ -39,6 +40,7 @@ export default function PromptInput({
   const undoStack = useRef([]);
   const redoStack = useRef([]);
   const { textSizeClass } = useTextSize();
+  const [speechToTextAutoSubmit, setSpeechToTextAutoSubmit] = useState(true);
 
   /**
    * To prevent too many re-renders we remotely listen for updates from the parent
@@ -62,6 +64,14 @@ export default function PromptInput({
     resetTextAreaHeight();
   }, [isStreaming]);
 
+  /**
+   * Load the saved speech-to-text auto-submit preference once on mount.
+   * The preference is only applied when a settings object is actually returned,
+   * so an empty or failed lookup keeps the initial state instead of silently
+   * switching the prompt to manual submission.
+   */
+  useEffect(() => {
+    async function getSettings() {
+      try {
+        const _settings = await System.keys();
+        if (!_settings) return;
+        setSpeechToTextAutoSubmit(_settings.SpeechToTextAutoSubmit === "true");
+      } catch (error) {
+        // Keep the initial state; surface the failure rather than leaving an
+        // unhandled promise rejection.
+        console.error(error);
+      }
+    }
+    getSettings();
+  }, []);
+
   /**
    * Save the current state before changes
    * @param {number} adjustment
@@ -202,26 +212,36 @@ export default function PromptInput({
 
     const pasteText = e.clipboardData.getData("text/plain");
     if (pasteText) {
-      const textarea = textareaRef.current;
-      const start = textarea.selectionStart;
-      const end = textarea.selectionEnd;
-      const newPromptInput =
-        promptInput.substring(0, start) +
-        pasteText +
-        promptInput.substring(end);
-      setPromptInput(newPromptInput);
-      onChange({ target: { value: newPromptInput } });
-
-      // Set the cursor position after the pasted text
-      // we need to use setTimeout to prevent the cursor from being set to the end of the text
-      setTimeout(() => {
-        textarea.selectionStart = textarea.selectionEnd =
-          start + pasteText.length;
-      }, 0);
+      addToInputPrompt(pasteText);
     }
     return;
   }
 
+  /**
+   * Insert text into the prompt at the textarea's current selection, replacing
+   * any selected range, then place the caret directly after the inserted text.
+   * @param {string} text - text to insert
+   */
+  function addToInputPrompt(text) {
+    const el = textareaRef.current;
+    const { selectionStart: from, selectionEnd: to } = el;
+    const head = promptInput.substring(0, from);
+    const tail = promptInput.substring(to);
+    const value = head + text + tail;
+
+    setPromptInput(value);
+    onChange({ target: { value } });
+
+    // The caret is positioned in a setTimeout callback; setting it right away
+    // would leave the cursor at the end of the text.
+    setTimeout(() => {
+      el.selectionStart = el.selectionEnd = from + text.length;
+    }, 0);
+  }
+
+  /**
+   * Deliver a speech-to-text transcript: submit it through sendCommand when
+   * auto-submit is enabled, otherwise insert it into the prompt input.
+   * @param {string} text - transcript text
+   */
+  function sendSTTCommand(text) {
+    if (!speechToTextAutoSubmit) {
+      addToInputPrompt(text);
+      return;
+    }
+
+    sendCommand(text, true);
+  }
+
   function handleChange(e) {
     debouncedSaveState(-1);
     onChange(e);
@@ -312,7 +332,7 @@ export default function PromptInput({
                 <TextSizeButton />
               </div>
               <div className="flex gap-x-2">
-                <SpeechToText sendCommand={sendCommand} />
+                <SpeechToText sendSTTCommand={sendSTTCommand} />
               </div>
             </div>
           </div>

diff --git a/frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx b/frontend/src/pages/GeneralSettings/AudioPreference/stt.jsx
@@ -27,11 +27,14 @@ export default function SpeechToTextProvider({ settings }) {
   );
   const [searchMenuOpen, setSearchMenuOpen] = useState(false);
   const searchInputRef = useRef(null);
+  const [autoSubmit, setAutoSubmit] = useState(
+    settings?.SpeechToTextAutoSubmit === "true"
+  );
 
   const handleSubmit = async (e) => {
     e.preventDefault();
     const form = e.target;
-    const data = { SpeechToTextProvider: selectedProvider };
+    const data = { SpeechToTextProvider: selectedProvider, SpeechToTextAutoSubmit: String(autoSubmit) };
     const formData = new FormData(form);
 
     for (var [key, value] of formData.entries()) data[key] = value;
@@ -54,6 +57,11 @@ export default function SpeechToTextProvider({ settings }) {
     setHasChanges(true);
   };
 
+  /**
+   * Toggle the auto-submit checkbox state and set the hasChanges flag.
+   * The updater form of the setter is used so the toggle is based on the
+   * latest state rather than the value captured by this render.
+   */
+  const updateAutoSubmitChoice = () => {
+    setAutoSubmit((enabled) => !enabled);
+    setHasChanges(true);
+  };
+
   const handleXButton = () => {
     if (searchQuery.length > 0) {
       setSearchQuery("");
@@ -185,6 +193,18 @@ export default function SpeechToTextProvider({ settings }) {
               (provider) => provider.value === selectedProvider
             )?.options(settings)}
         </div>
+        <div className="text-base font-bold text-white mt-6 mb-4">Autosubmit</div>
+        <div>
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60">
+            <input
+              onChange={updateAutoSubmitChoice}
+              type="checkbox"
+              checked={autoSubmit}
+            />&nbsp;
+            Here you can specify whether you want your speech to text prompt to be submitted automatically after 3s of silence
+            or if it should be submitted manually
+          </p>
+        </div>
       </div>
     </form>
   );

diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
@@ -241,6 +241,8 @@ const SystemSettings = {
       // TTS/STT  Selection Settings & Configs
       // - Currently the only 3rd party is OpenAI or the native browser-built in
       // --------------------------------------------------------
+      SpeechToTextAutoSubmit: process.env.STT_AUTO_SUBMIT || "true",
+
       TextToSpeechProvider: process.env.TTS_PROVIDER || "native",
       TTSOpenAIKey: !!process.env.TTS_OPEN_AI_KEY,
       TTSOpenAIVoiceModel: process.env.TTS_OPEN_AI_VOICE_MODEL,

diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
@@ -518,6 +518,11 @@ const KEY_MAPPING = {
   },
 
   // TTS/STT Integration ENVS
+  SpeechToTextAutoSubmit: {
+    envKey: "STT_AUTO_SUBMIT",
+    checks: [validBooleanString],
+  },
+
   TextToSpeechProvider: {
     envKey: "TTS_PROVIDER",
     checks: [supportedTTSProvider],
@@ -683,6 +688,14 @@ function validOllamaLLMBasePath(input = "") {
   }
 }
 
+/**
+ * Validate that a value is exactly the string "true" or "false".
+ * @param {string} input - value to check
+ * @returns {string|null} null when valid, otherwise an error message
+ */
+function validBooleanString(input = "") {
+  if (input === "true" || input === "false") return null;
+  return `${input} is not a valid boolean flag.`;
+}
+
 function supportedTTSProvider(input = "") {
   const validSelection = [
     "native",