+ Most TTS services will have several models available. This is the{" "}
+ model parameter you will use to select the model you
+ want to use. Note: This is not the same as the voice model.
+
+
Voice Model
diff --git a/server/.env.example b/server/.env.example
index 809e36ed8fd..12fa5ec226a 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -257,6 +257,7 @@ TTS_PROVIDER="native"
# TTS_PROVIDER="generic-openai"
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
+# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index bb7311fb857..04596e5b74d 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -256,6 +256,7 @@ const SystemSettings = {
process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
// OpenAI Generic TTS
TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
+ TTSOpenAICompatibleModel: process.env.TTS_OPEN_AI_COMPATIBLE_MODEL,
TTSOpenAICompatibleVoiceModel:
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
diff --git a/server/utils/TextToSpeech/openAiGeneric/index.js b/server/utils/TextToSpeech/openAiGeneric/index.js
index df39e6348f8..5694ed33a38 100644
--- a/server/utils/TextToSpeech/openAiGeneric/index.js
+++ b/server/utils/TextToSpeech/openAiGeneric/index.js
@@ -4,6 +4,10 @@ class GenericOpenAiTTS {
this.#log(
"No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
);
+ if (!process.env.TTS_OPEN_AI_COMPATIBLE_MODEL)
+ this.#log(
+ "No OpenAI compatible TTS model was set. We will use the default model 'tts-1'. This may not exist or be valid for your selected endpoint."
+ );
if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
this.#log(
"No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
@@ -18,7 +22,11 @@ class GenericOpenAiTTS {
apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
});
+ this.model = process.env.TTS_OPEN_AI_COMPATIBLE_MODEL ?? "tts-1";
this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
+ this.#log(
+ `Service (${process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT}) with model: ${this.model} and voice: ${this.voice}`
+ );
}
#log(text, ...args) {
@@ -33,7 +41,7 @@ class GenericOpenAiTTS {
async ttsBuffer(textInput) {
try {
const result = await this.openai.audio.speech.create({
- model: "tts-1",
+ model: this.model,
voice: this.voice,
input: textInput,
});
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index f64a042ce9a..87670830d9c 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -601,6 +601,10 @@ const KEY_MAPPING = {
envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
checks: [],
},
+ TTSOpenAICompatibleModel: {
+ envKey: "TTS_OPEN_AI_COMPATIBLE_MODEL",
+ checks: [],
+ },
TTSOpenAICompatibleVoiceModel: {
envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
checks: [isNotEmpty],