diff --git a/flake.lock b/flake.lock index 7468d6da..a649ed0b 100644 --- a/flake.lock +++ b/flake.lock @@ -216,11 +216,11 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 1755704039, - "narHash": "sha256-gKlP0LbyJ3qX0KObfIWcp5nbuHSb5EHwIvU6UcNBg2A=", + "lastModified": 1757408970, + "narHash": "sha256-aSgK4BLNFFGvDTNKPeB28lVXYqVn8RdyXDNAvgGq+k0=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "9cb344e96d5b6918e94e1bca2d9f3ea1e9615545", + "rev": "d179d77c139e0a3f5c416477f7747e9d6b7ec315", "type": "github" }, "original": { diff --git a/public/i18n/en/translation.json b/public/i18n/en/translation.json index d281653f..9f7a5aef 100644 --- a/public/i18n/en/translation.json +++ b/public/i18n/en/translation.json @@ -84,6 +84,7 @@ "whisper_vad_silence": "VAD Silence", "whisper_silence_note": "Duration in ms to wait for to detect end of speech segment. Default: 240", "whisper_use_gpu": "GPU Acceleration", + "whisper_speaking_prompt": "Toggle Speaking Prompt", "deepgram_title": "Deepgram", "deepgram_key": "Key", diff --git a/public/i18n/zh_cn/translation.json b/public/i18n/zh_cn/translation.json index 61ed6e77..00eed6ff 100644 --- a/public/i18n/zh_cn/translation.json +++ b/public/i18n/zh_cn/translation.json @@ -76,6 +76,15 @@ "speechly_title": "Speechly", "speechly_appid": "App ID", + "whisper_title": "Whisper", + "whisper_model_path": "Whisper 模型", + "whisper_translate_to_english": "翻译为英文", + "whisper_lang_desc_note": "注意:质量因选择的语言和模型而异", + "whisper_vad_silence": "VAD 静音", + "whisper_silence_note": "等待检测语音段落结尾的毫秒数。默认值:240", + "whisper_use_gpu": "GPU 加速", + "whisper_speaking_prompt": "切换说话提示", + "deepgram_title": "Deepgram", "deepgram_key": "Key", "deepgram_quality": "质量", diff --git a/public/i18n/zh_tw/translation.json b/public/i18n/zh_tw/translation.json index 646ce27e..910c6f55 100644 --- a/public/i18n/zh_tw/translation.json +++ b/public/i18n/zh_tw/translation.json @@ -76,6 +76,15 @@ "speechly_title": "Speechly", "speechly_appid": "App ID", + "whisper_title": "Whisper", + "whisper_model_path": "Whisper 模型", + "whisper_translate_to_english": "翻譯為英文", + "whisper_lang_desc_note": "注意:質量因選擇的語言和模型而異", + "whisper_vad_silence": "VAD 靜音", + "whisper_silence_note": "等待檢測語音段落結尾的毫秒數。默認值:240", + "whisper_use_gpu": "GPU 加速", + "whisper_speaking_prompt": "切換說話提示", + "deepgram_title": "Deepgram", "deepgram_key": "Key", "deepgram_quality": "質量", diff --git a/src-tauri/src/services/whisper_stt/mod.rs b/src-tauri/src/services/whisper_stt/mod.rs index ec7d3b86..fc38c9f3 100644 --- a/src-tauri/src/services/whisper_stt/mod.rs +++ b/src-tauri/src/services/whisper_stt/mod.rs @@ -65,6 +65,7 @@ pub struct WhisperArgs { translate_to_english: bool, silence_interval: u64, use_gpu: bool, + show_speaking_prompt: bool, } pub fn init() -> plugin::TauriPlugin { @@ -200,7 +201,8 @@ pub async fn start(app: AppHandle, args: WhisperArgs) -> Result<( event = activity_rx.next() => { match event { Some(VadActivity::SpeechStart) => { - if app.emit("whisper_stt_interim", "[speaking]").is_err() { + let interim_text = if args.show_speaking_prompt { "[speaking]" } else { "" }; + if app.emit("whisper_stt_interim", interim_text).is_err() { eprintln!("wasn't able to emit to frontend {}:{}", file!(), line!()); } }, diff --git a/src/server/services/stt/schema.ts b/src/server/services/stt/schema.ts index 7fcbdbf4..4ee9326c 100644 --- a/src/server/services/stt/schema.ts +++ b/src/server/services/stt/schema.ts @@ -49,6 +49,7 @@ export const Service_STT_Schema = z.object({ translateToEnglish: zSafe(z.coerce.boolean(), false), silenceInterval: zSafe(zStringNumber(), "240"), useGpu: zSafe(z.coerce.boolean(), true), + showSpeakingPrompt: zSafe(z.coerce.boolean(), true), }).default({}), deepgram: z.object({ device: zSafe(z.coerce.string(), "default"), diff --git a/src/server/services/stt/services/whisper.ts b/src/server/services/stt/services/whisper.ts index 092845fc..28298eeb 100644 --- a/src/server/services/stt/services/whisper.ts +++ b/src/server/services/stt/services/whisper.ts @@ -43,6 +43,7 @@ export class STT_WhisperService implements ISTTService { translateToEnglish: this.state.translateToEnglish, silenceInterval: parseInt(this.state.silenceInterval), useGpu: this.state.useGpu, + showSpeakingPrompt: this.state.showSpeakingPrompt, }, }).catch(err => { this.#initialized = false; diff --git a/src/server/ui/inspector/inspector_stt.tsx b/src/server/ui/inspector/inspector_stt.tsx index 3c906c32..58b979a1 100644 --- a/src/server/ui/inspector/inspector_stt.tsx +++ b/src/server/ui/inspector/inspector_stt.tsx @@ -190,6 +190,7 @@ const Whisper: FC = () => { handleUpdate("silenceInterval", e.target.value)} /> {t('stt.whisper_silence_note')} handleUpdate("useGpu", e)} value={data.useGpu}/> + handleUpdate("showSpeakingPrompt", e)} value={data.showSpeakingPrompt}/>