diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fa15f6b..d51c795 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -138,6 +138,16 @@ jobs: - name: Typecheck run: pnpm typecheck + - name: Prepare Kokoro TTS assets + shell: bash + env: + GOAGENT_TTS_ASSETS_STRICT: "1" + GOAGENT_TTS_SMOKE_STRICT: "1" + run: | + pnpm prepare:tts-assets + pnpm check:tts-assets + pnpm smoke:tts + - name: Prepare KataGo assets shell: bash env: @@ -333,6 +343,16 @@ jobs: - name: Typecheck run: pnpm typecheck + - name: Prepare Kokoro TTS assets + shell: bash + env: + GOAGENT_TTS_ASSETS_STRICT: "1" + GOAGENT_TTS_SMOKE_STRICT: "1" + run: | + pnpm prepare:tts-assets + pnpm check:tts-assets + pnpm smoke:tts + - name: Prepare NVIDIA KataGo assets shell: bash env: diff --git a/.gitignore b/.gitignore index 01c16ed..2c3d9b7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ release .DS_Store *.log .goagent +.goagent-smoke __pycache__ *.pyc coverage @@ -24,3 +25,7 @@ data/katago/bin/** data/katago/models/** data/katago/edition.json !data/katago/README.md + +# Large local Kokoro TTS model files are prepared by scripts for release packaging. +# Voices and metadata are small enough to version; the ONNX model is not. +data/tts/kokoro/**/onnx/*.onnx diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a7b7ac..3b6d9ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,20 @@ All notable changes to GoAgent will be documented here. This project follows semantic versioning once public releases begin. +## 0.3.16 - Strict Offline Kokoro TTS + +### Added + +- Added a strict selected-provider TTS system with bundled Kokoro zh-CN offline synthesis as the default provider. +- Added teacher answer playback controls and a TTS settings panel. +- Added custom OpenAI-compatible, HTTP JSON, and local-service TTS providers that only run when explicitly selected. 
+- Added Kokoro asset preparation, validation, provider-policy checks, and real strict offline synthesis smoke testing. + +### Changed + +- Updated release packaging so GitHub Release builds prepare Kokoro ONNX assets before packaging installers. +- Kept the no-fallback policy: no system voice, no Web Speech, no provider chain, and no automatic provider switching. + ## 0.3.15 - GoAgent Brand Identity ### Changed diff --git a/README.md b/README.md index b5c7641..5cad794 100644 --- a/README.md +++ b/README.md @@ -49,16 +49,16 @@ KataGo 是事实裁判,LLM 是讲棋老师。GoAgent 的目标是让学生不 当前发布版本: -[GoAgent v0.3.15](https://github.com/wimi321/GoAgent/releases/tag/v0.3.15) +[GoAgent v0.3.16](https://github.com/wimi321/GoAgent/releases/tag/v0.3.16) | 平台 | 下载 | | --- | --- | -| macOS Apple Silicon | [GoAgent-0.3.15-mac-arm64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-arm64.dmg) | -| macOS Intel | [GoAgent-0.3.15-mac-x64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-x64.dmg) | -| Windows x64 免安装 ZIP | [GoAgent-0.3.15-win-x64-portable.zip](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-portable.zip) | -| Windows x64 安装版 | [GoAgent-0.3.15-win-x64.exe](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64.exe) | -| Windows NVIDIA 免安装 ZIP | [GoAgent-0.3.15-win-x64-nvidia-portable.zip](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-nvidia-portable.zip) | -| Windows NVIDIA 安装版 | [GoAgent-0.3.15-win-x64-nvidia.exe](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-nvidia.exe) | +| macOS Apple Silicon | [GoAgent-0.3.16-mac-arm64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-arm64.dmg) | +| macOS Intel | [GoAgent-0.3.16-mac-x64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-x64.dmg) | +| Windows x64 免安装 ZIP | 
[GoAgent-0.3.16-win-x64-portable.zip](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-portable.zip) | +| Windows x64 安装版 | [GoAgent-0.3.16-win-x64.exe](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64.exe) | +| Windows NVIDIA 免安装 ZIP | [GoAgent-0.3.16-win-x64-nvidia-portable.zip](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-nvidia-portable.zip) | +| Windows NVIDIA 安装版 | [GoAgent-0.3.16-win-x64-nvidia.exe](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-nvidia.exe) | 发布说明: diff --git a/README_EN.md b/README_EN.md index 2370e70..77bae87 100644 --- a/README_EN.md +++ b/README_EN.md @@ -49,14 +49,14 @@ KataGo is the source of truth. The LLM is the teacher that turns those facts int Current release: -[GoAgent v0.3.15](https://github.com/wimi321/GoAgent/releases/tag/v0.3.15) +[GoAgent v0.3.16](https://github.com/wimi321/GoAgent/releases/tag/v0.3.16) | Platform | Download | | --- | --- | -| macOS Apple Silicon | [GoAgent-0.3.15-mac-arm64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-arm64.dmg) | -| macOS Intel | [GoAgent-0.3.15-mac-x64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-x64.dmg) | -| Windows x64 portable ZIP | [GoAgent-0.3.15-win-x64-portable.zip](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-portable.zip) | -| Windows x64 installer | [GoAgent-0.3.15-win-x64.exe](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64.exe) | +| macOS Apple Silicon | [GoAgent-0.3.16-mac-arm64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-arm64.dmg) | +| macOS Intel | [GoAgent-0.3.16-mac-x64.dmg](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-x64.dmg) | +| Windows x64 portable ZIP | 
[GoAgent-0.3.16-win-x64-portable.zip](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-portable.zip) | +| Windows x64 installer | [GoAgent-0.3.16-win-x64.exe](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64.exe) | Release caveats: diff --git a/README_JA.md b/README_JA.md index 3b6685d..1e28b11 100644 --- a/README_JA.md +++ b/README_JA.md @@ -39,14 +39,14 @@ GoAgent は、KataGo、棋盤スクリーンショット、ローカル知識カ 公開リリース: -[GoAgent v0.3.15](https://github.com/wimi321/GoAgent/releases/tag/v0.3.15) +[GoAgent v0.3.16](https://github.com/wimi321/GoAgent/releases/tag/v0.3.16) | プラットフォーム | ダウンロード | | --- | --- | -| macOS Apple Silicon | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-arm64.dmg) | -| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-x64.dmg) | -| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-portable.zip) | -| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64.exe) | +| macOS Apple Silicon | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-arm64.dmg) | +| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-x64.dmg) | +| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-portable.zip) | +| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64.exe) | 注意:現在のリリースは未署名です。macOS Gatekeeper や Windows SmartScreen の警告が表示される場合があります。 diff --git a/README_KO.md b/README_KO.md index ce23c56..74ff3cc 100644 --- a/README_KO.md +++ b/README_KO.md @@ -39,14 +39,14 @@ GoAgent는 KataGo, 바둑판 스크린샷, 로컬 지식 카드, 학생 프로 공개 릴리스: -[GoAgent v0.3.15](https://github.com/wimi321/GoAgent/releases/tag/v0.3.15) +[GoAgent 
v0.3.16](https://github.com/wimi321/GoAgent/releases/tag/v0.3.16) | 플랫폼 | 다운로드 | | --- | --- | -| macOS Apple Silicon | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-arm64.dmg) | -| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-x64.dmg) | -| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-portable.zip) | -| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64.exe) | +| macOS Apple Silicon | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-arm64.dmg) | +| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-x64.dmg) | +| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-portable.zip) | +| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64.exe) | 현재 릴리스 패키지는 서명되지 않았으므로 macOS Gatekeeper 또는 Windows SmartScreen 경고가 표시될 수 있습니다. 
diff --git a/README_TH.md b/README_TH.md index 723b8d4..77199f3 100644 --- a/README_TH.md +++ b/README_TH.md @@ -39,14 +39,14 @@ GoAgent เป็นแอปเดสก์ท็อปแบบ local-first รุ่นเผยแพร่สาธารณะ: -[GoAgent v0.3.15](https://github.com/wimi321/GoAgent/releases/tag/v0.3.15) +[GoAgent v0.3.16](https://github.com/wimi321/GoAgent/releases/tag/v0.3.16) | แพลตฟอร์ม | ดาวน์โหลด | | --- | --- | -| macOS Apple Silicon | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-arm64.dmg) | -| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-x64.dmg) | -| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-portable.zip) | -| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64.exe) | +| macOS Apple Silicon | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-arm64.dmg) | +| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-x64.dmg) | +| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-portable.zip) | +| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64.exe) | หมายเหตุ: รุ่นนี้ยังไม่ได้ signed/notarized บน macOS และยังไม่ได้ code-signed บน Windows จึงอาจมีคำเตือนจากระบบปฏิบัติการ diff --git a/README_VI.md b/README_VI.md index e367c72..19cfbe6 100644 --- a/README_VI.md +++ b/README_VI.md @@ -39,14 +39,14 @@ GoAgent là ứng dụng desktop local-first cho người học và giáo viên Bản phát hành công khai: -[GoAgent v0.3.15](https://github.com/wimi321/GoAgent/releases/tag/v0.3.15) +[GoAgent v0.3.16](https://github.com/wimi321/GoAgent/releases/tag/v0.3.16) | Nền tảng | Tải xuống | | --- | --- | -| macOS Apple Silicon | 
[DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-arm64.dmg) | -| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-mac-x64.dmg) | -| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64-portable.zip) | -| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.15/GoAgent-0.3.15-win-x64.exe) | +| macOS Apple Silicon | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-arm64.dmg) | +| macOS Intel | [DMG](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-mac-x64.dmg) | +| Windows x64 portable ZIP | [ZIP](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64-portable.zip) | +| Windows x64 installer | [EXE](https://github.com/wimi321/GoAgent/releases/download/v0.3.16/GoAgent-0.3.16-win-x64.exe) | Lưu ý: bản phát hành hiện chưa được ký và notarize trên macOS, cũng chưa được code-sign trên Windows, vì vậy hệ điều hành có thể hiển thị cảnh báo bảo mật. diff --git a/data/tts/kokoro/zh-CN/LICENSE b/data/tts/kokoro/zh-CN/LICENSE new file mode 100644 index 0000000..ac34041 --- /dev/null +++ b/data/tts/kokoro/zh-CN/LICENSE @@ -0,0 +1,9 @@ +Kokoro-82M-v1.1-zh-ONNX source notice + +Model repository: onnx-community/Kokoro-82M-v1.1-zh-ONNX +License stated by upstream model card: Apache-2.0 +Default model file: onnx/model_int8.onnx + +This directory is a release asset target. The large ONNX and voice files are +prepared by scripts/prepare_tts_assets.mjs and should be verified by +scripts/check_tts_assets.mjs before release packaging. 
diff --git a/data/tts/kokoro/zh-CN/MODEL_CARD.md b/data/tts/kokoro/zh-CN/MODEL_CARD.md new file mode 100644 index 0000000..b746cf7 --- /dev/null +++ b/data/tts/kokoro/zh-CN/MODEL_CARD.md @@ -0,0 +1,15 @@ +# Kokoro Chinese ONNX bundle + +GoAgent bundles a local Kokoro Chinese ONNX voice pack for offline teacher speech. + +- Upstream model: `onnx-community/Kokoro-82M-v1.1-zh-ONNX` +- License: Apache-2.0 as stated by the upstream model card +- Default quantized model: `onnx/model_int8.onnx` +- Expected model SHA256: `58b9b997faeaf42b427bac24c8a6246b236b0561311f6b118318cd9d2f47acb1` + +Large binary assets are not stored in ordinary source patches. Run: + +```bash +pnpm prepare:tts-assets +pnpm check:tts-assets +``` diff --git a/data/tts/kokoro/zh-CN/config.json b/data/tts/kokoro/zh-CN/config.json new file mode 100644 index 0000000..790faf2 --- /dev/null +++ b/data/tts/kokoro/zh-CN/config.json @@ -0,0 +1,3 @@ +{ + "model_type": "style_text_to_speech_2" +} \ No newline at end of file diff --git a/data/tts/kokoro/zh-CN/manifest.json b/data/tts/kokoro/zh-CN/manifest.json new file mode 100644 index 0000000..ea519d1 --- /dev/null +++ b/data/tts/kokoro/zh-CN/manifest.json @@ -0,0 +1,21 @@ +{ + "id": "kokoro-zh-cn-int8", + "provider": "kokoro-bundled", + "language": "zh-CN", + "modelRepo": "onnx-community/Kokoro-82M-v1.1-zh-ONNX", + "modelRevision": "6cc0f0d2ebe369a68b0df87c2b65c1af8c0ac3e3", + "modelFile": "onnx/model_int8.onnx", + "modelSha256": "58b9b997faeaf42b427bac24c8a6246b236b0561311f6b118318cd9d2f47acb1", + "runtimeModelFile": "onnx/model_quantized.onnx", + "runtimeModelSha256": "a39469be791eeaa3089c1ed5e58b8731d1f2462ea0e7dae2bc44388e58f973d8", + "modelSizeMb": 127, + "license": "Apache-2.0", + "offline": true, + "bundled": true, + "defaultVoiceId": "zf_001", + "voices": [ + { "id": "zf_001", "label": "中文女声 001", "file": "voices/zf_001.bin", "sha256": "0a89ec12bb93fb9c74077924daf02568baad64e1f869389f5aaee01a386035f8" }, + { "id": "zm_009", "label": "中文男声 009", 
"file": "voices/zm_009.bin", "sha256": "7b74d6ed22f201e2fa28758e78ce6197082779f2b80e69ea1bf877908609514a" } + ], + "sourcePolicy": "Bundled Kokoro provider must use this selected local asset only. No automatic provider switch is allowed." +} diff --git a/data/tts/kokoro/zh-CN/onnx/.gitkeep b/data/tts/kokoro/zh-CN/onnx/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/tts/kokoro/zh-CN/tokenizer.json b/data/tts/kokoro/zh-CN/tokenizer.json new file mode 100644 index 0000000..247f772 --- /dev/null +++ b/data/tts/kokoro/zh-CN/tokenizer.json @@ -0,0 +1,232 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [], + "normalizer": { + "type": "Replace", + "pattern": { + "Regex": "[^ !\"(),./12345:;?AIOQRSTWYabcdefhijklmnopqrstuvwxyz\u00e6\u00e7\u00f0\u00f8\u014b\u0153\u0250\u0251\u0252\u0254\u0255\u0256\u0259\u025b\u025c\u025f\u0261\u0263\u0268\u026a\u026f\u0270\u0272\u0273\u0274\u0278\u0279\u027d\u027e\u0281\u0282\u0283\u0288\u028a\u028b\u028c\u028e\u0292\u0294\u029d\u02a3\u02a4\u02a5\u02a6\u02a7\u02a8\u02b0\u02b2\u02c8\u02cc\u02d0\u0303\u03b2\u03b8\u03c7\u1d4a\u1d5d\u1d7b\u2014\u201c\u201d\u2026\u3105\u3106\u3107\u3108\u3109\u310a\u310b\u310c\u310d\u310e\u310f\u3110\u3111\u3112\u3113\u3114\u3115\u3116\u3117\u3118\u3119\u311a\u311b\u311c\u311d\u311e\u311f\u3120\u3121\u3122\u3123\u3124\u3125\u3126\u3127\u3128\u3129\u312d\u4e07\u4e2d\u4e3a\u4e91\u5143\u5341\u538b\u53c8\u5916\u5e94\u6211\u6587\u6708\u738b\u74ee\u7528\u7a75\u8981\u8a00\u9633\u9634]" + }, + "content": "" + }, + "pre_tokenizer": { + "type": "Split", + "pattern": { + "Regex": "" + }, + "behavior": "Isolated", + "invert": false + }, + "post_processor": { + "type": "TemplateProcessing", + "single": [ + { + "SpecialToken": { + "id": "$", + "type_id": 0 + } + }, + { + "Sequence": { + "id": "A", + "type_id": 0 + } + }, + { + "SpecialToken": { + "id": "$", + "type_id": 0 + } + } + ], + "special_tokens": { + "$": { + "id": "$", + "ids": [ + 0 + ], + "tokens": [ + "$" 
+ ] + } + } + }, + "decoder": null, + "model": { + "vocab": { + "$": 0, + ";": 1, + ":": 2, + ",": 3, + ".": 4, + "!": 5, + "?": 6, + "/": 7, + "\u2014": 9, + "\u2026": 10, + "\"": 11, + "(": 12, + ")": 13, + "\u201c": 14, + "\u201d": 15, + " ": 16, + "\u0303": 17, + "\u02a3": 18, + "\u02a5": 19, + "\u02a6": 20, + "\u02a8": 21, + "\u1d5d": 22, + "\u3113": 23, + "A": 24, + "I": 25, + "\u3105": 30, + "O": 31, + "\u3106": 32, + "Q": 33, + "R": 34, + "S": 35, + "T": 36, + "\u3107": 37, + "\u3108": 38, + "W": 39, + "\u3109": 40, + "Y": 41, + "\u1d4a": 42, + "a": 43, + "b": 44, + "c": 45, + "d": 46, + "e": 47, + "f": 48, + "\u310a": 49, + "h": 50, + "i": 51, + "j": 52, + "k": 53, + "l": 54, + "m": 55, + "n": 56, + "o": 57, + "p": 58, + "q": 59, + "r": 60, + "s": 61, + "t": 62, + "u": 63, + "v": 64, + "w": 65, + "x": 66, + "y": 67, + "z": 68, + "\u0251": 69, + "\u0250": 70, + "\u0252": 71, + "\u00e6": 72, + "\u310b": 73, + "\u310c": 74, + "\u03b2": 75, + "\u0254": 76, + "\u0255": 77, + "\u00e7": 78, + "\u310d": 79, + "\u0256": 80, + "\u00f0": 81, + "\u02a4": 82, + "\u0259": 83, + "\u310e": 84, + "\u3126": 85, + "\u025b": 86, + "\u025c": 87, + "\u310f": 88, + "\u3110": 89, + "\u025f": 90, + "\u3111": 91, + "\u0261": 92, + "\u3112": 93, + "\u3114": 94, + "\u3115": 95, + "\u3117": 96, + "\u3118": 97, + "\u3119": 98, + "\u6708": 99, + "\u311a": 100, + "\u0268": 101, + "\u026a": 102, + "\u029d": 103, + "\u311b": 104, + "\u311d": 105, + "\u311e": 106, + "\u311f": 107, + "\u3120": 108, + "\u3121": 109, + "\u026f": 110, + "\u0270": 111, + "\u014b": 112, + "\u0273": 113, + "\u0272": 114, + "\u0274": 115, + "\u00f8": 116, + "\u3122": 117, + "\u0278": 118, + "\u03b8": 119, + "\u0153": 120, + "\u3123": 121, + "\u3124": 122, + "\u0279": 123, + "\u3125": 124, + "\u027e": 125, + "\u3116": 126, + "\u3127": 127, + "\u0281": 128, + "\u027d": 129, + "\u0282": 130, + "\u0283": 131, + "\u0288": 132, + "\u02a7": 133, + "\u3128": 134, + "\u028a": 135, + "\u028b": 136, + "\u3129": 137, + 
"\u028c": 138, + "\u0263": 139, + "\u311c": 140, + "\u312d": 141, + "\u03c7": 142, + "\u028e": 143, + "\u5341": 144, + "\u538b": 145, + "\u8a00": 146, + "\u0292": 147, + "\u0294": 148, + "\u9633": 149, + "\u8981": 150, + "\u9634": 151, + "\u5e94": 152, + "\u7528": 153, + "\u53c8": 154, + "\u4e2d": 155, + "\u02c8": 156, + "\u02cc": 157, + "\u02d0": 158, + "\u7a75": 159, + "\u5916": 160, + "\u4e07": 161, + "\u02b0": 162, + "\u738b": 163, + "\u02b2": 164, + "\u4e3a": 165, + "\u6587": 166, + "\u74ee": 167, + "\u6211": 168, + "3": 169, + "5": 170, + "1": 171, + "2": 172, + "4": 173, + "\u5143": 175, + "\u4e91": 176, + "\u1d7b": 177 + } + } +} diff --git a/data/tts/kokoro/zh-CN/tokenizer_config.json b/data/tts/kokoro/zh-CN/tokenizer_config.json new file mode 100644 index 0000000..5c81e9a --- /dev/null +++ b/data/tts/kokoro/zh-CN/tokenizer_config.json @@ -0,0 +1,6 @@ +{ + "model_max_length": 512, + "pad_token": "$", + "tokenizer_class": "PreTrainedTokenizer", + "unk_token": "$" +} \ No newline at end of file diff --git a/data/tts/kokoro/zh-CN/voices/.gitkeep b/data/tts/kokoro/zh-CN/voices/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/tts/kokoro/zh-CN/voices/zf_001.bin b/data/tts/kokoro/zh-CN/voices/zf_001.bin new file mode 100644 index 0000000..236a2a1 Binary files /dev/null and b/data/tts/kokoro/zh-CN/voices/zf_001.bin differ diff --git a/data/tts/kokoro/zh-CN/voices/zm_009.bin b/data/tts/kokoro/zh-CN/voices/zm_009.bin new file mode 100644 index 0000000..1f68cf8 Binary files /dev/null and b/data/tts/kokoro/zh-CN/voices/zm_009.bin differ diff --git a/docs/RELEASE_NOTES_v0.3.16.md b/docs/RELEASE_NOTES_v0.3.16.md new file mode 100644 index 0000000..24394f6 --- /dev/null +++ b/docs/RELEASE_NOTES_v0.3.16.md @@ -0,0 +1,153 @@ +# GoAgent v0.3.16 + +GoAgent v0.3.16 adds strict offline Kokoro Chinese TTS for AI teacher answers. 
The default speech provider is bundled Kokoro zh-CN local synthesis, with no system voice, no Web Speech, no fallback chain, and no automatic provider switching. If users select a custom TTS provider, GoAgent uses only that selected provider and reports a clear error if it fails. + +QQ群:1030632742,欢迎一起交流、提建议、完善 GoAgent。 + +## 中文 + +### 下载前先选版本 + +| 平台 / 场景 | 推荐下载 | +| --- | --- | +| macOS Apple Silicon | `GoAgent-0.3.16-mac-arm64.dmg` | +| macOS Intel | `GoAgent-0.3.16-mac-x64.dmg` | +| Windows 普通版,OpenCL 推荐包 | `GoAgent-0.3.16-win-x64.exe` 或 `GoAgent-0.3.16-win-x64-portable.zip` | +| Windows NVIDIA 专版,适合 NVIDIA 显卡和 CUDA 环境 | `GoAgent-0.3.16-win-x64-nvidia.exe` 或 `GoAgent-0.3.16-win-x64-nvidia-portable.zip` | +| 校验文件 | `SHA256SUMS.txt` | + +### 本版重点 + +- 新增默认打包的 Kokoro 中文离线神经 TTS,老师回答可以直接本机朗读。 +- 新增语音朗读控制和设置面板,支持播放、暂停、继续、停止。 +- 新增严格 selected-provider TTS 策略:选哪个 provider 就只用哪个 provider。 +- 自定义 OpenAI-compatible / HTTP JSON / 本地 TTS 服务只在用户显式选择时调用。 +- Release workflow 会在打包前准备 Kokoro ONNX 资源,并运行真实 offline synthesis smoke。 + +## 繁體中文 + +### 下載前先選版本 + +| 平台 / 使用情境 | 建議下載 | +| --- | --- | +| macOS Apple Silicon | `GoAgent-0.3.16-mac-arm64.dmg` | +| macOS Intel | `GoAgent-0.3.16-mac-x64.dmg` | +| Windows 一般版,OpenCL 推薦包 | `GoAgent-0.3.16-win-x64.exe` 或 `GoAgent-0.3.16-win-x64-portable.zip` | +| Windows NVIDIA 專版 | `GoAgent-0.3.16-win-x64-nvidia.exe` 或 `GoAgent-0.3.16-win-x64-nvidia-portable.zip` | +| 校驗檔 | `SHA256SUMS.txt` | + +### 本版重點 + +- 新增預設打包的 Kokoro 中文離線神經 TTS,老師回答可在本機朗讀。 +- 新增語音播放控制與設定面板。 +- 採用嚴格 selected-provider TTS:目前選擇哪個 provider,就只使用該 provider。 +- 自訂 API provider 只有使用者明確選擇時才會呼叫。 +- Release workflow 會在打包前準備 Kokoro ONNX 資源並執行真實 offline synthesis smoke。 + +## English + +### Pick the right package before downloading + +| Platform / use case | Recommended download | +| --- | --- | +| macOS Apple Silicon | `GoAgent-0.3.16-mac-arm64.dmg` | +| macOS Intel | `GoAgent-0.3.16-mac-x64.dmg` | +| Standard Windows x64, OpenCL recommended | `GoAgent-0.3.16-win-x64.exe` or 
`GoAgent-0.3.16-win-x64-portable.zip` | +| Windows NVIDIA edition for NVIDIA GPUs and CUDA runtimes | `GoAgent-0.3.16-win-x64-nvidia.exe` or `GoAgent-0.3.16-win-x64-nvidia-portable.zip` | +| Checksums | `SHA256SUMS.txt` | + +### Why update + +- Adds bundled Kokoro zh-CN offline neural TTS for AI teacher answers. +- Adds teacher speech controls and a TTS settings panel. +- Enforces strict selected-provider TTS: the selected provider is the only provider used. +- Custom OpenAI-compatible, HTTP JSON, and local-service TTS providers are called only after explicit user selection. +- Release packaging now prepares Kokoro ONNX assets and runs a real offline synthesis smoke before installers are built. + +## 日本語 + +### ダウンロード前に選ぶもの + +| 環境 | 推奨ファイル | +| --- | --- | +| macOS Apple Silicon | `GoAgent-0.3.16-mac-arm64.dmg` | +| macOS Intel | `GoAgent-0.3.16-mac-x64.dmg` | +| Windows 標準版、OpenCL 推奨 | `GoAgent-0.3.16-win-x64.exe` または `GoAgent-0.3.16-win-x64-portable.zip` | +| NVIDIA GPU / CUDA 向け Windows NVIDIA 版 | `GoAgent-0.3.16-win-x64-nvidia.exe` または `GoAgent-0.3.16-win-x64-nvidia-portable.zip` | +| チェックサム | `SHA256SUMS.txt` | + +### 主な変更 + +- Kokoro 中国語オフラインニューラル TTS を同梱し、先生の回答をローカルで読み上げられるようにしました。 +- 読み上げの再生、停止、一時停止、再開と設定画面を追加しました。 +- strict selected-provider TTS により、選択中の provider だけを使用します。 +- カスタム TTS API はユーザーが明示的に選んだ場合だけ呼び出されます。 +- Release workflow はパッケージ前に Kokoro ONNX を準備し、offline synthesis smoke を実行します。 + +## 한국어 + +### 다운로드 전 선택 + +| 환경 | 권장 다운로드 | +| --- | --- | +| macOS Apple Silicon | `GoAgent-0.3.16-mac-arm64.dmg` | +| macOS Intel | `GoAgent-0.3.16-mac-x64.dmg` | +| Windows 표준 x64, OpenCL 권장 | `GoAgent-0.3.16-win-x64.exe` 또는 `GoAgent-0.3.16-win-x64-portable.zip` | +| NVIDIA GPU / CUDA용 Windows NVIDIA 에디션 | `GoAgent-0.3.16-win-x64-nvidia.exe` 또는 `GoAgent-0.3.16-win-x64-nvidia-portable.zip` | +| 체크섬 | `SHA256SUMS.txt` | + +### 이번 버전 + +- Kokoro 중국어 오프라인 신경망 TTS를 기본 포함해 AI 선생님 답변을 로컬에서 읽어 줍니다. +- 재생, 일시정지, 계속, 정지 컨트롤과 TTS 설정 패널을 추가했습니다. 
+- strict selected-provider TTS 정책으로 선택한 provider만 사용합니다. +- 사용자 지정 API는 사용자가 명시적으로 선택한 경우에만 호출됩니다. +- Release workflow가 패키징 전에 Kokoro ONNX 자산을 준비하고 offline synthesis smoke를 실행합니다. + +## ภาษาไทย + +### เลือกไฟล์ก่อนดาวน์โหลด + +| แพลตฟอร์ม | ไฟล์ที่แนะนำ | +| --- | --- | +| macOS Apple Silicon | `GoAgent-0.3.16-mac-arm64.dmg` | +| macOS Intel | `GoAgent-0.3.16-mac-x64.dmg` | +| Windows x64 มาตรฐาน แนะนำ OpenCL | `GoAgent-0.3.16-win-x64.exe` หรือ `GoAgent-0.3.16-win-x64-portable.zip` | +| Windows NVIDIA edition สำหรับ NVIDIA GPU และ CUDA | `GoAgent-0.3.16-win-x64-nvidia.exe` หรือ `GoAgent-0.3.16-win-x64-nvidia-portable.zip` | +| Checksums | `SHA256SUMS.txt` | + +### จุดสำคัญของรุ่นนี้ + +- เพิ่ม Kokoro ภาษาจีนแบบ offline neural TTS สำหรับอ่านข้อความคำสอนของ AI teacher ในเครื่อง +- เพิ่มปุ่มเล่น หยุดชั่วคราว เล่นต่อ และหยุด พร้อมแผงตั้งค่า TTS +- ใช้นโยบาย strict selected-provider TTS: เลือก provider ใดก็ใช้เฉพาะ provider นั้น +- custom API จะถูกเรียกใช้เฉพาะเมื่อผู้ใช้เลือกเองอย่างชัดเจน +- Release workflow เตรียม Kokoro ONNX และรัน offline synthesis smoke ก่อนสร้าง installer + +## Tiếng Việt + +### Chọn gói tải xuống + +| Nền tảng | Gói khuyến nghị | +| --- | --- | +| macOS Apple Silicon | `GoAgent-0.3.16-mac-arm64.dmg` | +| macOS Intel | `GoAgent-0.3.16-mac-x64.dmg` | +| Windows x64 tiêu chuẩn, khuyến nghị OpenCL | `GoAgent-0.3.16-win-x64.exe` hoặc `GoAgent-0.3.16-win-x64-portable.zip` | +| Windows NVIDIA edition cho GPU NVIDIA và CUDA | `GoAgent-0.3.16-win-x64-nvidia.exe` hoặc `GoAgent-0.3.16-win-x64-nvidia-portable.zip` | +| Checksums | `SHA256SUMS.txt` | + +### Điểm mới + +- Thêm Kokoro zh-CN offline neural TTS để đọc câu trả lời của AI teacher ngay trên máy. +- Thêm điều khiển phát, tạm dừng, tiếp tục, dừng và bảng cài đặt TTS. +- Áp dụng strict selected-provider TTS: provider được chọn là provider duy nhất được dùng. +- Custom API chỉ được gọi khi người dùng chọn rõ ràng. 
+- Release workflow chuẩn bị Kokoro ONNX và chạy offline synthesis smoke trước khi đóng gói. + +## Quality baseline + +This release keeps the existing top-quality baseline: grounded shape recognition engine, local pattern matcher, knowledge source-policy gates, optimized move-range review, quality checks and eval gates, Real Eval / engine silver fixture gate, KataGo engine pool telemetry, Release artifact smoke, student level, student age, teacher persona style settings with evidence boundary, teacher sessions, and selective PR #6 integration. + +It also adds Kokoro selected-provider TTS, strict offline synthesis validation, and release packaging checks for bundled zh-CN speech assets. Windows packages continue to follow the OpenCL and NVIDIA split. The standard Windows package includes the Windows OpenCL runtime bundle and KataGo OpenCL adjacent runtime files; GPU vendor OpenCL drivers still come from the user's NVIDIA / AMD / Intel graphics driver. + +Thanks to layiku and wimi321. diff --git a/docs/TTS_KOKORO.md b/docs/TTS_KOKORO.md new file mode 100644 index 0000000..fa62576 --- /dev/null +++ b/docs/TTS_KOKORO.md @@ -0,0 +1,65 @@ +# GoAgent TTS: Kokoro-first selected-provider design + +GoAgent uses a strict selected-provider TTS design. + +## Product policy + +- Default provider: `kokoro-bundled`. +- Default language: `zh-CN`. +- Default asset target: `data/tts/kokoro/zh-CN`. +- Other languages are optional language packs. +- Custom TTS APIs are only used when the user explicitly selects a custom provider. +- GoAgent does not automatically switch providers when a selected provider fails. + +If the selected provider is not ready, playback fails with a clear error and a repair action. 
+ +## Bundled Kokoro asset + +The default bundled Chinese voice pack is based on: + +```text +onnx-community/Kokoro-82M-v1.1-zh-ONNX +onnx/model_int8.onnx +onnx/model_quantized.onnx +``` + +The expected model size is about 127 MB and the expected SHA256 is recorded in +`data/tts/kokoro/zh-CN/manifest.json`. + +`model_int8.onnx` is kept as the source asset requested by the release manifest. +`model_quantized.onnx` is the exact filename resolved by `kokoro-js` when the +selected bundled provider runs with `q8`. + +Large binary assets are not stored in ordinary text patches. Prepare them before +release packaging: + +```bash +pnpm prepare:tts-assets +GOAGENT_TTS_ASSETS_STRICT=1 pnpm check:tts-assets +GOAGENT_TTS_SMOKE_STRICT=1 pnpm smoke:tts +``` + +Strict smoke performs a real offline synthesis with the selected local zh-CN +voice. It does not call a system voice, Web Speech, or a custom API. + +## Custom API providers + +GoAgent supports explicit custom providers: + +- `custom-openai-compatible` +- `custom-http-json` +- `external-local-service` + +When a custom provider is selected, the text is sent to the configured endpoint. +When `kokoro-bundled` is selected, no custom API is called. + +## Privacy + +`kokoro-bundled` runs on local assets. Custom providers are user-configured and +must be treated as user-selected external processors. + +## Runtime notes + +The first version synthesizes completed teacher answers only. It should not +rewrite teacher content before speech. Speech text must be produced from the +already verified teacher markdown or structured result. 
diff --git a/package.json b/package.json index e58c670..0d90856 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "goagent", - "version": "0.3.15", + "version": "0.3.16", "description": "Agentic AI teacher for Go / Weiqi / Baduk, powered by KataGo and multimodal LLMs.", "main": "out/main/index.js", "type": "module", @@ -35,8 +35,12 @@ "eval:real-teaching:strict": "node scripts/eval_real_teaching.mjs --strict", "eval:teacher-style": "node scripts/eval_teacher_style.mjs", "eval:teacher-session": "node scripts/eval_teacher_session.mjs", + "eval:tts-provider-policy": "node scripts/eval_tts_provider_policy.mjs", + "prepare:tts-assets": "node scripts/prepare_tts_assets.mjs", + "check:tts-assets": "node scripts/check_tts_assets.mjs", + "smoke:tts": "node scripts/smoke_tts.mjs", "smoke:release-artifacts": "node scripts/release_artifact_smoke.mjs", - "check:teacher-quality": "pnpm build && pnpm eval:teacher && pnpm eval:claims && pnpm eval:quality-gate && pnpm check:knowledge-sources && pnpm eval:knowledge-coverage && pnpm eval:shape-recognition && pnpm eval:move-range && pnpm eval:engine-silver && pnpm eval:teacher-style && pnpm eval:teacher-session", + "check:teacher-quality": "pnpm build && pnpm eval:teacher && pnpm eval:claims && pnpm eval:quality-gate && pnpm check:knowledge-sources && pnpm eval:knowledge-coverage && pnpm eval:shape-recognition && pnpm eval:move-range && pnpm eval:engine-silver && pnpm eval:teacher-style && pnpm eval:teacher-session && pnpm eval:tts-provider-policy", "package": "pnpm dist", "dist": "pnpm build && electron-builder", "check:deep-teacher-quality": "pnpm check:teacher-quality && pnpm eval:real-teaching:strict", @@ -50,7 +54,7 @@ "check:katago-assets:release": "node scripts/check_katago_assets.mjs --mode=release", "check:nvidia-release-assets": "node scripts/check_nvidia_release_assets.mjs", "check:release-notes-i18n": "node scripts/check_release_notes_i18n.mjs", - "check:release-quality": "pnpm check:teacher-quality && 
pnpm check:nvidia-release-assets && pnpm check:release-notes-i18n && pnpm smoke:release-artifacts", + "check:release-quality": "pnpm check:teacher-quality && pnpm check:nvidia-release-assets && pnpm check:release-notes-i18n && pnpm check:tts-assets && pnpm smoke:tts && pnpm smoke:release-artifacts", "check:p0-beta": "node scripts/p0_beta_acceptance.mjs", "check:artifacts": "node scripts/package_artifact_smoke.mjs --mode=dev", "smoke:teacher-llm": "pnpm build && node scripts/teacher_llm_smoke.mjs", @@ -61,6 +65,7 @@ }, "dependencies": { "electron-store": "^10.0.1", + "kokoro-js": "^1.2.1", "openai": "^6.3.0", "react": "^19.1.1", "react-dom": "^19.1.1", @@ -94,6 +99,7 @@ "data/**/*", "!data/katago/bin/**/*", "!data/katago/models/**/*", + "!data/tts/**/*", "assets/**/*", "package.json" ], @@ -104,10 +110,18 @@ "filter": [ "**/*" ] + }, + { + "from": "data/tts", + "to": "data/tts", + "filter": [ + "**/*" + ] } ], "asarUnpack": [ - "data/katago/**/*" + "data/katago/**/*", + "data/tts/**/*" ], "asar": true, "mac": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2b3dd84..ed72518 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -11,6 +11,9 @@ importers: electron-store: specifier: ^10.0.1 version: 10.1.0 + kokoro-js: + specifier: ^1.2.1 + version: 1.2.1 openai: specifier: ^6.3.0 version: 6.33.0(zod@4.3.6) @@ -199,6 +202,9 @@ packages: engines: {node: '>=14.14'} hasBin: true + '@emnapi/runtime@1.10.0': + resolution: {integrity: sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==} + '@esbuild/aix-ppc64@0.25.12': resolution: {integrity: sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA==} engines: {node: '>=18'} @@ -549,6 +555,13 @@ packages: resolution: {integrity: sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@huggingface/jinja@0.5.8': + resolution: {integrity: 
sha512-ZdElB7DPS7QQS8ZnFc5RPPtkg+eN11z8AmIZWAyes6pSbwXqiFB/POVevvm01begdSX1ho9Gxln/F6qlQMsuaA==} + engines: {node: '>=18'} + + '@huggingface/transformers@3.8.1': + resolution: {integrity: sha512-tsTk4zVjImqdqjS8/AOZg2yNLd1z9S5v+7oUPpXaasDRwEDhB+xnglK1k5cad26lL5/ZIaeREgWWy0bs9y9pPA==} + '@humanfs/core@0.19.1': resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==} engines: {node: '>=18.18.0'} @@ -565,6 +578,159 @@ packages: resolution: {integrity: sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ==} engines: {node: '>=18.18'} + '@img/colour@1.1.0': + resolution: {integrity: sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==} + engines: {node: '>=18'} + + '@img/sharp-darwin-arm64@0.34.5': + resolution: {integrity: sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [darwin] + + '@img/sharp-darwin-x64@0.34.5': + resolution: {integrity: sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-darwin-arm64@1.2.4': + resolution: {integrity: sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==} + cpu: [arm64] + os: [darwin] + + '@img/sharp-libvips-darwin-x64@1.2.4': + resolution: {integrity: sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==} + cpu: [x64] + os: [darwin] + + '@img/sharp-libvips-linux-arm64@1.2.4': + resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-arm@1.2.4': + resolution: {integrity: 
sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} + cpu: [arm] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-ppc64@1.2.4': + resolution: {integrity: sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==} + cpu: [ppc64] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-riscv64@1.2.4': + resolution: {integrity: sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==} + cpu: [riscv64] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-s390x@1.2.4': + resolution: {integrity: sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==} + cpu: [s390x] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linux-x64@1.2.4': + resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + resolution: {integrity: sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} + cpu: [x64] + os: [linux] + libc: [musl] + + '@img/sharp-linux-arm64@0.34.5': + resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-arm@0.34.5': + resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-ppc64@0.34.5': + resolution: {integrity: 
sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [ppc64] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-riscv64@0.34.5': + resolution: {integrity: sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [riscv64] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-s390x@0.34.5': + resolution: {integrity: sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [s390x] + os: [linux] + libc: [glibc] + + '@img/sharp-linux-x64@0.34.5': + resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@img/sharp-linuxmusl-arm64@0.34.5': + resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@img/sharp-linuxmusl-x64@0.34.5': + resolution: {integrity: sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [linux] + libc: [musl] + + '@img/sharp-wasm32@0.34.5': + resolution: {integrity: sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [wasm32] + + '@img/sharp-win32-arm64@0.34.5': + resolution: {integrity: sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [arm64] + os: [win32] + + '@img/sharp-win32-ia32@0.34.5': + resolution: {integrity: 
sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [ia32] + os: [win32] + + '@img/sharp-win32-x64@0.34.5': + resolution: {integrity: sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + cpu: [x64] + os: [win32] + '@isaacs/cliui@8.0.2': resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==} engines: {node: '>=12'} @@ -609,6 +775,36 @@ packages: resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} engines: {node: '>=14'} + '@protobufjs/aspromise@1.1.2': + resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} + + '@protobufjs/base64@1.1.2': + resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} + + '@protobufjs/codegen@2.0.5': + resolution: {integrity: sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==} + + '@protobufjs/eventemitter@1.1.0': + resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==} + + '@protobufjs/fetch@1.1.0': + resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==} + + '@protobufjs/float@1.0.2': + resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} + + '@protobufjs/inquire@1.1.1': + resolution: {integrity: sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==} + + '@protobufjs/path@1.1.2': + resolution: {integrity: 
sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} + + '@protobufjs/pool@1.1.0': + resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} + + '@protobufjs/utf8@1.1.1': + resolution: {integrity: sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==} + '@rolldown/pluginutils@1.0.0-rc.3': resolution: {integrity: sha512-eybk3TjzzzV97Dlj5c+XrBFW57eTNhzod66y9HrBlzJ6NsCrWCp/2kaPS3K9wJmurBC0Tdw4yPjXKZqlznim3Q==} @@ -1376,6 +1572,9 @@ packages: resolution: {integrity: sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==} engines: {node: '>=16'} + flatbuffers@25.9.23: + resolution: {integrity: sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==} + flatted@3.4.2: resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==} @@ -1478,6 +1677,9 @@ packages: graceful-fs@4.2.11: resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + guid-typescript@1.0.9: + resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==} + has-flag@4.0.0: resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} engines: {node: '>=8'} @@ -1642,6 +1844,9 @@ packages: keyv@4.5.4: resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==} + kokoro-js@1.2.1: + resolution: {integrity: sha512-oq0HZJWis3t8lERkMJh84WLU86dpYD0EuBPtqYnLlQzyFP1OkyBRDcweAqCfhNOpltyN9j/azp1H6uuC47gShw==} + lazy-val@1.0.5: resolution: {integrity: sha512-0/BnGCCfyUMkBpeDgWihanIAF9JmZhHBgUhEqzvf+adhNGLoP6TaiI5oF8oyb3I45P+PcnrqihSf01M0l0G5+Q==} @@ -1663,6 +1868,9 @@ packages: resolution: 
{integrity: sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg==} engines: {node: '>=10'} + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + lowercase-keys@2.0.0: resolution: {integrity: sha512-tqNXrS78oMOE73NMxK4EMLQsQowWf8jKooH9g7xPavRT706R6bkQJ6DY2Te7QukaZsulxa30wQ7bk0pm4XiHmA==} engines: {node: '>=8'} @@ -1828,6 +2036,19 @@ packages: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} + onnxruntime-common@1.21.0: + resolution: {integrity: sha512-Q632iLLrtCAVOTO65dh2+mNbQir/QNTVBG3h/QdZBpns7mZ0RYbLRBgGABPbpU9351AgYy7SJf1WaeVwMrBFPQ==} + + onnxruntime-common@1.22.0-dev.20250409-89f8206ba4: + resolution: {integrity: sha512-vDJMkfCfb0b1A836rgHj+ORuZf4B4+cc2bASQtpeoJLueuFc5DuYwjIZUBrSvx/fO5IrLjLz+oTrB3pcGlhovQ==} + + onnxruntime-node@1.21.0: + resolution: {integrity: sha512-NeaCX6WW2L8cRCSqy3bInlo5ojjQqu2fD3D+9W5qb5irwxhEyWKXeH2vZ8W9r6VxaMPUan+4/7NDwZMtouZxEw==} + os: [win32, darwin, linux] + + onnxruntime-web@1.22.0-dev.20250409-89f8206ba4: + resolution: {integrity: sha512-0uS76OPgH0hWCPrFKlL8kYVV7ckM7t/36HfbgoFw6Nd0CZVVbQC4PkrR8mBX8LtNUFZO25IQBqV2Hx2ho3FlbQ==} + openai@6.33.0: resolution: {integrity: sha512-xAYN1W3YsDXJWA5F277135YfkEk6H7D3D6vWwRhJ3OEkzRgcyK8z/P5P9Gyi/wB4N8kK9kM5ZjprfvyHagKmpw==} hasBin: true @@ -1894,6 +2115,9 @@ packages: pend@1.2.0: resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==} + phonemizer@1.2.1: + resolution: {integrity: sha512-v0KJ4mi2T4Q7eJQ0W15Xd4G9k4kICSXE8bpDeJ8jisL4RyJhNWsweKTOi88QXFc4r4LZlz5jVL5lCHhkpdT71A==} + picocolors@1.1.1: resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} @@ -1901,6 +2125,9 @@ packages: resolution: {integrity: 
sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==} engines: {node: '>=12'} + platform@1.3.6: + resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==} + plist@3.1.0: resolution: {integrity: sha512-uysumyrvkUX0rX/dEVqt8gC3sTBzd4zoWfLeS29nb53imdaXVvLINYXTI2GNqzaMuvacNx4uJQ8+b3zXR0pkgQ==} engines: {node: '>=10.4.0'} @@ -1933,6 +2160,10 @@ packages: proper-lockfile@4.1.2: resolution: {integrity: sha512-TjNPblN4BwAWMXU8s9AEz4JmQxnD1NNL7bNOY/AKUzyamc379FWASUhc/K1pL2noVb+XmZKLL68cjzLsiOAMaA==} + protobufjs@7.5.6: + resolution: {integrity: sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==} + engines: {node: '>=12.0.0'} + pump@3.0.4: resolution: {integrity: sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==} @@ -2045,6 +2276,10 @@ packages: resolution: {integrity: sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==} engines: {node: '>=10'} + sharp@0.34.5: + resolution: {integrity: sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==} + engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} + shebang-command@2.0.0: resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} engines: {node: '>=8'} @@ -2167,6 +2402,9 @@ packages: truncate-utf8-bytes@1.0.2: resolution: {integrity: sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==} + tslib@2.8.1: + resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + type-check@0.4.0: resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} engines: {node: '>= 0.8.0'} @@ -2562,6 +2800,11 @@ snapshots: - supports-color optional: 
true + '@emnapi/runtime@1.10.0': + dependencies: + tslib: 2.8.1 + optional: true + '@esbuild/aix-ppc64@0.25.12': optional: true @@ -2764,6 +3007,15 @@ snapshots: '@eslint/core': 0.17.0 levn: 0.4.1 + '@huggingface/jinja@0.5.8': {} + + '@huggingface/transformers@3.8.1': + dependencies: + '@huggingface/jinja': 0.5.8 + onnxruntime-node: 1.21.0 + onnxruntime-web: 1.22.0-dev.20250409-89f8206ba4 + sharp: 0.34.5 + '@humanfs/core@0.19.1': {} '@humanfs/node@0.16.7': @@ -2775,6 +3027,102 @@ snapshots: '@humanwhocodes/retry@0.4.3': {} + '@img/colour@1.1.0': {} + + '@img/sharp-darwin-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-arm64': 1.2.4 + optional: true + + '@img/sharp-darwin-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-darwin-x64': 1.2.4 + optional: true + + '@img/sharp-libvips-darwin-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-darwin-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-arm@1.2.4': + optional: true + + '@img/sharp-libvips-linux-ppc64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-riscv64@1.2.4': + optional: true + + '@img/sharp-libvips-linux-s390x@1.2.4': + optional: true + + '@img/sharp-libvips-linux-x64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-arm64@1.2.4': + optional: true + + '@img/sharp-libvips-linuxmusl-x64@1.2.4': + optional: true + + '@img/sharp-linux-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm64': 1.2.4 + optional: true + + '@img/sharp-linux-arm@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-arm': 1.2.4 + optional: true + + '@img/sharp-linux-ppc64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-ppc64': 1.2.4 + optional: true + + '@img/sharp-linux-riscv64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-riscv64': 1.2.4 + optional: true + + '@img/sharp-linux-s390x@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-s390x': 1.2.4 + 
optional: true + + '@img/sharp-linux-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linux-x64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-arm64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 + optional: true + + '@img/sharp-linuxmusl-x64@0.34.5': + optionalDependencies: + '@img/sharp-libvips-linuxmusl-x64': 1.2.4 + optional: true + + '@img/sharp-wasm32@0.34.5': + dependencies: + '@emnapi/runtime': 1.10.0 + optional: true + + '@img/sharp-win32-arm64@0.34.5': + optional: true + + '@img/sharp-win32-ia32@0.34.5': + optional: true + + '@img/sharp-win32-x64@0.34.5': + optional: true + '@isaacs/cliui@8.0.2': dependencies: string-width: 5.1.2 @@ -2837,6 +3185,29 @@ snapshots: '@pkgjs/parseargs@0.11.0': optional: true + '@protobufjs/aspromise@1.1.2': {} + + '@protobufjs/base64@1.1.2': {} + + '@protobufjs/codegen@2.0.5': {} + + '@protobufjs/eventemitter@1.1.0': {} + + '@protobufjs/fetch@1.1.0': + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/inquire': 1.1.1 + + '@protobufjs/float@1.0.2': {} + + '@protobufjs/inquire@1.1.1': {} + + '@protobufjs/path@1.1.2': {} + + '@protobufjs/pool@1.1.0': {} + + '@protobufjs/utf8@1.1.1': {} + '@rolldown/pluginutils@1.0.0-rc.3': {} '@rollup/rollup-android-arm-eabi@4.60.1': @@ -3138,8 +3509,7 @@ snapshots: inherits: 2.0.4 readable-stream: 3.6.2 - boolean@3.2.0: - optional: true + boolean@3.2.0: {} brace-expansion@1.1.13: dependencies: @@ -3351,21 +3721,18 @@ snapshots: es-define-property: 1.0.1 es-errors: 1.3.0 gopd: 1.2.0 - optional: true define-properties@1.2.1: dependencies: define-data-property: 1.1.4 has-property-descriptors: 1.0.2 object-keys: 1.1.1 - optional: true delayed-stream@1.0.0: {} detect-libc@2.1.2: {} - detect-node@2.1.0: - optional: true + detect-node@2.1.0: {} dir-compare@4.2.0: dependencies: @@ -3530,8 +3897,7 @@ snapshots: has-tostringtag: 1.0.2 hasown: 2.0.2 - es6-error@4.1.1: - optional: true + es6-error@4.1.1: {} esbuild@0.25.12: optionalDependencies: @@ 
-3720,6 +4086,8 @@ snapshots: flatted: 3.4.2 keyv: 4.5.4 + flatbuffers@25.9.23: {} + flatted@3.4.2: {} foreground-child@3.3.1: @@ -3833,7 +4201,6 @@ snapshots: roarr: 2.15.4 semver: 7.7.4 serialize-error: 7.0.1 - optional: true globals@14.0.0: {} @@ -3841,7 +4208,6 @@ snapshots: dependencies: define-properties: 1.2.1 gopd: 1.2.0 - optional: true gopd@1.2.0: {} @@ -3861,12 +4227,13 @@ snapshots: graceful-fs@4.2.11: {} + guid-typescript@1.0.9: {} + has-flag@4.0.0: {} has-property-descriptors@1.0.2: dependencies: es-define-property: 1.0.1 - optional: true has-symbols@1.1.0: {} @@ -3985,8 +4352,7 @@ snapshots: json-stable-stringify-without-jsonify@1.0.1: {} - json-stringify-safe@5.0.1: - optional: true + json-stringify-safe@5.0.1: {} json5@2.2.3: {} @@ -4004,6 +4370,11 @@ snapshots: dependencies: json-buffer: 3.0.1 + kokoro-js@1.2.1: + dependencies: + '@huggingface/transformers': 3.8.1 + phonemizer: 1.2.1 + lazy-val@1.0.5: {} levn@0.4.1: @@ -4024,6 +4395,8 @@ snapshots: chalk: 4.1.2 is-unicode-supported: 0.1.0 + long@5.3.2: {} + lowercase-keys@2.0.0: {} lru-cache@10.4.3: {} @@ -4059,7 +4432,6 @@ snapshots: matcher@3.0.0: dependencies: escape-string-regexp: 4.0.0 - optional: true math-intrinsics@1.1.0: {} @@ -4177,8 +4549,7 @@ snapshots: normalize-url@6.1.0: {} - object-keys@1.1.1: - optional: true + object-keys@1.1.1: {} once@1.4.0: dependencies: @@ -4188,6 +4559,25 @@ snapshots: dependencies: mimic-fn: 2.1.0 + onnxruntime-common@1.21.0: {} + + onnxruntime-common@1.22.0-dev.20250409-89f8206ba4: {} + + onnxruntime-node@1.21.0: + dependencies: + global-agent: 3.0.0 + onnxruntime-common: 1.21.0 + tar: 7.5.13 + + onnxruntime-web@1.22.0-dev.20250409-89f8206ba4: + dependencies: + flatbuffers: 25.9.23 + guid-typescript: 1.0.9 + long: 5.3.2 + onnxruntime-common: 1.22.0-dev.20250409-89f8206ba4 + platform: 1.3.6 + protobufjs: 7.5.6 + openai@6.33.0(zod@4.3.6): optionalDependencies: zod: 4.3.6 @@ -4246,10 +4636,14 @@ snapshots: pend@1.2.0: {} + phonemizer@1.2.1: {} + 
picocolors@1.1.1: {} picomatch@4.0.4: {} + platform@1.3.6: {} + plist@3.1.0: dependencies: '@xmldom/xmldom': 0.8.12 @@ -4284,6 +4678,21 @@ snapshots: retry: 0.12.0 signal-exit: 3.0.7 + protobufjs@7.5.6: + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/base64': 1.1.2 + '@protobufjs/codegen': 2.0.5 + '@protobufjs/eventemitter': 1.1.0 + '@protobufjs/fetch': 1.1.0 + '@protobufjs/float': 1.0.2 + '@protobufjs/inquire': 1.1.1 + '@protobufjs/path': 1.1.2 + '@protobufjs/pool': 1.1.0 + '@protobufjs/utf8': 1.1.1 + '@types/node': 24.12.0 + long: 5.3.2 + pump@3.0.4: dependencies: end-of-stream: 1.4.5 @@ -4349,7 +4758,6 @@ snapshots: json-stringify-safe: 5.0.1 semver-compare: 1.0.0 sprintf-js: 1.1.3 - optional: true rollup@4.60.1: dependencies: @@ -4394,8 +4802,7 @@ snapshots: scheduler@0.27.0: {} - semver-compare@1.0.0: - optional: true + semver-compare@1.0.0: {} semver@5.7.2: {} @@ -4406,7 +4813,37 @@ snapshots: serialize-error@7.0.1: dependencies: type-fest: 0.13.1 - optional: true + + sharp@0.34.5: + dependencies: + '@img/colour': 1.1.0 + detect-libc: 2.1.2 + semver: 7.7.4 + optionalDependencies: + '@img/sharp-darwin-arm64': 0.34.5 + '@img/sharp-darwin-x64': 0.34.5 + '@img/sharp-libvips-darwin-arm64': 1.2.4 + '@img/sharp-libvips-darwin-x64': 1.2.4 + '@img/sharp-libvips-linux-arm': 1.2.4 + '@img/sharp-libvips-linux-arm64': 1.2.4 + '@img/sharp-libvips-linux-ppc64': 1.2.4 + '@img/sharp-libvips-linux-riscv64': 1.2.4 + '@img/sharp-libvips-linux-s390x': 1.2.4 + '@img/sharp-libvips-linux-x64': 1.2.4 + '@img/sharp-libvips-linuxmusl-arm64': 1.2.4 + '@img/sharp-libvips-linuxmusl-x64': 1.2.4 + '@img/sharp-linux-arm': 0.34.5 + '@img/sharp-linux-arm64': 0.34.5 + '@img/sharp-linux-ppc64': 0.34.5 + '@img/sharp-linux-riscv64': 0.34.5 + '@img/sharp-linux-s390x': 0.34.5 + '@img/sharp-linux-x64': 0.34.5 + '@img/sharp-linuxmusl-arm64': 0.34.5 + '@img/sharp-linuxmusl-x64': 0.34.5 + '@img/sharp-wasm32': 0.34.5 + '@img/sharp-win32-arm64': 0.34.5 + '@img/sharp-win32-ia32': 0.34.5 + 
'@img/sharp-win32-x64': 0.34.5 shebang-command@2.0.0: dependencies: @@ -4453,8 +4890,7 @@ snapshots: source-map@0.6.1: {} - sprintf-js@1.1.3: - optional: true + sprintf-js@1.1.3: {} ssri@12.0.0: dependencies: @@ -4541,12 +4977,14 @@ snapshots: dependencies: utf8-byte-length: 1.0.5 + tslib@2.8.1: + optional: true + type-check@0.4.0: dependencies: prelude-ls: 1.2.1 - type-fest@0.13.1: - optional: true + type-fest@0.13.1: {} type-fest@4.41.0: {} diff --git a/scripts/check_release_notes_i18n.mjs b/scripts/check_release_notes_i18n.mjs index ff2de3e..83e4f99 100644 --- a/scripts/check_release_notes_i18n.mjs +++ b/scripts/check_release_notes_i18n.mjs @@ -45,6 +45,9 @@ const requiredTopics = [ 'teacher persona style settings with evidence boundary', 'teacher sessions', 'selective PR #6 integration', + 'Kokoro', + 'selected-provider TTS', + 'offline synthesis', 'Windows OpenCL runtime bundle', 'KataGo OpenCL adjacent runtime files', 'GPU vendor OpenCL drivers', diff --git a/scripts/check_tts_assets.mjs b/scripts/check_tts_assets.mjs new file mode 100755 index 0000000..597adff --- /dev/null +++ b/scripts/check_tts_assets.mjs @@ -0,0 +1,65 @@ +#!/usr/bin/env node +import { createHash } from 'node:crypto' +import { existsSync, readFileSync, statSync } from 'node:fs' +import { join } from 'node:path' + +const root = process.cwd() +const strict = process.env.GOAGENT_TTS_ASSETS_STRICT === '1' +const dir = join(root, 'data', 'tts', 'kokoro', 'zh-CN') +const manifestPath = join(dir, 'manifest.json') +const failures = [] +const warnings = [] + +function addFailure(message) { (strict ? 
/**
 * Compute the SHA-256 digest of a file.
 *
 * The file is read into memory in a single call before hashing.
 *
 * @param {string} path - File to hash.
 * @returns {string} Lowercase hexadecimal digest.
 */
function sha256(path) {
  return createHash('sha256').update(readFileSync(path)).digest('hex')
}

/**
 * Check that an asset exists and, when a digest is supplied, that the file's
 * SHA-256 matches it.
 *
 * - A missing file is reported through `addFailure`.
 * - A missing/empty `expected` digest is recorded as a warning instead of being
 *   skipped silently, so an unverified asset is visible in the check output.
 * - A digest mismatch is always pushed to `failures`.
 *
 * @param {string} path - Asset location on disk.
 * @param {string | undefined} expected - Hex digest to compare against, if any.
 * @param {string} label - Human-readable asset name used in messages.
 * @returns {void}
 */
function checkFileSha(path, expected, label) {
  if (!existsSync(path)) {
    addFailure(`missing ${label}: ${path}`)
    return
  }
  if (!expected) {
    warnings.push(`${label} has no SHA256 to verify against; integrity not checked: ${path}`)
    return
  }
  const actual = sha256(path)
  // `digest('hex')` is lowercase; normalise the expected value so an uppercase
  // or padded digest does not produce a false mismatch.
  const wanted = String(expected).trim().toLowerCase()
  if (actual !== wanted) failures.push(`${label} SHA256 mismatch: ${actual}`)
}
[]) { + const voicePath = join(dir, voice.file) + checkFileSha(voicePath, voice.sha256, `Kokoro voice ${voice.id}`) + } + for (const file of ['config.json', 'tokenizer_config.json', 'tokenizer.json']) { + const path = join(dir, file) + if (!existsSync(path)) addFailure(`missing Kokoro tokenizer/config file: ${path}`) + else JSON.parse(readFileSync(path, 'utf8')) + } + for (const file of ['LICENSE', 'MODEL_CARD.md']) { + if (!existsSync(join(dir, file))) failures.push(`missing source notice file: ${file}`) + } +} + +for (const warning of warnings) console.warn(`[check-tts-assets] warning: ${warning}`) +if (failures.length) { + for (const failure of failures) console.error(`[check-tts-assets] ${failure}`) + process.exit(1) +} +console.log(strict ? '[check-tts-assets] strict Kokoro asset check OK' : '[check-tts-assets] Kokoro asset contract OK') diff --git a/scripts/eval_tts_provider_policy.mjs b/scripts/eval_tts_provider_policy.mjs new file mode 100755 index 0000000..226898b --- /dev/null +++ b/scripts/eval_tts_provider_policy.mjs @@ -0,0 +1,37 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict' +import { readFileSync, readdirSync, existsSync } from 'node:fs' +import { join } from 'node:path' + +const root = process.cwd() +const packageJson = JSON.parse(readFileSync(join(root, 'package.json'), 'utf8')) +assert.equal(packageJson.scripts['check:tts-assets'], 'node scripts/check_tts_assets.mjs') +assert.equal(packageJson.scripts['smoke:tts'], 'node scripts/smoke_tts.mjs') +assert.equal(packageJson.scripts['eval:tts-provider-policy'], 'node scripts/eval_tts_provider_policy.mjs') +assert.ok(packageJson.dependencies?.['kokoro-js'], 'kokoro-js dependency is required for bundled Kokoro provider') + +const types = readFileSync(join(root, 'src', 'main', 'lib', 'types.ts'), 'utf8') +assert.match(types, /TtsProviderId = 'kokoro-bundled' \| 'custom-openai-compatible' \| 'custom-http-json' \| 'external-local-service'/) +assert.doesNotMatch(types, 
/system-web-speech|speechSynthesis/) + +const serviceDir = join(root, 'src', 'main', 'services', 'tts') +assert.ok(existsSync(join(serviceDir, 'kokoroBundledProvider.ts')), 'missing Kokoro provider') +assert.ok(existsSync(join(serviceDir, 'customOpenAiSpeechProvider.ts')), 'missing custom OpenAI-compatible provider') +assert.ok(existsSync(join(serviceDir, 'index.ts')), 'missing TTS registry') + +const implementation = readdirSync(serviceDir) + .filter((name) => name.endsWith('.ts')) + .map((name) => readFileSync(join(serviceDir, name), 'utf8')) + .join('\n') +assert.doesNotMatch(implementation, /system-web-speech|speechSynthesis|webkitSpeechSynthesis/) +assert.doesNotMatch(implementation, /tryNextProvider|providerChain|autoSwitchProvider/) +assert.match(implementation, /assertSelectedProvider/) +assert.match(implementation, /kokoro-bundled/) + +const manifest = JSON.parse(readFileSync(join(root, 'data', 'tts', 'kokoro', 'zh-CN', 'manifest.json'), 'utf8')) +assert.equal(manifest.provider, 'kokoro-bundled') +assert.equal(manifest.language, 'zh-CN') +assert.equal(manifest.license, 'Apache-2.0') +assert.ok(manifest.modelSha256) + +console.log('TTS provider policy eval passed') diff --git a/scripts/prepare_tts_assets.mjs b/scripts/prepare_tts_assets.mjs new file mode 100755 index 0000000..bff9d7d --- /dev/null +++ b/scripts/prepare_tts_assets.mjs @@ -0,0 +1,58 @@ +#!/usr/bin/env node +import { createWriteStream, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs' +import { dirname, join } from 'node:path' +import { pipeline } from 'node:stream/promises' + +const root = process.cwd() +const targetRoot = join(root, 'data', 'tts', 'kokoro', 'zh-CN') +const revision = '6cc0f0d2ebe369a68b0df87c2b65c1af8c0ac3e3' +const base = `https://huggingface.co/onnx-community/Kokoro-82M-v1.1-zh-ONNX/resolve/${revision}` + +const assets = [ + ['onnx/model_int8.onnx', 'onnx/model_int8.onnx'], + // kokoro-js resolves q8 models to this filename at runtime. 
/**
 * Download one asset to `output`, skipping the request when the file already
 * exists.
 *
 * The response is streamed into a sibling temporary file and renamed onto
 * `output` only after the stream has completed. Because an existing `output`
 * is treated as "already prepared", writing directly to it would let an
 * interrupted download leave a truncated file that every later run skips.
 *
 * @param {string} url - Absolute URL of the remote asset.
 * @param {string} output - Destination path; parent directories are created.
 * @returns {Promise<void>}
 * @throws {Error} When the response is not OK, has no body, or the stream fails.
 */
async function download(url, output) {
  mkdirSync(dirname(output), { recursive: true })
  if (existsSync(output)) {
    console.log(`[prepare-tts-assets] exists ${output}`)
    return
  }
  console.log(`[prepare-tts-assets] downloading ${url}`)
  const response = await fetch(url)
  if (!response.ok || !response.body) {
    throw new Error(`Failed to download ${url}: HTTP ${response.status}`)
  }
  // Imported locally: the script's top-level `node:fs` import does not bring
  // rename/rm into scope.
  const { rename, rm } = await import('node:fs/promises')
  const partial = `${output}.partial-${process.pid}`
  try {
    await pipeline(response.body, createWriteStream(partial))
    await rename(partial, output)
  } catch (error) {
    // Never leave a half-written file behind; the next run must retry.
    await rm(partial, { force: true })
    throw error
  }
}
/**
 * Run one real Kokoro synthesis against the bundled zh-CN assets and record
 * the outcome.
 *
 * While the synthesis runs, `fsPromises.readFile` is replaced with a wrapper
 * that redirects reads of `<...kokoro-js...>/voices/<id>.bin` to the voice
 * files listed in the bundled manifest; every other read is passed through.
 * The original function is reinstated in `finally`, so the process-wide patch
 * does not outlive this call, including when synthesis throws.
 *
 * On success sets `strictSynthesisOk`; a suspiciously small WAV is pushed to
 * `failures`. Errors from loading or generating propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function strictSynthesizeSmoke() {
  const manifest = JSON.parse(readFileSync(join(assetRoot, 'manifest.json'), 'utf8'))
  // Map "<voiceId>.bin" -> absolute path of the bundled voice file.
  const voices = new Map((manifest.voices ?? []).map((voice) => [`${voice.id}.bin`, join(assetRoot, voice.file)]))

  const unpatchedReadFile = fsPromises.readFile
  const originalReadFile = unpatchedReadFile.bind(fsPromises)
  fsPromises.readFile = async function readLocalVoice(path, ...args) {
    const source = String(path)
    const localVoice = voices.get(basename(source))
    if (localVoice && source.includes('kokoro-js') && /[\\/]voices[\\/]/.test(source)) {
      return originalReadFile(localVoice, ...args)
    }
    return originalReadFile(path, ...args)
  }

  try {
    const { KokoroTTS } = await import('kokoro-js')
    const tts = await KokoroTTS.from_pretrained(assetRoot, { dtype: 'q8', device: 'cpu' })
    // Accept only voices present in the bundled manifest.
    // NOTE(review): the returned 'a' is presumably the language code kokoro-js
    // expects from its validator; confirm against the kokoro-js source.
    tts._validate_voice = (voice) => {
      if (voices.has(`${voice}.bin`)) return 'a'
      throw new Error(`Smoke voice is not present in bundled manifest: ${voice}`)
    }
    mkdirSync(cacheRoot, { recursive: true })
    const output = join(cacheRoot, 'kokoro-zh-cn-smoke.wav')
    const audio = await tts.generate('围棋智能体开始复盘。', {
      voice: manifest.defaultVoiceId ?? 'zf_001',
      speed: 1
    })
    await audio.save(output)
    const stat = statSync(output)
    if (stat.size < 4096) failures.push(`suspiciously small Kokoro synthesis output: ${output}`)
    else strictSynthesisOk = true
  } finally {
    // Undo the global monkey-patch regardless of how synthesis ended.
    fsPromises.readFile = unpatchedReadFile
  }
}
'[smoke-tts] strict Kokoro offline synthesis smoke OK' + : '[smoke-tts] Kokoro TTS assets are present for runtime smoke') +} diff --git a/src/main/index.ts b/src/main/index.ts index 7e1c959..5f6491d 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -1,6 +1,6 @@ import { app, BrowserWindow, dialog, ipcMain, Menu, shell, type ContextMenuParams, type IpcMainInvokeEvent, type MenuItemConstructorOptions } from 'electron' import { isAbsolute, relative, resolve, join } from 'node:path' -import { appHome, findGame, getGames, getSettings, hasLlmApiKey, replaceSettings, setSettings, upsertGames } from './lib/store' +import { appHome, findGame, getGames, getSettings, getTtsCustomApiKey, hasLlmApiKey, hasTtsCustomApiKey, replaceSettings, setSettings, upsertGames } from './lib/store' import { BRAND_NAME } from '@shared/brand' import type { AnalyzeGameQuickRequest, AnalyzePositionRequest, AppSettings, DashboardData, FoxSyncRequest, KataGoAssetInstallRequest, KataGoBenchmarkRequest, KataGoCancelAnalysisRequest, LibraryDeleteRequest, LlmModelsListRequest, LlmSettingsTestRequest, ReviewRequest, TeacherChatMessage, TeacherRunCancelRequest, TeacherRunRequest } from './lib/types' import { importSgfFile, readGameRecord } from './services/sgf' @@ -27,6 +27,7 @@ import { upsertStudentAlias } from './services/studentProfile' import { archiveTeacherSession, createTeacherSession, deleteTeacherSession, getActiveTeacherSession, listTeacherSessions, updateTeacherSessionMessages } from './services/teacherSession' +import { clearTtsCache, inspectTtsAssets, listTtsVoices, synthesizeTts, testTtsSettings } from './services/tts' let mainWindow: BrowserWindow | null = null type DesktopCommand = @@ -230,7 +231,7 @@ function buildApplicationMenu(): void { async function dashboard(): Promise { const hydratedSettings = await applyDetectedDefaults(getSettings()) replaceSettings(hydratedSettings) - const publicSettings = { ...hydratedSettings, llmApiKey: '' } + const publicSettings = { 
...hydratedSettings, llmApiKey: '', ttsCustomApiKey: '' } const detectedProfile = await detectSystemProfile(hydratedSettings) return { settings: publicSettings, @@ -400,6 +401,15 @@ app.whenReady().then(() => { apiKey: settings.llmApiKey } }) + ipcMain.handle('tts:inspect-assets', async () => inspectTtsAssets()) + ipcMain.handle('tts:list-voices', async () => listTtsVoices()) + ipcMain.handle('tts:synthesize', async (_event, payload) => synthesizeTts(payload)) + ipcMain.handle('tts:clear-cache', async () => clearTtsCache()) + ipcMain.handle('tts:test', async (_event, payload) => testTtsSettings(payload)) + ipcMain.handle('tts:get-saved-api-key', async () => ({ + hasKey: hasTtsCustomApiKey(), + apiKey: getTtsCustomApiKey() + })) ipcMain.handle('release:readiness', async () => inspectReleaseReadiness()) ipcMain.handle('path:open', async (_event, filePath: string) => shell.showItemInFolder(assertManagedPath(filePath))) diff --git a/src/main/lib/store.ts b/src/main/lib/store.ts index e696c7e..38c87ca 100644 --- a/src/main/lib/store.ts +++ b/src/main/lib/store.ts @@ -37,6 +37,26 @@ const defaults: AppSettings = { llmModel: 'gpt-5-mini', reviewLanguage: 'zh-CN', defaultPlayerName: '', + ttsEnabled: true, + ttsAutoPlay: false, + ttsProvider: 'kokoro-bundled', + ttsLanguage: 'zh-CN', + ttsVoiceId: 'zf_001', + ttsRate: 1, + ttsPitch: 1, + ttsVolume: 1, + ttsReadMode: 'summary', + ttsCacheEnabled: true, + ttsKokoroDType: 'q8', + ttsKokoroDevice: 'cpu', + ttsCustomBaseUrl: '', + ttsCustomApiKey: '', + ttsCustomModel: '', + ttsCustomVoice: '', + ttsCustomHeadersJson: '', + ttsCustomBodyTemplate: '', + ttsCustomResponseType: 'audio-bytes', + ttsCustomAudioJsonPath: '', defaultCoachLevel: 'intermediate', defaultStudentRank: 'sub1d', defaultStudentAge: 0, @@ -57,7 +77,7 @@ type SecretValue = | { mode: 'safeStorage'; value: string } | { mode: 'plain'; value: string } -export const secretStore = new Store<{ llmApiKey?: SecretValue }>({ +export const secretStore = new Store<{ 
llmApiKey?: SecretValue; ttsCustomApiKey?: SecretValue }>({ name: 'secrets', cwd: appHome, defaults: {} @@ -103,6 +123,10 @@ export function hasLlmApiKey(): boolean { return decryptSecret(secretStore.get('llmApiKey')).trim().length > 0 } +export function hasTtsCustomApiKey(): boolean { + return decryptSecret(secretStore.get('ttsCustomApiKey')).trim().length > 0 +} + function saveLlmApiKey(value: string): void { const trimmed = value.trim() if (trimmed) { @@ -110,6 +134,13 @@ function saveLlmApiKey(value: string): void { } } +function saveTtsCustomApiKey(value: string): void { + const trimmed = value.trim() + if (trimmed) { + secretStore.set('ttsCustomApiKey', encryptSecret(trimmed)) + } +} + function migratePlaintextApiKey(settings: AppSettings): AppSettings { if (settings.llmApiKey.trim()) { saveLlmApiKey(settings.llmApiKey) @@ -121,14 +152,21 @@ function migratePlaintextApiKey(settings: AppSettings): AppSettings { export function getSettings(): AppSettings { const persisted = migratePlaintextApiKey({ ...defaults, ...settingsStore.store }) - return { ...persisted, llmApiKey: decryptSecret(secretStore.get('llmApiKey')) } + return { + ...persisted, + llmApiKey: decryptSecret(secretStore.get('llmApiKey')), + ttsCustomApiKey: decryptSecret(secretStore.get('ttsCustomApiKey')) + } } export function setSettings(next: Partial): AppSettings { if (typeof next.llmApiKey === 'string') { saveLlmApiKey(next.llmApiKey) } - const { llmApiKey: _llmApiKey, ...safeNext } = next + if (typeof next.ttsCustomApiKey === 'string') { + saveTtsCustomApiKey(next.ttsCustomApiKey) + } + const { llmApiKey: _llmApiKey, ttsCustomApiKey: _ttsCustomApiKey, ...safeNext } = next settingsStore.set(safeNext) return getSettings() } @@ -137,10 +175,17 @@ export function replaceSettings(next: AppSettings): AppSettings { if (next.llmApiKey.trim()) { saveLlmApiKey(next.llmApiKey) } - settingsStore.store = { ...next, llmApiKey: '' } + if (next.ttsCustomApiKey.trim()) { + 
saveTtsCustomApiKey(next.ttsCustomApiKey) + } + settingsStore.store = { ...next, llmApiKey: '', ttsCustomApiKey: '' } return getSettings() } +export function getTtsCustomApiKey(): string { + return decryptSecret(secretStore.get('ttsCustomApiKey')) +} + export function getGames(): LibraryGame[] { return [...libraryStore.get('games', [])].sort((a, b) => b.createdAt.localeCompare(a.createdAt)) } diff --git a/src/main/lib/types.ts b/src/main/lib/types.ts index 2d93413..0752778 100644 --- a/src/main/lib/types.ts +++ b/src/main/lib/types.ts @@ -1,5 +1,53 @@ export type ReviewStatus = 'idle' | 'running' | 'done' | 'error' +export type TtsProviderId = 'kokoro-bundled' | 'custom-openai-compatible' | 'custom-http-json' | 'external-local-service' +export type TtsReadMode = 'summary' | 'full' | 'selection' +export type TtsAudioFormat = 'wav' | 'mp3' | 'pcm' | 'opus' | 'aac' | 'flac' +export type TtsRuntimeDevice = 'cpu' | 'wasm' | 'webgpu' +export type TtsKokoroDType = 'q8' | 'fp32' | 'fp16' | 'q4' | 'q4f16' + +export interface TtsVoice { + id: string + label: string + language: AppSettings['reviewLanguage'] + provider: TtsProviderId + bundled?: boolean +} + +export interface TtsAssetStatus { + provider: TtsProviderId + language: AppSettings['reviewLanguage'] + ready: boolean + detail: string + rootPath: string + manifestFound: boolean + modelPath: string + modelFound: boolean + modelSha256?: string + voicesFound: number + defaultVoiceId: string + license: string +} + +export interface TtsSynthesisRequest { + text: string + language?: AppSettings['reviewLanguage'] + voiceId?: string + readMode?: TtsReadMode + format?: TtsAudioFormat +} + +export interface TtsSynthesisResult { + id: string + provider: TtsProviderId + mimeType: string + audioPath: string + audioDataUrl: string + cached: boolean + textHash: string + createdAt: string +} + export interface AppSettings { katagoBin: string katagoConfig: string @@ -18,6 +66,26 @@ export interface AppSettings { llmModel: string 
reviewLanguage: 'zh-CN' | 'zh-TW' | 'en-US' | 'ja-JP' | 'ko-KR' | 'th-TH' | 'vi-VN' defaultPlayerName: string + ttsEnabled: boolean + ttsAutoPlay: boolean + ttsProvider: TtsProviderId + ttsLanguage: 'zh-CN' | 'zh-TW' | 'en-US' | 'ja-JP' | 'ko-KR' | 'th-TH' | 'vi-VN' + ttsVoiceId: string + ttsRate: number + ttsPitch: number + ttsVolume: number + ttsReadMode: TtsReadMode + ttsCacheEnabled: boolean + ttsKokoroDType: TtsKokoroDType + ttsKokoroDevice: TtsRuntimeDevice + ttsCustomBaseUrl: string + ttsCustomApiKey: string + ttsCustomModel: string + ttsCustomVoice: string + ttsCustomHeadersJson: string + ttsCustomBodyTemplate: string + ttsCustomResponseType: 'audio-bytes' | 'json-audio-url' | 'json-base64' + ttsCustomAudioJsonPath: string defaultCoachLevel: CoachUserLevel defaultStudentRank: StudentRank defaultStudentAge: number @@ -668,6 +736,11 @@ export interface LlmSavedApiKeyResult { apiKey: string } +export interface TtsSavedApiKeyResult { + hasKey: boolean + apiKey: string +} + export interface DashboardData { settings: AppSettings games: LibraryGame[] diff --git a/src/main/services/tts/assets.ts b/src/main/services/tts/assets.ts new file mode 100644 index 0000000..e3671fd --- /dev/null +++ b/src/main/services/tts/assets.ts @@ -0,0 +1,123 @@ +import { createHash } from 'node:crypto' +import { existsSync, readdirSync, readFileSync } from 'node:fs' +import { join } from 'node:path' +import { app } from 'electron' +import type { AppSettings, TtsAssetStatus, TtsVoice } from '@main/lib/types' + +interface KokoroManifest { + id: string + provider: 'kokoro-bundled' + language: AppSettings['ttsLanguage'] + modelFile: string + modelSha256?: string + runtimeModelFile?: string + runtimeModelSha256?: string + modelSizeMb?: number + license: string + defaultVoiceId: string + voices: Array<{ id: string; label: string; file: string; sha256?: string }> +} + +export function ttsDataRoot(): string { + if (app.isPackaged) return join(process.resourcesPath, 'data', 'tts') + return 
/**
 * Check that a parsed manifest carries the fields this module dereferences
 * without further guards (`modelFile`, `voices[].id`, `voices[].file`, and an
 * optional string `runtimeModelFile`).
 *
 * @param value - Result of `JSON.parse` on the manifest file.
 * @returns `true` when the required fields are present with the expected types.
 */
function hasRequiredManifestFields(value: unknown): boolean {
  if (typeof value !== 'object' || value === null) return false
  const candidate = value as Record<string, unknown>
  if (typeof candidate.modelFile !== 'string') return false
  if (candidate.runtimeModelFile !== undefined && typeof candidate.runtimeModelFile !== 'string') return false
  if (!Array.isArray(candidate.voices)) return false
  return candidate.voices.every((voice: unknown) => {
    if (typeof voice !== 'object' || voice === null) return false
    const entry = voice as Record<string, unknown>
    return typeof entry.id === 'string' && typeof entry.file === 'string'
  })
}

/**
 * Load the bundled Kokoro manifest for a language.
 *
 * @param language - Language folder to look in under the Kokoro data root.
 * @returns The parsed manifest, or `null` when the file is missing, is not
 *   valid JSON, or lacks the fields callers join into paths / iterate over.
 *   Returning `null` for a malformed manifest lets callers report "not ready"
 *   instead of throwing on `join(root, undefined)` or `voices.filter`.
 */
export function readKokoroManifest(language: AppSettings['ttsLanguage']): KokoroManifest | null {
  const path = join(kokoroLanguageRoot(language), 'manifest.json')
  if (!existsSync(path)) return null
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, 'utf8'))
    return hasRequiredManifestFields(parsed) ? (parsed as KokoroManifest) : null
  } catch {
    // Unreadable or syntactically invalid manifest is treated the same as a missing one.
    return null
  }
}
: `Kokoro model checksum mismatch: ${actual}` + if (checksumOk && manifest.runtimeModelSha256) { + const runtimeActual = sha256(runtimeModelPath) + checksumOk = runtimeActual === manifest.runtimeModelSha256 + detail = checksumOk ? 'Kokoro bundled assets are ready.' : `Kokoro runtime model checksum mismatch: ${runtimeActual}` + } + } else { + detail = 'Kokoro bundled assets are present; manifest has no model checksum.' + } + if (voicesFound < 1) { + checksumOk = false + detail = `Kokoro voice files not found under ${voicesDir}` + } + return { + provider: 'kokoro-bundled', + language, + ready: Boolean(manifest && modelFound && runtimeModelFound && voicesFound > 0 && checksumOk), + detail, + rootPath, + manifestFound: Boolean(manifest), + modelPath, + modelFound, + modelSha256: manifest?.modelSha256, + voicesFound, + defaultVoiceId: manifest?.defaultVoiceId ?? 'zf_001', + license: manifest?.license ?? 'unknown' + } +} + +export function listKokoroBundledVoices(settings: AppSettings): TtsVoice[] { + const language = settings.ttsLanguage || 'zh-CN' + const manifest = readKokoroManifest(language) + if (!manifest) return [] + return manifest.voices + .filter((voice) => existsSync(join(kokoroLanguageRoot(language), voice.file))) + .map((voice) => ({ + id: voice.id, + label: voice.label, + language, + provider: 'kokoro-bundled', + bundled: true + })) +} + +export function kokoroModelRoot(settings: AppSettings): string { + return kokoroLanguageRoot(settings.ttsLanguage || 'zh-CN') +} + +export function kokoroDefaultVoice(settings: AppSettings): string { + return settings.ttsVoiceId || readKokoroManifest(settings.ttsLanguage || 'zh-CN')?.defaultVoiceId || 'zf_001' +} diff --git a/src/main/services/tts/cache.ts b/src/main/services/tts/cache.ts new file mode 100644 index 0000000..dcb0d42 --- /dev/null +++ b/src/main/services/tts/cache.ts @@ -0,0 +1,65 @@ +import { createHash, randomUUID } from 'node:crypto' +import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, 
/**
 * Map a requested audio format to the file extension used for cached audio.
 *
 * @param format - Requested format, or `undefined` when the caller did not pick one.
 * @returns The extension without a leading dot; anything other than the known
 *   non-wav formats resolves to `'wav'`.
 */
export function audioExtension(format: TtsAudioFormat | undefined): string {
  switch (format) {
    case 'mp3':
    case 'pcm':
    case 'opus':
    case 'aac':
    case 'flac':
      // For these formats the extension is the format name itself.
      return format
    default:
      return 'wav'
  }
}
/**
 * Fill `{{ name }}` placeholders in a JSON request-body template.
 *
 * Each value is stringified and then JSON-escaped, so text containing quotes,
 * backslashes or line breaks cannot break the surrounding JSON when the
 * placeholder sits inside a string literal. Numbers and plain identifiers are
 * unaffected by the escaping.
 *
 * @param template - Body template containing `{{key}}` placeholders.
 * @param values - Substitution values keyed by placeholder name.
 * @returns The template with every placeholder replaced; placeholders with no
 *   own entry in `values` (or a nullish value) become the empty string.
 */
function renderTemplate(template: string, values: Record<string, unknown>): string {
  return template.replace(/\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g, (_match, key: string) => {
    // Only own keys count: `{{constructor}}` must not resolve to Object.prototype members.
    const present = Object.prototype.hasOwnProperty.call(values, key)
    const raw = String((present ? values[key] : undefined) ?? '')
    // JSON.stringify yields a quoted, escaped literal; strip the outer quotes
    // because the template author supplies them.
    return JSON.stringify(raw).slice(1, -1)
  })
}
readiness = await this.inspect(settings) + if (!readiness.ready) throw new Error(readiness.message) + const text = request.text.trim() + if (!text) throw new Error('TTS text is empty') + const format = request.format ?? 'wav' + const voice = request.voiceId || settings.ttsCustomVoice || settings.ttsVoiceId + const cacheKey = hashTtsInput({ provider: 'custom-http-json', endpoint: settings.ttsCustomBaseUrl, text, voice, format, rate: settings.ttsRate }) + const output = cachedAudioPath('custom-http-json', cacheKey, format) + const mimeType = mimeForFormat(format) + if (settings.ttsCacheEnabled && existsSync(output)) return { id: makeTtsResultId(), provider: 'custom-http-json', mimeType, audioPath: output, audioDataUrl: audioDataUrl(output, mimeType), cached: true, textHash: cacheKey, createdAt: new Date().toISOString() } + const headers = settings.ttsCustomHeadersJson.trim() ? JSON.parse(settings.ttsCustomHeadersJson) as Record : {} + const apiKey = getTtsCustomApiKey().trim() + if (apiKey && !headers.Authorization) headers.Authorization = `Bearer ${apiKey}` + const body = renderTemplate(settings.ttsCustomBodyTemplate, { text, voice, language: request.language ?? 
settings.ttsLanguage, rate: settings.ttsRate || 1, model: settings.ttsCustomModel }) + const response = await fetch(settings.ttsCustomBaseUrl, { method: 'POST', headers: { 'Content-Type': 'application/json', ...headers }, body }) + if (!response.ok) throw new Error(`自定义 HTTP JSON TTS API 请求失败: HTTP ${response.status} ${await response.text().catch(() => '')}`) + const buffer = await responseToBuffer(response, settings) + const audioPath = writeAudio('custom-http-json', cacheKey, format, buffer) + return { id: makeTtsResultId(), provider: 'custom-http-json', mimeType, audioPath, audioDataUrl: audioDataUrl(audioPath, mimeType), cached: false, textHash: cacheKey, createdAt: new Date().toISOString() } + } +} diff --git a/src/main/services/tts/customOpenAiSpeechProvider.ts b/src/main/services/tts/customOpenAiSpeechProvider.ts new file mode 100644 index 0000000..dd6ff5f --- /dev/null +++ b/src/main/services/tts/customOpenAiSpeechProvider.ts @@ -0,0 +1,57 @@ +import { existsSync } from 'node:fs' +import type { AppSettings, TtsSynthesisRequest, TtsSynthesisResult, TtsVoice } from '@main/lib/types' +import { getTtsCustomApiKey } from '@main/lib/store' +import { audioDataUrl, cachedAudioPath, hashTtsInput, makeTtsResultId, mimeForFormat, writeAudio } from './cache' +import { assertSelectedProvider, type TtsProvider } from './ttsTypes' + +export const customOpenAiSpeechProvider: TtsProvider = { + id: 'custom-openai-compatible', + label: '自定义 OpenAI-compatible TTS API', + async inspect(settings) { + assertSelectedProvider('custom-openai-compatible', settings) + if (!settings.ttsCustomBaseUrl.trim()) return { ready: false, code: 'missing-base-url', message: '自定义 TTS Base URL 未配置。' } + if (!settings.ttsCustomModel.trim()) return { ready: false, code: 'missing-model', message: '自定义 TTS model 未配置。' } + if (!settings.ttsCustomVoice.trim()) return { ready: false, code: 'missing-voice', message: '自定义 TTS voice 未配置。' } + if (!getTtsCustomApiKey().trim()) return { ready: false, code: 
'missing-api-key', message: '自定义 TTS API Key 未配置。' } + return { ready: true, code: 'ready', message: '自定义 OpenAI-compatible TTS API 已配置。' } + }, + async listVoices(settings): Promise { + assertSelectedProvider('custom-openai-compatible', settings) + return [{ id: settings.ttsCustomVoice || 'default', label: settings.ttsCustomVoice || 'Default voice', language: settings.ttsLanguage, provider: 'custom-openai-compatible' }] + }, + async synthesize(request: TtsSynthesisRequest, settings: AppSettings): Promise { + assertSelectedProvider('custom-openai-compatible', settings) + const readiness = await this.inspect(settings) + if (!readiness.ready) throw new Error(readiness.message) + const text = request.text.trim() + if (!text) throw new Error('TTS text is empty') + const format = request.format ?? 'mp3' + const voice = request.voiceId || settings.ttsCustomVoice + const cacheKey = hashTtsInput({ provider: 'custom-openai-compatible', text, model: settings.ttsCustomModel, voice, format, rate: settings.ttsRate }) + const output = cachedAudioPath('custom-openai-compatible', cacheKey, format) + const mimeType = mimeForFormat(format) + if (settings.ttsCacheEnabled && existsSync(output)) { + return { id: makeTtsResultId(), provider: 'custom-openai-compatible', mimeType, audioPath: output, audioDataUrl: audioDataUrl(output, mimeType), cached: true, textHash: cacheKey, createdAt: new Date().toISOString() } + } + const response = await fetch(`${settings.ttsCustomBaseUrl.replace(/\/$/, '')}/audio/speech`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${getTtsCustomApiKey()}` + }, + body: JSON.stringify({ + model: settings.ttsCustomModel, + input: text, + voice, + response_format: format, + speed: settings.ttsRate || 1 + }) + }) + if (!response.ok) { + throw new Error(`自定义 OpenAI-compatible TTS API 请求失败: HTTP ${response.status} ${await response.text().catch(() => '')}`) + } + const buffer = Buffer.from(await response.arrayBuffer()) + 
/**
 * Reject any configured TTS endpoint that does not point at the loopback host.
 *
 * @param settings - Settings whose `ttsCustomBaseUrl` is checked.
 * @throws Error when the URL cannot be parsed, or when its host is not
 *   `localhost`, `127.0.0.1` or `::1`.
 */
function assertLocalEndpoint(settings: AppSettings): void {
  let hostname: string
  try {
    hostname = new URL(settings.ttsCustomBaseUrl).hostname
  } catch {
    // An empty or malformed URL would otherwise surface as a bare "Invalid URL" TypeError.
    throw new Error('外部本地 TTS 服务地址不是有效的 URL。')
  }
  // WHATWG URL serialises IPv6 hosts with brackets ("[::1]"); strip them before comparing.
  const host = hostname.replace(/^\[|\]$/g, '')
  if (!['localhost', '127.0.0.1', '::1'].includes(host)) {
    throw new Error('外部本地 TTS 服务只允许 localhost / 127.0.0.1 / ::1。')
  }
}
b/src/main/services/tts/index.ts @@ -0,0 +1,54 @@ +import { getSettings } from '@main/lib/store' +import type { AppSettings, TtsAssetStatus, TtsProviderId, TtsSynthesisRequest, TtsSynthesisResult, TtsVoice } from '@main/lib/types' +import { inspectKokoroBundledAssets } from './assets' +import { clearTtsCacheFiles } from './cache' +import { customHttpJsonProvider } from './customHttpJsonProvider' +import { customOpenAiSpeechProvider } from './customOpenAiSpeechProvider' +import { externalLocalTtsProvider } from './externalLocalTtsProvider' +import { kokoroBundledProvider } from './kokoroBundledProvider' +import { markdownToSpeechText } from './speechText' +import type { TtsProvider } from './ttsTypes' + +const providers: Record = { + 'kokoro-bundled': kokoroBundledProvider, + 'custom-openai-compatible': customOpenAiSpeechProvider, + 'custom-http-json': customHttpJsonProvider, + 'external-local-service': externalLocalTtsProvider +} + +function selectedProvider(settings: AppSettings): TtsProvider { + const provider = providers[settings.ttsProvider] + if (!provider) throw new Error(`Unknown selected TTS provider: ${settings.ttsProvider}`) + return provider +} + +export async function inspectTtsAssets(): Promise { + return inspectKokoroBundledAssets(getSettings()) +} + +export async function listTtsVoices(): Promise { + const settings = getSettings() + return selectedProvider(settings).listVoices(settings) +} + +export async function synthesizeTts(payload: TtsSynthesisRequest): Promise { + const settings = getSettings() + if (!settings.ttsEnabled) throw new Error('TTS is disabled in settings.') + const text = markdownToSpeechText(payload.text ?? 
'') + if (!text) throw new Error('TTS text is empty after speech cleanup.') + return selectedProvider(settings).synthesize({ ...payload, text }, settings) +} + +export async function testTtsSettings(payload: Partial): Promise { + const settings = { ...getSettings(), ...payload } + const provider = selectedProvider(settings) + const readiness = await provider.inspect(settings) + if (!readiness.ready) throw new Error(readiness.message) + return provider.synthesize({ text: '现在开始复盘第八十七手。', language: settings.ttsLanguage, voiceId: settings.ttsVoiceId, format: settings.ttsProvider === 'custom-openai-compatible' ? 'mp3' : 'wav' }, settings) +} + +export async function clearTtsCache(): Promise<{ deleted: number }> { + return clearTtsCacheFiles() +} + +export { markdownToSpeechText } diff --git a/src/main/services/tts/kokoroBundledProvider.ts b/src/main/services/tts/kokoroBundledProvider.ts new file mode 100644 index 0000000..0dd1c7d --- /dev/null +++ b/src/main/services/tts/kokoroBundledProvider.ts @@ -0,0 +1,123 @@ +import { existsSync } from 'node:fs' +import fsPromises from 'node:fs/promises' +import { basename, join } from 'node:path' +import type { AppSettings, TtsSynthesisRequest, TtsSynthesisResult } from '@main/lib/types' +import { audioDataUrl, cachedAudioPath, hashTtsInput, makeTtsResultId, mimeForFormat } from './cache' +import { inspectKokoroBundledAssets, kokoroDefaultVoice, kokoroLanguageRoot, kokoroModelRoot, listKokoroBundledVoices, readKokoroManifest } from './assets' +import type { TtsProvider } from './ttsTypes' +import { assertSelectedProvider } from './ttsTypes' + +type KokoroModule = { + KokoroTTS: { + from_pretrained: (model: string, options: Record) => Promise<{ + generate: (text: string, options: Record) => Promise<{ save: (path: string) => Promise | void }> + list_voices?: () => string[] + _validate_voice?: (voice: string) => string + }> + } +} + +let cachedModelKey = '' +let cachedTts: Awaited> | null = null +let voiceRedirectInstalled = false 
/**
 * Patch `fs/promises.readFile` once so that kokoro-js voice lookups are served
 * from the voice files registered in `localVoiceFiles`.
 *
 * A read is redirected only when its basename is a registered voice file and
 * the requested path is inside a `kokoro-js` ... `voices` directory; every
 * other read goes to the original implementation untouched. Subsequent calls
 * are no-ops.
 */
function installLocalVoiceRedirect(): void {
  if (voiceRedirectInstalled) return
  voiceRedirectInstalled = true
  // Accept both separators: on Windows the resolved path uses backslashes, so a
  // literal '/voices/' check would never match and the redirect would be skipped.
  const voicesSegment = /[\\/]voices[\\/]/
  const originalReadFile = fsPromises.readFile.bind(fsPromises)
  fsPromises.readFile = (async (path: Parameters<typeof fsPromises.readFile>[0], ...args: unknown[]) => {
    const source = String(path)
    const voiceFile = localVoiceFiles.get(basename(source))
    if (voiceFile && source.includes('kokoro-js') && voicesSegment.test(source)) {
      return originalReadFile(voiceFile, ...(args as []))
    }
    return originalReadFile(path, ...(args as []))
  }) as typeof fsPromises.readFile
}
+} + +async function loadKokoro(settings: AppSettings): Promise> { + const modelRoot = kokoroModelRoot(settings) + const modelKey = `${modelRoot}:${settings.ttsKokoroDType}:${settings.ttsKokoroDevice}` + if (cachedTts && cachedModelKey === modelKey) return cachedTts + const voiceIds = registerLocalVoices(settings) + installLocalVoiceRedirect() + const module = await import('kokoro-js') as unknown as KokoroModule + cachedTts = await module.KokoroTTS.from_pretrained(modelRoot, { + dtype: settings.ttsKokoroDType || 'q8', + device: settings.ttsKokoroDevice || 'cpu', + local_files_only: true + }) + bindManifestVoices(cachedTts, voiceIds) + cachedModelKey = modelKey + return cachedTts +} + +export const kokoroBundledProvider: TtsProvider = { + id: 'kokoro-bundled', + label: 'Kokoro 中文离线语音', + async inspect(settings) { + assertSelectedProvider('kokoro-bundled', settings) + const status = inspectKokoroBundledAssets(settings) + return { + ready: status.ready, + code: status.ready ? 'ready' : 'asset-not-ready', + message: status.detail + } + }, + async listVoices(settings) { + assertSelectedProvider('kokoro-bundled', settings) + return listKokoroBundledVoices(settings) + }, + async synthesize(request: TtsSynthesisRequest, settings: AppSettings): Promise { + assertSelectedProvider('kokoro-bundled', settings) + const readiness = inspectKokoroBundledAssets(settings) + if (!readiness.ready) throw new Error(readiness.detail) + const text = request.text.trim() + if (!text) throw new Error('TTS text is empty') + const format = request.format ?? 'wav' + if (format !== 'wav') throw new Error('Kokoro bundled provider currently outputs wav only') + const voice = request.voiceId || kokoroDefaultVoice(settings) + const cacheKey = hashTtsInput({ provider: 'kokoro-bundled', text, language: request.language ?? 
settings.ttsLanguage, voice, rate: settings.ttsRate, pitch: settings.ttsPitch, format }) + const output = cachedAudioPath('kokoro-bundled', cacheKey, format) + const mimeType = mimeForFormat(format) + if (settings.ttsCacheEnabled && existsSync(output)) { + return { id: makeTtsResultId(), provider: 'kokoro-bundled', mimeType, audioPath: output, audioDataUrl: audioDataUrl(output, mimeType), cached: true, textHash: cacheKey, createdAt: new Date().toISOString() } + } + const tts = await loadKokoro(settings) + const audio = await tts.generate(text, { + voice, + speed: settings.ttsRate || 1 + }) + await audio.save(output) + return { id: makeTtsResultId(), provider: 'kokoro-bundled', mimeType, audioPath: output, audioDataUrl: audioDataUrl(output, mimeType), cached: false, textHash: cacheKey, createdAt: new Date().toISOString() } + } +} diff --git a/src/main/services/tts/speechText.ts b/src/main/services/tts/speechText.ts new file mode 100644 index 0000000..70eb63c --- /dev/null +++ b/src/main/services/tts/speechText.ts @@ -0,0 +1,46 @@ +import type { TeacherRunResult, TtsReadMode } from '@main/lib/types' + +const COORDINATE_PATTERN = /\b([A-HJ-T])(\d{1,2})\b/g + +export function normalizeGoCoordinatesForSpeech(text: string): string { + return text.replace(COORDINATE_PATTERN, (_match, letter: string, number: string) => `${letter} ${number}`) +} + +export function markdownToSpeechText(markdown: string): string { + return normalizeGoCoordinatesForSpeech(markdown) + .replace(/```[\s\S]*?```/g, '') + .replace(/`([^`]+)`/g, '$1') + .replace(/^#{1,6}\s+/gm, '') + .replace(/\*\*([^*]+)\*\*/g, '$1') + .replace(/__([^_]+)__/g, '$1') + .replace(/\[([^\]]+)\]\([^)]*\)/g, '$1') + .replace(/^\s*[-*+]\s+/gm, '') + .replace(/^\s*\|.*\|\s*$/gm, '') + .replace(/sourceRefs?:\s*[^\n]+/gi, '') + .replace(/evidenceRefs?:\s*[^\n]+/gi, '') + .replace(/\bPV\b/g, '参考变化') + .replace(/winrateLoss\s*=\s*([0-9.]+)/gi, '胜率损失约 $1 个百分点') + .replace(/scoreLoss\s*=\s*([0-9.]+)/gi, '目差损失约 $1 目') + 
.replace(/\n{3,}/g, '\n\n') + .trim() +} + +export function limitSpeechLength(text: string, maxChars = 900): string { + const cleaned = text.trim() + if (cleaned.length <= maxChars) return cleaned + return `${cleaned.slice(0, maxChars).replace(/[,。;,.!?!?][^,。;,.!?!?]*$/, '')}。后面的细节可以在文字复盘中继续看。` +} + +export function teacherResultToSpeechText(result: TeacherRunResult | undefined, markdown: string, mode: TtsReadMode): string { + if (mode === 'full') return limitSpeechLength(markdownToSpeechText(markdown), 2400) + const structured = result?.structuredResult ?? result?.structured + const parts = [ + structured?.headline, + structured?.summary, + structured?.keyMistakes?.slice(0, 2).map((item) => item.explanation).join('。'), + structured?.correctThinking?.slice(0, 2).join('。'), + structured?.drills?.slice(0, 1).join('。') + ].filter((part): part is string => Boolean(part && part.trim())) + const base = parts.length ? parts.join('。') : markdown + return limitSpeechLength(markdownToSpeechText(base), 900) +} diff --git a/src/main/services/tts/ttsTypes.ts b/src/main/services/tts/ttsTypes.ts new file mode 100644 index 0000000..a688ec9 --- /dev/null +++ b/src/main/services/tts/ttsTypes.ts @@ -0,0 +1,21 @@ +import type { AppSettings, TtsProviderId, TtsSynthesisRequest, TtsSynthesisResult, TtsVoice } from '@main/lib/types' + +export interface TtsProviderReadiness { + ready: boolean + code: string + message: string +} + +export interface TtsProvider { + id: TtsProviderId + label: string + inspect(settings: AppSettings): Promise + listVoices(settings: AppSettings): Promise + synthesize(request: TtsSynthesisRequest, settings: AppSettings): Promise +} + +export function assertSelectedProvider(expected: TtsProviderId, settings: AppSettings): void { + if (settings.ttsProvider !== expected) { + throw new Error(`TTS provider mismatch: selected=${settings.ttsProvider}, required=${expected}`) + } +} diff --git a/src/preload/index.ts b/src/preload/index.ts index 5c1aa4a..f5a495a 100644 
--- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -36,7 +36,11 @@ import type { TeacherRunCancelResult, TeacherRunRequest, TeacherRunProgress, - TeacherRunResult + TeacherRunResult, + TtsAssetStatus, + TtsSynthesisRequest, + TtsSynthesisResult, + TtsVoice } from '@main/lib/types' import type { DiagnosticsReport } from '@main/services/diagnostics/types' import type { KnowledgeSearchQuery, KnowledgeSearchResult } from '@main/services/knowledge/schema' @@ -109,6 +113,12 @@ const api = { testLlmSettings: (payload: LlmSettingsTestRequest): Promise => ipcRenderer.invoke('llm:test', payload), listLlmModels: (payload: LlmModelsListRequest): Promise => ipcRenderer.invoke('llm:list-models', payload), getSavedLlmApiKey: (): Promise<{ hasKey: boolean; apiKey: string }> => ipcRenderer.invoke('llm:get-saved-api-key'), + inspectTtsAssets: (): Promise => ipcRenderer.invoke('tts:inspect-assets'), + listTtsVoices: (): Promise => ipcRenderer.invoke('tts:list-voices'), + synthesizeTts: (payload: TtsSynthesisRequest): Promise => ipcRenderer.invoke('tts:synthesize', payload), + clearTtsCache: (): Promise<{ deleted: number }> => ipcRenderer.invoke('tts:clear-cache'), + testTtsSettings: (payload: Partial): Promise => ipcRenderer.invoke('tts:test', payload), + getSavedTtsApiKey: (): Promise<{ hasKey: boolean; apiKey: string }> => ipcRenderer.invoke('tts:get-saved-api-key'), getReleaseReadiness: (): Promise => ipcRenderer.invoke('release:readiness'), openPath: (filePath: string): Promise => ipcRenderer.invoke('path:open', filePath), onDesktopCommand: (handler: (command: DesktopCommand) => void): (() => void) => { diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index 6a4f36c..80dd75e 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -47,6 +47,8 @@ import { BetaAcceptancePanel, type BetaAcceptanceItem } from './features/release import { StudentBindingDialog } from './features/student/StudentBindingDialog' import { StudentRailCard } from 
'./features/student/StudentRailCard' import { KataGoAssetsPanel } from './features/settings/KataGoAssetsPanel' +import { TeacherSpeechControls } from './features/tts/TeacherSpeechControls' +import { TtsSettingsPanel } from './features/tts/TtsSettingsPanel' import { TeacherComposerPro } from './features/teacher/TeacherComposerPro' import { createUiTranslator, @@ -81,6 +83,26 @@ const emptyDashboard: DashboardData = { llmModel: 'gpt-5-mini', reviewLanguage: 'zh-CN', defaultPlayerName: '', + ttsEnabled: true, + ttsAutoPlay: false, + ttsProvider: 'kokoro-bundled', + ttsLanguage: 'zh-CN', + ttsVoiceId: 'zf_001', + ttsRate: 1, + ttsPitch: 1, + ttsVolume: 1, + ttsReadMode: 'summary', + ttsCacheEnabled: true, + ttsKokoroDType: 'q8', + ttsKokoroDevice: 'cpu', + ttsCustomBaseUrl: '', + ttsCustomApiKey: '', + ttsCustomModel: '', + ttsCustomVoice: '', + ttsCustomHeadersJson: '', + ttsCustomBodyTemplate: '', + ttsCustomResponseType: 'audio-bytes', + ttsCustomAudioJsonPath: '', defaultCoachLevel: 'intermediate', defaultStudentRank: 'sub1d', defaultStudentAge: 0, @@ -2479,6 +2501,7 @@ export function App(): ReactElement { onBenchmark={() => void runKataGoBenchmark()} onInstallOfficialModel={(presetId) => void installOfficialKataGoModel(presetId)} onRefreshKataGoAssets={() => void refreshKataGoAssets()} + onDashboardUpdated={setDashboard} /> void onInstallOfficialModel: (presetId: KataGoModelPresetId) => void onRefreshKataGoAssets: () => void + onDashboardUpdated: (dashboard: DashboardData) => void t: UiTranslator }): ReactElement | null { if (!open) { @@ -2841,6 +2866,7 @@ function DesktopPreferencesModal({ onBenchmark={onBenchmark} onInstallOfficialModel={onInstallOfficialModel} onRefreshKataGoAssets={onRefreshKataGoAssets} + onDashboardUpdated={onDashboardUpdated} t={t} /> @@ -3103,7 +3129,9 @@ function TeacherInlineResponse({ onFlashPoint, boardSize, totalMoves, - onAnalyzeMove + onAnalyzeMove, + ttsEnabled, + ttsAutoPlay }: { message: ChatMessage t: UiTranslator @@ -3112,6 
+3140,8 @@ function TeacherInlineResponse({ boardSize: number totalMoves: number onAnalyzeMove: (moveNumber: number) => void + ttsEnabled: boolean + ttsAutoPlay: boolean }): ReactElement { const keyMoves = teacherResultKeyMoves(message.result, t) const toolLogs = message.toolLogs ?? message.result?.toolLogs ?? [] @@ -3173,6 +3203,12 @@ function TeacherInlineResponse({ ) : null} {isRunning ? : null} + ) : message.content} @@ -3665,6 +3701,8 @@ function TeacherPanel({ boardSize={boardSize} totalMoves={totalMoves} onAnalyzeMove={onAnalyzeMove} + ttsEnabled={dashboard.settings.ttsEnabled} + ttsAutoPlay={dashboard.settings.ttsAutoPlay} /> @@ -3714,7 +3752,8 @@ function SettingsDrawer({ onTest, onBenchmark, onInstallOfficialModel, - onRefreshKataGoAssets + onRefreshKataGoAssets, + onDashboardUpdated }: { dashboard: DashboardData katagoAssets: KataGoAssetStatus | null @@ -3730,6 +3769,7 @@ function SettingsDrawer({ onBenchmark: () => void onInstallOfficialModel: (presetId: KataGoModelPresetId) => void onRefreshKataGoAssets: () => void + onDashboardUpdated: (dashboard: DashboardData) => void }): ReactElement { const [releaseReadiness, setReleaseReadiness] = useState(null) const [releaseReadinessError, setReleaseReadinessError] = useState('') @@ -3887,15 +3927,21 @@ function SettingsDrawer({ } } + async function saveTtsSettings(next: Partial): Promise { + const updated = await window.goagent.updateSettings(next) + onDashboardUpdated(updated) + } + return ( -
{ - event.preventDefault() - onSave(event.currentTarget) - }} - > +
+ { + event.preventDefault() + onSave(event.currentTarget) + }} + > + + + + + + + +
+ 自定义 API 设置 + + + + +