Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions docs/PRODUCT_READINESS.md
Original file line number Diff line number Diff line change
Expand Up @@ -669,9 +669,26 @@
為 None)。本機相關子集 65 passed、全套 2650 passed(3 個 QR/journal 字型像素為容器缺 Noto CJK
假象,CI 權威)。下一刀 = F9-2h(render 旁白以 `get_glossary(project_id).to_pronunciation_map()`
透傳 TTS `extra_pronunciation`)。
- ⏸️ **後續 offline slice**:F9-2h(runner 旁白帶該課 `to_pronunciation_map()` → TTS)/F9-2i
(翻譯 route 接 `to_translation_rules()`),見上 RFC §4。**自動建議術語**(掃教材抽術語)碰
Gemini 額度 = GATE,另寫 proposal 再做。
- ✅ 2026-06-14 **F9-2h:render 旁白套該課 glossary 讀音表完成(offline)**。把 F9-2g 落地的
`JobRecord.project_id` 接到 TTS:render 前以
`ProjectStore.get_glossary(project_id).to_pronunciation_map()` 現讀該課讀音表,
render 期間掛上 → 旁白照課程術語讀音念。**深埋的
`pipeline.main → gen_tts → synthesize → normalize_text` 一條鏈不逐層穿參**——比照既有
render-scoped override 慣例(`core.config.video_dimensions_override` / `talking_head_override`),
在 `tts_backend.py` 加 `course_pronunciation_override` context manager(module-level 覆寫,
sequential job 設計下非 thread-safe 可接受,同既有取捨)+ `normalize_text` 在「呼叫端未顯式給
`extra_pronunciation`」時自動沿用該覆寫(**顯式 arg 含 `{}` 永遠優先**);`server/runner.py`
新增 `_resolve_course_pronunciation(rec)`(**fail-soft**:無 `project_id` / 課不存在 / 無
glossary / 讀音表空 → None=沿用全域 pronunciation,glossary 解析絕不讓 render 失敗,RFC §5)
並把 `_run_render` 的 inner render 包進第三層 override。守紀律:**完全不碰 R-2 render 入口 assert
/ 狀態機 / reviewed**(只影響「旁白怎麼念」,硬規則 #1)。補 `tests/test_glossary_tts_render.py`
15 測(normalize_text 套用/顯式優先/`{}` 停用/還原·例外還原·巢狀·None no-op;
`_resolve_course_pronunciation` 無 pid/有 glossary 回 map/課不存在 fail-soft/無 glossary/空 map
收斂 None;`_run_render` wiring inner 期間掛上·出去還原·無 glossary 沿用全域,**全 offline 不打
API、不真跑 TTS**)。本機相關子集 105 passed、全套 2665 passed(3 個 QR/journal 字型像素為容器缺
Noto CJK 假象,CI 權威)。
- ⏸️ **後續 offline slice**:F9-2i(翻譯 route 接 `to_translation_rules()`,同 `project_id` 關聯
讓在地化翻譯套固定譯名),見上 RFC §4。**自動建議術語**(掃教材抽術語)碰 Gemini 額度 = GATE,
另寫 proposal 再做。
- [~] 🟡 **F9-3 本機可插拔模型後端**(GATE,= M 軸 Option B 的本機 provider)— 支援
**Ollama 等本機 LLM** 跑文字(大綱/旁白/翻譯),老師可零雲端成本跑(翻譯已用本機
translategemma 驗過路子)。**依賴 M-4 provider 介面就緒**後加 ollama adapter + 設定頁可選
Expand Down
37 changes: 36 additions & 1 deletion server/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,9 +581,44 @@ async def _run_render(
or (rec.options.length_mode or "") == "ultra_quick"
or bool(rec.options.short_video_layout)
)
# F9-2h: 取該 job 所屬課程 glossary 讀音表, render 期間掛上 (旁白套術語讀音)。
# fail-soft: 沒 project_id / 課不存在 / 沒 glossary / 讀音表空 → None = 沿用全域。
from tts_backend import course_pronunciation_override

course_pron = _resolve_course_pronunciation(rec)
with video_dimensions_override(aspect, resolution):
with talking_head_override(th_mode, is_short_form=is_short):
await _run_render_inner(store, rec, section_id=section_id)
with course_pronunciation_override(course_pron):
await _run_render_inner(store, rec, section_id=section_id)


def _resolve_course_pronunciation(rec: JobRecord) -> dict[str, str] | None:
    """F9-2h: return the TTS reading map of the glossary of the course owning this job.

    Reads ``rec.project_id`` (landed in F9-2g). When it is set, the course glossary
    is loaded with ``ProjectStore().get_glossary(project_id)`` and converted with
    ``core.glossary.to_pronunciation_map``.

    Fail-soft (RFC §5): the function never raises. It returns ``None`` when

    * the job has no ``project_id`` (falsy value),
    * the store returns ``None`` for the glossary,
    * the resulting reading map is empty, or
    * anything in the lookup raises (the exception is logged as a warning).

    Args:
        rec: The job record; only its ``project_id`` attribute is read.

    Returns:
        The non-empty reading map produced by ``to_pronunciation_map``, or
        ``None`` when there is nothing to apply.
    """
    course_id = rec.project_id
    if not course_id:
        return None

    try:
        # Imported inside the try on purpose: an import failure is swallowed by
        # the same fail-soft handler as a failed glossary lookup.
        from core.glossary import to_pronunciation_map
        from core.project import ProjectStore

        course_glossary = ProjectStore().get_glossary(course_id)
        if course_glossary is not None:
            reading_map = to_pronunciation_map(course_glossary)
            if reading_map:
                return reading_map
    except Exception as err:
        # Deliberately broad: glossary resolution must never break a render.
        logger.warning(
            "取課程 glossary 讀音表失敗 (project_id=%s), 旁白沿用全域 pronunciation: %s",
            course_id, err,
        )
    # No glossary, empty map, or a logged failure: nothing to override.
    return None


async def _run_render_inner(
Expand Down
231 changes: 231 additions & 0 deletions tests/test_glossary_tts_render.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
"""F9-2h — render 旁白套該課 glossary 讀音表(job ↔ 課 → TTS)。

對應 [JOB_COURSE_ASSOCIATION_RFC.md](../docs/JOB_COURSE_ASSOCIATION_RFC.md) §4.2:
F9-2g 已把 `JobRecord.project_id` 落地,本刀讓 render 旁白前以
`ProjectStore.get_glossary(project_id).to_pronunciation_map()` 取讀音表,render 期間
掛上 `tts_backend.course_pronunciation_override`,`normalize_text` 在呼叫端未顯式給
`extra_pronunciation` 時自動沿用 → 旁白照該課術語讀音念。

三層覆蓋:
1. `tts_backend.course_pronunciation_override` + `normalize_text` 的 render-scoped 套用
(顯式 arg 優先、context 還原、巢狀安全)。
2. `server.runner._resolve_course_pronunciation`(fail-soft:無 project_id / 課不存在 /
無 glossary / 空讀音表 → None)。
3. `server.runner._run_render` 把 override 在 inner render 期間掛上、出去還原(wiring)。

全程不打真 API、不真跑 TTS/ffmpeg(monkeypatch inner / tmp 隔離 ProjectStore)。
"""
from __future__ import annotations

import asyncio

import pytest

import core.project as project_mod
import server.runner as runner_mod
import tts_backend
from core.glossary import Glossary, GlossaryEntry
from server.schemas import (
JobOptions,
JobRecord,
JobSource,
JobState,
SourceType,
utc_now,
)


# ---------------------------------------------------------------- fixtures / helpers


@pytest.fixture(autouse=True)
def _reset_course_override():
    """Clear the module-level course pronunciation override before and after each test.

    Keeps state written by one test from leaking into the next.
    """

    def _clear() -> None:
        tts_backend._COURSE_PRONUNCIATION = None

    _clear()
    yield
    _clear()


def _make_rec(project_id: str | None = None) -> JobRecord:
    """Build a minimal ``JobRecord`` in the RENDERING state.

    Args:
        project_id: Course the job is attached to; ``None`` builds a job with no
            owning course.
    """
    job_source = JobSource(path="/fake.md")
    job_options = JobOptions()
    # Two separate clock reads, in this order, exactly as the record expects them.
    created = utc_now()
    updated = utc_now()
    return JobRecord(
        id="job_test",
        source_type=SourceType.DOCUMENT,
        source=job_source,
        options=job_options,
        state=JobState.RENDERING,
        created_at=created,
        updated_at=updated,
        project_id=project_id,
    )


def _glossary_with_reading() -> Glossary:
    """Build a sample glossary: two entries with a reading, one without."""
    pid_entry = GlossaryEntry(term="PID", reading="P I D 控制器", aliases=["pid"])
    omega_entry = GlossaryEntry(term="ω_n", reading="omega n")
    # This entry has no reading, so it must not show up in the reading map.
    translation_only_entry = GlossaryEntry(
        term="自然頻率", translations={"en": "natural frequency"}
    )
    return Glossary(
        course="材力",
        entries=[pid_entry, omega_entry, translation_only_entry],
    )


# ---------------------------------------------------------------- normalize_text 套用


class TestNormalizeTextCourseOverride:
    """Behaviour of ``normalize_text`` with and without a render-scoped course override."""

    def test_no_override_pure_global(self):
        """No override active: global behaviour only, ``PID`` is left untouched."""
        spoken = tts_backend.normalize_text("看 PID 控制")
        assert "PID" in spoken

    def test_override_applies_when_arg_absent(self):
        """Override active and caller passes no extra map: the course reading is applied."""
        course_map = {"PID": "P I D"}
        with tts_backend.course_pronunciation_override(course_map):
            spoken = tts_backend.normalize_text("看 PID 控制")
        assert "P I D" in spoken
        assert "PID" not in spoken

    def test_explicit_arg_wins_over_override(self):
        """An explicit ``extra_pronunciation`` beats the render-scoped override."""
        course_map = {"PID": "P I D"}
        caller_map = {"PID": "屁挨低"}
        with tts_backend.course_pronunciation_override(course_map):
            spoken = tts_backend.normalize_text(
                "看 PID 控制", extra_pronunciation=caller_map
            )
        assert "屁挨低" in spoken
        assert "P I D" not in spoken

    def test_explicit_empty_dict_disables_course(self):
        """An explicit ``{}`` means "no course readings": it is not None, so no fallback."""
        with tts_backend.course_pronunciation_override({"PID": "P I D"}):
            spoken = tts_backend.normalize_text("看 PID 控制", extra_pronunciation={})
        # The course reading is blocked by the explicit {}, so PID survives.
        assert "PID" in spoken

    def test_context_restores_after_exit(self):
        """Module-level state is None again once the ``with`` block is left."""
        expected_inside = {"PID": "P I D"}
        assert tts_backend._COURSE_PRONUNCIATION is None
        with tts_backend.course_pronunciation_override({"PID": "P I D"}):
            assert tts_backend._COURSE_PRONUNCIATION == expected_inside
        assert tts_backend._COURSE_PRONUNCIATION is None

    def test_context_restores_on_exception(self):
        """State is restored even when the body of the ``with`` raises."""
        with pytest.raises(RuntimeError):
            with tts_backend.course_pronunciation_override({"PID": "P I D"}):
                raise RuntimeError("boom")
        assert tts_backend._COURSE_PRONUNCIATION is None

    def test_nested_override_restores_outer(self):
        """Nested overrides unwind to the outer mapping, then to None."""
        outer_map = {"A": "a"}
        inner_map = {"B": "b"}
        with tts_backend.course_pronunciation_override(outer_map):
            with tts_backend.course_pronunciation_override(inner_map):
                assert tts_backend._COURSE_PRONUNCIATION == {"B": "b"}
            assert tts_backend._COURSE_PRONUNCIATION == {"A": "a"}
        assert tts_backend._COURSE_PRONUNCIATION is None

    def test_none_or_empty_mapping_is_noop(self):
        """Installing None or an empty dict leaves the module-level state at None."""
        for nothing in (None, {}):
            with tts_backend.course_pronunciation_override(nothing):
                assert tts_backend._COURSE_PRONUNCIATION is None


# ---------------------------------------------------------------- _resolve_course_pronunciation


class TestResolveCoursePronunciation:
    """``runner._resolve_course_pronunciation``: reading map from ``job.project_id``, fail-soft."""

    @pytest.fixture
    def store_at(self, tmp_path, monkeypatch):
        """Redirect ``ProjectStore(...)`` to ``tmp_path`` and return a real store rooted there."""
        real_store_cls = project_mod.ProjectStore

        def _tmp_rooted_store(*_args, **_kwargs):
            # Whatever the caller passes, the store always lives under tmp_path.
            return real_store_cls(root=tmp_path)

        prepared_store = real_store_cls(root=tmp_path)
        monkeypatch.setattr(project_mod, "ProjectStore", _tmp_rooted_store)
        return prepared_store

    def test_no_project_id_returns_none(self):
        """A job without a ``project_id`` resolves to None."""
        ownerless_job = _make_rec(None)
        assert runner_mod._resolve_course_pronunciation(ownerless_job) is None

    def test_project_with_glossary_returns_reading_map(self, store_at):
        """Course has a glossary with readings: the surface-form to reading map comes back."""
        store_at.create("mech101", "材力")
        store_at.save_glossary("mech101", _glossary_with_reading())
        reading_map = runner_mod._resolve_course_pronunciation(_make_rec("mech101"))
        expected = {
            "PID": "P I D 控制器",
            "pid": "P I D 控制器",
            "ω_n": "omega n",
        }
        assert reading_map == expected
        # The entry without a reading stays out of the map.
        assert "自然頻率" not in reading_map

    def test_unknown_project_returns_none_fail_soft(self, store_at):
        """Unknown course: the lookup fails softly and yields None instead of raising."""
        reading_map = runner_mod._resolve_course_pronunciation(_make_rec("ghost"))
        assert reading_map is None

    def test_project_without_glossary_returns_none(self, store_at):
        """Course exists but has no glossary yet: None."""
        store_at.create("mech101", "材力")
        reading_map = runner_mod._resolve_course_pronunciation(_make_rec("mech101"))
        assert reading_map is None

    def test_empty_reading_map_returns_none(self, store_at):
        """Glossary entries without any reading give an empty map, which collapses to None."""
        store_at.create("mech101", "材力")
        readingless_glossary = Glossary(
            course="材力", entries=[GlossaryEntry(term="自然頻率")]
        )
        store_at.save_glossary("mech101", readingless_glossary)
        reading_map = runner_mod._resolve_course_pronunciation(_make_rec("mech101"))
        assert reading_map is None


# ---------------------------------------------------------------- _run_render wiring


class TestRunRenderWiring:
    """``_run_render`` installs the course override around the inner render and restores it."""

    @pytest.mark.asyncio
    async def test_override_active_during_inner_and_restored(self, monkeypatch):
        """Resolved map is visible while the inner render runs and is None afterwards."""
        seen: dict = {}

        async def fake_inner(store, rec, *, section_id=None):
            # Snapshot the module-level state as the inner render would observe it.
            seen["during"] = tts_backend._COURSE_PRONUNCIATION

        def fake_resolve(rec):
            return {"PID": "P I D"}

        monkeypatch.setattr(runner_mod, "_run_render_inner", fake_inner)
        monkeypatch.setattr(runner_mod, "_resolve_course_pronunciation", fake_resolve)

        await runner_mod._run_render(store=None, rec=_make_rec("mech101"))

        assert seen["during"] == {"PID": "P I D"}
        # Restored on exit, so nothing leaks into the next render.
        assert tts_backend._COURSE_PRONUNCIATION is None

    @pytest.mark.asyncio
    async def test_no_glossary_leaves_global_behavior(self, monkeypatch):
        """Resolver returns None: the override stays None during the inner render."""
        seen: dict = {}

        async def fake_inner(store, rec, *, section_id=None):
            seen["during"] = tts_backend._COURSE_PRONUNCIATION

        def fake_resolve(rec):
            return None

        monkeypatch.setattr(runner_mod, "_run_render_inner", fake_inner)
        monkeypatch.setattr(runner_mod, "_resolve_course_pronunciation", fake_resolve)

        await runner_mod._run_render(store=None, rec=_make_rec(None))

        assert seen["during"] is None
        assert tts_backend._COURSE_PRONUNCIATION is None
assert tts_backend._COURSE_PRONUNCIATION is None
53 changes: 52 additions & 1 deletion tts_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,47 @@ def _merged_pronunciation(
return sorted(merged.items(), key=lambda x: -len(x[0]))


# ---------- F9-2h: render 期間的 per-course 讀音覆寫 ----------
# render 旁白深埋在 pipeline.main → gen_tts → synthesize → normalize_text 一條鏈,
# 把 extra_pronunciation 逐層穿過去要動 synthesize 抽象介面 + 三個 backend +
# FallbackTTS + gen_tts + render_video 簽章。改用 render 期間的 module-level 覆寫
# (比照 core.config.video_dimensions_override / talking_head_override 既有 render-scoped
# override 慣例): runner 在 render 前後掛上該課 glossary 讀音表, normalize_text 在
# 「呼叫端未顯式給 extra_pronunciation」時自動沿用它。
#
# 非 thread-safe — 兩個 render 同時開 context 會搶, 跟 server.runner.py 現有
# sequential job 設計相容 (同 video_dimensions_override 的取捨)。
# Course reading map currently installed for a render; None means no course
# override is active. Only course_pronunciation_override swaps it in and out.
_COURSE_PRONUNCIATION: dict[str, str] | None = None


class course_pronunciation_override:
    """Context manager that temporarily installs a course glossary's TTS reading map.

    Usage:
        with course_pronunciation_override(glossary_reading_map):
            await render_video(...)
        # the previous value is back in place here

    On entry the current module-level ``_COURSE_PRONUNCIATION`` is saved and
    replaced; on exit the saved value is put back, so nesting distinct
    instances unwinds correctly.

    A ``mapping`` of ``None`` or an empty dict is stored as ``None``, i.e. no
    course override is installed (RFC §5, fail-soft on a missing glossary).
    """

    def __init__(self, mapping: dict[str, str] | None):
        # Value to restore on exit; filled in by __enter__.
        self._old: dict[str, str] | None = None
        # Any falsy mapping (None, {}) collapses to None.
        self._mapping = mapping if mapping else None

    def __enter__(self) -> "course_pronunciation_override":
        global _COURSE_PRONUNCIATION
        self._old, _COURSE_PRONUNCIATION = _COURSE_PRONUNCIATION, self._mapping
        return self

    def __exit__(self, *exc_info) -> bool:
        global _COURSE_PRONUNCIATION
        _COURSE_PRONUNCIATION = self._old
        # Returning False lets an exception raised inside the block propagate.
        return False


def split_for_f5(text: str, max_chars: int = 30) -> list[str]:
"""PR-5b: F5-TTS 預切句, 解決 F5 內部 batch 不顧中文詞邊界的問題。

Expand Down Expand Up @@ -154,6 +195,11 @@ def normalize_text(
(`core.glossary.to_pronunciation_map`), 與全域 pronunciation.json longest-first
合併、同 key 課程優先。預設 None = 完全沿用全域 (既有 caller 零影響)。

F9-2h: 呼叫端**未顯式給** `extra_pronunciation` 時 (None), 自動沿用 render 期間
`course_pronunciation_override` 掛上的該課讀音表 (render 旁白透過此路徑套到 glossary,
深埋的 synthesize 不必逐層穿參)。顯式給的 arg 永遠優先 (含顯式給 `{}` 表「不要課程
讀音」)。兩者皆無 → 純全域, 行為與舊版一致。

iter 93 新增 (實測 GCP Wavenet 念公式糟):
- 剝 markdown backtick / 星號 (LLM 偶爾把變數包成 `e(t)`, TTS 念「上句點」)
- 函式記法 `e(t)` → `e of t` (TTS 才會念 "e of t" 不是 "et")
Expand Down Expand Up @@ -202,7 +248,12 @@ def _split_var(m: re.Match) -> str:
text,
)
# 發音對照: longest-match 替換, 前後補空白避免黏字 (全域 + per-course glossary)
for src, dst in _merged_pronunciation(extra_pronunciation):
# F9-2h: 未顯式給 extra 時沿用 render-scoped 課程覆寫 (顯式 arg 含 {} 永遠優先)
effective_extra = (
extra_pronunciation if extra_pronunciation is not None
else _COURSE_PRONUNCIATION
)
for src, dst in _merged_pronunciation(effective_extra):
text = text.replace(src, f" {dst} ")
return re.sub(r"\s+", " ", text).strip()

Expand Down
Loading