Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions docs/PRODUCT_READINESS.md
Original file line number Diff line number Diff line change
Expand Up @@ -686,9 +686,22 @@
收斂 None;`_run_render` wiring inner 期間掛上·出去還原·無 glossary 沿用全域,**全 offline 不打
API、不真跑 TTS**)。本機相關子集 105 passed、全套 2665 passed(3 個 QR/journal 字型像素為容器缺
Noto CJK 假象,CI 權威)。
- ⏸️ **後續 offline slice**:F9-2i(翻譯 route 接 `to_translation_rules()`,同 `project_id` 關聯
讓在地化翻譯套固定譯名),見上 RFC §4。**自動建議術語**(掃教材抽術語)碰 Gemini 額度 = GATE,
另寫 proposal 再做。
- ✅ 2026-06-14 **F9-2i:翻譯 route 接 `to_translation_rules()` 完成(offline,F9-2 offline 收尾)**。
`POST /localization/translate` 的 `TranslateRequest` 加**選填** `project_id`:給了 → 以
`ProjectStore.get_glossary(project_id)` 現讀該課 glossary → `to_translation_rules(target_lang)`
產固定譯名規則文字塊,與呼叫端顯式 `glossary` 合併(顯式在前、課程在後)後送
`translator.translate(glossary=...)`,讓在地化翻譯術語前後一致。canonical 區域碼↔glossary 短碼對得上
(`_glossary_lang_candidates`:完整碼優先、再退基底子標籤,`en-US`→`en`、`zh-CN`/`zh-TW`
完整碼本就是 glossary key)。守 RFC §5 **fail-soft**:沒 `project_id` / 課不存在
(`ProjectNotFoundError`)/ 無 glossary / 該語言無譯名 / 讀檔出錯 → 一律回空課程規則、沿用現行
行為,**絕不讓翻譯失敗**;glossary 讀檔沿 R-3 走 `to_thread` 不阻 event loop。**完全不碰 review
gate / 狀態機**(只影響「術語怎麼譯」,硬規則 #1)。`project_id` 為 optional=既有 caller 零影響、
向後相容。補 `tests/test_localization_glossary.py` 9 測(注入固定譯名含別名並排/顯式+課程合併順序/
完整區域碼命中/該語言無譯名不附/沒 pid·未知 pid·無 glossary·空白 pid fail-soft,**全 mock
translate 不打真 API、ProjectStore tmp 隔離**=offline-first)。本機相關子集 68 passed、全套 2673
passed(3 個 QR/journal 字型像素為容器缺 Noto CJK 假象,CI 權威)。**前端 `LocalizeMenu` 傳
`project_id`** 屬後續前端 slice(route 欄位選填、不破壞現況)。**自動建議術語**(掃教材抽術語)
碰 Gemini 額度 = GATE,另寫 proposal 再做。F9-2 offline slice 至此到齊。
- [~] 🟡 **F9-3 本機可插拔模型後端**(GATE,= M 軸 Option B 的本機 provider)— 支援
**Ollama 等本機 LLM** 跑文字(大綱/旁白/翻譯),老師可零雲端成本跑(翻譯已用本機
translategemma 驗過路子)。**依賴 M-4 provider 介面就緒**後加 ollama adapter + 設定頁可選
Expand Down
80 changes: 76 additions & 4 deletions server/routes/localization.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,18 @@
import os
import tempfile

from fastapi import APIRouter, File, Form, UploadFile
from fastapi import APIRouter, Depends, File, Form, UploadFile
from pydantic import BaseModel, Field

from core.glossary import to_translation_rules
from core.langcode import LANGUAGES, to_underscore
from core.meeting.summarizer import meeting_summarizer
from core.project import ProjectNotFoundError, ProjectStore
from core.translation.service import translator
from core.video.dubber import get_video_dubber

from .projects import get_default_project_store

router = APIRouter(prefix="/localization", tags=["localization"])


Expand All @@ -45,6 +49,9 @@ class TranslateRequest(BaseModel):
target_lang: str = "zh-TW" # canonical 連字號
source_lang: str = "auto"
glossary: str = ""
# F9-2i:選填課程關聯。給了 → 載入該課 glossary 的固定譯名(to_translation_rules)
# 併進 glossary 規則,讓在地化翻譯術語前後一致。沒給/查無 → 沿用現行行為(零影響)。
project_id: str | None = None
style: str = ""


Expand Down Expand Up @@ -89,6 +96,60 @@ def _first(gen) -> str:
return out


# ---------- F9-2i:課程 glossary → 翻譯固定譯名 ----------
def _glossary_lang_candidates(code: str) -> list[str]:
"""canonical 目標語言碼 → glossary translations key 候選(完整碼優先、再退基底子標籤)。

glossary 的逐語言譯名 key 用前端 LANGS 短碼(en/ja/ko/zh-CN/vi),但翻譯 route 對外收
canonical BCP-47 區域碼(en-US/ja-JP/...)。先試完整碼(zh-CN/zh-TW 本就是 glossary key),
再退基底(en-US → en)涵蓋「術語表用短碼登錄、route 收區域碼」的常見情形。
"""
out = [code]
base = code.split("-", 1)[0] if code else ""
if base and base != code:
out.append(base)
return out


def _course_glossary_rules(
    project_id: str | None, target_lang: str, store: ProjectStore
) -> str:
    """Load a course glossary and render its fixed-translation rules.

    Fail-soft by contract (RFC section 5): a missing/blank ``project_id``, an
    unknown course, a course without a glossary, a glossary with no
    translation for the target language, or any error while reading or
    rendering the glossary all yield an empty string. Wanting to apply
    terminology must never make the translation itself fail.

    Args:
        project_id: Optional course id; ``None`` or whitespace-only means
            "no course association".
        target_lang: Canonical hyphenated target language code.
        store: Project store used to read the course glossary.

    Returns:
        The rules text from ``to_translation_rules`` for the first candidate
        language key that produces any, or ``""`` when none apply.
    """
    if not project_id or not project_id.strip():
        return ""
    try:
        glossary = store.get_glossary(project_id)
        if glossary is None:
            return ""
        # Rule rendering stays inside the guard so that a malformed glossary
        # cannot break the translation request either.
        for lang in _glossary_lang_candidates(target_lang):
            rules = to_translation_rules(glossary, lang)
            if rules:
                return rules
    except ProjectNotFoundError:
        # An unknown course is an expected outcome, not worth a log line.
        return ""
    except Exception:  # noqa: BLE001 - fail-soft: never block translation
        import logging

        # Still fail-soft, but leave a trace so a corrupt glossary file can
        # be diagnosed instead of silently disabling the course rules.
        logging.getLogger(__name__).warning(
            "course glossary unavailable for project_id=%r target_lang=%r; "
            "translating without course rules",
            project_id,
            target_lang,
            exc_info=True,
        )
        return ""
    return ""


def _merge_glossary(caller_glossary: str, course_rules: str) -> str:
"""合併呼叫端顯式 glossary 與該課 glossary 規則(都丟給 translate 的 glossary 參數)。

呼叫端顯式規則放前面(較專一/手動覆寫優先呈現),課程規則接在後;任一為空就只留另一條,
兩者皆空回空字串(translate 對空字串 no-op,行為與不傳 glossary 一致)。
"""
parts = []
if caller_glossary and caller_glossary.strip():
parts.append(caller_glossary.strip())
if course_rules:
parts.append(course_rules)
return "\n".join(parts)


# ---------- 端點 ----------
@router.get("/languages")
async def list_languages() -> dict:
Expand All @@ -102,13 +163,24 @@ async def list_languages() -> dict:


@router.post("/translate")
async def translate_text(req: TranslateRequest) -> dict:
"""文字翻譯。對外 zh-TW,邊界轉 zh_TW 後送 Gemini 服務。"""
async def translate_text(
req: TranslateRequest,
store: ProjectStore = Depends(get_default_project_store),
) -> dict:
"""文字翻譯。對外 zh-TW,邊界轉 zh_TW 後送 Gemini 服務。

F9-2i:給了 `project_id` → 載入該課 glossary 的固定譯名併進 glossary 規則(fail-soft)。
"""
# 課程 glossary 讀檔(小型本機 JSON + RLock)也走 to_thread,沿 R-3 不阻 event loop。
course_rules = await asyncio.to_thread(
_course_glossary_rules, req.project_id, req.target_lang, store
)
glossary = _merge_glossary(req.glossary, course_rules)
# R-3: 翻譯是 blocking (Gemini HTTP) → to_thread 不阻 event loop
translated = await asyncio.to_thread(
translator.translate,
req.text, _u(req.source_lang), _u(req.target_lang),
glossary=req.glossary, style=req.style,
glossary=glossary, style=req.style,
)
return {
"translated_text": translated,
Expand Down
158 changes: 158 additions & 0 deletions tests/test_localization_glossary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
"""F9-2i:在地化翻譯 route 接課程 glossary 固定譯名測試。

驗收(對應 docs/JOB_COURSE_ASSOCIATION_RFC.md §4.3):
- `POST /localization/translate` 帶 `project_id` → 載入該課 glossary 的固定譯名
(`to_translation_rules`)併進 glossary 規則送翻譯。
- 呼叫端顯式 `glossary` 與課程規則合併(顯式在前、課程在後)。
- canonical 區域碼(en-US)↔ glossary 短碼(en)對得上(候選碼退基底)。
- fail-soft(RFC §5):沒 project_id / 課不存在 / 無 glossary / 該語言無譯名 →
沿用現行行為(不傳課程規則、絕不讓翻譯失敗)。

Mock 策略:monkeypatch translator.translate 攔截實際送進去的 glossary 字串、不打真
Gemini;ProjectStore 注入 tmp_path 隔離(全 offline-first)。
"""
from __future__ import annotations

import pytest

pytest.importorskip("fastapi.testclient", reason="需要 fastapi 安裝")
pytest.importorskip("multipart", reason="server.main 內 upload route 需要")

from fastapi.testclient import TestClient

import core.translation.service as svc
import server.routes.projects as projects_mod
from core.glossary import Glossary, GlossaryEntry
from server.main import create_app


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield ``(test_client, project_store, captured)`` for the translate route.

    The translator's ``translate`` is replaced by a recorder that stores the
    glossary string and target code it was called with, and the app's project
    store dependency is overridden with one rooted under ``tmp_path``.
    """
    captured = {}

    def _recording_translate(text, source_code, target_code, glossary="", style=""):
        # Record what the route actually forwarded instead of translating.
        captured.update(glossary=glossary, target=target_code)
        return "譯文"

    monkeypatch.setattr(svc.translator, "translate", _recording_translate)

    app = create_app()
    isolated_store = projects_mod.ProjectStore(root=tmp_path / "projects")
    app.dependency_overrides[projects_mod.get_default_project_store] = (
        lambda: isolated_store
    )
    with TestClient(app) as http:
        yield http, isolated_store, captured


def _make_course_with_glossary(store, pid="course_statics", *, course="靜力學"):
    """Create a course and save a glossary with fixed en/ja translations.

    Returns the project id so callers can pass it straight to the route.
    """
    store.create(pid, title=course)
    # First entry carries aliases and two languages; second is en-only.
    natural_frequency = GlossaryEntry(
        term="自然頻率",
        aliases=["ω_n", "wn"],
        translations={"en": "natural frequency", "ja": "固有振動数"},
    )
    damping_ratio = GlossaryEntry(term="阻尼比", translations={"en": "damping ratio"})
    course_glossary = Glossary(
        course=course,
        entries=[natural_frequency, damping_ratio],
    )
    store.save_glossary(pid, course_glossary)
    return pid


class TestCourseGlossaryWiring:
    """A ``project_id`` on the translate route pulls in the course glossary."""

    def test_project_id_injects_translation_rules(self, client):
        """With project_id, fixed translations (and aliases) reach the translator."""
        http, project_store, captured = client
        course_id = _make_course_with_glossary(project_store)
        payload = {
            "text": "自然頻率與阻尼比",
            "target_lang": "en-US",
            "project_id": course_id,
        }
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        sent = captured["glossary"]
        # en-US falls back to the base "en" key used by the glossary.
        assert "natural frequency" in sent
        assert "damping ratio" in sent
        # The source side lists the term together with its aliases.
        assert "ω_n" in sent and "自然頻率" in sent

    def test_caller_glossary_merged_first(self, client):
        """Explicit caller glossary is merged with course rules, explicit first."""
        http, project_store, captured = client
        course_id = _make_course_with_glossary(project_store)
        payload = {
            "text": "x",
            "target_lang": "en-US",
            "project_id": course_id,
            "glossary": "手動規則 → manual",
        }
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        sent = captured["glossary"]
        assert "手動規則 → manual" in sent
        assert "natural frequency" in sent
        # Explicit rules must appear before the course rules.
        assert sent.index("手動規則") < sent.index("natural frequency")

    def test_exact_region_code_matches(self, client):
        """A glossary keyed by a full region code (zh-CN) is matched directly."""
        http, project_store, captured = client
        project_store.create("c2", title="材力")
        stress_entry = GlossaryEntry(term="應力", translations={"zh-CN": "应力"})
        project_store.save_glossary(
            "c2", Glossary(course="材力", entries=[stress_entry])
        )
        payload = {"text": "應力", "target_lang": "zh-CN", "project_id": "c2"}
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        assert "应力" in captured["glossary"]

    def test_no_translation_for_lang_no_rules(self, client):
        """No translation for the target language means no course rules are added."""
        http, project_store, captured = client
        course_id = _make_course_with_glossary(project_store)  # en/ja only
        payload = {"text": "x", "target_lang": "ko-KR", "project_id": course_id}
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        assert captured["glossary"] == ""


class TestFailSoft:
    """Every "no usable course glossary" path still returns 200 with no course rules."""

    def test_no_project_id_passes_caller_glossary_only(self, client):
        """Without project_id only the caller glossary is forwarded."""
        http, _, captured = client
        payload = {
            "text": "x",
            "target_lang": "en-US",
            "glossary": "只有這條 → only",
        }
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        assert captured["glossary"] == "只有這條 → only"

    def test_unknown_project_id_fail_soft(self, client):
        """A project_id for a non-existent course neither errors nor adds rules."""
        http, _, captured = client
        payload = {"text": "x", "target_lang": "en-US", "project_id": "nope"}
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        assert captured["glossary"] == ""

    def test_course_without_glossary_fail_soft(self, client):
        """An existing course with no glossary yet yields empty course rules."""
        http, project_store, captured = client
        project_store.create("bare", title="尚無術語表")
        payload = {"text": "x", "target_lang": "en-US", "project_id": "bare"}
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        assert captured["glossary"] == ""

    def test_blank_project_id_treated_as_none(self, client):
        """A whitespace-only project_id is treated as not provided."""
        http, _, captured = client
        payload = {"text": "x", "target_lang": "en-US", "project_id": " "}
        resp = http.post("/localization/translate", json=payload)
        assert resp.status_code == 200
        assert captured["glossary"] == ""
Loading