From 250d359b21ad275f0f9f9f1ee950cb76cf19f7ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 26 Jun 2026 16:58:40 +0800 Subject: [PATCH 01/15] feat: support OpenAI-compatible proxy for CLI-based translation backends Allow CLIProxyAPI (or any OpenAI-compatible proxy) to serve as the translation backend by setting OPENAI_BASE_URL. This enables using CLI subscription tokens (Codex, Antigravity, Copilot) instead of paying for LLM API calls directly. - Add openai_base_url setting with trailing-slash validator - Build OpenAIChat model when proxy is configured, bypass Agno registry - Skip API key validation when proxy handles auth - Add 6 unit tests covering proxy/non-proxy model construction Co-Authored-By: Claude Opus 4.6 --- src/bilingualsub/core/translator.py | 51 +++++++++-- src/bilingualsub/utils/config.py | 8 ++ tests/unit/core/test_translator.py | 129 ++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 8 deletions(-) diff --git a/src/bilingualsub/core/translator.py b/src/bilingualsub/core/translator.py index ab2d13d..4b529d6 100644 --- a/src/bilingualsub/core/translator.py +++ b/src/bilingualsub/core/translator.py @@ -7,9 +7,12 @@ import structlog from agno.agent import Agent +from agno.models.base import Model +from agno.models.openai import OpenAIChat from bilingualsub.core.subtitle import Subtitle, SubtitleEntry from bilingualsub.utils.config import ( + Settings, get_groq_api_key, get_openai_api_key, get_settings, @@ -24,6 +27,8 @@ _PARTIAL_CONTEXT_WINDOW = 2 _MAX_METADATA_TITLE_CHARS = 200 _MAX_METADATA_DESC_CHARS = 1200 +_OPENAI_PREFIX = "openai:" +_PROXY_PLACEHOLDER_API_KEY = "dummy" # pragma: allowlist secret class TranslationError(Exception): @@ -47,19 +52,49 @@ class RetranslateEntry: translated: str = "" -def _ensure_translator_api_key(translator_model: str) -> None: +def _is_openai_model(model_str: str) -> bool: + return model_str.strip().lower().startswith(_OPENAI_PREFIX) + + +def _ensure_translator_api_key(settings: Settings) -> None: """Validate API key for managed translator providers. + Skips the OpenAI key check when a proxy base URL is configured, + since proxies supply their own authentication. + Raises: ValueError: If required provider key is missing. """ - model_prefix = translator_model.strip().lower() - if model_prefix.startswith("groq:"): + model_str = settings.translator_model + if model_str.strip().lower().startswith("groq:"): get_groq_api_key() - elif model_prefix.startswith("openai:"): + elif _is_openai_model(model_str) and not settings.openai_base_url: get_openai_api_key() +def _build_model(settings: Settings) -> str | Model: + """Build an Agno model instance or model string for the translator. + + When the translator model has an ``openai:`` prefix AND a custom + ``OPENAI_BASE_URL`` is configured, constructs an :class:`OpenAIChat` model + pointed at the proxy endpoint. This allows OpenAI-compatible proxies + (e.g. CLIProxyAPI) to be used without touching the Agno provider registry. + + In all other cases the raw model string is returned and Agno handles + provider resolution itself (existing behaviour). + """ + model_str = settings.translator_model + if _is_openai_model(model_str) and settings.openai_base_url: + # Slice original to preserve model ID casing + model_id = model_str[len(_OPENAI_PREFIX) :] + return OpenAIChat( + id=model_id, + base_url=settings.openai_base_url, + api_key=settings.openai_api_key or _PROXY_PLACEHOLDER_API_KEY, + ) + return model_str + + def _compact_text(text: str) -> str: """Normalize whitespace while preserving readable punctuation.""" return re.sub(r"\s+", " ", text).strip() @@ -334,9 +369,9 @@ def translate_subtitle( ValueError: If provider API key is missing """ settings = get_settings() - _ensure_translator_api_key(settings.translator_model) + _ensure_translator_api_key(settings) translator = Agent( - model=settings.translator_model, + model=_build_model(settings), description=_build_translator_description( source_lang=source_lang, target_lang=target_lang, @@ -491,9 +526,9 @@ def retranslate_entries( raise ValueError(f"selected_indices not found: {missing}") settings = get_settings() - _ensure_translator_api_key(settings.translator_model) + _ensure_translator_api_key(settings) translator = Agent( - model=settings.translator_model, + model=_build_model(settings), description=_build_translator_description( source_lang=source_lang, target_lang=target_lang, diff --git a/src/bilingualsub/utils/config.py b/src/bilingualsub/utils/config.py index 8fffec1..8f6cced 100644 --- a/src/bilingualsub/utils/config.py +++ b/src/bilingualsub/utils/config.py @@ -2,6 +2,7 @@ from functools import lru_cache +from pydantic import field_validator from pydantic_settings import BaseSettings, SettingsConfigDict @@ -11,6 +12,7 @@ class Settings(BaseSettings): Attributes: groq_api_key: API key for Groq services (Whisper + LLM) openai_api_key: API key for OpenAI services + openai_base_url: Base URL for OpenAI-compatible proxy (e.g. http://localhost:8317/v1) transcriber_provider: Whisper provider ("groq" or "openai") transcriber_model: Whisper model name translator_model: Agno model string (e.g. "ollama:model_id", "groq:model_id") @@ -20,6 +22,12 @@ class Settings(BaseSettings): groq_api_key: str = "" openai_api_key: str = "" + openai_base_url: str = "" + + @field_validator("openai_base_url") + @classmethod + def strip_trailing_slash(cls, v: str) -> str: + return v.rstrip("/") if v else v transcriber_provider: str = "groq" transcriber_model: str = "whisper-large-v3-turbo" diff --git a/tests/unit/core/test_translator.py b/tests/unit/core/test_translator.py index bcc0e54..f6bcebb 100644 --- a/tests/unit/core/test_translator.py +++ b/tests/unit/core/test_translator.py @@ -4,11 +4,14 @@ from unittest.mock import Mock, patch import pytest +from agno.models.openai import OpenAIChat from bilingualsub.core.subtitle import Subtitle, SubtitleEntry from bilingualsub.core.translator import ( + _PROXY_PLACEHOLDER_API_KEY, RetranslateEntry, TranslationError, + _build_model, _parse_batch_response, retranslate_entries, translate_subtitle, @@ -728,3 +731,129 @@ def test_retranslate_entries_invalid_index_raises_error(self): entries = [RetranslateEntry(index=1, original="Line 1", translated="第一句")] with pytest.raises(ValueError, match="selected_indices not found"): retranslate_entries(entries=entries, selected_indices=[2]) + + +@pytest.mark.unit +class TestBuildModel: + """Test cases for _build_model helper.""" + + @pytest.fixture(autouse=True) + def _clear_settings_cache(self): + get_settings.cache_clear() + yield + get_settings.cache_clear() + + def test_given_groq_model_when_build_model_then_returns_raw_string( + self, monkeypatch + ): + monkeypatch.setenv("TRANSLATOR_MODEL", "groq:openai/gpt-oss-120b") + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + get_settings.cache_clear() + + result = _build_model(get_settings()) + + assert result == "groq:openai/gpt-oss-120b" + + def test_given_openai_model_without_base_url_when_build_model_then_returns_raw_string( + self, monkeypatch + ): + monkeypatch.setenv("TRANSLATOR_MODEL", "openai:gpt-4o") + monkeypatch.setenv("OPENAI_API_KEY", "sk-test") + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + get_settings.cache_clear() + + result = _build_model(get_settings()) + + assert result == "openai:gpt-4o" + + def test_given_openai_model_with_base_url_when_build_model_then_returns_openai_chat( + self, monkeypatch + ): + monkeypatch.setenv("TRANSLATOR_MODEL", "openai:claude-sonnet-4-5") + monkeypatch.setenv("OPENAI_API_KEY", "cli-token") + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") + get_settings.cache_clear() + + result = _build_model(get_settings()) + + assert isinstance(result, OpenAIChat) + assert result.id == "claude-sonnet-4-5" + assert str(result.base_url) == "http://localhost:3000/v1" + + def test_given_proxy_without_api_key_when_build_model_then_uses_placeholder( + self, monkeypatch + ): + # Guards: proxy users without a real OpenAI key must not get auth errors + monkeypatch.setenv("TRANSLATOR_MODEL", "openai:any-model") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") + get_settings.cache_clear() + + result = _build_model(get_settings()) + + assert isinstance(result, OpenAIChat) + assert result.api_key == _PROXY_PLACEHOLDER_API_KEY + + def test_given_proxy_without_api_key_when_translate_subtitle_then_no_value_error( + self, monkeypatch + ): + # Guards: _ensure_translator_api_key must skip key check when proxy is configured, + # otherwise translate_subtitle raises ValueError before _build_model runs. + monkeypatch.setenv("TRANSLATOR_MODEL", "openai:any-model") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") + get_settings.cache_clear() + + entries = [ + SubtitleEntry( + index=1, + start=timedelta(seconds=0), + end=timedelta(seconds=2), + text="Hello", + ) + ] + subtitle = Subtitle(entries=entries) + + with patch("bilingualsub.core.translator.Agent") as mock_agent: + mock_translator = Mock() + mock_agent.return_value = mock_translator + mock_response = Mock() + mock_response.content = "1. 你好" + mock_translator.run.return_value = mock_response + + translate_subtitle(subtitle) + + model_arg = mock_agent.call_args.kwargs["model"] + assert isinstance(model_arg, OpenAIChat) + assert model_arg.api_key == _PROXY_PLACEHOLDER_API_KEY + + def test_given_openai_model_with_proxy_when_translate_subtitle_then_uses_openai_chat( + self, monkeypatch + ): + monkeypatch.setenv("TRANSLATOR_MODEL", "openai:claude-sonnet-4-5") + monkeypatch.setenv("OPENAI_API_KEY", "cli-token") + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") + get_settings.cache_clear() + + entries = [ + SubtitleEntry( + index=1, + start=timedelta(seconds=0), + end=timedelta(seconds=2), + text="Hello", + ) + ] + subtitle = Subtitle(entries=entries) + + with patch("bilingualsub.core.translator.Agent") as mock_agent: + mock_translator = Mock() + mock_agent.return_value = mock_translator + mock_response = Mock() + mock_response.content = "1. 你好" + mock_translator.run.return_value = mock_response + + translate_subtitle(subtitle) + + model_arg = mock_agent.call_args.kwargs["model"] + assert isinstance(model_arg, OpenAIChat) + assert model_arg.id == "claude-sonnet-4-5" From 0fcd4d57f09a1337765adb82fbcdf1582a35708c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 26 Jun 2026 17:21:37 +0800 Subject: [PATCH 02/15] feat: Whisper prompt hint from video title and glossary terms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass video title and glossary source terms as Whisper's prompt parameter to improve proper noun recognition during transcription. Previously, metadata only flowed to the translator — now the transcriber also benefits from contextual hints. - Add build_whisper_prompt() to construct prompt from title + terms - Add prompt parameter to transcribe_audio / _transcribe_single - Add extract_source_terms() to parse glossary formatted text - Wire prompt construction into pipeline before transcription call - Add 12 unit tests (8 transcriber + 4 glossary) Co-Authored-By: Claude Opus 4.6 --- .secrets.baseline | 4 +- src/bilingualsub/api/pipeline.py | 12 +++- src/bilingualsub/core/glossary.py | 17 +++++ src/bilingualsub/core/transcriber.py | 55 +++++++++++++---- tests/unit/core/test_glossary.py | 26 +++++++- tests/unit/core/test_transcriber.py | 92 +++++++++++++++++++++++++++- 6 files changed, 190 insertions(+), 16 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 5d1a66b..2626389 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -139,7 +139,7 @@ "filename": "tests/unit/core/test_transcriber.py", "hashed_secret": "2e7a7ee14caebf378fc32d6cf6f557f347c96773", "is_verified": false, - "line_number": 78 + "line_number": 82 } ], "tests/unit/utils/test_config.py": [ @@ -166,5 +166,5 @@ } ] }, - "generated_at": "2026-02-11T14:21:40Z" + "generated_at": "2026-06-26T09:21:31Z" } diff --git a/src/bilingualsub/api/pipeline.py b/src/bilingualsub/api/pipeline.py index 42fab1f..43c8805 100644 --- a/src/bilingualsub/api/pipeline.py +++ b/src/bilingualsub/api/pipeline.py @@ -36,7 +36,9 @@ transcribe_audio, translate_subtitle, ) +from bilingualsub.core.glossary import extract_source_terms from bilingualsub.core.subtitle_fetcher import fetch_manual_subtitle +from bilingualsub.core.transcriber import build_whisper_prompt from bilingualsub.formats import serialize_bilingual_ass, serialize_srt from bilingualsub.utils import ( FFmpegError, @@ -491,8 +493,16 @@ async def run_subtitle(job: Job) -> None: job, JobStatus.TRANSCRIBING, 20.0, "transcribe", "Transcribing audio" ) t0 = time.monotonic() + glossary_terms = extract_source_terms(job.glossary_text) + whisper_prompt = build_whisper_prompt( + video_title=job.video_title, + glossary_terms=glossary_terms or None, + ) original_sub = await asyncio.to_thread( - transcribe_audio, audio_path, language=job.source_lang + transcribe_audio, + audio_path, + language=job.source_lang, + prompt=whisper_prompt, ) log.info( "step_done", diff --git a/src/bilingualsub/core/glossary.py b/src/bilingualsub/core/glossary.py index 0352e21..56e3cdf 100644 --- a/src/bilingualsub/core/glossary.py +++ b/src/bilingualsub/core/glossary.py @@ -132,3 +132,20 @@ def format_for_prompt(self) -> str: "以下是術語表,請嚴格依照此表翻譯對應的專有名詞:\n" + "\n".join(lines) # noqa: RUF001 ) return self._prompt_cache + + +def extract_source_terms(formatted_text: str) -> list[str]: + """Extract source terms from formatted glossary prompt text. + + Parses text produced by GlossaryManager.format_for_prompt(), + which uses the format "source → target" per line. + """ + if not formatted_text: + return [] + terms: list[str] = [] + for line in formatted_text.strip().split("\n"): + if "→" in line: + source = line.split("→")[0].strip() + if source: + terms.append(source) + return terms diff --git a/src/bilingualsub/core/transcriber.py b/src/bilingualsub/core/transcriber.py index 6c0a658..83e48e0 100644 --- a/src/bilingualsub/core/transcriber.py +++ b/src/bilingualsub/core/transcriber.py @@ -11,18 +11,40 @@ from bilingualsub.utils.config import get_groq_api_key, get_openai_api_key, get_settings from bilingualsub.utils.ffmpeg import split_audio +_MAX_WHISPER_PROMPT_CHARS = 800 + class TranscriptionError(Exception): """Raised when audio transcription fails.""" -def _transcribe_single(audio_path: Path, *, language: str, settings: Any) -> Subtitle: +def build_whisper_prompt( + video_title: str = "", + glossary_terms: list[str] | None = None, +) -> str | None: + parts: list[str] = [] + if video_title: + parts.append(video_title.strip()) + if glossary_terms: + parts.append(", ".join(glossary_terms)) + if not parts: + return None + prompt = ". ".join(parts) + if len(prompt) > _MAX_WHISPER_PROMPT_CHARS: + return prompt[:_MAX_WHISPER_PROMPT_CHARS] + return prompt + + +def _transcribe_single( + audio_path: Path, *, language: str, settings: Any, prompt: str | None = None +) -> Subtitle: """Transcribe a single audio file (must be <= 25MB). Args: audio_path: Path to audio file language: ISO 639-1 language code settings: Application settings + prompt: Optional hint text to guide transcription accuracy Returns: Subtitle object with transcribed entries @@ -45,12 +67,15 @@ def _transcribe_single(audio_path: Path, *, language: str, settings: Any) -> Sub try: with audio_path.open("rb") as audio_file: - transcription = client.audio.transcriptions.create( - file=(audio_path.name, audio_file), - model=settings.transcriber_model, - response_format="verbose_json", - language=language, - ) + create_kwargs: dict[str, Any] = { + "file": (audio_path.name, audio_file), + "model": settings.transcriber_model, + "response_format": "verbose_json", + "language": language, + } + if prompt: + create_kwargs["prompt"] = prompt + transcription = client.audio.transcriptions.create(**create_kwargs) except Exception as e: raise TranscriptionError(f"Failed to transcribe audio: {e}") from e @@ -76,7 +101,9 @@ def _transcribe_single(audio_path: Path, *, language: str, settings: Any) -> Sub raise TranscriptionError(f"Failed to parse transcription result: {e}") from e -def transcribe_audio(audio_path: Path, *, language: str = "en") -> Subtitle: +def transcribe_audio( + audio_path: Path, *, language: str = "en", prompt: str | None = None +) -> Subtitle: """ Transcribe audio file to subtitle using Whisper API. @@ -85,6 +112,8 @@ def transcribe_audio(audio_path: Path, *, language: str = "en") -> Subtitle: Args: audio_path: Path to audio/video file language: ISO 639-1 language code (e.g., "en", "zh", "ja") + prompt: Optional hint text (e.g., from build_whisper_prompt) to improve + proper noun recognition Returns: Subtitle object with transcribed entries @@ -99,12 +128,14 @@ def transcribe_audio(audio_path: Path, *, language: str = "en") -> Subtitle: if not audio_path.is_file(): raise ValueError(f"Audio path is not a file: {audio_path}") - language = language.split("-")[0] + language = language.split("-", maxsplit=1)[0] settings = get_settings() file_size_mb = audio_path.stat().st_size / (1024 * 1024) if file_size_mb <= 25: - return _transcribe_single(audio_path, language=language, settings=settings) + return _transcribe_single( + audio_path, language=language, settings=settings, prompt=prompt + ) # Large file: split into chunks and transcribe each @@ -112,7 +143,9 @@ def transcribe_audio(audio_path: Path, *, language: str = "en") -> Subtitle: all_entries: list[SubtitleEntry] = [] idx = 1 for chunk_path, time_offset in chunks: - subtitle = _transcribe_single(chunk_path, language=language, settings=settings) + subtitle = _transcribe_single( + chunk_path, language=language, settings=settings, prompt=prompt + ) offset_td = timedelta(seconds=time_offset) for entry in subtitle.entries: all_entries.append( diff --git a/tests/unit/core/test_glossary.py b/tests/unit/core/test_glossary.py index 4598683..cc84d55 100644 --- a/tests/unit/core/test_glossary.py +++ b/tests/unit/core/test_glossary.py @@ -2,7 +2,12 @@ import pytest -from bilingualsub.core.glossary import GlossaryEntry, GlossaryError, GlossaryManager +from bilingualsub.core.glossary import ( + GlossaryEntry, + GlossaryError, + GlossaryManager, + extract_source_terms, +) @pytest.mark.unit @@ -119,3 +124,22 @@ def test_get_all_sorted_case_insensitive(self, tmp_path): manager.add("Alpha", "Alpha") entries = manager.get_all() assert [e.source for e in entries] == ["agent", "Alpha", "Zebra"] + + +@pytest.mark.unit +class TestExtractSourceTerms: + def test_given_empty_string_when_extract_then_returns_empty_list(self): + assert extract_source_terms("") == [] + + def test_given_formatted_glossary_when_extract_then_returns_source_terms(self): + text = "以下是術語表,請嚴格依照此表翻譯對應的專有名詞:\nClaude → 克勞德\nGPT → GPT" + result = extract_source_terms(text) + assert result == ["Claude", "GPT"] + + def test_given_single_entry_when_extract_then_returns_one_term(self): + text = "以下是術語表,請嚴格依照此表翻譯對應的專有名詞:\nWhisper → Whisper" + assert extract_source_terms(text) == ["Whisper"] + + def test_given_no_arrow_lines_when_extract_then_skips_them(self): + text = "Header line\nno arrow here\nClaude → 克勞德" + assert extract_source_terms(text) == ["Claude"] diff --git a/tests/unit/core/test_transcriber.py b/tests/unit/core/test_transcriber.py index 1fd37f5..a531aec 100644 --- a/tests/unit/core/test_transcriber.py +++ b/tests/unit/core/test_transcriber.py @@ -6,7 +6,11 @@ import pytest from bilingualsub.core.subtitle import Subtitle -from bilingualsub.core.transcriber import TranscriptionError, transcribe_audio +from bilingualsub.core.transcriber import ( + TranscriptionError, + build_whisper_prompt, + transcribe_audio, +) from bilingualsub.utils.config import get_settings @@ -417,3 +421,89 @@ def test_small_file_does_not_trigger_chunking( mock_split.assert_not_called() assert isinstance(result, Subtitle) assert len(result.entries) == 2 + + +@pytest.mark.unit +class TestWhisperPrompt: + """Test cases for build_whisper_prompt and prompt passthrough in transcription.""" + + @pytest.fixture(autouse=True) + def clear_settings_cache(self): + get_settings.cache_clear() + yield + get_settings.cache_clear() + + @pytest.fixture + def mock_groq(self): + with patch("bilingualsub.core.transcriber.Groq") as mock: + yield mock + + @pytest.fixture + def valid_verbose_json_response(self): + response = Mock() + response.segments = [ + {"id": 0, "start": 0.0, "end": 2.0, "text": " Hello world"}, + ] + return response + + def test_build_whisper_prompt_with_title_only(self): + result = build_whisper_prompt(video_title="My Product Review") + assert result == "My Product Review" + + def test_build_whisper_prompt_with_terms_only(self): + result = build_whisper_prompt(glossary_terms=["Claude", "GPT"]) + assert result == "Claude, GPT" + + def test_build_whisper_prompt_with_title_and_terms(self): + result = build_whisper_prompt( + video_title="My Product Review", glossary_terms=["Claude", "GPT"] + ) + assert result == "My Product Review. Claude, GPT" + + def test_build_whisper_prompt_empty(self): + result = build_whisper_prompt() + assert result is None + + def test_build_whisper_prompt_truncates_long_input(self): + long_title = "A" * 900 + result = build_whisper_prompt(video_title=long_title) + assert result is not None + assert len(result) == 800 + + def test_transcribe_single_passes_prompt_to_api( + self, tmp_path, mock_groq, valid_verbose_json_response, monkeypatch + ): + monkeypatch.setenv("GROQ_API_KEY", "test-api-key") + + audio_path = tmp_path / "audio.mp3" + audio_path.write_bytes(b"fake audio content") + + mock_client = MagicMock() + mock_groq.return_value = mock_client + mock_client.audio.transcriptions.create.return_value = ( + valid_verbose_json_response + ) + + transcribe_audio(audio_path, prompt="My Product Review. Claude, GPT") + + call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + assert call_kwargs["prompt"] == "My Product Review. Claude, GPT" + + def test_transcribe_single_omits_prompt_when_none( + self, tmp_path, mock_groq, valid_verbose_json_response, monkeypatch + ): + monkeypatch.setenv("GROQ_API_KEY", "test-api-key") + + audio_path = tmp_path / "audio.mp3" + audio_path.write_bytes(b"fake audio content") + + mock_client = MagicMock() + mock_groq.return_value = mock_client + mock_client.audio.transcriptions.create.return_value = ( + valid_verbose_json_response + ) + + transcribe_audio(audio_path) + + call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + assert "prompt" not in call_kwargs From 54bfef6e683883a70f80dfad3521d102790dbd0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 26 Jun 2026 21:46:19 +0800 Subject: [PATCH 03/15] feat: Whisper prompt hint from video title and glossary terms Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 7 +++ src/bilingualsub/api/pipeline.py | 9 +-- src/bilingualsub/core/__init__.py | 7 ++- src/bilingualsub/core/glossary.py | 17 ------ src/bilingualsub/core/transcriber.py | 41 +++++++++----- src/bilingualsub/core/translator.py | 9 +-- src/bilingualsub/utils/config.py | 2 +- tests/unit/core/test_glossary.py | 20 ------- tests/unit/core/test_transcriber.py | 84 ++++++++++++++++++++++------ tests/unit/core/test_translator.py | 16 +++++- uv.lock | 14 +++++ 11 files changed, 143 insertions(+), 83 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f558cf..e973a41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -200,3 +200,10 @@ sort_by_size = true targets = ["src/bilingualsub"] exclude_dirs = ["tests", ".venv"] skips = ["B101"] # Skip assert warnings (used in tests) + +[dependency-groups] +dev = [ + "mypy>=1.19.1", + "pytest-asyncio>=1.3.0", + "pytest-cov>=7.0.0", +] diff --git a/src/bilingualsub/api/pipeline.py b/src/bilingualsub/api/pipeline.py index 43c8805..6b6e566 100644 --- a/src/bilingualsub/api/pipeline.py +++ b/src/bilingualsub/api/pipeline.py @@ -30,15 +30,14 @@ TranslationError, VideoMetadata, VisualDescriptionError, + build_whisper_prompt, describe_video, download_video, merge_subtitles, transcribe_audio, translate_subtitle, ) -from bilingualsub.core.glossary import extract_source_terms from bilingualsub.core.subtitle_fetcher import fetch_manual_subtitle -from bilingualsub.core.transcriber import build_whisper_prompt from bilingualsub.formats import serialize_bilingual_ass, serialize_srt from bilingualsub.utils import ( FFmpegError, @@ -493,11 +492,7 @@ async def run_subtitle(job: Job) -> None: job, JobStatus.TRANSCRIBING, 20.0, "transcribe", "Transcribing audio" ) t0 = time.monotonic() - glossary_terms = extract_source_terms(job.glossary_text) - whisper_prompt = build_whisper_prompt( - video_title=job.video_title, - glossary_terms=glossary_terms or None, - ) + whisper_prompt = build_whisper_prompt(video_title=job.video_title) original_sub = await asyncio.to_thread( transcribe_audio, audio_path, diff --git a/src/bilingualsub/core/__init__.py b/src/bilingualsub/core/__init__.py index e3d2e7b..dceefbe 100644 --- a/src/bilingualsub/core/__init__.py +++ b/src/bilingualsub/core/__init__.py @@ -9,7 +9,11 @@ from bilingualsub.core.merger import merge_subtitles from bilingualsub.core.subtitle import Subtitle, SubtitleEntry from bilingualsub.core.subtitle_fetcher import SubtitleFetchError, fetch_manual_subtitle -from bilingualsub.core.transcriber import TranscriptionError, transcribe_audio +from bilingualsub.core.transcriber import ( + TranscriptionError, + build_whisper_prompt, + transcribe_audio, +) from bilingualsub.core.translator import ( RetranslateEntry, TranslationError, @@ -34,6 +38,7 @@ "TranslationError", "VideoMetadata", "VisualDescriptionError", + "build_whisper_prompt", "describe_video", "download_video", "fetch_manual_subtitle", diff --git a/src/bilingualsub/core/glossary.py b/src/bilingualsub/core/glossary.py index 56e3cdf..0352e21 100644 --- a/src/bilingualsub/core/glossary.py +++ b/src/bilingualsub/core/glossary.py @@ -132,20 +132,3 @@ def format_for_prompt(self) -> str: "以下是術語表,請嚴格依照此表翻譯對應的專有名詞:\n" + "\n".join(lines) # noqa: RUF001 ) return self._prompt_cache - - -def extract_source_terms(formatted_text: str) -> list[str]: - """Extract source terms from formatted glossary prompt text. - - Parses text produced by GlossaryManager.format_for_prompt(), - which uses the format "source → target" per line. - """ - if not formatted_text: - return [] - terms: list[str] = [] - for line in formatted_text.strip().split("\n"): - if "→" in line: - source = line.split("→")[0].strip() - if source: - terms.append(source) - return terms diff --git a/src/bilingualsub/core/transcriber.py b/src/bilingualsub/core/transcriber.py index 83e48e0..597629f 100644 --- a/src/bilingualsub/core/transcriber.py +++ b/src/bilingualsub/core/transcriber.py @@ -20,19 +20,24 @@ class TranscriptionError(Exception): def build_whisper_prompt( video_title: str = "", - glossary_terms: list[str] | None = None, ) -> str | None: - parts: list[str] = [] - if video_title: - parts.append(video_title.strip()) - if glossary_terms: - parts.append(", ".join(glossary_terms)) - if not parts: + """Build a Whisper prompt from video title to improve transcription accuracy. + + Strips whitespace from the title, returns None if empty, and truncates + to ``_MAX_WHISPER_PROMPT_CHARS`` when the title exceeds the limit. + + Args: + video_title: Raw video title string. + + Returns: + Cleaned title string, or None if the title is blank. + """ + title = video_title.strip() + if not title: return None - prompt = ". ".join(parts) - if len(prompt) > _MAX_WHISPER_PROMPT_CHARS: - return prompt[:_MAX_WHISPER_PROMPT_CHARS] - return prompt + if len(title) > _MAX_WHISPER_PROMPT_CHARS: + return title[:_MAX_WHISPER_PROMPT_CHARS] + return title def _transcribe_single( @@ -84,15 +89,21 @@ def _transcribe_single( if not segments: raise TranscriptionError("Transcription returned no segments") - entries = [] - for i, seg in enumerate(segments, start=1): - entry = SubtitleEntry( + entries = [ + SubtitleEntry( index=i, start=timedelta(seconds=seg["start"]), end=timedelta(seconds=seg["end"]), text=seg["text"].strip(), ) - entries.append(entry) + for i, seg in enumerate( + (s for s in segments if s["start"] < s["end"] and s["text"].strip()), + start=1, + ) + ] + + if not entries: + raise TranscriptionError("No valid segments after filtering") return Subtitle(entries=entries) except TranscriptionError: diff --git a/src/bilingualsub/core/translator.py b/src/bilingualsub/core/translator.py index 4b529d6..d6082f2 100644 --- a/src/bilingualsub/core/translator.py +++ b/src/bilingualsub/core/translator.py @@ -27,6 +27,7 @@ _PARTIAL_CONTEXT_WINDOW = 2 _MAX_METADATA_TITLE_CHARS = 200 _MAX_METADATA_DESC_CHARS = 1200 +_GROQ_PREFIX = "groq:" _OPENAI_PREFIX = "openai:" _PROXY_PLACEHOLDER_API_KEY = "dummy" # pragma: allowlist secret @@ -66,7 +67,7 @@ def _ensure_translator_api_key(settings: Settings) -> None: ValueError: If required provider key is missing. """ model_str = settings.translator_model - if model_str.strip().lower().startswith("groq:"): + if model_str.strip().lower().startswith(_GROQ_PREFIX): get_groq_api_key() elif _is_openai_model(model_str) and not settings.openai_base_url: get_openai_api_key() @@ -81,12 +82,12 @@ def _build_model(settings: Settings) -> str | Model: (e.g. CLIProxyAPI) to be used without touching the Agno provider registry. In all other cases the raw model string is returned and Agno handles - provider resolution itself (existing behaviour). + provider resolution itself (existing behavior). """ model_str = settings.translator_model if _is_openai_model(model_str) and settings.openai_base_url: - # Slice original to preserve model ID casing - model_id = model_str[len(_OPENAI_PREFIX) :] + # _is_openai_model lowercases; slice original to preserve casing + model_id = model_str.strip()[len(_OPENAI_PREFIX) :] return OpenAIChat( id=model_id, base_url=settings.openai_base_url, diff --git a/src/bilingualsub/utils/config.py b/src/bilingualsub/utils/config.py index 8f6cced..ab26abe 100644 --- a/src/bilingualsub/utils/config.py +++ b/src/bilingualsub/utils/config.py @@ -27,7 +27,7 @@ class Settings(BaseSettings): @field_validator("openai_base_url") @classmethod def strip_trailing_slash(cls, v: str) -> str: - return v.rstrip("/") if v else v + return v.rstrip("/") transcriber_provider: str = "groq" transcriber_model: str = "whisper-large-v3-turbo" diff --git a/tests/unit/core/test_glossary.py b/tests/unit/core/test_glossary.py index cc84d55..e1f5c67 100644 --- a/tests/unit/core/test_glossary.py +++ b/tests/unit/core/test_glossary.py @@ -6,7 +6,6 @@ GlossaryEntry, GlossaryError, GlossaryManager, - extract_source_terms, ) @@ -124,22 +123,3 @@ def test_get_all_sorted_case_insensitive(self, tmp_path): manager.add("Alpha", "Alpha") entries = manager.get_all() assert [e.source for e in entries] == ["agent", "Alpha", "Zebra"] - - -@pytest.mark.unit -class TestExtractSourceTerms: - def test_given_empty_string_when_extract_then_returns_empty_list(self): - assert extract_source_terms("") == [] - - def test_given_formatted_glossary_when_extract_then_returns_source_terms(self): - text = "以下是術語表,請嚴格依照此表翻譯對應的專有名詞:\nClaude → 克勞德\nGPT → GPT" - result = extract_source_terms(text) - assert result == ["Claude", "GPT"] - - def test_given_single_entry_when_extract_then_returns_one_term(self): - text = "以下是術語表,請嚴格依照此表翻譯對應的專有名詞:\nWhisper → Whisper" - assert extract_source_terms(text) == ["Whisper"] - - def test_given_no_arrow_lines_when_extract_then_skips_them(self): - text = "Header line\nno arrow here\nClaude → 克勞德" - assert extract_source_terms(text) == ["Claude"] diff --git a/tests/unit/core/test_transcriber.py b/tests/unit/core/test_transcriber.py index a531aec..2a71939 100644 --- a/tests/unit/core/test_transcriber.py +++ b/tests/unit/core/test_transcriber.py @@ -83,7 +83,7 @@ def test_transcribe_valid_audio_file( # Verify transcription API was called mock_client.audio.transcriptions.create.assert_called_once() - call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + call_kwargs = mock_client.audio.transcriptions.create.call_args.kwargs assert call_kwargs["model"] == "whisper-large-v3-turbo" assert call_kwargs["response_format"] == "verbose_json" assert call_kwargs["language"] == "en" @@ -119,7 +119,7 @@ def test_transcribe_with_chinese_language(self, tmp_path, mock_groq, monkeypatch result = transcribe_audio(audio_path, language="zh") # Verify language parameter was passed - call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + call_kwargs = mock_client.audio.transcriptions.create.call_args.kwargs assert call_kwargs["language"] == "zh" # Verify result @@ -308,7 +308,7 @@ def test_various_audio_formats(self, tmp_path, mock_groq, monkeypatch): assert result.entries[0].text == "Test" # Verify correct filename was sent - call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + call_kwargs = mock_client.audio.transcriptions.create.call_args.kwargs assert call_kwargs["file"][0] == f"audio{fmt}" def test_empty_api_key_raises_error(self, tmp_path, monkeypatch, no_env_file): @@ -341,7 +341,7 @@ def test_default_language_is_english(self, tmp_path, mock_groq, monkeypatch): transcribe_audio(audio_path) - call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + call_kwargs = mock_client.audio.transcriptions.create.call_args.kwargs assert call_kwargs["language"] == "en" def test_transcribe_with_openai_provider( @@ -386,7 +386,7 @@ def test_transcribe_with_custom_model( transcribe_audio(audio_path) - call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + call_kwargs = mock_client.audio.transcriptions.create.call_args.kwargs assert call_kwargs["model"] == "whisper-large-v3" def test_unknown_provider_raises_error(self, tmp_path, monkeypatch): @@ -450,24 +450,21 @@ def test_build_whisper_prompt_with_title_only(self): result = build_whisper_prompt(video_title="My Product Review") assert result == "My Product Review" - def test_build_whisper_prompt_with_terms_only(self): - result = build_whisper_prompt(glossary_terms=["Claude", "GPT"]) - assert result == "Claude, GPT" - - def test_build_whisper_prompt_with_title_and_terms(self): - result = build_whisper_prompt( - video_title="My Product Review", glossary_terms=["Claude", "GPT"] - ) - assert result == "My Product Review. Claude, GPT" + def test_build_whisper_prompt_with_whitespace_title(self): + result = build_whisper_prompt(video_title=" My Product Review ") + assert result == "My Product Review" def test_build_whisper_prompt_empty(self): result = build_whisper_prompt() assert result is None + def test_build_whisper_prompt_whitespace_only(self): + result = build_whisper_prompt(video_title=" ") + assert result is None + def test_build_whisper_prompt_truncates_long_input(self): long_title = "A" * 900 result = build_whisper_prompt(video_title=long_title) - assert result is not None assert len(result) == 800 def test_transcribe_single_passes_prompt_to_api( @@ -486,7 +483,7 @@ def test_transcribe_single_passes_prompt_to_api( transcribe_audio(audio_path, prompt="My Product Review. Claude, GPT") - call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + call_kwargs = mock_client.audio.transcriptions.create.call_args.kwargs assert call_kwargs["prompt"] == "My Product Review. Claude, GPT" def test_transcribe_single_omits_prompt_when_none( @@ -505,5 +502,58 @@ def test_transcribe_single_omits_prompt_when_none( transcribe_audio(audio_path) - call_kwargs = mock_client.audio.transcriptions.create.call_args[1] + call_kwargs = mock_client.audio.transcriptions.create.call_args.kwargs assert "prompt" not in call_kwargs + + def test_transcribe_filters_zero_duration_segments( + self, tmp_path, mock_groq, monkeypatch + ): + monkeypatch.setenv("GROQ_API_KEY", "test-api-key") + + audio_path = tmp_path / "audio.mp3" + audio_path.write_bytes(b"fake audio content") + + response = Mock() + response.segments = [ + {"id": 0, "start": 0.0, "end": 2.0, "text": " Valid segment"}, + {"id": 1, "start": 3.0, "end": 3.0, "text": " Zero duration"}, + {"id": 2, "start": 5.0, "end": 4.0, "text": " Negative duration"}, + {"id": 3, "start": 6.0, "end": 6.5, "text": " "}, + {"id": 4, "start": 7.0, "end": 9.0, "text": " Another valid"}, + ] + + mock_client = MagicMock() + mock_groq.return_value = mock_client + mock_client.audio.transcriptions.create.return_value = response + + result = transcribe_audio(audio_path) + + assert len(result.entries) == 2 + assert result.entries[0].text == "Valid segment" + assert result.entries[1].text == "Another valid" + assert result.entries[0].index == 1 + assert result.entries[1].index == 2 + + def test_transcribe_raises_when_all_segments_filtered( + self, tmp_path, mock_groq, monkeypatch + ): + monkeypatch.setenv("GROQ_API_KEY", "test-api-key") + + audio_path = tmp_path / "audio.mp3" + audio_path.write_bytes(b"fake audio content") + + response = Mock() + response.segments = [ + {"id": 0, "start": 1.0, "end": 1.0, "text": " Zero duration"}, + {"id": 1, "start": 3.0, "end": 2.0, "text": " Negative"}, + {"id": 2, "start": 5.0, "end": 6.0, "text": " "}, + ] + + mock_client = MagicMock() + mock_groq.return_value = mock_client + mock_client.audio.transcriptions.create.return_value = response + + with pytest.raises( + TranscriptionError, match="No valid segments after filtering" + ): + transcribe_audio(audio_path) diff --git a/tests/unit/core/test_translator.py b/tests/unit/core/test_translator.py index f6bcebb..adefbca 100644 --- a/tests/unit/core/test_translator.py +++ b/tests/unit/core/test_translator.py @@ -778,7 +778,7 @@ def test_given_openai_model_with_base_url_when_build_model_then_returns_openai_c assert isinstance(result, OpenAIChat) assert result.id == "claude-sonnet-4-5" - assert str(result.base_url) == "http://localhost:3000/v1" + assert result.base_url == "http://localhost:3000/v1" def test_given_proxy_without_api_key_when_build_model_then_uses_placeholder( self, monkeypatch @@ -794,6 +794,19 @@ def test_given_proxy_without_api_key_when_build_model_then_uses_placeholder( assert isinstance(result, OpenAIChat) assert result.api_key == _PROXY_PLACEHOLDER_API_KEY + def test_given_base_url_with_trailing_slash_when_build_model_then_slash_stripped( + self, monkeypatch + ): + monkeypatch.setenv("TRANSLATOR_MODEL", "openai:gpt-4o") + monkeypatch.setenv("OPENAI_API_KEY", "test-key") + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1/") + get_settings.cache_clear() + + result = _build_model(get_settings()) + + assert isinstance(result, OpenAIChat) + assert result.base_url == "http://localhost:3000/v1" + def test_given_proxy_without_api_key_when_translate_subtitle_then_no_value_error( self, monkeypatch ): @@ -821,6 +834,7 @@ def test_given_proxy_without_api_key_when_translate_subtitle_then_no_value_error mock_response.content = "1. 你好" mock_translator.run.return_value = mock_response + # Primary assertion: no ValueError raised — proxy skips API key check translate_subtitle(subtitle) model_arg = mock_agent.call_args.kwargs["model"] diff --git a/uv.lock b/uv.lock index c39dbe2..800f909 100644 --- a/uv.lock +++ b/uv.lock @@ -115,6 +115,13 @@ e2e = [ { name = "pytest-playwright" }, ] +[package.dev-dependencies] +dev = [ + { name = "mypy" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, +] + [package.metadata] requires-dist = [ { name = "agno", specifier = ">=1.0.0" }, @@ -150,6 +157,13 @@ requires-dist = [ ] provides-extras = ["dev", "e2e"] +[package.metadata.requires-dev] +dev = [ + { name = "mypy", specifier = ">=1.19.1" }, + { name = "pytest-asyncio", specifier = ">=1.3.0" }, + { name = "pytest-cov", specifier = ">=7.0.0" }, +] + [[package]] name = "boolean-py" version = "5.0" From 96e075fa9dad7991db5b632094f7da7c5c79a98d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 10:52:40 +0800 Subject: [PATCH 04/15] fix: strengthen subtitle retranslation context --- src/bilingualsub/core/translator.py | 12 ++++++++---- tests/unit/core/test_translator.py | 6 +++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/bilingualsub/core/translator.py b/src/bilingualsub/core/translator.py index d6082f2..21e985b 100644 --- a/src/bilingualsub/core/translator.py +++ b/src/bilingualsub/core/translator.py @@ -21,10 +21,10 @@ logger = structlog.get_logger() _BATCH_SIZE = 10 -_CONTEXT_SIZE = 3 # Number of previous entries to include as context +_CONTEXT_SIZE = 5 # Number of previous entries to include as context _LOOKAHEAD_SIZE = 3 # Number of upcoming entries to include as forward context _MAX_RETRIES = 5 -_PARTIAL_CONTEXT_WINDOW = 2 +_PARTIAL_CONTEXT_WINDOW = 5 _MAX_METADATA_TITLE_CHARS = 200 _MAX_METADATA_DESC_CHARS = 1200 _GROQ_PREFIX = "groq:" @@ -259,7 +259,9 @@ def _translate_batch( prompt = ( f"{context_section}" f"將以下編號字幕從{source_lang}翻譯成{target_lang}。\n" - f"只回傳編號翻譯,每行一條,編號與原文一致。\n\n" # noqa: RUF001 + f"只回傳編號翻譯,每行一條,編號與原文一致。\n" # noqa: RUF001 + "若原文專有名詞疑似語音辨識錯字,請依上文、下文、影片背景與術語表修正後翻譯。" # noqa: RUF001 + "例如同一影片已出現的品牌、人名、產品名與網域應保持一致。\n\n" f"{numbered_lines}" f"{lookahead_section}" ) @@ -569,7 +571,9 @@ def retranslate_entries( prompt_sections = "\n\n".join(sections) prompt = (f"{prompt_sections}\n\n" if prompt_sections else "") + ( f"請將以下字幕從{source_lang}翻譯成{target_lang}。\n" - "只回傳單行翻譯內容,不要加編號、引號或任何說明。\n\n" # noqa: RUF001 + "只回傳單行翻譯內容,不要加編號、引號或任何說明。\n" # noqa: RUF001 + "若原文專有名詞疑似語音辨識錯字,請依上文、下文、影片背景、術語表與使用者補充上下文修正後翻譯。" # noqa: RUF001 + "例如同一影片已出現的品牌、人名、產品名與網域應保持一致。\n\n" f"原文: {target_entry.original}\n" f"目前翻譯(可修正): {target_entry.translated or '(空)'}" # noqa: RUF001 ) diff --git a/tests/unit/core/test_translator.py b/tests/unit/core/test_translator.py index adefbca..2fc1edd 100644 --- a/tests/unit/core/test_translator.py +++ b/tests/unit/core/test_translator.py @@ -460,10 +460,13 @@ def make_response(*args, **kwargs): assert mock_translator.run.call_count == 2 second_prompt = mock_translator.run.call_args_list[1][0][0] assert "上文參考" in second_prompt - # Should contain entries from the end of first batch (last 3) + # Should contain entries from the end of first batch (last 5) + assert "Line 6" in second_prompt + assert "Line 7" in second_prompt assert "Line 8" in second_prompt assert "Line 9" in second_prompt assert "Line 10" in second_prompt + assert "語音辨識錯字" in second_prompt @pytest.mark.unit def test_context_contains_original_and_translated(self): @@ -726,6 +729,7 @@ def test_retranslate_entries_with_context(self): assert "上文參考" in prompt assert "下文參考" in prompt assert "主題是太空探索" in prompt + assert "語音辨識錯字" in prompt def test_retranslate_entries_invalid_index_raises_error(self): entries = [RetranslateEntry(index=1, original="Line 1", translated="第一句")] From afc6467ad63e86865d38d1de7bf46ba1f415127e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 11:12:54 +0800 Subject: [PATCH 05/15] fix: use CJK font fallback for intro text --- src/bilingualsub/utils/ffmpeg.py | 7 ++++--- tests/unit/utils/test_ffmpeg_intro.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/bilingualsub/utils/ffmpeg.py b/src/bilingualsub/utils/ffmpeg.py index e00fbd0..9e96fee 100644 --- a/src/bilingualsub/utils/ffmpeg.py +++ b/src/bilingualsub/utils/ffmpeg.py @@ -19,6 +19,7 @@ _FONT_EN_BOLD = _ASSETS_DIR / "LINESeedSans_Bd.ttf" _FONT_ZH_REGULAR = _ASSETS_DIR / "NotoSansTC-Regular.ttf" _FONT_ZH_BOLD = _ASSETS_DIR / "NotoSansTC-Bold.ttf" +_FONT_ZH_FALLBACK = "Noto Sans CJK TC" def _font_arg(fontfile: Path, fallback_name: str) -> str: @@ -590,7 +591,7 @@ def _next_start() -> float: blocks.append( _dt( "原始影片來自", - _font_arg(_FONT_ZH_REGULAR, "serif"), + _font_arg(_FONT_ZH_REGULAR, _FONT_ZH_FALLBACK), max(1, int(height / 42)), "white@0.6", x_left, @@ -636,7 +637,7 @@ def _next_start() -> float: blocks.append( _dt( video_title, - _font_arg(_FONT_ZH_REGULAR, "serif"), + _font_arg(_FONT_ZH_REGULAR, _FONT_ZH_FALLBACK), max(1, int(height / 34)), "white@0.7", x_left, @@ -673,7 +674,7 @@ def _next_start() -> float: blocks.append( _dt( line, - _font_arg(_FONT_ZH_REGULAR, "serif"), + _font_arg(_FONT_ZH_REGULAR, _FONT_ZH_FALLBACK), max(1, int(height / 45)), "white@0.45", x_left, diff --git a/tests/unit/utils/test_ffmpeg_intro.py b/tests/unit/utils/test_ffmpeg_intro.py index 4ccb131..e0a4336 100644 --- a/tests/unit/utils/test_ffmpeg_intro.py +++ b/tests/unit/utils/test_ffmpeg_intro.py @@ -238,6 +238,30 @@ def test_when_channel_url_empty_then_vf_does_not_contain_channel_url_value( # Exactly 12 drawtext blocks when channel_url is omitted (13 when present) assert vf_value.count("drawtext=") == 12 + def test_generate_intro_uses_cjk_font_fallback_for_chinese_text( + self, tmp_path: Path, mock_intro_ffmpeg: dict + ) -> None: + """Chinese intro text should target an installed CJK font, not generic serif.""" + output_path = tmp_path / "intro.mp4" + + generate_intro( + output_path, + width=1280, + height=720, + fps=30.0, + channel="ClaudeDevs", + video_title="ClaudeDevs - Artifacts in Claude Code", + video_url="https://x.com/ClaudeDevs/status/2072770790114914317?s=20", + channel_url="https://x.com/ClaudeDevs", + ) + + cmd = _get_popen_cmd(mock_intro_ffmpeg["popen"]) + vf_idx = cmd.index("-vf") + vf_value = cmd[vf_idx + 1] + + assert "font='Noto Sans CJK TC'" in vf_value + assert "font='serif'" not in vf_value + def test_when_ffmpeg_fails_then_raises_ffmpeg_error( self, tmp_path: Path, mock_intro_ffmpeg: dict ) -> None: From 257e9a954eeebef050aaa08a7018e368e807dd2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 11:53:34 +0800 Subject: [PATCH 06/15] feat: add CLIProxyAPI compose setup --- .env.example | 11 +++++++++++ .gitignore | 4 ++++ README.md | 47 ++++++++++++++++++++++++++++++++++++++++++++++ cliproxyapi.conf | 25 ++++++++++++++++++++++++ docker-compose.yml | 27 ++++++++++++++++++++++++++ 5 files changed, 114 insertions(+) create mode 100644 cliproxyapi.conf create mode 100644 docker-compose.yml diff --git a/.env.example b/.env.example index 52bb86d..60c5ec9 100644 --- a/.env.example +++ b/.env.example @@ -8,9 +8,20 @@ TRANSCRIBER_MODEL=whisper-large-v3-turbo # groq:openai/gpt-oss-120b (cloud, default) # groq:llama-3.3-70b-versatile (cloud, stable) # openai:gpt-4o-mini (cloud, reliable) +# openai:bilingualsub-gemini-flash (Docker Compose via CLIProxyAPI) # ollama:TwinkleAI/gemma-3-4B-T1-it (local, free) TRANSLATOR_MODEL=groq:openai/gpt-oss-120b +# === CLIProxyAPI (used by docker-compose.yml) === +# 1. Run host OAuth login first: cliproxyapi -antigravity-login +# 2. docker-compose.yml mounts this auth directory into the cli-proxy container. +# Leave CLIPROXY_AUTH_DIR unset to use ${HOME}/.cli-proxy-api. +# Set an absolute path only if your CLIProxyAPI auth directory is elsewhere. +# CLIPROXY_AUTH_DIR=/Users/you/.cli-proxy-api +CLIPROXY_API_KEY=bilingualsub-local +CLIPROXY_PORT=8317 +BILINGUALSUB_PORT=7860 + # === API Keys (only needed for cloud providers) === GROQ_API_KEY= OPENAI_API_KEY= diff --git a/.gitignore b/.gitignore index 781f01e..260b9e6 100644 --- a/.gitignore +++ b/.gitignore @@ -91,6 +91,10 @@ frontend/.vite/ # Logs *.log logs/ +cliproxy-logs/ + +# CLIProxyAPI OAuth tokens +.cli-proxy-api/ # Temporary tmp/ diff --git a/README.md b/README.md index c2f7f9c..cf20f3c 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,52 @@ docker build -t bilingualsub . && docker run -p 7860:7860 -e GROQ_API_KEY=your_k Then open http://localhost:7860 in your browser. +### Docker Compose with CLIProxyAPI + +Use this path when you want translations to go through a local CLIProxyAPI +container backed by your own Antigravity/Codex/Claude OAuth login. + +First, install CLIProxyAPI on the host and log in. This creates OAuth token files +under `~/.cli-proxy-api`, which are mounted read/write into the proxy container: + +```bash +cliproxyapi -antigravity-login +``` + +Create a local `.env` from the example and set at least `GROQ_API_KEY`: + +```bash +cp .env.example .env +``` + +For the compose setup, use an OpenAI-compatible proxy model: + +```env +TRANSLATOR_MODEL=openai:bilingualsub-gemini-flash +CLIPROXY_API_KEY=bilingualsub-local +# Optional: set only when your auth directory is not ~/.cli-proxy-api +# CLIPROXY_AUTH_DIR=/absolute/path/to/.cli-proxy-api +``` + +Then start both services: + +```bash +docker compose up --build +``` + +BilingualSub runs at http://localhost:7860. It talks to CLIProxyAPI through the +compose network at `http://cli-proxy:8317/v1`, so OAuth tokens are never baked +into the image or committed to the repository. + +The default alias maps to Antigravity's `gemini-3.5-flash-low`, which is the +most consistently discoverable Flash variant in current CLIProxyAPI releases. +If the alias does not exist in your version, list the available proxy models +and set `TRANSLATOR_MODEL=openai:` in `.env`: + +```bash +curl -H "Authorization: Bearer bilingualsub-local" http://localhost:8317/v1/models +``` + ### Local Development **Prerequisites**: Python 3.11+, FFmpeg, Node.js 18+, pnpm @@ -56,6 +102,7 @@ Backend runs at http://localhost:8000, frontend at http://localhost:5173. | `TRANSCRIBER_PROVIDER` | Transcription provider | `groq` | No | | `TRANSCRIBER_MODEL` | Whisper model to use | `whisper-large-v3-turbo` | No | | `TRANSLATOR_MODEL` | LLM model for translation | `groq:openai/gpt-oss-120b` | No | +| `OPENAI_BASE_URL` | OpenAI-compatible proxy URL | - | No | ## Architecture diff --git a/cliproxyapi.conf b/cliproxyapi.conf new file mode 100644 index 0000000..3e05a79 --- /dev/null +++ b/cliproxyapi.conf @@ -0,0 +1,25 @@ +# Minimal CLIProxyAPI config for BilingualSub's Docker Compose setup. +# OAuth credentials are discovered from auth-dir, mounted from the host's +# ~/.cli-proxy-api directory by docker-compose.yml. + +host: "" +port: 8317 +auth-dir: "/root/.cli-proxy-api" + +api-keys: + - "bilingualsub-local" + +debug: false +logging-to-file: false +usage-statistics-enabled: false + +# Optional alias used by docker-compose.yml's default TRANSLATOR_MODEL. +# If CLIProxyAPI changes Antigravity upstream model IDs, set TRANSLATOR_MODEL +# in .env to one of the IDs returned by: +# curl -H "Authorization: Bearer bilingualsub-local" http://localhost:8317/v1/models +oauth-model-alias: + antigravity: + - name: "gemini-3.5-flash-low" + alias: "bilingualsub-gemini-flash" + fork: true + force-mapping: true diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b22e0f3 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,27 @@ +services: + cli-proxy: + image: eceasy/cli-proxy-api:latest + restart: unless-stopped + ports: + - '${CLIPROXY_PORT:-8317}:8317' + volumes: + - '${CLIPROXY_AUTH_DIR:-${HOME}/.cli-proxy-api}:/root/.cli-proxy-api' + - './cliproxyapi.conf:/CLIProxyAPI/config.yaml:ro' + command: ['-config', '/CLIProxyAPI/config.yaml'] + + bilingualsub: + build: . + image: bilingualsub:latest + restart: unless-stopped + ports: + - '${BILINGUALSUB_PORT:-7860}:7860' + environment: + GROQ_API_KEY: '${GROQ_API_KEY:?Set GROQ_API_KEY in .env or your shell}' + GEMINI_API_KEY: '${GEMINI_API_KEY:-}' + TRANSCRIBER_PROVIDER: '${TRANSCRIBER_PROVIDER:-groq}' + TRANSCRIBER_MODEL: '${TRANSCRIBER_MODEL:-whisper-large-v3-turbo}' + TRANSLATOR_MODEL: '${TRANSLATOR_MODEL:-openai:bilingualsub-gemini-flash}' + OPENAI_BASE_URL: 'http://cli-proxy:8317/v1' + OPENAI_API_KEY: '${CLIPROXY_API_KEY:-bilingualsub-local}' + depends_on: + - cli-proxy From ba4f765d2bd1772f221aed9d817e7429772d7182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 12:00:21 +0800 Subject: [PATCH 07/15] fix: harden CLIProxyAPI compose setup --- .env.example | 1 - README.md | 3 +- README.zh-TW.md | 46 +++++++++++++++++ docker-compose.yml | 6 +-- pyproject.toml | 7 --- tests/unit/api/test_pipeline.py | 1 + tests/unit/core/test_transcriber.py | 3 +- tests/unit/core/test_translator.py | 73 ++++++++++++--------------- tests/unit/utils/test_ffmpeg_intro.py | 6 ++- uv.lock | 14 ----- 10 files changed, 92 insertions(+), 68 deletions(-) diff --git a/.env.example b/.env.example index 60c5ec9..099c69e 100644 --- a/.env.example +++ b/.env.example @@ -18,7 +18,6 @@ TRANSLATOR_MODEL=groq:openai/gpt-oss-120b # Leave CLIPROXY_AUTH_DIR unset to use ${HOME}/.cli-proxy-api. # Set an absolute path only if your CLIProxyAPI auth directory is elsewhere. # CLIPROXY_AUTH_DIR=/Users/you/.cli-proxy-api -CLIPROXY_API_KEY=bilingualsub-local CLIPROXY_PORT=8317 BILINGUALSUB_PORT=7860 diff --git a/README.md b/README.md index cf20f3c..2d18676 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,6 @@ For the compose setup, use an OpenAI-compatible proxy model: ```env TRANSLATOR_MODEL=openai:bilingualsub-gemini-flash -CLIPROXY_API_KEY=bilingualsub-local # Optional: set only when your auth directory is not ~/.cli-proxy-api # CLIPROXY_AUTH_DIR=/absolute/path/to/.cli-proxy-api ``` @@ -63,6 +62,8 @@ docker compose up --build BilingualSub runs at http://localhost:7860. It talks to CLIProxyAPI through the compose network at `http://cli-proxy:8317/v1`, so OAuth tokens are never baked into the image or committed to the repository. +The proxy port is bound to `127.0.0.1` only; the compose stack uses the fixed +local bearer key `bilingualsub-local` internally. The default alias maps to Antigravity's `gemini-3.5-flash-low`, which is the most consistently discoverable Flash variant in current CLIProxyAPI releases. diff --git a/README.zh-TW.md b/README.zh-TW.md index 73489f5..4794b0a 100644 --- a/README.zh-TW.md +++ b/README.zh-TW.md @@ -27,6 +27,51 @@ docker build -t bilingualsub . && docker run -p 7860:7860 -e GROQ_API_KEY=your_k 然後在瀏覽器開啟 http://localhost:7860。 +### 使用 CLIProxyAPI 的 Docker Compose + +如果你想讓翻譯走本機 CLIProxyAPI container,並使用自己的 +Antigravity/Codex/Claude OAuth 登入狀態,使用這個流程。 + +先在 host 安裝 CLIProxyAPI 並登入。OAuth token 會建立在 +`~/.cli-proxy-api`,之後由 compose 掛進 proxy container: + +```bash +cliproxyapi -antigravity-login +``` + +從範例建立本機 `.env`,並至少設定 `GROQ_API_KEY`: + +```bash +cp .env.example .env +``` + +Compose 模式請使用 OpenAI-compatible proxy model: + +```env +TRANSLATOR_MODEL=openai:bilingualsub-gemini-flash +# 選填:只有 auth 目錄不是 ~/.cli-proxy-api 時才需要設定 +# CLIPROXY_AUTH_DIR=/absolute/path/to/.cli-proxy-api +``` + +啟動兩個服務: + +```bash +docker compose up --build +``` + +BilingualSub 會跑在 http://localhost:7860。它會透過 compose network 連到 +`http://cli-proxy:8317/v1`,OAuth token 不會被打包進 image,也不會 commit +到 repo。proxy 對 host 只綁定 `127.0.0.1`;compose stack 內部固定使用本機 +bearer key `bilingualsub-local`。 + +預設 alias 對應 Antigravity 的 `gemini-3.5-flash-low`,這是目前 CLIProxyAPI +版本中較穩定可發現的 Flash 變體。如果你的版本沒有這個 alias,可以列出可用 +模型,並在 `.env` 設定 `TRANSLATOR_MODEL=openai:`: + +```bash +curl -H "Authorization: Bearer bilingualsub-local" http://localhost:8317/v1/models +``` + ### 本地開發 **前置需求**:Python 3.11+、FFmpeg、Node.js 18+、pnpm @@ -56,6 +101,7 @@ cd frontend && pnpm dev | `TRANSCRIBER_PROVIDER` | 語音辨識供應商 | `groq` | 否 | | `TRANSCRIBER_MODEL` | 使用的 Whisper 模型 | `whisper-large-v3-turbo` | 否 | | `TRANSLATOR_MODEL` | 翻譯用的 LLM 模型 | `groq:openai/gpt-oss-120b` | 否 | +| `OPENAI_BASE_URL` | OpenAI-compatible proxy URL | - | 否 | ## 架構說明 diff --git a/docker-compose.yml b/docker-compose.yml index b22e0f3..e3feec2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,11 +3,11 @@ services: image: eceasy/cli-proxy-api:latest restart: unless-stopped ports: - - '${CLIPROXY_PORT:-8317}:8317' + - '127.0.0.1:${CLIPROXY_PORT:-8317}:8317' volumes: - '${CLIPROXY_AUTH_DIR:-${HOME}/.cli-proxy-api}:/root/.cli-proxy-api' - './cliproxyapi.conf:/CLIProxyAPI/config.yaml:ro' - command: ['-config', '/CLIProxyAPI/config.yaml'] + command: ['./CLIProxyAPI', '-config', '/CLIProxyAPI/config.yaml'] bilingualsub: build: . @@ -22,6 +22,6 @@ services: TRANSCRIBER_MODEL: '${TRANSCRIBER_MODEL:-whisper-large-v3-turbo}' TRANSLATOR_MODEL: '${TRANSLATOR_MODEL:-openai:bilingualsub-gemini-flash}' OPENAI_BASE_URL: 'http://cli-proxy:8317/v1' - OPENAI_API_KEY: '${CLIPROXY_API_KEY:-bilingualsub-local}' + OPENAI_API_KEY: 'bilingualsub-local' # pragma: allowlist secret depends_on: - cli-proxy diff --git a/pyproject.toml b/pyproject.toml index e973a41..1f558cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -200,10 +200,3 @@ sort_by_size = true targets = ["src/bilingualsub"] exclude_dirs = ["tests", ".venv"] skips = ["B101"] # Skip assert warnings (used in tests) - -[dependency-groups] -dev = [ - "mypy>=1.19.1", - "pytest-asyncio>=1.3.0", - "pytest-cov>=7.0.0", -] diff --git a/tests/unit/api/test_pipeline.py b/tests/unit/api/test_pipeline.py index 6cc3ea2..b64e3b6 100644 --- a/tests/unit/api/test_pipeline.py +++ b/tests/unit/api/test_pipeline.py @@ -106,6 +106,7 @@ async def test_successful_pipeline( translate_call_kwargs = mock_translate.call_args.kwargs assert "on_progress" in translate_call_kwargs assert callable(translate_call_kwargs["on_progress"]) + assert mock_transcribe.call_args.kwargs["prompt"] == "Test Video" @patch("bilingualsub.api.pipeline.download_video") async def test_download_error(self, mock_download) -> None: diff --git a/tests/unit/core/test_transcriber.py b/tests/unit/core/test_transcriber.py index 2a71939..b0d9222 100644 --- a/tests/unit/core/test_transcriber.py +++ b/tests/unit/core/test_transcriber.py @@ -463,8 +463,9 @@ def test_build_whisper_prompt_whitespace_only(self): assert result is None def test_build_whisper_prompt_truncates_long_input(self): - long_title = "A" * 900 + long_title = "".join(str(i % 10) for i in range(900)) result = build_whisper_prompt(video_title=long_title) + assert result == long_title[:800] assert len(result) == 800 def test_transcribe_single_passes_prompt_to_api( diff --git a/tests/unit/core/test_translator.py b/tests/unit/core/test_translator.py index 2fc1edd..7f96490 100644 --- a/tests/unit/core/test_translator.py +++ b/tests/unit/core/test_translator.py @@ -11,7 +11,6 @@ _PROXY_PLACEHOLDER_API_KEY, RetranslateEntry, TranslationError, - _build_model, _parse_batch_response, retranslate_entries, translate_subtitle, @@ -739,7 +738,7 @@ def test_retranslate_entries_invalid_index_raises_error(self): @pytest.mark.unit class TestBuildModel: - """Test cases for _build_model helper.""" + """Test cases for translator model selection behavior.""" @pytest.fixture(autouse=True) def _clear_settings_cache(self): @@ -747,18 +746,40 @@ def _clear_settings_cache(self): yield get_settings.cache_clear() - def test_given_groq_model_when_build_model_then_returns_raw_string( + def _translate_one_entry_with_agent_mock(self): + entries = [ + SubtitleEntry( + index=1, + start=timedelta(seconds=0), + end=timedelta(seconds=2), + text="Hello", + ) + ] + subtitle = Subtitle(entries=entries) + + with patch("bilingualsub.core.translator.Agent") as mock_agent: + mock_translator = Mock() + mock_agent.return_value = mock_translator + mock_response = Mock() + mock_response.content = "1. 你好" + mock_translator.run.return_value = mock_response + + translate_subtitle(subtitle) + + return mock_agent.call_args.kwargs["model"] + + def test_given_groq_model_when_translate_subtitle_then_uses_raw_model_string( self, monkeypatch ): monkeypatch.setenv("TRANSLATOR_MODEL", "groq:openai/gpt-oss-120b") monkeypatch.delenv("OPENAI_BASE_URL", raising=False) get_settings.cache_clear() - result = _build_model(get_settings()) + model_arg = self._translate_one_entry_with_agent_mock() - assert result == "groq:openai/gpt-oss-120b" + assert model_arg == "groq:openai/gpt-oss-120b" - def test_given_openai_model_without_base_url_when_build_model_then_returns_raw_string( + def test_given_openai_model_without_base_url_when_translate_then_uses_raw_string( self, monkeypatch ): monkeypatch.setenv("TRANSLATOR_MODEL", "openai:gpt-4o") @@ -766,39 +787,11 @@ def test_given_openai_model_without_base_url_when_build_model_then_returns_raw_s monkeypatch.delenv("OPENAI_BASE_URL", raising=False) get_settings.cache_clear() - result = _build_model(get_settings()) - - assert result == "openai:gpt-4o" - - def test_given_openai_model_with_base_url_when_build_model_then_returns_openai_chat( - self, monkeypatch - ): - monkeypatch.setenv("TRANSLATOR_MODEL", "openai:claude-sonnet-4-5") - monkeypatch.setenv("OPENAI_API_KEY", "cli-token") - monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") - get_settings.cache_clear() - - result = _build_model(get_settings()) - - assert isinstance(result, OpenAIChat) - assert result.id == "claude-sonnet-4-5" - assert result.base_url == "http://localhost:3000/v1" - - def test_given_proxy_without_api_key_when_build_model_then_uses_placeholder( - self, monkeypatch - ): - # Guards: proxy users without a real OpenAI key must not get auth errors - monkeypatch.setenv("TRANSLATOR_MODEL", "openai:any-model") - monkeypatch.delenv("OPENAI_API_KEY", raising=False) - monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") - get_settings.cache_clear() - - result = _build_model(get_settings()) + model_arg = self._translate_one_entry_with_agent_mock() - assert isinstance(result, OpenAIChat) - assert result.api_key == _PROXY_PLACEHOLDER_API_KEY + assert model_arg == "openai:gpt-4o" - def test_given_base_url_with_trailing_slash_when_build_model_then_slash_stripped( + def test_given_base_url_with_trailing_slash_when_translate_then_slash_stripped( self, monkeypatch ): monkeypatch.setenv("TRANSLATOR_MODEL", "openai:gpt-4o") @@ -806,10 +799,10 @@ def test_given_base_url_with_trailing_slash_when_build_model_then_slash_stripped monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1/") get_settings.cache_clear() - result = _build_model(get_settings()) + model_arg = self._translate_one_entry_with_agent_mock() - assert isinstance(result, OpenAIChat) - assert result.base_url == "http://localhost:3000/v1" + assert isinstance(model_arg, OpenAIChat) + assert model_arg.base_url == "http://localhost:3000/v1" def test_given_proxy_without_api_key_when_translate_subtitle_then_no_value_error( self, monkeypatch diff --git a/tests/unit/utils/test_ffmpeg_intro.py b/tests/unit/utils/test_ffmpeg_intro.py index e0a4336..6af1015 100644 --- a/tests/unit/utils/test_ffmpeg_intro.py +++ b/tests/unit/utils/test_ffmpeg_intro.py @@ -239,10 +239,14 @@ def test_when_channel_url_empty_then_vf_does_not_contain_channel_url_value( assert vf_value.count("drawtext=") == 12 def test_generate_intro_uses_cjk_font_fallback_for_chinese_text( - self, tmp_path: Path, mock_intro_ffmpeg: dict + self, tmp_path: Path, mock_intro_ffmpeg: dict, monkeypatch: pytest.MonkeyPatch ) -> None: """Chinese intro text should target an installed CJK font, not generic serif.""" output_path = tmp_path / "intro.mp4" + monkeypatch.setattr( + "bilingualsub.utils.ffmpeg._FONT_ZH_REGULAR", + tmp_path / "missing-noto-sans-tc.ttf", + ) generate_intro( output_path, diff --git a/uv.lock b/uv.lock index 800f909..c39dbe2 100644 --- a/uv.lock +++ b/uv.lock @@ -115,13 +115,6 @@ e2e = [ { name = "pytest-playwright" }, ] -[package.dev-dependencies] -dev = [ - { name = "mypy" }, - { name = "pytest-asyncio" }, - { name = "pytest-cov" }, -] - [package.metadata] requires-dist = [ { name = "agno", specifier = ">=1.0.0" }, @@ -157,13 +150,6 @@ requires-dist = [ ] provides-extras = ["dev", "e2e"] -[package.metadata.requires-dev] -dev = [ - { name = "mypy", specifier = ">=1.19.1" }, - { name = "pytest-asyncio", specifier = ">=1.3.0" }, - { name = "pytest-cov", specifier = ">=7.0.0" }, -] - [[package]] name = "boolean-py" version = "5.0" From 461488be284d31e960f948a17eeed9b618bb4024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 14:50:45 +0800 Subject: [PATCH 08/15] Extend partial retranslation results --- src/bilingualsub/api/routes.py | 10 +- src/bilingualsub/api/schemas.py | 1 + src/bilingualsub/core/__init__.py | 2 + src/bilingualsub/core/translator.py | 348 ++++++++++++++++++++++------ tests/unit/api/test_routes.py | 17 +- tests/unit/api/test_schemas.py | 14 ++ tests/unit/core/test_translator.py | 84 ++++++- 7 files changed, 398 insertions(+), 78 deletions(-) diff --git a/src/bilingualsub/api/routes.py b/src/bilingualsub/api/routes.py index 09c0306..b728937 100644 --- a/src/bilingualsub/api/routes.py +++ b/src/bilingualsub/api/routes.py @@ -377,8 +377,14 @@ async def partial_retranslate( return PartialRetranslateResponse( results=[ - PartialRetranslateItem(index=index, translated=translated) - for index, translated in sorted(results.items()) + PartialRetranslateItem( + index=index, + original=(result.original if hasattr(result, "original") else ""), + translated=( + result.translated if hasattr(result, "translated") else str(result) + ), + ) + for index, result in sorted(results.items()) ] ) diff --git a/src/bilingualsub/api/schemas.py b/src/bilingualsub/api/schemas.py index 06fa1dd..e61a4e6 100644 --- a/src/bilingualsub/api/schemas.py +++ b/src/bilingualsub/api/schemas.py @@ -108,6 +108,7 @@ class PartialRetranslateItem(BaseModel): """Single re-translated item.""" index: int + original: str translated: str diff --git a/src/bilingualsub/core/__init__.py b/src/bilingualsub/core/__init__.py index dceefbe..bf15e4d 100644 --- a/src/bilingualsub/core/__init__.py +++ b/src/bilingualsub/core/__init__.py @@ -16,6 +16,7 @@ ) from bilingualsub.core.translator import ( RetranslateEntry, + RetranslateResult, TranslationError, retranslate_entries, translate_subtitle, @@ -31,6 +32,7 @@ "GlossaryError", "GlossaryManager", "RetranslateEntry", + "RetranslateResult", "Subtitle", "SubtitleEntry", "SubtitleFetchError", diff --git a/src/bilingualsub/core/translator.py b/src/bilingualsub/core/translator.py index 21e985b..defeccb 100644 --- a/src/bilingualsub/core/translator.py +++ b/src/bilingualsub/core/translator.py @@ -4,6 +4,9 @@ import time from collections.abc import Callable from dataclasses import dataclass +from json import JSONDecodeError, loads +from typing import Any +from urllib.parse import urlparse import structlog from agno.agent import Agent @@ -53,6 +56,15 @@ class RetranslateEntry: translated: str = "" +@dataclass +class RetranslateResult: + """Structured result from partial re-translation.""" + + index: int + original: str + translated: str + + def _is_openai_model(model_str: str) -> bool: return model_str.strip().lower().startswith(_OPENAI_PREFIX) @@ -96,6 +108,28 @@ def _build_model(settings: Settings) -> str | Model: return model_str +def _model_log_metadata(settings: Settings) -> dict[str, str | None]: + """Return safe model metadata for structured logs.""" + model_str = settings.translator_model.strip() + provider_kind = "agno" + model_id = model_str + lower_model = model_str.lower() + if lower_model.startswith(_GROQ_PREFIX): + provider_kind = "groq" + model_id = model_str[len(_GROQ_PREFIX) :] + elif lower_model.startswith(_OPENAI_PREFIX): + provider_kind = "openai" + model_id = model_str[len(_OPENAI_PREFIX) :] + + parsed_base_url = urlparse(settings.openai_base_url or "") + base_url_host = parsed_base_url.hostname if parsed_base_url.hostname else None + return { + "model_id": model_id, + "provider_kind": provider_kind, + "base_url_host": base_url_host, + } + + def _compact_text(text: str) -> str: """Normalize whitespace while preserving readable punctuation.""" return re.sub(r"\s+", " ", text).strip() @@ -157,6 +191,74 @@ def _strip_number_prefix(text: str) -> str: return re.sub(r"^\s*\d+\s*[.):\uff0e]\s*", "", text, count=1) +def _strip_json_fence(text: str) -> str: + """Remove a Markdown JSON fence if the model wrapped the response.""" + stripped = text.strip() + if not stripped.startswith("```"): + return stripped + lines = stripped.splitlines() + if len(lines) >= 3 and lines[-1].strip() == "```": + return "\n".join(lines[1:-1]).strip() + return stripped + + +def _extract_retranslate_payload(payload: Any, expected_index: int) -> dict[str, Any]: + """Extract one re-translation object from supported JSON response shapes.""" + if isinstance(payload, dict): + if "results" in payload: + return _extract_retranslate_payload(payload["results"], expected_index) + if str(expected_index) in payload: + value = payload[str(expected_index)] + if isinstance(value, dict): + return {"index": expected_index, **value} + return {"index": expected_index, "translated": value} + if "original" in payload or "translated" in payload: + return payload + if isinstance(payload, list): + for item in payload: + if isinstance(item, dict) and item.get("index") == expected_index: + return item + if len(payload) == 1 and isinstance(payload[0], dict): + return payload[0] + raise TranslationError(f"Could not parse re-translation JSON for {expected_index}") + + +def _parse_retranslate_response( + response_text: str, + *, + expected_index: int, + fallback_original: str, +) -> RetranslateResult: + """Parse structured partial re-translation output, with plain-text fallback.""" + cleaned = _strip_json_fence(response_text) + try: + payload = _extract_retranslate_payload(loads(cleaned), expected_index) + except (JSONDecodeError, TranslationError) as err: + translated = _strip_number_prefix(response_text).strip() + if not translated: + raise TranslationError( + f"Empty re-translation response for entry {expected_index}" + ) from err + return RetranslateResult( + index=expected_index, + original=fallback_original, + translated=translated, + ) + + translated = str(payload.get("translated") or "").strip() + original = str(payload.get("original") or fallback_original).strip() + index = int(payload.get("index") or expected_index) + if index != expected_index: + raise TranslationError( + f"Expected re-translation index {expected_index}, got {index}" + ) + if not translated: + raise TranslationError( + f"Empty re-translation response for entry {expected_index}" + ) + return RetranslateResult(index=index, original=original, translated=translated) + + def _check_rate_limit(response_text: str) -> None: """Raise RateLimitError if response contains rate limit error. @@ -267,13 +369,19 @@ def _translate_batch( ) logger.debug( - "Batch translation prompt (entries %d-%d):\n%s", - batch[0].index, - batch[-1].index, - prompt, + "translation_batch_request", + source_lang=source_lang, + target_lang=target_lang, + entry_count=len(batch), + batch_start_index=batch[0].index, + batch_end_index=batch[-1].index, + has_context=bool(context), + lookahead_count=len(lookahead or []), ) + started_at = time.monotonic() response = translator.run(prompt) + duration_ms = round((time.monotonic() - started_at) * 1000) response_text = response.content.strip() if response.content else "" if not response_text: raise TranslationError("Empty batch translation response") @@ -281,10 +389,14 @@ def _translate_batch( _check_rate_limit(response_text) logger.debug( - "Batch translation response (entries %d-%d):\n%s", - batch[0].index, - batch[-1].index, - response_text, + "translation_batch_response", + source_lang=source_lang, + target_lang=target_lang, + entry_count=len(batch), + batch_start_index=batch[0].index, + batch_end_index=batch[-1].index, + duration_ms=duration_ms, + response_chars=len(response_text), ) return _parse_batch_response(response_text, len(batch)) @@ -328,10 +440,11 @@ def _translate_one_by_one( f"{entry.index}: {entry.text}" ) logger.debug( - "One-by-one translation for entry %d: '%s' -> '%s'", - entry.index, - entry.text, - translated_text, + "translation_one_by_one_entry_completed", + index=entry.index, + source_lang=source_lang, + target_lang=target_lang, + response_chars=len(translated_text), ) results.append(translated_text) return results @@ -373,6 +486,7 @@ def translate_subtitle( """ settings = get_settings() _ensure_translator_api_key(settings) + model_metadata = _model_log_metadata(settings) translator = Agent( model=_build_model(settings), description=_build_translator_description( @@ -386,16 +500,29 @@ def translate_subtitle( entries = subtitle.entries translated_texts: list[str] = [] + logger.info( + "translation_started", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + entry_count=len(entries), + batch_size=_BATCH_SIZE, + ) + started_at = time.monotonic() for i in range(0, len(entries), _BATCH_SIZE): batch = entries[i : i + _BATCH_SIZE] logger.debug( - "Processing batch %d/%d (entries %d-%d)", - i // _BATCH_SIZE + 1, - (len(entries) + _BATCH_SIZE - 1) // _BATCH_SIZE, - batch[0].index, - batch[-1].index, + "translation_batch_started", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + batch_number=i // _BATCH_SIZE + 1, + batch_count=(len(entries) + _BATCH_SIZE - 1) // _BATCH_SIZE, + batch_start_index=batch[0].index, + batch_end_index=batch[-1].index, + entry_count=len(batch), ) # Collect context from previously translated entries @@ -431,16 +558,19 @@ def translate_subtitle( except (TranslationError, Exception) as exc: # Fallback to one-by-one for non-rate-limit errors logger.warning( - "Batch translation failed for entries %d-%d, " - "falling back to one-by-one: %s", - i + 1, - i + len(batch), - exc, + "translation_batch_fallback", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + batch_start_index=batch[0].index, + batch_end_index=batch[-1].index, + entry_count=len(batch), + error_type=type(exc).__name__, ) logger.debug( - "Falling back to one-by-one for entries %d-%d", - i + 1, - i + len(batch), + "translation_one_by_one_fallback_started", + batch_start_index=batch[0].index, + batch_end_index=batch[-1].index, ) batch_translations = _translate_one_by_one( translator, batch, source_lang, target_lang @@ -454,12 +584,13 @@ def translate_subtitle( except RateLimitError as exc: if attempt < _MAX_RETRIES: logger.warning( - "Rate limited at entries %d-%d (attempt %d/%d), waiting %.0fs", - batch[0].index, - batch[-1].index, - attempt + 1, - _MAX_RETRIES, - exc.retry_after, + "translation_rate_limited", + **model_metadata, + batch_start_index=batch[0].index, + batch_end_index=batch[-1].index, + attempt=attempt + 1, + max_retries=_MAX_RETRIES, + retry_after_seconds=exc.retry_after, ) if on_rate_limit is not None: on_rate_limit(exc.retry_after, attempt + 1, _MAX_RETRIES) @@ -484,9 +615,61 @@ def translate_subtitle( for entry, text in zip(entries, translated_texts, strict=True) ] + logger.info( + "translation_completed", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + entry_count=len(entries), + duration_ms=round((time.monotonic() - started_at) * 1000), + ) + return Subtitle(entries=translated_entries) +def _build_retranslate_prompt( + *, + target_entry: RetranslateEntry, + prev_entries: list[RetranslateEntry], + next_entries: list[RetranslateEntry], + normalized_user_context: str, + source_lang: str, + target_lang: str, +) -> str: + """Build the partial re-translation prompt for one selected entry.""" + sections: list[str] = [] + if prev_entries: + prev_lines = "\n".join( + f"- {entry.original} → {entry.translated or '(待翻譯)'}" + for entry in prev_entries + ) + sections.append(f"【上文參考】\n{prev_lines}") + + if next_entries: + next_lines = "\n".join( + f"- {entry.original} → {entry.translated or '(待翻譯)'}" + for entry in next_entries + ) + sections.append(f"【下文參考】\n{next_lines}") + + if normalized_user_context: + sections.append(f"【使用者補充上下文】\n{normalized_user_context}") + + prompt_sections = "\n\n".join(sections) + instruction = ( + f"請將以下字幕從{source_lang}翻譯成{target_lang}。\n" + "只回傳一個 JSON 物件,不要加 Markdown、引號外文字或任何說明。\n" # noqa: RUF001 + '格式:{"index": 數字, "original": "修正後原文", ' # noqa: RUF001 + '"translated": "目標語言翻譯"}。\n' + "若原文專有名詞疑似語音辨識錯字,請依上文、下文、影片背景、術語表與使用者補充上下文修正後翻譯。" # noqa: RUF001 + "例如同一影片已出現的品牌、人名、產品名與網域應保持一致。\n\n" + f"index: {target_entry.index}\n" + f"原文: {target_entry.original}\n" + f"目前翻譯(可修正): {target_entry.translated or '(空)'}" # noqa: RUF001 + ) + return (f"{prompt_sections}\n\n" if prompt_sections else "") + instruction + + def retranslate_entries( *, entries: list[RetranslateEntry], @@ -497,7 +680,7 @@ def retranslate_entries( video_description: str = "", glossary_text: str = "", user_context: str | None = None, -) -> dict[int, str]: +) -> dict[int, RetranslateResult]: """Re-translate selected subtitle entries with local context. Args: @@ -510,7 +693,8 @@ def retranslate_entries( user_context: Optional extra context provided by user. Returns: - Mapping: entry index -> translated text. + Mapping: entry index -> structured result containing corrected source and + translated text. Raises: ValueError: If request payload is invalid. @@ -530,6 +714,7 @@ def retranslate_entries( settings = get_settings() _ensure_translator_api_key(settings) + model_metadata = _model_log_metadata(settings) translator = Agent( model=_build_model(settings), description=_build_translator_description( @@ -542,40 +727,41 @@ def retranslate_entries( ) normalized_user_context = _compact_text(user_context or "") - results: dict[int, str] = {} + results: dict[int, RetranslateResult] = {} + logger.info( + "retranslation_started", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + entry_count=len(entries), + selected_indices_count=len(ordered_indices), + ) + retranslation_started_at = time.monotonic() for target_index in ordered_indices: + entry_started_at = time.monotonic() position = position_by_index[target_index] target_entry = entries[position] prev_entries = entries[max(0, position - _PARTIAL_CONTEXT_WINDOW) : position] next_entries = entries[position + 1 : position + 1 + _PARTIAL_CONTEXT_WINDOW] + prompt = _build_retranslate_prompt( + target_entry=target_entry, + prev_entries=prev_entries, + next_entries=next_entries, + normalized_user_context=normalized_user_context, + source_lang=source_lang, + target_lang=target_lang, + ) - sections: list[str] = [] - if prev_entries: - prev_lines = "\n".join( - f"- {entry.original} → {entry.translated or '(待翻譯)'}" - for entry in prev_entries - ) - sections.append(f"【上文參考】\n{prev_lines}") - - if next_entries: - next_lines = "\n".join( - f"- {entry.original} → {entry.translated or '(待翻譯)'}" - for entry in next_entries - ) - sections.append(f"【下文參考】\n{next_lines}") - - if normalized_user_context: - sections.append(f"【使用者補充上下文】\n{normalized_user_context}") - - prompt_sections = "\n\n".join(sections) - prompt = (f"{prompt_sections}\n\n" if prompt_sections else "") + ( - f"請將以下字幕從{source_lang}翻譯成{target_lang}。\n" - "只回傳單行翻譯內容,不要加編號、引號或任何說明。\n" # noqa: RUF001 - "若原文專有名詞疑似語音辨識錯字,請依上文、下文、影片背景、術語表與使用者補充上下文修正後翻譯。" # noqa: RUF001 - "例如同一影片已出現的品牌、人名、產品名與網域應保持一致。\n\n" - f"原文: {target_entry.original}\n" - f"目前翻譯(可修正): {target_entry.translated or '(空)'}" # noqa: RUF001 + logger.debug( + "retranslation_entry_request", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + index=target_index, + previous_context_count=len(prev_entries), + next_context_count=len(next_entries), + has_user_context=bool(normalized_user_context), ) for attempt in range(_MAX_RETRIES + 1): @@ -587,22 +773,30 @@ def retranslate_entries( f"Empty re-translation response for entry {target_index}" ) _check_rate_limit(response_text) - cleaned = _strip_number_prefix(response_text).strip() - if not cleaned: - raise TranslationError( - f"Empty re-translation response for entry {target_index}" - ) - results[target_index] = cleaned + results[target_index] = _parse_retranslate_response( + response_text, + expected_index=target_index, + fallback_original=target_entry.original, + ) + logger.debug( + "retranslation_entry_response", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + index=target_index, + duration_ms=round((time.monotonic() - entry_started_at) * 1000), + response_chars=len(response_text), + ) break except RateLimitError as exc: if attempt < _MAX_RETRIES: logger.warning( - "Rate limited during re-translation for entry %d " - "(attempt %d/%d), waiting %.0fs", - target_index, - attempt + 1, - _MAX_RETRIES, - exc.retry_after, + "retranslation_rate_limited", + **model_metadata, + index=target_index, + attempt=attempt + 1, + max_retries=_MAX_RETRIES, + retry_after_seconds=exc.retry_after, ) time.sleep(exc.retry_after) else: @@ -613,4 +807,14 @@ def retranslate_entries( else: raise TranslationError(f"Failed to re-translate entry {target_index}") + logger.info( + "retranslation_completed", + **model_metadata, + source_lang=source_lang, + target_lang=target_lang, + entry_count=len(entries), + selected_indices_count=len(ordered_indices), + duration_ms=round((time.monotonic() - retranslation_started_at) * 1000), + ) + return results diff --git a/tests/unit/api/test_routes.py b/tests/unit/api/test_routes.py index 32efc77..596271b 100644 --- a/tests/unit/api/test_routes.py +++ b/tests/unit/api/test_routes.py @@ -10,6 +10,7 @@ from bilingualsub.api.constants import FileType, JobStatus from bilingualsub.api.jobs import Job, JobManager from bilingualsub.api.routes import _build_download_filename, _sanitize_filename +from bilingualsub.core import RetranslateResult from bilingualsub.core.glossary import GlossaryManager @@ -196,7 +197,13 @@ async def test_partial_retranslate_success(self, client: AsyncClient, app) -> No job.target_lang = "zh-TW" with patch("bilingualsub.api.routes.retranslate_entries") as mock_retranslate: - mock_retranslate.return_value = {2: "修正版第二句"} + mock_retranslate.return_value = { + 2: RetranslateResult( + index=2, + original="Corrected Line 2", + translated="修正版第二句", + ) + } response = await client.post( f"/api/jobs/{job_id}/retranslate", json={ @@ -211,7 +218,13 @@ async def test_partial_retranslate_success(self, client: AsyncClient, app) -> No assert response.status_code == 200 data = response.json() - assert data["results"] == [{"index": 2, "translated": "修正版第二句"}] + assert data["results"] == [ + { + "index": 2, + "original": "Corrected Line 2", + "translated": "修正版第二句", + } + ] call_kwargs = mock_retranslate.call_args.kwargs assert call_kwargs["glossary_text"] == "" # empty glossary diff --git a/tests/unit/api/test_schemas.py b/tests/unit/api/test_schemas.py index 9228e57..b9947cf 100644 --- a/tests/unit/api/test_schemas.py +++ b/tests/unit/api/test_schemas.py @@ -9,6 +9,7 @@ JobCreateRequest, JobCreateResponse, JobStatusResponse, + PartialRetranslateItem, PartialRetranslateRequest, SSEProgressData, StartSubtitleRequest, @@ -185,3 +186,16 @@ def test_selected_indices_must_exist(self) -> None: {"index": 1, "original": "Line 1", "translated": "第一句"}, ], ) + + def test_response_item_includes_original(self) -> None: + item = PartialRetranslateItem( + index=2, + original="Corrected Line 2", + translated="修正版第二句", + ) + + assert item.model_dump() == { + "index": 2, + "original": "Corrected Line 2", + "translated": "修正版第二句", + } diff --git a/tests/unit/core/test_translator.py b/tests/unit/core/test_translator.py index 7f96490..38b8cba 100644 --- a/tests/unit/core/test_translator.py +++ b/tests/unit/core/test_translator.py @@ -10,8 +10,10 @@ from bilingualsub.core.translator import ( _PROXY_PLACEHOLDER_API_KEY, RetranslateEntry, + RetranslateResult, TranslationError, _parse_batch_response, + _parse_retranslate_response, retranslate_entries, translate_subtitle, ) @@ -250,6 +252,54 @@ def test_parse_batch_response_missing_number(self): _parse_batch_response(response, 3) +class TestParseRetranslateResponse: + def test_parse_retranslate_response_json_object(self): + response = ( + '{"index": 2, "original": "OpenAI released GPT-5", ' + '"translated": "OpenAI 發布了 GPT-5"}' + ) + + result = _parse_retranslate_response( + response, + expected_index=2, + fallback_original="Open eye released GP five", + ) + + assert result == RetranslateResult( + index=2, + original="OpenAI released GPT-5", + translated="OpenAI 發布了 GPT-5", + ) + + def test_parse_retranslate_response_json_results_list(self): + response = ( + '{"results": [{"index": 2, "original": "Line two", ' + '"translated": "第二句"}]}' + ) + + result = _parse_retranslate_response( + response, + expected_index=2, + fallback_original="Line 2", + ) + + assert result.original == "Line two" + assert result.translated == "第二句" + + def test_parse_retranslate_response_plain_text_fallback(self): + result = _parse_retranslate_response( + "2. 修正版第二句", + expected_index=2, + fallback_original="Line 2", + ) + + assert result == RetranslateResult( + index=2, + original="Line 2", + translated="修正版第二句", + ) + + class TestBatchTranslation: """Test batch translation behavior.""" @@ -714,7 +764,10 @@ def test_retranslate_entries_with_context(self): mock_translator = Mock() mock_agent.return_value = mock_translator mock_response = Mock() - mock_response.content = "修正版第二句" + mock_response.content = ( + '{"index": 2, "original": "Corrected Line 2", ' + '"translated": "修正版第二句"}' + ) mock_translator.run.return_value = mock_response result = retranslate_entries( @@ -723,12 +776,39 @@ def test_retranslate_entries_with_context(self): user_context="主題是太空探索", ) - assert result == {2: "修正版第二句"} + assert result == { + 2: RetranslateResult( + index=2, + original="Corrected Line 2", + translated="修正版第二句", + ) + } prompt = mock_translator.run.call_args[0][0] assert "上文參考" in prompt assert "下文參考" in prompt assert "主題是太空探索" in prompt assert "語音辨識錯字" in prompt + assert '"original": "修正後原文"' in prompt + + def test_retranslate_entries_falls_back_to_plain_translation(self): + entries = [RetranslateEntry(index=1, original="Line 1", translated="第一句")] + + with patch("bilingualsub.core.translator.Agent") as mock_agent: + mock_translator = Mock() + mock_agent.return_value = mock_translator + mock_response = Mock() + mock_response.content = "修正版第一句" + mock_translator.run.return_value = mock_response + + result = retranslate_entries(entries=entries, selected_indices=[1]) + + assert result == { + 1: RetranslateResult( + index=1, + original="Line 1", + translated="修正版第一句", + ) + } def test_retranslate_entries_invalid_index_raises_error(self): entries = [RetranslateEntry(index=1, original="Line 1", translated="第一句")] From 5c640e79fc9f30ee66eaa7dfd246415c276f1279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 14:51:52 +0800 Subject: [PATCH 09/15] Update partial retranslate preview flow --- .../src/components/SubtitleEditor.test.tsx | 113 ++++++++++++++++++ frontend/src/components/SubtitleEditor.tsx | 63 ++++++---- frontend/src/types.ts | 1 + 3 files changed, 154 insertions(+), 23 deletions(-) create mode 100644 frontend/src/components/SubtitleEditor.test.tsx diff --git a/frontend/src/components/SubtitleEditor.test.tsx b/frontend/src/components/SubtitleEditor.test.tsx new file mode 100644 index 0000000..a75ab7c --- /dev/null +++ b/frontend/src/components/SubtitleEditor.test.tsx @@ -0,0 +1,113 @@ +import { fireEvent, render, screen, waitFor } from '@testing-library/react'; +import { SubtitleEditor } from './SubtitleEditor'; + +const apiMocks = vi.hoisted(() => ({ + fetchSrtContent: vi.fn(), + partialRetranslate: vi.fn(), + addGlossaryEntry: vi.fn(), +})); + +const i18nMocks = vi.hoisted(() => ({ + t: (key: string) => key, +})); + +vi.mock('react-i18next', () => ({ + useTranslation: () => ({ + t: i18nMocks.t, + i18n: { language: 'zh-TW', changeLanguage: vi.fn() }, + }), +})); + +vi.mock('@/api/client', () => ({ + apiClient: { + fetchSrtContent: apiMocks.fetchSrtContent, + partialRetranslate: apiMocks.partialRetranslate, + getDownloadUrl: (jobId: string, fileType: string) => `/api/jobs/${jobId}/download/${fileType}`, + addGlossaryEntry: apiMocks.addGlossaryEntry, + }, +})); + +const srtContent = `1 +00:00:01,000 --> 00:00:02,000 +Old translation +old source + +2 +00:00:03,000 --> 00:00:04,000 +Untouched translation +untouched source`; + +function mockTextTrack() { + const cues: TextTrackCue[] = []; + return { + cues, + mode: 'hidden', + addCue: vi.fn((cue: TextTrackCue) => cues.push(cue)), + removeCue: vi.fn((cue: TextTrackCue) => { + const index = cues.indexOf(cue); + if (index >= 0) cues.splice(index, 1); + }), + }; +} + +describe('SubtitleEditor partial retranslate preview', () => { + beforeEach(() => { + vi.clearAllMocks(); + apiMocks.fetchSrtContent.mockResolvedValue(srtContent); + HTMLMediaElement.prototype.addTextTrack = vi.fn(() => mockTextTrack() as unknown as TextTrack); + HTMLMediaElement.prototype.play = vi.fn(); + globalThis.VTTCue = vi.fn(function VTTCue(startTime, endTime, text) { + return { startTime, endTime, text }; + }) as unknown as typeof VTTCue; + }); + + it('previews and applies corrected source text with translated text', async () => { + apiMocks.partialRetranslate.mockResolvedValue({ + results: [{ index: 1, original: 'correct source', translated: 'New translation' }], + }); + + render(); + + await screen.findByDisplayValue('Old translation'); + const retranslateButton = screen.getByRole('button', { name: 'editor.retranslate' }); + fireEvent.click(screen.getAllByTitle('editor.selectForRetranslate')[0]); + await waitFor(() => expect(retranslateButton).toBeEnabled()); + fireEvent.click(retranslateButton); + + await screen.findByText('correct source'); + expect(screen.getAllByText('old source')).toHaveLength(2); + expect(screen.getByText('New translation')).toBeInTheDocument(); + + fireEvent.click(screen.getByText('editor.retranslatePreviewApply')); + + await waitFor(() => { + expect(screen.getByDisplayValue('New translation')).toBeInTheDocument(); + }); + expect(screen.getByText('correct source')).toBeInTheDocument(); + expect(screen.queryByText('old source')).not.toBeInTheDocument(); + }); + + it('keeps the current source text for legacy retranslate results without original', async () => { + apiMocks.partialRetranslate.mockResolvedValue({ + results: [{ index: 1, translated: 'Legacy new translation' }], + }); + + render(); + + await screen.findByDisplayValue('Old translation'); + const retranslateButton = screen.getByRole('button', { name: 'editor.retranslate' }); + fireEvent.click(screen.getAllByTitle('editor.selectForRetranslate')[0]); + await waitFor(() => expect(retranslateButton).toBeEnabled()); + fireEvent.click(retranslateButton); + await screen.findByText('Legacy new translation'); + + expect(screen.getAllByText('old source')).toHaveLength(1); + + fireEvent.click(screen.getByText('editor.retranslatePreviewApply')); + + await waitFor(() => { + expect(screen.getByDisplayValue('Legacy new translation')).toBeInTheDocument(); + }); + expect(screen.getByText('old source')).toBeInTheDocument(); + }); +}); diff --git a/frontend/src/components/SubtitleEditor.tsx b/frontend/src/components/SubtitleEditor.tsx index 3deb6f5..38cc957 100644 --- a/frontend/src/components/SubtitleEditor.tsx +++ b/frontend/src/components/SubtitleEditor.tsx @@ -16,9 +16,10 @@ interface SubtitleEditorProps { interface RetranslatePreviewItem { index: number; - original: string; - before: string; - after: string; + originalBefore: string; + originalAfter: string; + translatedBefore: string; + translatedAfter: string; } type RetranslateChoice = 'before' | 'after'; @@ -122,6 +123,7 @@ export function SubtitleEditor({ jobId, onBurn, isBurning }: SubtitleEditorProps return entries.some( (entry, i) => entry.translated !== originalEntries[i].translated || + entry.original !== originalEntries[i].original || entry.startTime !== originalEntries[i].startTime || entry.endTime !== originalEntries[i].endTime ); @@ -234,17 +236,19 @@ export function SubtitleEditor({ jobId, onBurn, isBurning }: SubtitleEditorProps user_context: retranslateContext.trim() || undefined, }); - const translatedMap = new Map( - response.results.map(item => [item.index, item.translated] as const) - ); + const resultMap = new Map(response.results.map(item => [item.index, item] as const)); const previewItems = entries - .filter(entry => translatedMap.has(entry.index)) - .map(entry => ({ - index: entry.index, - original: entry.original, - before: entry.translated, - after: translatedMap.get(entry.index) ?? entry.translated, - })) + .filter(entry => resultMap.has(entry.index)) + .map(entry => { + const result = resultMap.get(entry.index); + return { + index: entry.index, + originalBefore: entry.original, + originalAfter: result?.original ?? entry.original, + translatedBefore: entry.translated, + translatedAfter: result?.translated ?? entry.translated, + }; + }) .sort((a, b) => a.index - b.index); if (previewItems.length === 0) { @@ -274,18 +278,22 @@ export function SubtitleEditor({ jobId, onBurn, isBurning }: SubtitleEditorProps ]); const handleApplyRetranslatePreview = useCallback(() => { - const translatedMap = new Map( + const acceptedMap = new Map( retranslatePreview.map(item => { const choice = retranslateChoices[item.index] ?? 'after'; - return [item.index, choice === 'before' ? item.before : item.after] as const; + return [ + item.index, + choice === 'before' + ? { original: item.originalBefore, translated: item.translatedBefore } + : { original: item.originalAfter, translated: item.translatedAfter }, + ] as const; }) ); setEntries(prev => - prev.map(entry => - translatedMap.has(entry.index) - ? { ...entry, translated: translatedMap.get(entry.index) ?? entry.translated } - : entry - ) + prev.map(entry => { + const accepted = acceptedMap.get(entry.index); + return accepted ? { ...entry, ...accepted } : entry; + }) ); setSelectedIndices(new Set()); setRetranslatePreview([]); @@ -451,7 +459,6 @@ export function SubtitleEditor({ jobId, onBurn, isBurning }: SubtitleEditorProps className="border border-gray-100 rounded-xl p-4" >

#{item.index}

- {item.original &&

{item.original}

}
diff --git a/frontend/src/types.ts b/frontend/src/types.ts index ce6d2e8..f82e698 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -68,6 +68,7 @@ export interface PartialRetranslateRequest { export interface PartialRetranslateResult { index: number; + original?: string; translated: string; } From ee649f9a80e1e9f411225b6af95771ef57c07b45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 14:53:08 +0800 Subject: [PATCH 10/15] refactor: tighten retranslate response mapping --- src/bilingualsub/api/routes.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/bilingualsub/api/routes.py b/src/bilingualsub/api/routes.py index b728937..c16f982 100644 --- a/src/bilingualsub/api/routes.py +++ b/src/bilingualsub/api/routes.py @@ -379,10 +379,8 @@ async def partial_retranslate( results=[ PartialRetranslateItem( index=index, - original=(result.original if hasattr(result, "original") else ""), - translated=( - result.translated if hasattr(result, "translated") else str(result) - ), + original=result.original, + translated=result.translated, ) for index, result in sorted(results.items()) ] From fb5acf05b0dd74bf3413398def751abb9c934fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 15:20:37 +0800 Subject: [PATCH 11/15] Fix partial retranslation review findings --- .../src/components/SubtitleEditor.test.tsx | 24 ---- frontend/src/components/SubtitleEditor.tsx | 6 +- frontend/src/types.ts | 2 +- src/bilingualsub/core/transcriber.py | 5 +- src/bilingualsub/core/translator.py | 64 ++++----- tests/unit/core/test_translator.py | 127 +++++++++++------- 6 files changed, 108 insertions(+), 120 deletions(-) diff --git a/frontend/src/components/SubtitleEditor.test.tsx b/frontend/src/components/SubtitleEditor.test.tsx index a75ab7c..614721e 100644 --- a/frontend/src/components/SubtitleEditor.test.tsx +++ b/frontend/src/components/SubtitleEditor.test.tsx @@ -86,28 +86,4 @@ describe('SubtitleEditor partial retranslate preview', () => { expect(screen.getByText('correct source')).toBeInTheDocument(); expect(screen.queryByText('old source')).not.toBeInTheDocument(); }); - - it('keeps the current source text for legacy retranslate results without original', async () => { - apiMocks.partialRetranslate.mockResolvedValue({ - results: [{ index: 1, translated: 'Legacy new translation' }], - }); - - render(); - - await screen.findByDisplayValue('Old translation'); - const retranslateButton = screen.getByRole('button', { name: 'editor.retranslate' }); - fireEvent.click(screen.getAllByTitle('editor.selectForRetranslate')[0]); - await waitFor(() => expect(retranslateButton).toBeEnabled()); - fireEvent.click(retranslateButton); - await screen.findByText('Legacy new translation'); - - expect(screen.getAllByText('old source')).toHaveLength(1); - - fireEvent.click(screen.getByText('editor.retranslatePreviewApply')); - - await waitFor(() => { - expect(screen.getByDisplayValue('Legacy new translation')).toBeInTheDocument(); - }); - expect(screen.getByText('old source')).toBeInTheDocument(); - }); }); diff --git a/frontend/src/components/SubtitleEditor.tsx b/frontend/src/components/SubtitleEditor.tsx index 38cc957..ba879f1 100644 --- a/frontend/src/components/SubtitleEditor.tsx +++ b/frontend/src/components/SubtitleEditor.tsx @@ -240,13 +240,13 @@ export function SubtitleEditor({ jobId, onBurn, isBurning }: SubtitleEditorProps const previewItems = entries .filter(entry => resultMap.has(entry.index)) .map(entry => { - const result = resultMap.get(entry.index); + const result = resultMap.get(entry.index)!; return { index: entry.index, originalBefore: entry.original, - originalAfter: result?.original ?? entry.original, + originalAfter: result.original, translatedBefore: entry.translated, - translatedAfter: result?.translated ?? entry.translated, + translatedAfter: result.translated, }; }) .sort((a, b) => a.index - b.index); diff --git a/frontend/src/types.ts b/frontend/src/types.ts index f82e698..5a17160 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -68,7 +68,7 @@ export interface PartialRetranslateRequest { export interface PartialRetranslateResult { index: number; - original?: string; + original: string; translated: string; } diff --git a/src/bilingualsub/core/transcriber.py b/src/bilingualsub/core/transcriber.py index 597629f..e5f9701 100644 --- a/src/bilingualsub/core/transcriber.py +++ b/src/bilingualsub/core/transcriber.py @@ -21,10 +21,7 @@ class TranscriptionError(Exception): def build_whisper_prompt( video_title: str = "", ) -> str | None: - """Build a Whisper prompt from video title to improve transcription accuracy. - - Strips whitespace from the title, returns None if empty, and truncates - to ``_MAX_WHISPER_PROMPT_CHARS`` when the title exceeds the limit. + """Build a concise Whisper hint from the video title. Args: video_title: Raw video title string. diff --git a/src/bilingualsub/core/translator.py b/src/bilingualsub/core/translator.py index defeccb..50a5355 100644 --- a/src/bilingualsub/core/translator.py +++ b/src/bilingualsub/core/translator.py @@ -5,7 +5,6 @@ from collections.abc import Callable from dataclasses import dataclass from json import JSONDecodeError, loads -from typing import Any from urllib.parse import urlparse import structlog @@ -202,56 +201,47 @@ def _strip_json_fence(text: str) -> str: return stripped -def _extract_retranslate_payload(payload: Any, expected_index: int) -> dict[str, Any]: - """Extract one re-translation object from supported JSON response shapes.""" - if isinstance(payload, dict): - if "results" in payload: - return _extract_retranslate_payload(payload["results"], expected_index) - if str(expected_index) in payload: - value = payload[str(expected_index)] - if isinstance(value, dict): - return {"index": expected_index, **value} - return {"index": expected_index, "translated": value} - if "original" in payload or "translated" in payload: - return payload - if isinstance(payload, list): - for item in payload: - if isinstance(item, dict) and item.get("index") == expected_index: - return item - if len(payload) == 1 and isinstance(payload[0], dict): - return payload[0] - raise TranslationError(f"Could not parse re-translation JSON for {expected_index}") - - def _parse_retranslate_response( response_text: str, *, expected_index: int, fallback_original: str, ) -> RetranslateResult: - """Parse structured partial re-translation output, with plain-text fallback.""" + """Parse structured partial re-translation output.""" + _ = fallback_original cleaned = _strip_json_fence(response_text) try: - payload = _extract_retranslate_payload(loads(cleaned), expected_index) - except (JSONDecodeError, TranslationError) as err: - translated = _strip_number_prefix(response_text).strip() - if not translated: - raise TranslationError( - f"Empty re-translation response for entry {expected_index}" - ) from err - return RetranslateResult( - index=expected_index, - original=fallback_original, - translated=translated, + payload = loads(cleaned) + except JSONDecodeError as err: + raise TranslationError( + f"Could not parse re-translation JSON for entry {expected_index}" + ) from err + + if not isinstance(payload, dict): + raise TranslationError( + f"Expected re-translation JSON object for entry {expected_index}" ) - translated = str(payload.get("translated") or "").strip() - original = str(payload.get("original") or fallback_original).strip() - index = int(payload.get("index") or expected_index) + try: + index = int(payload["index"]) + except (KeyError, TypeError, ValueError) as err: + raise TranslationError( + f"Invalid re-translation index for entry {expected_index}" + ) from err + if index != expected_index: raise TranslationError( f"Expected re-translation index {expected_index}, got {index}" ) + + original = str(payload.get("original") or "").strip() + if not original: + raise TranslationError( + "Missing original text in re-translation response " + f"for entry {expected_index}" + ) + + translated = str(payload.get("translated") or "").strip() if not translated: raise TranslationError( f"Empty re-translation response for entry {expected_index}" diff --git a/tests/unit/core/test_translator.py b/tests/unit/core/test_translator.py index 38b8cba..084fbb4 100644 --- a/tests/unit/core/test_translator.py +++ b/tests/unit/core/test_translator.py @@ -12,6 +12,7 @@ RetranslateEntry, RetranslateResult, TranslationError, + _build_model, _parse_batch_response, _parse_retranslate_response, retranslate_entries, @@ -271,10 +272,9 @@ def test_parse_retranslate_response_json_object(self): translated="OpenAI 發布了 GPT-5", ) - def test_parse_retranslate_response_json_results_list(self): + def test_parse_retranslate_response_markdown_json_fence(self): response = ( - '{"results": [{"index": 2, "original": "Line two", ' - '"translated": "第二句"}]}' + '```json\n{"index": 2, "original": "Line two", "translated": "第二句"}\n```' ) result = _parse_retranslate_response( @@ -286,18 +286,57 @@ def test_parse_retranslate_response_json_results_list(self): assert result.original == "Line two" assert result.translated == "第二句" - def test_parse_retranslate_response_plain_text_fallback(self): - result = _parse_retranslate_response( + @pytest.mark.parametrize( + "response", + [ + '{"results": [{"index": 2, "original": "Line two", "translated": "第二句"}]}', + '{"2": {"original": "Line two", "translated": "第二句"}}', + '[{"index": 2, "original": "Line two", "translated": "第二句"}]', + '"第二句"', "2. 修正版第二句", - expected_index=2, - fallback_original="Line 2", - ) + ], + ) + def test_parse_retranslate_response_rejects_unsupported_shapes(self, response): + with pytest.raises(TranslationError): + _parse_retranslate_response( + response, + expected_index=2, + fallback_original="Line 2", + ) - assert result == RetranslateResult( - index=2, - original="Line 2", - translated="修正版第二句", - ) + @pytest.mark.parametrize( + "response", + [ + '{"original": "Line two", "translated": "第二句"}', + '{"index": "1.", "original": "Line two", "translated": "第二句"}', + ], + ) + def test_parse_retranslate_response_invalid_index_raises_translation_error( + self, response + ): + with pytest.raises(TranslationError, match="Invalid re-translation index"): + _parse_retranslate_response( + response, + expected_index=2, + fallback_original="Line 2", + ) + + @pytest.mark.parametrize( + "response", + [ + '{"index": 2, "translated": "第二句"}', + '{"index": 2, "original": "Line two"}', + ], + ) + def test_parse_retranslate_response_requires_original_and_translated( + self, response + ): + with pytest.raises(TranslationError): + _parse_retranslate_response( + response, + expected_index=2, + fallback_original="Line 2", + ) class TestBatchTranslation: @@ -515,7 +554,6 @@ def make_response(*args, **kwargs): assert "Line 8" in second_prompt assert "Line 9" in second_prompt assert "Line 10" in second_prompt - assert "語音辨識錯字" in second_prompt @pytest.mark.unit def test_context_contains_original_and_translated(self): @@ -785,12 +823,15 @@ def test_retranslate_entries_with_context(self): } prompt = mock_translator.run.call_args[0][0] assert "上文參考" in prompt + assert "Line 1 → 第一句" in prompt assert "下文參考" in prompt + assert "Line 3 → 第三句" in prompt assert "主題是太空探索" in prompt - assert "語音辨識錯字" in prompt - assert '"original": "修正後原文"' in prompt + assert "index: 2" in prompt + assert "原文: Line 2" in prompt + assert "目前翻譯(可修正): 第二句" in prompt - def test_retranslate_entries_falls_back_to_plain_translation(self): + def test_retranslate_entries_requires_structured_json_result(self): entries = [RetranslateEntry(index=1, original="Line 1", translated="第一句")] with patch("bilingualsub.core.translator.Agent") as mock_agent: @@ -800,15 +841,8 @@ def test_retranslate_entries_falls_back_to_plain_translation(self): mock_response.content = "修正版第一句" mock_translator.run.return_value = mock_response - result = retranslate_entries(entries=entries, selected_indices=[1]) - - assert result == { - 1: RetranslateResult( - index=1, - original="Line 1", - translated="修正版第一句", - ) - } + with pytest.raises(TranslationError): + retranslate_entries(entries=entries, selected_indices=[1]) def test_retranslate_entries_invalid_index_raises_error(self): entries = [RetranslateEntry(index=1, original="Line 1", translated="第一句")] @@ -879,7 +913,7 @@ def test_given_base_url_with_trailing_slash_when_translate_then_slash_stripped( monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1/") get_settings.cache_clear() - model_arg = self._translate_one_entry_with_agent_mock() + model_arg = _build_model(get_settings()) assert isinstance(model_arg, OpenAIChat) assert model_arg.base_url == "http://localhost:3000/v1" @@ -914,11 +948,7 @@ def test_given_proxy_without_api_key_when_translate_subtitle_then_no_value_error # Primary assertion: no ValueError raised — proxy skips API key check translate_subtitle(subtitle) - model_arg = mock_agent.call_args.kwargs["model"] - assert isinstance(model_arg, OpenAIChat) - assert model_arg.api_key == _PROXY_PLACEHOLDER_API_KEY - - def test_given_openai_model_with_proxy_when_translate_subtitle_then_uses_openai_chat( + def test_given_openai_model_with_proxy_when_build_model_then_uses_openai_chat( self, monkeypatch ): monkeypatch.setenv("TRANSLATOR_MODEL", "openai:claude-sonnet-4-5") @@ -926,25 +956,20 @@ def test_given_openai_model_with_proxy_when_translate_subtitle_then_uses_openai_ monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") get_settings.cache_clear() - entries = [ - SubtitleEntry( - index=1, - start=timedelta(seconds=0), - end=timedelta(seconds=2), - text="Hello", - ) - ] - subtitle = Subtitle(entries=entries) + model_arg = _build_model(get_settings()) - with patch("bilingualsub.core.translator.Agent") as mock_agent: - mock_translator = Mock() - mock_agent.return_value = mock_translator - mock_response = Mock() - mock_response.content = "1. 你好" - mock_translator.run.return_value = mock_response + assert isinstance(model_arg, OpenAIChat) + assert model_arg.id == "claude-sonnet-4-5" - translate_subtitle(subtitle) + def test_given_proxy_without_api_key_when_build_model_then_uses_placeholder_key( + self, monkeypatch + ): + monkeypatch.setenv("TRANSLATOR_MODEL", "openai:any-model") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:3000/v1") + get_settings.cache_clear() + + model_arg = _build_model(get_settings()) - model_arg = mock_agent.call_args.kwargs["model"] - assert isinstance(model_arg, OpenAIChat) - assert model_arg.id == "claude-sonnet-4-5" + assert isinstance(model_arg, OpenAIChat) + assert model_arg.api_key == _PROXY_PLACEHOLDER_API_KEY From 69f9680c59f1101f5efdfa45d67df8b7de8b3d04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 15:22:04 +0800 Subject: [PATCH 12/15] Remove unused retranslate fallback parameter --- src/bilingualsub/core/translator.py | 3 --- tests/unit/core/test_translator.py | 5 ----- 2 files changed, 8 deletions(-) diff --git a/src/bilingualsub/core/translator.py b/src/bilingualsub/core/translator.py index 50a5355..c84ea9b 100644 --- a/src/bilingualsub/core/translator.py +++ b/src/bilingualsub/core/translator.py @@ -205,10 +205,8 @@ def _parse_retranslate_response( response_text: str, *, expected_index: int, - fallback_original: str, ) -> RetranslateResult: """Parse structured partial re-translation output.""" - _ = fallback_original cleaned = _strip_json_fence(response_text) try: payload = loads(cleaned) @@ -766,7 +764,6 @@ def retranslate_entries( results[target_index] = _parse_retranslate_response( response_text, expected_index=target_index, - fallback_original=target_entry.original, ) logger.debug( "retranslation_entry_response", diff --git a/tests/unit/core/test_translator.py b/tests/unit/core/test_translator.py index 084fbb4..05507f0 100644 --- a/tests/unit/core/test_translator.py +++ b/tests/unit/core/test_translator.py @@ -263,7 +263,6 @@ def test_parse_retranslate_response_json_object(self): result = _parse_retranslate_response( response, expected_index=2, - fallback_original="Open eye released GP five", ) assert result == RetranslateResult( @@ -280,7 +279,6 @@ def test_parse_retranslate_response_markdown_json_fence(self): result = _parse_retranslate_response( response, expected_index=2, - fallback_original="Line 2", ) assert result.original == "Line two" @@ -301,7 +299,6 @@ def test_parse_retranslate_response_rejects_unsupported_shapes(self, response): _parse_retranslate_response( response, expected_index=2, - fallback_original="Line 2", ) @pytest.mark.parametrize( @@ -318,7 +315,6 @@ def test_parse_retranslate_response_invalid_index_raises_translation_error( _parse_retranslate_response( response, expected_index=2, - fallback_original="Line 2", ) @pytest.mark.parametrize( @@ -335,7 +331,6 @@ def test_parse_retranslate_response_requires_original_and_translated( _parse_retranslate_response( response, expected_index=2, - fallback_original="Line 2", ) From a1367a4f17dbcf785df7b2df737e432a8a8ccf71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 15:41:20 +0800 Subject: [PATCH 13/15] fix: preserve audio after intro concat --- src/bilingualsub/utils/ffmpeg.py | 10 +- tests/integration/test_intro_concat_audio.py | 101 +++++++++++++++++++ tests/unit/utils/test_ffmpeg_intro.py | 24 +++++ 3 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_intro_concat_audio.py diff --git a/src/bilingualsub/utils/ffmpeg.py b/src/bilingualsub/utils/ffmpeg.py index 9e96fee..08319f1 100644 --- a/src/bilingualsub/utils/ffmpeg.py +++ b/src/bilingualsub/utils/ffmpeg.py @@ -730,6 +730,10 @@ def _next_start() -> float: "lavfi", "-i", f"color=c=black:s={width}x{height}:r={fps}:d={duration}", + "-f", + "lavfi", + "-i", + "anullsrc=channel_layout=stereo:sample_rate=48000", "-vf", vf, "-c:v", @@ -738,7 +742,11 @@ def _next_start() -> float: "23", "-preset", "fast", - "-an", + "-c:a", + "aac", + "-b:a", + "128k", + "-shortest", "-progress", "pipe:1", "-y", diff --git a/tests/integration/test_intro_concat_audio.py b/tests/integration/test_intro_concat_audio.py new file mode 100644 index 0000000..228cc01 --- /dev/null +++ b/tests/integration/test_intro_concat_audio.py @@ -0,0 +1,101 @@ +"""Regression tests for intro + video concat audio preservation.""" + +from __future__ import annotations + +import json +import shutil +import subprocess +from typing import TYPE_CHECKING + +import pytest + +from bilingualsub.utils.ffmpeg import concat_videos, generate_intro + +if TYPE_CHECKING: + from pathlib import Path + + +def _has_ffmpeg_tools() -> bool: + return shutil.which("ffmpeg") is not None and shutil.which("ffprobe") is not None + + +requires_ffmpeg_tools = pytest.mark.skipif( + not _has_ffmpeg_tools(), + reason="ffmpeg and ffprobe are required for media regression tests", +) + + +def _create_video_with_audio(path: Path) -> None: + subprocess.run( + [ + "ffmpeg", + "-f", + "lavfi", + "-i", + "testsrc2=size=320x180:rate=24:d=1", + "-f", + "lavfi", + "-i", + "sine=frequency=440:sample_rate=48000:d=1", + "-c:v", + "libx264", + "-pix_fmt", + "yuv420p", + "-c:a", + "aac", + "-ac", + "2", + "-shortest", + "-y", + str(path), + ], + check=True, + capture_output=True, + text=True, + ) + + +def _audio_stream_count(path: Path) -> int: + result = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-show_streams", + "-select_streams", + "a", + "-of", + "json", + str(path), + ], + check=True, + capture_output=True, + text=True, + ) + return len(json.loads(result.stdout).get("streams", [])) + + +@pytest.mark.integration +@requires_ffmpeg_tools +def test_intro_concat_preserves_main_video_audio(tmp_path: Path) -> None: + """Regression: a silent intro must not make the final concat output video-only.""" + intro = tmp_path / "intro.mp4" + main = tmp_path / "main.mp4" + final = tmp_path / "final.mp4" + + generate_intro( + intro, + width=320, + height=180, + fps=24.0, + channel="ClaudeDevs", + video_title="Artifacts in Claude Code", + video_url="https://x.com/ClaudeDevs/status/2072770790114914317?s=20", + channel_url="https://x.com/ClaudeDevs", + duration=1.0, + ) + _create_video_with_audio(main) + + concat_videos(intro, main, final) + + assert _audio_stream_count(final) == 1 diff --git a/tests/unit/utils/test_ffmpeg_intro.py b/tests/unit/utils/test_ffmpeg_intro.py index 6af1015..0610693 100644 --- a/tests/unit/utils/test_ffmpeg_intro.py +++ b/tests/unit/utils/test_ffmpeg_intro.py @@ -313,6 +313,30 @@ def test_generate_intro_always_uses_libx264_regardless_of_platform( # VideoToolbox must NOT be used for intro assert "h264_videotoolbox" not in cmd + def test_generate_intro_includes_silent_audio_track_for_concat( + self, tmp_path: Path, mock_intro_ffmpeg: dict + ) -> None: + """Intro must include silent AAC audio so concat keeps the main video's audio.""" + output_path = tmp_path / "intro.mp4" + + generate_intro( + output_path, + width=1920, + height=1080, + fps=30.0, + channel="Ch", + video_title="T", + video_url="https://example.com", + ) + + cmd = _get_popen_cmd(mock_intro_ffmpeg["popen"]) + assert "anullsrc=channel_layout=stereo:sample_rate=48000" in cmd + assert "-an" not in cmd + assert "-c:a" in cmd + ca_idx = cmd.index("-c:a") + assert cmd[ca_idx + 1] == "aac" + assert "-shortest" in cmd + # --------------------------------------------------------------------------- # concat_videos From 9f6dda24cb18a926a20bebab5ed8681296c65252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 15:52:39 +0800 Subject: [PATCH 14/15] docs: clarify optional CLIProxyAPI setup --- .env.example | 3 ++- README.md | 13 ++++++++----- README.zh-TW.md | 10 ++++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.env.example b/.env.example index 099c69e..8414808 100644 --- a/.env.example +++ b/.env.example @@ -12,7 +12,8 @@ TRANSCRIBER_MODEL=whisper-large-v3-turbo # ollama:TwinkleAI/gemma-3-4B-T1-it (local, free) TRANSLATOR_MODEL=groq:openai/gpt-oss-120b -# === CLIProxyAPI (used by docker-compose.yml) === +# === Optional CLIProxyAPI (used only by docker-compose.yml) === +# Needed only if you want translations to route through CLIProxyAPI/agy. # 1. Run host OAuth login first: cliproxyapi -antigravity-login # 2. docker-compose.yml mounts this auth directory into the cli-proxy container. # Leave CLIPROXY_AUTH_DIR unset to use ${HOME}/.cli-proxy-api. diff --git a/README.md b/README.md index 2d18676..7c323de 100644 --- a/README.md +++ b/README.md @@ -27,13 +27,16 @@ docker build -t bilingualsub . && docker run -p 7860:7860 -e GROQ_API_KEY=your_k Then open http://localhost:7860 in your browser. -### Docker Compose with CLIProxyAPI +### Optional: Docker Compose with CLIProxyAPI -Use this path when you want translations to go through a local CLIProxyAPI -container backed by your own Antigravity/Codex/Claude OAuth login. +This path is optional. Use it only when you want translations to go through a +local CLIProxyAPI container backed by your own Antigravity/Codex/Claude OAuth +login. The regular Docker flow above does not require CLIProxyAPI. -First, install CLIProxyAPI on the host and log in. This creates OAuth token files -under `~/.cli-proxy-api`, which are mounted read/write into the proxy container: +For Antigravity/agy, CLIProxyAPI can route requests out of the box once the +host has OAuth credentials. Install CLIProxyAPI on the host and log in. This +creates OAuth token files under `~/.cli-proxy-api`, which are mounted read/write +into the proxy container: ```bash cliproxyapi -antigravity-login diff --git a/README.zh-TW.md b/README.zh-TW.md index 4794b0a..256e340 100644 --- a/README.zh-TW.md +++ b/README.zh-TW.md @@ -27,12 +27,14 @@ docker build -t bilingualsub . && docker run -p 7860:7860 -e GROQ_API_KEY=your_k 然後在瀏覽器開啟 http://localhost:7860。 -### 使用 CLIProxyAPI 的 Docker Compose +### 選用:使用 CLIProxyAPI 的 Docker Compose -如果你想讓翻譯走本機 CLIProxyAPI container,並使用自己的 -Antigravity/Codex/Claude OAuth 登入狀態,使用這個流程。 +這個流程是選用的。只有當你想讓翻譯走本機 CLIProxyAPI container,並使用 +自己的 Antigravity/Codex/Claude OAuth 登入狀態時才需要。上面的單純 Docker +流程不需要 CLIProxyAPI。 -先在 host 安裝 CLIProxyAPI 並登入。OAuth token 會建立在 +對 Antigravity/agy 來說,只要 host 已有 OAuth credentials,CLIProxyAPI 就能 +開箱路由請求。先在 host 安裝 CLIProxyAPI 並登入。OAuth token 會建立在 `~/.cli-proxy-api`,之後由 compose 掛進 proxy container: ```bash From bbea6cfd9dcc70f389d40465de7b0e2c127cd8f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maple=EF=BC=81?= Date: Fri, 3 Jul 2026 15:56:19 +0800 Subject: [PATCH 15/15] chore: ignore local tool caches in prettier --- .prettierignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.prettierignore b/.prettierignore index 5719d80..2944443 100644 --- a/.prettierignore +++ b/.prettierignore @@ -15,6 +15,10 @@ __pycache__/ .venv/ venv/ htmlcov/ +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ +coverage.xml # IDE .idea/ @@ -24,3 +28,5 @@ htmlcov/ *.min.js *.min.css coverage/ +.playwright-mcp/ +preview-*.html