diff --git a/src/bilingualsub/api/pipeline.py b/src/bilingualsub/api/pipeline.py index 6b6e566..e22ccef 100644 --- a/src/bilingualsub/api/pipeline.py +++ b/src/bilingualsub/api/pipeline.py @@ -226,6 +226,7 @@ async def _acquire_video( width=int(meta_dict["width"]), height=int(meta_dict["height"]), fps=float(meta_dict["fps"]), + has_audio=bool(meta_dict.get("has_audio", True)), ) log.info("step_done", step="upload", source=str(video_path)) return video_path, metadata @@ -291,7 +292,14 @@ async def run_download(job: Job) -> None: try: video_path, metadata = await _acquire_video(job, work_dir, log) if job.processing_mode != ProcessingMode.VISUAL_DESCRIPTION: - await _extract_audio_step(job, video_path, work_dir, log) + if not metadata.has_audio: + log.info( + "no_audio_stream_detected", + msg="Auto-switching to visual description mode", + ) + job.processing_mode = ProcessingMode.VISUAL_DESCRIPTION + else: + await _extract_audio_step(job, video_path, work_dir, log) # Save metadata for subtitle phase job.video_width = metadata.width diff --git a/src/bilingualsub/core/downloader.py b/src/bilingualsub/core/downloader.py index 2a29c9a..9b311b9 100644 --- a/src/bilingualsub/core/downloader.py +++ b/src/bilingualsub/core/downloader.py @@ -31,6 +31,7 @@ class VideoMetadata: description: str = "" channel: str = "" # channel name; empty for local uploads channel_url: str = "" # raw channel URL from yt-dlp; empty for local uploads + has_audio: bool = True def __post_init__(self) -> None: """Validate metadata constraints.""" @@ -291,6 +292,16 @@ def _extract_metadata_from_info_dict( if fps is None or fps <= 0: fps = 30.0 + # Detect audio: check acodec field and requested_formats + acodec = info_dict.get("acodec", "none") + has_audio = acodec not in ("none", None) + if not has_audio: + # Also check requested_formats for separate audio streams + requested_formats = info_dict.get("requested_formats") or [] + has_audio = any( + fmt.get("acodec", "none") not in ("none", None) for fmt in requested_formats + ) + channel, channel_url = _extract_channel_from_info(info_dict) return VideoMetadata( @@ -302,6 +313,7 @@ def _extract_metadata_from_info_dict( description=_sanitize_description(info_dict.get("description", "")), channel=channel, channel_url=channel_url, + has_audio=has_audio, ) @@ -336,6 +348,8 @@ def _extract_metadata_with_ffprobe(video_path: Path) -> VideoMetadata: if not video_stream: raise DownloadError("No video stream found in file") + has_audio = any(s.get("codec_type") == "audio" for s in data.get("streams", [])) + # Extract metadata try: title = data.get("format", {}).get("tags", {}).get("title", video_path.stem) @@ -357,4 +371,5 @@ def _extract_metadata_with_ffprobe(video_path: Path) -> VideoMetadata: width=width, height=height, fps=fps, + has_audio=has_audio, ) diff --git a/src/bilingualsub/utils/ffmpeg.py b/src/bilingualsub/utils/ffmpeg.py index 08319f1..3cbacf6 100644 --- a/src/bilingualsub/utils/ffmpeg.py +++ b/src/bilingualsub/utils/ffmpeg.py @@ -329,7 +329,7 @@ def extract_video_metadata(video_path: Path) -> dict[str, str | float | int]: video_path: Path to the video file Returns: - Dict with keys: title, duration, width, height, fps + Dict with keys: title, duration, width, height, fps, has_audio Raises: FFmpegError: If ffprobe fails or no video stream found @@ -367,6 +367,8 @@ def extract_video_metadata(video_path: Path) -> dict[str, str | float | int]: if not video_stream: raise FFmpegError(f"No video stream found in {video_path}") + has_audio = any(s.get("codec_type") == "audio" for s in data.get("streams", [])) + try: title = data.get("format", {}).get("tags", {}).get("title", video_path.stem) duration = float(data.get("format", {}).get("duration", 0)) @@ -386,6 +388,7 @@ def extract_video_metadata(video_path: Path) -> dict[str, str | float | int]: "width": width, "height": height, "fps": fps, + "has_audio": has_audio, } diff --git a/tests/unit/api/test_pipeline.py b/tests/unit/api/test_pipeline.py index b64e3b6..2534b6b 100644 --- a/tests/unit/api/test_pipeline.py +++ b/tests/unit/api/test_pipeline.py @@ -6,7 +6,7 @@ import pytest -from bilingualsub.api.constants import FileType, JobStatus, SSEEvent +from bilingualsub.api.constants import FileType, JobStatus, ProcessingMode, SSEEvent from bilingualsub.api.jobs import Job from bilingualsub.api.pipeline import run_burn, run_download, run_subtitle from bilingualsub.core.downloader import DownloadError, VideoMetadata @@ -217,6 +217,52 @@ async def test_run_download_extract_audio_failure_sends_error( assert "ffmpeg segfault" in error_events[0]["data"]["detail"] assert job.status == JobStatus.FAILED + @patch("bilingualsub.api.pipeline.download_video") + async def test_run_download_no_audio_switches_to_visual_description( + self, mock_download + ) -> None: + """When video has no audio stream, auto-switch to visual description mode.""" + metadata = VideoMetadata( + title="Silent Video", + duration=60.0, + width=1920, + height=1080, + fps=30.0, + has_audio=False, + ) + mock_download.return_value = metadata + + job = _make_job() + assert job.processing_mode == ProcessingMode.SUBTITLE + + await run_download(job) + + assert job.processing_mode == ProcessingMode.VISUAL_DESCRIPTION + assert job.status == JobStatus.DOWNLOAD_COMPLETE + + @patch("bilingualsub.api.pipeline.extract_audio") + @patch("bilingualsub.api.pipeline.download_video") + async def test_run_download_with_audio_keeps_subtitle_mode( + self, mock_download, mock_extract_audio + ) -> None: + """When video has audio stream, processing mode stays as SUBTITLE.""" + metadata = VideoMetadata( + title="Normal Video", + duration=60.0, + width=1920, + height=1080, + fps=30.0, + has_audio=True, + ) + mock_download.return_value = metadata + + job = _make_job() + await run_download(job) + + assert job.processing_mode == ProcessingMode.SUBTITLE + assert job.status == JobStatus.DOWNLOAD_COMPLETE + mock_extract_audio.assert_called_once() + @pytest.mark.unit @pytest.mark.asyncio diff --git a/tests/unit/utils/test_ffmpeg.py b/tests/unit/utils/test_ffmpeg.py index e341d20..faa0e3e 100644 --- a/tests/unit/utils/test_ffmpeg.py +++ b/tests/unit/utils/test_ffmpeg.py @@ -9,6 +9,7 @@ FFmpegError, burn_subtitles, extract_audio, + extract_video_metadata, get_audio_duration, split_audio, trim_video, @@ -48,6 +49,7 @@ def mock_ffmpeg(self): "height": 1080, "fps": 30.0, "title": "test video", + "has_audio": True, } yield { @@ -804,3 +806,69 @@ def test_non_existent_file_raises_error(self, tmp_path): with pytest.raises(ValueError, match="Audio file does not exist"): split_audio(audio_path, output_dir=tmp_path) + + +def _ffprobe_json( + streams: list[dict], duration: float = 120.0, title: str = "test" +) -> str: + """Build a minimal ffprobe JSON output.""" + return json.dumps( + { + "streams": streams, + "format": {"duration": str(duration), "tags": {"title": title}}, + } + ) + + +_VIDEO_STREAM = { + "codec_type": "video", + "width": 1920, + "height": 1080, + "r_frame_rate": "30/1", +} +_AUDIO_STREAM = {"codec_type": "audio", "codec_name": "aac"} + + +@pytest.mark.unit +class TestExtractVideoMetadata: + """Test cases for extract_video_metadata has_audio detection.""" + + @patch("bilingualsub.utils.ffmpeg.subprocess.run") + def test_has_audio_true_when_audio_stream_present(self, mock_run, tmp_path): + """Given video with audio+video streams, has_audio is True.""" + mock_run.return_value = MagicMock( + stdout=_ffprobe_json([_VIDEO_STREAM, _AUDIO_STREAM]), + ) + + result = extract_video_metadata(tmp_path / "video.mp4") + + assert result["has_audio"] is True + + @patch("bilingualsub.utils.ffmpeg.subprocess.run") + def test_has_audio_false_when_no_audio_stream(self, mock_run, tmp_path): + """Given video with only video stream, has_audio is False.""" + mock_run.return_value = MagicMock( + stdout=_ffprobe_json([_VIDEO_STREAM]), + ) + + result = extract_video_metadata(tmp_path / "video.mp4") + + assert result["has_audio"] is False + + @patch("bilingualsub.utils.ffmpeg.subprocess.run") + def test_returns_standard_metadata_fields(self, mock_run, tmp_path): + """Given a normal video, all standard metadata fields are returned.""" + mock_run.return_value = MagicMock( + stdout=_ffprobe_json( + [_VIDEO_STREAM, _AUDIO_STREAM], duration=60.0, title="My Video" + ), + ) + + result = extract_video_metadata(tmp_path / "video.mp4") + + assert result["title"] == "My Video" + assert result["duration"] == 60.0 + assert result["width"] == 1920 + assert result["height"] == 1080 + assert result["fps"] == 30.0 + assert "has_audio" in result