From d27c07a8049dcd97568c93e6edd743edf6083ae3 Mon Sep 17 00:00:00 2001 From: unohee Date: Sun, 31 May 2026 23:26:38 +0900 Subject: [PATCH] =?UTF-8?q?feat(stream):=20DAW=20=EC=8B=A4=EC=8B=9C?= =?UTF-8?q?=EA=B0=84=20=EB=B8=94=EB=A1=9D=20=EC=B2=98=EB=A6=AC=20=EC=9E=AC?= =?UTF-8?q?=ED=98=84=20+=20=ED=94=8C=EB=9F=AC=EA=B7=B8=EC=9D=B8=20?= =?UTF-8?q?=ED=81=B4=EB=A6=AD/=EB=93=9C=EB=A1=AD=EC=95=84=EC=9B=83=20triag?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 실제 DAW(Ableton 등)의 고정 블록 콜백 처리를 재현해, 오프라인 바운스와 블록 스트리밍이 어디서 갈리는지 노출하고 클릭을 자동 triage 한다. 핵심 발견(pedalboard 0.9.22 실측): reset=False 연속 호출은 오프라인 렌더와 비트 단위로 일치(-600dB)하지만, 기존 VST3PluginWrapper.process는 매 호출 reset(기본 True)이라 블록 처리 시 경계마다 클릭(-7.5dB)이 발생했다. 추가: - core/streaming.py: render_offline(기준)/render_streamed(연속 블록, reset_per_block·reset_first 옵션) + BlockTiming RT factor 측정 - core/discontinuity.py: 클릭 triage. 블록 경계 정렬 여부로 "스트리밍 상태 단절(=DAW 클릭)" vs "소스 클릭" 구분, NaN/Inf, offline 대비 null test - core/rt_bench.py: RT factor p50/p99/max, xrun, worst-case 동시 트랙 추정 - cli/stream.py: audioman stream {bench,triage,compare,play}. --json은 기존 finding.v1.json 스키마 그대로. 
builtin: 접두사로 VST3 없이 테스트 가능 - VST3PluginWrapper.process(reset=): reset 인자 추가(기본 True로 하위호환) - tests/unit/test_streaming.py (14): null test, 블록 경계 클릭 검출, RT factor 단조성/블록 크기 의존성 이 working tree에는 이전 세션의 미커밋 변경 'LLM-native Phase A'도 함께 포함된다(app.py가 두 작업에 걸쳐 분리 불가하여 동봉): - --plain 글로벌 출력 모드, i18n 인프라 삭제(src/audioman/i18n.py 제거) - Finding 스키마(core/findings.py) + detectors.py - audioman observe / changelog / schemas 명령, schemas/*.json 발행 - tests: test_plain_mode/test_findings/test_observe/test_changelog_cmd (29) --- CHANGELOG.md | 27 ++ README.md | 25 ++ src/audioman/__init__.py | 2 +- src/audioman/cli/analyze.py | 64 ++-- src/audioman/cli/app.py | 49 ++- src/audioman/cli/bounce.py | 17 +- src/audioman/cli/chain.py | 17 +- src/audioman/cli/changelog_cmd.py | 162 ++++++++++ src/audioman/cli/commit_cmd.py | 15 +- src/audioman/cli/doctor.py | 33 +-- src/audioman/cli/dump.py | 19 +- src/audioman/cli/edl.py | 37 ++- src/audioman/cli/eq_profile.py | 25 +- src/audioman/cli/fader_compare.py | 9 +- src/audioman/cli/fader_test.py | 9 +- src/audioman/cli/fx.py | 117 ++++---- src/audioman/cli/info.py | 5 +- src/audioman/cli/list_cmd.py | 7 +- src/audioman/cli/master.py | 49 ++- src/audioman/cli/mixdown.py | 29 +- src/audioman/cli/observe.py | 236 +++++++++++++++ src/audioman/cli/output.py | 77 ++++- src/audioman/cli/preset.py | 29 +- src/audioman/cli/process.py | 21 +- src/audioman/cli/scan.py | 7 +- src/audioman/cli/schemas_cmd.py | 92 ++++++ src/audioman/cli/screen.py | 11 +- src/audioman/cli/stream.py | 394 +++++++++++++++++++++++++ src/audioman/cli/visualize.py | 37 ++- src/audioman/core/detectors.py | 274 +++++++++++++++++ src/audioman/core/discontinuity.py | 235 +++++++++++++++ src/audioman/core/findings.py | 177 +++++++++++ src/audioman/core/rt_bench.py | 89 ++++++ src/audioman/core/streaming.py | 207 +++++++++++++ src/audioman/i18n.py | 184 ------------ src/audioman/plugins/vst3.py | 16 +- src/audioman/schemas/analyze.v1.json | 65 ++++ src/audioman/schemas/changelog.v1.json 
| 33 +++ src/audioman/schemas/finding.v1.json | 53 ++++ src/audioman/schemas/observe.v1.json | 48 +++ tests/unit/test_changelog_cmd.py | 77 +++++ tests/unit/test_findings.py | 160 ++++++++++ tests/unit/test_observe.py | 77 +++++ tests/unit/test_plain_mode.py | 45 +++ tests/unit/test_streaming.py | 179 +++++++++++ 45 files changed, 3067 insertions(+), 473 deletions(-) create mode 100644 src/audioman/cli/changelog_cmd.py create mode 100644 src/audioman/cli/observe.py create mode 100644 src/audioman/cli/schemas_cmd.py create mode 100644 src/audioman/cli/stream.py create mode 100644 src/audioman/core/detectors.py create mode 100644 src/audioman/core/discontinuity.py create mode 100644 src/audioman/core/findings.py create mode 100644 src/audioman/core/rt_bench.py create mode 100644 src/audioman/core/streaming.py delete mode 100644 src/audioman/i18n.py create mode 100644 src/audioman/schemas/analyze.v1.json create mode 100644 src/audioman/schemas/changelog.v1.json create mode 100644 src/audioman/schemas/finding.v1.json create mode 100644 src/audioman/schemas/observe.v1.json create mode 100644 tests/unit/test_changelog_cmd.py create mode 100644 tests/unit/test_findings.py create mode 100644 tests/unit/test_observe.py create mode 100644 tests/unit/test_plain_mode.py create mode 100644 tests/unit/test_streaming.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 89a7eee..514f48d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,33 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added — LLM-native Phase A +- **--plain 글로벌 출력 모드**: rich color/markup/i18n을 모두 끄고 영어 ASCII 출력. `AUDIOMAN_PLAIN=1` 환경변수로도 활성화. `--help`/`print_table`이 grep/awk 친화 텍스트로 fallback. LLM agent 후기 #1 직접 대응. +- **Finding 스키마** (`audioman.core.findings`): signal / spectral / plugin / container 4개 카테고리 통합 결함 표현. `code` (안정 enum), `severity` (info/warn/critical), `where` (file/sample/sec/freq), `measurement`, `hint`, `fix_hint` 필드. 
JSONSchema 발행 (`audioman://schema/finding.v1.json`). +- **`audioman observe`**: 새 1급 명령 — `signal+spectral` 카테고리 fault detector(clipping, DC offset, channel imbalance, leading/trailing/inner silence, mains hum, HF noise floor)를 통합된 `finding[]` 배열로 emit. `--category`, `--severity`, `--recursive` 지원. JSON envelope에 `duration_sec`, `total_samples`, `sample_rate`, `channels` 항상 채워짐 (후기 #3 대응). +- **`audioman changelog`**: CHANGELOG.md 파서. `--since X.Y.Z` 필터, `--json` envelope. 후기 #5 대응. +- **`audioman schemas list|show`**: 발행된 JSONSchema 노출. LLM agent가 `audioman --json` 출력의 모양을 호출 전에 알 수 있다. +- **analyze --json 메타 보강**: `$schema`, `audioman_version`, `duration_sec`, `total_samples`, `findings[]` 필드 추가. 기존 `duration`, `frames` 필드는 호환을 위해 유지. +- **새 detector 모듈** (`audioman.core.detectors`): `detect_clipping`, `detect_dc_offset`, `detect_channel_imbalance`, `silence_to_findings`, `spectrum_to_findings`. 기존 `core/analysis.py` 출력을 그대로 받아 Finding으로 어댑팅. + +### Added — DAW 실시간 스트리밍 재현 / 플러그인 벤치마크·디버깅 +- **`audioman stream`**: 실제 DAW(Ableton 등)의 고정 블록 콜백 처리를 재현해 플러그인 클릭/드롭아웃을 triage 하는 1급 명령. 4개 서브커맨드: + - `bench`: 블록 크기(64/128/256/512/1024)별 실시간 CPU 부하 측정 — 블록당 처리시간 vs 실시간 마감(deadline) 비율(RT factor p50/p99/max), xrun 수, 추정 동시 트랙 수. + - `triage`: 블록 스트리밍 출력에서 클릭/불연속을 검출해 `finding[]`로 emit. 블록 경계 정렬 여부로 "스트리밍 상태 단절(=DAW 클릭)" vs "소스 콘텐츠 클릭"을 구분. offline 렌더 대비 null test 포함. + - `compare`: 여러 블록 크기 출력을 서로/오프라인과 null test 비교 — block-size 의존 버그 탐지. + - `play`: sounddevice로 플러그인 통과 신호 실시간 재생 + PortAudio underflow(실 xrun) 카운트. +- **`audioman.core.streaming`**: 블록 단위 결정적 처리 엔진. `render_offline`(whole-buffer ground truth) / `render_streamed`(연속 블록, `reset_per_block`·`reset_first` 옵션). pedalboard 실측 확인: `reset=False` 연속 호출은 오프라인 렌더와 비트 단위 일치(-600dB), 매 블록 reset 시 경계 클릭(-7.5dB). +- **`audioman.core.discontinuity`**: 클릭 triage 디텍터. `detect_discontinuities`(MAD-robust sample-diff spike + 블록 경계 정렬 분류), `detect_nonfinite`(NaN/Inf), `null_test`(PDC 보상 후 offline 대비 차이). 
기존 `Finding`/`Code.CLICK_DENSITY`/`SAMPLE_DROPOUT`/`NONFINITE_SAMPLES` 스키마 재사용. +- **`audioman.core.rt_bench`**: `BlockTiming`→`RTBenchReport` 집계. worst-case(p99/max) 기반 동시 트랙 추정, 워밍업 블록 제외. +- **`VST3PluginWrapper.process(reset=)`**: reset 인자 추가 — 블록 스트리밍에서 `reset=False`로 내부 상태(필터 히스토리/lookahead) 연속 유지. 기본값 True로 기존 오프라인 동작 하위호환. + +### Removed +- **i18n 인프라 전면 삭제**: `src/audioman/i18n.py` 및 한국어 카탈로그 제거. 282개 `_("...")` 호출을 모두 평문 영어 문자열로 변환. `AUDIOMAN_LANG` 환경변수 미지원. CLI 출력 언어는 항상 영어로 통일 (`--plain` 플래그의 역할은 ANSI/Rich 끄기로 축소). + +### Tests +- `tests/unit/test_plain_mode.py` (3), `test_findings.py` (16), `test_observe.py` (5), `test_changelog_cmd.py` (5) — 신규 29개 추가. +- `tests/unit/test_streaming.py` (14) — streaming null test, 블록 경계 클릭 검출, RT factor 단조성/블록 크기 의존성. pedalboard 빌트인만 사용(VST3 불필요). + ## [0.2.0] - 2026-05-10 ### Added diff --git a/README.md b/README.md index aae923f..e3d0b5f 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,31 @@ audioman process ./input_dir/ -p dereverb -o ./output_dir/ -r # recursive | `doctor -p ` | Plugin analysis (freq response, THD, dynamics, waveshaper) | | `vo {analyze,process}` | Voiceover workflow (VAD + RX denoise + utterance LUFS leveling) | | `obs {probe,dry-run}` | OBS multitrack 영상 자동 진단 — track topology + voice/music classification + 처치 계획 (dry-run only) | +| `stream {bench,triage,compare,play}` | DAW 실시간 블록 처리 재현 — 플러그인 클릭/드롭아웃 triage + CPU 부하 벤치마크 | + +## Plugin Click / Dropout Triage (DAW streaming) + +실제 DAW(Ableton 등)에서 나는 클릭을 audioman이 재현하고 자동 진단한다. DAW는 오디오를 고정 블록(128/256/512 samples)으로 콜백 처리하며 블록 사이 플러그인 내부 상태를 연속 유지한다 — `stream`은 그 환경을 재현해 오프라인 바운스와 무엇이 다른지 노출한다. 
+ +```bash +# 블록 크기별 실시간 CPU 부하 (RT factor, xrun, 동시 트랙 추정) +audioman stream bench mix.wav -p reverb --blocks 64,128,256,512,1024 + +# 클릭/불연속 triage — 블록 경계 정렬 = 스트리밍 버그, 비정렬 = 소스 클릭 +audioman stream triage mix.wav -p denoise --block-size 512 --json + +# 잘못된 호스트 동작(매 블록 reset) 시뮬레이션 — 클릭 강제 유발 +audioman stream triage mix.wav -p denoise --reset-per-block + +# 블록 크기 의존 버그: 출력이 블록 크기마다 다른지 null test +audioman stream compare mix.wav -p delay --blocks 128,256,512 + +# 실제 오디오 디바이스로 재생 + PortAudio underflow(실 xrun) 카운트 +audioman stream play mix.wav -p reverb --block-size 256 + +# VST3 없이 테스트: builtin: 접두사로 pedalboard 내장 이펙트 사용 +audioman stream triage sine -p builtin:reverb --reset-per-block +``` ## Batch Processing diff --git a/src/audioman/__init__.py b/src/audioman/__init__.py index da6c891..b66315f 100644 --- a/src/audioman/__init__.py +++ b/src/audioman/__init__.py @@ -1,4 +1,4 @@ # Created: 2026-03-21 # Purpose: audioman - Cross-platform CLI wrapper for VST3/AU audio plugins -__version__ = "0.1.0" +__version__ = "0.3.0-dev" diff --git a/src/audioman/cli/analyze.py b/src/audioman/cli/analyze.py index 8beb207..297de39 100644 --- a/src/audioman/cli/analyze.py +++ b/src/audioman/cli/analyze.py @@ -5,6 +5,7 @@ import json from pathlib import Path +from audioman import __version__ from audioman.cli.output import print_error, print_json, print_table, print_success, output_console from audioman.core.audio_file import read_audio, get_audio_stats from audioman.core.analysis import ( @@ -14,28 +15,32 @@ spectrum_diagnostics, ) from audioman.core.batch import collect_audio_files +from audioman.core.detectors import ( + detect_signal_findings, + spectrum_to_findings, + silence_to_findings, +) from audioman.core.waveform import render_waveform, render_envelope, render_spectral_envelope -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("analyze", help=_("Audio analysis (RMS, spectral entropy, silence 
detection, etc.)")) - parser.add_argument("input", help=_("Input audio file or directory")) - parser.add_argument("--frames", action="store_true", help=_("Per-frame detailed output")) - parser.add_argument("--frame-size", type=int, default=2048, help=_("Frame size (default: 2048)")) - parser.add_argument("--hop", type=int, default=512, help=_("Hop size (default: 512)")) - parser.add_argument("--silence-threshold", type=float, default=-40.0, help=_("Silence detection threshold dB (default: -40)")) - parser.add_argument("--waveform", "-w", action="store_true", help=_("Show ASCII waveform")) - parser.add_argument("--waveform-width", type=int, default=80, help=_("Waveform width (default: 80)")) - parser.add_argument("--waveform-height", type=int, default=16, help=_("Waveform height (default: 16)")) - parser.add_argument("--waveform-mode", choices=["rms", "peak"], default="peak", help=_("Waveform mode (default: peak)")) - parser.add_argument("--recursive", "-r", action="store_true", help=_("Include subdirectories (batch)")) + parser = subparsers.add_parser("analyze", help="Audio analysis (RMS, spectral entropy, silence detection, etc.)") + parser.add_argument("input", help="Input audio file or directory") + parser.add_argument("--frames", action="store_true", help="Per-frame detailed output") + parser.add_argument("--frame-size", type=int, default=2048, help="Frame size (default: 2048)") + parser.add_argument("--hop", type=int, default=512, help="Hop size (default: 512)") + parser.add_argument("--silence-threshold", type=float, default=-40.0, help="Silence detection threshold dB (default: -40)") + parser.add_argument("--waveform", "-w", action="store_true", help="Show ASCII waveform") + parser.add_argument("--waveform-width", type=int, default=80, help="Waveform width (default: 80)") + parser.add_argument("--waveform-height", type=int, default=16, help="Waveform height (default: 16)") + parser.add_argument("--waveform-mode", choices=["rms", "peak"], default="peak", 
help="Waveform mode (default: peak)") + parser.add_argument("--recursive", "-r", action="store_true", help="Include subdirectories (batch)") parser.add_argument("--spectrum", action="store_true", - help=_("Add long-term FFT diagnostics (band energy, dominant frequencies, hum, hf slope)")) + help="Add long-term FFT diagnostics (band energy, dominant frequencies, hum, hf slope)") parser.add_argument("--spectrum-fft", type=int, default=16384, - help=_("FFT size for spectrum diagnostics (default: 16384)")) + help="FFT size for spectrum diagnostics (default: 16384)") parser.add_argument("--spectrum-min-rms", type=float, default=0.01, - help=_("Skip frames below this RMS when averaging spectrum (default: 0.01)")) + help="Skip frames below this RMS when averaging spectrum (default: 0.01)") parser.set_defaults(func=run) @@ -45,17 +50,26 @@ def _analyze_file( ) -> dict: audio, sr = read_audio(path) stats = get_audio_stats(audio, sr) + audio_length = audio.shape[-1] if audio.ndim == 2 else audio.shape[0] metrics = compute_frame_metrics(audio, sr, frame_size=frame_size, hop_size=hop) summary = compute_summary(metrics) silence = detect_silence(audio, sr, threshold_db=silence_threshold) + # Findings: signal + (optional) spectral. LLM agent 후기 대응. + findings = detect_signal_findings(audio, sr, file=str(path)) + findings.extend(silence_to_findings(silence, audio_length, sr, file=str(path))) + + # LLM agent 후기 #3 대응: duration/total_samples를 명시적으로 보장. + # `frames`는 채널당 샘플 수, `total_samples`는 그 별칭(명시적 이름). 
result = { "file": str(path), "sample_rate": sr, "channels": stats.channels, "duration": round(stats.duration, 4), + "duration_sec": round(stats.duration, 6), "frames": stats.frames, + "total_samples": int(stats.frames), "rms": round(stats.rms, 6), "peak": round(stats.peak, 6), "summary": summary, @@ -64,9 +78,13 @@ def _analyze_file( } if spectrum: - result["spectrum"] = spectrum_diagnostics( + spec = spectrum_diagnostics( audio, sr, fft_size=spectrum_fft, min_rms=spectrum_min_rms ) + result["spectrum"] = spec + findings.extend(spectrum_to_findings(spec, file=str(path))) + + result["findings"] = [f.to_dict() for f in findings] if frames_mode: result["frame_metrics"] = { @@ -124,7 +142,12 @@ def _run_single(args: argparse.Namespace, path: Path) -> None: ) if args.json: - out = {"command": "analyze", **result} + out = { + "$schema": "audioman://schema/analyze.v1.json", + "audioman_version": __version__, + "command": "analyze", + **result, + } if waveform_text: out["ascii_waveform"] = waveform_text out["ascii_envelope"] = envelope_text @@ -206,7 +229,12 @@ def _run_batch(args: argparse.Namespace, input_dir: Path) -> None: spectrum_min_rms=args.spectrum_min_rms, ) if args.json: - print(json.dumps({"command": "analyze", **result}, ensure_ascii=False, default=str)) + print(json.dumps({ + "$schema": "audioman://schema/analyze.v1.json", + "audioman_version": __version__, + "command": "analyze", + **result, + }, ensure_ascii=False, default=str)) else: output_console.print( f" [{i+1}/{len(files)}] {fpath.name}: " diff --git a/src/audioman/cli/app.py b/src/audioman/cli/app.py index 1fed495..fb6b942 100644 --- a/src/audioman/cli/app.py +++ b/src/audioman/cli/app.py @@ -3,23 +3,46 @@ import argparse import logging +import os import sys -from audioman import __version__ -from audioman.cli import scan, list_cmd, info, process, chain, preset, dump, analyze, fx, visualize, doctor, eq_profile, bounce, commit_cmd, mixdown, edl as edl_cli, master as master_cli, fader_test as 
fader_test_cli, fader_compare as fader_compare_cli, voiceover as voiceover_cli, screen as screen_cli, obs as obs_cli -from audioman.i18n import _ + +def _early_plain_detect(argv: list[str] | None) -> bool: + """parse_args 전에 --plain / AUDIOMAN_PLAIN을 감지. + + i18n._detect_lang()이 import 시점에 호출될 수 있으므로 + env를 먼저 세팅해야 한국어 카탈로그가 활성화되지 않는다. + """ + args = list(argv) if argv is not None else sys.argv[1:] + if "--plain" in args: + os.environ["AUDIOMAN_PLAIN"] = "1" + return True + val = os.environ.get("AUDIOMAN_PLAIN", "").strip().lower() + return val in ("1", "true", "yes", "on") + + +_PLAIN_EARLY = _early_plain_detect(None) + +from audioman import __version__ # noqa: E402 +from audioman.cli import scan, list_cmd, info, process, chain, preset, dump, analyze, fx, visualize, doctor, eq_profile, bounce, commit_cmd, mixdown, edl as edl_cli, master as master_cli, fader_test as fader_test_cli, fader_compare as fader_compare_cli, voiceover as voiceover_cli, screen as screen_cli, obs as obs_cli, observe as observe_cli, changelog_cmd, schemas_cmd, stream as stream_cli # noqa: E402 +from audioman.cli.output import set_plain # noqa: E402 def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( prog="audioman", - description=_("Cross-platform CLI wrapper for VST3/AU audio plugins"), + description="Cross-platform CLI wrapper for VST3/AU audio plugins", ) parser.add_argument("--version", action="version", version=f"audioman {__version__}") - parser.add_argument("--json", action="store_true", help=_("JSON output mode")) - parser.add_argument("--verbose", "-v", action="store_true", help=_("Verbose logging")) + parser.add_argument("--json", action="store_true", help="JSON output mode") + parser.add_argument( + "--plain", + action="store_true", + help="LLM-friendly output: no color, no rich tables, English help (also via AUDIOMAN_PLAIN=1)", + ) + parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging") - subparsers = 
parser.add_subparsers(dest="command", help=_("Available commands")) + subparsers = parser.add_subparsers(dest="command", help="Available commands") scan.add_parser(subparsers) list_cmd.add_parser(subparsers) @@ -43,14 +66,26 @@ def build_parser() -> argparse.ArgumentParser: voiceover_cli.add_parser(subparsers) screen_cli.add_parser(subparsers) obs_cli.add_parser(subparsers) + observe_cli.add_parser(subparsers) + changelog_cmd.add_parser(subparsers) + schemas_cmd.add_parser(subparsers) + stream_cli.add_parser(subparsers) return parser def main(argv: list[str] | None = None) -> None: + # parse 전에 --plain 재감지 (argv가 명시적으로 전달된 경우) + plain = _early_plain_detect(argv) or _PLAIN_EARLY + if plain: + set_plain(True) + parser = build_parser() args = parser.parse_args(argv) + if getattr(args, "plain", False): + set_plain(True) + if args.verbose: logging.basicConfig(level=logging.DEBUG, format="%(name)s: %(message)s") diff --git a/src/audioman/cli/bounce.py b/src/audioman/cli/bounce.py index 33def53..5c94e6b 100644 --- a/src/audioman/cli/bounce.py +++ b/src/audioman/cli/bounce.py @@ -4,27 +4,26 @@ import argparse from audioman.cli.output import print_error, print_json, print_success, print_warning, output_console -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("bounce", help=_("Bounce multiple tracks into a single stereo file")) - parser.add_argument("inputs", nargs="*", help=_("Input audio files")) - parser.add_argument("--output", "-o", required=True, help=_("Output file path")) + parser = subparsers.add_parser("bounce", help="Bounce multiple tracks into a single stereo file") + parser.add_argument("inputs", nargs="*", help="Input audio files") + parser.add_argument("--output", "-o", required=True, help="Output file path") parser.add_argument( "--gain", default="", - help=_("Comma-separated gain values in dB per track (e.g. '0,-3,-6')"), + help="Comma-separated gain values in dB per track (e.g. 
'0,-3,-6')", ) parser.add_argument( "--pan", default="", - help=_("Comma-separated pan values per track (-1.0 L ~ 0.0 C ~ 1.0 R, e.g. '0,-0.5,0.5')"), + help="Comma-separated pan values per track (-1.0 L ~ 0.0 C ~ 1.0 R, e.g. '0,-0.5,0.5')", ) parser.add_argument( "--chain", default="", - help=_("Per-track plugin chains separated by '|' (e.g. 'denoise|limiter:threshold=-1|')"), + help="Per-track plugin chains separated by '|' (e.g. 'denoise|limiter:threshold=-1|')", ) - parser.add_argument("--session", help=_("Session file (YAML/JSON) — overrides other track options")) - parser.add_argument("--dry-run", action="store_true", help=_("Show plan without executing")) + parser.add_argument("--session", help="Session file (YAML/JSON) — overrides other track options") + parser.add_argument("--dry-run", action="store_true", help="Show plan without executing") parser.set_defaults(func=run) diff --git a/src/audioman/cli/chain.py b/src/audioman/cli/chain.py index ecaf81e..2d23670 100644 --- a/src/audioman/cli/chain.py +++ b/src/audioman/cli/chain.py @@ -7,22 +7,21 @@ from audioman.cli.output import print_error, print_json, print_success, print_warning, output_console from audioman.core.pipeline import parse_chain_string, run_pipeline from audioman.core.batch import collect_audio_files, resolve_output_path -from audioman.i18n import _ from pathlib import Path def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("chain", help=_("Process audio through multiple plugins sequentially")) - parser.add_argument("input", help=_("Input audio file or directory")) + parser = subparsers.add_parser("chain", help="Process audio through multiple plugins sequentially") + parser.add_argument("input", help="Input audio file or directory") parser.add_argument( "--steps", "-s", required=True, - help=_("Processing chain (e.g. 'dehum:notch_frequency=60,declick,denoise:noise_reduction_db=15')"), + help="Processing chain (e.g. 
'dehum:notch_frequency=60,declick,denoise:noise_reduction_db=15')", ) - parser.add_argument("--output", "-o", required=True, help=_("Output file or directory")) - parser.add_argument("--recursive", "-r", action="store_true", help=_("Include subdirectories (batch)")) - parser.add_argument("--suffix", default="", help=_("Output filename suffix (batch)")) - parser.add_argument("--dry-run", action="store_true", help=_("Show plan without executing")) - parser.add_argument("--workers", "-w", type=int, default=1, help=_("Number of parallel workers (default: 1)")) + parser.add_argument("--output", "-o", required=True, help="Output file or directory") + parser.add_argument("--recursive", "-r", action="store_true", help="Include subdirectories (batch)") + parser.add_argument("--suffix", default="", help="Output filename suffix (batch)") + parser.add_argument("--dry-run", action="store_true", help="Show plan without executing") + parser.add_argument("--workers", "-w", type=int, default=1, help="Number of parallel workers (default: 1)") parser.set_defaults(func=run) diff --git a/src/audioman/cli/changelog_cmd.py b/src/audioman/cli/changelog_cmd.py new file mode 100644 index 0000000..129c3ad --- /dev/null +++ b/src/audioman/cli/changelog_cmd.py @@ -0,0 +1,162 @@ +# Created: 2026-05-11 +# Purpose: `audioman changelog` — LLM agent에게 변경 이력을 노출. +# 후기 #5 대응: --version만 있고 어떤 인자가 언제 들어왔는지 추적 불가. +# CHANGELOG.md(Keep a Changelog 형식)를 파싱해 plain text 또는 JSON으로. 
+ +from __future__ import annotations + +import argparse +import json +import re +import sys +from pathlib import Path +from typing import Optional + +from audioman import __version__ + + +_HEADER_RE = re.compile(r"^##\s*\[(?P<version>[^\]]+)\](?:\s*-\s*(?P<date>\S+))?\s*$") +_SECTION_RE = re.compile(r"^###\s+(?P<name>.+?)\s*$") + + +def _find_changelog() -> Optional[Path]: + """패키지 설치 위치 → repo root → cwd 순으로 CHANGELOG.md를 찾는다.""" + here = Path(__file__).resolve() + candidates = [ + here.parent.parent.parent.parent / "CHANGELOG.md", # src/audioman/cli → repo + here.parent.parent.parent / "CHANGELOG.md", + Path.cwd() / "CHANGELOG.md", + ] + for c in candidates: + if c.is_file(): + return c + return None + + +def parse_changelog(text: str) -> list[dict]: + """Keep-a-Changelog 형식의 텍스트를 entries[]로 파싱.""" + entries: list[dict] = [] + current: Optional[dict] = None + current_section: Optional[str] = None + + for line in text.splitlines(): + m = _HEADER_RE.match(line) + if m: + current = { + "version": m.group("version"), + "date": m.group("date"), + "sections": {}, + } + entries.append(current) + current_section = None + continue + + if current is None: + continue + + ms = _SECTION_RE.match(line) + if ms: + current_section = ms.group("name").strip().lower() + current["sections"].setdefault(current_section, []) + continue + + if current_section is not None: + stripped = line.strip() + if stripped.startswith("- "): + current["sections"][current_section].append(stripped[2:].strip()) + elif stripped and current["sections"][current_section]: + # 들여쓰기 줄 = 직전 bullet 연속 + current["sections"][current_section][-1] += " " + stripped + + return entries + + +def _version_tuple(v: str) -> tuple: + """`0.2.0`, `unreleased` 같은 값을 비교 가능한 튜플로.""" + if v.lower() == "unreleased": + return (1 << 30,) # 항상 최신 + parts = [] + for p in re.split(r"[.\-+]", v): + if p.isdigit(): + parts.append(int(p)) + else: + parts.append(p) + return tuple(parts) + + +def filter_since(entries: list[dict], since: str) -> list[dict]:
+ since_t = _version_tuple(since) + out = [] + for e in entries: + try: + v_t = _version_tuple(e["version"]) + except Exception: + continue + if v_t > since_t: + out.append(e) + return out + + +def add_parser(subparsers: argparse._SubParsersAction) -> None: + parser = subparsers.add_parser( + "changelog", + help="Show audioman changelog (LLM-friendly, parses CHANGELOG.md)", + description=( + "Surface the project CHANGELOG so LLM agents can tell which flags / " + "commands exist in which version. Use --since X.Y.Z to filter." + ), + ) + parser.add_argument("--since", default=None, help="Only show entries newer than this version") + parser.add_argument("--path", default=None, help="Explicit path to CHANGELOG.md") + parser.set_defaults(func=run) + + +def run(args: argparse.Namespace) -> None: + path: Optional[Path] + if args.path: + path = Path(args.path) + else: + path = _find_changelog() + + if path is None or not path.is_file(): + msg = "CHANGELOG.md not found" + if args.json: + print(json.dumps({ + "$schema": "audioman://schema/changelog.v1.json", + "audioman_version": __version__, + "command": "changelog", + "error": msg, + "entries": [], + })) + sys.exit(1) + print(f"error: {msg}", file=sys.stderr) + sys.exit(1) + + text = path.read_text(encoding="utf-8") + entries = parse_changelog(text) + if args.since: + entries = filter_since(entries, args.since) + + if args.json: + print(json.dumps({ + "$schema": "audioman://schema/changelog.v1.json", + "audioman_version": __version__, + "command": "changelog", + "source": str(path), + "entries": entries, + }, indent=2, ensure_ascii=False)) + return + + # plain text 출력 (rich 미사용 — LLM grep 친화) + for e in entries: + header = f"## [{e['version']}]" + if e.get("date"): + header += f" - {e['date']}" + print(header) + for section, bullets in e["sections"].items(): + if not bullets: + continue + print(f"### {section}") + for b in bullets: + print(f"- {b}") + print() diff --git a/src/audioman/cli/commit_cmd.py 
b/src/audioman/cli/commit_cmd.py index 7dd2662..de66878 100644 --- a/src/audioman/cli/commit_cmd.py +++ b/src/audioman/cli/commit_cmd.py @@ -4,26 +4,25 @@ import argparse from audioman.cli.output import print_error, print_json, print_success, print_warning, output_console -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("commit", help=_("Commit plugin chain to audio with auto delay compensation")) - parser.add_argument("input", help=_("Input audio file")) - parser.add_argument("--output", "-o", required=True, help=_("Output file path")) + parser = subparsers.add_parser("commit", help="Commit plugin chain to audio with auto delay compensation") + parser.add_argument("input", help="Input audio file") + parser.add_argument("--output", "-o", required=True, help="Output file path") parser.add_argument( "--chain", "-s", required=True, - help=_("Plugin chain (e.g. 'denoise:threshold=-20,dehum:freq=60')"), + help="Plugin chain (e.g. 
'denoise:threshold=-20,dehum:freq=60')", ) parser.add_argument( "--no-compensation", action="store_true", - help=_("Disable auto delay compensation"), + help="Disable auto delay compensation", ) parser.add_argument( "--no-tail-trim", action="store_true", - help=_("Keep plugin tail (don't trim to original length)"), + help="Keep plugin tail (don't trim to original length)", ) - parser.add_argument("--dry-run", action="store_true", help=_("Measure latency only (no processing)")) + parser.add_argument("--dry-run", action="store_true", help="Measure latency only (no processing)") parser.set_defaults(func=run) diff --git a/src/audioman/cli/doctor.py b/src/audioman/cli/doctor.py index 334a6c2..45e4f5f 100644 --- a/src/audioman/cli/doctor.py +++ b/src/audioman/cli/doctor.py @@ -5,16 +5,15 @@ import json from audioman.cli.output import print_error, print_json, print_success, print_info, output_console -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser( "doctor", - help=_("Plugin analysis — frequency response, THD, dynamics, waveshaper, performance"), + help="Plugin analysis — frequency response, THD, dynamics, waveshaper, performance", ) - parser.add_argument("--plugin", "-p", required=True, help=_("Plugin name or path")) - parser.add_argument("--param", action="append", default=[], help=_("Parameter (key=value)")) + parser.add_argument("--plugin", "-p", required=True, help="Plugin name or path") + parser.add_argument("--param", action="append", default=[], help="Parameter (key=value)") # 분석 모드 parser.add_argument( @@ -22,37 +21,37 @@ def add_parser(subparsers: argparse._SubParsersAction) -> None: choices=["linear", "thd", "imd", "sweep", "dynamics", "attack-release", "waveshaper", "performance", "all"], default="all", - help=_("Analysis mode (default: all)"), + help="Analysis mode (default: all)", ) # 옵션 - parser.add_argument("--frequency", "-f", type=float, default=1000.0, help=_("Test frequency 
Hz")) - parser.add_argument("--level", type=float, default=-6.0, help=_("Input level dB")) + parser.add_argument("--frequency", "-f", type=float, default=1000.0, help="Test frequency Hz") + parser.add_argument("--level", type=float, default=-6.0, help="Input level dB") parser.add_argument("--sample-rate", "-sr", type=int, default=44100) parser.add_argument("--fft-size", type=int, default=16384) - parser.add_argument("--mid-side", action="store_true", help=_("M/S mode")) + parser.add_argument("--mid-side", action="store_true", help="M/S mode") # 비교 모드 - parser.add_argument("--compare", metavar="PLUGIN2", help=_("Compare with second plugin")) - parser.add_argument("--compare-param", action="append", default=[], help=_("Second plugin parameters")) + parser.add_argument("--compare", metavar="PLUGIN2", help="Compare with second plugin") + parser.add_argument("--compare-param", action="append", default=[], help="Second plugin parameters") # CLAP 임베딩 - parser.add_argument("--clap", action="store_true", help=_("CLAP embedding profiling (per-parameter saturation fingerprint)")) + parser.add_argument("--clap", action="store_true", help="CLAP embedding profiling (per-parameter saturation fingerprint)") parser.add_argument("--clap-sweep", metavar="PARAM=v1,v2,...", action="append", default=[], - help=_("CLAP sweep parameters (e.g. --clap-sweep drive=0,25,50,75,100)")) - parser.add_argument("--clap-output", metavar="NPY", help=_("CLAP embedding npy save path")) + help="CLAP sweep parameters (e.g. 
--clap-sweep drive=0,25,50,75,100)") + parser.add_argument("--clap-output", metavar="NPY", help="CLAP embedding npy save path") # waveshaper v2 옵션 parser.add_argument("--legacy-waveshaper", action="store_true", - help=_("Use legacy waveshaper (single level, single cycle)")) + help="Use legacy waveshaper (single level, single cycle)") parser.add_argument("--ws-levels", metavar="dB", type=float, nargs="+", default=None, - help=_("Waveshaper v2 measurement levels in dBFS (default: -24 -18 -12 -6 -3 -1 0)")) + help="Waveshaper v2 measurement levels in dBFS (default: -24 -18 -12 -6 -3 -1 0)") parser.add_argument("--ws-points", type=int, default=256, - help=_("Waveshaper v2 resampling points (default: 256)")) + help="Waveshaper v2 resampling points (default: 256)") # 출력 - parser.add_argument("--output", "-o", metavar="FILE", help=_("Save result JSON file")) + parser.add_argument("--output", "-o", metavar="FILE", help="Save result JSON file") parser.set_defaults(func=run) diff --git a/src/audioman/cli/dump.py b/src/audioman/cli/dump.py index 5cf9461..1042cf8 100644 --- a/src/audioman/cli/dump.py +++ b/src/audioman/cli/dump.py @@ -8,25 +8,24 @@ from audioman.cli.output import print_error, print_json, print_warning, output_console from audioman.core.registry import get_registry from audioman.core.engine import parse_params -from audioman.i18n import _ from audioman.plugins.vst3 import VST3PluginWrapper def add_parser(subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser( "dump", - help=_("Dump plugin parameter state to JSON/JSONL"), + help="Dump plugin parameter state to JSON/JSONL", ) # 단일 모드: 플러그인 이름 지정 - parser.add_argument("plugin", nargs="?", default=None, help=_("Plugin name (omit for --all)")) - parser.add_argument("--param", action="append", default=[], help=_("Set parameter before dump (key=value)")) - parser.add_argument("--preset", help=_("Preset name (apply before dump)")) - parser.add_argument("--save-preset", metavar="NAME", 
help=_("Save dump as preset")) + parser.add_argument("plugin", nargs="?", default=None, help="Plugin name (omit for --all)") + parser.add_argument("--param", action="append", default=[], help="Set parameter before dump (key=value)") + parser.add_argument("--preset", help="Preset name (apply before dump)") + parser.add_argument("--save-preset", metavar="NAME", help="Save dump as preset") # 배치 모드 - parser.add_argument("--all", action="store_true", help=_("Dump all plugins as JSONL")) - parser.add_argument("--filter", metavar="KEYWORD", help=_("Plugin name filter (with --all)")) - parser.add_argument("--format-filter", choices=["vst3", "au"], help=_("Format filter (with --all)")) - parser.add_argument("--output-file", "-o", metavar="PATH", help=_("JSONL output file (default: stdout)")) + parser.add_argument("--all", action="store_true", help="Dump all plugins as JSONL") + parser.add_argument("--filter", metavar="KEYWORD", help="Plugin name filter (with --all)") + parser.add_argument("--format-filter", choices=["vst3", "au"], help="Format filter (with --all)") + parser.add_argument("--output-file", "-o", metavar="PATH", help="JSONL output file (default: stdout)") parser.set_defaults(func=run) diff --git a/src/audioman/cli/edl.py b/src/audioman/cli/edl.py index 4b08dbb..0882170 100644 --- a/src/audioman/cli/edl.py +++ b/src/audioman/cli/edl.py @@ -16,64 +16,63 @@ print_table, ) from audioman.core import edl as edl_core -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser( - "edl", help=_("Non-destructive edit workflow (EDL)") + "edl", help="Non-destructive edit workflow (EDL)" ) - sub = parser.add_subparsers(dest="action", help=_("EDL action")) + sub = parser.add_subparsers(dest="action", help="EDL action") # init - p_init = sub.add_parser("init", help=_("Initialize EDL workspace for an input file")) - p_init.add_argument("input", help=_("Source audio file")) + p_init = sub.add_parser("init", 
help="Initialize EDL workspace for an input file") + p_init.add_argument("input", help="Source audio file") p_init.set_defaults(func=run_init) # add - p_add = sub.add_parser("add", help=_("Append an op to the active EDL")) - p_add.add_argument("--source", "-s", required=True, help=_("Source audio file")) - p_add.add_argument("op_type", help=_( + p_add = sub.add_parser("add", help="Append an op to the active EDL") + p_add.add_argument("--source", "-s", required=True, help="Source audio file") + p_add.add_argument("op_type", help= "Op type (cut_region, trim, trim_silence, splice, fade_in, fade_out, " "normalize, gain, gate, process, chain)" - )) + ) p_add.add_argument( "--param", "-p", action="append", default=[], - help=_("Op parameter as key=value (repeat). Numbers/bools auto-detected; " - "use json: for arrays/objects."), + help="Op parameter as key=value (repeat). Numbers/bools auto-detected; " + "use json: for arrays/objects.", ) p_add.set_defaults(func=run_add) # list (= show ops) - p_list = sub.add_parser("list", help=_("Show all ops in the active EDL")) + p_list = sub.add_parser("list", help="Show all ops in the active EDL") p_list.add_argument("--source", "-s", required=True) p_list.set_defaults(func=run_list) # undo - p_undo = sub.add_parser("undo", help=_("Undo the most recent op")) + p_undo = sub.add_parser("undo", help="Undo the most recent op") p_undo.add_argument("--source", "-s", required=True) p_undo.set_defaults(func=run_undo) # redo - p_redo = sub.add_parser("redo", help=_("Redo the most recently undone op")) + p_redo = sub.add_parser("redo", help="Redo the most recently undone op") p_redo.add_argument("--source", "-s", required=True) p_redo.set_defaults(func=run_redo) # render - p_render = sub.add_parser("render", help=_("Render the EDL to a final output file")) + p_render = sub.add_parser("render", help="Render the EDL to a final output file") p_render.add_argument("--source", "-s", required=True) - p_render.add_argument("--output", "-o", 
required=True, help=_("Output WAV path")) + p_render.add_argument("--output", "-o", required=True, help="Output WAV path") p_render.add_argument("--no-verify", action="store_true", - help=_("Skip source SHA-256 verification")) + help="Skip source SHA-256 verification") p_render.set_defaults(func=run_render) # status (= 워크스페이스 상태) - p_status = sub.add_parser("status", help=_("Show workspace status")) + p_status = sub.add_parser("status", help="Show workspace status") p_status.add_argument("--source", "-s", required=True) p_status.set_defaults(func=run_status) # clear - p_clear = sub.add_parser("clear", help=_("Remove all ops from active EDL (history kept)")) + p_clear = sub.add_parser("clear", help="Remove all ops from active EDL (history kept)") p_clear.add_argument("--source", "-s", required=True) p_clear.set_defaults(func=run_clear) diff --git a/src/audioman/cli/eq_profile.py b/src/audioman/cli/eq_profile.py index 7271306..1ab7c58 100644 --- a/src/audioman/cli/eq_profile.py +++ b/src/audioman/cli/eq_profile.py @@ -7,57 +7,56 @@ import numpy as np from audioman.cli.output import print_error, print_json, print_success, output_console -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser( "eq-profile", - help=_("EQ plugin profiling — frequency response, phase, group delay, nonlinearity"), + help="EQ plugin profiling — frequency response, phase, group delay, nonlinearity", ) - parser.add_argument("--plugin", "-p", required=True, help=_("Plugin name or path")) - parser.add_argument("--param", action="append", default=[], help=_("EQ parameter (key=value)")) + parser.add_argument("--plugin", "-p", required=True, help="Plugin name or path") + parser.add_argument("--param", action="append", default=[], help="EQ parameter (key=value)") parser.add_argument("--bypass-param", action="append", default=[], - help=_("Bypass state parameter (key=value)")) + help="Bypass state parameter (key=value)") # 분석 모드 
parser.add_argument( "--mode", "-m", choices=["response", "sweep", "nonlinear", "all"], default="all", - help=_("Analysis mode (default: all)"), + help="Analysis mode (default: all)", ) # 스윕 파라미터 parser.add_argument( "--sweep-param", action="append", default=[], metavar="NAME=v1,v2,...", - help=_("Parameter sweep (e.g. --sweep-param band1_gain=-12,-6,0,6,12)"), + help="Parameter sweep (e.g. --sweep-param band1_gain=-12,-6,0,6,12)", ) parser.add_argument( "--sweep-fixed", action="append", default=[], metavar="KEY=VALUE", - help=_("Fixed parameters during sweep (e.g. --sweep-fixed band1_freq=1000)"), + help="Fixed parameters during sweep (e.g. --sweep-fixed band1_freq=1000)", ) # 비선형성 레벨 parser.add_argument( "--levels", type=float, nargs="+", default=None, - help=_("Input levels for nonlinearity test (dBFS, default: -36 -24 -18 -12 -6 -3 0)"), + help="Input levels for nonlinearity test (dBFS, default: -36 -24 -18 -12 -6 -3 0)", ) # 공통 옵션 parser.add_argument("--sample-rate", "-sr", type=int, default=44100) parser.add_argument("--fft-size", type=int, default=32768) - parser.add_argument("--level", type=float, default=-12.0, help=_("Input level dB")) + parser.add_argument("--level", type=float, default=-12.0, help="Input level dB") parser.add_argument("--sweep-duration", type=float, default=6.0, - help=_("Log sweep duration in seconds")) + help="Log sweep duration in seconds") # 출력 - parser.add_argument("--output", "-o", metavar="FILE", help=_("Save result JSON file")) + parser.add_argument("--output", "-o", metavar="FILE", help="Save result JSON file") parser.add_argument("--save-npy", metavar="DIR", - help=_("Save frequency/phase/delay curves as .npy files")) + help="Save frequency/phase/delay curves as .npy files") parser.set_defaults(func=run) diff --git a/src/audioman/cli/fader_compare.py b/src/audioman/cli/fader_compare.py index 16c0cc7..e71145f 100644 --- a/src/audioman/cli/fader_compare.py +++ b/src/audioman/cli/fader_compare.py @@ -14,22 +14,21 @@ print_json, 
print_table, ) -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser( "fader-compare", - help=_("Compare automix recommendations against a fader-test ground truth"), + help="Compare automix recommendations against a fader-test ground truth", ) - parser.add_argument("ground_truth", help=_("fader-test gains JSON (ground truth)")) + parser.add_argument("ground_truth", help="fader-test gains JSON (ground truth)") parser.add_argument( "--target", default="archive_techno_standard", - help=_("Automix target profile (default: archive_techno_standard)"), + help="Automix target profile (default: archive_techno_standard)", ) parser.add_argument( "--reference", default=None, - help=_("Reference WAV (used when --target reference)"), + help="Reference WAV (used when --target reference)", ) parser.set_defaults(func=run) diff --git a/src/audioman/cli/fader_test.py b/src/audioman/cli/fader_test.py index e4f196c..0a443fc 100644 --- a/src/audioman/cli/fader_test.py +++ b/src/audioman/cli/fader_test.py @@ -11,7 +11,6 @@ from pathlib import Path from audioman.cli.output import print_error, print_success -from audioman.i18n import _ logger = logging.getLogger(__name__) @@ -19,12 +18,12 @@ def add_parser(subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser( "fader-test", - help=_("Open a multitrack mixer GUI to set per-track gain balance (export as ground truth JSON)"), + help="Open a multitrack mixer GUI to set per-track gain balance (export as ground truth JSON)", ) - parser.add_argument("input", help=_("Stem directory (folder of .wav files)")) - parser.add_argument("--load", help=_("Load gains JSON at startup")) + parser.add_argument("input", help="Stem directory (folder of .wav files)") + parser.add_argument("--load", help="Load gains JSON at startup") parser.add_argument("--block-size", type=int, default=1024, - help=_("Audio block size (default: 1024)")) + help="Audio block size 
(default: 1024)") parser.set_defaults(func=run) diff --git a/src/audioman/cli/fx.py b/src/audioman/cli/fx.py index d8b6209..c75fed3 100644 --- a/src/audioman/cli/fx.py +++ b/src/audioman/cli/fx.py @@ -12,113 +12,112 @@ from audioman.core.audio_file import read_audio, write_audio, get_audio_stats from audioman.core.batch import collect_audio_files, resolve_output_path from audioman.core import dsp -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("fx", help=_("Built-in DSP effects (fade, trim, cut, splice, normalize, gate, gain)")) - parser.add_argument("input", help=_("Input audio file or directory")) + parser = subparsers.add_parser("fx", help="Built-in DSP effects (fade, trim, cut, splice, normalize, gate, gain)") + parser.add_argument("input", help="Input audio file or directory") - fx_sub = parser.add_subparsers(dest="effect", help=_("Effect type")) + fx_sub = parser.add_subparsers(dest="effect", help="Effect type") - fade_curve_help = _("Fade curve: linear/cosine/equal_power/exponential/logarithmic (default: linear)") + fade_curve_help = "Fade curve: linear/cosine/equal_power/exponential/logarithmic (default: linear)" # fade-in - fi = fx_sub.add_parser("fade-in", help=_("Fade in (linear or curved)")) - fi.add_argument("--samples", type=int, default=None, help=_("Fade length (samples)")) - fi.add_argument("--duration", type=float, default=None, help=_("Fade length (seconds)")) + fi = fx_sub.add_parser("fade-in", help="Fade in (linear or curved)") + fi.add_argument("--samples", type=int, default=None, help="Fade length (samples)") + fi.add_argument("--duration", type=float, default=None, help="Fade length (seconds)") fi.add_argument("--curve", choices=list(dsp.FADE_CURVES), default="linear", help=fade_curve_help) - fi.add_argument("--output", "-o", required=True, help=_("Output path")) + fi.add_argument("--output", "-o", required=True, help="Output path") fi.add_argument("--recursive", 
"-r", action="store_true") fi.add_argument("--suffix", default="") # fade-out - fo = fx_sub.add_parser("fade-out", help=_("Fade out (linear or curved)")) - fo.add_argument("--samples", type=int, default=None, help=_("Fade length (samples)")) - fo.add_argument("--duration", type=float, default=None, help=_("Fade length (seconds)")) + fo = fx_sub.add_parser("fade-out", help="Fade out (linear or curved)") + fo.add_argument("--samples", type=int, default=None, help="Fade length (samples)") + fo.add_argument("--duration", type=float, default=None, help="Fade length (seconds)") fo.add_argument("--curve", choices=list(dsp.FADE_CURVES), default="linear", help=fade_curve_help) - fo.add_argument("--output", "-o", required=True, help=_("Output path")) + fo.add_argument("--output", "-o", required=True, help="Output path") fo.add_argument("--recursive", "-r", action="store_true") fo.add_argument("--suffix", default="") # pad (헤드/테일 무음 추가) - pd = fx_sub.add_parser("pad", help=_("Prepend/append silence (mastering delivery prep)")) - pd.add_argument("--head-ms", type=float, default=0.0, help=_("Head silence (ms)")) - pd.add_argument("--head-sec", type=float, default=None, help=_("Head silence (seconds, overrides --head-ms)")) - pd.add_argument("--tail-ms", type=float, default=0.0, help=_("Tail silence (ms)")) - pd.add_argument("--tail-sec", type=float, default=None, help=_("Tail silence (seconds, overrides --tail-ms)")) - pd.add_argument("--output", "-o", required=True, help=_("Output path")) + pd = fx_sub.add_parser("pad", help="Prepend/append silence (mastering delivery prep)") + pd.add_argument("--head-ms", type=float, default=0.0, help="Head silence (ms)") + pd.add_argument("--head-sec", type=float, default=None, help="Head silence (seconds, overrides --head-ms)") + pd.add_argument("--tail-ms", type=float, default=0.0, help="Tail silence (ms)") + pd.add_argument("--tail-sec", type=float, default=None, help="Tail silence (seconds, overrides --tail-ms)") + 
pd.add_argument("--output", "-o", required=True, help="Output path") pd.add_argument("--recursive", "-r", action="store_true") pd.add_argument("--suffix", default="") # remove-dc - dc = fx_sub.add_parser("remove-dc", help=_("Remove DC offset (subtract per-channel mean)")) - dc.add_argument("--output", "-o", required=True, help=_("Output path")) + dc = fx_sub.add_parser("remove-dc", help="Remove DC offset (subtract per-channel mean)") + dc.add_argument("--output", "-o", required=True, help="Output path") dc.add_argument("--recursive", "-r", action="store_true") dc.add_argument("--suffix", default="") # trim - tr = fx_sub.add_parser("trim", help=_("Trim by samples/time")) - tr.add_argument("--start", type=int, default=0, help=_("Start sample")) - tr.add_argument("--end", type=int, default=None, help=_("End sample")) - tr.add_argument("--start-sec", type=float, default=None, help=_("Start (seconds)")) - tr.add_argument("--end-sec", type=float, default=None, help=_("End (seconds)")) - tr.add_argument("--output", "-o", required=True, help=_("Output path")) + tr = fx_sub.add_parser("trim", help="Trim by samples/time") + tr.add_argument("--start", type=int, default=0, help="Start sample") + tr.add_argument("--end", type=int, default=None, help="End sample") + tr.add_argument("--start-sec", type=float, default=None, help="Start (seconds)") + tr.add_argument("--end-sec", type=float, default=None, help="End (seconds)") + tr.add_argument("--output", "-o", required=True, help="Output path") tr.add_argument("--recursive", "-r", action="store_true") tr.add_argument("--suffix", default="") # cut-region (중간 구간 삭제) - cr = fx_sub.add_parser("cut-region", help=_("Delete a middle region and join the remainder")) - cr.add_argument("--start", type=int, default=None, help=_("Region start sample")) - cr.add_argument("--end", type=int, default=None, help=_("Region end sample")) - cr.add_argument("--start-sec", type=float, default=None, help=_("Region start (seconds)")) - 
cr.add_argument("--end-sec", type=float, default=None, help=_("Region end (seconds)")) - cr.add_argument("--crossfade", type=int, default=0, help=_("Crossfade samples at the join (default: 0)")) - cr.add_argument("--crossfade-ms", type=float, default=None, help=_("Crossfade length in milliseconds")) - cr.add_argument("--output", "-o", required=True, help=_("Output path")) + cr = fx_sub.add_parser("cut-region", help="Delete a middle region and join the remainder") + cr.add_argument("--start", type=int, default=None, help="Region start sample") + cr.add_argument("--end", type=int, default=None, help="Region end sample") + cr.add_argument("--start-sec", type=float, default=None, help="Region start (seconds)") + cr.add_argument("--end-sec", type=float, default=None, help="Region end (seconds)") + cr.add_argument("--crossfade", type=int, default=0, help="Crossfade samples at the join (default: 0)") + cr.add_argument("--crossfade-ms", type=float, default=None, help="Crossfade length in milliseconds") + cr.add_argument("--output", "-o", required=True, help="Output path") cr.add_argument("--recursive", "-r", action="store_true") cr.add_argument("--suffix", default="") # splice (다른 클립을 삽입/덮어쓰기/믹스) - sp = fx_sub.add_parser("splice", help=_("Insert/overwrite/mix another clip into the input")) - sp.add_argument("--clip", required=True, help=_("Clip audio file to splice in")) - sp.add_argument("--position", type=int, default=None, help=_("Splice position sample")) - sp.add_argument("--position-sec", type=float, default=None, help=_("Splice position (seconds)")) + sp = fx_sub.add_parser("splice", help="Insert/overwrite/mix another clip into the input") + sp.add_argument("--clip", required=True, help="Clip audio file to splice in") + sp.add_argument("--position", type=int, default=None, help="Splice position sample") + sp.add_argument("--position-sec", type=float, default=None, help="Splice position (seconds)") sp.add_argument("--mode", choices=["insert", "overwrite", "mix"], 
default="insert", - help=_("Splice mode (default: insert)")) - sp.add_argument("--crossfade", type=int, default=0, help=_("Crossfade samples (insert mode only)")) - sp.add_argument("--crossfade-ms", type=float, default=None, help=_("Crossfade length in milliseconds")) - sp.add_argument("--output", "-o", required=True, help=_("Output path")) + help="Splice mode (default: insert)") + sp.add_argument("--crossfade", type=int, default=0, help="Crossfade samples (insert mode only)") + sp.add_argument("--crossfade-ms", type=float, default=None, help="Crossfade length in milliseconds") + sp.add_argument("--output", "-o", required=True, help="Output path") # trim-silence - ts = fx_sub.add_parser("trim-silence", help=_("Trim leading/trailing silence")) - ts.add_argument("--threshold", type=float, default=-40.0, help=_("Threshold dB (default: -40)")) - ts.add_argument("--pad", type=int, default=0, help=_("Silence boundary padding samples")) - ts.add_argument("--output", "-o", required=True, help=_("Output path")) + ts = fx_sub.add_parser("trim-silence", help="Trim leading/trailing silence") + ts.add_argument("--threshold", type=float, default=-40.0, help="Threshold dB (default: -40)") + ts.add_argument("--pad", type=int, default=0, help="Silence boundary padding samples") + ts.add_argument("--output", "-o", required=True, help="Output path") ts.add_argument("--recursive", "-r", action="store_true") ts.add_argument("--suffix", default="") # normalize - nm = fx_sub.add_parser("normalize", help=_("Normalize (peak or RMS)")) - nm.add_argument("--peak", type=float, default=None, help=_("Peak target dB (e.g. -1)")) - nm.add_argument("--target-rms", type=float, default=None, help=_("RMS target dB (e.g. -20)")) - nm.add_argument("--output", "-o", required=True, help=_("Output path")) + nm = fx_sub.add_parser("normalize", help="Normalize (peak or RMS)") + nm.add_argument("--peak", type=float, default=None, help="Peak target dB (e.g. 
-1)") + nm.add_argument("--target-rms", type=float, default=None, help="RMS target dB (e.g. -20)") + nm.add_argument("--output", "-o", required=True, help="Output path") nm.add_argument("--recursive", "-r", action="store_true") nm.add_argument("--suffix", default="") # gate - gt = fx_sub.add_parser("gate", help=_("Noise gate (RMS-based)")) - gt.add_argument("--threshold", type=float, default=-50.0, help=_("Threshold dB (default: -50)")) - gt.add_argument("--attack", type=float, default=0.01, help=_("Attack time (seconds)")) - gt.add_argument("--release", type=float, default=0.05, help=_("Release time (seconds)")) - gt.add_argument("--output", "-o", required=True, help=_("Output path")) + gt = fx_sub.add_parser("gate", help="Noise gate (RMS-based)") + gt.add_argument("--threshold", type=float, default=-50.0, help="Threshold dB (default: -50)") + gt.add_argument("--attack", type=float, default=0.01, help="Attack time (seconds)") + gt.add_argument("--release", type=float, default=0.05, help="Release time (seconds)") + gt.add_argument("--output", "-o", required=True, help="Output path") gt.add_argument("--recursive", "-r", action="store_true") gt.add_argument("--suffix", default="") # gain - gn = fx_sub.add_parser("gain", help=_("dB gain")) - gn.add_argument("--db", type=float, required=True, help=_("Gain (dB)")) - gn.add_argument("--output", "-o", required=True, help=_("Output path")) + gn = fx_sub.add_parser("gain", help="dB gain") + gn.add_argument("--db", type=float, required=True, help="Gain (dB)") + gn.add_argument("--output", "-o", required=True, help="Output path") gn.add_argument("--recursive", "-r", action="store_true") gn.add_argument("--suffix", default="") diff --git a/src/audioman/cli/info.py b/src/audioman/cli/info.py index 9a709b6..25c0c61 100644 --- a/src/audioman/cli/info.py +++ b/src/audioman/cli/info.py @@ -5,13 +5,12 @@ from audioman.cli.output import print_error, print_json, print_table, output_console from audioman.core.registry import 
get_registry -from audioman.i18n import _ from audioman.plugins.vst3 import VST3PluginWrapper def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("info", help=_("Plugin details + parameter list")) - parser.add_argument("plugin", help=_("Plugin name (short_name or alias)")) + parser = subparsers.add_parser("info", help="Plugin details + parameter list") + parser.add_argument("plugin", help="Plugin name (short_name or alias)") parser.set_defaults(func=run) diff --git a/src/audioman/cli/list_cmd.py b/src/audioman/cli/list_cmd.py index cb4c44b..4951476 100644 --- a/src/audioman/cli/list_cmd.py +++ b/src/audioman/cli/list_cmd.py @@ -5,13 +5,12 @@ from audioman.cli.output import print_json, print_table from audioman.core.registry import get_registry -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("list", help=_("List registered plugins")) - parser.add_argument("--format", choices=["vst3", "au"], help=_("Format filter")) - parser.add_argument("--vendor", help=_("Vendor filter")) + parser = subparsers.add_parser("list", help="List registered plugins") + parser.add_argument("--format", choices=["vst3", "au"], help="Format filter") + parser.add_argument("--vendor", help="Vendor filter") parser.set_defaults(func=run) diff --git a/src/audioman/cli/master.py b/src/audioman/cli/master.py index 1563d29..fda5185 100644 --- a/src/audioman/cli/master.py +++ b/src/audioman/cli/master.py @@ -21,7 +21,6 @@ print_table, ) from audioman.core import dsp, edl as edl_core, qc -from audioman.i18n import _ # 마스터링 프로파일별 권장 prep 파라미터 @@ -61,49 +60,49 @@ def add_parser(subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser( - "master", help=_("Mastering delivery workflow (prep / qc / verify)") + "master", help="Mastering delivery workflow (prep / qc / verify)" ) - sub = parser.add_subparsers(dest="action", help=_("Master action")) + sub = 
parser.add_subparsers(dest="action", help="Master action") # prep - p_prep = sub.add_parser("prep", help=_("Prepare master file (DC remove, pad, fade, loudness norm)")) - p_prep.add_argument("input", help=_("Source audio file")) - p_prep.add_argument("--output", "-o", required=True, help=_("Output file path")) + p_prep = sub.add_parser("prep", help="Prepare master file (DC remove, pad, fade, loudness norm)") + p_prep.add_argument("input", help="Source audio file") + p_prep.add_argument("--output", "-o", required=True, help="Output file path") p_prep.add_argument("--profile", choices=list(PREP_PROFILES.keys()), default="spotify", - help=_("Mastering profile (default: spotify)")) - p_prep.add_argument("--head-pad-ms", type=float, default=None, help=_("Override head pad (ms)")) - p_prep.add_argument("--tail-pad-sec", type=float, default=None, help=_("Override tail pad (seconds)")) - p_prep.add_argument("--fade-in-ms", type=float, default=None, help=_("Override fade-in (ms)")) - p_prep.add_argument("--fade-out-ms", type=float, default=None, help=_("Override fade-out (ms)")) + help="Mastering profile (default: spotify)") + p_prep.add_argument("--head-pad-ms", type=float, default=None, help="Override head pad (ms)") + p_prep.add_argument("--tail-pad-sec", type=float, default=None, help="Override tail pad (seconds)") + p_prep.add_argument("--fade-in-ms", type=float, default=None, help="Override fade-in (ms)") + p_prep.add_argument("--fade-out-ms", type=float, default=None, help="Override fade-out (ms)") p_prep.add_argument("--fade-curve", choices=list(dsp.FADE_CURVES), default=None, - help=_("Fade curve (default: cosine)")) - p_prep.add_argument("--target-lufs", type=float, default=None, help=_("Target LUFS (None to skip norm)")) - p_prep.add_argument("--max-tp", type=float, default=None, help=_("Max true peak dBTP")) - p_prep.add_argument("--no-dc-remove", action="store_true", help=_("Skip DC offset removal")) + help="Fade curve (default: cosine)") + 
p_prep.add_argument("--target-lufs", type=float, default=None, help="Target LUFS (None to skip norm)") + p_prep.add_argument("--max-tp", type=float, default=None, help="Max true peak dBTP") + p_prep.add_argument("--no-dc-remove", action="store_true", help="Skip DC offset removal") p_prep.add_argument("--write-edl", action="store_true", - help=_("Also write the generated EDL into .audioman workspace")) + help="Also write the generated EDL into .audioman workspace") p_prep.set_defaults(func=run_prep) # qc - p_qc = sub.add_parser("qc", help=_("Run mastering QC report against a target profile")) - p_qc.add_argument("input", help=_("Audio file to evaluate")) + p_qc = sub.add_parser("qc", help="Run mastering QC report against a target profile") + p_qc.add_argument("input", help="Audio file to evaluate") p_qc.add_argument("--target", choices=qc.list_targets(), default="spotify", - help=_("Target profile (default: spotify)")) + help="Target profile (default: spotify)") p_qc.add_argument("--click-sensitivity", type=float, default=6.0, - help=_("Click detector sensitivity (default: 6.0). Lower = more sensitive")) + help="Click detector sensitivity (default: 6.0). 
Lower = more sensitive") p_qc.set_defaults(func=run_qc) # verify (prep + qc) - p_verify = sub.add_parser("verify", help=_("Prep + QC in one shot")) - p_verify.add_argument("input", help=_("Source audio file")) - p_verify.add_argument("--output", "-o", required=True, help=_("Output file path")) + p_verify = sub.add_parser("verify", help="Prep + QC in one shot") + p_verify.add_argument("input", help="Source audio file") + p_verify.add_argument("--output", "-o", required=True, help="Output file path") p_verify.add_argument("--profile", choices=list(PREP_PROFILES.keys()), default="spotify") p_verify.add_argument("--target", choices=qc.list_targets(), default=None, - help=_("QC target profile (default: same as --profile)")) + help="QC target profile (default: same as --profile)") p_verify.set_defaults(func=run_verify) # list-profiles - p_list = sub.add_parser("list-profiles", help=_("List available mastering profiles")) + p_list = sub.add_parser("list-profiles", help="List available mastering profiles") p_list.set_defaults(func=run_list_profiles) parser.set_defaults(func=lambda args: parser.print_help()) diff --git a/src/audioman/cli/mixdown.py b/src/audioman/cli/mixdown.py index 2dd5133..bead0c4 100644 --- a/src/audioman/cli/mixdown.py +++ b/src/audioman/cli/mixdown.py @@ -4,51 +4,50 @@ import argparse from audioman.cli.output import print_error, print_json, print_success, print_warning, output_console -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("mixdown", help=_("Mix tracks with master chain processing")) - parser.add_argument("inputs", nargs="*", help=_("Input audio files")) - parser.add_argument("--output", "-o", required=True, help=_("Output file path")) + parser = subparsers.add_parser("mixdown", help="Mix tracks with master chain processing") + parser.add_argument("inputs", nargs="*", help="Input audio files") + parser.add_argument("--output", "-o", required=True, help="Output file 
path") parser.add_argument( "--gain", default="", - help=_("Comma-separated gain values in dB per track"), + help="Comma-separated gain values in dB per track", ) parser.add_argument( "--pan", default="", - help=_("Comma-separated pan values per track (-1.0 L ~ 0.0 C ~ 1.0 R)"), + help="Comma-separated pan values per track (-1.0 L ~ 0.0 C ~ 1.0 R)", ) parser.add_argument( "--chain", default="", - help=_("Per-track plugin chains separated by '|'"), + help="Per-track plugin chains separated by '|'", ) parser.add_argument( "--master", default="", - help=_("Master bus plugin chain (e.g. 'limiter:threshold=-1')"), + help="Master bus plugin chain (e.g. 'limiter:threshold=-1')", ) - parser.add_argument("--session", help=_("Session file (YAML/JSON)")) + parser.add_argument("--session", help="Session file (YAML/JSON)") parser.add_argument( "--no-compensation", action="store_true", - help=_("Disable master chain delay compensation"), + help="Disable master chain delay compensation", ) - parser.add_argument("--dry-run", action="store_true", help=_("Show plan without executing")) + parser.add_argument("--dry-run", action="store_true", help="Show plan without executing") parser.add_argument( "--automix", action="store_true", - help=_("Auto-balance track gains using spectral analysis (default target: pink noise)"), + help="Auto-balance track gains using spectral analysis (default target: pink noise)", ) parser.add_argument( "--target", default="", - help=_("Automix target: 'pink' (default), 'pop', 'rock', 'electronica', 'default', " + help="Automix target: 'pink' (default), 'pop', 'rock', 'electronica', 'default', " "'reference', or genre cluster profiles: " "yt_rock, yt_bright_pop, yt_hiphop, yt_mid_scoop, yt_low_heavy_vocal, " "yt_high_dr, yt_ballad, yt_dark_lofi, " "archive_techno_standard, archive_sub_kick_driven, archive_minimal_sub, " - "archive_groovy_low, archive_dub_techno, archive_midrange_ambient"), + "archive_groovy_low, archive_dub_techno, archive_midrange_ambient", 
) parser.add_argument( "--reference", default="", - help=_("Reference audio file for automix target spectrum (requires --automix --target reference)"), + help="Reference audio file for automix target spectrum (requires --automix --target reference)", ) parser.set_defaults(func=run) diff --git a/src/audioman/cli/observe.py b/src/audioman/cli/observe.py new file mode 100644 index 0000000..d5e2c21 --- /dev/null +++ b/src/audioman/cli/observe.py @@ -0,0 +1,236 @@ +# Created: 2026-05-11 +# Purpose: `audioman observe` — fault 관측 1급 명령. +# analyze/doctor가 산발적으로 만들던 결함 정보를 단일 Finding[] 스키마로 통합. +# LLM agent가 `audioman observe X --json | jq '.findings'`로 바로 소비할 수 있게 한다. + +from __future__ import annotations + +import argparse +from pathlib import Path + +from audioman import __version__ +from audioman.cli.output import ( + output_console, + print_error, + print_json, + print_table, +) +from audioman.core.analysis import detect_silence, spectrum_diagnostics +from audioman.core.audio_file import get_audio_stats, read_audio +from audioman.core.batch import collect_audio_files +from audioman.core.detectors import ( + detect_signal_findings, + silence_to_findings, + spectrum_to_findings, +) +from audioman.core.findings import ( + Category, + SCHEMA_URI, + Severity, + filter_findings, +) + + +_CATEGORY_CHOICES = [c.value for c in Category] +_SEVERITY_CHOICES = [s.value for s in Severity] + + +def add_parser(subparsers: argparse._SubParsersAction) -> None: + parser = subparsers.add_parser( + "observe", + help="Observe audio faults across categories (signal, spectral, plugin, container)", + description=( + "Single-shot audio fault observation. Emits a uniform finding[] array " + "across signal/spectral/plugin/container categories with $schema metadata. " + "Designed for LLM agents — pair with --plain and --json for clean piping." 
+ ), + ) + parser.add_argument("input", help="Input audio file or directory") + parser.add_argument( + "--category", + default=",".join(_CATEGORY_CHOICES), + help=( + f"Comma-separated categories to enable (default: all). " + f"Choices: {','.join(_CATEGORY_CHOICES)}" + ), + ) + parser.add_argument( + "--severity", + choices=_SEVERITY_CHOICES, + default="info", + help="Minimum severity to report (info|warn|critical). Default: info", + ) + parser.add_argument( + "--silence-threshold", + type=float, + default=-40.0, + help="Silence detection threshold dB (default: -40)", + ) + parser.add_argument( + "--spectrum-fft", + type=int, + default=16384, + help="FFT size for spectral detectors (default: 16384)", + ) + parser.add_argument( + "--spectrum-min-rms", + type=float, + default=0.01, + help="Skip frames below this RMS when averaging spectrum (default: 0.01)", + ) + parser.add_argument( + "--recursive", "-r", + action="store_true", + help="Recurse into subdirectories (batch mode)", + ) + parser.set_defaults(func=run) + + +def _observe_file( + path: Path, + *, + categories: set[str], + min_severity: Severity, + silence_threshold: float, + spectrum_fft: int, + spectrum_min_rms: float, +) -> dict: + audio, sr = read_audio(path) + stats = get_audio_stats(audio, sr) + audio_length = audio.shape[-1] if audio.ndim == 2 else audio.shape[0] + + all_findings = [] + + if "signal" in categories: + all_findings.extend(detect_signal_findings(audio, sr, file=str(path))) + silence = detect_silence(audio, sr, threshold_db=silence_threshold) + all_findings.extend(silence_to_findings(silence, audio_length, sr, file=str(path))) + + spectrum = None + if "spectral" in categories: + spectrum = spectrum_diagnostics( + audio, sr, fft_size=spectrum_fft, min_rms=spectrum_min_rms + ) + all_findings.extend(spectrum_to_findings(spectrum, file=str(path))) + + # plugin/container 카테고리는 Phase C에서 채움. 지금은 사용자가 명시적으로 + # 지정하면 빈 결과를 반환 (스키마 일관성 유지). 
+ + filtered = filter_findings( + all_findings, + categories=categories, + min_severity=min_severity, + ) + + payload = { + "$schema": SCHEMA_URI, + "audioman_version": __version__, + "command": "observe", + "file": str(path), + "sample_rate": sr, + "channels": stats.channels, + "duration_sec": round(stats.duration, 6), + "total_samples": int(stats.frames), + "filter": { + "categories": sorted(categories), + "min_severity": min_severity.value, + }, + "findings": [f.to_dict() for f in filtered], + "summary": { + "total": len(filtered), + "by_severity": { + "info": sum(1 for f in filtered if f.severity is Severity.INFO), + "warn": sum(1 for f in filtered if f.severity is Severity.WARN), + "critical": sum(1 for f in filtered if f.severity is Severity.CRITICAL), + }, + "by_category": { + cat: sum(1 for f in filtered if f.category.value == cat) + for cat in _CATEGORY_CHOICES + }, + }, + } + return payload + + +def _parse_categories(raw: str) -> set[str]: + categories = {c.strip() for c in raw.split(",") if c.strip()} + invalid = categories - set(_CATEGORY_CHOICES) + if invalid: + print_error(f"unknown category: {','.join(sorted(invalid))}") + return categories + + +def run(args: argparse.Namespace) -> None: + categories = _parse_categories(args.category) + min_severity = Severity(args.severity) + input_path = Path(args.input) + + if input_path.is_dir(): + files = collect_audio_files(input_path, recursive=args.recursive) + if not files: + print_error(f"No audio files in: {input_path}") + for fpath in files: + payload = _observe_file( + fpath, + categories=categories, + min_severity=min_severity, + silence_threshold=args.silence_threshold, + spectrum_fft=args.spectrum_fft, + spectrum_min_rms=args.spectrum_min_rms, + ) + if args.json: + print_json(payload) + else: + _print_human(payload) + return + + payload = _observe_file( + input_path, + categories=categories, + min_severity=min_severity, + silence_threshold=args.silence_threshold, + spectrum_fft=args.spectrum_fft, + 
spectrum_min_rms=args.spectrum_min_rms, + ) + if args.json: + print_json(payload) + return + _print_human(payload) + + +def _print_human(payload: dict) -> None: + output_console.print(f"\n[bold]{payload['file']}[/bold]") + output_console.print( + f" {payload['duration_sec']}s @ {payload['sample_rate']}Hz, " + f"{payload['channels']} ch, {payload['total_samples']} samples" + ) + summary = payload["summary"] + output_console.print( + f" Findings: {summary['total']} " + f"(critical={summary['by_severity']['critical']}, " + f"warn={summary['by_severity']['warn']}, " + f"info={summary['by_severity']['info']})" + ) + + if not payload["findings"]: + output_console.print(" [green]No findings at requested severity.[/green]") + return + + rows = [] + for f in payload["findings"]: + where = f["where"] + loc = "" + if "start_sec" in where: + loc = f"{where['start_sec']:.3f}s" + if "end_sec" in where and where["end_sec"] != where["start_sec"]: + loc += f"-{where['end_sec']:.3f}s" + elif "frequency_hz" in where: + loc = f"{where['frequency_hz']}Hz" + rows.append([ + f["severity"].upper(), + f["category"], + f["code"], + loc, + f["hint"][:80], + ]) + print_table("Findings", ["Sev", "Category", "Code", "Where", "Hint"], rows) diff --git a/src/audioman/cli/output.py b/src/audioman/cli/output.py index d939bbb..bae0254 100644 --- a/src/audioman/cli/output.py +++ b/src/audioman/cli/output.py @@ -1,15 +1,64 @@ # Created: 2026-03-21 -# Purpose: CLI 출력 포매터 (human-readable / JSON) +# Purpose: CLI 출력 포매터 (human-readable / JSON / plain) +# +# Plain 모드 (--plain 또는 AUDIOMAN_PLAIN=1) 사용 시: +# - Rich Console이 색상/markup/highlight를 모두 끄도록 재설정 +# - print_table은 TSV로 fallback +# - rich markup 토큰 ([red], [bold] 등)이 제거된 채 출력 import json +import os +import re import sys from typing import Any from rich.console import Console from rich.table import Table -console = Console(stderr=True) -output_console = Console() +_PLAIN: bool = False + + +def _is_plain_env() -> bool: + val = 
os.environ.get("AUDIOMAN_PLAIN", "").strip().lower() + return val in ("1", "true", "yes", "on") + + +def _make_console(*, stderr: bool) -> Console: + if _PLAIN: + return Console( + stderr=stderr, + no_color=True, + force_terminal=False, + markup=False, + highlight=False, + emoji=False, + ) + return Console(stderr=stderr) + + +def set_plain(enabled: bool) -> None: + """app.py에서 --plain 결정 후 호출. console 인스턴스를 재구성한다.""" + global _PLAIN, console, output_console + _PLAIN = bool(enabled) + console = _make_console(stderr=True) + output_console = _make_console(stderr=False) + + +def is_plain() -> bool: + return _PLAIN + + +_PLAIN = _is_plain_env() +console = _make_console(stderr=True) +output_console = _make_console(stderr=False) + + +_RICH_MARKUP_RE = re.compile(r"\[/?[a-zA-Z0-9 _#=,\.\-]+\]") + + +def _strip_markup(text: str) -> str: + """Plain 모드용: [bold]X[/bold] 같은 rich markup 토큰을 제거.""" + return _RICH_MARKUP_RE.sub("", text) def print_json(data: Any) -> None: @@ -18,7 +67,15 @@ def print_json(data: Any) -> None: def print_table(title: str, columns: list[str], rows: list[list[str]]) -> None: - """Rich 테이블 출력""" + """테이블 출력. 
Plain 모드면 TSV로 stdout에 출력.""" + if _PLAIN: + if title: + print(f"# {title}") + print("\t".join(columns)) + for row in rows: + print("\t".join(str(c) for c in row)) + return + table = Table(title=title) for col in columns: table.add_column(col) @@ -28,17 +85,29 @@ def print_table(title: str, columns: list[str], rows: list[list[str]]) -> None: def print_info(message: str) -> None: + if _PLAIN: + print(_strip_markup(message), file=sys.stderr) + return console.print(f"[dim]{message}[/dim]", highlight=False) def print_success(message: str) -> None: + if _PLAIN: + print(_strip_markup(message), file=sys.stderr) + return console.print(f"[green]{message}[/green]", highlight=False) def print_error(message: str) -> None: + if _PLAIN: + print(f"error: {_strip_markup(message)}", file=sys.stderr) + sys.exit(1) console.print(f"[red]error:[/red] {message}", highlight=False) sys.exit(1) def print_warning(message: str) -> None: + if _PLAIN: + print(f"warning: {_strip_markup(message)}", file=sys.stderr) + return console.print(f"[yellow]warning:[/yellow] {message}", highlight=False) diff --git a/src/audioman/cli/preset.py b/src/audioman/cli/preset.py index 5a86962..aea2e45 100644 --- a/src/audioman/cli/preset.py +++ b/src/audioman/cli/preset.py @@ -7,36 +7,35 @@ from audioman.config.paths import ensure_app_dirs from audioman.core.engine import parse_params from audioman.core.preset_manager import PresetManager -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("preset", help=_("Preset management")) + parser = subparsers.add_parser("preset", help="Preset management") preset_sub = parser.add_subparsers(dest="preset_command") # save - save_p = preset_sub.add_parser("save", help=_("Save preset")) - save_p.add_argument("name", help=_("Preset name")) - save_p.add_argument("--plugin", "-p", required=True, help=_("Plugin name")) - save_p.add_argument("--param", action="append", default=[], help=_("Parameter 
(key=value)")) - save_p.add_argument("--description", "-d", default="", help=_("Description")) + save_p = preset_sub.add_parser("save", help="Save preset") + save_p.add_argument("name", help="Preset name") + save_p.add_argument("--plugin", "-p", required=True, help="Plugin name") + save_p.add_argument("--param", action="append", default=[], help="Parameter (key=value)") + save_p.add_argument("--description", "-d", default="", help="Description") save_p.set_defaults(func=run_save) # load - load_p = preset_sub.add_parser("load", help=_("Show preset info")) - load_p.add_argument("name", help=_("Preset name")) - load_p.add_argument("--plugin", "-p", help=_("Plugin name (optional)")) + load_p = preset_sub.add_parser("load", help="Show preset info") + load_p.add_argument("name", help="Preset name") + load_p.add_argument("--plugin", "-p", help="Plugin name (optional)") load_p.set_defaults(func=run_load) # list - list_p = preset_sub.add_parser("list", help=_("List presets")) - list_p.add_argument("--plugin", "-p", help=_("Plugin filter")) + list_p = preset_sub.add_parser("list", help="List presets") + list_p.add_argument("--plugin", "-p", help="Plugin filter") list_p.set_defaults(func=run_list) # delete - del_p = preset_sub.add_parser("delete", help=_("Delete preset")) - del_p.add_argument("name", help=_("Preset name")) - del_p.add_argument("--plugin", "-p", help=_("Plugin name (optional)")) + del_p = preset_sub.add_parser("delete", help="Delete preset") + del_p.add_argument("name", help="Preset name") + del_p.add_argument("--plugin", "-p", help="Plugin name (optional)") del_p.set_defaults(func=run_delete) parser.set_defaults(func=lambda args: parser.print_help()) diff --git a/src/audioman/cli/process.py b/src/audioman/cli/process.py index 614d5cd..86b7b1c 100644 --- a/src/audioman/cli/process.py +++ b/src/audioman/cli/process.py @@ -8,22 +8,21 @@ from audioman.cli.output import print_error, print_json, print_success, print_warning, output_console from audioman.core.engine 
import parse_params, process_file from audioman.core.batch import collect_audio_files, resolve_output_path -from audioman.i18n import _ from pathlib import Path def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("process", help=_("Process audio with a single plugin")) + parser = subparsers.add_parser("process", help="Process audio with a single plugin") # 입력: 파일 또는 디렉토리 - parser.add_argument("input", help=_("Input audio file or directory")) - parser.add_argument("--plugin", "-p", required=True, help=_("Plugin name")) - parser.add_argument("--param", action="append", default=[], help=_("Parameter (key=value)")) - parser.add_argument("--output", "-o", required=True, help=_("Output file or directory")) - parser.add_argument("--passes", type=int, default=1, help=_("Number of passes (2=adaptive learning multi-pass)")) - parser.add_argument("--recursive", "-r", action="store_true", help=_("Include subdirectories (batch)")) - parser.add_argument("--suffix", default="", help=_("Output filename suffix (batch)")) - parser.add_argument("--dry-run", action="store_true", help=_("Show plan without executing")) - parser.add_argument("--workers", "-w", type=int, default=1, help=_("Number of parallel workers (default: 1)")) + parser.add_argument("input", help="Input audio file or directory") + parser.add_argument("--plugin", "-p", required=True, help="Plugin name") + parser.add_argument("--param", action="append", default=[], help="Parameter (key=value)") + parser.add_argument("--output", "-o", required=True, help="Output file or directory") + parser.add_argument("--passes", type=int, default=1, help="Number of passes (2=adaptive learning multi-pass)") + parser.add_argument("--recursive", "-r", action="store_true", help="Include subdirectories (batch)") + parser.add_argument("--suffix", default="", help="Output filename suffix (batch)") + parser.add_argument("--dry-run", action="store_true", help="Show plan without executing") + 
parser.add_argument("--workers", "-w", type=int, default=1, help="Number of parallel workers (default: 1)") parser.set_defaults(func=run) diff --git a/src/audioman/cli/scan.py b/src/audioman/cli/scan.py index 1a71306..eb2e5eb 100644 --- a/src/audioman/cli/scan.py +++ b/src/audioman/cli/scan.py @@ -6,13 +6,12 @@ from audioman.cli.output import print_json, print_success, print_table from audioman.config.paths import ensure_app_dirs from audioman.core.registry import get_registry -from audioman.i18n import _ def add_parser(subparsers: argparse._SubParsersAction) -> None: - parser = subparsers.add_parser("scan", help=_("Scan system for VST3/AU plugins")) - parser.add_argument("--paths", nargs="*", help=_("Additional search paths")) - parser.add_argument("--refresh", action="store_true", help=_("Ignore cache and rescan")) + parser = subparsers.add_parser("scan", help="Scan system for VST3/AU plugins") + parser.add_argument("--paths", nargs="*", help="Additional search paths") + parser.add_argument("--refresh", action="store_true", help="Ignore cache and rescan") parser.set_defaults(func=run) diff --git a/src/audioman/cli/schemas_cmd.py b/src/audioman/cli/schemas_cmd.py new file mode 100644 index 0000000..852b4b0 --- /dev/null +++ b/src/audioman/cli/schemas_cmd.py @@ -0,0 +1,92 @@ +# Created: 2026-05-11 +# Purpose: `audioman schemas {list,show}` — JSONSchema 발행. +# LLM agent가 audioman --json 출력의 모양을 사전에 알 수 있게 한다. 
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+from audioman import __version__
+
+
+def _schemas_dir() -> Path:  # the "schemas" directory one level above this module's package
+    return Path(__file__).resolve().parent.parent / "schemas"
+
+
+def _list_schemas() -> list[dict]:
+    """Return name/$id/title/path for every readable schema file, sorted by file name."""
+    d = _schemas_dir()
+    if not d.is_dir():
+        return []
+    out = []
+    for p in sorted(d.glob("*.json")):
+        try:
+            obj = json.loads(p.read_text(encoding="utf-8"))
+        except (OSError, ValueError):  # unreadable, mis-encoded or malformed JSON: skip it
+            continue
+        if not isinstance(obj, dict):  # valid JSON but not an object, so no .get() to call
+            continue
+        out.append({
+            "name": p.stem,
+            "id": obj.get("$id", ""),
+            "title": obj.get("title", ""),
+            "path": str(p),
+        })
+    return out
+
+
+def add_parser(subparsers: argparse._SubParsersAction) -> None:
+    parser = subparsers.add_parser(
+        "schemas",
+        help="Show audioman JSONSchemas (machine-readable contract for --json output)",
+        description="LLM agents can use these schemas to validate audioman output without running it.",
+    )
+    sub = parser.add_subparsers(dest="schemas_action")
+
+    p_list = sub.add_parser("list", help="List available schemas")
+    p_list.set_defaults(func=_run_list)
+
+    p_show = sub.add_parser("show", help="Show a schema body")
+    p_show.add_argument("name", help="Schema name (e.g. finding.v1, observe.v1, analyze.v1)")
+    p_show.set_defaults(func=_run_show)
+
+    parser.set_defaults(func=_run_default)
+
+
+def _run_default(args: argparse.Namespace) -> None:
+    # With no subcommand given, behave exactly like `schemas list`.
+    _run_list(args)
+
+
+def _run_list(args: argparse.Namespace) -> None:
+    schemas = _list_schemas()
+    if getattr(args, "json", False):
+        print(json.dumps({
+            "$schema": "audioman://schema/schemas.v1.json",
+            "audioman_version": __version__,
+            "command": "schemas",
+            "schemas": schemas,
+        }, indent=2, ensure_ascii=False))
+        return
+    for s in schemas:
+        print(f"{s['name']}\t{s['id']}\t{s['title']}")
+
+
+def _run_show(args: argparse.Namespace) -> None:
+    """Write one schema file verbatim to stdout; exit with status 1 if none matches."""
+    stem = Path(args.name).name.removesuffix(".json")  # basename only: stay inside schemas/
+    target = _schemas_dir() / f"{stem}.json"
+    if not target.is_file():
+        # fallback: prefix search ("finding" → "finding.v1.json")
+        matches = sorted(_schemas_dir().glob(f"{stem}*.json"))
+        if not matches:
+            print(f"error: schema not found: {args.name}", file=sys.stderr)
+            sys.exit(1)
+        target = matches[0]
+
+    text = target.read_text(encoding="utf-8")
+    # Emit as-is (a JSONSchema is valid JSON with or without --json); only ensure a final newline.
+    sys.stdout.write(text if text.endswith("\n") else text + "\n")
diff --git a/src/audioman/cli/screen.py b/src/audioman/cli/screen.py
index c9657ab..f584ae6 100644
--- a/src/audioman/cli/screen.py
+++ b/src/audioman/cli/screen.py
@@ -9,27 +9,26 @@
 from audioman.cli.output import output_console, print_error, print_json, print_table, print_warning
 from audioman.core.aesthetic import DEFAULT_ISSUES, screen_file
 from audioman.core.batch import collect_audio_files
-from audioman.i18n import _
 
 
 def add_parser(subparsers: argparse._SubParsersAction) -> None:
     parser = subparsers.add_parser(
         "screen",
-        help=_("Screen audio for aesthetic issues such as clicks, hum, breaths, sibilance, and noise"),
+        help="Screen audio for aesthetic issues such as clicks, hum, breaths, sibilance, and noise",
     )
-    parser.add_argument("input", help=_("Input
audio file or directory")) + parser.add_argument("input", help="Input audio file or directory") parser.add_argument( "--issues", default=",".join(DEFAULT_ISSUES), - help=_("Comma-separated issue list (default: click,hum,mouth_click,sibilance,breath,background_noise,rf_noise)"), + help="Comma-separated issue list (default: click,hum,mouth_click,sibilance,breath,background_noise,rf_noise)", ) parser.add_argument( "--backend", choices=["auto", "essentia", "fallback"], default="auto", - help=_("Detector backend (default: auto)"), + help="Detector backend (default: auto)", ) - parser.add_argument("--recursive", "-r", action="store_true", help=_("Include subdirectories (batch)")) + parser.add_argument("--recursive", "-r", action="store_true", help="Include subdirectories (batch)") parser.set_defaults(func=run) diff --git a/src/audioman/cli/stream.py b/src/audioman/cli/stream.py new file mode 100644 index 0000000..16e74ab --- /dev/null +++ b/src/audioman/cli/stream.py @@ -0,0 +1,394 @@ +# Created: 2026-05-31 +# Purpose: audioman stream — DAW 실시간 블록 처리 재현 + 벤치마크/triage. 
+# +# 서브커맨드: +# bench : 블록 크기별 실시간 CPU 부하 측정 (RT factor, xrun, est tracks) +# triage : 블록 스트리밍 출력의 클릭/불연속 검출 + offline 대비 null test +# compare : 여러 블록 크기의 출력을 서로/오프라인과 비교 (block-size 의존 버그) +# play : 실제 오디오 디바이스로 플러그인 통과 신호 재생 (실청 모니터링) + +import argparse +from pathlib import Path + +from audioman.cli.output import print_error, print_json, print_info, print_table, print_success + + +DEFAULT_BLOCK_SIZES = [64, 128, 256, 512, 1024] + + +def add_parser(subparsers: argparse._SubParsersAction) -> None: + parser = subparsers.add_parser( + "stream", + help="Reproduce DAW real-time block processing — benchmark & triage plugin clicks/dropouts", + ) + sub = parser.add_subparsers(dest="stream_command", help="stream subcommands") + + # --- bench --- + p_bench = sub.add_parser("bench", help="Real-time CPU load across block sizes") + p_bench.add_argument("input", help="Audio file (or 'sine'/'impulse' for synthetic)") + p_bench.add_argument("--plugin", "-p", required=True, help="Plugin name/path, or 'chain:...'") + p_bench.add_argument("--param", action="append", default=[], help="Parameter key=value") + p_bench.add_argument("--blocks", help="Comma block sizes (default 64,128,256,512,1024)") + p_bench.add_argument("--sample-rate", "-sr", type=int, default=48000, help="For synthetic input") + p_bench.add_argument("--duration", type=float, default=5.0, help="Synthetic input seconds") + p_bench.set_defaults(func=run_bench) + + # --- triage --- + p_tri = sub.add_parser("triage", help="Detect clicks/discontinuities in block streaming") + p_tri.add_argument("input", help="Audio file (or 'sine'/'impulse')") + p_tri.add_argument("--plugin", "-p", required=True, help="Plugin name/path, or 'chain:...'") + p_tri.add_argument("--param", action="append", default=[], help="Parameter key=value") + p_tri.add_argument("--block-size", "-b", type=int, default=512) + p_tri.add_argument("--sample-rate", "-sr", type=int, default=48000) + p_tri.add_argument("--duration", type=float, default=2.0) + 
p_tri.add_argument("--reset-per-block", action="store_true", + help="Reset plugin every block (simulate a broken streaming host)") + p_tri.add_argument("--no-reset-first", action="store_true", + help="Start without resetting (carry stale tail → start-of-playback click)") + p_tri.add_argument("--output", "-o", metavar="FILE", help="Write streamed output WAV") + p_tri.set_defaults(func=run_triage) + + # --- compare --- + p_cmp = sub.add_parser("compare", help="Compare outputs across block sizes vs offline render") + p_cmp.add_argument("input", help="Audio file (or 'sine'/'impulse')") + p_cmp.add_argument("--plugin", "-p", required=True, help="Plugin name/path, or 'chain:...'") + p_cmp.add_argument("--param", action="append", default=[], help="Parameter key=value") + p_cmp.add_argument("--blocks", help="Comma block sizes (default 64,128,256,512,1024)") + p_cmp.add_argument("--sample-rate", "-sr", type=int, default=48000) + p_cmp.add_argument("--duration", type=float, default=2.0) + p_cmp.set_defaults(func=run_compare) + + # --- play --- + p_play = sub.add_parser("play", help="Play plugin-processed signal through audio device") + p_play.add_argument("input", help="Audio file (or 'sine'/'impulse')") + p_play.add_argument("--plugin", "-p", required=True, help="Plugin name/path, or 'chain:...'") + p_play.add_argument("--param", action="append", default=[], help="Parameter key=value") + p_play.add_argument("--block-size", "-b", type=int, default=512) + p_play.add_argument("--sample-rate", "-sr", type=int, default=48000) + p_play.add_argument("--duration", type=float, default=5.0) + p_play.add_argument("--reset-per-block", action="store_true") + p_play.set_defaults(func=run_play) + + parser.set_defaults(func=_no_subcommand) + + +def _no_subcommand(args: argparse.Namespace) -> None: + print_error("stream requires a subcommand: bench | triage | compare | play") + + +# --------------------------------------------------------------------------- +# Shared helpers +# 
---------------------------------------------------------------------------
+
+def _parse_blocks(raw: str | None) -> list[int]:
+    """Parse a comma-separated --blocks value; empty/None selects DEFAULT_BLOCK_SIZES."""
+    if not raw:
+        return list(DEFAULT_BLOCK_SIZES)
+    parts = [x.strip() for x in raw.split(",") if x.strip()]
+    if not parts or not all(p.isdecimal() and int(p) > 0 for p in parts):
+        print_error(f"--blocks needs comma-separated positive integers, got: {raw!r}")
+    return [int(p) for p in parts]
+
+
+def _load_input(args: argparse.Namespace):
+    """Resolve args.input (file path or 'sine'/'impulse'/'two-tone') to (audio, sample_rate)."""
+    import numpy as np
+
+    spec = args.input
+    sr = args.sample_rate
+    if spec in ("sine", "impulse", "two-tone"):
+        from audioman.core import test_signal as ts
+        dur = getattr(args, "duration", 2.0)
+        if spec == "sine":
+            audio = ts.generate_sine(440.0, sr, dur, level_db=-6.0, channels=2)
+        elif spec == "impulse":
+            audio = ts.generate_impulse(sr, dur, channels=2)
+        else:
+            audio = ts.generate_two_tone(sample_rate=sr, duration_sec=dur, channels=2)
+        return audio.astype(np.float32), sr
+
+    from audioman.core.audio_file import read_audio
+    p = Path(spec)
+    if not p.exists():
+        print_error(f"input not found (and not 'sine'/'impulse'/'two-tone'): {spec}")
+    return read_audio(p)  # (audio, file sample rate); --sample-rate only applies to synthetic input
+
+
+def _build_process_fn(args: argparse.Namespace, sample_rate: int):
+    """Resolve --plugin into a ProcessFn backed by pedalboard built-ins or a VST3 plugin.
+
+    'chain:reverb,delay' or 'builtin:reverb' selects pedalboard built-in effects
+    (for tests/demos). Anything else is looked up as a VST3 plugin in the registry.
+    """
+    from audioman.core.engine import parse_params
+    from audioman.core.streaming import make_pedalboard_process_fn, make_wrapper_process_fn
+
+    spec = args.plugin
+    params = parse_params(args.param) if args.param else {}
+
+    # Built-in pedalboard effects (testable without any installed plugin).
+    if spec.startswith(("builtin:", "chain:")):
+        from pedalboard import Pedalboard, Reverb, Compressor, Delay, Chorus, Gain, Distortion
+        registry = {"reverb": Reverb, "compressor": Compressor, "delay": Delay,
+                    "chorus": Chorus, "gain": Gain, "distortion": Distortion}
+        names = [nm.strip().lower() for nm in spec.split(":", 1)[1].split(",")]
+        for nm in names:
+            if nm not in registry:
+                print_error(f"unknown builtin effect: {nm} (have {sorted(registry)})")
+        if params:  # built-ins are constructed with defaults; say so instead of dropping silently
+            print_info("--param is ignored for builtin effects (defaults are used)")
+        return make_pedalboard_process_fn(Pedalboard([registry[nm]() for nm in names]))
+
+    # VST3 plugin (resolved through the registry).
+    from audioman.core.registry import get_registry
+    meta = get_registry().get(spec)
+    if not meta:
+        print_error(f"plugin not found: '{spec}' (use 'builtin:reverb' for a dependency-free test)")
+    from audioman.plugins.vst3 import VST3PluginWrapper
+    wrapper = VST3PluginWrapper(meta.path)
+    wrapper.load()
+    if params:
+        wrapper.set_parameters(params)
+    return make_wrapper_process_fn(wrapper)
+
+
+# ---------------------------------------------------------------------------
+# bench
+# ---------------------------------------------------------------------------
+
+def run_bench(args: argparse.Namespace) -> None:
+    from audioman.core.streaming import render_streamed
+    from audioman.core.rt_bench import benchmark
+
+    audio, sr = _load_input(args)
+    blocks = _parse_blocks(args.blocks)
+    fn = _build_process_fn(args, sr)
+
+    reports = []
+    for bs in blocks:
+        result = render_streamed(audio, sr, fn, block_size=bs, reset_per_block=False)
+        reports.append(benchmark(result))
+
+    payload = {
+        "input": args.input,
+        "plugin": args.plugin,
+        "sample_rate": sr,
+        "reports": [r.to_dict() for r in reports],
+    }
+
+    if getattr(args, "json", False):
+        print_json(payload)
+        return
+
+    rows = []
+    for r in reports:
+        rows.append([
+            str(r.block_size),
+            f"{r.deadline_ms:.2f}",
+            f"{r.block_ms_mean:.3f}",
+            f"{r.block_ms_max:.3f}",
+            f"{r.rt_factor_p50:.4f}",
+            f"{r.rt_factor_p99:.4f}",
+            f"{r.rt_factor_max:.4f}",
+            str(r.xruns),
+            str(r.est_max_tracks),
+        ])
+    print_table(
+        f"RT bench — {args.plugin} @ {sr}Hz",
+        ["block", "deadline_ms", "proc_ms_avg", "proc_ms_max",
+         "rt_p50", "rt_p99", "rt_max", "xruns", "est_tracks"],
+        rows,
+    )
+    worst = max(reports, key=lambda r: r.rt_factor_max)
+    if worst.xruns > 0:
+        print_info(f"xruns at block={worst.block_size}: {worst.xruns} blocks missed deadline")
+    else:
+        print_success(f"no xruns; smallest safe est_tracks = {min(r.est_max_tracks for r in reports)}")
+
+
+# ---------------------------------------------------------------------------
+# triage
+# ---------------------------------------------------------------------------
+
+def run_triage(args: argparse.Namespace) -> None:
+    from audioman.core.streaming import render_offline, render_streamed
+    from audioman.core.discontinuity import detect_discontinuities, detect_nonfinite, null_test
+    from audioman.core.findings import envelope, Severity
+
+    audio, sr = _load_input(args)
+    fn = _build_process_fn(args, sr)
+    bs = args.block_size
+
+    offline = render_offline(audio, sr, fn)
+    # A process_fn carries state, so build a fresh one for the streamed render to avoid contamination.
+    fn2 = _build_process_fn(args, sr)
+    streamed = render_streamed(
+        audio, sr, fn2,
+        block_size=bs,
+        reset_per_block=args.reset_per_block,
+        reset_first=not args.no_reset_first,
+    )
+
+    findings = []
+    findings += detect_nonfinite(streamed.audio, sr, file=args.input)
+    findings += detect_discontinuities(streamed.audio, sr, block_size=bs, file=args.input)
+    findings += null_test(offline.audio, streamed.audio, sr, file=args.input)
+
+    if args.output:
+        from audioman.core.audio_file import write_audio
+        write_audio(args.output, streamed.audio, sr)
+
+    env = envelope(findings, file=args.input, extra={
+        "stream": {
+            "block_size": bs,
+            "reset_per_block": args.reset_per_block,
+            "reset_first": not args.no_reset_first,
+            "offline_summary": offline.to_summary(),
+            "streamed_summary": streamed.to_summary(),
+        }
+    })
+
+    if getattr(args, "json", False):
+        print_json(env)
+        return
+
+    n_crit = env["summary"]["by_severity"]["critical"]
+    if not findings:
+        print_success(f"clean — no clicks/discontinuities at block_size={bs}")
+        return
+    rows = []
+    for f in findings:
+        m = f.measurement
+        rows.append([
+            f.code.value,
+            f.severity.value,
+            str(f.where.start_sample),
+            f"{f.where.start_sec:.4f}" if f.where.start_sec is not None else "-",
+            str(m.get("block_aligned", "-")),
+            (str(m.get("peak_jump_db")) if "peak_jump_db" in m
+             else str(m.get("max_diff_db", "-"))),
+        ])
+    print_table(
+        f"triage — {args.plugin} block={bs}",
+        ["code", "severity", "sample", "sec", "block_aligned", "level_db"],
+        rows,
+    )
+    if n_crit:
+        print_info(f"{n_crit} critical finding(s) — block-aligned discontinuities are streaming bugs, "
+                   f"not source clicks.")
+
+
+# ---------------------------------------------------------------------------
+# compare
+# ---------------------------------------------------------------------------
+
+def run_compare(args: argparse.Namespace) -> None:
+    import numpy as np
+    from audioman.core.streaming import render_offline, render_streamed
+
+    audio, sr = _load_input(args)
+    blocks = _parse_blocks(args.blocks)
+
+    offline = render_offline(audio, sr, _build_process_fn(args, sr))
+    ref = offline.audio.mean(axis=0) if offline.audio.ndim == 2 else offline.audio
+
+    def diff_db(a, b):
+        n = min(len(a), len(b))
+        return float(20 * np.log10(np.max(np.abs(a[:n] - b[:n])) + 1e-30))
+
+    streamed_mono = {}
+    rows = []
+    for bs in blocks:
+        st = render_streamed(audio, sr, _build_process_fn(args, sr), block_size=bs)
+        mono = st.audio.mean(axis=0) if st.audio.ndim == 2 else st.audio
+        streamed_mono[bs] = mono
+        rows.append([str(bs), f"{diff_db(ref, mono):.2f}"])
+
+    # Cross-compare neighbouring block sizes.
+    cross = []
+    for i in range(len(blocks) - 1):
+        a, b = blocks[i], blocks[i + 1]
+        cross.append([f"{a} vs {b}", f"{diff_db(streamed_mono[a], streamed_mono[b]):.2f}"])
+
+    payload = {
+        "input": args.input, "plugin": args.plugin, "sample_rate": sr,
+        "vs_offline_db": {str(b): round(diff_db(ref, streamed_mono[b]), 2) for b in blocks},
+        "cross_block_db": {f"{blocks[i]}_vs_{blocks[i+1]}":
+                           round(diff_db(streamed_mono[blocks[i]], streamed_mono[blocks[i+1]]), 2)
+                           for i in range(len(blocks) - 1)},
+    }
+    if getattr(args, "json", False):
+        print_json(payload)
+        return
+
+    print_table(f"streamed vs offline — {args.plugin}",
+                ["block", "max_diff_db"], rows)
+    print_table("cross block-size diff", ["pair", "max_diff_db"], cross)
+    worst = max(payload["vs_offline_db"].values())
+    if worst > -60.0:
+        print_info(f"block streaming diverges from offline by up to {worst:.1f} dB — "
+                   f"this plugin is block-size sensitive.")
+    else:
+        print_success("all block sizes match offline render (≤ -60 dB) — deterministic.")
+
+
+# ---------------------------------------------------------------------------
+# play
+# ---------------------------------------------------------------------------
+
+def run_play(args: argparse.Namespace) -> None:
+    import numpy as np
+    audio, sr = _load_input(args)
+    fn = _build_process_fn(args, sr)
+    bs = args.block_size
+
+    try:
+        import sounddevice as sd
+    except Exception as e:  # pragma: no cover
+        print_error(f"sounddevice unavailable: {e}")
+
+    # Process block by block while playing in real time. If the callback misses its deadline,
+    # PortAudio raises an underflow status — the same signal as a real DAW xrun.
+    pos = {"i": 0}
+    reset_first = {"done": False}
+    underflows = {"n": 0}
+
+    def callback(outdata, frames, time_info, status):
+        if status:
+            underflows["n"] += 1
+        i = pos["i"]
+        block = audio[:, i:i + frames]
+        if block.shape[1] == 0:
+            outdata.fill(0.0)  # this final buffer is still played; never leave it uninitialised
+            raise sd.CallbackStop()
+        reset = args.reset_per_block or (not reset_first["done"])
+        reset_first["done"] = True
+        out = fn(block, sr, reset)
+        out = np.asarray(out, dtype=np.float32)
+        m = min(out.shape[1], frames)  # a plugin may hand back more samples than were requested
+        # Normalise to a stereo output buffer (mono is duplicated to both channels).
+        if out.shape[0] == 1:
+            outdata[:m, 0] = out[0, :m]
+            outdata[:m, 1] = out[0, :m]
+        else:
+            outdata[:m, 0] = out[0, :m]
+            outdata[:m, 1] = out[1, :m]
+        if m < frames:
+            outdata[m:].fill(0.0)
+        pos["i"] += frames
+
+    print_info(f"playing {args.input} through {args.plugin} (block={bs}, sr={sr}) — Ctrl-C to stop")
+    import threading
+    done = threading.Event()
+    stream = sd.OutputStream(samplerate=sr, channels=2, dtype="float32",
+                             blocksize=bs, callback=callback,
+                             finished_callback=done.set)
+    try:
+        with stream:
+            done.wait(timeout=args.duration + audio.shape[1] / sr + 1.0)
+    except KeyboardInterrupt:
+        pass
+    if underflows["n"]:
+        print_info(f"{underflows['n']} PortAudio underflow(s) — real xruns at block={bs}")
+    else:
+        print_success("playback complete, no underflows")
diff --git a/src/audioman/cli/visualize.py b/src/audioman/cli/visualize.py
index 63124a1..843e27a 100644
--- a/src/audioman/cli/visualize.py
+++ b/src/audioman/cli/visualize.py
@@ -15,7 +15,6 @@
     write_notes,
     write_dense3d,
 )
-from audioman.i18n import _
 
 
 # 내장 분석 타입과 설명
@@ -32,35 +31,35 @@
 def add_parser(subparsers: argparse._SubParsersAction) -> None:
     parser = subparsers.add_parser(
         "visualize",
-        help=_("Vamp plugin or built-in analysis -> Sonic Visualiser SVL file"),
+        help="Vamp plugin or built-in analysis -> Sonic Visualiser SVL file",
     )
-    parser.add_argument("input", help=_("Input audio file"))
+    parser.add_argument("input", help="Input audio file")
     source = parser.add_mutually_exclusive_group(required=False)
     source.add_argument(
"--plugin", "-p", - help=_("Vamp plugin ID (e.g. qm-vamp-plugins:qm-chromagram)"), + help="Vamp plugin ID (e.g. qm-vamp-plugins:qm-chromagram)", ) source.add_argument( "--builtin", "-b", choices=list(BUILTIN_TYPES.keys()), - help=f"{_('Built-in analysis type')}: {', '.join(BUILTIN_TYPES.keys())}", + help=f"{'Built-in analysis type'}: {', '.join(BUILTIN_TYPES.keys())}", ) - parser.add_argument("-o", "--output", help=_("Output SVL file path (default: auto)")) - parser.add_argument("--output-name", help=_("Vamp plugin output name (for multiple outputs)")) - parser.add_argument("--frame-size", type=int, default=2048, help=_("FFT frame size (default: 2048)")) - parser.add_argument("--hop", type=int, default=512, help=_("Hop size (default: 512)")) - parser.add_argument("--list-plugins", action="store_true", help=_("List installed Vamp plugins")) - parser.add_argument("--plugin-info", help=_("Query plugin output info")) - parser.add_argument("--open", action="store_true", help=_("Open in Sonic Visualiser after creation")) - parser.add_argument("--png", help=_("Also write a PNG spectrogram image (multimodal-friendly) to this path")) - parser.add_argument("--png-only", action="store_true", help=_("Write only PNG, skip SVL output")) - parser.add_argument("--png-width", type=int, default=1600, help=_("PNG width in pixels (default: 1600)")) - parser.add_argument("--png-height", type=int, default=600, help=_("PNG height in pixels (default: 600)")) - parser.add_argument("--png-db-min", type=float, default=-90.0, help=_("PNG color floor in dB (default: -90)")) - parser.add_argument("--png-db-max", type=float, default=0.0, help=_("PNG color ceiling in dB (default: 0)")) - parser.add_argument("--png-fmax", type=float, default=None, help=_("PNG max display frequency Hz (default: Nyquist)")) + parser.add_argument("-o", "--output", help="Output SVL file path (default: auto)") + parser.add_argument("--output-name", help="Vamp plugin output name (for multiple outputs)") + 
parser.add_argument("--frame-size", type=int, default=2048, help="FFT frame size (default: 2048)") + parser.add_argument("--hop", type=int, default=512, help="Hop size (default: 512)") + parser.add_argument("--list-plugins", action="store_true", help="List installed Vamp plugins") + parser.add_argument("--plugin-info", help="Query plugin output info") + parser.add_argument("--open", action="store_true", help="Open in Sonic Visualiser after creation") + parser.add_argument("--png", help="Also write a PNG spectrogram image (multimodal-friendly) to this path") + parser.add_argument("--png-only", action="store_true", help="Write only PNG, skip SVL output") + parser.add_argument("--png-width", type=int, default=1600, help="PNG width in pixels (default: 1600)") + parser.add_argument("--png-height", type=int, default=600, help="PNG height in pixels (default: 600)") + parser.add_argument("--png-db-min", type=float, default=-90.0, help="PNG color floor in dB (default: -90)") + parser.add_argument("--png-db-max", type=float, default=0.0, help="PNG color ceiling in dB (default: 0)") + parser.add_argument("--png-fmax", type=float, default=None, help="PNG max display frequency Hz (default: Nyquist)") parser.set_defaults(func=run) diff --git a/src/audioman/core/detectors.py b/src/audioman/core/detectors.py new file mode 100644 index 0000000..5b51e84 --- /dev/null +++ b/src/audioman/core/detectors.py @@ -0,0 +1,274 @@ +# Created: 2026-05-11 +# Purpose: 오디오 분석 결과 → 통일된 Finding[] 변환기. +# 기존 core/analysis.py 출력(spectrum_diagnostics, detect_silence 등)을 그대로 받아 +# Finding 객체로 어댑팅하므로 분석 엔진 재구현이 없다. 
+ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Optional + +import numpy as np + +from audioman.core.analysis import SilenceRegion +from audioman.core.findings import ( + Category, + Code, + Finding, + FixHint, + Severity, + Where, +) + + +# --- signal -------------------------------------------------------------- + +def _to_mono(audio: np.ndarray) -> np.ndarray: + if audio.ndim == 2: + return audio.mean(axis=0) + return audio + + +def detect_clipping( + audio: np.ndarray, + sample_rate: int, + *, + file: Optional[str] = None, + threshold: float = 0.999, +) -> list[Finding]: + """Sample peak ≥ threshold 가 연속된 구간(런)을 clipping으로 잡는다.""" + mono = _to_mono(audio) + clipped = np.abs(mono) >= threshold + if not clipped.any(): + return [] + + # 연속 런 찾기 + diffs = np.diff(clipped.astype(np.int8)) + starts = np.where(diffs == 1)[0] + 1 + ends = np.where(diffs == -1)[0] + 1 + if clipped[0]: + starts = np.concatenate(([0], starts)) + if clipped[-1]: + ends = np.concatenate((ends, [len(clipped)])) + + findings: list[Finding] = [] + total_clipped = int(clipped.sum()) + n_runs = len(starts) + peak = float(np.max(np.abs(mono))) + peak_dbfs = 20.0 * np.log10(peak + 1e-30) + + severity = Severity.CRITICAL if total_clipped > sample_rate * 0.001 else Severity.WARN + findings.append(Finding( + code=Code.CLIP_SAMPLE_PEAK_EXCEEDED, + category=Category.SIGNAL, + severity=severity, + where=Where( + file=file, + start_sample=int(starts[0]), + end_sample=int(ends[-1]), + start_sec=round(float(starts[0]) / sample_rate, 6), + end_sec=round(float(ends[-1]) / sample_rate, 6), + ), + measurement={ + "peak_dbfs": round(peak_dbfs, 3), + "samples_clipped": total_clipped, + "run_count": n_runs, + "threshold": threshold, + }, + hint=f"{n_runs} clipping run(s), {total_clipped} samples at/above {threshold}. 
Reduce input gain or apply a limiter.", + fix_hint=FixHint( + kind="ffmpeg-plan", + args=["loudnorm-or-limit", "--ceiling", "-1.0"], + note="audioman plan loudnorm / limit (Phase B)", + ), + )) + return findings + + +def detect_dc_offset( + audio: np.ndarray, + sample_rate: int, + *, + file: Optional[str] = None, + threshold: float = 0.002, +) -> list[Finding]: + """채널별 평균이 threshold 초과면 DC offset finding.""" + if audio.ndim == 1: + means = [float(audio.mean())] + else: + means = [float(audio[ch].mean()) for ch in range(audio.shape[0])] + + findings: list[Finding] = [] + for ch, m in enumerate(means): + if abs(m) > threshold: + findings.append(Finding( + code=Code.DC_OFFSET_DETECTED, + category=Category.SIGNAL, + severity=Severity.WARN if abs(m) < 0.01 else Severity.CRITICAL, + where=Where(file=file, channel=ch), + measurement={"dc_offset": round(m, 6), "threshold": threshold}, + hint=f"Channel {ch} DC offset {m:+.4f} (threshold {threshold}). Apply high-pass at ~20Hz.", + fix_hint=FixHint( + kind="audioman-fx", + args=["highpass", "--cutoff", "20"], + note="DC removal via 20Hz HPF", + ), + )) + return findings + + +def detect_channel_imbalance( + audio: np.ndarray, + sample_rate: int, + *, + file: Optional[str] = None, + db_threshold: float = 3.0, +) -> list[Finding]: + """스테레오 채널 RMS 차가 db_threshold 초과면 finding.""" + if audio.ndim != 2 or audio.shape[0] != 2: + return [] + rms_l = float(np.sqrt(np.mean(audio[0] ** 2))) + rms_r = float(np.sqrt(np.mean(audio[1] ** 2))) + if rms_l < 1e-6 or rms_r < 1e-6: + return [] + diff_db = 20.0 * np.log10(rms_l / rms_r) + if abs(diff_db) < db_threshold: + return [] + return [Finding( + code=Code.CHANNEL_IMBALANCE, + category=Category.SIGNAL, + severity=Severity.WARN if abs(diff_db) < 6.0 else Severity.CRITICAL, + where=Where(file=file), + measurement={ + "rms_left_db": round(20.0 * np.log10(rms_l + 1e-30), 2), + "rms_right_db": round(20.0 * np.log10(rms_r + 1e-30), 2), + "diff_db": round(diff_db, 2), + "threshold_db": 
db_threshold, + }, + hint=f"Left/right RMS differ by {diff_db:+.2f} dB. Investigate panning, mono summing or one-sided signal.", + fix_hint=FixHint(kind="manual", args=[], note="Inspect track in DAW"), + )] + + +def silence_to_findings( + silence_regions: list[SilenceRegion], + audio_length_samples: int, + sample_rate: int, + *, + file: Optional[str] = None, + inner_min_sec: float = 1.0, +) -> list[Finding]: + """SilenceRegion 리스트를 leading/trailing/inner finding으로 분류.""" + if not silence_regions: + return [] + + findings: list[Finding] = [] + head_thresh = int(0.05 * sample_rate) + tail_thresh = audio_length_samples - int(0.05 * sample_rate) + + for region in silence_regions: + if region.start_sample <= head_thresh: + code = Code.SILENCE_LEADING + sev = Severity.INFO + hint = f"Leading silence {region.duration_sec:.3f}s. `audioman plan cut-silence --strategy leading` will trim it." + elif region.end_sample >= tail_thresh: + code = Code.SILENCE_TRAILING + sev = Severity.INFO + hint = f"Trailing silence {region.duration_sec:.3f}s. `audioman plan cut-silence --strategy trailing` will trim it." + elif region.duration_sec >= inner_min_sec: + code = Code.SILENCE_INNER + sev = Severity.WARN + hint = f"Inner silence gap {region.duration_sec:.3f}s. Use `audioman plan cut-silence --strategy all` to remove." 
+ else: + continue # 짧은 inner silence는 노이즈로 간주, finding 안 만듦 + + findings.append(Finding( + code=code, + category=Category.SIGNAL, + severity=sev, + where=Where( + file=file, + start_sample=region.start_sample, + end_sample=region.end_sample, + start_sec=round(region.start_sample / sample_rate, 6), + end_sec=round(region.end_sample / sample_rate, 6), + ), + measurement={"duration_sec": round(region.duration_sec, 6)}, + hint=hint, + fix_hint=FixHint( + kind="ffmpeg-plan", + args=["cut-silence", "--strategy", code.value.split("_")[-1].lower()], + note="Phase B: audioman plan cut-silence", + ), + )) + return findings + + +def detect_signal_findings( + audio: np.ndarray, + sample_rate: int, + *, + file: Optional[str] = None, +) -> list[Finding]: + """signal 카테고리 detector 일괄.""" + findings: list[Finding] = [] + findings.extend(detect_clipping(audio, sample_rate, file=file)) + findings.extend(detect_dc_offset(audio, sample_rate, file=file)) + findings.extend(detect_channel_imbalance(audio, sample_rate, file=file)) + return findings + + +# --- spectral ------------------------------------------------------------ + +def spectrum_to_findings( + spectrum_dict: dict[str, Any], + *, + file: Optional[str] = None, +) -> list[Finding]: + """core/analysis.py:spectrum_diagnostics() 출력 → spectral Finding[].""" + findings: list[Finding] = [] + + # MAINS_HUM + for h in spectrum_dict.get("hum_check", []) or []: + if not h.get("is_hum"): + continue + freq = h.get("frequency_hz") + snr = h.get("snr_db") + sev = Severity.CRITICAL if (snr is not None and snr > 25.0) else Severity.WARN + findings.append(Finding( + code=Code.MAINS_HUM, + category=Category.SPECTRAL, + severity=sev, + where=Where(file=file, frequency_hz=freq), + measurement={"frequency_hz": freq, "snr_db": snr}, + hint=f"Mains hum at {freq} Hz (SNR {snr:+.1f} dB). 
Apply a notch filter / RX De-hum.", + fix_hint=FixHint( + kind="audioman-process", + args=["--plugin", "dehum", "--", f"notch_frequency={freq}"], + ), + )) + + # HF_NOISE_FLOOR — hf_slope가 양수로 크면 hiss 후보 (단, 발화/음악 자체일 수도 있어 INFO) + sl = spectrum_dict.get("hf_slope") or {} + slope = sl.get("slope_db") + if slope is not None and slope > -3.0 and sl.get("high_db") is not None and sl["high_db"] > -50.0: + # 고역이 mid 대비 거의 안 빠짐 → hiss/노이즈 floor 가능성 + findings.append(Finding( + code=Code.HF_NOISE_FLOOR, + category=Category.SPECTRAL, + severity=Severity.INFO, + where=Where(file=file), + measurement={ + "mid_db": sl.get("mid_db"), + "high_db": sl.get("high_db"), + "slope_db": slope, + }, + hint=f"Flat/elevated HF energy (slope {slope:+.1f} dB). May indicate hiss or wideband noise.", + fix_hint=FixHint( + kind="audioman-process", + args=["--plugin", "denoise"], + note="Optional — verify against content first", + ), + )) + + return findings diff --git a/src/audioman/core/discontinuity.py b/src/audioman/core/discontinuity.py new file mode 100644 index 0000000..cd5c118 --- /dev/null +++ b/src/audioman/core/discontinuity.py @@ -0,0 +1,235 @@ +# Created: 2026-05-31 +# Purpose: 블록 스트리밍 출력에서 클릭/불연속을 검출하고 triage 한다. +# +# 동기: 실제 DAW(Ableton 등)에서 클릭이 나는데, audioman의 단순 블록 스트리밍 +# 비교만으로는 어디서 왜 나는지 triage가 안 됐다. 이 모듈은 두 방향으로 잡는다. +# +# 1) 출력 신호 자체의 불연속 검출 (기준 신호 없이) — sample diff spike. +# 그 위치가 블록 경계(block_size 배수)에 정렬되면 "스트리밍 상태 단절"로 +# 분류한다. 이것이 reset-per-block / lookahead 끊김 / 파라미터 스무딩 끊김의 +# 서명이다. +# +# 2) null test — offline(ground truth) vs streamed 출력의 차이. 차이가 큰 +# 구간 = 블록 스트리밍이 오프라인 렌더와 갈리는 지점. +# +# 모든 결과는 core/findings.Finding 으로 반환되어 기존 envelope/스키마에 합류한다. 
+ +from __future__ import annotations + +from typing import Optional + +import numpy as np + +from audioman.core.findings import ( + Category, + Code, + Finding, + FixHint, + Severity, + Where, +) + + +def _to_mono(audio: np.ndarray) -> np.ndarray: + if audio.ndim == 2: + return audio.mean(axis=0) + return audio + + +def detect_nonfinite( + audio: np.ndarray, + sample_rate: int, + *, + file: Optional[str] = None, +) -> list[Finding]: + """NaN/Inf 샘플 검출 — 플러그인이 발산했거나 미초기화 버퍼를 낸 경우.""" + bad = ~np.isfinite(audio) + if not bad.any(): + return [] + flat = bad.any(axis=0) if audio.ndim == 2 else bad + idx = np.where(flat)[0] + return [Finding( + code=Code.NONFINITE_SAMPLES, + category=Category.PLUGIN, + severity=Severity.CRITICAL, + where=Where( + file=file, + start_sample=int(idx[0]), + end_sample=int(idx[-1]) + 1, + start_sec=round(float(idx[0]) / sample_rate, 6), + end_sec=round(float(idx[-1] + 1) / sample_rate, 6), + ), + measurement={"nonfinite_samples": int(flat.sum())}, + hint=(f"{int(flat.sum())} non-finite (NaN/Inf) sample(s). Plugin diverged or " + f"emitted uninitialized buffer — likely state/reset bug at a block edge."), + fix_hint=FixHint(kind="manual", args=[], note="Check plugin reset/init at stream start"), + )] + + +def detect_discontinuities( + audio: np.ndarray, + sample_rate: int, + *, + block_size: Optional[int] = None, + file: Optional[str] = None, + sigma: float = 8.0, + min_jump: float = 0.02, + edge_tolerance: int = 1, +) -> list[Finding]: + """샘플 간 1차 차분 spike(클릭)를 검출하고 블록 경계 정렬 여부로 분류한다. + + 알고리즘: + d[n] = x[n] - x[n-1]. 클릭은 광대역 임펄스라 |d|가 국소적으로 급증한다. + threshold = max(min_jump, sigma * MAD-based-std(|d|)). robust 통계로 + 음악 신호의 정상 트랜지언트와 구분(MAD는 outlier에 둔감). + + block_size가 주어지면 각 spike 위치 n이 block_size 배수 ± edge_tolerance에 + 들어가는지 본다. 정렬되면 BLOCK_ALIGNED → 스트리밍 상태 단절 서명. + 정렬 안 되면 콘텐츠 자체의 클릭(소스 결함)일 가능성. + + Args: + sigma: robust std 대비 임계 배수. 높을수록 보수적(오검 적음). + min_jump: 절대 최소 점프(선형 진폭). 조용한 구간 false positive 방지. 
+ edge_tolerance: 블록 경계 정렬 판정 허용 오차(samples). + """ + mono = _to_mono(audio).astype(np.float64) + if mono.size < 4: + return [] + + d = np.abs(np.diff(mono)) + # MAD 기반 robust scale (정상 트랜지언트에 둔감) + med = np.median(d) + mad = np.median(np.abs(d - med)) + robust_std = 1.4826 * mad if mad > 0 else float(np.std(d)) + threshold = max(min_jump, med + sigma * robust_std) + + spike_idx = np.where(d > threshold)[0] + 1 # x[n] - x[n-1] → 위치 n + if spike_idx.size == 0: + return [] + + # 인접 spike(같은 클릭의 여러 샘플)를 하나의 이벤트로 묶기 + events: list[tuple[int, int, float]] = [] # (start, end, peak_jump) + run_start = spike_idx[0] + prev = spike_idx[0] + peak = d[spike_idx[0] - 1] + for s in spike_idx[1:]: + if s - prev <= 2: + peak = max(peak, d[s - 1]) + prev = s + else: + events.append((int(run_start), int(prev), float(peak))) + run_start = s + prev = s + peak = d[s - 1] + events.append((int(run_start), int(prev), float(peak))) + + findings: list[Finding] = [] + for start, end, peak_jump in events: + aligned = False + nearest_edge = None + if block_size: + r = start % block_size + dist = min(r, block_size - r) + if dist <= edge_tolerance: + aligned = True + nearest_edge = int(round(start / block_size)) * block_size + + peak_db = 20.0 * np.log10(peak_jump + 1e-30) + if aligned: + sev = Severity.CRITICAL + hint = (f"Discontinuity at sample {start} aligns to block edge " + f"(block_size={block_size}, edge≈{nearest_edge}). Signature of a " + f"streaming state break — plugin reset/lookahead/parameter-smoothing " + f"discontinuity at the buffer boundary. 
This is the click you hear in the DAW.") + else: + sev = Severity.WARN + hint = (f"Discontinuity (jump {peak_jump:.4f}, {peak_db:+.1f} dB) at sample {start}, " + f"not block-aligned — likely a click in the source content, not a streaming bug.") + + findings.append(Finding( + code=Code.CLICK_DENSITY, + category=Category.SIGNAL, + severity=sev, + where=Where( + file=file, + start_sample=start, + end_sample=end + 1, + start_sec=round(start / sample_rate, 6), + end_sec=round((end + 1) / sample_rate, 6), + ), + measurement={ + "peak_jump": round(peak_jump, 6), + "peak_jump_db": round(peak_db, 2), + "threshold": round(threshold, 6), + "block_aligned": aligned, + "block_size": block_size, + "nearest_block_edge": nearest_edge, + }, + hint=hint, + fix_hint=FixHint( + kind="manual", + args=[], + note=("Stream with reset_per_block=False / verify PDC; ensure host resets " + "plugin only at transport start" if aligned else "Inspect source audio"), + ), + )) + + return findings + + +def null_test( + reference: np.ndarray, + candidate: np.ndarray, + sample_rate: int, + *, + latency_samples: int = 0, + file: Optional[str] = None, + threshold_db: float = -60.0, +) -> list[Finding]: + """reference(오프라인 렌더) 대비 candidate(스트리밍) 차이를 측정한다. + + latency_samples만큼 candidate를 당겨 정렬(PDC 보상)한 뒤 차이를 본다. + 최대 차이가 threshold_db를 넘으면 finding. 넘지 않으면 빈 리스트(= 일치). + + Returns: 차이가 유의미하면 길이 1, 아니면 0. 
+ """ + ref = _to_mono(reference).astype(np.float64) + cand = _to_mono(candidate).astype(np.float64) + + if latency_samples > 0: + cand = cand[latency_samples:] + n = min(len(ref), len(cand)) + if n == 0: + return [] + diff = ref[:n] - cand[:n] + + max_abs = float(np.max(np.abs(diff))) + max_db = 20.0 * np.log10(max_abs + 1e-30) + rms = float(np.sqrt(np.mean(diff ** 2))) + rms_db = 20.0 * np.log10(rms + 1e-30) + + if max_db < threshold_db: + return [] + + peak_idx = int(np.argmax(np.abs(diff))) + sev = Severity.CRITICAL if max_db > -20.0 else Severity.WARN + return [Finding( + code=Code.SAMPLE_DROPOUT, + category=Category.PLUGIN, + severity=sev, + where=Where( + file=file, + start_sample=peak_idx, + start_sec=round(peak_idx / sample_rate, 6), + ), + measurement={ + "max_diff_db": round(max_db, 2), + "rms_diff_db": round(rms_db, 2), + "latency_compensated_samples": latency_samples, + "compared_samples": n, + }, + hint=(f"Streamed output diverges from offline render by up to {max_db:+.1f} dB " + f"(RMS {rms_db:+.1f} dB) at sample {peak_idx}. Block streaming is NOT " + f"bit-equivalent to the offline bounce — investigate state continuity / PDC."), + fix_hint=FixHint(kind="manual", args=[], note="Compare block sizes; check latency reporting"), + )] diff --git a/src/audioman/core/findings.py b/src/audioman/core/findings.py new file mode 100644 index 0000000..9b874b7 --- /dev/null +++ b/src/audioman/core/findings.py @@ -0,0 +1,177 @@ +# Created: 2026-05-11 +# Purpose: Audio fault 통합 스키마 — LLM이 audioman 분석 결과를 +# 단일 finding[] 배열로 소비할 수 있게 한다. 
+# JSONSchema 발행: src/audioman/schemas/finding.v1.json +# +# Category 정의: +# - signal : 시간영역 결함 (clipping, DC offset, silence, channel imbalance) +# - spectral : 주파수영역/지각 결함 (hum, hiss, harshness, LUFS, click density) +# - plugin : 플러그인 처리 전/후 결함 (true-peak, gain stage, NaN/inf) +# - container: 컨테이너/인코딩 결함 (codec, channel layout, mapping_family 등) + +from __future__ import annotations + +from dataclasses import asdict, dataclass, field +from enum import Enum +from typing import Any, Optional + + +SCHEMA_URI = "audioman://schema/finding.v1.json" + + +class Category(str, Enum): + SIGNAL = "signal" + SPECTRAL = "spectral" + PLUGIN = "plugin" + CONTAINER = "container" + + +class Severity(str, Enum): + INFO = "info" + WARN = "warn" + CRITICAL = "critical" + + @property + def rank(self) -> int: + return {"info": 0, "warn": 1, "critical": 2}[self.value] + + +# 기계 판독용 안정 코드. 새 코드 추가는 가능하나 기존 코드 제거/변경 금지. +class Code(str, Enum): + # signal + CLIP_SAMPLE_PEAK_EXCEEDED = "CLIP_SAMPLE_PEAK_EXCEEDED" + DC_OFFSET_DETECTED = "DC_OFFSET_DETECTED" + CHANNEL_IMBALANCE = "CHANNEL_IMBALANCE" + SILENCE_LEADING = "SILENCE_LEADING" + SILENCE_TRAILING = "SILENCE_TRAILING" + SILENCE_INNER = "SILENCE_INNER" + SAMPLE_DROPOUT = "SAMPLE_DROPOUT" + # spectral + MAINS_HUM = "MAINS_HUM" + HF_NOISE_FLOOR = "HF_NOISE_FLOOR" + CLICK_DENSITY = "CLICK_DENSITY" + LUFS_OUT_OF_RANGE = "LUFS_OUT_OF_RANGE" + HARSHNESS_HIGH = "HARSHNESS_HIGH" + # plugin + TRUE_PEAK_EXCEEDED = "TRUE_PEAK_EXCEEDED" + GAIN_STAGE_LEAK = "GAIN_STAGE_LEAK" + NONFINITE_SAMPLES = "NONFINITE_SAMPLES" + # container + OPUS_MAPPING_FAMILY_MISSING = "OPUS_MAPPING_FAMILY_MISSING" + SAMPLE_RATE_MISMATCH = "SAMPLE_RATE_MISMATCH" + UNSUPPORTED_CODEC = "UNSUPPORTED_CODEC" + CHANNEL_LAYOUT_LOSS = "CHANNEL_LAYOUT_LOSS" + + +@dataclass +class Where: + """Finding이 가리키는 위치. 
모든 필드는 옵션.""" + file: Optional[str] = None + track: Optional[int] = None + start_sample: Optional[int] = None + end_sample: Optional[int] = None + start_sec: Optional[float] = None + end_sec: Optional[float] = None + channel: Optional[int] = None + frequency_hz: Optional[float] = None + + def to_dict(self) -> dict[str, Any]: + return {k: v for k, v in asdict(self).items() if v is not None} + + +@dataclass +class FixHint: + """이 finding을 해결하기 위한 audioman/외부 명령 힌트.""" + kind: str # "ffmpeg-plan" | "audioman-fx" | "audioman-process" | "manual" + args: list[str] = field(default_factory=list) + note: Optional[str] = None + + def to_dict(self) -> dict[str, Any]: + d: dict[str, Any] = {"kind": self.kind, "args": list(self.args)} + if self.note is not None: + d["note"] = self.note + return d + + +@dataclass +class Finding: + code: Code + category: Category + severity: Severity + hint: str = "" + where: Where = field(default_factory=Where) + measurement: dict[str, Any] = field(default_factory=dict) + fix_hint: Optional[FixHint] = None + id: Optional[str] = None # 자동 생성 가능 + + def to_dict(self) -> dict[str, Any]: + d: dict[str, Any] = { + "id": self.id or self._auto_id(), + "code": self.code.value, + "category": self.category.value, + "severity": self.severity.value, + "where": self.where.to_dict(), + "measurement": dict(self.measurement), + "hint": self.hint, + } + if self.fix_hint is not None: + d["fix_hint"] = self.fix_hint.to_dict() + return d + + def _auto_id(self) -> str: + # category-code의 hash가 아닌, 안정적인 슬러그 + slug = self.code.value.lower().replace("_", "-") + return slug + + +def filter_findings( + findings: list[Finding], + *, + categories: Optional[set[str]] = None, + min_severity: Severity = Severity.INFO, +) -> list[Finding]: + """카테고리/심각도 필터.""" + result = [] + for f in findings: + if categories is not None and f.category.value not in categories: + continue + if f.severity.rank < min_severity.rank: + continue + result.append(f) + return result + + +def envelope( 
+ findings: list[Finding], + *, + file: Optional[str] = None, + audioman_version: Optional[str] = None, + extra: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + """findings[]를 LLM-friendly JSON envelope으로 감싼다.""" + from audioman import __version__ + + out: dict[str, Any] = { + "$schema": SCHEMA_URI, + "audioman_version": audioman_version or __version__, + "findings": [f.to_dict() for f in findings], + "summary": { + "total": len(findings), + "by_severity": { + "info": sum(1 for f in findings if f.severity is Severity.INFO), + "warn": sum(1 for f in findings if f.severity is Severity.WARN), + "critical": sum(1 for f in findings if f.severity is Severity.CRITICAL), + }, + "by_category": { + "signal": sum(1 for f in findings if f.category is Category.SIGNAL), + "spectral": sum(1 for f in findings if f.category is Category.SPECTRAL), + "plugin": sum(1 for f in findings if f.category is Category.PLUGIN), + "container": sum(1 for f in findings if f.category is Category.CONTAINER), + }, + }, + } + if file is not None: + out["file"] = file + if extra: + out.update(extra) + return out diff --git a/src/audioman/core/rt_bench.py b/src/audioman/core/rt_bench.py new file mode 100644 index 0000000..e216fbf --- /dev/null +++ b/src/audioman/core/rt_bench.py @@ -0,0 +1,89 @@ +# Created: 2026-05-31 +# Purpose: 블록 스트리밍의 실시간 CPU 부하 벤치마크. +# +# DAW에서 플러그인이 "무겁다"는 건 블록당 처리시간이 그 블록의 실시간 길이 +# (block_size / sample_rate)를 잡아먹는다는 뜻이다. 처리시간 > 마감이면 xrun +# (오디오 드롭아웃/클릭)이 난다. 이 모듈은 core/streaming의 BlockTiming을 받아 +# RT factor 분포, p50/p95/p99/max, xrun 수, 추정 동시 트랙 수를 산출한다. +# +# 단일 블록 timing은 OS 스케줄링 지터에 민감하므로, percentile과 worst-case를 +# 함께 본다 (DAW에서 중요한 건 평균이 아니라 worst-case — 한 블록만 늦어도 클릭). 
+ +from __future__ import annotations + +from dataclasses import dataclass, asdict +from typing import Any + +import numpy as np + +from audioman.core.streaming import StreamResult + + +@dataclass +class RTBenchReport: + """단일 (플러그인, 블록 크기) 조합의 실시간 성능 리포트.""" + block_size: int + sample_rate: int + blocks: int + audio_seconds: float + + rt_factor_mean: float # 전체 처리시간 / 전체 오디오 길이 + rt_factor_p50: float # 블록별 RT factor 중앙값 + rt_factor_p95: float + rt_factor_p99: float + rt_factor_max: float # worst-case — DAW 클릭 여부를 좌우 + + block_ms_mean: float # 블록당 처리시간 평균 (ms) + block_ms_max: float + deadline_ms: float # block_size / sr * 1000 + + xruns: int # rt_factor > 1.0 인 블록 수 + xrun_ratio: float # xruns / blocks + est_max_tracks: int # 1/p99_rt_factor — 동시에 돌릴 수 있는 트랙 추정 + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + +def benchmark(result: StreamResult, *, warmup_blocks: int = 1) -> RTBenchReport: + """StreamResult의 블록 타이밍을 RT 성능 리포트로 집계한다. + + Args: + warmup_blocks: 첫 N개 블록을 제외(첫 블록은 JIT/캐시 워밍업으로 느려 + worst-case를 오염시킨다). 블록 수가 충분할 때만 적용. 
+ """ + timings = result.timings + if warmup_blocks > 0 and len(timings) > warmup_blocks * 2: + timings = timings[warmup_blocks:] + if not timings: + raise ValueError("StreamResult has no block timings to benchmark") + + rt = np.array([t.rt_factor for t in timings], dtype=np.float64) + proc_ms = np.array([t.process_sec * 1000.0 for t in timings], dtype=np.float64) + deadline_ms = result.block_size / result.sample_rate * 1000.0 + + n = result.audio.shape[1] if result.audio.ndim == 2 else len(result.audio) + audio_sec = n / result.sample_rate if result.sample_rate else 0.0 + total_proc = sum(t.process_sec for t in result.timings) + + p99 = float(np.percentile(rt, 99)) + xruns = int(np.sum(rt > 1.0)) + + return RTBenchReport( + block_size=result.block_size, + sample_rate=result.sample_rate, + blocks=len(timings), + audio_seconds=round(audio_sec, 4), + rt_factor_mean=round(total_proc / audio_sec, 6) if audio_sec > 0 else float("inf"), + rt_factor_p50=round(float(np.percentile(rt, 50)), 6), + rt_factor_p95=round(float(np.percentile(rt, 95)), 6), + rt_factor_p99=round(p99, 6), + rt_factor_max=round(float(np.max(rt)), 6), + block_ms_mean=round(float(np.mean(proc_ms)), 4), + block_ms_max=round(float(np.max(proc_ms)), 4), + deadline_ms=round(deadline_ms, 4), + xruns=xruns, + xrun_ratio=round(xruns / len(timings), 4), + # p99 기준으로 안전하게 돌릴 수 있는 동시 트랙 수 (worst-case 여유) + est_max_tracks=int(1.0 / p99) if p99 > 0 else 0, + ) diff --git a/src/audioman/core/streaming.py b/src/audioman/core/streaming.py new file mode 100644 index 0000000..f9a7127 --- /dev/null +++ b/src/audioman/core/streaming.py @@ -0,0 +1,207 @@ +# Created: 2026-05-31 +# Purpose: DAW 재생 환경을 재현하는 블록 단위 결정적 처리 엔진. +# +# 실제 DAW(Ableton 등)는 오디오를 고정 블록 크기(128/256/512/1024 samples)로 +# 콜백마다 plugin.process()를 호출하며, 블록 사이에 플러그인 내부 상태 +# (필터 히스토리, lookahead 버퍼, 파라미터 스무딩)가 연속 유지된다. +# +# audioman의 기존 process_file은 전체 버퍼를 한 번에 통과시킨다(오프라인 렌더). 
# This module exposes the difference between the two:
#   - render_offline: one whole-buffer process() call (ground truth)
#   - render_streamed: continuous fixed-size block processing (reset=False),
#     i.e. the faithful DAW reproduction
#   - reset_per_block=True option: reset on every block (simulates state
#     loss, which reproduces boundary clicks)
#
# Measured with pedalboard 0.9.22: consecutive reset=False calls keep the
# internal state, and Reverb and similar effects match the whole-buffer render
# bit for bit (-600 dB). Delay/Chorus/Compressor, however, may differ slightly
# from the offline render under block processing itself; the discontinuity
# module triages that difference.

from __future__ import annotations

import logging
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Optional

import numpy as np

logger = logging.getLogger(__name__)


# Block sizes commonly used by DAWs; default values for a benchmark sweep.
COMMON_BLOCK_SIZES = (64, 128, 256, 512, 1024, 2048)


@dataclass
class BlockTiming:
    """Processing-time record for a single block."""
    index: int
    n_samples: int
    process_sec: float   # wall-clock time spent processing this block
    deadline_sec: float  # n_samples / sample_rate (real-time deadline)

    @property
    def rt_factor(self) -> float:
        """Processing time / deadline; > 1.0 means real time was missed (xrun).

        A non-positive deadline yields ``inf`` instead of dividing by zero.
        """
        return self.process_sec / self.deadline_sec if self.deadline_sec > 0 else float("inf")

    @property
    def is_xrun(self) -> bool:
        """True when the block took longer than its real-time deadline."""
        return self.rt_factor > 1.0


@dataclass
class StreamResult:
    """Result of a render (block-streamed or offline)."""
    audio: np.ndarray  # (channels, samples) processed output
    sample_rate: int
    block_size: int
    reset_per_block: bool
    timings: list[BlockTiming] = field(default_factory=list)
    total_process_sec: float = 0.0

    # Processing-mode tag (for debugging / reports)
    mode: str = "streamed"  # "offline" | "streamed"

    def to_summary(self) -> dict[str, Any]:
        """Return the metrics without the audio data (for JSON serialization)."""
        n = self.audio.shape[1] if self.audio.ndim == 2 else len(self.audio)
        audio_sec = n / self.sample_rate if self.sample_rate else 0.0
        return {
            "mode": self.mode,
            "block_size": self.block_size,
            "reset_per_block": self.reset_per_block,
            "sample_rate": self.sample_rate,
            "blocks": len(self.timings),
            "audio_seconds": round(audio_sec, 4),
            "total_process_sec": round(self.total_process_sec, 6),
            "realtime_factor": round(self.total_process_sec / audio_sec, 4) if audio_sec > 0 else None,
        }


# Processing-function signature: (block (channels, n), sample_rate, reset: bool) -> block
ProcessFn = Callable[[np.ndarray, int, bool], np.ndarray]


def _as_2d(audio: np.ndarray) -> np.ndarray:
    """Reshape (samples,) to (1, samples) and make sure the dtype is float32."""
    if audio.ndim == 1:
        audio = audio.reshape(1, -1)
    if audio.dtype != np.float32:
        audio = audio.astype(np.float32)
    return audio


def _deadline_sec(n_samples: int, sample_rate: int) -> float:
    """Return the real-time budget for n_samples, or 0.0 for a non-positive rate.

    Returning 0.0 lets BlockTiming.rt_factor report ``inf`` through its own
    guard, instead of raising ZeroDivisionError after the audio has already
    been processed.
    """
    return n_samples / sample_rate if sample_rate > 0 else 0.0


def render_offline(
    audio: np.ndarray,
    sample_rate: int,
    process_fn: ProcessFn,
) -> StreamResult:
    """Process the whole buffer in one call: the ground truth (DAW freeze/bounce).

    Args:
        audio: Input of shape (samples,) or (channels, samples); converted to
            2-D float32 before processing.
        sample_rate: Sample rate passed through to process_fn.
        process_fn: Called exactly once as process_fn(audio, sample_rate, True).

    Returns:
        A StreamResult with mode="offline", block_size equal to the buffer
        length and a single BlockTiming covering the whole buffer.
    """
    audio = _as_2d(audio)
    n_samples = audio.shape[1]
    start = time.perf_counter()
    out = process_fn(audio, sample_rate, True)
    elapsed = time.perf_counter() - start
    out = _as_2d(np.asarray(out))
    return StreamResult(
        audio=out,
        sample_rate=sample_rate,
        block_size=n_samples,
        reset_per_block=False,
        timings=[BlockTiming(0, n_samples, elapsed, _deadline_sec(n_samples, sample_rate))],
        total_process_sec=elapsed,
        mode="offline",
    )


def render_streamed(
    audio: np.ndarray,
    sample_rate: int,
    process_fn: ProcessFn,
    block_size: int = 512,
    reset_per_block: bool = False,
    reset_first: bool = True,
) -> StreamResult:
    """Process in consecutive fixed-size blocks, reproducing a DAW real-time callback.

    Args:
        audio: Input of shape (samples,) or (channels, samples).
        sample_rate: Sample rate passed through to process_fn.
        process_fn: Called once per block as process_fn(block, sample_rate, reset).
        block_size: Samples per block (the DAW buffer size). The last block
            may be shorter.
        reset_per_block: If True, request a reset on every block to simulate a
            state-loss bug. A real DAW runs continuously (False), but some
            badly implemented plugins/hosts lose state on each block and click
            at the boundaries.
        reset_first: If True (default), the first block starts from a reset
            state, reproducing a DAW resetting the plugin when playback
            starts. If False, the first block starts from whatever state
            (tail) process_fn still holds, reproducing a playback-start click.
            Ignored when reset_per_block is True (every block resets).

    Returns:
        A StreamResult with mode="streamed"; each block's processing time is
        measured with perf_counter so RT factor / xrun can be derived.

    Raises:
        ValueError: If block_size is smaller than 1.
    """
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")

    audio = _as_2d(audio)
    n_ch, n = audio.shape

    out_chunks: list[np.ndarray] = []
    timings: list[BlockTiming] = []
    total = 0.0

    idx = 0
    pos = 0
    while pos < n:
        block = audio[:, pos:pos + block_size]
        bn = block.shape[1]
        # reset_first applies to the first block only; later blocks follow reset_per_block
        reset = reset_per_block or (idx == 0 and reset_first)
        start = time.perf_counter()
        processed = process_fn(block, sample_rate, reset)
        elapsed = time.perf_counter() - start

        processed = _as_2d(np.asarray(processed))
        out_chunks.append(processed)
        timings.append(BlockTiming(
            index=idx,
            n_samples=bn,
            process_sec=elapsed,
            deadline_sec=_deadline_sec(bn, sample_rate),
        ))
        total += elapsed
        pos += block_size
        idx += 1

    # Empty input never enters the loop, so return an empty (channels, 0) buffer
    out = np.concatenate(out_chunks, axis=1) if out_chunks else np.zeros((n_ch, 0), dtype=np.float32)

    return StreamResult(
        audio=out,
        sample_rate=sample_rate,
        block_size=block_size,
        reset_per_block=reset_per_block,
        timings=timings,
        total_process_sec=total,
        mode="streamed",
    )


def make_pedalboard_process_fn(board) -> ProcessFn:
    """Wrap a pedalboard.Pedalboard or a single Plugin as a ProcessFn.

    The returned function forwards to board.process(block, sr, reset=reset).
    """
    def fn(block: np.ndarray, sr: int, reset: bool) -> np.ndarray:
        return board.process(block, sr, reset=reset)
    return fn


def make_wrapper_process_fn(wrapper) -> ProcessFn:
    """Wrap a VST3PluginWrapper as a ProcessFn.

    The reset flag is passed straight through to wrapper.process(block, sr,
    reset=reset).
    """
    def fn(block: np.ndarray, sr: int, reset: bool) -> np.ndarray:
        return wrapper.process(block, sr, reset=reset)
    return fn
"실행하지 않고 계획만 표시", - "Number of parallel workers (default: 1)": "병렬 처리 워커 수 (기본: 1)", - # chain - "Process audio through multiple plugins sequentially": "다중 플러그인 순차 처리", - "Processing chain (e.g. 'dehum:notch_frequency=60,declick,denoise:noise_reduction_db=15')": "처리 체인 (예: 'dehum:notch_frequency=60,declick,denoise:noise_reduction_db=15')", - # preset - "Preset management": "프리셋 관리", - "Save preset": "프리셋 저장", - "Preset name": "프리셋 이름", - "Description": "설명", - "Show preset info": "프리셋 정보 표시", - "Plugin name (optional)": "플러그인 이름 (선택)", - "List presets": "프리셋 목록", - "Plugin filter": "플러그인 필터", - "Delete preset": "프리셋 삭제", - # dump - "Dump plugin parameter state to JSON/JSONL": "플러그인 파라미터 상태를 JSON/JSONL로 덤프", - "Plugin name (omit for --all)": "플러그인 이름 (생략 시 --all 필요)", - "Set parameter before dump (key=value)": "덤프 전 파라미터 설정 (key=value)", - "Preset name (apply before dump)": "프리셋 이름 (적용 후 덤프)", - "Save dump as preset": "덤프 결과를 프리셋으로 저장", - "Dump all plugins as JSONL": "모든 플러그인 JSONL 덤프", - "Plugin name filter (with --all)": "플러그인 이름 필터 (--all과 함께)", - "Format filter (with --all)": "포맷 필터 (--all과 함께)", - "JSONL output file (default: stdout)": "JSONL 출력 파일 (기본: stdout)", - # analyze - "Audio analysis (RMS, spectral entropy, silence detection, etc.)": "오디오 분석 (RMS, spectral entropy, silence 감지 등)", - "Per-frame detailed output": "프레임 단위 상세 출력", - "Frame size (default: 2048)": "프레임 크기 (기본: 2048)", - "Hop size (default: 512)": "홉 크기 (기본: 512)", - "Silence detection threshold dB (default: -40)": "Silence 감지 임계값 dB (기본: -40)", - "Show ASCII waveform": "ASCII 웨이브폼 표시", - "Waveform width (default: 80)": "웨이브폼 가로 폭 (기본: 80)", - "Waveform height (default: 16)": "웨이브폼 세로 높이 (기본: 16)", - "Waveform mode (default: peak)": "웨이브폼 모드 (기본: peak)", - # fx - "Built-in DSP effects (fade, trim, normalize, gate, gain)": "내장 DSP 이펙트 (fade, trim, normalize, gate, gain)", - "Effect type": "이펙트 종류", - "Linear fade in": "선형 fade in", - "Fade length (samples)": "fade 길이 (샘플)", - "Fade length 
(seconds)": "fade 길이 (초)", - "Output path": "출력 경로", - "Linear fade out": "선형 fade out", - "Trim by samples/time": "샘플/시간 단위 트리밍", - "Start sample": "시작 샘플", - "End sample": "끝 샘플", - "Start (seconds)": "시작 (초)", - "End (seconds)": "끝 (초)", - "Trim leading/trailing silence": "앞뒤 silence 제거", - "Threshold dB (default: -40)": "임계값 dB (기본: -40)", - "Silence boundary padding samples": "silence 경계 패딩 샘플", - "Normalize (peak or RMS)": "정규화 (peak 또는 RMS)", - "Peak target dB (e.g. -1)": "피크 목표 dB (예: -1)", - "RMS target dB (e.g. -20)": "RMS 목표 dB (예: -20)", - "Noise gate (RMS-based)": "노이즈 게이트 (RMS 기반)", - "Threshold dB (default: -50)": "임계값 dB (기본: -50)", - "Attack time (seconds)": "attack 시간 (초)", - "Release time (seconds)": "release 시간 (초)", - "dB gain": "dB 게인", - "Gain (dB)": "게인 (dB)", - # visualize - "Vamp plugin or built-in analysis -> Sonic Visualiser SVL file": "Vamp 플러그인 또는 내장 분석 → Sonic Visualiser SVL 파일 생성", - "Built-in analysis type": "내장 분석 타입", - "Input audio file": "입력 오디오 파일", - "Vamp plugin ID (e.g. 
qm-vamp-plugins:qm-chromagram)": "Vamp 플러그인 ID (예: qm-vamp-plugins:qm-chromagram)", - "Output SVL file path (default: auto)": "출력 SVL 파일 경로 (기본: 자동 생성)", - "Vamp plugin output name (for multiple outputs)": "Vamp 플러그인 출력 이름 (복수 출력 시)", - "FFT frame size (default: 2048)": "FFT 프레임 크기 (기본: 2048)", - "List installed Vamp plugins": "설치된 Vamp 플러그인 목록", - "Query plugin output info": "플러그인 출력 정보 조회", - "Open in Sonic Visualiser after creation": "생성 후 Sonic Visualiser로 열기", - # doctor - "Plugin analysis — frequency response, THD, dynamics, waveshaper, performance": "플러그인 분석 — frequency response, THD, dynamics, waveshaper, performance", - "Plugin name or path": "플러그인 이름 또는 경로", - "Analysis mode (default: all)": "분석 모드 (기본: all)", - "Test frequency Hz": "테스트 주파수 Hz", - "Input level dB": "입력 레벨 dB", - "M/S mode": "M/S 모드", - "Compare with second plugin": "2번째 플러그인과 비교", - "Second plugin parameters": "2번째 플러그인 파라미터", - "CLAP embedding profiling (per-parameter saturation fingerprint)": "CLAP 임베딩 프로파일링 (파라미터별 새추레이션 지문)", - "CLAP sweep parameters (e.g. 
--clap-sweep drive=0,25,50,75,100)": "CLAP 스윕 파라미터 (예: --clap-sweep drive=0,25,50,75,100)", - "CLAP embedding npy save path": "CLAP 임베딩 npy 저장 경로", - "Save result JSON file": "결과 JSON 파일 저장", -} - - -# --------------------------------------------------------------------------- -# Locale 감지 및 번역 함수 -# --------------------------------------------------------------------------- - -def _detect_lang() -> str: - """AUDIOMAN_LANG > LC_ALL > LC_MESSAGES > LANG 순서로 언어 코드 감지.""" - # 환경변수로 직접 지정 가능 - env_lang = os.environ.get("AUDIOMAN_LANG", "") - if env_lang: - return env_lang.split("_")[0].split("-")[0].lower() - - # 시스템 locale - try: - loc = locale.getlocale()[0] or locale.getdefaultlocale()[0] or "" - except ValueError: - loc = "" - - if loc: - return loc.split("_")[0].lower() - - return "en" - - -_current_lang: str | None = None - - -def get_lang() -> str: - """현재 활성 언어 코드 반환.""" - global _current_lang - if _current_lang is None: - _current_lang = _detect_lang() - return _current_lang - - -def set_lang(lang: str) -> None: - """언어를 수동으로 설정.""" - global _current_lang - _current_lang = lang.split("_")[0].split("-")[0].lower() - - -def _(msg: str) -> str: - """메시지를 현재 locale에 맞게 번역. 번역이 없으면 원문(영어) 반환.""" - lang = get_lang() - if lang == "en": - return msg - catalog = CATALOGS.get(lang) - if catalog is None: - return msg - return catalog.get(msg, msg) diff --git a/src/audioman/plugins/vst3.py b/src/audioman/plugins/vst3.py index e44c754..6a08dc8 100644 --- a/src/audioman/plugins/vst3.py +++ b/src/audioman/plugins/vst3.py @@ -112,8 +112,18 @@ def set_parameters(self, params: dict[str, Any]) -> None: except Exception as e: logger.warning(f"파라미터 설정 실패: {name} = {value}: {e}") - def process(self, audio: np.ndarray, sample_rate: int) -> np.ndarray: - """오디오 처리. audio shape: (channels, samples), float32""" + def process(self, audio: np.ndarray, sample_rate: int, reset: bool = True) -> np.ndarray: + """오디오 처리. 
audio shape: (channels, samples), float32 + + Args: + reset: True(기본)면 처리 전 플러그인 내부 상태를 리셋한다 — 전체 버퍼를 + 한 번에 통과시키는 오프라인 렌더에 맞다. 블록 단위 스트리밍 + (DAW 재생 재현)에서는 reset=False로 호출해 이전 블록의 상태 + (필터 히스토리, lookahead 버퍼)를 연속 유지해야 한다. + pedalboard 실측: reset=False 연속 호출은 whole-buffer 렌더와 + 비트 단위로 일치(-600dB)하지만, 매 블록 reset=True면 경계마다 + 상태가 끊겨 클릭이 발생(-7.5dB)한다. + """ self.load() # shape 검증/변환 @@ -124,7 +134,7 @@ def process(self, audio: np.ndarray, sample_rate: int) -> np.ndarray: if audio.dtype != np.float32: audio = audio.astype(np.float32) - return self._plugin.process(audio, sample_rate) + return self._plugin.process(audio, sample_rate, reset=reset) def reset(self) -> None: """플러그인 상태 리셋""" diff --git a/src/audioman/schemas/analyze.v1.json b/src/audioman/schemas/analyze.v1.json new file mode 100644 index 0000000..baefebd --- /dev/null +++ b/src/audioman/schemas/analyze.v1.json @@ -0,0 +1,65 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "audioman://schema/analyze.v1.json", + "title": "Audioman Analyze Result", + "description": "Output envelope for `audioman analyze --json`. Contains audio metadata, summary statistics, silence regions, optional spectrum diagnostics, and a uniform finding[] array.", + "type": "object", + "required": [ + "$schema", + "audioman_version", + "command", + "file", + "sample_rate", + "channels", + "duration_sec", + "total_samples", + "frames", + "rms", + "peak", + "summary", + "silence_regions", + "findings" + ], + "properties": { + "$schema": {"const": "audioman://schema/analyze.v1.json"}, + "audioman_version": {"type": "string"}, + "command": {"const": "analyze"}, + "file": {"type": "string"}, + "sample_rate": {"type": "integer"}, + "channels": {"type": "integer"}, + "duration": { + "type": "number", + "description": "Duration in seconds (legacy field, alias of duration_sec)." 
+ }, + "duration_sec": {"type": "number"}, + "frames": { + "type": "integer", + "description": "Number of samples per channel (alias of total_samples)." + }, + "total_samples": {"type": "integer"}, + "rms": {"type": "number"}, + "peak": {"type": "number"}, + "summary": {"type": "object"}, + "silence_regions": { + "type": "array", + "items": { + "type": "object", + "required": ["start_sample", "end_sample", "duration_sec"], + "properties": { + "start_sample": {"type": "integer"}, + "end_sample": {"type": "integer"}, + "duration_sec": {"type": "number"} + } + } + }, + "silence_total_sec": {"type": "number"}, + "spectrum": {"type": "object"}, + "findings": { + "type": "array", + "items": {"$ref": "audioman://schema/finding.v1.json"} + }, + "ascii_waveform": {"type": "string"}, + "ascii_envelope": {"type": "string"}, + "ascii_spectral": {"type": "string"} + } +} diff --git a/src/audioman/schemas/changelog.v1.json b/src/audioman/schemas/changelog.v1.json new file mode 100644 index 0000000..aea263d --- /dev/null +++ b/src/audioman/schemas/changelog.v1.json @@ -0,0 +1,33 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "audioman://schema/changelog.v1.json", + "title": "Audioman Changelog", + "description": "Parsed view of CHANGELOG.md (Keep a Changelog format).", + "type": "object", + "required": ["$schema", "audioman_version", "command", "entries"], + "properties": { + "$schema": {"const": "audioman://schema/changelog.v1.json"}, + "audioman_version": {"type": "string"}, + "command": {"const": "changelog"}, + "source": {"type": "string"}, + "entries": { + "type": "array", + "items": { + "type": "object", + "required": ["version", "sections"], + "properties": { + "version": {"type": "string"}, + "date": {"type": ["string", "null"]}, + "sections": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": {"type": "string"} + } + } + } + } + }, + "error": {"type": "string"} + } +} diff --git 
a/src/audioman/schemas/finding.v1.json b/src/audioman/schemas/finding.v1.json new file mode 100644 index 0000000..69950b4 --- /dev/null +++ b/src/audioman/schemas/finding.v1.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "audioman://schema/finding.v1.json", + "title": "Audioman Finding", + "description": "Single audio fault observation emitted by audioman observe/analyze.", + "type": "object", + "required": ["id", "code", "category", "severity", "where", "measurement", "hint"], + "properties": { + "id": {"type": "string"}, + "code": { + "type": "string", + "description": "Stable machine-readable code. New values may be added; existing values will not change." + }, + "category": { + "type": "string", + "enum": ["signal", "spectral", "plugin", "container"] + }, + "severity": { + "type": "string", + "enum": ["info", "warn", "critical"] + }, + "where": { + "type": "object", + "properties": { + "file": {"type": ["string", "null"]}, + "track": {"type": ["integer", "null"]}, + "start_sample": {"type": ["integer", "null"]}, + "end_sample": {"type": ["integer", "null"]}, + "start_sec": {"type": ["number", "null"]}, + "end_sec": {"type": ["number", "null"]}, + "channel": {"type": ["integer", "null"]}, + "frequency_hz": {"type": ["number", "null"]} + } + }, + "measurement": { + "type": "object", + "description": "Detector-specific numeric evidence (e.g. peak_dbfs, snr_db, samples_clipped)." 
+ }, + "hint": {"type": "string"}, + "fix_hint": { + "type": "object", + "required": ["kind", "args"], + "properties": { + "kind": { + "type": "string", + "enum": ["ffmpeg-plan", "audioman-fx", "audioman-process", "manual"] + }, + "args": {"type": "array", "items": {"type": "string"}}, + "note": {"type": "string"} + } + } + } +} diff --git a/src/audioman/schemas/observe.v1.json b/src/audioman/schemas/observe.v1.json new file mode 100644 index 0000000..a372569 --- /dev/null +++ b/src/audioman/schemas/observe.v1.json @@ -0,0 +1,48 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "audioman://schema/observe.v1.json", + "title": "Audioman Observe Result", + "description": "Output envelope for `audioman observe`. Contains audio metadata and a uniform finding[] array.", + "type": "object", + "required": [ + "$schema", + "audioman_version", + "command", + "file", + "sample_rate", + "channels", + "duration_sec", + "total_samples", + "findings", + "summary" + ], + "properties": { + "$schema": {"const": "audioman://schema/finding.v1.json"}, + "audioman_version": {"type": "string"}, + "command": {"const": "observe"}, + "file": {"type": "string"}, + "sample_rate": {"type": "integer"}, + "channels": {"type": "integer"}, + "duration_sec": {"type": "number"}, + "total_samples": {"type": "integer"}, + "filter": { + "type": "object", + "properties": { + "categories": {"type": "array", "items": {"type": "string"}}, + "min_severity": {"type": "string", "enum": ["info", "warn", "critical"]} + } + }, + "findings": { + "type": "array", + "items": {"$ref": "audioman://schema/finding.v1.json"} + }, + "summary": { + "type": "object", + "properties": { + "total": {"type": "integer"}, + "by_severity": {"type": "object"}, + "by_category": {"type": "object"} + } + } + } +} diff --git a/tests/unit/test_changelog_cmd.py b/tests/unit/test_changelog_cmd.py new file mode 100644 index 0000000..26a5342 --- /dev/null +++ b/tests/unit/test_changelog_cmd.py @@ -0,0 +1,77 @@ +# 
# Created: 2026-05-11
# Purpose: regression tests for the changelog parser and the --since filter.

import json
import os
import subprocess
import sys

from audioman.cli.changelog_cmd import filter_since, parse_changelog


# Repository root, derived from this file's location (tests/unit/<this file>).
# The subprocess test below runs from here; a hard-coded absolute path would
# only exist on the original author's machine.
_REPO_ROOT = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)


SAMPLE = """\
# Changelog

## [Unreleased]
### Added
- foo

## [0.2.0] - 2026-05-10
### Added
- new feature X
- another item

### Changed
- behavior Y

## [0.1.0] - 2026-03-26
### Added
- initial release
"""


class TestParser:
    """parse_changelog() on a small Keep-a-Changelog style sample."""

    def test_parses_versions(self):
        entries = parse_changelog(SAMPLE)
        versions = [e["version"] for e in entries]
        assert versions == ["Unreleased", "0.2.0", "0.1.0"]

    def test_parses_dates(self):
        entries = parse_changelog(SAMPLE)
        assert entries[1]["date"] == "2026-05-10"
        assert entries[2]["date"] == "2026-03-26"
        assert entries[0]["date"] is None

    def test_parses_section_bullets(self):
        entries = parse_changelog(SAMPLE)
        v020 = entries[1]
        assert "new feature X" in v020["sections"]["added"]
        assert "behavior Y" in v020["sections"]["changed"]


class TestSinceFilter:
    """filter_since() keeps entries newer than the given version."""

    def test_since_filters_older(self):
        entries = parse_changelog(SAMPLE)
        filtered = filter_since(entries, "0.1.0")
        versions = [e["version"] for e in filtered]
        assert "0.2.0" in versions
        assert "Unreleased" in versions
        assert "0.1.0" not in versions


class TestChangelogCommand:
    """End-to-end check of `audioman --json changelog` in a subprocess."""

    def test_json_envelope(self):
        env = os.environ.copy()
        result = subprocess.run(
            [sys.executable, "-m", "audioman", "--json", "changelog"],
            env=env,
            capture_output=True,
            text=True,
            # Run from the checkout this test file lives in, not a fixed path.
            cwd=_REPO_ROOT,
        )
        assert result.returncode == 0, result.stderr
        payload = json.loads(result.stdout)
        assert payload["$schema"] == "audioman://schema/changelog.v1.json"
        assert payload["command"] == "changelog"
        assert isinstance(payload["entries"], list)
        assert len(payload["entries"]) >= 1
# --- tests/unit/test_findings.py (new file) ---
# Created: 2026-05-11
# Purpose: regression tests for the Finding schema and the signal/spectral detectors.

import numpy as np
import pytest

from audioman.core.detectors import (
    detect_channel_imbalance,
    detect_clipping,
    detect_dc_offset,
    detect_signal_findings,
    silence_to_findings,
    spectrum_to_findings,
)
from audioman.core.analysis import SilenceRegion
from audioman.core.findings import (
    Category,
    Code,
    Finding,
    Severity,
    SCHEMA_URI,
    envelope,
    filter_findings,
)


SR = 48000


def _sine(freq=440.0, duration=1.0, amp=0.5, sr=SR):
    """Return a float32 mono sine of the given frequency, length and amplitude."""
    t = np.linspace(0, duration, int(sr * duration), endpoint=False, dtype=np.float32)
    return amp * np.sin(2 * np.pi * freq * t)


class TestFindingSerialization:
    """Finding.to_dict(), severity ordering, filter_findings() and envelope()."""

    def test_to_dict_has_required_fields(self):
        f = Finding(
            code=Code.CLIP_SAMPLE_PEAK_EXCEEDED,
            category=Category.SIGNAL,
            severity=Severity.CRITICAL,
            hint="x",
        )
        d = f.to_dict()
        assert d["code"] == "CLIP_SAMPLE_PEAK_EXCEEDED"
        assert d["category"] == "signal"
        assert d["severity"] == "critical"
        assert "id" in d
        assert "where" in d
        assert "measurement" in d

    def test_severity_rank(self):
        assert Severity.INFO.rank < Severity.WARN.rank < Severity.CRITICAL.rank

    def test_filter_findings_by_severity(self):
        items = [
            Finding(Code.SILENCE_LEADING, Category.SIGNAL, Severity.INFO),
            Finding(Code.DC_OFFSET_DETECTED, Category.SIGNAL, Severity.WARN),
            Finding(Code.CLIP_SAMPLE_PEAK_EXCEEDED, Category.SIGNAL, Severity.CRITICAL),
        ]
        warn = filter_findings(items, min_severity=Severity.WARN)
        assert len(warn) == 2
        crit = filter_findings(items, min_severity=Severity.CRITICAL)
        assert len(crit) == 1

    def test_filter_findings_by_category(self):
        items = [
            Finding(Code.SILENCE_LEADING, Category.SIGNAL, Severity.INFO),
            Finding(Code.MAINS_HUM, Category.SPECTRAL, Severity.WARN),
        ]
        signal = filter_findings(items, categories={"signal"})
        assert len(signal) == 1
        assert signal[0].category is Category.SIGNAL

    def test_envelope_has_schema_uri(self):
        env = envelope([])
        assert env["$schema"] == SCHEMA_URI
        assert "audioman_version" in env
        assert env["summary"]["total"] == 0


class TestClipDetector:
    """detect_clipping() on a hard-clipped sine versus a clean one."""

    def test_clip_finding_emitted(self):
        sig = np.clip(2.0 * _sine(), -1.0, 1.0)
        findings = detect_clipping(sig, SR, file="x.wav")
        assert len(findings) == 1
        f = findings[0]
        assert f.code is Code.CLIP_SAMPLE_PEAK_EXCEEDED
        assert f.measurement["samples_clipped"] > 0

    def test_no_clip_no_finding(self):
        sig = 0.5 * _sine()
        assert detect_clipping(sig, SR) == []


class TestDcOffsetDetector:
    """detect_dc_offset() with and without an added constant offset."""

    def test_offset_above_threshold(self):
        sig = _sine() + 0.01
        findings = detect_dc_offset(sig, SR, file="x.wav")
        assert len(findings) == 1
        assert findings[0].code is Code.DC_OFFSET_DETECTED

    def test_clean_no_offset(self):
        sig = _sine()
        assert detect_dc_offset(sig, SR) == []


class TestChannelImbalance:
    """detect_channel_imbalance() on unbalanced, balanced and mono input."""

    def test_stereo_imbalance(self):
        left = _sine(amp=0.5)
        right = _sine(amp=0.1)
        stereo = np.stack([left, right])
        findings = detect_channel_imbalance(stereo, SR)
        assert len(findings) == 1
        assert findings[0].code is Code.CHANNEL_IMBALANCE

    def test_balanced_stereo_no_finding(self):
        sig = _sine()
        stereo = np.stack([sig, sig])
        assert detect_channel_imbalance(stereo, SR) == []

    def test_mono_skipped(self):
        assert detect_channel_imbalance(_sine(), SR) == []


class TestSilenceToFindings:
    """silence_to_findings() classification of leading/trailing/inner regions."""

    def test_leading_and_trailing_classified(self):
        regions = [
            SilenceRegion(start_sample=0, end_sample=int(0.3 * SR), duration_sec=0.3),
            SilenceRegion(start_sample=int(0.9 * SR), end_sample=SR, duration_sec=0.1),
        ]
        findings = silence_to_findings(regions, SR, SR)
        codes = {f.code for f in findings}
        assert Code.SILENCE_LEADING in codes
        assert Code.SILENCE_TRAILING in codes

    def test_inner_long_silence_warns(self):
        regions = [
            SilenceRegion(start_sample=int(0.4 * SR), end_sample=int(0.41 * SR + SR), duration_sec=1.01),
        ]
        findings = silence_to_findings(regions, SR * 3, SR)
        assert len(findings) == 1
        assert findings[0].code is Code.SILENCE_INNER
        assert findings[0].severity is Severity.WARN


class TestSpectrumToFindings:
    """spectrum_to_findings() on a spectrum dict that flags 60 Hz hum."""

    def test_hum_finding(self):
        spec = {
            "hum_check": [{"frequency_hz": 60, "snr_db": 30.0, "is_hum": True}],
            "hf_slope": {"mid_db": -20.0, "high_db": -40.0, "slope_db": -20.0},
        }
        findings = spectrum_to_findings(spec)
        assert any(f.code is Code.MAINS_HUM for f in findings)


class TestSignalFindingsCombined:
    """detect_signal_findings() must stay silent on a clean sine."""

    def test_clean_signal_emits_no_findings(self):
        sig = _sine()
        findings = detect_signal_findings(sig, SR)
        assert findings == []


# --- tests/unit/test_observe.py (new file) ---
# Created: 2026-05-11
# Purpose: integration tests for the `audioman observe` command: JSON envelope,
#          metadata fields, and the guaranteed finding[] array.

import json
import os
import subprocess
import sys

import numpy as np
import soundfile as sf


SR = 48000


def _make_faulty_wav(path):
    """Write a 1-second, hard-clipped 1 kHz sine to *path*."""
    t = np.linspace(0, 1.0, SR, endpoint=False, dtype=np.float32)
    sig = np.clip(2.0 * np.sin(2 * np.pi * 1000 * t), -1.0, 1.0)
    sf.write(str(path), sig, SR)


def _run_observe(path, *extra, env_extra=None):
    """Run `audioman --json --plain observe <path> [extra...]` and return the parsed JSON."""
    env = os.environ.copy()
    if env_extra:
        env.update(env_extra)
    result = subprocess.run(
        [sys.executable, "-m", "audioman", "--json", "--plain", "observe", str(path), *extra],
        env=env,
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0, result.stderr
    return json.loads(result.stdout)


class TestObserveEnvelope:
    """Shape and content of the observe JSON envelope for a clipped file."""

    def test_required_meta_fields(self, tmp_path):
        p = tmp_path / "clip.wav"
        _make_faulty_wav(p)
        payload = _run_observe(p)

        # Directly verifies feedback item #3: duration/total_samples are always populated.
        assert payload["duration_sec"] is not None
        assert payload["total_samples"] == SR
        assert payload["sample_rate"] == SR
        assert payload["channels"] == 1
        assert payload["$schema"] == "audioman://schema/finding.v1.json"
        assert "audioman_version" in payload
        assert payload["command"] == "observe"

    def test_clip_finding_present(self, tmp_path):
        p = tmp_path / "clip.wav"
        _make_faulty_wav(p)
        payload = _run_observe(p)
        codes = {f["code"] for f in payload["findings"]}
        assert "CLIP_SAMPLE_PEAK_EXCEEDED" in codes

    def test_summary_counts_match_findings(self, tmp_path):
        p = tmp_path / "clip.wav"
        _make_faulty_wav(p)
        payload = _run_observe(p)
        sev_counts = payload["summary"]["by_severity"]
        assert sum(sev_counts.values()) == payload["summary"]["total"]
        assert payload["summary"]["total"] == len(payload["findings"])

    def test_category_filter(self, tmp_path):
        p = tmp_path / "clip.wav"
        _make_faulty_wav(p)
        payload = _run_observe(p, "--category", "spectral")
        # The signal category is disabled, so there must be no clipping finding.
        assert all(f["category"] == "spectral" for f in payload["findings"])

    def test_severity_filter(self, tmp_path):
        p = tmp_path / "clip.wav"
        _make_faulty_wav(p)
        payload = _run_observe(p, "--severity", "critical")
        assert all(f["severity"] == "critical" for f in payload["findings"])


# --- tests/unit/test_plain_mode.py (new file; header only, module body follows) ---
# Created: 2026-05-11
# Purpose: regression tests for the --plain / AUDIOMAN_PLAIN output mode.
# Addresses LLM-agent feedback #1: --help must print in English without ANSI/color tokens.

import os
import re
import subprocess
import sys

import pytest


# Matches the start of an ANSI CSI escape sequence (ESC followed by "[").
ANSI_RE = re.compile(r"\x1b\[")


def _run(args, env_extra=None):
    """Run `python -m audioman <args>` with optional extra env vars; return the CompletedProcess."""
    env = os.environ.copy()
    if env_extra:
        env.update(env_extra)
    return subprocess.run(
        [sys.executable, "-m", "audioman", *args],
        env=env,
        capture_output=True,
        text=True,
    )


class TestPlainMode:
    """--help output under --plain and under the AUDIOMAN_PLAIN env var."""

    def test_plain_flag_strips_ansi_from_help(self):
        result = _run(["--plain", "--help"])
        assert result.returncode == 0
        assert ANSI_RE.search(result.stdout) is None, (
            "Plain help should not contain ANSI escape sequences"
        )

    def test_plain_help_is_english(self):
        result = _run(["--plain", "--help"])
        assert result.returncode == 0
        assert "Available commands" in result.stdout

    def test_env_var_alone_enables_plain(self):
        result = _run(["--help"], env_extra={"AUDIOMAN_PLAIN": "1"})
        assert result.returncode == 0
        assert ANSI_RE.search(result.stdout) is None
        assert "Available commands" in result.stdout


# --- tests/unit/test_streaming.py (new file; header only, module body follows) ---
# Created: 2026-05-31
# Purpose: tests for the block-streaming engine, click triage and the RT benchmark.
# Uses pedalboard built-ins only; no VST3 plugin required.

import numpy as np
import pytest

from audioman.core.streaming import (
    render_offline,
    render_streamed,
    make_pedalboard_process_fn,
)
from audioman.core.discontinuity import (
    detect_discontinuities,
    detect_nonfinite,
    null_test,
)
from audioman.core.rt_bench import benchmark


SR = 48000


def _sine(freq=440.0, dur=0.5, sr=SR):
    """Return a float32 sine of amplitude 0.3 shaped (1, samples)."""
    t = np.arange(int(sr * dur)) / sr
    return (0.3 * np.sin(2 * np.pi * freq * t)).astype(np.float32).reshape(1, -1)


def _reverb_fn():
    """Build a fresh ProcessFn around a pedalboard Reverb (new plugin instance per call)."""
    from pedalboard import Pedalboard, Reverb
    return make_pedalboard_process_fn(Pedalboard([Reverb(room_size=0.8, wet_level=0.5)]))


# --- streaming engine -------------------------------------------------------

def test_streamed_matches_offline_bit_for_bit():
    """Correct streaming (continuous reset=False) must match the offline render bit for bit."""
    x = _sine()
    off = render_offline(x, SR, _reverb_fn())
    st = render_streamed(x, SR, _reverb_fn(), block_size=512, reset_per_block=False)
    n = min(off.audio.shape[1], st.audio.shape[1])
    # The 1e-30 floor keeps log10 finite when the difference is exactly zero.
    max_db = 20 * np.log10(np.max(np.abs(off.audio[:, :n] - st.audio[:, :n])) + 1e-30)
    assert max_db < -120.0, f"streamed should match offline, got {max_db:.1f} dB"


def test_reset_per_block_breaks_continuity():
    """Resetting on every block diverges noticeably from offline (reproduces the click)."""
    x = _sine()
    off = render_offline(x, SR, _reverb_fn())
    bad = render_streamed(x, SR, _reverb_fn(), block_size=512, reset_per_block=True)
    n = min(off.audio.shape[1], bad.audio.shape[1])
    max_db = 20 * np.log10(np.max(np.abs(off.audio[:, :n] - bad.audio[:, :n])) + 1e-30)
    assert max_db > -30.0, f"reset-per-block should diverge, got {max_db:.1f} dB"


def test_block_count_and_length():
    """Block count and output length must match the input."""
    x = _sine(dur=1.0)
    st = render_streamed(x, SR, _reverb_fn(), block_size=512)
    expected_blocks = -(-x.shape[1] // 512)  # ceil
    assert len(st.timings) == expected_blocks
    assert st.audio.shape[1] == x.shape[1]


def test_invalid_block_size():
    """block_size=0 is rejected with ValueError."""
    with pytest.raises(ValueError):
        render_streamed(_sine(), SR, _reverb_fn(), block_size=0)


# --- discontinuity / triage -------------------------------------------------

def test_clean_sine_no_false_positives():
    """No click may be reported on a clean sine."""
    x = _sine()
    findings = detect_discontinuities(x, SR, block_size=512)
    assert findings == []


def test_block_aligned_click_detected():
    """A step injected on a block boundary is classified block_aligned=True, critical."""
    x = _sine().copy()
    bs = 512
    x[0, bs * 10] += 0.4  # exactly on a block boundary
    findings = detect_discontinuities(x, SR, block_size=bs)
    aligned = [f for f in findings if f.measurement["block_aligned"]]
    assert len(aligned) >= 1
    f = aligned[0]
    assert f.severity.value == "critical"
    assert f.measurement["nearest_block_edge"] == bs * 10


def test_unaligned_click_is_warn_not_critical():
    """A click away from any block boundary is classified as a source defect (warn)."""
    x = _sine().copy()
    bs = 512
    pos = bs * 10 + 137  # far enough from the boundary
    x[0, pos] += 0.4
    findings = detect_discontinuities(x, SR, block_size=bs)
    matched = [f for f in findings if abs(f.where.start_sample - pos) <= 2]
    assert matched, "click should be detected"
    assert matched[0].measurement["block_aligned"] is False
    assert matched[0].severity.value == "warn"


def test_nonfinite_detection():
    """One NaN plus one Inf are reported as a single finding counting two samples."""
    x = _sine().copy()
    x[0, 1000] = np.nan
    x[0, 2000] = np.inf
    findings = detect_nonfinite(x, SR)
    assert len(findings) == 1
    assert findings[0].measurement["nonfinite_samples"] == 2
    assert findings[0].code.value == "NONFINITE_SAMPLES"


def test_null_test_identical_is_empty():
    """A signal nulled against itself yields no findings."""
    x = _sine()
    assert null_test(x, x, SR) == []


def test_null_test_detects_divergence():
    """A single altered sample produces exactly one finding above -60 dB."""
    x = _sine()
    y = x.copy()
    y[0, 5000] += 0.5
    findings = null_test(x, y, SR)
    assert len(findings) == 1
    assert findings[0].measurement["max_diff_db"] > -60.0


def test_null_test_latency_compensation():
    """An identical signal shifted by latency_samples must match after compensation."""
    x = _sine()
    shifted = np.concatenate([np.zeros((1, 64), dtype=np.float32), x], axis=1)
    # Large difference without compensation; identical once compensated
    assert null_test(x, shifted, SR) != []
    assert null_test(x, shifted, SR, latency_samples=64) == []


# --- RT bench ---------------------------------------------------------------

def test_benchmark_basic_fields():
    """benchmark() reports block size, deadline, ordered percentiles and a track estimate."""
    x = _sine(dur=1.0)
    st = render_streamed(x, SR, _reverb_fn(), block_size=512)
    rep = benchmark(st)
    assert rep.block_size == 512
    assert rep.blocks > 0
    assert rep.deadline_ms == pytest.approx(512 / SR * 1000, abs=1e-3)
    assert rep.rt_factor_max >= rep.rt_factor_p99 >= rep.rt_factor_p50
    assert rep.est_max_tracks >= 1


def test_benchmark_rt_factor_monotonic_with_load():
    """Heavier processing must give a larger RT factor than lighter processing."""
    x = _sine(dur=1.0)

    def light(b, sr, reset):
        return b * 0.5

    def heavy(b, sr, reset):
        # Same output as light(); the discarded FFTs only add CPU load.
        for _ in range(30):
            np.fft.rfft(b, axis=1)
        return b * 0.5

    rl = benchmark(render_streamed(x, SR, light, block_size=512))
    rh = benchmark(render_streamed(x, SR, heavy, block_size=512))
    assert rh.rt_factor_mean > rl.rt_factor_mean


def test_benchmark_smaller_block_higher_rt_factor():
    """Smaller blocks give a larger RT factor (fixed overhead / shorter deadline)."""
    x = _sine(dur=1.0)

    def heavy(b, sr, reset):
        for _ in range(30):
            np.fft.rfft(b, axis=1)
        return b * 0.5

    small = benchmark(render_streamed(x, SR, heavy, block_size=64))
    large = benchmark(render_streamed(x, SR, heavy, block_size=1024))
    assert small.rt_factor_mean > large.rt_factor_mean