From 01fae437bc2aab144aa9445d0065ae370acc32bc Mon Sep 17 00:00:00 2001 From: grtninja Date: Fri, 10 Apr 2026 22:28:03 -0400 Subject: [PATCH 1/2] fix: localize sounddevice import for mic path --- Demos/ASR/Whisper/run_whisper.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Demos/ASR/Whisper/run_whisper.py b/Demos/ASR/Whisper/run_whisper.py index e4f66cc0..4adbd1aa 100644 --- a/Demos/ASR/Whisper/run_whisper.py +++ b/Demos/ASR/Whisper/run_whisper.py @@ -216,6 +216,13 @@ def build_provider_opts(opts): def mic_stream(model, duration=0, silence_threshold=0.01, silence_duration=5.0): + try: + import sounddevice as sd + except ImportError: + print("\n⚠️ sounddevice is required for microphone input.") + print(" Install it with `pip install sounddevice` or use a .wav file instead.") + return + q_audio = queue.Queue() stop_flag = threading.Event() @@ -354,11 +361,7 @@ def main(): return if args.input.lower() == 'mic': - import sounddevice as sd - try: - mic_stream(model, args.duration) - except sd.PortAudioError as e: - print("Fix your device or try using a .wav file instead of mic. Exiting") + mic_stream(model, args.duration) return else: waveform, sr = torchaudio.load(args.input) From 8eabeca739850a08d85aa769e9725d98859a6d3b Mon Sep 17 00:00:00 2001 From: grtninja Date: Sat, 11 Apr 2026 00:15:46 -0400 Subject: [PATCH 2/2] fix: align whisper auto-download model contract --- Demos/ASR/Whisper/run_whisper.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Demos/ASR/Whisper/run_whisper.py b/Demos/ASR/Whisper/run_whisper.py index 4adbd1aa..ecd932fc 100644 --- a/Demos/ASR/Whisper/run_whisper.py +++ b/Demos/ASR/Whisper/run_whisper.py @@ -14,6 +14,12 @@ SAMPLE_RATE = 16000 CHUNK_SIZE = 1600 # 0.1 sec chunks +SUPPORTED_WHISPER_MODEL_TYPES = ( + "whisper-base", + "whisper-small", + "whisper-medium", + "whisper-large-v3-turbo", +) class WhisperONNX: @@ -281,9 +287,10 @@ def download_whisper_onnx(model_type: str): Returns paths to encoder and decoder model files. """ hf_model_map = { + "whisper-base": "amd/whisper-base-onnx-npu", "whisper-small": "amd/whisper-small-onnx-npu", "whisper-medium": "amd/whisper-medium-onnx-npu", - "whisper-large-v3-turbo": "amd/whisper-large-turbo-onnx-npu" + "whisper-large-v3-turbo": "amd/whisper-large-turbo-onnx-npu", } repo_id = hf_model_map.get(model_type) @@ -310,10 +317,12 @@ def main(): parser.add_argument("--input", help="WAV file path or 'mic'") parser.add_argument("--encoder", help="Path to Whisper encoder ONNX model (optional, auto-download if not provided)") parser.add_argument("--decoder", help="Path to Whisper decoder ONNX model (optional, auto-download if not provided)") - parser.add_argument("--model-type", required=True, default="whisper-base", - choices=["whisper-tiny", "whisper-base", "whisper-small", - "whisper-medium", "whisper-large-v3-turbo"], - help="Whisper model name") + parser.add_argument( + "--model-type", + default="whisper-base", + choices=SUPPORTED_WHISPER_MODEL_TYPES, + help="Whisper model name", + ) parser.add_argument("--eval-dir", help="Dataset directory with wavs/ and transcripts.txt") parser.add_argument("--results-dir", default="results", help="Directory to store evaluation results") parser.add_argument("--config-file", default="./config/model_config.json", help="Path to Model provider configs")