jafreck · jafreck · Mar 30, 2026 · Mar 30, 2026
diff --git a/crates/murmur-core/src/audio/activate.rs b/crates/murmur-core/src/audio/activate.rs
@@ -0,0 +1,134 @@
+//! Platform-specific audio input device activation.
+//!
+//! On macOS, Bluetooth devices (like AirPods) connect in A2DP mode which
+//! provides high-quality audio *output* but no microphone input.  The mic
+//! requires a switch to the HFP/SCO profile, which macOS normally triggers
+//! when an app selects the device as the system input.
+//!
+//! Higher-level Apple frameworks (`AVAudioSession`, `AVCaptureSession`)
+//! handle this automatically, but the low-level AudioUnit HAL that `cpal`
+//! uses does not always trigger the switch.
+//!
+//! This module provides a best-effort activation hook that re-sets the
+//! default input device via CoreAudio, nudging macOS into establishing the
+//! SCO link.  It is a no-op on non-macOS platforms.
+
+/// Attempt to activate the system default input device for audio capture.
+///
+/// On macOS this re-sets the default input device via CoreAudio to nudge
+/// Bluetooth devices into HFP mode.  On other platforms this is a no-op.
+///
+/// Best-effort: nothing is returned; CoreAudio failures are logged at debug
+/// level (success at info level) and are never propagated to the caller.
+pub fn prepare_default_input() {
+    #[cfg(target_os = "macos")]
+    macos::activate_default_input();
+}
+
+// ── macOS CoreAudio implementation ──────────────────────────────────────
+
+#[cfg(target_os = "macos")]
+mod macos {
+    use std::os::raw::c_void;
+
+    // CoreAudio HAL types
+    type AudioObjectID = u32;
+    type AudioDeviceID = u32;
+    type OSStatus = i32;
+
+    const K_AUDIO_OBJECT_SYSTEM_OBJECT: AudioObjectID = 1;
+    const K_AUDIO_OBJECT_UNKNOWN: AudioObjectID = 0; // kAudioObjectUnknown: "no object"
+
+    // Property address values; selector and scope are big-endian FourCC codes
+    const K_AUDIO_HARDWARE_PROPERTY_DEFAULT_INPUT_DEVICE: u32 = u32::from_be_bytes(*b"dIn ");
+    const K_AUDIO_OBJECT_PROPERTY_SCOPE_GLOBAL: u32 = u32::from_be_bytes(*b"glob");
+    const K_AUDIO_OBJECT_PROPERTY_ELEMENT_MAIN: u32 = 0;
+
+    #[repr(C)] // passed by pointer to the CoreAudio C functions declared below
+    struct AudioObjectPropertyAddress {
+        selector: u32,
+        scope: u32,
+        element: u32,
+    }
+
+    #[link(name = "CoreAudio", kind = "framework")]
+    extern "C" {
+        fn AudioObjectGetPropertyData(
+            object_id: AudioObjectID,
+            address: *const AudioObjectPropertyAddress,
+            qualifier_data_size: u32,
+            qualifier_data: *const c_void,
+            data_size: *mut u32,
+            data: *mut c_void,
+        ) -> OSStatus;
+
+        fn AudioObjectSetPropertyData(
+            object_id: AudioObjectID,
+            address: *const AudioObjectPropertyAddress,
+            qualifier_data_size: u32,
+            qualifier_data: *const c_void,
+            data_size: u32,
+            data: *const c_void,
+        ) -> OSStatus;
+    }
+
+    /// Re-set the current default input device via CoreAudio.
+    ///
+    /// Reads `kAudioHardwarePropertyDefaultInputDevice` and writes the same ID back, which
+    /// can trigger macOS to establish the Bluetooth SCO/HFP link (mirroring a selection in
+    /// System Settings).  Skips the write if no device is reported; failures are only logged.
+    pub(super) fn activate_default_input() {
+        let addr = AudioObjectPropertyAddress {
+            selector: K_AUDIO_HARDWARE_PROPERTY_DEFAULT_INPUT_DEVICE,
+            scope: K_AUDIO_OBJECT_PROPERTY_SCOPE_GLOBAL,
+            element: K_AUDIO_OBJECT_PROPERTY_ELEMENT_MAIN,
+        };
+
+        let mut device_id: AudioDeviceID = 0;
+        let mut size = std::mem::size_of::<AudioDeviceID>() as u32;
+        // SAFETY: all pointers are to live locals and `size` equals `device_id`'s byte size.
+        let status = unsafe {
+            AudioObjectGetPropertyData(
+                K_AUDIO_OBJECT_SYSTEM_OBJECT,
+                &addr,
+                0,
+                std::ptr::null(),
+                &mut size,
+                &mut device_id as *mut _ as *mut c_void,
+            )
+        };
+
+        if status != 0 || device_id == K_AUDIO_OBJECT_UNKNOWN {
+            log::debug!("CoreAudio: no default input device (status={status}, id={device_id})");
+            return;
+        }
+        // SAFETY: `addr` and `device_id` outlive the call; the size passed matches `device_id`.
+        let status = unsafe {
+            AudioObjectSetPropertyData(
+                K_AUDIO_OBJECT_SYSTEM_OBJECT,
+                &addr,
+                0,
+                std::ptr::null(),
+                std::mem::size_of::<AudioDeviceID>() as u32,
+                &device_id as *const _ as *const c_void,
+            )
+        };
+
+        if status != 0 {
+            log::debug!("CoreAudio: failed to re-set default input device (status={status})");
+        } else {
+            log::info!("CoreAudio: activated default input device (id={device_id})");
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn prepare_default_input_does_not_panic() {
+        // Smoke test only: a no-op off macOS, a real (best-effort) CoreAudio call on macOS.
+        prepare_default_input();
+    }
+}
diff --git a/crates/murmur-core/src/audio/capture.rs b/crates/murmur-core/src/audio/capture.rs
@@ -35,6 +35,9 @@ struct SharedCaptureState {
     pre_roll: Mutex<VecDeque<f32>>,
     /// Count of samples dropped due to lock contention.
     dropped_samples: AtomicU64,
+    /// Monotonically increasing count of audio callback invocations.
+    /// Used to detect dead streams (e.g. when a Bluetooth device disconnects).
+    callback_count: AtomicU64,
 }
 
 impl SharedCaptureState {
@@ -46,12 +49,14 @@ impl SharedCaptureState {
             samples: Arc::new(Mutex::new(Vec::with_capacity(initial_capacity))),
             pre_roll: Mutex::new(VecDeque::with_capacity(PRE_ROLL_SAMPLES + 512)),
             dropped_samples: AtomicU64::new(0),
+            callback_count: AtomicU64::new(0),
         }
     }
 
     /// Dispatch processed audio samples to the appropriate buffer.
     /// Called from the audio callback after mixing/resampling/denoising.
     fn dispatch_samples(&self, samples: &[f32]) {
+        self.callback_count.fetch_add(1, Ordering::Relaxed);
         if self.recording.load(Ordering::Acquire) {
             if let Ok(mut buf) = self.samples.try_lock() {
                 buf.extend_from_slice(samples);
@@ -267,8 +272,21 @@ impl AudioRecorder {
             return Ok(());
         }
 
+        // Platform hook: on macOS, nudge Bluetooth devices into HFP mode
+        // so the microphone is active when we open the stream.
+        super::activate::prepare_default_input();
+
         let host = cpal::default_host();
         let device = host.default_input_device().context("No microphone found")?;
+        self.open_device(device)
+    }
+
+    /// Build and start an input stream on the given device.
+    fn open_device(&mut self, device: cpal::Device) -> Result<()> {
+        let device_name = device
+            .description()
+            .map(|d| d.name().to_string())
+            .unwrap_or_else(|_| "<unknown>".into());
 
         let supported_config = device
             .default_input_config()
@@ -277,6 +295,11 @@ impl AudioRecorder {
         let native_rate = supported_config.sample_rate();
         let native_channels = supported_config.channels() as u32;
 
+        log::info!(
+            "Opening audio device: \"{device_name}\" ({native_rate}Hz, {native_channels}ch, {:?})",
+            supported_config.sample_format(),
+        );
+
         let shared = Arc::clone(&self.shared);
         let ns_flag = Arc::clone(&self.noise_suppression);
         let denoiser = Arc::clone(&self.denoiser);
@@ -322,9 +345,35 @@ impl AudioRecorder {
         Ok(())
     }
 
+    /// Close the current stream, clear pre-roll, and re-open on the current default input device.
+    fn rewarm(&mut self) -> Result<()> {
+        log::info!("Re-opening audio stream on current default device");
+        self.stream = None; // drops the previous stream value before a new one is built
+        if let Ok(mut ring) = self.shared.pre_roll.lock() { // poisoned lock: buffer left as-is
+            ring.clear();
+        }
+        super::activate::prepare_default_input();
+        let host = cpal::default_host(); // if no device is found below, the stream stays `None`
+        let device = host.default_input_device().context("No microphone found")?;
+        self.open_device(device)
+    }
+
     /// Ensure the stream is warm, warming it up if needed.
+    /// If the stream exists but is no longer producing audio (e.g. the
+    /// Bluetooth device disconnected), close and re-open it.
     fn ensure_warm(&mut self) -> Result<()> {
-        if self.stream.is_none() {
+        if self.stream.is_some() {
+            // Direct probe: snapshot the counter, wait briefly, check again.
+            // This avoids false positives from stale counters that were set
+            // during a previous recording session.
+            let before = self.shared.callback_count.load(Ordering::Relaxed);
+            std::thread::sleep(std::time::Duration::from_millis(50));
+            let after = self.shared.callback_count.load(Ordering::Relaxed);
+            if after == before {
+                log::warn!("Audio stream appears dead (no callbacks in 50ms), re-opening");
+                self.rewarm()?;
+            }
+        } else {
             self.warm()?;
         }
         Ok(())

diff --git a/crates/murmur-core/src/audio/mod.rs b/crates/murmur-core/src/audio/mod.rs
@@ -1,8 +1,10 @@
+pub mod activate;
 pub mod capture;
 pub mod recordings;
 pub mod speaker;
 pub mod system_capture;
 
+pub use activate::prepare_default_input;
 pub use capture::{AudioRecorder, TARGET_RATE, WHISPER_WAV_SPEC};
 pub use recordings::RecordingStore;
 pub use speaker::{ActiveSpeaker, SpeakerTracker};

diff --git a/crates/murmur-core/src/transcription/vad.rs b/crates/murmur-core/src/transcription/vad.rs
@@ -50,14 +50,23 @@ pub fn contains_speech(samples: &[f32]) -> bool {
         return false;
     }
 
+    let rms = audio_rms(samples);
+
     // Fast path: reject digital silence without loading the model.
-    if is_below_noise_floor(samples) {
-        log::debug!("VAD: audio below noise floor, skipping");
+    if rms < SILENCE_RMS_FLOOR {
+        log::info!("VAD: audio below noise floor (RMS={rms:.6}), skipping");
         return false;
     }
 
+    log::debug!("VAD: audio RMS={rms:.4}, running speech detection");
+
     match detect_speech(samples) {
-        Ok(has_speech) => has_speech,
+        Ok(has_speech) => {
+            if !has_speech {
+                log::info!("VAD: no speech detected (RMS={rms:.4})");
+            }
+            has_speech
+        }
         Err(e) => {
             log::warn!("VAD inference failed, conservatively skipping transcription: {e}");
             false
@@ -106,12 +115,20 @@ fn detect_speech(samples: &[f32]) -> Result<bool, voice_activity_detector::Error
 }
 
 /// Quick RMS energy check to reject near-zero audio without neural inference.
+#[cfg_attr(not(test), allow(dead_code))] // presumably only the unit tests still call this
 fn is_below_noise_floor(samples: &[f32]) -> bool {
     if samples.is_empty() {
         return true;
     }
-    let rms = (samples.iter().map(|s| s * s).sum::<f32>() / samples.len() as f32).sqrt();
-    rms < SILENCE_RMS_FLOOR
+    audio_rms(samples) < SILENCE_RMS_FLOOR
+}
+
+/// Root-mean-square energy of `samples`, accumulated in `f32`; `0.0` for an empty slice.
+fn audio_rms(samples: &[f32]) -> f32 {
+    if samples.is_empty() {
+        return 0.0; // avoids the 0.0 / 0.0 = NaN the division below would otherwise produce
+    }
+    (samples.iter().map(|s| s * s).sum::<f32>() / samples.len() as f32).sqrt()
 }
 
 #[cfg(test)]

diff --git a/crates/murmur/src/app/effects.rs b/crates/murmur/src/app/effects.rs
@@ -438,7 +438,12 @@ fn stop_and_transcribe(ctx: &mut EffectContext<'_>) {
                                 filler_word_removal,
                                 spoken_punctuation,
                             );
-                            let _ = tx.send(AppMessage::TranscriptionDone(text));
+                            if text.is_empty() {
+                                info!("Transcription produced no text (VAD likely detected no speech)");
+                                let _ = tx.send(AppMessage::TranscriptionDone(String::new()));
+                            } else {
+                                let _ = tx.send(AppMessage::TranscriptionDone(text));
+                            }
                         }
                         Err(e) => {
                             let _ = tx.send(AppMessage::TranscriptionError(e.to_string()));
@@ -471,7 +476,12 @@ fn stop_and_transcribe(ctx: &mut EffectContext<'_>) {
                                 filler_word_removal,
                                 spoken_punctuation,
                             );
-                            let _ = tx.send(AppMessage::TranscriptionDone(text));
+                            if text.is_empty() {
+                                info!("Transcription produced no text (VAD likely detected no speech)");
+                                let _ = tx.send(AppMessage::TranscriptionDone(String::new()));
+                            } else {
+                                let _ = tx.send(AppMessage::TranscriptionDone(text));
+                            }
                         }
                         Err(e) => {
                             let _ = tx.send(AppMessage::TranscriptionError(e.to_string()));