Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions crates/murmur-core/src/audio/activate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
//! Platform-specific audio input device activation.
//!
//! On macOS, Bluetooth devices (like AirPods) connect in A2DP mode which
//! provides high-quality audio *output* but no microphone input. The mic
//! requires a switch to the HFP/SCO profile, which macOS normally triggers
//! when an app selects the device as the system input.
//!
//! Higher-level Apple frameworks (`AVAudioSession`, `AVCaptureSession`)
//! handle this automatically, but the low-level AudioUnit HAL that `cpal`
//! uses does not always trigger the switch.
//!
//! This module provides a best-effort activation hook that re-sets the
//! default input device via CoreAudio, nudging macOS into establishing the
//! SCO link. It is a no-op on non-macOS platforms.

/// Best-effort hook that readies the system default input device for capture.
///
/// On macOS this delegates to the CoreAudio implementation, which writes the
/// current default input device back to the system so that Bluetooth devices
/// are nudged into HFP mode. On every other platform the body is compiled
/// out and the call does nothing.
///
/// The function never returns an error: failures inside the macOS
/// implementation are logged at debug level and otherwise ignored.
pub fn prepare_default_input() {
    // Only macOS has an activation step; elsewhere this block is compiled out.
    #[cfg(target_os = "macos")]
    {
        macos::activate_default_input();
    }
}

// ── macOS CoreAudio implementation ──────────────────────────────────────

#[cfg(target_os = "macos")]
mod macos {
    // Minimal hand-written bindings to the two CoreAudio HAL calls this
    // module needs, plus the activation routine built on top of them.
    use std::os::raw::c_void;

    // CoreAudio HAL types
    type AudioObjectID = u32;
    type AudioDeviceID = u32;
    type OSStatus = i32;

    /// `noErr`: the status value both HAL calls return on success.
    const NO_ERR: OSStatus = 0;

    /// `kAudioObjectUnknown`: the ID that refers to no audio object at all.
    const K_AUDIO_OBJECT_UNKNOWN: AudioObjectID = 0;
    const K_AUDIO_OBJECT_SYSTEM_OBJECT: AudioObjectID = 1;

    // Property selectors (FourCC encoded)
    const K_AUDIO_HARDWARE_PROPERTY_DEFAULT_INPUT_DEVICE: u32 = u32::from_be_bytes(*b"dIn ");
    const K_AUDIO_OBJECT_PROPERTY_SCOPE_GLOBAL: u32 = u32::from_be_bytes(*b"glob");
    const K_AUDIO_OBJECT_PROPERTY_ELEMENT_MAIN: u32 = 0;

    /// Byte size of the property payload (a single `AudioDeviceID`).
    const DEVICE_ID_SIZE: u32 = std::mem::size_of::<AudioDeviceID>() as u32;

    /// Mirror of CoreAudio's `AudioObjectPropertyAddress` (three `u32` fields).
    #[repr(C)]
    struct AudioObjectPropertyAddress {
        selector: u32,
        scope: u32,
        element: u32,
    }

    #[link(name = "CoreAudio", kind = "framework")]
    extern "C" {
        fn AudioObjectGetPropertyData(
            object_id: AudioObjectID,
            address: *const AudioObjectPropertyAddress,
            qualifier_data_size: u32,
            qualifier_data: *const c_void,
            data_size: *mut u32,
            data: *mut c_void,
        ) -> OSStatus;

        fn AudioObjectSetPropertyData(
            object_id: AudioObjectID,
            address: *const AudioObjectPropertyAddress,
            qualifier_data_size: u32,
            qualifier_data: *const c_void,
            data_size: u32,
            data: *const c_void,
        ) -> OSStatus;
    }

    /// Re-set the current default input device via CoreAudio.
    ///
    /// Writing the same device ID back to `kAudioHardwarePropertyDefaultInputDevice`
    /// can trigger macOS to establish the Bluetooth SCO/HFP link if it hasn't
    /// already. This mirrors what System Settings does when the user selects
    /// a Bluetooth input device.
    ///
    /// Best-effort: every failure path logs at debug level and returns
    /// without reporting an error to the caller. If the system reports no
    /// default input device (`kAudioObjectUnknown`), nothing is written back.
    pub(super) fn activate_default_input() {
        let addr = AudioObjectPropertyAddress {
            selector: K_AUDIO_HARDWARE_PROPERTY_DEFAULT_INPUT_DEVICE,
            scope: K_AUDIO_OBJECT_PROPERTY_SCOPE_GLOBAL,
            element: K_AUDIO_OBJECT_PROPERTY_ELEMENT_MAIN,
        };

        let mut device_id: AudioDeviceID = K_AUDIO_OBJECT_UNKNOWN;
        let mut size = DEVICE_ID_SIZE;

        // SAFETY: `addr`, `size` and `device_id` are live, properly aligned
        // locals for the duration of the call. `size` tells CoreAudio the
        // output buffer holds exactly one `AudioDeviceID`, and no qualifier
        // is supplied (size 0 with a null pointer).
        let status = unsafe {
            AudioObjectGetPropertyData(
                K_AUDIO_OBJECT_SYSTEM_OBJECT,
                &addr,
                0,
                std::ptr::null(),
                &mut size,
                (&mut device_id as *mut AudioDeviceID).cast::<c_void>(),
            )
        };

        if status != NO_ERR {
            log::debug!("CoreAudio: failed to get default input device (status={status})");
            return;
        }

        // Without a default input device there is nothing to activate, and
        // writing the "unknown" ID back as the default would be meaningless.
        if device_id == K_AUDIO_OBJECT_UNKNOWN {
            log::debug!("CoreAudio: no default input device is set, nothing to activate");
            return;
        }

        // SAFETY: `addr` and `device_id` are live locals; the data pointer
        // refers to exactly `DEVICE_ID_SIZE` readable bytes, and no qualifier
        // is supplied (size 0 with a null pointer).
        let status = unsafe {
            AudioObjectSetPropertyData(
                K_AUDIO_OBJECT_SYSTEM_OBJECT,
                &addr,
                0,
                std::ptr::null(),
                DEVICE_ID_SIZE,
                (&device_id as *const AudioDeviceID).cast::<c_void>(),
            )
        };

        if status != NO_ERR {
            log::debug!("CoreAudio: failed to re-set default input device (status={status})");
        } else {
            log::info!("CoreAudio: activated default input device (id={device_id})");
        }
    }
}

// Smoke test for the public activation hook.
#[cfg(test)]
mod tests {
use super::*;

/// Calls the hook once; the test passes as long as the call returns
/// without panicking, since the function has no return value to assert on.
#[test]
fn prepare_default_input_does_not_panic() {
// Should be a no-op on non-macOS, best-effort on macOS.
prepare_default_input();
}
}
51 changes: 50 additions & 1 deletion crates/murmur-core/src/audio/capture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ struct SharedCaptureState {
pre_roll: Mutex<VecDeque<f32>>,
/// Count of samples dropped due to lock contention.
dropped_samples: AtomicU64,
/// Monotonically increasing count of audio callback invocations.
/// Used to detect dead streams (e.g. when a Bluetooth device disconnects).
callback_count: AtomicU64,
}

impl SharedCaptureState {
Expand All @@ -46,12 +49,14 @@ impl SharedCaptureState {
samples: Arc::new(Mutex::new(Vec::with_capacity(initial_capacity))),
pre_roll: Mutex::new(VecDeque::with_capacity(PRE_ROLL_SAMPLES + 512)),
dropped_samples: AtomicU64::new(0),
callback_count: AtomicU64::new(0),
}
}

/// Dispatch processed audio samples to the appropriate buffer.
/// Called from the audio callback after mixing/resampling/denoising.
fn dispatch_samples(&self, samples: &[f32]) {
self.callback_count.fetch_add(1, Ordering::Relaxed);
if self.recording.load(Ordering::Acquire) {
if let Ok(mut buf) = self.samples.try_lock() {
buf.extend_from_slice(samples);
Expand Down Expand Up @@ -267,8 +272,21 @@ impl AudioRecorder {
return Ok(());
}

// Platform hook: on macOS, nudge Bluetooth devices into HFP mode
// so the microphone is active when we open the stream.
super::activate::prepare_default_input();

let host = cpal::default_host();
let device = host.default_input_device().context("No microphone found")?;
self.open_device(device)
}

/// Build and start an input stream on the given device.
fn open_device(&mut self, device: cpal::Device) -> Result<()> {
let device_name = device
.description()
.map(|d| d.name().to_string())
.unwrap_or_else(|_| "<unknown>".into());

let supported_config = device
.default_input_config()
Expand All @@ -277,6 +295,11 @@ impl AudioRecorder {
let native_rate = supported_config.sample_rate();
let native_channels = supported_config.channels() as u32;

log::info!(
"Opening audio device: \"{device_name}\" ({native_rate}Hz, {native_channels}ch, {:?})",
supported_config.sample_format(),
);

let shared = Arc::clone(&self.shared);
let ns_flag = Arc::clone(&self.noise_suppression);
let denoiser = Arc::clone(&self.denoiser);
Expand Down Expand Up @@ -322,9 +345,35 @@ impl AudioRecorder {
Ok(())
}

/// Tear down the existing input stream and open a fresh one on whatever
/// device is the system default input right now.
///
/// Returns an error if no default input device is available or if opening
/// the device fails.
fn rewarm(&mut self) -> Result<()> {
    log::info!("Re-opening audio stream on current default device");

    // Release the old stream handle before anything else.
    drop(self.stream.take());

    // Discard buffered pre-roll audio; skipped if the lock cannot be taken.
    if let Ok(mut pre_roll) = self.shared.pre_roll.lock() {
        pre_roll.clear();
    }

    // Same platform hook used before the initial open.
    super::activate::prepare_default_input();

    let device = cpal::default_host()
        .default_input_device()
        .context("No microphone found")?;
    self.open_device(device)
}

/// Ensure the stream is warm, warming it up if needed.
/// If the stream exists but is no longer producing audio (e.g. the
/// Bluetooth device disconnected), close and re-open it.
fn ensure_warm(&mut self) -> Result<()> {
if self.stream.is_none() {
if self.stream.is_some() {
// Direct probe: snapshot the counter, wait briefly, check again.
// This avoids false positives from stale counters that were set
// during a previous recording session.
let before = self.shared.callback_count.load(Ordering::Relaxed);
std::thread::sleep(std::time::Duration::from_millis(50));
let after = self.shared.callback_count.load(Ordering::Relaxed);
if after == before {
log::warn!("Audio stream appears dead (no callbacks in 50ms), re-opening");
self.rewarm()?;
}
} else {
self.warm()?;
}
Ok(())
Expand Down
2 changes: 2 additions & 0 deletions crates/murmur-core/src/audio/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
pub mod activate;
pub mod capture;
pub mod recordings;
pub mod speaker;
pub mod system_capture;

pub use activate::prepare_default_input;
pub use capture::{AudioRecorder, TARGET_RATE, WHISPER_WAV_SPEC};
pub use recordings::RecordingStore;
pub use speaker::{ActiveSpeaker, SpeakerTracker};
Expand Down
27 changes: 22 additions & 5 deletions crates/murmur-core/src/transcription/vad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,23 @@ pub fn contains_speech(samples: &[f32]) -> bool {
return false;
}

let rms = audio_rms(samples);

// Fast path: reject digital silence without loading the model.
if is_below_noise_floor(samples) {
log::debug!("VAD: audio below noise floor, skipping");
if rms < SILENCE_RMS_FLOOR {
log::info!("VAD: audio below noise floor (RMS={rms:.6}), skipping");
return false;
}

log::debug!("VAD: audio RMS={rms:.4}, running speech detection");

match detect_speech(samples) {
Ok(has_speech) => has_speech,
Ok(has_speech) => {
if !has_speech {
log::info!("VAD: no speech detected (RMS={rms:.4})");
}
has_speech
}
Err(e) => {
log::warn!("VAD inference failed, conservatively skipping transcription: {e}");
false
Expand Down Expand Up @@ -106,12 +115,20 @@ fn detect_speech(samples: &[f32]) -> Result<bool, voice_activity_detector::Error
}

/// Quick RMS energy check to reject near-zero audio without neural inference.
///
/// Returns `true` for an empty slice, and otherwise when the RMS energy of
/// `samples` is strictly below `SILENCE_RMS_FLOOR`.
// NOTE(review): `dead_code` is allowed outside test builds — presumably this
// helper is only called from tests now; confirm against the callers.
#[cfg_attr(not(test), allow(dead_code))]
fn is_below_noise_floor(samples: &[f32]) -> bool {
// An empty buffer is treated as silence.
if samples.is_empty() {
return true;
}
// NOTE(review): this view interleaves the inline RMS computation with a call
// to `audio_rms` that does the same thing; only one of the two forms should
// remain in the final function.
let rms = (samples.iter().map(|s| s * s).sum::<f32>() / samples.len() as f32).sqrt();
rms < SILENCE_RMS_FLOOR
audio_rms(samples) < SILENCE_RMS_FLOOR
}

/// Compute RMS (root-mean-square) energy of audio samples.
///
/// Returns `0.0` for an empty slice. The sum of squares is accumulated in
/// `f64` and only the final result is narrowed to `f32`: a running `f32`
/// sum over hundreds of thousands of samples loses low-order bits on every
/// addition once the total grows large, which skews the result.
fn audio_rms(samples: &[f32]) -> f32 {
    if samples.is_empty() {
        return 0.0;
    }
    let sum_of_squares: f64 = samples
        .iter()
        .map(|&s| {
            let s = f64::from(s);
            s * s
        })
        .sum();
    // Narrowing back to `f32` is intentional: callers compare against `f32` thresholds.
    (sum_of_squares / samples.len() as f64).sqrt() as f32
}

#[cfg(test)]
Expand Down
14 changes: 12 additions & 2 deletions crates/murmur/src/app/effects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,12 @@ fn stop_and_transcribe(ctx: &mut EffectContext<'_>) {
filler_word_removal,
spoken_punctuation,
);
let _ = tx.send(AppMessage::TranscriptionDone(text));
if text.is_empty() {
info!("Transcription produced no text (VAD likely detected no speech)");
let _ = tx.send(AppMessage::TranscriptionDone(String::new()));
} else {
let _ = tx.send(AppMessage::TranscriptionDone(text));
}
}
Err(e) => {
let _ = tx.send(AppMessage::TranscriptionError(e.to_string()));
Expand Down Expand Up @@ -471,7 +476,12 @@ fn stop_and_transcribe(ctx: &mut EffectContext<'_>) {
filler_word_removal,
spoken_punctuation,
);
let _ = tx.send(AppMessage::TranscriptionDone(text));
if text.is_empty() {
info!("Transcription produced no text (VAD likely detected no speech)");
let _ = tx.send(AppMessage::TranscriptionDone(String::new()));
} else {
let _ = tx.send(AppMessage::TranscriptionDone(text));
}
}
Err(e) => {
let _ = tx.send(AppMessage::TranscriptionError(e.to_string()));
Expand Down