diff --git a/crates/wavekat-turn/src/audio/pipecat.rs b/crates/wavekat-turn/src/audio/pipecat.rs index 7b87c1a..eefe115 100644 --- a/crates/wavekat-turn/src/audio/pipecat.rs +++ b/crates/wavekat-turn/src/audio/pipecat.rs @@ -522,11 +522,14 @@ impl AudioTurnDetector for PipecatSmartTurn { (TurnState::Unfinished, 1.0 - probability) }; + let audio_duration_ms = (self.ring_buffer.len() as u64 * 1000) / SAMPLE_RATE as u64; + Ok(TurnPrediction { state, confidence, latency_ms, stage_times, + audio_duration_ms, }) } diff --git a/crates/wavekat-turn/src/lib.rs b/crates/wavekat-turn/src/lib.rs index 4c6f020..fa68fa2 100644 --- a/crates/wavekat-turn/src/lib.rs +++ b/crates/wavekat-turn/src/lib.rs @@ -64,6 +64,12 @@ pub struct TurnPrediction { pub latency_ms: u64, /// Per-stage timing breakdown in pipeline order. pub stage_times: Vec, + /// Duration of audio in the detector's buffer at prediction time (ms). + /// + /// For PipecatSmartTurn this reflects how much of the 8 s ring buffer + /// was filled. With soft reset the buffer may span multiple speech + /// segments, so this can exceed the current segment duration. + pub audio_duration_ms: u64, } /// A single turn in the conversation, for context-aware text detectors. diff --git a/crates/wavekat-turn/tests/controller.rs b/crates/wavekat-turn/tests/controller.rs index e8edf88..90f5071 100644 --- a/crates/wavekat-turn/tests/controller.rs +++ b/crates/wavekat-turn/tests/controller.rs @@ -42,11 +42,13 @@ impl AudioTurnDetector for MockDetector { TurnState::Unfinished => 0.80, TurnState::Wait => 0.70, }; + let audio_duration_ms = (self.buffer_len as u64 * 1000) / 16000; Ok(TurnPrediction { state, confidence, latency_ms: 0, stage_times: vec![], + audio_duration_ms, }) }