diff --git a/Makefile b/Makefile index 7065549..4f1b0bd 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help check test fmt lint doc ci accuracy mel +.PHONY: help check test fmt lint doc ci accuracy mel example-controller help: @echo "Available targets:" @@ -10,6 +10,7 @@ help: @echo " lint Run clippy with warnings as errors" @echo " doc Build and open docs in browser" @echo " ci Run all CI checks locally (fmt, clippy, test, doc, features)" + @echo " example-controller Run TurnController example" # Check workspace compiles check: @@ -27,6 +28,10 @@ accuracy: mel: cargo test --features pipecat -- mel_report --ignored --nocapture +# Run TurnController example +example-controller: + cargo run --features pipecat --example controller + # Format code fmt: cargo fmt --all diff --git a/README.md b/README.md index 97d850d..c560e4b 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ models behind common Rust traits. Same pattern as [wavekat-vad](https://github.com/wavekat/wavekat-vad). > [!WARNING] -> Early development. Trait API is defined; backend implementations are stubs pending ONNX model integration. +> Early development. API may change between minor versions. ## Backends @@ -27,25 +27,34 @@ models behind common Rust traits. 
Same pattern as cargo add wavekat-turn --features pipecat ``` -Use the audio-based detector: +Use `TurnController` to wrap any detector with automatic state tracking: ```rust -use wavekat_turn::{AudioTurnDetector, TurnState}; +use wavekat_turn::{TurnController, TurnState}; use wavekat_turn::audio::PipecatSmartTurn; -let mut detector = PipecatSmartTurn::new()?; +let detector = PipecatSmartTurn::new()?; +let mut ctrl = TurnController::new(detector); -// Feed 16 kHz f32 PCM frames after VAD detects silence -let prediction = detector.predict_audio(&audio_frames)?; +// Feed audio continuously +ctrl.push_audio(&audio_frame); +// VAD speech start — soft reset (keeps buffer if turn was unfinished) +ctrl.reset_if_finished(); + +// VAD speech end — predict +let prediction = ctrl.predict()?; match prediction.state { TurnState::Finished => { /* user is done, send to LLM */ } TurnState::Unfinished => { /* keep listening */ } TurnState::Wait => { /* user asked AI to hold */ } } + +// After assistant finishes responding — hard reset +ctrl.reset(); ``` -Or the text-based detector: +Or the text-based detector directly: ```rust use wavekat_turn::{TextTurnDetector, TurnState}; @@ -57,6 +66,9 @@ let prediction = detector.predict_text("I was wondering if", &context)?; assert_eq!(prediction.state, TurnState::Unfinished); ``` +See [`examples/controller.rs`](crates/wavekat-turn/examples/controller.rs) for a +full walkthrough with real audio. + ## Architecture Two trait families cover the two input modalities: @@ -64,6 +76,9 @@ Two trait families cover the two input modalities: - **`AudioTurnDetector`** -- operates on raw audio frames (no ASR needed) - **`TextTurnDetector`** -- operates on ASR transcript text with optional conversation context +`TurnController` wraps any `AudioTurnDetector` and adds orchestration helpers +like soft-reset (preserves buffer when the user pauses mid-sentence). + ``` wavekat-vad --> "is someone speaking?" wavekat-turn --> "are they done speaking?" 
diff --git a/crates/wavekat-turn/Cargo.toml b/crates/wavekat-turn/Cargo.toml index 9284d18..6d76eb1 100644 --- a/crates/wavekat-turn/Cargo.toml +++ b/crates/wavekat-turn/Cargo.toml @@ -36,6 +36,10 @@ ndarray-npy = "0.10" serde = { version = "1", features = ["derive"] } serde_json = "1" +[[example]] +name = "controller" +required-features = ["pipecat"] + [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] diff --git a/crates/wavekat-turn/examples/controller.rs b/crates/wavekat-turn/examples/controller.rs new file mode 100644 index 0000000..4707385 --- /dev/null +++ b/crates/wavekat-turn/examples/controller.rs @@ -0,0 +1,100 @@ +//! Example: using TurnController for VAD-driven turn detection. +//! +//! Run with: `cargo run --features pipecat --example controller` +//! +//! Demonstrates the soft-reset flow using real WAV fixtures: +//! +//! 1. User speaks mid-sentence (speech_mid.wav) → Unfinished +//! 2. User continues speaking — soft reset keeps the buffer intact +//! 3. User finishes the sentence (speech_finished.wav) → Finished +//! 4. 
After assistant responds, hard reset starts a fresh turn + +use std::path::Path; + +use wavekat_turn::audio::PipecatSmartTurn; +use wavekat_turn::{AudioFrame, TurnController}; + +fn load_wav(path: &Path) -> Vec { + let mut reader = hound::WavReader::open(path) + .unwrap_or_else(|e| panic!("failed to open {}: {}", path.display(), e)); + let spec = reader.spec(); + match spec.sample_format { + hound::SampleFormat::Int => reader + .samples::() + .map(|s| s.unwrap() as f32 / 32768.0) + .collect(), + hound::SampleFormat::Float => reader.samples::().map(|s| s.unwrap()).collect(), + } +} + +fn main() -> Result<(), Box> { + let fixtures = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .parent() + .unwrap() + .join("tests/fixtures"); + + let speech_mid = load_wav(&fixtures.join("speech_mid.wav")); + let speech_finished = load_wav(&fixtures.join("speech_finished.wav")); + + let detector = PipecatSmartTurn::new()?; + let mut ctrl = TurnController::new(detector); + + // --- Speech A: user says something mid-sentence --- + println!(">> VAD: speech started"); + ctrl.reset_if_finished(); // first speech → resets + + println!(">> Pushing speech_mid.wav (cut mid-sentence)"); + ctrl.push_audio(&AudioFrame::new(&speech_mid[..], 16_000)); + + println!(">> VAD: speech ended"); + let result_a = ctrl.predict()?; + println!( + " predict → {:?} (confidence: {:.3})", + result_a.state, result_a.confidence + ); + + // --- Speech B: user continues speaking --- + println!("\n>> VAD: speech started again"); + let did_reset = ctrl.reset_if_finished(); + println!( + " reset_if_finished → {}", + if did_reset { + "reset (turn was finished)" + } else { + "skipped (turn unfinished, keeping buffer)" + } + ); + + println!(">> Pushing speech_finished.wav (complete sentence)"); + ctrl.push_audio(&AudioFrame::new(&speech_finished[..], 16_000)); + + println!(">> VAD: speech ended"); + let result_b = ctrl.predict()?; + println!( + " predict → {:?} (confidence: {:.3}, ran on A+B combined)", + 
result_b.state, result_b.confidence + ); + + // --- New turn: after assistant responds --- + println!("\n>> Assistant finished responding"); + ctrl.reset(); // hard reset for next turn + println!(" hard reset, last_state: {:?}", ctrl.last_state()); + + // --- Speech C: fresh turn --- + println!("\n>> VAD: speech started (new turn)"); + ctrl.reset_if_finished(); // last_state is None → resets + + println!(">> Pushing speech_finished.wav"); + ctrl.push_audio(&AudioFrame::new(&speech_finished[..], 16_000)); + + println!(">> VAD: speech ended"); + let result_c = ctrl.predict()?; + println!( + " predict → {:?} (confidence: {:.3})", + result_c.state, result_c.confidence + ); + + Ok(()) +} diff --git a/crates/wavekat-turn/src/controller.rs b/crates/wavekat-turn/src/controller.rs new file mode 100644 index 0000000..163e4df --- /dev/null +++ b/crates/wavekat-turn/src/controller.rs @@ -0,0 +1,100 @@ +use crate::{AudioFrame, AudioTurnDetector, TurnError, TurnPrediction, TurnState}; + +/// Orchestration wrapper around any [`AudioTurnDetector`]. +/// +/// Tracks prediction state across calls and provides convenience methods +/// like [`reset_if_finished`](TurnController::reset_if_finished) for +/// correct VAD integration without manual state bookkeeping. +/// +/// # Usage +/// +/// ```ignore +/// let detector = PipecatSmartTurn::new()?; +/// let mut ctrl = TurnController::new(detector); +/// +/// // Audio arrives continuously +/// ctrl.push_audio(&frame); +/// +/// // VAD speech start — soft reset (keeps buffer if turn was unfinished) +/// ctrl.reset_if_finished(); +/// +/// // VAD speech end — predict +/// let result = ctrl.predict()?; +/// ``` +/// +/// See [`reset_if_finished`](TurnController::reset_if_finished) for details +/// on when to use soft vs hard reset. +pub struct TurnController { + inner: T, + last_state: Option, +} + +impl TurnController { + /// Create a new controller wrapping the given detector. 
+    pub fn new(inner: T) -> Self {
+        Self {
+            inner,
+            last_state: None,
+        }
+    }
+
+    /// Feed audio into the detector.
+    pub fn push_audio(&mut self, frame: &AudioFrame) {
+        self.inner.push_audio(frame);
+    }
+
+    /// Run prediction on buffered audio.
+    ///
+    /// Tracks the result state internally for [`reset_if_finished`](Self::reset_if_finished).
+    pub fn predict(&mut self) -> Result<TurnPrediction, TurnError> {
+        let result = self.inner.predict()?;
+        self.last_state = Some(result.state);
+        Ok(result)
+    }
+
+    /// Hard reset — always clears the buffer.
+    ///
+    /// Use when you know a new turn is starting (e.g. after the assistant
+    /// finishes responding).
+    pub fn reset(&mut self) {
+        self.inner.reset();
+        self.last_state = None;
+    }
+
+    /// Soft reset — clears the buffer only if the last prediction was
+    /// [`Finished`](TurnState::Finished) or no prediction has been made
+    /// since the last reset.
+    ///
+    /// Returns `true` if a reset occurred, `false` if skipped.
+    ///
+    /// Call this on VAD speech-start when you don't know whether the user
+    /// is continuing the same turn or starting a new one. If the previous
+    /// prediction was [`Unfinished`](TurnState::Unfinished), the buffer is
+    /// preserved so the next [`predict`](Self::predict) runs on the full
+    /// accumulated audio.
+    pub fn reset_if_finished(&mut self) -> bool {
+        match self.last_state {
+            Some(TurnState::Unfinished) => false,
+            _ => {
+                self.reset();
+                true
+            }
+        }
+    }
+
+    /// Returns the state from the last [`predict`](Self::predict) call,
+    /// or `None` if no prediction has been made since the last reset.
+    pub fn last_state(&self) -> Option<TurnState> {
+        self.last_state
+    }
+
+    /// Returns a mutable reference to the inner detector.
+    pub fn inner_mut(&mut self) -> &mut T {
+        &mut self.inner
+    }
+
+    /// Unwrap the controller, returning the inner detector.
+ pub fn into_inner(self) -> T { + self.inner + } +} diff --git a/crates/wavekat-turn/src/lib.rs b/crates/wavekat-turn/src/lib.rs index 98ca819..4c6f020 100644 --- a/crates/wavekat-turn/src/lib.rs +++ b/crates/wavekat-turn/src/lib.rs @@ -9,6 +9,10 @@ //! - [`AudioTurnDetector`] — operates on raw audio frames (e.g. Pipecat Smart Turn) //! - [`TextTurnDetector`] — operates on ASR transcript text (e.g. LiveKit EOU) //! +//! For most use cases, wrap a detector in [`TurnController`] to get +//! automatic state tracking and soft-reset logic for VAD integration. +//! See [`controller`] for details. +//! //! # Feature flags //! //! | Feature | Backend | Input | @@ -16,6 +20,7 @@ //! | `pipecat` | Pipecat Smart Turn v3 (ONNX) | Audio (16 kHz) | //! | `livekit` | LiveKit Turn Detector (ONNX) | Text | +pub mod controller; pub mod error; #[cfg(any(feature = "pipecat", feature = "livekit"))] @@ -27,6 +32,7 @@ pub mod audio; #[cfg(feature = "livekit")] pub mod text; +pub use controller::TurnController; pub use error::TurnError; pub use wavekat_core::AudioFrame; @@ -77,11 +83,23 @@ pub enum Role { /// Turn detector that operates on raw audio. /// /// Implementations buffer audio internally and run prediction on demand. -/// The typical flow with VAD: +/// +/// **Most users should wrap this in [`TurnController`]** rather than calling +/// these methods directly. The controller tracks prediction state and provides +/// [`reset_if_finished`](TurnController::reset_if_finished) for correct +/// multi-utterance handling. +/// +/// # Direct usage (advanced) +/// +/// If you need full control over reset logic: /// /// 1. **Every audio chunk** → [`push_audio`](AudioTurnDetector::push_audio) -/// 2. **VAD fires "speech started"** → [`reset`](AudioTurnDetector::reset) -/// 3. **VAD fires "speech stopped"** → [`predict`](AudioTurnDetector::predict) +/// 2. **VAD fires "speech stopped"** → [`predict`](AudioTurnDetector::predict) +/// 3. 
**New turn begins** → [`reset`](AudioTurnDetector::reset) +/// +/// Note: calling `reset` unconditionally on every VAD speech-start will discard +/// audio context when the user pauses mid-sentence. See [`TurnController`] for +/// the recommended approach. pub trait AudioTurnDetector: Send + Sync { /// Feed audio into the internal buffer. /// @@ -90,10 +108,17 @@ pub trait AudioTurnDetector: Send + Sync { /// Run prediction on buffered audio. /// - /// Call when VAD detects end of speech. + /// Call when VAD detects end of speech. The buffer is **not** cleared + /// after prediction — call [`reset`](AudioTurnDetector::reset) explicitly + /// when starting a new turn. fn predict(&mut self) -> Result; - /// Clear the internal buffer. Call when a new speech turn begins. + /// Unconditionally clear the internal buffer. + /// + /// Use when you are certain a new turn is starting (e.g. after the + /// assistant finishes responding). For VAD speech-start events where + /// the user may be continuing, prefer + /// [`TurnController::reset_if_finished`]. fn reset(&mut self); } diff --git a/crates/wavekat-turn/tests/controller.rs b/crates/wavekat-turn/tests/controller.rs new file mode 100644 index 0000000..e8edf88 --- /dev/null +++ b/crates/wavekat-turn/tests/controller.rs @@ -0,0 +1,163 @@ +//! Tests for [`TurnController`]. +//! +//! Uses a mock detector to test orchestration logic without ONNX overhead. + +use wavekat_turn::{ + AudioFrame, AudioTurnDetector, TurnController, TurnError, TurnPrediction, TurnState, +}; + +// --------------------------------------------------------------------------- +// Mock detector +// --------------------------------------------------------------------------- + +/// A minimal detector that records calls and returns a configurable state. +struct MockDetector { + /// The state to return on the next `predict()` call. + next_state: TurnState, + /// Number of samples in the buffer (cleared by reset). 
+ buffer_len: usize, + /// How many times `reset()` was called. + reset_count: usize, +} + +impl MockDetector { + fn new() -> Self { + Self { + next_state: TurnState::Unfinished, + buffer_len: 0, + reset_count: 0, + } + } +} + +impl AudioTurnDetector for MockDetector { + fn push_audio(&mut self, frame: &AudioFrame) { + self.buffer_len += frame.samples().len(); + } + + fn predict(&mut self) -> Result { + let state = self.next_state; + let confidence = match state { + TurnState::Finished => 0.95, + TurnState::Unfinished => 0.80, + TurnState::Wait => 0.70, + }; + Ok(TurnPrediction { + state, + confidence, + latency_ms: 0, + stage_times: vec![], + }) + } + + fn reset(&mut self) { + self.buffer_len = 0; + self.reset_count += 1; + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[test] +fn reset_if_finished_resets_on_first_call() { + let mut ctrl = TurnController::new(MockDetector::new()); + assert!( + ctrl.reset_if_finished(), + "should reset when no prior prediction" + ); +} + +#[test] +fn reset_if_finished_skips_after_unfinished() { + let mut ctrl = TurnController::new(MockDetector::new()); + ctrl.inner_mut().next_state = TurnState::Unfinished; + ctrl.predict().unwrap(); + + assert!( + !ctrl.reset_if_finished(), + "should skip reset after Unfinished" + ); +} + +#[test] +fn reset_if_finished_resets_after_finished() { + let mut ctrl = TurnController::new(MockDetector::new()); + ctrl.inner_mut().next_state = TurnState::Finished; + ctrl.predict().unwrap(); + + assert!(ctrl.reset_if_finished(), "should reset after Finished"); +} + +#[test] +fn hard_reset_always_clears() { + let mut ctrl = TurnController::new(MockDetector::new()); + ctrl.inner_mut().next_state = TurnState::Unfinished; + ctrl.predict().unwrap(); + + ctrl.reset(); + assert_eq!( + ctrl.last_state(), + None, + "hard reset should clear last_state" + ); + 
assert_eq!(ctrl.inner_mut().reset_count, 1); +} + +#[test] +fn last_state_tracks_predictions() { + let mut ctrl = TurnController::new(MockDetector::new()); + assert_eq!(ctrl.last_state(), None); + + ctrl.inner_mut().next_state = TurnState::Unfinished; + ctrl.predict().unwrap(); + assert_eq!(ctrl.last_state(), Some(TurnState::Unfinished)); + + ctrl.inner_mut().next_state = TurnState::Finished; + ctrl.predict().unwrap(); + assert_eq!(ctrl.last_state(), Some(TurnState::Finished)); + + ctrl.reset(); + assert_eq!(ctrl.last_state(), None); +} + +#[test] +fn predict_accumulates_across_soft_reset() { + let mut ctrl = TurnController::new(MockDetector::new()); + + // Speech A + let frame_a = AudioFrame::new(&[0.1f32; 1600][..], 16_000).into_owned(); + ctrl.push_audio(&frame_a); + ctrl.inner_mut().next_state = TurnState::Unfinished; + ctrl.predict().unwrap(); + + // Soft reset — should NOT clear buffer + assert!(!ctrl.reset_if_finished()); + + // Speech B + let frame_b = AudioFrame::new(&[0.2f32; 1600][..], 16_000).into_owned(); + ctrl.push_audio(&frame_b); + + // Buffer should contain both A and B + assert_eq!( + ctrl.inner_mut().buffer_len, + 3200, + "buffer should have A + B samples" + ); + assert_eq!( + ctrl.inner_mut().reset_count, + 0, + "no resets should have occurred" + ); +} + +#[test] +fn into_inner_returns_detector() { + let mut ctrl = TurnController::new(MockDetector::new()); + let frame = AudioFrame::new(&[0.0f32; 160][..], 16_000).into_owned(); + ctrl.push_audio(&frame); + + let detector = ctrl.into_inner(); + assert_eq!(detector.buffer_len, 160); +} diff --git a/docs/plan-backends.md b/docs/plan-backends.md index ec17c97..57c59a0 100644 --- a/docs/plan-backends.md +++ b/docs/plan-backends.md @@ -27,11 +27,13 @@ ## Current state `PipecatSmartTurn` is fully implemented and all integration tests pass. +`TurnController` wraps any `AudioTurnDetector` with state tracking and soft-reset. `LiveKitEou` remains a stub (out of scope for this branch). 
``` src/ ├── lib.rs — traits: AudioTurnDetector, TextTurnDetector, TurnPrediction, TurnState +├── controller.rs — TurnController orchestration wrapper ├── error.rs — TurnError: BackendError, InvalidInput, ModelNotLoaded ├── onnx.rs — shared session_from_file / session_from_memory helpers ├── audio/ @@ -41,7 +43,10 @@ src/ ├── mod.rs └── livekit.rs — LiveKitEou (stub, out of scope) build.rs — downloads smart-turn-v3.2-cpu.onnx at build time +examples/ +└── controller.rs — TurnController usage with real WAV fixtures tests/ +├── controller.rs — 7 TurnController tests (mock detector) └── pipecat.rs — 9 integration tests (all pass) ``` diff --git a/docs/plan-turn-controller.md b/docs/plan-turn-controller.md new file mode 100644 index 0000000..8b16bc4 --- /dev/null +++ b/docs/plan-turn-controller.md @@ -0,0 +1,224 @@ +# Plan: TurnController Wrapper + +**Status:** Complete +**Date:** 2026-03-31 + +--- + +## Problem + +The `AudioTurnDetector` trait documents a simple flow: + +1. Every audio chunk → `push_audio` +2. VAD fires "speech started" → `reset` +3. VAD fires "speech stopped" → `predict` + +This works for the basic case, but breaks when the user continues speaking after a +brief pause — a common pattern in natural conversation (e.g. "I want to order... um... +a pizza"). + +### What goes wrong + +Consider this sequence: + +``` +VAD speech start → reset() buffer cleared + (push_audio) buffer has speech A +VAD speech end → predict() → Unfinished +VAD speech start → reset() ← WRONG: clears speech A + (push_audio) buffer has speech B only +VAD speech end → predict() runs on B alone, missing context +``` + +The Pipecat Smart Turn documentation explicitly says: + +> If additional speech is detected from the user before Smart Turn has finished +> executing, re-run Smart Turn on the entire turn recording, including the new audio, +> rather than just the new segment. 
Smart Turn works best when given sufficient context, +> and is not designed to run on very short audio segments. + +The correct behavior is to **skip the reset** when the previous prediction was +`Unfinished`, so the buffer accumulates across the full turn: + +``` +VAD speech start → reset() buffer cleared (first speech) + (push_audio) buffer has speech A +VAD speech end → predict() → Unfinished +VAD speech start → DON'T reset buffer keeps speech A + (push_audio) buffer has speech A + B +VAD speech end → predict() runs on A+B combined ✓ +``` + +### Why this doesn't belong in the trait + +The `AudioTurnDetector` trait is the right abstraction for backend authors — it's +minimal, and `reset()` is a clean primitive ("clear everything"). The soft-reset +decision depends on tracking the last prediction state, which is orchestration logic. + +Every orchestrator would have to re-implement this same logic. As a library, we should +provide a helper that does it correctly out of the box. + +--- + +## Solution: `TurnController` + +A generic wrapper around any `AudioTurnDetector` that tracks prediction state and +provides convenience methods. + +```rust +pub struct TurnController { + inner: T, + last_state: Option, +} +``` + +### API + +```rust +impl TurnController { + /// Create a new controller wrapping the given detector. + pub fn new(inner: T) -> Self; + + /// Feed audio into the detector. + pub fn push_audio(&mut self, frame: &AudioFrame); + + /// Run prediction on buffered audio. + /// Tracks the result state internally for `reset_if_finished`. + pub fn predict(&mut self) -> Result; + + /// Hard reset — always clears the buffer. Use when you know a new turn + /// is starting (e.g. after the assistant finishes responding). + pub fn reset(&mut self); + + /// Soft reset — clears the buffer only if the last prediction was + /// `Finished` (or no prediction has been made yet). Returns whether + /// a reset actually occurred. 
+ /// + /// Call this on VAD speech-start when you don't know whether the user + /// is continuing the same turn or starting a new one. + pub fn reset_if_finished(&mut self) -> bool; + + /// Returns the state from the last `predict()` call, or `None` if + /// no prediction has been made since the last reset. + pub fn last_state(&self) -> Option; + + /// Unwrap the controller, returning the inner detector. + pub fn into_inner(self) -> T; +} +``` + +### Usage + +```rust +let detector = PipecatSmartTurn::new()?; +let mut ctrl = TurnController::new(detector); + +// Audio arrives continuously +ctrl.push_audio(&frame); + +// VAD speech start — soft reset (keeps buffer if turn was unfinished) +ctrl.reset_if_finished(); + +// VAD speech end — predict +let result = ctrl.predict()?; +match result.state { + TurnState::Finished => { /* hand off to LLM */ } + TurnState::Unfinished => { /* wait for more speech */ } +} + +// After assistant finishes responding — hard reset for next turn +ctrl.reset(); +``` + +### Scenario walkthrough + +```rust +// Speech A — user says "I want to order..." +ctrl.reset_if_finished(); // no prior prediction → resets ✓ +ctrl.push_audio(&speech_a); +let a = ctrl.predict()?; // → Unfinished + +// Speech B — user continues "...a pizza" +ctrl.reset_if_finished(); // last was Unfinished → NO reset ✓ +ctrl.push_audio(&speech_b); +let b = ctrl.predict()?; // runs on A+B combined → Finished ✓ + +// Speech C — new conversation turn +ctrl.reset(); // hard reset after assistant responded +ctrl.push_audio(&speech_c); +let c = ctrl.predict()?; // runs on C only ✓ +``` + +--- + +## Design decisions + +### Why `TurnController` and not a trait method + +- Rust traits can't have fields, so every implementor would duplicate the + `last_state` tracking boilerplate. +- The soft-reset logic is identical across all backends — it only depends on + `TurnState`, not on backend internals. 
+- A wrapper keeps the trait minimal for backend authors while giving orchestrators + a batteries-included API. + +### Why `reset_if_finished` returns `bool` + +The orchestrator may want to know whether a reset occurred — e.g. for logging, +or to adjust behavior (start a new transcript vs. append to existing). + +### Why keep `reset()` on the controller + +Hard reset is still needed for cases the controller can't infer: +- After the assistant finishes responding (new conversation turn). +- Manual override / error recovery. +- First initialization. + +`reset_if_finished()` is the default for VAD speech-start events. +`reset()` is for explicit turn boundaries the orchestrator controls. + +--- + +## Future possibilities + +These are not part of the initial implementation but the `TurnController` is a +natural place to add them later: + +- **Min audio guard** — `predict()` returns early if the buffer is too short to + produce a meaningful prediction, avoiding wasted inference on tiny audio segments. +- **Configurable threshold** — override the default 0.5 probability threshold + without modifying the detector. +- **Prediction history** — track recent predictions for debugging and logging. + +--- + +## File placement + +``` +src/ +├── lib.rs — existing traits (unchanged) +├── controller.rs — TurnController ← NEW +├── audio/ +│ └── pipecat.rs — PipecatSmartTurn (unchanged) +└── ... 
+```
+
+Re-export from `lib.rs`:
+
+```rust
+mod controller;
+pub use controller::TurnController;
+```
+
+---
+
+## Tests
+
+| Test | What it checks |
+|------|---------------|
+| `reset_if_finished_resets_on_first_call` | No prior prediction → resets |
+| `reset_if_finished_skips_after_unfinished` | Last predict was Unfinished → no reset |
+| `reset_if_finished_resets_after_finished` | Last predict was Finished → resets |
+| `hard_reset_always_clears` | `reset()` clears regardless of last state |
+| `last_state_tracks_predictions` | `last_state()` returns correct value after predict/reset |
+| `predict_accumulates_across_soft_reset` | Buffer preserved when soft reset skips → predict uses full audio |
+| `into_inner_returns_detector` | `into_inner()` returns the wrapped detector with its buffer intact |