From 79cf05353045dd56a2dddf178d3bc70e03b075de Mon Sep 17 00:00:00 2001
From: xenstalker02 <tim.dilich@gmail.com>
Date: Tue, 24 Mar 2026 13:05:10 -0400
Subject: [PATCH 1/4] feat(audio): add mic passthrough platform interface and
 Windows implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds speaker_t abstract class and capture_snapshot_t struct to the platform
audio interface. Implements speaker_wasapi_t for writing decoded mono PCM to
a WASAPI render endpoint (e.g. VB-Audio Virtual Cable Input) with a
4-packet prebuffer render thread.

Fix 2: snapshot_capture_defaults()/switch_capture_to()/restore_capture_from()
snapshot all three ERole values before switching and restore each individually,
eliminating the per-role race and removing the detached retry thread.

Fix 3: IsFormatSupported() negotiation — on S_FALSE uses closest_match; on
FAILED falls back to GetMixFormat(). Prevents AUDCLNT_E_UNSUPPORTED_FORMAT on
devices where float32/2ch/48kHz is not natively supported.

Removes install_steam_audio_drivers() and the startup call site; VB-Audio
Virtual Cable is the supported loopback path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/platform/common.h          |  24 +++
 src/platform/windows/audio.cpp | 382 +++++++++++++++++++++++++--------
 2 files changed, 321 insertions(+), 85 deletions(-)

diff --git a/src/platform/common.h b/src/platform/common.h
index 2cee3f8da..729aa6755 100644
--- a/src/platform/common.h
+++ b/src/platform/common.h
@@ -527,12 +527,36 @@ namespace platf {
     virtual ~mic_t() = default;
   };
 
+  class speaker_t {  // Write-only sink for float audio samples; audio_control_t::virtual_microphone() returns one.
+  public:
+    virtual int write(const float *samples, std::uint32_t frame_count) = 0;  // Consume frame_count floats from `samples`; the WASAPI implementation returns 0 on success and -1 on failure.
+    virtual ~speaker_t() = default;
+  };
+
+  struct capture_snapshot_t {  // Default capture endpoint ID per role, as returned by snapshot_capture_defaults().
+    std::wstring console_id;  // eConsole role in the Windows implementation; empty when the lookup failed
+    std::wstring comms_id;  // eCommunications role in the Windows implementation; empty when the lookup failed
+    std::wstring multimedia_id;  // eMultimedia role in the Windows implementation; empty when the lookup failed
+  };
+
   class audio_control_t {
   public:
     virtual int set_sink(const std::string &sink) = 0;
 
     virtual std::unique_ptr<mic_t> microphone(const std::uint8_t *mapping, int channels, std::uint32_t sample_rate, std::uint32_t frame_size) = 0;
 
+    virtual std::unique_ptr<speaker_t> virtual_microphone(const std::string &device_name, std::uint32_t sample_rate, std::uint32_t frame_size) { return nullptr; }  // Optional hook: backends without mic passthrough inherit "no device" (nullptr) instead of becoming abstract.
+
+    virtual capture_snapshot_t snapshot_capture_defaults() { return {}; }  // Base implementation: returns an empty snapshot.
+
+    virtual void switch_capture_to(const std::string &device_name) {}  // Base implementation: no-op.
+
+    virtual void restore_capture_from(const capture_snapshot_t &snapshot) {}  // Base implementation: no-op.
+
+    virtual std::string get_current_default_capture_name() { return {}; }  // Base implementation: returns an empty name.
+
+    virtual void reset_default_capture_to_first_real() {}  // Base implementation: no-op.
+
     /**
      * @brief Check if the audio sink is available in the system.
      * @param sink Sink to be checked.
diff --git a/src/platform/windows/audio.cpp b/src/platform/windows/audio.cpp
index 1ee332fb4..8d3570819 100644
--- a/src/platform/windows/audio.cpp
+++ b/src/platform/windows/audio.cpp
@@ -31,16 +31,9 @@ DEFINE_PROPERTYKEY(PKEY_Device_DeviceDesc, 0xa45c254e, 0xdf1c, 0x4efd, 0x80, 0x2
 DEFINE_PROPERTYKEY(PKEY_Device_FriendlyName, 0xa45c254e, 0xdf1c, 0x4efd, 0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0, 14);  // DEVPROP_TYPE_STRING
 DEFINE_PROPERTYKEY(PKEY_DeviceInterface_FriendlyName, 0x026e516e, 0xb814, 0x414b, 0x83, 0xcd, 0x85, 0x6d, 0x6f, 0xef, 0x48, 0x22, 2);
 
-#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64)
-  #define STEAM_DRIVER_SUBDIR L"x64"
-#else
-  #warning No known Steam audio driver for this architecture
-#endif
-
 namespace {
 
   constexpr auto SAMPLE_RATE = 48000;
-  constexpr auto STEAM_AUDIO_DRIVER_PATH = L"%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\" STEAM_DRIVER_SUBDIR L"\\SteamStreamingSpeakers.inf";
 
   constexpr auto waveformat_mask_stereo = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
 
@@ -219,6 +212,7 @@ namespace platf::audio {
   using collection_t = util::safe_ptr<IMMDeviceCollection, Release<IMMDeviceCollection>>;
   using audio_client_t = util::safe_ptr<IAudioClient, Release<IAudioClient>>;
   using audio_capture_t = util::safe_ptr<IAudioCaptureClient, Release<IAudioCaptureClient>>;
+  using audio_render_t = util::safe_ptr<IAudioRenderClient, Release<IAudioRenderClient>>;  // Render-side counterpart of audio_capture_t; held by speaker_wasapi_t.
   using wave_format_t = util::safe_ptr<WAVEFORMATEX, co_task_free<WAVEFORMATEX>>;
   using wstring_t = util::safe_ptr<WCHAR, co_task_free<WCHAR>>;
   using handle_t = util::safe_ptr_v2<void, BOOL, CloseHandle>;
@@ -683,6 +677,191 @@ namespace platf::audio {
     HANDLE mmcss_task_handle = nullptr;
   };
 
+  /**
+   * @brief WASAPI render endpoint for mic passthrough (writes to VB-Cable Input).
+   *
+   * write() queues mono float32 samples and signals render_loop via event.
+   * render_loop waits for a 4-packet prebuffer, then copies each sample to every output channel.
+   */
+  class speaker_wasapi_t: public platf::speaker_t {
+  public:
+    /// Queue mono samples (clamped to [-1, 1]); returns -1 if never started or the render thread died.
+    int write(const float *samples, std::uint32_t frame_count) override {
+      if (!render_event || render_dead.load(std::memory_order_acquire)) return -1;
+      {
+        std::lock_guard<std::mutex> lk(queue_mutex);
+        for (std::uint32_t i = 0; i < frame_count; ++i)
+          pending_frames.push_back(std::clamp(samples[i], -1.0f, 1.0f));
+        // Cap to 1 second to bound latency: keep only the newest 48000 samples.
+        if (pending_frames.size() > 48000)
+          pending_frames.erase(pending_frames.begin(), pending_frames.end() - 48000);
+      }
+      SetEvent(render_event);
+      return 0;
+    }
+
+    /// Open `device_id`, negotiate a float32 shared-mode format and start render_loop; 0 on success, -1 on failure.
+    int init(const std::wstring &device_id, std::uint32_t /*sample_rate*/) {
+      // Activate IAudioClient
+      device_enum_t dev_enum;
+      auto status = CoCreateInstance(CLSID_MMDeviceEnumerator, nullptr, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void **)&dev_enum);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: CoCreateInstance failed 0x"sv << util::hex(status).to_string_view();
+        return -1;
+      }
+      device_t device;
+      status = dev_enum->GetDevice(device_id.c_str(), &device);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: GetDevice failed 0x"sv << util::hex(status).to_string_view();
+        return -1;
+      }
+      status = device->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, (void **)&audio_client);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: Activate failed 0x"sv << util::hex(status).to_string_view();
+        return -1;
+      }
+
+      // Preferred format: float32 stereo 48kHz
+      WAVEFORMATEXTENSIBLE fmt {};
+      fmt.Format.wFormatTag           = WAVE_FORMAT_EXTENSIBLE;
+      fmt.Format.nChannels            = 2;
+      fmt.Format.nSamplesPerSec       = 48000;
+      fmt.Format.wBitsPerSample       = 32;
+      fmt.Format.nBlockAlign          = 8;
+      fmt.Format.nAvgBytesPerSec      = 48000 * 8;
+      fmt.Format.cbSize               = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
+      fmt.Samples.wValidBitsPerSample = 32;
+      fmt.dwChannelMask               = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
+      fmt.SubFormat                   = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
+
+      // Fix 3: negotiate format instead of hardcoding
+      WAVEFORMATEX *closest_match = nullptr;
+      HRESULT fmt_hr = audio_client->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, reinterpret_cast<WAVEFORMATEX *>(&fmt), &closest_match);
+      const WAVEFORMATEX *use_fmt = reinterpret_cast<WAVEFORMATEX *>(&fmt);
+      wave_format_t mix_fmt_guard, closest_guard;
+      if (fmt_hr == S_FALSE && closest_match) {
+        closest_guard.reset(closest_match);
+        use_fmt = closest_match;
+        BOOST_LOG(info) << "[mic] IsFormatSupported S_FALSE — using closest match "sv << use_fmt->nChannels << "ch "sv << use_fmt->nSamplesPerSec << "Hz"sv;
+      } else if (FAILED(fmt_hr)) {
+        WAVEFORMATEX *mix_fmt = nullptr;
+        if (FAILED(audio_client->GetMixFormat(&mix_fmt)) || !mix_fmt) {
+          BOOST_LOG(error) << "[mic] speaker_wasapi_t: GetMixFormat failed — cannot initialize"sv;
+          return -1;
+        }
+        mix_fmt_guard.reset(mix_fmt);
+        use_fmt = mix_fmt;
+        BOOST_LOG(info) << "[mic] IsFormatSupported failed — falling back to mix format "sv << use_fmt->nChannels << "ch "sv << use_fmt->nSamplesPerSec << "Hz"sv;
+      }
+
+      // render_loop() writes 32-bit float frames, so reject any other layout and remember the real channel count.
+      const bool is_float32 = use_fmt->wBitsPerSample == 32 && (use_fmt->wFormatTag == WAVE_FORMAT_IEEE_FLOAT ||
+        (use_fmt->wFormatTag == WAVE_FORMAT_EXTENSIBLE && IsEqualGUID(reinterpret_cast<const WAVEFORMATEXTENSIBLE *>(use_fmt)->SubFormat, KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)));
+      if (!is_float32) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: negotiated format is not float32 — cannot initialize"sv;
+        return -1;
+      }
+      out_channels = use_fmt->nChannels;
+      if (use_fmt->nSamplesPerSec != 48000)
+        BOOST_LOG(warning) << "[mic] endpoint runs at "sv << use_fmt->nSamplesPerSec << "Hz — 48kHz mic audio is not resampled and will play at the wrong pitch"sv;
+
+      status = audio_client->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, 1000000LL, 0, use_fmt, nullptr);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: Initialize failed 0x"sv << util::hex(status).to_string_view();
+        return -1;
+      }
+      status = audio_client->GetBufferSize(&buffer_frames);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: GetBufferSize failed"sv;
+        return -1;
+      }
+      status = audio_client->GetService(IID_IAudioRenderClient, (void **)&audio_render);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: GetService IAudioRenderClient failed"sv;
+        return -1;
+      }
+
+      render_event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+      if (!render_event) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: CreateEvent failed"sv;
+        return -1;
+      }
+      status = audio_client->SetEventHandle(render_event);  // an event-driven stream cannot run without its handle
+      if (SUCCEEDED(status)) status = audio_client->Start();
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "[mic] speaker_wasapi_t: SetEventHandle/Start failed 0x"sv << util::hex(status).to_string_view();
+        CloseHandle(render_event);
+        render_event = nullptr;
+        return -1;
+      }
+      stop_flag = false;
+      render_thread = std::thread(&speaker_wasapi_t::render_loop, this);
+      BOOST_LOG(info) << "[mic] speaker_wasapi_t ready"sv;
+      return 0;
+    }
+
+    ~speaker_wasapi_t() override {
+      stop_flag = true;
+      if (render_event) SetEvent(render_event);
+      if (render_thread.joinable()) render_thread.join();
+      if (audio_client) audio_client->Stop();
+      if (render_event) { CloseHandle(render_event); render_event = nullptr; }
+    }
+
+  private:
+    static constexpr std::size_t kPrebufFrames = 960 * 4;  // 4 Opus packets
+
+    void render_loop() {
+      const bool com_ready = SUCCEEDED(CoInitializeEx(nullptr, COINIT_MULTITHREADED | COINIT_SPEED_OVER_MEMORY));  // only a successful init is balanced below
+      bool playout_started = false;
+      while (!stop_flag) {
+        WaitForSingleObject(render_event, 20);
+        if (stop_flag) break;
+        if (!playout_started) {
+          std::size_t qsz;
+          { std::lock_guard<std::mutex> lk(queue_mutex); qsz = pending_frames.size(); }
+          if (qsz < kPrebufFrames) continue;
+          playout_started = true;
+          BOOST_LOG(info) << "[mic] Playout started (prebuffer: "sv << qsz << " frames)"sv;
+        }
+        UINT32 padding = 0;
+        if (FAILED(audio_client->GetCurrentPadding(&padding))) {
+          render_dead.store(true, std::memory_order_release);
+          break;
+        }
+        auto avail = buffer_frames - padding;
+        if (avail == 0) continue;
+        UINT32 to_write;
+        { std::lock_guard<std::mutex> lk(queue_mutex); to_write = std::min(avail, (UINT32)pending_frames.size()); }
+        if (to_write == 0) continue;
+        BYTE *buf = nullptr;
+        if (FAILED(audio_render->GetBuffer(to_write, &buf)) || !buf) continue;
+        auto *dst = reinterpret_cast<float *>(buf);
+        {
+          std::lock_guard<std::mutex> lk(queue_mutex);
+          for (UINT32 f = 0; f < to_write; ++f) {
+            const float s = pending_frames.front();
+            pending_frames.pop_front();
+            for (UINT32 c = 0; c < out_channels; ++c) dst[f * out_channels + c] = s;  // one frame = out_channels floats
+          }
+        }
+        audio_render->ReleaseBuffer(to_write, 0);
+      }
+      if (com_ready) CoUninitialize();
+    }
+
+    audio_client_t    audio_client;
+    audio_render_t    audio_render;
+    HANDLE            render_event { nullptr };
+    UINT32            buffer_frames { 0 };
+    UINT32            out_channels { 2 };  // channels per frame of the format passed to Initialize()
+    std::atomic<bool> stop_flag { false };
+    std::atomic<bool> render_dead { false };
+    std::thread       render_thread;
+    std::mutex        queue_mutex;
+    std::deque<float> pending_frames;
+  };
+
   class audio_control_t: public ::platf::audio_control_t {
   public:
     std::optional<sink_t> sink_info() override {
@@ -783,6 +962,92 @@ namespace platf::audio {
       return mic;
     }
 
+    /// Resolve `device_name` to an endpoint and open it as a speaker_wasapi_t; nullptr when either step fails.
+    std::unique_ptr<platf::speaker_t> virtual_microphone(const std::string &device_name, std::uint32_t sample_rate, std::uint32_t /*frame_size*/) override {
+      auto endpoint = find_device_id(match_all_fields(from_utf8(device_name)));
+      if (!endpoint) {
+        BOOST_LOG(warning) << "[mic] virtual_microphone: device not found: " << device_name << " — mic passthrough disabled for this session"sv;
+        return nullptr;
+      }
+
+      auto renderer = std::make_unique<speaker_wasapi_t>();
+      const bool init_failed = renderer->init(endpoint->second, sample_rate) != 0;
+      if (init_failed) return nullptr;
+      return renderer;
+    }
+
+    /// Record, per ERole, the endpoint ID of the current default capture device (empty when a lookup fails).
+    platf::capture_snapshot_t snapshot_capture_defaults() override {
+      const auto default_capture_id = [&](ERole role) -> std::wstring {
+        device_t endpoint;
+        if (FAILED(device_enum->GetDefaultAudioEndpoint(eCapture, role, &endpoint))) return {};
+        wstring_t raw_id;
+        return SUCCEEDED(endpoint->GetId(&raw_id)) ? std::wstring(raw_id.get()) : std::wstring {};
+      };
+      platf::capture_snapshot_t defaults;
+      defaults.console_id = default_capture_id(eConsole);
+      defaults.comms_id = default_capture_id(eCommunications);
+      defaults.multimedia_id = default_capture_id(eMultimedia);
+      return defaults;
+    }
+
+    /// Make the capture device whose friendly name is `device_name` the default endpoint for every role.
+    void switch_capture_to(const std::string &device_name) override {
+      const auto endpoint_id = find_capture_device_id(from_utf8(device_name));
+      if (endpoint_id.empty()) {
+        BOOST_LOG(warning) << "[mic] switch_capture_to: device not found: " << device_name;
+        return;
+      }
+      for (int role = 0; role < (int) ERole_enum_count; ++role) policy->SetDefaultEndpoint(endpoint_id.c_str(), (ERole) role);
+      BOOST_LOG(info) << "[mic] default capture switched to: " << device_name;
+    }
+
+    /**
+     * @brief Re-apply the per-role default capture endpoints recorded in `snap`.
+     */
+    void restore_capture_from(const platf::capture_snapshot_t &snap) override {
+      // A role whose saved ID is empty is skipped rather than passed to SetDefaultEndpoint.
+      if (!snap.console_id.empty()) policy->SetDefaultEndpoint(snap.console_id.c_str(), eConsole);
+      if (!snap.comms_id.empty()) policy->SetDefaultEndpoint(snap.comms_id.c_str(), eCommunications);
+      if (!snap.multimedia_id.empty()) policy->SetDefaultEndpoint(snap.multimedia_id.c_str(), eMultimedia);
+      BOOST_LOG(info) << "[mic] default capture roles restored"sv;
+    }
+
+    /// UTF-8 friendly name of the default eConsole capture endpoint; empty when any lookup step fails.
+    std::string get_current_default_capture_name() override {
+      device_t endpoint;
+      prop_t store;
+      prop_var_t label;
+      const bool resolved = SUCCEEDED(device_enum->GetDefaultAudioEndpoint(eCapture, eConsole, &endpoint)) && SUCCEEDED(endpoint->OpenPropertyStore(STGM_READ, &store)) &&
+                            SUCCEEDED(store->GetValue(PKEY_Device_FriendlyName, &label.prop)) && label.prop.vt == VT_LPWSTR;
+      if (!resolved) return {};
+      return to_utf8(label.prop.pwszVal);
+    }
+
+    /// Make the first active capture endpoint whose friendly name lacks "CABLE" the default for every role.
+    void reset_default_capture_to_first_real() override {
+      collection_t endpoints;
+      if (FAILED(device_enum->EnumAudioEndpoints(eCapture, DEVICE_STATE_ACTIVE, &endpoints))) return;
+      UINT total = 0;
+      endpoints->GetCount(&total);
+      for (UINT index = 0; index < total; ++index) {
+        device_t candidate;
+        if (FAILED(endpoints->Item(index, &candidate))) continue;
+        prop_t store;
+        if (FAILED(candidate->OpenPropertyStore(STGM_READ, &store))) continue;
+        prop_var_t label;
+        if (!(SUCCEEDED(store->GetValue(PKEY_Device_FriendlyName, &label.prop)) && label.prop.vt == VT_LPWSTR)) continue;
+        const std::string friendly = to_utf8(label.prop.pwszVal);
+        // The substring test is case-sensitive; endpoints whose ID cannot be read are skipped as well.
+        wstring_t endpoint_id;
+        if (friendly.find("CABLE") != std::string::npos || FAILED(candidate->GetId(&endpoint_id))) continue;
+        for (int role = 0; role < (int) ERole_enum_count; ++role) policy->SetDefaultEndpoint(endpoint_id.get(), (ERole) role);
+        BOOST_LOG(info) << "[mic] reset_default_capture_to_first_real: " << friendly;
+        return;
+      }
+      BOOST_LOG(warning) << "[mic] reset_default_capture_to_first_real: no non-CABLE capture device found"sv;
+    }
+
     /**
      * If the requested sink is a virtual sink, meaning no speakers attached to
      * the host, then we can seamlessly set the format to stereo and surround sound.
@@ -982,6 +1247,31 @@ namespace platf::audio {
       return std::nullopt;
     }
 
+    /**
+     * @brief Look up the endpoint ID of the active capture device whose friendly name equals `name`.
+     * @param name Friendly name to match exactly (compared with std::wcscmp).
+     * @return The endpoint ID, or an empty string when enumeration fails or nothing matches.
+     */
+    std::wstring find_capture_device_id(const std::wstring &name) {
+      collection_t endpoints;
+      if (FAILED(device_enum->EnumAudioEndpoints(eCapture, DEVICE_STATE_ACTIVE, &endpoints))) return {};
+      UINT total = 0;
+      endpoints->GetCount(&total);
+      for (UINT index = 0; index < total; ++index) {
+        device_t candidate;
+        if (FAILED(endpoints->Item(index, &candidate))) continue;
+        prop_t store;
+        if (FAILED(candidate->OpenPropertyStore(STGM_READ, &store))) continue;
+        prop_var_t label;
+        if (FAILED(store->GetValue(PKEY_Device_FriendlyName, &label.prop)) || label.prop.vt != VT_LPWSTR) continue;
+        if (std::wcscmp(label.prop.pwszVal, name.c_str()) != 0) continue;
+        // A matching device whose ID cannot be read does not end the search.
+        wstring_t endpoint_id;
+        if (SUCCEEDED(candidate->GetId(&endpoint_id))) return std::wstring(endpoint_id.get());
+      }
+      return {};
+    }
+
     /**
      * @brief Resets the default audio device from Steam Streaming Speakers.
      */
@@ -1042,77 +1332,6 @@ namespace platf::audio {
       BOOST_LOG(info) << "Successfully reset default audio device"sv;
     }
 
-    /**
-     * @brief Installs the Steam Streaming Speakers driver, if present.
-     * @return `true` if installation was successful.
-     */
-    bool install_steam_audio_drivers() {
-#ifdef STEAM_DRIVER_SUBDIR
-      // MinGW's libnewdev.a is missing DiInstallDriverW() even though the headers have it,
-      // so we have to load it at runtime. It's Vista or later, so it will always be available.
-      auto newdev = LoadLibraryExW(L"newdev.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32);
-      if (!newdev) {
-        BOOST_LOG(error) << "newdev.dll failed to load"sv;
-        return false;
-      }
-      auto fg = util::fail_guard([newdev]() {
-        FreeLibrary(newdev);
-      });
-
-      auto fn_DiInstallDriverW = (decltype(DiInstallDriverW) *) GetProcAddress(newdev, "DiInstallDriverW");
-      if (!fn_DiInstallDriverW) {
-        BOOST_LOG(error) << "DiInstallDriverW() is missing"sv;
-        return false;
-      }
-
-      // Get the current default audio device (if present)
-      auto old_default_dev = default_device(device_enum);
-
-      // Install the Steam Streaming Speakers driver
-      WCHAR driver_path[MAX_PATH] = {};
-      ExpandEnvironmentStringsW(STEAM_AUDIO_DRIVER_PATH, driver_path, ARRAYSIZE(driver_path));
-      if (fn_DiInstallDriverW(nullptr, driver_path, 0, nullptr)) {
-        BOOST_LOG(info) << "Successfully installed Steam Streaming Speakers"sv;
-
-        // Wait for 5 seconds to allow the audio subsystem to reconfigure things before
-        // modifying the default audio device or enumerating devices again.
-        Sleep(5000);
-
-        // If there was a previous default device, restore that original device as the
-        // default output device just in case installing the new one changed it.
-        if (old_default_dev) {
-          audio::wstring_t old_default_id;
-          old_default_dev->GetId(&old_default_id);
-
-          for (int x = 0; x < (int) ERole_enum_count; ++x) {
-            policy->SetDefaultEndpoint(old_default_id.get(), (ERole) x);
-          }
-        }
-
-        return true;
-      } else {
-        auto err = GetLastError();
-        switch (err) {
-          case ERROR_ACCESS_DENIED:
-            BOOST_LOG(warning) << "Administrator privileges are required to install Steam Streaming Speakers"sv;
-            break;
-          case ERROR_FILE_NOT_FOUND:
-          case ERROR_PATH_NOT_FOUND:
-            BOOST_LOG(info) << "Steam audio drivers not found. This is expected if you don't have Steam installed."sv;
-            break;
-          default:
-            BOOST_LOG(warning) << "Failed to install Steam audio drivers: "sv << err;
-            break;
-        }
-
-        return false;
-      }
-#else
-      BOOST_LOG(warning) << "Unable to install Steam Streaming Speakers on unknown architecture"sv;
-      return false;
-#endif
-    }
-
     int init() {
       auto status = CoCreateInstance(
         CLSID_CPolicyConfigClient,
@@ -1167,13 +1386,6 @@ namespace platf {
       return nullptr;
     }
 
-    // Install Steam Streaming Speakers if needed. We do this during audio_control() to ensure
-    // the sink information returned includes the new Steam Streaming Speakers device.
-    if (config::audio.install_steam_drivers && !control->find_device_id(control->match_steam_speakers())) {
-      // This is best effort. Don't fail if it doesn't work.
-      control->install_steam_audio_drivers();
-    }
-
     return control;
   }
 

From 2e3df0fa480ad495ebaae0803dc314141ea1812e Mon Sep 17 00:00:00 2001
From: xenstalker02 <tim.dilich@gmail.com>
Date: Tue, 24 Mar 2026 13:05:19 -0400
Subject: [PATCH 2/4] feat(stream): add IDX_MIC_AUDIO_DATA (0x3003) handler
 with jitter buffer

Adds IDX_MIC_AUDIO_DATA=19 / packetTypes[19]=0x3003 for the Apollo mic
passthrough control stream packet type.

Fix 1: parses the real sender sequence number from the 4-byte framing header
[seq_hi, seq_lo, ch, flags] prepended by the Vibelight client, instead of
using a fake local counter. This makes the jitter buffer correctly order
packets from the client's perspective.

Session lifecycle: on start(), allocates OpusDecoder + speaker_t, calls
snapshot_capture_defaults() and switch_capture_to(mic_capture_device) as a
one-shot switch with no retry thread. On join(), calls restore_capture_from()
using the saved snapshot to restore all three ERole defaults.

Requires SS_ENC_CONTROL_V2 (AES-GCM) on the control stream; plaintext mic is
refused per upstream requirements.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/stream.cpp | 245 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)

diff --git a/src/stream.cpp b/src/stream.cpp
index 88bd09f7a..dea409f6a 100644
--- a/src/stream.cpp
+++ b/src/stream.cpp
@@ -19,6 +19,7 @@
 #include <boost/algorithm/string/predicate.hpp>
 #include <boost/endian/arithmetic.hpp>
 #include <openssl/err.h>
+#include <opus/opus.h>
 
 extern "C" {
   // clang-format off
@@ -71,6 +72,7 @@ extern "C" {
 #define IDX_SET_CLIPBOARD 16
 #define IDX_FILE_TRANSFER_NONCE_REQUEST 17
 #define IDX_SET_ADAPTIVE_TRIGGERS 18
+#define IDX_MIC_AUDIO_DATA 19
 
 static const short packetTypes[] = {
   0x0305,  // Start A
@@ -92,6 +94,7 @@ static const short packetTypes[] = {
   0x3001,  // Set Clipboard (Apollo protocol extension)
   0x3002,  // File transfer nonce request (Apollo protocol extension)
   0x5503,  // Set Adaptive triggers (Sunshine protocol extension)
+  0x3003,  // Mic audio data (Apollo protocol extension)
 };
 
 namespace asio = boost::asio;
@@ -104,6 +107,9 @@ using namespace std::literals;
 
 namespace stream {
 
+  // Set to true on first 0x3003 mic packet receipt for timeout diagnostics; cleared when a session initializes mic passthrough.
+  static std::atomic<bool> mic_first_packet_received {false};  // NOTE(review): file-scope, so shared by all sessions — confirm only one mic session can be active.
+
   enum class socket_e : int {
     video,  ///< Video
     audio  ///< Audio
@@ -479,6 +485,42 @@ namespace stream {
 
     std::atomic<session::state_e> state;
 
+    // Mic passthrough: Opus decode → virtual audio render endpoint
+    static constexpr std::size_t mic_max_queued_packets = 32;  // jitter-buffer cap; the lowest-keyed entry is evicted beyond this
+
+    struct mic_queued_packet_t {
+      std::vector<std::uint8_t> payload;  // Opus packet bytes with the 4-byte wire header stripped
+      std::uint16_t seq;  // sender sequence number parsed from the wire header
+    };
+
+    struct {
+      std::unique_ptr<platf::speaker_t> speaker;  // reset to disable passthrough (plaintext stream or lost render device)
+      std::unique_ptr<OpusDecoder, void (*)(OpusDecoder *)> decoder {nullptr, opus_decoder_destroy};
+      int channels = 0;  // decoder channel count; session start sets it to 1
+
+      // Snapshot of default capture roles saved before switching; restored on session end.
+      platf::capture_snapshot_t capture_snap;
+      bool capture_switched = false;
+
+      // Kept alive across the session to avoid re-running audio_control() on teardown.
+      std::unique_ptr<platf::audio_control_t> audio_ctrl;
+
+      // Jitter buffer for reordering and FEC/PLC, keyed by sender sequence number
+      std::map<std::uint16_t, mic_queued_packet_t> pending_packets;
+      std::uint16_t expected_seq = 0;  // next sequence number to play; valid once has_playout_cursor is set
+      bool has_playout_cursor = false;  // false until the prebuffer has filled
+
+      // Stats
+      std::uint64_t packets_received = 0;
+      std::uint64_t decode_errors = 0;
+      std::uint64_t plc_events = 0;
+      std::uint64_t silence_frames = 0;
+      std::uint64_t frames_written = 0;
+      std::uint64_t encryption_failures = 0;
+
+      std::chrono::steady_clock::time_point last_recv_time;  // arrival time of the previous mic packet (underrun detection)
+    } mic;
+
 #ifdef _WIN32
     struct {
       bool active = false;
@@ -1235,6 +1277,145 @@ namespace stream {
       }
     });
 
+    server->map(packetTypes[IDX_MIC_AUDIO_DATA], [](session_t *session, const std::string_view &payload) {
+      BOOST_LOG(verbose) << "type [IDX_MIC_AUDIO_DATA]"sv;
+      // Mic audio from the client: validate, order by sender sequence number, Opus-decode, write to the speaker.
+      if (!session->mic.speaker || !session->mic.decoder) return;
+
+      // Require encrypted control stream
+      if (!(session->config.encryptionFlagsEnabled & SS_ENC_CONTROL_V2)) {
+        if (session->mic.encryption_failures++ == 0) {
+          BOOST_LOG(warning) << "[mic] Plaintext control stream — mic passthrough disabled."sv;
+          session->mic.speaker.reset();
+          session->mic.decoder.reset();
+        }
+        return;
+      }
+
+      // Need at least the 4-byte framing header
+      if (payload.size() < 4 || payload.size() >= 4096) {
+        BOOST_LOG(warning) << "[mic] malformed packet (len="sv << payload.size() << ")"sv;
+        return;
+      }
+
+      // Fix 1: parse real sender sequence number from 4-byte wire header [seq_hi, seq_lo, ch, flags]
+      const auto *hdr = reinterpret_cast<const std::uint8_t *>(payload.data());
+      const std::uint16_t incoming_seq = (static_cast<std::uint16_t>(hdr[0]) << 8) | hdr[1];
+
+      // Fix A: validate ch and flags fields
+      if (hdr[2] != 1 || hdr[3] != 0) {
+        BOOST_LOG(warning) << "[mic] invalid header (ch="sv << (int)hdr[2] << " flags="sv << (int)hdr[3] << ") — dropping"sv;
+        return;
+      }
+
+      const std::string_view opus_payload { payload.data() + 4, payload.size() - 4 };
+
+      // Fix A: validate Opus packet structure before inserting into jitter buffer
+      {
+        unsigned char toc_byte = 0;
+        const unsigned char *frames_arr[48] = {};
+        opus_int16 sizes[48] = {};
+        int payload_offset = 0;
+        int nb_frames = opus_packet_parse(
+          reinterpret_cast<const unsigned char *>(opus_payload.data()),
+          static_cast<opus_int32>(opus_payload.size()),
+          &toc_byte, frames_arr, sizes, &payload_offset);
+        if (nb_frames < 0) {
+          session->mic.decode_errors++;
+          BOOST_LOG(warning) << "[mic] opus_packet_parse failed ("sv << nb_frames << ") — dropping"sv;
+          return;
+        }
+      }
+
+      // Fix A: derive frame size from packet instead of hardcoding
+      const int expected_samples = opus_packet_get_nb_samples(
+        reinterpret_cast<const unsigned char *>(opus_payload.data()),
+        static_cast<opus_int32>(opus_payload.size()),
+        48000);
+      const int frame_size = (expected_samples > 0 && expected_samples <= 5760) ? expected_samples : 960;
+
+      if (!stream::mic_first_packet_received.exchange(true, std::memory_order_relaxed))
+        BOOST_LOG(info) << "[mic] First mic packet received from client"sv;
+
+      auto recv_time = std::chrono::steady_clock::now();
+      auto &mic = session->mic;
+      mic.packets_received++;
+
+      // Underrun detection: write silence on long gap
+      if (mic.packets_received > 1) {
+        auto gap_ms = std::chrono::duration_cast<std::chrono::milliseconds>(recv_time - mic.last_recv_time).count();
+        if (gap_ms > config::audio.mic_buffer_ms && mic.speaker) {
+          std::vector<float> silence((size_t)(960 * mic.channels), 0.0f);
+          mic.speaker->write(silence.data(), 960u);
+          mic.silence_frames++;
+        }
+      }
+      mic.last_recv_time = recv_time;
+
+      // Drop packets whose playout slot already passed (wrap-aware: behind the cursor by less than half the
+      // sequence space). A stale entry would sit at begin() and block the PLC branch until the 32-packet cap evicted it.
+      if (mic.has_playout_cursor && static_cast<std::uint16_t>(incoming_seq - mic.expected_seq) >= 0x8000u) return;
+
+      // Insert into jitter buffer
+      mic.pending_packets.emplace(incoming_seq, session_t::mic_queued_packet_t {
+        std::vector<std::uint8_t> {
+          reinterpret_cast<const std::uint8_t *>(opus_payload.data()),
+          reinterpret_cast<const std::uint8_t *>(opus_payload.data()) + opus_payload.size()
+        },
+        incoming_seq
+      });
+      if (mic.pending_packets.size() > session_t::mic_max_queued_packets)
+        mic.pending_packets.erase(mic.pending_packets.begin());
+
+      // Prebuffer
+      if (!mic.has_playout_cursor) {
+        if (mic.pending_packets.size() < (std::size_t) config::audio.mic_buffer_packets) return;
+        mic.expected_seq = mic.pending_packets.begin()->first;
+        mic.has_playout_cursor = true;
+        BOOST_LOG(info) << "[mic] Jitter buffer ready ("sv << config::audio.mic_buffer_packets << " packets)"sv;
+      }
+
+      // Drain in sequence order
+      while (!mic.pending_packets.empty()) {
+        auto it = mic.pending_packets.find(mic.expected_seq);
+        std::vector<float> pcm((size_t)(frame_size * mic.channels));
+        int frames = 0;
+
+        if (it != mic.pending_packets.end()) {
+          frames = opus_decode_float(mic.decoder.get(), it->second.payload.data(), (opus_int32) it->second.payload.size(), pcm.data(), frame_size, 0);
+          mic.pending_packets.erase(it);
+        } else {
+          auto next_it = mic.pending_packets.find(static_cast<std::uint16_t>(mic.expected_seq + 1));
+          if (next_it != mic.pending_packets.end()) {
+            frames = opus_decode_float(mic.decoder.get(), next_it->second.payload.data(),
+              (opus_int32) next_it->second.payload.size(), pcm.data(), frame_size, 1);
+          } else if (!mic.pending_packets.empty() && static_cast<std::uint16_t>(mic.pending_packets.begin()->first - mic.expected_seq) < 0x8000u) {  // wrap-aware "begin() is ahead"
+            frames = opus_decode_float(mic.decoder.get(), nullptr, 0, pcm.data(), frame_size, 0);
+            mic.plc_events++;
+          } else {
+            break;
+          }
+        }
+
+        mic.expected_seq = static_cast<std::uint16_t>(mic.expected_seq + 1);
+
+        if (frames < 0) {
+          mic.decode_errors++;
+          BOOST_LOG(warning) << "[mic] Opus decode error: "sv << opus_strerror(frames);
+          continue;
+        }
+        if (frames == 0) continue;
+
+        int write_ret = mic.speaker->write(pcm.data(), (std::uint32_t) frames);
+        if (write_ret < 0) {
+          BOOST_LOG(error) << "[mic] WASAPI render device lost — disabling mic for this session"sv;
+          mic.speaker.reset();
+          return;
+        }
+        mic.frames_written++;
+      }
+    });
+
     server->map(packetTypes[IDX_ENCRYPTED], [server](session_t *session, const std::string_view &payload) {
       BOOST_LOG(verbose) << "type [IDX_ENCRYPTED]"sv;
 
@@ -2259,6 +2440,15 @@ namespace stream {
       BOOST_LOG(debug) << "Resetting Input..."sv;
       input::reset(session.input);
 
+      // Restore the default capture roles from the saved snapshot (only if we switched them), then release the mic passthrough objects.
+      if (session.mic.capture_switched && session.mic.audio_ctrl) {
+        session.mic.audio_ctrl->restore_capture_from(session.mic.capture_snap);
+        session.mic.capture_switched = false;
+      }
+      session.mic.speaker.reset();
+      session.mic.decoder.reset();
+      session.mic.audio_ctrl.reset();  // released after the restore above, which calls through it
+
       if (!session.undo_cmds.empty()) {
         auto exec_thread = std::thread([cmd_list = session.undo_cmds] {
           for (auto &cmd : cmd_list) {
@@ -2371,6 +2561,61 @@ namespace stream {
 
       session.pingTimeout = std::chrono::steady_clock::now() + config::stream.ping_timeout;
 
+      // Initialize mic passthrough: requires a configured mic_sink and the SS_ENC_CONTROL_V2 encryption flag
+      stream::mic_first_packet_received.store(false, std::memory_order_relaxed);
+      const bool mic_encrypted = (session.config.encryptionFlagsEnabled & SS_ENC_CONTROL_V2) != 0;
+
+      if (config::audio.mic_sink.empty()) {
+        BOOST_LOG(info) << "[mic] mic_sink not configured — passthrough disabled"sv;
+      } else if (!mic_encrypted) {
+        BOOST_LOG(warning) << "[mic] no encrypted control stream — passthrough disabled"sv;
+      } else {
+        // Creates the render endpoint and Opus decoder; every `return false` happens before session.mic is modified
+        auto init_mic = [&]() -> bool {
+          auto audio_ctrl = platf::audio_control();
+          if (!audio_ctrl) return false;
+
+          const auto snap = audio_ctrl->snapshot_capture_defaults();  // taken before any default-device change
+          auto spk = audio_ctrl->virtual_microphone(config::audio.mic_sink, 48000, 960);  // presumably 48 kHz / 960-frame packets — confirm
+          if (!spk) {
+            BOOST_LOG(warning) << "[mic] virtual_microphone() failed"sv;
+            return false;
+          }
+
+          int err = OPUS_OK;
+          auto *dec = opus_decoder_create(48000, 1, &err);  // 48 kHz, 1 channel (mono)
+          if (err != OPUS_OK || !dec) {
+            BOOST_LOG(error) << "[mic] decoder create failed: "sv << opus_strerror(err);
+            return false;
+          }
+
+          session.mic.speaker = std::move(spk);
+          session.mic.decoder.reset(dec);
+          session.mic.channels = 1;
+          session.mic.audio_ctrl = std::move(audio_ctrl);
+
+          if (!config::audio.mic_capture_device.empty()) {
+            session.mic.capture_snap = snap;  // recorded BEFORE switching: if the switch throws part-way, teardown still restores
+            session.mic.capture_switched = true;
+            session.mic.audio_ctrl->switch_capture_to(config::audio.mic_capture_device);
+          }
+
+          BOOST_LOG(info) << "[mic] passthrough active → "sv << config::audio.mic_sink;
+          return true;
+        };
+
+        try {
+          if (!init_mic()) {
+            session.mic.speaker.reset();
+            session.mic.decoder.reset();
+          }
+        } catch (...) {  // audio_ctrl / capture_switched are left as-is so session teardown can restore the capture defaults
+          BOOST_LOG(error) << "[mic] init threw exception — mic disabled"sv;
+          session.mic.speaker.reset();
+          session.mic.decoder.reset();
+        }
+      }
+
       session.audioThread = std::thread {audioThread, &session};
       session.videoThread = std::thread {videoThread, &session};
 

From 3f05de636dc94532229b316dd37774f5fd214926 Mon Sep 17 00:00:00 2001
From: xenstalker02 <tim.dilich@gmail.com>
Date: Tue, 24 Mar 2026 13:05:27 -0400
Subject: [PATCH 3/4] feat(config): add mic passthrough config fields; remove
 install_steam_drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds four new audio_t fields:
  mic_sink           — render endpoint name (e.g. 'CABLE Input ...')
  mic_capture_device — capture endpoint to set as default on session start
  mic_buffer_ms      — underrun gap threshold (default 500 ms)
  mic_buffer_packets — jitter buffer prebuffer size (default 3 packets)

Removes install_steam_drivers from audio_t and the corresponding
bool_f() binding in apply_config(). Steam Streaming Speakers are no longer
the supported loopback path.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/config.cpp | 10 ++++++++--
 src/config.h   |  5 ++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/config.cpp b/src/config.cpp
index 91359c56f..aa37fb2f1 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -800,8 +800,11 @@ namespace config {
   audio_t audio {
     {},  // audio_sink
     {},  // virtual_sink
+    {},  // mic_sink (empty: mic passthrough is disabled)
+    {},  // mic_capture_device (empty: the default capture device is not switched)
+    500,  // mic_buffer_ms
+    3,    // mic_buffer_packets
     true,  // stream audio
-    true,  // install_steam_drivers
     true,  // keep_sink_default
     true,  // auto_capture
   };
@@ -1659,8 +1662,11 @@ namespace config {
 
     string_f(vars, "audio_sink", audio.sink);
     string_f(vars, "virtual_sink", audio.virtual_sink);
+    string_f(vars, "mic_sink", audio.mic_sink);
+    string_f(vars, "mic_capture_device", audio.mic_capture_device);
+    int_f(vars, "mic_buffer_ms", audio.mic_buffer_ms);  // NOTE(review): no bounds passed here — confirm zero/negative values are handled by the consumer
+    int_f(vars, "mic_buffer_packets", audio.mic_buffer_packets);  // NOTE(review): no bounds passed here — confirm zero/negative values are handled by the consumer
     bool_f(vars, "stream_audio", audio.stream);
-    bool_f(vars, "install_steam_audio_drivers", audio.install_steam_drivers);
     bool_f(vars, "keep_sink_default", audio.keep_default);
     bool_f(vars, "auto_capture_sink", audio.auto_capture);
 
diff --git a/src/config.h b/src/config.h
index 4ad13ae5e..051de8b83 100644
--- a/src/config.h
+++ b/src/config.h
@@ -189,8 +189,11 @@ namespace config {
   struct audio_t {
     std::string sink;
     std::string virtual_sink;
+    std::string mic_sink;             // Render endpoint for mic passthrough (e.g. "CABLE Input"); empty disables passthrough
+    std::string mic_capture_device;   // Capture endpoint to set as default (e.g. "CABLE Output"); empty skips the switch
+    int mic_buffer_ms;                // Underrun gap threshold in milliseconds
+    int mic_buffer_packets;           // Jitter buffer prebuffer packet count
     bool stream;
-    bool install_steam_drivers;
     bool keep_default;
     bool auto_capture;
   };

From ac1e9245db60f02b22aeebbd459123335351c798 Mon Sep 17 00:00:00 2001
From: xenstalker02 <tim.dilich@gmail.com>
Date: Tue, 24 Mar 2026 13:05:32 -0400
Subject: [PATCH 4/4] feat(webui): add mic passthrough config fields to
 Audio/Video tab

Adds four ConfigFieldRenderer blocks for mic_sink, mic_capture_device,
mic_buffer_ms, and mic_buffer_packets to the Audio/Video configuration tab.

Adds corresponding English locale strings with descriptions explaining
VB-Audio Virtual Cable usage for each field.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../assets/web/configs/tabs/AudioVideo.vue    | 24 +++++++++++++++++++
 .../assets/web/public/assets/locale/en.json   |  8 +++++++
 2 files changed, 32 insertions(+)

diff --git a/src_assets/common/assets/web/configs/tabs/AudioVideo.vue b/src_assets/common/assets/web/configs/tabs/AudioVideo.vue
index 227c156c6..3d4c3e441 100644
--- a/src_assets/common/assets/web/configs/tabs/AudioVideo.vue
+++ b/src_assets/common/assets/web/configs/tabs/AudioVideo.vue
@@ -229,6 +229,30 @@ function selectVirtualDisplayLayout(v: unknown) {
       class="mb-6"
     />
 
+    <ConfigFieldRenderer
+      setting-key="mic_sink"
+      v-model="config.mic_sink"
+      class="mb-3"
+    />
+
+    <ConfigFieldRenderer
+      setting-key="mic_capture_device"
+      v-model="config.mic_capture_device"
+      class="mb-3"
+    />
+
+    <ConfigFieldRenderer
+      setting-key="mic_buffer_ms"
+      v-model="config.mic_buffer_ms"
+      class="mb-3"
+    />
+
+    <ConfigFieldRenderer
+      setting-key="mic_buffer_packets"
+      v-model="config.mic_buffer_packets"
+      class="mb-6"
+    />
+
     <AdapterNameSelector />
 
     <!-- Display configuration: clear, guided, pre-stream focused -->
diff --git a/src_assets/common/assets/web/public/assets/locale/en.json b/src_assets/common/assets/web/public/assets/locale/en.json
index 4230378a0..ba6e06f86 100644
--- a/src_assets/common/assets/web/public/assets/locale/en.json
+++ b/src_assets/common/assets/web/public/assets/locale/en.json
@@ -594,6 +594,14 @@
     "keep_sink_default_desc": "Leave the virtual audio sink selected as the default playback device while streaming audio is active.",
     "auto_capture_sink": "Auto capture current sink",
     "auto_capture_sink_desc": "Automatically follow the current default audio sink instead of sticking to the originally selected device.",
+    "mic_sink": "Mic passthrough render device",
+    "mic_sink_desc": "Name of the render endpoint used to feed client mic audio to the host. When using VB-Audio Virtual Cable, set this to 'CABLE Input (VB-Audio Virtual Cable)'. Leave blank to disable mic passthrough.",
+    "mic_capture_device": "Mic passthrough capture device",
+    "mic_capture_device_desc": "Host capture device to set as the system default while mic passthrough is active, so host apps receive client mic audio. Typically 'CABLE Output (VB-Audio Virtual Cable)'. Leave blank to skip switching.",
+    "mic_buffer_ms": "Mic underrun gap threshold (ms)",
+    "mic_buffer_ms_desc": "If a gap between mic packets exceeds this duration (milliseconds), a silence frame is inserted to prevent audio glitches. Default: 500.",
+    "mic_buffer_packets": "Mic jitter buffer size (packets)",
+    "mic_buffer_packets_desc": "Number of Opus packets to prebuffer in the jitter buffer before playout begins. Higher values reduce glitches on unstable connections at the cost of added latency. Default: 3.",
     "limit_framerate": "Limit capture framerate",
     "limit_framerate_desc": "Limit the framerate being captured to client requested framerate. May not run at full framerate if vsync is enabled and display refreshrate does not match requested framerate. Could cause lag on some clients if disabled.",
     "envvar_compatibility_mode": "ENVVAR compatibility mode",