diff --git a/README.md b/README.md index b00b23e16..7fe53142c 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Major features: - [x] Built-in Virtual Display with HDR support that matches the resolution/framerate config of your client automatically - [x] Permission management for clients - [x] Clipboard sync +- [x] Windows remote microphone redirection through Steam Streaming Microphone with host-side debug visibility - [x] Commands for client connection/disconnection (checkout [Auto pause/resume games](https://github.com/ClassicOldSong/Apollo/wiki/Auto-pause-resume-games)) - [x] Input only mode @@ -35,6 +36,14 @@ Apollo uses SudoVDA for virtual display. It features auto resolution and framera The virtual display works just like any physically attached monitors with SudoVDA, there's completely no need for a super complicated solution to "fix" resolution configurations for your devices. Unlike all other solutions that reuses one identity or generate a random one each time for any virtual display sessions, **Apollo assigns a fixed identity for each Artemis/Moonlight client, so your display configuration will be automatically remembered and managed by Windows natively.** +## About Remote Microphone Redirection + +This fork adds a working Windows remote microphone path for compatible Moonlight/Artemis clients. + +Apollo accepts the client's Opus microphone packets, decodes them on the host, and renders the audio into the Steam playback endpoint `Speakers (Steam Streaming Microphone)`. Host applications should then select `Microphone (Steam Streaming Microphone)` as their microphone source. + +Setup and implementation notes are documented in [docs/remote_microphone.md](docs/remote_microphone.md). + ## Configuration for dual GPU laptops Apollo supports dual GPUs seamlessly. 
diff --git a/cmake/compile_definitions/windows.cmake b/cmake/compile_definitions/windows.cmake index 7fe814367..e7611254b 100644 --- a/cmake/compile_definitions/windows.cmake +++ b/cmake/compile_definitions/windows.cmake @@ -59,6 +59,10 @@ set(PLATFORM_TARGET_FILES "${CMAKE_SOURCE_DIR}/src/platform/windows/display_ram.cpp" "${CMAKE_SOURCE_DIR}/src/platform/windows/display_wgc.cpp" "${CMAKE_SOURCE_DIR}/src/platform/windows/audio.cpp" + "${CMAKE_SOURCE_DIR}/src/platform/windows/apollo_vmic.h" + "${CMAKE_SOURCE_DIR}/src/platform/windows/apollo_vmic.cpp" + "${CMAKE_SOURCE_DIR}/src/platform/windows/mic_write.h" + "${CMAKE_SOURCE_DIR}/src/platform/windows/mic_write.cpp" "${CMAKE_SOURCE_DIR}/src/platform/windows/virtual_display.h" "${CMAKE_SOURCE_DIR}/src/platform/windows/virtual_display.cpp" "${CMAKE_SOURCE_DIR}/src/platform/windows/utils.h" diff --git a/cmake/dependencies/Boost_Sunshine.cmake b/cmake/dependencies/Boost_Sunshine.cmake index 11e7770a3..7c2628dee 100644 --- a/cmake/dependencies/Boost_Sunshine.cmake +++ b/cmake/dependencies/Boost_Sunshine.cmake @@ -3,7 +3,8 @@ # include_guard(GLOBAL) -set(BOOST_VERSION "1.89.0") +set(BOOST_MIN_VERSION "1.89.0") +set(BOOST_FETCH_VERSION "1.89.0") set(BOOST_COMPONENTS filesystem locale @@ -30,9 +31,9 @@ endif() if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.30") cmake_policy(SET CMP0167 NEW) # Get BoostConfig.cmake from upstream endif() -find_package(Boost CONFIG ${BOOST_VERSION} EXACT COMPONENTS ${BOOST_COMPONENTS}) +find_package(Boost CONFIG ${BOOST_MIN_VERSION} COMPONENTS ${BOOST_COMPONENTS}) if(NOT Boost_FOUND) - message(STATUS "Boost v${BOOST_VERSION} package not found in the system. Falling back to FetchContent.") + message(STATUS "Boost v${BOOST_MIN_VERSION}+ package not found in the system. 
Falling back to FetchContent.") include(FetchContent) if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") @@ -54,8 +55,8 @@ if(NOT Boost_FOUND) # Limit boost to the required libraries only set(BOOST_INCLUDE_LIBRARIES ${BOOST_COMPONENTS}) - set(BOOST_URL "https://github.com/boostorg/boost/releases/download/boost-${BOOST_VERSION}/boost-${BOOST_VERSION}-cmake.tar.xz") # cmake-lint: disable=C0301 - set(BOOST_HASH "SHA256=f48b48390380cfb94a629872346e3a81370dc498896f16019ade727ab72eb1ec") + set(BOOST_URL "https://github.com/boostorg/boost/releases/download/boost-${BOOST_FETCH_VERSION}/boost-${BOOST_FETCH_VERSION}-cmake.tar.xz") # cmake-lint: disable=C0301 + set(BOOST_HASH "SHA256=67acec02d0d118b5de9eb441f5fb707b3a1cdd884be00ca24b9a73c995511f74") if(CMAKE_VERSION VERSION_LESS "3.24.0") FetchContent_Declare( diff --git a/cmake/targets/common.cmake b/cmake/targets/common.cmake index ba378cbb6..48e397733 100644 --- a/cmake/targets/common.cmake +++ b/cmake/targets/common.cmake @@ -52,7 +52,18 @@ else() endif() #WebUI build -find_program(NPM npm REQUIRED) +if(WIN32) + unset(NODE CACHE) + unset(NPM_CLI CACHE) + find_program(NODE node.exe REQUIRED) + find_file(NPM_CLI npm-cli.js + PATHS + "${CMAKE_PREFIX_PATH}" + "C:/msys64/ucrt64/lib/node_modules/npm/bin" + REQUIRED) +else() + find_program(NPM npm REQUIRED) +endif() if (NPM_OFFLINE) set(NPM_INSTALL_FLAGS "--offline") @@ -63,8 +74,8 @@ endif() add_custom_target(web-ui ALL WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" COMMENT "Installing NPM Dependencies and Building the Web UI" - COMMAND "$<$:cmd;/C>" "${NPM}" install ${NPM_INSTALL_FLAGS} - COMMAND "${CMAKE_COMMAND}" -E env "SUNSHINE_BUILD_HOMEBREW=${NPM_BUILD_HOMEBREW}" "SUNSHINE_SOURCE_ASSETS_DIR=${NPM_SOURCE_ASSETS_DIR}" "SUNSHINE_ASSETS_DIR=${NPM_ASSETS_DIR}" "$<$:cmd;/C>" "${NPM}" run build # cmake-lint: disable=C0301 + COMMAND "$<$:${NODE};${NPM_CLI}>$<$>:${NPM}>" install ${NPM_INSTALL_FLAGS} + COMMAND "${CMAKE_COMMAND}" -E env "SUNSHINE_BUILD_HOMEBREW=${NPM_BUILD_HOMEBREW}" 
"SUNSHINE_SOURCE_ASSETS_DIR=${NPM_SOURCE_ASSETS_DIR}" "SUNSHINE_ASSETS_DIR=${NPM_ASSETS_DIR}" "$<$:${NODE};${NPM_CLI}>$<$>:${NPM}>" run build # cmake-lint: disable=C0301 COMMAND_EXPAND_LISTS VERBATIM) diff --git a/docs/configuration.md b/docs/configuration.md index b5377cad8..fa6a205e3 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -825,6 +825,56 @@ editing the `conf` file in a text editor. Use the examples as reference. +### mic_backend + + + + + + + + + + + + + + +
Description + Select how Apollo exposes redirected client microphone audio on Windows. + In this fork, Windows microphone redirection is standardized on the `steam_streaming_microphone` backend. + Apollo renders decoded client microphone audio into the Steam playback endpoint, and host applications + should select the paired `Microphone (Steam Streaming Microphone)` recording device. + @note{This option is currently only used on Windows hosts.} +
Default@code{} + steam_streaming_microphone + @endcode
Example@code{} + mic_backend = steam_streaming_microphone + @endcode
+ +### mic_device + + + + + + + + + + + + + + +
Description + The host-side device used for redirected client microphone audio. + On Windows, Apollo currently auto-detects the Steam Streaming Microphone render endpoint, so this value is typically left unset. + On Linux and macOS, this should point at the virtual device that Apollo writes into. + 
DefaultUnset.
Example (Linux)@code{} + mic_device = sunshine-mic + @endcode
+ ### stream_audio @@ -848,6 +898,29 @@ editing the `conf` file in a text editor. Use the examples as reference.
+### stream_mic + + + + + + + + + + + + + + +
Description + Whether Apollo should accept redirected client microphone audio and inject it into a host-side microphone backend. +
Default@code{} + disabled + @endcode
Example@code{} + stream_mic = enabled + @endcode
+ ### install_steam_audio_drivers diff --git a/docs/remote_microphone.md b/docs/remote_microphone.md new file mode 100644 index 000000000..92e037f30 --- /dev/null +++ b/docs/remote_microphone.md @@ -0,0 +1,67 @@ +# Remote Microphone Support + +This fork adds a working host-side remote microphone path for Apollo, focused on Windows hosts and Steam Streaming Microphone integration. + +## Overview + +The microphone path is: + +1. A compatible Moonlight/Artemis client captures local microphone audio. +2. The client sends microphone packets to Apollo on the dedicated microphone stream; encrypted microphone transport is required (see Configuration Notes). +3. Apollo receives the packets, decrypts them, and decodes the Opus frames on the host. +4. Apollo renders the decoded PCM into the Steam playback endpoint `Speakers (Steam Streaming Microphone)`. +5. Host applications consume that audio from the paired capture endpoint `Microphone (Steam Streaming Microphone)`. + +This keeps the host-side application flow simple: Apollo writes into Steam Streaming Microphone, and games, chat apps, or capture tools use `Microphone (Steam Streaming Microphone)` as the microphone. + +## What Changed + +The working implementation in this fork includes: + +- Dedicated microphone session handling in the stream path, including packet receive, packet decryption, and per-session lifecycle management. +- Windows microphone backend initialization and teardown that stays alive for the full remote microphone session. +- A Steam-backed Windows microphone path that auto-detects the Steam microphone render/capture pair, normalizes only that pair to `2ch, 32-bit, 48000 Hz` when microphone streaming starts, decodes Opus microphone frames as mono float `48 kHz`, and writes them into the Steam microphone render buffer using a `float32` shared-mode render client. +- Host-side recovery for recoverable WASAPI failures such as device invalidation or audio service restarts. 
+- A Remote Microphone Debug panel in the web UI that shows packet arrival, decode status, render status, signal detection, counters, and recent mic events. + +## Key Files + +- `src/stream.cpp`: microphone socket handling, session startup/shutdown, and packet routing. +- `src/audio.cpp`: shared microphone debug state and persistent audio context ownership for the redirect device. +- `src/platform/windows/audio.cpp`: Windows microphone backend selection and redirect device ownership. +- `src/platform/windows/apollo_vmic.cpp`: Steam Streaming Microphone backend wrapper. +- `src/platform/windows/mic_write.cpp`: device discovery, WASAPI initialization, Opus decode, and Steam Streaming Microphone rendering. +- `src_assets/common/assets/web/configs/tabs/AudioVideo.vue`: Remote Microphone Debug UI. + +## Windows Requirements + +- Install the Steam audio drivers on the host. +- Ensure the playback endpoint `Speakers (Steam Streaming Microphone)` exists and is enabled. +- In host applications, select `Microphone (Steam Streaming Microphone)` as the microphone/recording source. +- Enable `stream_mic` in Apollo. +- Use a client build that supports Apollo microphone redirection. + +## Configuration Notes + +- `stream_mic` enables the host microphone redirect path. +- `mic_backend` defaults to `steam_streaming_microphone` on Windows in this fork. +- On Windows, Apollo auto-detects the Steam Streaming Microphone pair and normalizes only those microphone endpoints to `2ch, 32-bit, 48000 Hz` automatically instead of requiring a manual device-properties change. +- `mic_device` is mainly relevant on non-Windows platforms. The Windows path currently targets Steam Streaming Microphone automatically. +- Microphone redirection always requires the client to negotiate encrypted microphone transport. If the client falls back to plaintext microphone transport, Apollo disables microphone passthrough for that session instead of accepting unencrypted microphone packets. 
+ +## Debugging + +The Audio/Video page on Windows exposes a Remote Microphone Debug panel that shows: + +- whether the client is sending packets +- whether Apollo is decoding microphone frames +- whether Apollo is rendering into Steam Streaming Microphone +- whether non-silent input is being detected +- which endpoint mix format Apollo discovered +- which render and capture device formats are currently active +- which render format Apollo actually initialized +- whether the recommended Steam microphone format is active or had to be enforced +- how mono input is mapped to the host channels +- the most recent mic errors and recent mic events + +This view is intended to quickly separate client capture problems from host decode/render problems. diff --git a/src/audio.cpp b/src/audio.cpp index 18cf7f055..ddacd050c 100644 --- a/src/audio.cpp +++ b/src/audio.cpp @@ -3,6 +3,10 @@ * @brief Definitions for audio capture and encoding. */ // standard includes +#include +#include +#include +#include #include // lib includes @@ -22,6 +26,52 @@ namespace audio { using opus_t = util::safe_ptr; using sample_queue_t = std::shared_ptr>>; + namespace { + struct mic_debug_state_t { + std::mutex mutex; + mic_debug_snapshot_t snapshot; + std::chrono::steady_clock::time_point last_packet_time {}; + std::chrono::steady_clock::time_point last_decode_time {}; + std::chrono::steady_clock::time_point last_render_time {}; + bool has_last_packet_time {false}; + bool has_last_decode_time {false}; + bool has_last_render_time {false}; + std::deque recent_events; + }; + + mic_debug_state_t &mic_debug_state() { + static mic_debug_state_t state; + return state; + } + + void append_mic_event(mic_debug_state_t &state, const std::string &message) { + const auto now = std::chrono::system_clock::now(); + const auto tt = std::chrono::system_clock::to_time_t(now); + std::tm tm {}; +#ifdef _WIN32 + localtime_s(&tm, &tt); +#else + localtime_r(&tt, &tm); +#endif + char timestamp[16] {}; + 
std::strftime(timestamp, sizeof(timestamp), "%H:%M:%S", &tm); + state.recent_events.push_front(std::string {timestamp} + " " + message); + while (state.recent_events.size() > 12) { + state.recent_events.pop_back(); + } + } + + void set_mic_state_locked(mic_debug_state_t &state, const std::string &status) { + state.snapshot.state = status; + append_mic_event(state, status); + } + + audio_ctx_ref_t &mic_redirect_audio_ctx() { + static audio_ctx_ref_t ref; + return ref; + } + } // namespace + static int start_audio_control(audio_ctx_t &ctx); static void stop_audio_control(audio_ctx_t &); static void apply_surround_params(opus_stream_config_t &stream, const stream_params_t ¶ms); @@ -268,6 +318,249 @@ namespace audio { return ctx.control->is_sink_available(sink); } + int init_mic_redirect_device() { + auto &held_ref = mic_redirect_audio_ctx(); + if (!held_ref) { + held_ref = get_audio_ctx_ref(); + } + + auto &ref = held_ref; + if (!ref || !ref->control) { + mic_debug_on_backend_error("Audio control is unavailable; microphone redirection could not initialize"); + return -1; + } + + return ref->control->init_mic_redirect_device(); + } + + void release_mic_redirect_device() { + auto &ref = mic_redirect_audio_ctx(); + if (!ref || !ref->control) { + ref = {}; + return; + } + + ref->control->release_mic_redirect_device(); + ref = {}; + } + + int write_mic_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) { + auto &held_ref = mic_redirect_audio_ctx(); + auto ref = held_ref ? 
held_ref : get_audio_ctx_ref(); + if (!ref || !ref->control) { + BOOST_LOG(warning) << "Client microphone packet rejected before decode because audio control is unavailable" + << " [seq=" << sequence_number << ", ts=" << timestamp << ", len=" << len << ']'; + mic_debug_on_packet_dropped(sequence_number, "Audio control is unavailable while writing microphone data"); + return -1; + } + + return ref->control->write_mic_data(data, len, sequence_number, timestamp); + } + + mic_debug_snapshot_t get_mic_debug_snapshot() { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + + auto snapshot = state.snapshot; + const auto now = std::chrono::steady_clock::now(); + if (state.has_last_packet_time) { + snapshot.last_packet_age_ms = std::chrono::duration_cast(now - state.last_packet_time).count(); + } + if (state.has_last_decode_time) { + snapshot.last_decode_age_ms = std::chrono::duration_cast(now - state.last_decode_time).count(); + } + if (state.has_last_render_time) { + snapshot.last_render_age_ms = std::chrono::duration_cast(now - state.last_render_time).count(); + } + snapshot.recent_events.assign(state.recent_events.begin(), state.recent_events.end()); + snapshot.signal_detected = snapshot.last_input_level >= 0.02 && snapshot.last_decode_age_ms >= 0 && snapshot.last_decode_age_ms < 3000; + return snapshot; + } + + void mic_debug_on_session_start(const std::string &client_name, bool encryption_enabled) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot = {}; + state.snapshot.session_active = true; + state.snapshot.mic_requested = true; + state.snapshot.encryption_enabled = encryption_enabled; + state.snapshot.client_name = client_name; + state.snapshot.state = "Microphone redirection negotiated; waiting for client audio"; + state.snapshot.last_packet_age_ms = -1; + state.snapshot.last_decode_age_ms = -1; + state.snapshot.last_render_age_ms = -1; + state.has_last_packet_time = false; + state.has_last_decode_time = 
false; + state.has_last_render_time = false; + state.recent_events.clear(); + append_mic_event(state, "Microphone redirection negotiated for client [" + client_name + "]"); + } + + void mic_debug_on_session_stop(const std::string &reason) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.session_active = false; + state.snapshot.decode_active = false; + state.snapshot.render_active = false; + state.snapshot.signal_detected = false; + state.snapshot.state = reason.empty() ? "No active remote microphone session" : reason; + append_mic_event(state, reason.empty() ? "Remote microphone session ended" : reason); + } + + void mic_debug_on_backend_initialized(const std::string &backend_name) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.backend_initialized = true; + state.snapshot.backend_name = backend_name; + state.snapshot.last_error.clear(); + append_mic_event(state, "Microphone backend ready: " + backend_name); + } + + void mic_debug_on_backend_target(const std::string &target_device_name, int channels, std::uint32_t sample_rate) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.target_device_name = target_device_name; + state.snapshot.state = "Rendering client microphone into " + target_device_name; + append_mic_event(state, "Using host render target [" + target_device_name + "] at " + std::to_string(channels) + "ch/" + std::to_string(sample_rate) + "Hz"); + } + + void mic_debug_on_backend_format(const std::string &endpoint_mix_format, + const std::string &render_format, + bool resampling_active, + const std::string &channel_mapping) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.endpoint_mix_format = endpoint_mix_format; + state.snapshot.render_format = render_format; + state.snapshot.resampling_active = resampling_active; + state.snapshot.channel_mapping = channel_mapping; + append_mic_event( + 
state, + "Endpoint mix format [" + endpoint_mix_format + "], render format [" + render_format + + "], resampling " + (resampling_active ? "enabled" : "disabled") + ); + } + + void mic_debug_on_backend_endpoint_formats(const std::string &render_device_format, + const std::string &capture_device_name, + const std::string &capture_endpoint_mix_format, + const std::string &capture_device_format, + bool recommended_format_enforced, + bool recommended_format_active) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.render_device_format = render_device_format; + state.snapshot.capture_device_name = capture_device_name; + state.snapshot.capture_endpoint_mix_format = capture_endpoint_mix_format; + state.snapshot.capture_device_format = capture_device_format; + state.snapshot.recommended_format_enforced = recommended_format_enforced; + state.snapshot.recommended_format_active = recommended_format_active; + append_mic_event( + state, + "Render device format [" + render_device_format + "], capture device [" + capture_device_name + + "], capture mix [" + capture_endpoint_mix_format + "], recommended format " + + (recommended_format_active ? "active" : "inactive") + + (recommended_format_enforced ? 
" (enforced)" : "") + ); + } + + void mic_debug_on_backend_error(const std::string &message) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.last_error = message; + state.snapshot.render_active = false; + state.snapshot.state = message; + append_mic_event(state, message); + } + + void mic_debug_on_packet_received(std::uint16_t sequence_number, std::size_t payload_len) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.first_packet_received = true; + state.snapshot.packets_received++; + state.snapshot.last_sequence_number = sequence_number; + state.snapshot.last_payload_size = payload_len; + state.last_packet_time = std::chrono::steady_clock::now(); + state.has_last_packet_time = true; + if (state.snapshot.packets_received == 1) { + set_mic_state_locked(state, "Receiving microphone packets from Moonlight"); + } + } + + void mic_debug_on_packet_decrypt_error(std::uint16_t sequence_number, const std::string &message) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.decrypt_errors++; + state.snapshot.last_sequence_number = sequence_number; + state.snapshot.last_error = message; + state.snapshot.state = message; + append_mic_event(state, message); + } + + void mic_debug_on_packet_dropped(std::uint16_t sequence_number, const std::string &message) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.packets_dropped++; + state.snapshot.last_sequence_number = sequence_number; + state.snapshot.last_error = message; + state.snapshot.state = message; + append_mic_event(state, message); + } + + void mic_debug_on_packet_decoded(std::uint16_t sequence_number, double normalized_level, bool silent) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.decode_active = true; + state.snapshot.packets_decoded++; + state.snapshot.last_sequence_number = sequence_number; + 
state.snapshot.last_input_level = normalized_level; + state.snapshot.last_error.clear(); + if (silent) { + state.snapshot.silent_packets++; + } + state.last_decode_time = std::chrono::steady_clock::now(); + state.has_last_decode_time = true; + if (state.snapshot.packets_decoded == 1) { + set_mic_state_locked(state, "Apollo decoded microphone audio from Moonlight"); + } + } + + void mic_debug_on_packet_rendered(std::uint16_t sequence_number, double normalized_level, bool silent) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.packets_rendered++; + state.snapshot.last_sequence_number = sequence_number; + state.snapshot.last_render_level = normalized_level; + state.snapshot.render_active = true; + state.snapshot.last_error.clear(); + state.last_render_time = std::chrono::steady_clock::now(); + state.has_last_render_time = true; + if (state.snapshot.packets_rendered == 1) { + set_mic_state_locked(state, "Apollo is rendering microphone audio into Steam Streaming Microphone"); + } + } + + void mic_debug_on_decode_error(std::uint16_t sequence_number, const std::string &message) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.decode_errors++; + state.snapshot.last_sequence_number = sequence_number; + state.snapshot.last_error = message; + state.snapshot.state = message; + append_mic_event(state, message); + } + + void mic_debug_on_render_error(std::uint16_t sequence_number, const std::string &message) { + auto &state = mic_debug_state(); + std::lock_guard lock(state.mutex); + state.snapshot.render_errors++; + state.snapshot.last_sequence_number = sequence_number; + state.snapshot.last_error = message; + state.snapshot.render_active = false; + state.snapshot.state = message; + append_mic_event(state, message); + } + int map_stream(int channels, bool quality) { int shift = quality ? 
1 : 0; switch (channels) { diff --git a/src/audio.h b/src/audio.h index 29f6bf17d..207dce095 100644 --- a/src/audio.h +++ b/src/audio.h @@ -9,7 +9,11 @@ #include "thread_safe.h" #include "utility.h" +#include #include +#include +#include +#include namespace audio { enum stream_config_e : int { @@ -77,6 +81,48 @@ namespace audio { using packet_t = std::pair; using audio_ctx_ref_t = safe::shared_t::ptr_t; + struct mic_debug_snapshot_t { + bool session_active {}; + bool mic_requested {}; + bool encryption_enabled {}; + bool backend_initialized {}; + bool first_packet_received {}; + bool decode_active {}; + bool render_active {}; + bool signal_detected {}; + std::uint64_t packets_received {}; + std::uint64_t packets_decoded {}; + std::uint64_t packets_rendered {}; + std::uint64_t packets_dropped {}; + std::uint64_t decrypt_errors {}; + std::uint64_t decode_errors {}; + std::uint64_t render_errors {}; + std::uint64_t silent_packets {}; + std::uint16_t last_sequence_number {}; + std::size_t last_payload_size {}; + double last_input_level {}; + double last_render_level {}; + std::int64_t last_packet_age_ms {-1}; + std::int64_t last_decode_age_ms {-1}; + std::int64_t last_render_age_ms {-1}; + std::string client_name; + std::string backend_name; + std::string target_device_name; + std::string endpoint_mix_format; + std::string render_device_format; + std::string render_format; + std::string capture_device_name; + std::string capture_endpoint_mix_format; + std::string capture_device_format; + std::string channel_mapping; + std::string state; + std::string last_error; + bool resampling_active {}; + bool recommended_format_enforced {}; + bool recommended_format_active {}; + std::vector recent_events; + }; + void capture(safe::mail_t mail, config_t config, void *channel_data); /** @@ -106,4 +152,27 @@ namespace audio { * @examples_end */ bool is_audio_ctx_sink_available(const audio_ctx_t &ctx); + int init_mic_redirect_device(); + void release_mic_redirect_device(); + int 
write_mic_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp); + mic_debug_snapshot_t get_mic_debug_snapshot(); + void mic_debug_on_session_start(const std::string &client_name, bool encryption_enabled); + void mic_debug_on_session_stop(const std::string &reason = {}); + void mic_debug_on_backend_initialized(const std::string &backend_name); + void mic_debug_on_backend_target(const std::string &target_device_name, int channels, std::uint32_t sample_rate); + void mic_debug_on_backend_format(const std::string &endpoint_mix_format, const std::string &render_format, bool resampling_active, const std::string &channel_mapping); + void mic_debug_on_backend_endpoint_formats(const std::string &render_device_format, + const std::string &capture_device_name, + const std::string &capture_endpoint_mix_format, + const std::string &capture_device_format, + bool recommended_format_enforced, + bool recommended_format_active); + void mic_debug_on_backend_error(const std::string &message); + void mic_debug_on_packet_received(std::uint16_t sequence_number, std::size_t payload_len); + void mic_debug_on_packet_decrypt_error(std::uint16_t sequence_number, const std::string &message); + void mic_debug_on_packet_dropped(std::uint16_t sequence_number, const std::string &message); + void mic_debug_on_packet_decoded(std::uint16_t sequence_number, double normalized_level, bool silent); + void mic_debug_on_packet_rendered(std::uint16_t sequence_number, double normalized_level, bool silent); + void mic_debug_on_decode_error(std::uint16_t sequence_number, const std::string &message); + void mic_debug_on_render_error(std::uint16_t sequence_number, const std::string &message); } // namespace audio diff --git a/src/config.cpp b/src/config.cpp index 6892e3029..0b4e6308f 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -521,7 +521,10 @@ namespace config { audio_t audio { {}, // audio_sink {}, // virtual_sink + "steam_streaming_microphone", // mic_backend 
+ {}, // mic_device true, // stream audio + false, // stream microphone true, // install_steam_drivers true, // keep_sink_default true, // auto_capture @@ -1227,7 +1230,10 @@ namespace config { string_f(vars, "audio_sink", audio.sink); string_f(vars, "virtual_sink", audio.virtual_sink); + string_f(vars, "mic_backend", audio.mic_backend); + string_f(vars, "mic_device", audio.mic_device); bool_f(vars, "stream_audio", audio.stream); + bool_f(vars, "stream_mic", audio.stream_mic); bool_f(vars, "install_steam_audio_drivers", audio.install_steam_drivers); bool_f(vars, "keep_sink_default", audio.keep_default); bool_f(vars, "auto_capture_sink", audio.auto_capture); diff --git a/src/config.h b/src/config.h index a6d7c8338..8e9eb10be 100644 --- a/src/config.h +++ b/src/config.h @@ -154,7 +154,10 @@ namespace config { struct audio_t { std::string sink; std::string virtual_sink; + std::string mic_backend; + std::string mic_device; bool stream; + bool stream_mic; bool install_steam_drivers; bool keep_default; bool auto_capture; diff --git a/src/confighttp.cpp b/src/confighttp.cpp index d53ac6537..b91db114e 100644 --- a/src/confighttp.cpp +++ b/src/confighttp.cpp @@ -25,6 +25,7 @@ #include // local includes +#include "audio.h" #include "config.h" #include "confighttp.h" #include "crypto.h" @@ -1031,6 +1032,63 @@ namespace confighttp { send_response(response, output_tree); } + /** + * @brief Get the active remote microphone debug status. + * @param response The HTTP response object. + * @param request The HTTP request object. 
+ */ + void getAudioDebug(resp_https_t response, req_https_t request) { + if (!authenticate(response, request)) { + return; + } + + print_req(request); + + const auto snapshot = audio::get_mic_debug_snapshot(); + nlohmann::json output_tree; + output_tree["status"] = true; + output_tree["sessionActive"] = snapshot.session_active; + output_tree["micRequested"] = snapshot.mic_requested; + output_tree["encryptionEnabled"] = snapshot.encryption_enabled; + output_tree["backendInitialized"] = snapshot.backend_initialized; + output_tree["firstPacketReceived"] = snapshot.first_packet_received; + output_tree["decodeActive"] = snapshot.decode_active; + output_tree["renderActive"] = snapshot.render_active; + output_tree["signalDetected"] = snapshot.signal_detected; + output_tree["packetsReceived"] = snapshot.packets_received; + output_tree["packetsDecoded"] = snapshot.packets_decoded; + output_tree["packetsRendered"] = snapshot.packets_rendered; + output_tree["packetsDropped"] = snapshot.packets_dropped; + output_tree["decryptErrors"] = snapshot.decrypt_errors; + output_tree["decodeErrors"] = snapshot.decode_errors; + output_tree["renderErrors"] = snapshot.render_errors; + output_tree["silentPackets"] = snapshot.silent_packets; + output_tree["lastSequenceNumber"] = snapshot.last_sequence_number; + output_tree["lastPayloadSize"] = snapshot.last_payload_size; + output_tree["lastInputLevel"] = snapshot.last_input_level; + output_tree["lastRenderLevel"] = snapshot.last_render_level; + output_tree["lastPacketAgeMs"] = snapshot.last_packet_age_ms; + output_tree["lastDecodeAgeMs"] = snapshot.last_decode_age_ms; + output_tree["lastRenderAgeMs"] = snapshot.last_render_age_ms; + output_tree["clientName"] = snapshot.client_name; + output_tree["backendName"] = snapshot.backend_name; + output_tree["targetDeviceName"] = snapshot.target_device_name; + output_tree["endpointMixFormat"] = snapshot.endpoint_mix_format; + output_tree["renderDeviceFormat"] = snapshot.render_device_format; + 
output_tree["renderFormat"] = snapshot.render_format; + output_tree["captureDeviceName"] = snapshot.capture_device_name; + output_tree["captureEndpointMixFormat"] = snapshot.capture_endpoint_mix_format; + output_tree["captureDeviceFormat"] = snapshot.capture_device_format; + output_tree["resamplingActive"] = snapshot.resampling_active; + output_tree["recommendedFormatEnforced"] = snapshot.recommended_format_enforced; + output_tree["recommendedFormatActive"] = snapshot.recommended_format_active; + output_tree["channelMapping"] = snapshot.channel_mapping; + output_tree["state"] = snapshot.state; + output_tree["lastError"] = snapshot.last_error; + output_tree["recentEvents"] = snapshot.recent_events; + send_response(response, output_tree); + } + /** * @brief Save the configuration settings. * @param response The HTTP response object. @@ -1542,6 +1600,7 @@ namespace confighttp { server.resource["^/api/logs$"]["GET"] = getLogs; server.resource["^/api/config$"]["GET"] = getConfig; server.resource["^/api/config$"]["POST"] = saveConfig; + server.resource["^/api/audio-debug$"]["GET"] = getAudioDebug; server.resource["^/api/configLocale$"]["GET"] = getLocale; server.resource["^/api/restart$"]["POST"] = restart; server.resource["^/api/quit$"]["POST"] = quit; diff --git a/src/crypto.cpp b/src/crypto.cpp index 0f7fbe2c0..d40ea1aa5 100644 --- a/src/crypto.cpp +++ b/src/crypto.cpp @@ -144,6 +144,18 @@ namespace crypto { return 0; } + static int init_decrypt_cbc(cipher_ctx_t &ctx, aes_t *key, aes_t *iv, bool padding) { + ctx.reset(EVP_CIPHER_CTX_new()); + + if (EVP_DecryptInit_ex(ctx.get(), EVP_aes_128_cbc(), nullptr, key->data(), iv->data()) != 1) { + return -1; + } + + EVP_CIPHER_CTX_set_padding(ctx.get(), padding); + + return 0; + } + int gcm_t::decrypt(const std::string_view &tagged_cipher, std::vector &plaintext, aes_t *iv) { if (!decrypt_ctx && init_decrypt_gcm(decrypt_ctx, &key, iv, padding)) { return -1; @@ -303,6 +315,31 @@ namespace crypto { return update_outlen + 
final_outlen; } + int cbc_t::decrypt(const std::string_view &cipher, std::vector &plaintext, aes_t *iv) { + if (!decrypt_ctx && init_decrypt_cbc(decrypt_ctx, &key, iv, padding)) { + return -1; + } + + if (EVP_DecryptInit_ex(decrypt_ctx.get(), nullptr, nullptr, nullptr, iv->data()) != 1) { + return -1; + } + + plaintext.resize(round_to_pkcs7_padded(cipher.size())); + + int update_outlen, final_outlen; + + if (EVP_DecryptUpdate(decrypt_ctx.get(), plaintext.data(), &update_outlen, (const std::uint8_t *) cipher.data(), cipher.size()) != 1) { + return -1; + } + + if (EVP_DecryptFinal_ex(decrypt_ctx.get(), plaintext.data() + update_outlen, &final_outlen) != 1) { + return -1; + } + + plaintext.resize(update_outlen + final_outlen); + return 0; + } + ecb_t::ecb_t(const aes_t &key, bool padding): cipher_t {EVP_CIPHER_CTX_new(), EVP_CIPHER_CTX_new(), key, padding} { } diff --git a/src/crypto.h b/src/crypto.h index 350bb5cc8..c70742d5a 100644 --- a/src/crypto.h +++ b/src/crypto.h @@ -226,6 +226,7 @@ namespace crypto { * @return The total length of the ciphertext written into cipher. Returns -1 in case of an error. 
*/ int encrypt(const std::string_view &plaintext, std::uint8_t *cipher, aes_t *iv); + int decrypt(const std::string_view &cipher, std::vector &plaintext, aes_t *iv); }; } // namespace cipher } // namespace crypto diff --git a/src/platform/common.h b/src/platform/common.h index 2073d5937..d663ebbd3 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -6,6 +6,7 @@ // standard includes #include +#include #include #include #include @@ -558,6 +559,10 @@ namespace platf { virtual std::unique_ptr microphone(const std::uint8_t *mapping, int channels, std::uint32_t sample_rate, std::uint32_t frame_size) = 0; + virtual int init_mic_redirect_device() = 0; + virtual void release_mic_redirect_device() = 0; + virtual int write_mic_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) = 0; + /** * @brief Check if the audio sink is available in the system. * @param sink Sink to be checked. diff --git a/src/platform/linux/audio.cpp b/src/platform/linux/audio.cpp index 0e53e939b..f257c6880 100644 --- a/src/platform/linux/audio.cpp +++ b/src/platform/linux/audio.cpp @@ -3,12 +3,14 @@ * @brief Definitions for audio control on Linux. 
*/ // standard includes +#include #include #include #include // lib includes #include +#include #include #include #include @@ -102,6 +104,71 @@ namespace platf { return mic; } + struct mic_redirect_t { + util::safe_ptr sink; + util::safe_ptr decoder; + + int init(const std::string &sink_name) { + int opus_error = OPUS_OK; + decoder.reset(opus_decoder_create(48000, 1, &opus_error)); + if (!decoder || opus_error != OPUS_OK) { + BOOST_LOG(error) << "Couldn't create Opus decoder for microphone redirection: "sv << opus_strerror(opus_error); + return -1; + } + + pa_sample_spec ss {PA_SAMPLE_S16NE, 48000, 1}; + pa_buffer_attr attr { + .maxlength = uint32_t(-1), + .tlength = uint32_t(960 * sizeof(opus_int16) * 6), + .prebuf = uint32_t(-1), + .minreq = uint32_t(-1), + .fragsize = uint32_t(-1), + }; + + int status = 0; + sink.reset(pa_simple_new(nullptr, "sunshine", PA_STREAM_PLAYBACK, sink_name.c_str(), "sunshine-mic", &ss, nullptr, &attr, &status)); + if (!sink) { + BOOST_LOG(error) << "Couldn't open PulseAudio sink for microphone redirection ["sv << sink_name << "]: "sv << pa_strerror(status); + decoder.reset(); + return -1; + } + + return 0; + } + + int write_data(const char *data, std::size_t len, std::uint16_t sequence_number) { + (void) sequence_number; + + if (!sink || !decoder || data == nullptr || len == 0) { + return -1; + } + + std::array pcm {}; + auto decoded = opus_decode(decoder.get(), reinterpret_cast(data), static_cast(len), pcm.data(), static_cast(pcm.size()), 0); + if (decoded <= 0) { + return -1; + } + + int status = 0; + if (pa_simple_write(sink.get(), pcm.data(), decoded * sizeof(opus_int16), &status) < 0) { + BOOST_LOG(debug) << "PulseAudio microphone write failed: "sv << pa_strerror(status); + return -1; + } + + return decoded; + } + + void cleanup() { + if (sink) { + int status = 0; + pa_simple_drain(sink.get(), &status); + } + + sink.reset(); + decoder.reset(); + } + }; + namespace pa { template struct add_const_helper; @@ -186,6 +253,7 @@ 
namespace platf { loop_t loop; ctx_t ctx; std::string requested_sink; + std::unique_ptr mic_redirect_device; struct { std::uint32_t stereo = PA_INVALID_INDEX; @@ -459,6 +527,43 @@ namespace platf { return ::platf::microphone(mapping, channels, sample_rate, frame_size, get_monitor_name(sink_name)); } + int init_mic_redirect_device() override { + if (mic_redirect_device) { + return 0; + } + + std::string sink_name = config::audio.mic_device; + if (sink_name.empty()) { + BOOST_LOG(warning) << "Set config option [stream_mic] with [mic_device] pointing to a virtual PulseAudio/PipeWire sink to enable microphone redirection"sv; + return -1; + } + + auto device = std::make_unique(); + if (device->init(sink_name) != 0) { + return -1; + } + + BOOST_LOG(info) << "Client microphone redirection target sink: "sv << sink_name; + mic_redirect_device = std::move(device); + return 0; + } + + void release_mic_redirect_device() override { + if (mic_redirect_device) { + mic_redirect_device->cleanup(); + mic_redirect_device.reset(); + } + } + + int write_mic_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) override { + (void) timestamp; + if (!mic_redirect_device) { + return -1; + } + + return mic_redirect_device->write_data(data, len, sequence_number); + } + bool is_sink_available(const std::string &sink) override { BOOST_LOG(warning) << "audio_control_t::is_sink_available() unimplemented: "sv << sink; return true; @@ -495,6 +600,7 @@ namespace platf { } ~server_t() override { + release_mic_redirect_device(); unload_null(index.stereo); unload_null(index.surround51); unload_null(index.surround71); diff --git a/src/platform/macos/microphone.mm b/src/platform/macos/microphone.mm index 06b9c19a8..1b2396952 100644 --- a/src/platform/macos/microphone.mm +++ b/src/platform/macos/microphone.mm @@ -78,6 +78,22 @@ int set_sink(const std::string &sink) override { return mic; } + int init_mic_redirect_device() override { + BOOST_LOG(warning) << 
"Client microphone redirection is not implemented on macOS yet"sv; + return -1; + } + + void release_mic_redirect_device() override { + } + + int write_mic_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) override { + (void) data; + (void) len; + (void) sequence_number; + (void) timestamp; + return -1; + } + bool is_sink_available(const std::string &sink) override { BOOST_LOG(warning) << "audio_control_t::is_sink_available() unimplemented: "sv << sink; return true; diff --git a/src/platform/windows/apollo_vmic.cpp b/src/platform/windows/apollo_vmic.cpp new file mode 100644 index 000000000..89e7aa3c9 --- /dev/null +++ b/src/platform/windows/apollo_vmic.cpp @@ -0,0 +1,60 @@ +/** + * @file src/platform/windows/apollo_vmic.cpp + * @brief Steam Streaming Microphone backend for Windows host-side mic injection. + */ +#include "apollo_vmic.h" + +#include "mic_write.h" +#include "src/logging.h" + +namespace platf::audio { + apollo_vmic_t::~apollo_vmic_t() = default; + + std::string_view apollo_vmic_t::backend_id() const { + return "steam_streaming_microphone"; + } + + bool apollo_vmic_t::log_missing_driver_once() { + if (missing_driver_logged) { + return false; + } + + missing_driver_logged = true; + BOOST_LOG(warning) + << "Steam Streaming Microphone is unavailable. Install the local Steam audio drivers and ensure the " + << "\"Speakers (Steam Streaming Microphone)\" playback endpoint is present. 
Host applications should capture from " + << "\"Microphone (Steam Streaming Microphone)\"."; + return true; + } + + int apollo_vmic_t::init() { + if (!speaker_backend) { + speaker_backend = std::make_unique( + "steam_streaming_microphone", + std::vector { + L"Steam Streaming Microphone", + L"Speakers (Steam Streaming Microphone)", + } + ); + } + + if (speaker_backend->init() == 0) { + return 0; + } + + speaker_backend.reset(); + log_missing_driver_once(); + return -1; + } + + int apollo_vmic_t::write_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) { + if (!speaker_backend) { + BOOST_LOG(warning) << "Client microphone packet rejected before decode because the Steam Streaming Microphone backend is missing" + << " [seq=" << sequence_number << ", ts=" << timestamp << ", len=" << len << ']'; + log_missing_driver_once(); + return -1; + } + + return speaker_backend->write_data(data, len, sequence_number, timestamp); + } +} // namespace platf::audio diff --git a/src/platform/windows/apollo_vmic.h b/src/platform/windows/apollo_vmic.h new file mode 100644 index 000000000..62cd91394 --- /dev/null +++ b/src/platform/windows/apollo_vmic.h @@ -0,0 +1,38 @@ +/** + * @file src/platform/windows/apollo_vmic.h + * @brief Steam Streaming Microphone backend definitions. 
+ */ +#pragma once + +#include +#include +#include +#include + +namespace platf::audio { + class mic_write_wasapi_t; + + class mic_redirect_backend_t { + public: + virtual ~mic_redirect_backend_t() = default; + + virtual std::string_view backend_id() const = 0; + virtual int init() = 0; + virtual int write_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) = 0; + }; + + class apollo_vmic_t final: public mic_redirect_backend_t { + public: + ~apollo_vmic_t() override; + + std::string_view backend_id() const override; + int init() override; + int write_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) override; + + private: + bool log_missing_driver_once(); + + bool missing_driver_logged = false; + std::unique_ptr speaker_backend; + }; +} // namespace platf::audio diff --git a/src/platform/windows/audio.cpp b/src/platform/windows/audio.cpp index 69a0b2ff3..57586e7ef 100644 --- a/src/platform/windows/audio.cpp +++ b/src/platform/windows/audio.cpp @@ -6,6 +6,7 @@ // standard includes #include +#include // platform includes #include @@ -16,7 +17,10 @@ #include // local includes +#include "apollo_vmic.h" +#include "mic_write.h" #include "misc.h" +#include "src/audio.h" #include "src/config.h" #include "src/logging.h" #include "src/platform/common.h" @@ -39,7 +43,8 @@ DEFINE_PROPERTYKEY(PKEY_DeviceInterface_FriendlyName, 0x026e516e, 0xb814, 0x414b namespace { constexpr auto SAMPLE_RATE = 48000; - constexpr auto STEAM_AUDIO_DRIVER_PATH = L"%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\" STEAM_DRIVER_SUBDIR L"\\SteamStreamingSpeakers.inf"; + constexpr auto STEAM_SPEAKERS_DRIVER_PATH = L"%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\" STEAM_DRIVER_SUBDIR L"\\SteamStreamingSpeakers.inf"; + constexpr auto STEAM_MICROPHONE_DRIVER_PATH = L"%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\" STEAM_DRIVER_SUBDIR L"\\SteamStreamingMicrophone.inf"; constexpr auto 
waveformat_mask_stereo = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT; @@ -198,6 +203,16 @@ namespace { return result; } + std::optional normalize_mic_backend_name(const std::string &backend_name) { + if (backend_name.empty() || backend_name == "steam_streaming_microphone") { + return "steam_streaming_microphone"; + } + + BOOST_LOG(error) << "Windows microphone backend ["sv << backend_name + << "] is not supported in Apollo Mic. Use [steam_streaming_microphone]."; + return std::nullopt; + } + } // namespace using namespace std::literals; @@ -782,6 +797,65 @@ namespace platf::audio { return mic; } + int init_mic_redirect_device() override { + if (mic_redirect_device) { + return 0; + } + + auto normalized_backend = normalize_mic_backend_name(config::audio.mic_backend); + if (!normalized_backend) { + ::audio::mic_debug_on_backend_error("Unsupported Windows microphone backend [" + config::audio.mic_backend + "]. Use steam_streaming_microphone."); + active_mic_backend.clear(); + return -1; + } + + config::audio.mic_backend = *normalized_backend; + + auto try_create_device = [this]() { + auto device = std::make_unique(); + if (device->init() != 0) { + return false; + } + + active_mic_backend = std::string {device->backend_id()}; + BOOST_LOG(info) << "Client microphone redirection backend: " << active_mic_backend; + mic_redirect_device = std::move(device); + return true; + }; + + if (try_create_device()) { + return 0; + } + + if (config::audio.install_steam_drivers) { + BOOST_LOG(info) << "Attempting to install missing Steam audio drivers for microphone redirection"sv; + install_steam_audio_drivers(); + if (try_create_device()) { + return 0; + } + } + + BOOST_LOG(warning) << "Client microphone redirection is unavailable because Steam Streaming Microphone is not installed or not accessible. 
" + << "Install the local Steam audio drivers and use \"Microphone (Steam Streaming Microphone)\" as the host microphone in your applications."; + active_mic_backend.clear(); + return -1; + } + + void release_mic_redirect_device() override { + mic_redirect_device.reset(); + active_mic_backend.clear(); + } + + int write_mic_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) override { + if (!mic_redirect_device) { + BOOST_LOG(warning) << "Client microphone packet rejected before decode because no Windows microphone redirect device is active" + << " [seq=" << sequence_number << ", ts=" << timestamp << ", len=" << len << ']'; + return -1; + } + + return mic_redirect_device->write_data(data, len, sequence_number, timestamp); + } + /** * If the requested sink is a virtual sink, meaning no speakers attached to * the host, then we can seamlessly set the format to stereo and surround sound. @@ -896,6 +970,14 @@ namespace platf::audio { }; } + audio_control_t::match_fields_list_t match_steam_microphone() { + return { + {match_field_e::device_friendly_name, L"Speakers (Steam Streaming Microphone)"}, + {match_field_e::adapter_friendly_name, L"Steam Streaming Microphone"}, + {match_field_e::device_description, L"Steam Streaming Microphone"}, + }; + } + audio_control_t::match_fields_list_t match_all_fields(const std::wstring &name) { return { {match_field_e::device_id, name}, // {0.0.0.00000000}.{29dd7668-45b2-4846-882d-950f55bf7eb8} @@ -1041,11 +1123,7 @@ namespace platf::audio { BOOST_LOG(info) << "Successfully reset default audio device"sv; } - /** - * @brief Installs the Steam Streaming Speakers driver, if present. - * @return `true` if installation was successful. 
- */ - bool install_steam_audio_drivers() { + bool install_driver_from_local_steam_inf(const wchar_t *driver_path_template, std::wstring_view driver_name, bool restore_default_output_device) { #ifdef STEAM_DRIVER_SUBDIR // MinGW's libnewdev.a is missing DiInstallDriverW() even though the headers have it, // so we have to load it at runtime. It's Vista or later, so it will always be available. @@ -1064,22 +1142,23 @@ namespace platf::audio { return false; } - // Get the current default audio device (if present) - auto old_default_dev = default_device(device_enum); + audio::device_t old_default_dev; + if (restore_default_output_device) { + old_default_dev = default_device(device_enum); + } - // Install the Steam Streaming Speakers driver WCHAR driver_path[MAX_PATH] = {}; - ExpandEnvironmentStringsW(STEAM_AUDIO_DRIVER_PATH, driver_path, ARRAYSIZE(driver_path)); + ExpandEnvironmentStringsW(driver_path_template, driver_path, ARRAYSIZE(driver_path)); if (fn_DiInstallDriverW(nullptr, driver_path, 0, nullptr)) { - BOOST_LOG(info) << "Successfully installed Steam Streaming Speakers"sv; + BOOST_LOG(info) << "Successfully installed "sv << driver_name; // Wait for 5 seconds to allow the audio subsystem to reconfigure things before // modifying the default audio device or enumerating devices again. Sleep(5000); - // If there was a previous default device, restore that original device as the - // default output device just in case installing the new one changed it. - if (old_default_dev) { + if (restore_default_output_device && old_default_dev) { + // If there was a previous default device, restore that original device as the + // default output device just in case installing the new one changed it. 
audio::wstring_t old_default_id; old_default_dev->GetId(&old_default_id); @@ -1093,25 +1172,39 @@ namespace platf::audio { auto err = GetLastError(); switch (err) { case ERROR_ACCESS_DENIED: - BOOST_LOG(warning) << "Administrator privileges are required to install Steam Streaming Speakers"sv; + BOOST_LOG(warning) << "Administrator privileges are required to install "sv << driver_name; break; case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: - BOOST_LOG(info) << "Steam audio drivers not found. This is expected if you don't have Steam installed."sv; + BOOST_LOG(info) << "Steam audio drivers not found locally. Install Steam on the host to use "sv << driver_name << '.'; break; default: - BOOST_LOG(warning) << "Failed to install Steam audio drivers: "sv << err; + BOOST_LOG(warning) << "Failed to install "sv << driver_name << ": "sv << err; break; } return false; } #else - BOOST_LOG(warning) << "Unable to install Steam Streaming Speakers on unknown architecture"sv; + BOOST_LOG(warning) << "Unable to install "sv << driver_name << " on unknown architecture"sv; return false; #endif } + bool install_steam_audio_drivers() { + bool ok = true; + + if (!find_device_id(match_steam_speakers())) { + ok = install_driver_from_local_steam_inf(STEAM_SPEAKERS_DRIVER_PATH, L"Steam Streaming Speakers", true) && ok; + } + + if (!find_device_id(match_steam_microphone())) { + ok = install_driver_from_local_steam_inf(STEAM_MICROPHONE_DRIVER_PATH, L"Steam Streaming Microphone", false) && ok; + } + + return ok; + } + int init() { auto status = CoCreateInstance( CLSID_CPolicyConfigClient, @@ -1149,6 +1242,8 @@ namespace platf::audio { policy_t policy; audio::device_enum_t device_enum; std::string assigned_sink; + std::string active_mic_backend; + std::unique_ptr mic_redirect_device; }; } // namespace platf::audio @@ -1166,9 +1261,11 @@ namespace platf { return nullptr; } - // Install Steam Streaming Speakers if needed. 
We do this during audio_control() to ensure - // the sink information returned includes the new Steam Streaming Speakers device. - if (config::audio.install_steam_drivers && !control->find_device_id(control->match_steam_speakers())) { + // Install Steam Streaming audio drivers if needed. We do this during audio_control() to ensure + // the sink information returned includes the new Steam endpoints before any later enumeration. + if (config::audio.install_steam_drivers && + (!control->find_device_id(control->match_steam_speakers()) || + !control->find_device_id(control->match_steam_microphone()))) { // This is best effort. Don't fail if it doesn't work. control->install_steam_audio_drivers(); } diff --git a/src/platform/windows/mic_write.cpp b/src/platform/windows/mic_write.cpp new file mode 100644 index 000000000..33ec6304a --- /dev/null +++ b/src/platform/windows/mic_write.cpp @@ -0,0 +1,967 @@ +/** + * @file src/platform/windows/mic_write.cpp + * @brief Windows microphone redirection writer. 
+ */ +#include "mic_write.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "PolicyConfig.h" +#include "misc.h" +#include "src/audio.h" +#include "src/config.h" +#include "src/logging.h" + +namespace platf::audio { + namespace { + constexpr PROPERTYKEY PKEY_Device_DeviceDesc { + {0xa45c254e, 0xdf1c, 0x4efd, {0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0}}, + 2 + }; + constexpr PROPERTYKEY PKEY_Device_FriendlyName { + {0xa45c254e, 0xdf1c, 0x4efd, {0x80, 0x20, 0x67, 0xd1, 0x46, 0xa8, 0x50, 0xe0}}, + 14 + }; + constexpr PROPERTYKEY PKEY_DeviceInterface_FriendlyName { + {0x026e516e, 0xb814, 0x414b, {0x83, 0xcd, 0x85, 0x6d, 0x6f, 0xef, 0x48, 0x22}}, + 2 + }; + + constexpr std::uint32_t decoded_sample_rate = 48000; + constexpr REFERENCE_TIME buffer_duration_100ns = 1000000; + constexpr std::uint32_t default_packet_duration_samples = 960; + constexpr std::uint32_t max_packet_duration_samples = 5760; + constexpr std::size_t max_queued_frames = decoded_sample_rate; + constexpr std::size_t max_queued_packets = 64; + constexpr std::size_t target_prebuffer_packets = 4; + constexpr std::size_t target_prebuffer_frames = default_packet_duration_samples * target_prebuffer_packets; + + template + void co_task_free(T *ptr) { + if (ptr) { + CoTaskMemFree(ptr); + } + } + + using device_t = util::safe_ptr>; + using collection_t = util::safe_ptr>; + using prop_t = util::safe_ptr>; + using policy_t = util::safe_ptr>; + using wstring_t = util::safe_ptr>; + using waveformat_t = util::safe_ptr>; + + class prop_var_t { + public: + prop_var_t() { + PropVariantInit(&value); + } + + ~prop_var_t() { + PropVariantClear(&value); + } + + PROPVARIANT value; + }; + + struct parsed_waveformat_t { + WORD channels {}; + DWORD sample_rate {}; + WORD bits_per_sample {}; + WORD valid_bits_per_sample {}; + WORD block_align {}; + DWORD channel_mask {}; + bool is_float {}; + }; + + struct endpoint_format_info_t { + 
std::string mix_format {"unavailable"}; + std::string device_format {"unavailable"}; + bool recommended_active {}; + }; + + std::wstring get_prop_string(IPropertyStore *prop, REFPROPERTYKEY key) { + prop_var_t value; + if (FAILED(prop->GetValue(key, &value.value)) || value.value.vt != VT_LPWSTR || value.value.pwszVal == nullptr) { + return {}; + } + + return value.value.pwszVal; + } + + bool contains_case_insensitive(std::wstring haystack, std::wstring needle) { + std::transform(haystack.begin(), haystack.end(), haystack.begin(), ::towlower); + std::transform(needle.begin(), needle.end(), needle.begin(), ::towlower); + return haystack.find(needle) != std::wstring::npos; + } + + std::wstring endpoint_label(EDataFlow flow) { + return flow == eCapture ? L"capture" : L"render"; + } + + parsed_waveformat_t parse_waveformat(const WAVEFORMATEX *format) { + parsed_waveformat_t parsed {}; + if (format == nullptr) { + return parsed; + } + + parsed.channels = format->nChannels; + parsed.sample_rate = format->nSamplesPerSec; + parsed.bits_per_sample = format->wBitsPerSample; + parsed.valid_bits_per_sample = format->wBitsPerSample; + parsed.block_align = format->nBlockAlign; + + if (format->wFormatTag == WAVE_FORMAT_EXTENSIBLE && format->cbSize >= 22) { + const auto *extensible = reinterpret_cast(format); + parsed.valid_bits_per_sample = extensible->Samples.wValidBitsPerSample ? 
extensible->Samples.wValidBitsPerSample : format->wBitsPerSample; + parsed.channel_mask = extensible->dwChannelMask; + + parsed.is_float = extensible->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT && + format->wBitsPerSample == 32 && + parsed.valid_bits_per_sample == 32; + } else if (format->wFormatTag == WAVE_FORMAT_IEEE_FLOAT && + format->wBitsPerSample == 32) { + parsed.is_float = true; + } + + return parsed; + } + + std::string waveformat_to_pretty_string(const WAVEFORMATEX *format) { + const auto parsed = parse_waveformat(format); + if (format == nullptr) { + return "unavailable"; + } + + std::string result = parsed.is_float ? "float32" : "pcm"; + result += ", "; + result += std::to_string(parsed.valid_bits_per_sample ? parsed.valid_bits_per_sample : parsed.bits_per_sample); + result += "-bit"; + result += ", "; + result += std::to_string(parsed.sample_rate); + result += " Hz, "; + result += std::to_string(parsed.channels); + result += "ch"; + if (parsed.channel_mask != 0) { + result += ", mask=0x"; + result += util::hex(parsed.channel_mask).to_string(); + } + return result; + } + + bool is_recoverable_device_error(HRESULT status) { + return status == AUDCLNT_E_DEVICE_INVALIDATED || + status == AUDCLNT_E_RESOURCES_INVALIDATED || + status == AUDCLNT_E_SERVICE_NOT_RUNNING; + } + + std::uint16_t sequence_distance(std::uint16_t newer, std::uint16_t older) { + return static_cast(newer - older); + } + + std::uint32_t timestamp_distance(std::uint32_t newer, std::uint32_t older) { + return static_cast(newer - older); + } + + bool recover_device(mic_write_wasapi_t &writer, HRESULT status, const char *operation) { + if (!is_recoverable_device_error(status)) { + return false; + } + + BOOST_LOG(warning) << "Microphone playback device needs reinitialization after failure while " << operation + << ": 0x" << util::hex(status).to_string_view(); + + writer.cleanup(); + return writer.init() == 0; + } + + std::vector make_recommended_steam_mic_device_waveformat() { + 
WAVEFORMATEXTENSIBLE pcm_format {}; + pcm_format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; + pcm_format.Format.nChannels = 2; + pcm_format.Format.nSamplesPerSec = decoded_sample_rate; + pcm_format.Format.wBitsPerSample = 32; + pcm_format.Samples.wValidBitsPerSample = 32; + pcm_format.Format.nBlockAlign = static_cast(pcm_format.Format.nChannels * (pcm_format.Format.wBitsPerSample / 8)); + pcm_format.Format.nAvgBytesPerSec = pcm_format.Format.nSamplesPerSec * pcm_format.Format.nBlockAlign; + pcm_format.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); + pcm_format.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + pcm_format.dwChannelMask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT; + + std::vector storage(sizeof(pcm_format)); + std::memcpy(storage.data(), &pcm_format, sizeof(pcm_format)); + return storage; + } + + std::vector make_required_steam_mic_render_waveformat() { + WAVEFORMATEXTENSIBLE float_format {}; + float_format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; + float_format.Format.nChannels = 2; + float_format.Format.nSamplesPerSec = decoded_sample_rate; + float_format.Format.wBitsPerSample = 32; + float_format.Samples.wValidBitsPerSample = 32; + float_format.Format.nBlockAlign = static_cast(float_format.Format.nChannels * sizeof(float)); + float_format.Format.nAvgBytesPerSec = float_format.Format.nSamplesPerSec * float_format.Format.nBlockAlign; + float_format.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); + float_format.SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT; + float_format.dwChannelMask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT; + + std::vector storage(sizeof(float_format)); + std::memcpy(storage.data(), &float_format, sizeof(float_format)); + return storage; + } + + bool is_recommended_steam_mic_device_format(const WAVEFORMATEX *format) { + const auto parsed = parse_waveformat(format); + return parsed.channels == 2 && + parsed.sample_rate == decoded_sample_rate && + parsed.valid_bits_per_sample == 32; + } + + 
endpoint_format_info_t query_endpoint_format_info(IMMDeviceEnumerator *device_enum, const std::wstring &device_id) { + endpoint_format_info_t info; + + policy_t policy; + auto status = CoCreateInstance( + CLSID_CPolicyConfigClient, + nullptr, + CLSCTX_ALL, + IID_IPolicyConfig, + reinterpret_cast(&policy) + ); + if (FAILED(status) || !policy) { + return info; + } + + waveformat_t current_format; + status = policy->GetDeviceFormat(device_id.c_str(), false, ¤t_format); + if (SUCCEEDED(status) && current_format) { + info.device_format = waveformat_to_pretty_string(current_format.get()); + info.recommended_active = is_recommended_steam_mic_device_format(current_format.get()); + } + + device_t device; + status = device_enum ? device_enum->GetDevice(device_id.c_str(), &device) : E_FAIL; + if (FAILED(status) || !device) { + return info; + } + + util::safe_ptr> local_audio_client; + status = device->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, reinterpret_cast(&local_audio_client)); + if (FAILED(status) || !local_audio_client) { + return info; + } + + waveformat_t mix_format; + status = local_audio_client->GetMixFormat(&mix_format); + if (SUCCEEDED(status) && mix_format) { + info.mix_format = waveformat_to_pretty_string(mix_format.get()); + } + + return info; + } + + bool ensure_recommended_steam_mic_format(const std::wstring &device_id, const std::string &target_device_name, EDataFlow flow) { + policy_t policy; + auto status = CoCreateInstance( + CLSID_CPolicyConfigClient, + nullptr, + CLSCTX_ALL, + IID_IPolicyConfig, + reinterpret_cast(&policy) + ); + if (FAILED(status) || !policy) { + BOOST_LOG(warning) << "Couldn't create audio policy config for Steam microphone format setup: 0x" + << util::hex(status).to_string_view(); + return false; + } + + waveformat_t current_format; + status = policy->GetDeviceFormat(device_id.c_str(), false, ¤t_format); + if (FAILED(status) || !current_format) { + BOOST_LOG(warning) << "Couldn't query Steam microphone " << 
to_utf8(endpoint_label(flow)) << " device format for [" << target_device_name << "]: 0x" + << util::hex(status).to_string_view(); + return false; + } + + if (is_recommended_steam_mic_device_format(current_format.get())) { + return true; + } + + auto recommended_format_storage = make_recommended_steam_mic_device_waveformat(); + auto *recommended_format = reinterpret_cast(recommended_format_storage.data()); + WAVEFORMATEXTENSIBLE previous_format {}; + status = policy->SetDeviceFormat(device_id.c_str(), recommended_format, reinterpret_cast(&previous_format)); + if (FAILED(status)) { + BOOST_LOG(warning) << "Couldn't set Steam microphone " << to_utf8(endpoint_label(flow)) + << " device format to stereo 32-bit 48k for [" << target_device_name << "]: 0x" + << util::hex(status).to_string_view(); + return false; + } + + BOOST_LOG(info) << "Changed Steam microphone " << to_utf8(endpoint_label(flow)) << " device format for [" << target_device_name + << "] to [pcm, 32-bit, 48000 Hz, 2ch]"; + return true; + } + + HRESULT initialize_shared_audio_client(IAudioClient *audio_client, const WAVEFORMATEX *format, DWORD stream_flags) { + return audio_client->Initialize( + AUDCLNT_SHAREMODE_SHARED, + stream_flags, + buffer_duration_100ns, + 0, + format, + nullptr + ); + } + + } // namespace + + mic_write_wasapi_t::mic_write_wasapi_t(std::string backend_name, + std::vector autodetect_patterns, + std::string requested_device_name): + backend_name {std::move(backend_name)}, + requested_device_name {std::move(requested_device_name)}, + autodetect_patterns {std::move(autodetect_patterns)} { + } + + mic_write_wasapi_t::~mic_write_wasapi_t() { + cleanup(); + } + + std::string_view mic_write_wasapi_t::backend_id() const { + return backend_name; + } + + bool mic_write_wasapi_t::find_target_device(EDataFlow flow, std::wstring &device_id, std::string &device_name) { + collection_t collection; + HRESULT status = device_enum->EnumAudioEndpoints(flow, DEVICE_STATE_ACTIVE, &collection); + if 
(FAILED(status) || !collection) { + BOOST_LOG(error) << "Couldn't enumerate " << to_utf8(endpoint_label(flow)) + << " devices for microphone redirection: 0x" << util::hex(status).to_string_view(); + return false; + } + + std::wstring requested_name = requested_device_name.empty() ? std::wstring {} : from_utf8(requested_device_name); + auto patterns = autodetect_patterns; + if (patterns.empty()) { + patterns = flow == eCapture ? + std::vector {L"Microphone (Steam Streaming Microphone)", L"Steam Streaming Microphone"} : + std::vector {L"Steam Streaming Microphone", L"Speakers (Steam Streaming Microphone)"}; + } + + UINT count = 0; + collection->GetCount(&count); + for (UINT index = 0; index < count; ++index) { + device_t device; + if (FAILED(collection->Item(index, &device)) || !device) { + continue; + } + + wstring_t id; + if (FAILED(device->GetId(&id)) || !id) { + continue; + } + + prop_t prop; + if (FAILED(device->OpenPropertyStore(STGM_READ, &prop)) || !prop) { + continue; + } + + auto friendly_name = get_prop_string(prop.get(), PKEY_Device_FriendlyName); + auto interface_name = get_prop_string(prop.get(), PKEY_DeviceInterface_FriendlyName); + auto description = get_prop_string(prop.get(), PKEY_Device_DeviceDesc); + + if (requested_name.empty() && + (contains_case_insensitive(friendly_name, L"16ch") || + contains_case_insensitive(interface_name, L"16ch") || + contains_case_insensitive(description, L"16ch"))) { + continue; + } + + bool matched = false; + if (!requested_name.empty()) { + matched = friendly_name == requested_name || interface_name == requested_name || description == requested_name || id.get() == requested_name; + } else { + for (const auto &pattern : patterns) { + if (contains_case_insensitive(friendly_name, pattern) || + contains_case_insensitive(interface_name, pattern) || + contains_case_insensitive(description, pattern)) { + matched = true; + break; + } + } + } + + if (!matched) { + continue; + } + + device_id = id.get(); + device_name = 
to_utf8(!friendly_name.empty() ? friendly_name : interface_name); + return true; + } + + return false; + } + + bool mic_write_wasapi_t::initialize_device() { + std::wstring render_device_id; + if (!find_target_device(eRender, render_device_id, target_device_name)) { + if (requested_device_name.empty()) { + BOOST_LOG(warning) << "No supported Steam Streaming Microphone playback device found. Install the Steam audio drivers and ensure " + << "\"Speakers (Steam Streaming Microphone)\" is available."; + ::audio::mic_debug_on_backend_error("Steam Streaming Microphone was not found on the host. Install the local Steam audio drivers and ensure Speakers (Steam Streaming Microphone) is available."); + } else { + BOOST_LOG(warning) << "Requested microphone device not found: " << requested_device_name; + ::audio::mic_debug_on_backend_error("Requested microphone render device was not found: " + requested_device_name); + } + return false; + } + + std::wstring capture_device_id; + std::string capture_device_name; + if (!find_target_device(eCapture, capture_device_id, capture_device_name)) { + BOOST_LOG(warning) << "Couldn't find the paired Steam microphone capture endpoint. 
Host applications may read from a stale or mismatched format."; + ::audio::mic_debug_on_backend_error("Could not find the paired Microphone (Steam Streaming Microphone) capture endpoint"); + return false; + } + + const bool render_format_enforced = ensure_recommended_steam_mic_format(render_device_id, target_device_name, eRender); + const bool capture_format_enforced = ensure_recommended_steam_mic_format(capture_device_id, capture_device_name, eCapture); + const auto render_endpoint_info = query_endpoint_format_info(device_enum.get(), render_device_id); + const auto capture_endpoint_info = query_endpoint_format_info(device_enum.get(), capture_device_id); + const bool recommended_format_active = render_endpoint_info.recommended_active && capture_endpoint_info.recommended_active; + const bool recommended_format_enforced = render_format_enforced || capture_format_enforced; + + device_t device; + HRESULT status = device_enum->GetDevice(render_device_id.c_str(), &device); + if (FAILED(status) || !device) { + BOOST_LOG(error) << "Couldn't open microphone playback device [" << target_device_name << "]: 0x" << util::hex(status).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not open host microphone render device [" + target_device_name + "]"); + return false; + } + + status = device->Activate(IID_IAudioClient, CLSCTX_ALL, nullptr, (void **) &audio_client); + if (FAILED(status) || !audio_client) { + BOOST_LOG(error) << "Couldn't activate microphone playback client [" << target_device_name << "]: 0x" << util::hex(status).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not activate the host microphone playback client for [" + target_device_name + "]"); + return false; + } + + waveformat_t mix_format; + status = audio_client->GetMixFormat(&mix_format); + if (FAILED(status) || !mix_format) { + BOOST_LOG(error) << "Couldn't get microphone playback mix format for [" << target_device_name << "]: 0x" << util::hex(status).to_string_view(); + 
::audio::mic_debug_on_backend_error("Could not query the Steam Streaming Microphone endpoint mix format"); + return false; + } + + const auto endpoint_mix_string = waveformat_to_pretty_string(mix_format.get()); + active_format_storage = make_required_steam_mic_render_waveformat(); + auto *required_render_format = reinterpret_cast(active_format_storage.data()); + + const auto init_status = initialize_shared_audio_client(audio_client.get(), required_render_format, AUDCLNT_STREAMFLAGS_EVENTCALLBACK); + if (FAILED(init_status)) { + BOOST_LOG(error) << "Couldn't initialize microphone playback client [" << target_device_name + << "] with required format [float32, 32-bit, 48000 Hz, 2ch]: 0x" + << util::hex(init_status).to_string_view(); + ::audio::mic_debug_on_backend_error("Steam Streaming Microphone must support 2ch, 32-bit float, 48000 Hz"); + return false; + } + + std::memset(&active_format, 0, sizeof(active_format)); + std::memcpy(&active_format, active_format_storage.data(), std::min(active_format_storage.size(), sizeof(active_format))); + + status = audio_client->GetBufferSize(&buffer_frame_count); + if (FAILED(status)) { + BOOST_LOG(error) << "Couldn't query microphone playback buffer size: 0x" << util::hex(status).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not query the microphone playback buffer size"); + return false; + } + + status = audio_client->GetService(IID_IAudioRenderClient, (void **) &audio_render); + if (FAILED(status) || !audio_render) { + BOOST_LOG(error) << "Couldn't acquire microphone playback render client: 0x" << util::hex(status).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not acquire the microphone playback render client"); + return false; + } + + render_event.reset(CreateEvent(nullptr, FALSE, FALSE, nullptr)); + if (!render_event) { + BOOST_LOG(error) << "Couldn't create microphone playback event handle: 0x" << util::hex(GetLastError()).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not 
create the microphone playback event handle"); + return false; + } + + status = audio_client->SetEventHandle(render_event.get()); + if (FAILED(status)) { + BOOST_LOG(error) << "Couldn't set microphone playback event handle: 0x" << util::hex(status).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not set the microphone playback event handle"); + return false; + } + + const auto render_format_string = waveformat_to_pretty_string(required_render_format); + const std::string channel_mapping = "Duplicate mono microphone input to stereo render channels"; + + BOOST_LOG(info) << "Client microphone redirection target: " << target_device_name + << " [mix=" << endpoint_mix_string + << ", render-device=" << render_endpoint_info.device_format + << ", capture-device=" << capture_endpoint_info.device_format + << ", render=" << render_format_string + << ", init=required float32 shared-mode render format" + << ", resampling=off" + << ']'; + + BOOST_LOG(info) << "Paired Steam microphone capture endpoint: " << capture_device_name + << " [mix=" << capture_endpoint_info.mix_format + << ", device=" << capture_endpoint_info.device_format + << ", recommended=" << (recommended_format_active ? "active" : "inactive") + << ", enforced=" << (recommended_format_enforced ? 
"yes" : "no") + << ']'; + + ::audio::mic_debug_on_backend_target(target_device_name, active_format.nChannels, active_format.nSamplesPerSec); + ::audio::mic_debug_on_backend_format(endpoint_mix_string, render_format_string, false, channel_mapping); + ::audio::mic_debug_on_backend_endpoint_formats( + render_endpoint_info.device_format, + capture_device_name, + capture_endpoint_info.mix_format, + capture_endpoint_info.device_format, + recommended_format_enforced, + recommended_format_active + ); + + status = audio_client->Start(); + if (FAILED(status)) { + BOOST_LOG(error) << "Couldn't start microphone playback client: 0x" << util::hex(status).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not start the microphone playback client"); + return false; + } + + stop_render_thread = false; + render_thread = std::thread {[this]() { render_loop(); }}; + + return true; + } + + int mic_write_wasapi_t::init() { + int opus_error = OPUS_OK; + opus_decoder = opus_decoder_create(decoded_sample_rate, 1, &opus_error); + if (opus_error != OPUS_OK || opus_decoder == nullptr) { + BOOST_LOG(error) << "Couldn't create Opus decoder for microphone redirection: " << opus_strerror(opus_error); + ::audio::mic_debug_on_backend_error("Could not create the Opus decoder for microphone redirection"); + return -1; + } + + HRESULT status = CoCreateInstance(CLSID_MMDeviceEnumerator, nullptr, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void **) &device_enum); + if (FAILED(status) || !device_enum) { + BOOST_LOG(error) << "Couldn't create device enumerator for microphone redirection: 0x" << util::hex(status).to_string_view(); + ::audio::mic_debug_on_backend_error("Could not create the Windows audio device enumerator for microphone redirection"); + return -1; + } + + if (!initialize_device()) { + return -1; + } + + ::audio::mic_debug_on_backend_initialized(std::string {backend_name}); + + return 0; + } + + int mic_write_wasapi_t::write_data(const char *data, std::size_t len, std::uint16_t 
sequence_number, std::uint32_t timestamp) { + if (!audio_client || audio_render == nullptr || opus_decoder == nullptr || data == nullptr || len == 0 || !render_event) { + BOOST_LOG(warning) << "Client microphone packet rejected before decode because the WASAPI write path is not ready" + << " [seq=" << sequence_number + << ", ts=" << timestamp + << ", len=" << len + << ", audio_client=" << static_cast(audio_client) + << ", audio_render=" << static_cast(audio_render != nullptr) + << ", opus_decoder=" << static_cast(opus_decoder != nullptr) + << ", render_event=" << static_cast(render_event) + << ", data=" << static_cast(data != nullptr) << ']'; + return -1; + } + + if (active_format.nChannels != 2 || active_format.nSamplesPerSec != decoded_sample_rate || active_format.wBitsPerSample != 32) { + ::audio::mic_debug_on_render_error(sequence_number, "Steam Streaming Microphone is not running at the required 2ch, 32-bit float, 48000 Hz format"); + return -1; + } + + bool stale_packet = false; + bool duplicate_packet = false; + bool trimmed_packet_queue = false; + { + std::lock_guard lock(queue_mutex); + + if (has_playout_cursor) { + const auto behind = sequence_distance(expected_sequence_number, sequence_number); + if (behind != 0 && behind < 0x8000) { + stale_packet = true; + } + } + + if (!stale_packet) { + auto [_, inserted] = pending_packets.emplace(sequence_number, queued_mic_packet_t { + std::vector {reinterpret_cast(data), reinterpret_cast(data) + len}, + sequence_number, + timestamp, + std::chrono::steady_clock::now() + }); + + duplicate_packet = !inserted; + if (inserted && pending_packets.size() > max_queued_packets) { + pending_packets.erase(pending_packets.begin()); + trimmed_packet_queue = true; + } + } + } + + if (stale_packet) { + ::audio::mic_debug_on_packet_dropped(sequence_number, "Dropped a stale microphone packet that arrived after its playout deadline"); + return 0; + } + + if (duplicate_packet) { + ::audio::mic_debug_on_packet_dropped(sequence_number, 
"Dropped a duplicate microphone packet"); + return 0; + } + + if (trimmed_packet_queue) { + BOOST_LOG(debug) << "Trimmed queued microphone packets for [" << target_device_name << "] to keep jitter-buffer latency bounded"; + ::audio::mic_debug_on_render_error(sequence_number, "Queued microphone packets grew too large, so older packets were dropped to keep latency bounded"); + } + + SetEvent(render_event.get()); + return static_cast(len); + } + + std::uint32_t mic_write_wasapi_t::infer_packet_duration_samples(std::uint32_t current_timestamp, std::uint32_t next_timestamp) const { + const auto delta = timestamp_distance(next_timestamp, current_timestamp); + if (delta >= 120 && delta <= max_packet_duration_samples) { + return delta; + } + return default_packet_duration_samples; + } + + bool mic_write_wasapi_t::should_conceal_missing_packet_locked() const { + if (pending_packets.empty()) { + return false; + } + + if (pending_packets.find(static_cast(expected_sequence_number + 1)) != pending_packets.end()) { + return true; + } + + const auto delta = sequence_distance(pending_packets.begin()->first, expected_sequence_number); + return delta != 0 && delta < 0x8000; + } + + void mic_write_wasapi_t::append_decoded_frames(const float *samples, int decoded_frames, std::uint16_t sequence_number) { + if (samples == nullptr || decoded_frames <= 0) { + return; + } + + float peak = 0.0f; + { + std::lock_guard lock(queue_mutex); + for (int frame = 0; frame < decoded_frames; ++frame) { + const float sample = std::clamp(samples[frame], -1.0f, 1.0f); + peak = std::max(peak, std::fabs(sample)); + pending_frames.push_back(sample); + } + + if (pending_frames.size() > max_queued_frames) { + const auto frames_to_trim = pending_frames.size() - max_queued_frames; + pending_frames.erase(pending_frames.begin(), pending_frames.begin() + static_cast(frames_to_trim)); + } + } + + const double normalized_level = std::clamp(static_cast(peak), 0.0, 1.0); + const bool silent = peak < 0.015625f; + 
::audio::mic_debug_on_packet_decoded(sequence_number, normalized_level, silent); + ::audio::mic_debug_on_packet_rendered(sequence_number, normalized_level, silent); + } + + bool mic_write_wasapi_t::decode_next_packet() { + queued_mic_packet_t packet; + std::uint16_t packet_sequence = 0; + std::uint32_t frame_duration_samples = default_packet_duration_samples; + bool decode_fec = false; + bool decode_plc = false; + + { + std::lock_guard lock(queue_mutex); + + if (!has_playout_cursor) { + if (pending_packets.size() < target_prebuffer_packets) { + return false; + } + + expected_sequence_number = pending_packets.begin()->first; + expected_timestamp = pending_packets.begin()->second.timestamp; + has_playout_cursor = true; + } + + packet_sequence = expected_sequence_number; + + if (auto current = pending_packets.find(expected_sequence_number); current != pending_packets.end()) { + if (auto next = std::next(current); next != pending_packets.end()) { + frame_duration_samples = infer_packet_duration_samples(current->second.timestamp, next->second.timestamp); + } + + packet = std::move(current->second); + pending_packets.erase(current); + } else if (auto next = pending_packets.find(static_cast(expected_sequence_number + 1)); next != pending_packets.end()) { + frame_duration_samples = infer_packet_duration_samples(expected_timestamp, next->second.timestamp); + packet = next->second; + decode_fec = true; + } else if (should_conceal_missing_packet_locked()) { + decode_plc = true; + } else { + return false; + } + } + + std::vector decoded_pcm(max_packet_duration_samples); + int decoded_frames = 0; + if (decode_plc) { + decoded_frames = opus_decode_float(opus_decoder, nullptr, 0, decoded_pcm.data(), static_cast(frame_duration_samples), 0); + BOOST_LOG(debug) << "Applying Opus PLC for missing microphone packet on [" << target_device_name << "] sequence " << packet_sequence; + } else if (decode_fec) { + decoded_frames = opus_decode_float( + opus_decoder, + packet.payload.data(), + 
static_cast(packet.payload.size()), + decoded_pcm.data(), + static_cast(frame_duration_samples), + 1 + ); + BOOST_LOG(debug) << "Applying Opus FEC for missing microphone packet on [" << target_device_name << "] sequence " << packet_sequence; + } else { + decoded_frames = opus_decode_float( + opus_decoder, + packet.payload.data(), + static_cast(packet.payload.size()), + decoded_pcm.data(), + static_cast(decoded_pcm.size()), + 0 + ); + } + + if (decoded_frames <= 0) { + ::audio::mic_debug_on_decode_error(packet_sequence, "The host could not decode a microphone frame from the jitter buffer"); + std::vector silent_pcm(frame_duration_samples, 0.0f); + append_decoded_frames(silent_pcm.data(), static_cast(silent_pcm.size()), packet_sequence); + decoded_frames = static_cast(silent_pcm.size()); + } else { + append_decoded_frames(decoded_pcm.data(), decoded_frames, packet_sequence); + + if (!first_packet_written_logged) { + first_packet_written_logged = true; + BOOST_LOG(info) << "Client microphone audio is being rendered into [" << target_device_name << ']'; + } + } + + { + std::lock_guard lock(queue_mutex); + expected_sequence_number = static_cast(expected_sequence_number + 1); + expected_timestamp += static_cast(decoded_frames); + } + + return decoded_frames > 0; + } + + void mic_write_wasapi_t::render_loop() { + CoInitializeEx(nullptr, COINIT_MULTITHREADED | COINIT_SPEED_OVER_MEMORY); + platf::adjust_thread_priority(platf::thread_priority_e::high); + + while (!stop_render_thread) { + if (!audio_client || audio_render == nullptr || !render_event) { + break; + } + + const auto wait_result = WaitForSingleObject(render_event.get(), 20); + if (stop_render_thread) { + break; + } + if (wait_result != WAIT_OBJECT_0 && wait_result != WAIT_TIMEOUT) { + BOOST_LOG(debug) << "Microphone render wait failed for [" << target_device_name << "]: 0x" + << util::hex(GetLastError()).to_string_view(); + continue; + } + + UINT32 padding = 0; + auto status = 
audio_client->GetCurrentPadding(&padding); + if (FAILED(status)) { + BOOST_LOG(debug) << "Couldn't query microphone playback padding for [" << target_device_name << "]: 0x" + << util::hex(status).to_string_view(); + if (is_recoverable_device_error(status)) { + ::audio::mic_debug_on_backend_error("Steam microphone playback device was invalidated during rendering. Restart the stream."); + break; + } + continue; + } + + if (padding > buffer_frame_count) { + padding = 0; + } + + const auto frames_available = buffer_frame_count - padding; + if (frames_available == 0) { + continue; + } + + while (true) { + std::size_t queued_frames = 0; + std::size_t queued_packets = 0; + { + std::lock_guard lock(queue_mutex); + queued_frames = pending_frames.size(); + queued_packets = pending_packets.size(); + } + + if (queued_frames >= target_prebuffer_frames || queued_packets == 0) { + break; + } + + if (!decode_next_packet()) { + break; + } + } + + UINT32 queued_frames = 0; + std::size_t queued_packets = 0; + { + std::lock_guard lock(queue_mutex); + queued_frames = std::min(frames_available, static_cast(pending_frames.size())); + queued_packets = pending_packets.size(); + } + + const auto buffered_frames_total = padding + queued_frames; + + if (!playout_started) { + if (buffered_frames_total < target_prebuffer_frames) { + if (!playout_wait_logged) { + playout_wait_logged = true; + BOOST_LOG(debug) << "Waiting for microphone playout prebuffer on [" << target_device_name << "], queued " + << queued_frames << " frames, " << queued_packets << " buffered packets, padding " << padding << ", total buffered " + << buffered_frames_total << " of " << target_prebuffer_frames << " target frames"; + } + continue; + } + + playout_started = true; + playout_wait_logged = false; + BOOST_LOG(debug) << "Microphone playout prebuffer ready on [" << target_device_name << "] with " + << queued_frames << " queued frames, " << queued_packets << " buffered packets, padding " << padding + << ", total buffered 
" << buffered_frames_total << " frames"; + } + + if (queued_frames == 0) { + while (decode_next_packet()) { + std::lock_guard lock(queue_mutex); + queued_frames = std::min(frames_available, static_cast(pending_frames.size())); + if (queued_frames != 0) { + break; + } + } + } + + if (queued_frames == 0) { + continue; + } + + BYTE *buffer = nullptr; + status = audio_render->GetBuffer(queued_frames, &buffer); + if (FAILED(status) || buffer == nullptr) { + BOOST_LOG(debug) << "Couldn't acquire microphone playback buffer for [" << target_device_name << "]: 0x" + << util::hex(status).to_string_view(); + if (FAILED(status) && is_recoverable_device_error(status)) { + ::audio::mic_debug_on_backend_error("Steam microphone playback device was invalidated while acquiring a render buffer. Restart the stream."); + break; + } + continue; + } + + auto *dst = reinterpret_cast(buffer); + { + std::lock_guard lock(queue_mutex); + for (UINT32 frame = 0; frame < queued_frames; ++frame) { + const float sample = pending_frames.front(); + pending_frames.pop_front(); + dst[static_cast(frame) * 2] = sample; + dst[static_cast(frame) * 2 + 1] = sample; + } + } + + status = audio_render->ReleaseBuffer(queued_frames, 0); + if (FAILED(status)) { + BOOST_LOG(debug) << "Couldn't release microphone playback buffer for [" << target_device_name << "]: 0x" + << util::hex(status).to_string_view(); + if (is_recoverable_device_error(status)) { + ::audio::mic_debug_on_backend_error("Steam microphone playback device was invalidated while releasing a render buffer. 
Restart the stream."); + break; + } + } + } + + CoUninitialize(); + } + + void mic_write_wasapi_t::cleanup() { + stop_render_thread = true; + if (render_event) { + SetEvent(render_event.get()); + } + + if (render_thread.joinable()) { + render_thread.join(); + } + + if (audio_client) { + audio_client->Stop(); + } + + if (audio_render != nullptr) { + audio_render->Release(); + audio_render = nullptr; + } + + audio_client.reset(); + device_enum.reset(); + + if (opus_decoder != nullptr) { + opus_decoder_destroy(opus_decoder); + opus_decoder = nullptr; + } + + active_format_storage.clear(); + buffer_frame_count = 0; + active_format = {}; + target_device_name.clear(); + first_packet_written_logged = false; + render_event.reset(); + { + std::lock_guard lock(queue_mutex); + pending_packets.clear(); + pending_frames.clear(); + } + expected_sequence_number = 0; + expected_timestamp = 0; + has_playout_cursor = false; + playout_started = false; + playout_wait_logged = false; + } +} // namespace platf::audio diff --git a/src/platform/windows/mic_write.h b/src/platform/windows/mic_write.h new file mode 100644 index 000000000..f71ed4374 --- /dev/null +++ b/src/platform/windows/mic_write.h @@ -0,0 +1,86 @@ +/** + * @file src/platform/windows/mic_write.h + * @brief Windows microphone redirection writer. 
+ */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "apollo_vmic.h" +#include "src/platform/common.h" + +struct OpusDecoder; + +namespace platf::audio { + template + inline void release_com(T *ptr) { + if (ptr) { + ptr->Release(); + } + } + + class mic_write_wasapi_t: public mic_redirect_backend_t { + public: + struct queued_mic_packet_t { + std::vector payload; + std::uint16_t sequence_number {}; + std::uint32_t timestamp {}; + std::chrono::steady_clock::time_point arrival_time {}; + }; + + mic_write_wasapi_t(std::string backend_name = "steam_streaming_microphone", + std::vector autodetect_patterns = {}, + std::string requested_device_name = {}); + ~mic_write_wasapi_t(); + + std::string_view backend_id() const override; + int init() override; + int write_data(const char *data, std::size_t len, std::uint16_t sequence_number, std::uint32_t timestamp) override; + void cleanup(); + + private: + bool initialize_device(); + bool find_target_device(EDataFlow flow, std::wstring &device_id, std::string &device_name); + void render_loop(); + bool decode_next_packet(); + std::uint32_t infer_packet_duration_samples(std::uint32_t current_timestamp, std::uint32_t next_timestamp) const; + bool should_conceal_missing_packet_locked() const; + void append_decoded_frames(const float *samples, int decoded_frames, std::uint16_t sequence_number); + + util::safe_ptr> device_enum; + util::safe_ptr> audio_client; + IAudioRenderClient *audio_render = nullptr; + OpusDecoder *opus_decoder = nullptr; + std::vector active_format_storage; + WAVEFORMATEX active_format {}; + UINT32 buffer_frame_count = 0; + std::string backend_name; + std::string requested_device_name; + std::vector autodetect_patterns; + std::string target_device_name; + bool first_packet_written_logged = false; + util::safe_ptr_v2 render_event; + std::mutex queue_mutex; + std::map pending_packets; + std::deque pending_frames; 
+ std::thread render_thread; + std::atomic stop_render_thread {false}; + std::uint16_t expected_sequence_number = 0; + std::uint32_t expected_timestamp = 0; + bool has_playout_cursor = false; + bool playout_started = false; + bool playout_wait_logged = false; + }; +} // namespace platf::audio diff --git a/src/rswrapper.h b/src/rswrapper.h index 6a3e38784..ec066af8f 100644 --- a/src/rswrapper.h +++ b/src/rswrapper.h @@ -15,6 +15,9 @@ typedef void (*reed_solomon_release_t)(reed_solomon *rs); typedef int (*reed_solomon_encode_t)(reed_solomon *rs, uint8_t **shards, int nr_shards, int bs); typedef int (*reed_solomon_decode_t)(reed_solomon *rs, uint8_t **shards, uint8_t *marks, int nr_shards, int bs); +// Preserve the nanors shard limit expected by the streaming code. +#define DATA_SHARDS_MAX 255 + extern reed_solomon_new_t reed_solomon_new_fn; extern reed_solomon_release_t reed_solomon_release_fn; extern reed_solomon_encode_t reed_solomon_encode_fn; diff --git a/src/rtsp.cpp b/src/rtsp.cpp index 9f08916a0..7241d7f23 100644 --- a/src/rtsp.cpp +++ b/src/rtsp.cpp @@ -794,6 +794,11 @@ namespace rtsp_stream { uint32_t encryption_flags_supported = SS_ENC_CONTROL_V2 | SS_ENC_AUDIO; uint32_t encryption_flags_requested = SS_ENC_CONTROL_V2; + if (config::audio.stream_mic) { + encryption_flags_supported |= SS_ENC_MICROPHONE; + encryption_flags_requested |= SS_ENC_MICROPHONE; + } + // Determine the encryption desired for this remote endpoint auto encryption_mode = net::encryption_mode_for_address(sock.remote_endpoint().address()); if (encryption_mode != config::ENCRYPTION_MODE_NEVER) { @@ -829,6 +834,12 @@ namespace rtsp_stream { ss << "a=fmtp:97 surround-params="sv << session.surround_params << std::endl; } + if (config::audio.stream_mic) { + ss << "m=audio " << net::map_port(stream::MIC_STREAM_PORT) << " RTP/AVP 96" << std::endl; + ss << "a=rtpmap:96 opus/48000/1"sv << std::endl; + ss << "a=fmtp:96 minptime=10;useinbandfec=1"sv << std::endl; + } + for (int x = 0; x < 
audio::MAX_STREAM_CONFIG; ++x) { auto &stream_config = audio::stream_configs[x]; std::uint8_t mapping[platf::speaker::MAX_SPEAKERS]; @@ -884,6 +895,9 @@ namespace rtsp_stream { port = net::map_port(stream::VIDEO_STREAM_PORT); } else if (type == "control"sv) { port = net::map_port(stream::CONTROL_PORT); + } else if (type == "mic"sv && config::audio.stream_mic) { + port = net::map_port(stream::MIC_STREAM_PORT); + session.enable_mic = true; } else { cmd_not_found(sock, session, std::move(req)); @@ -1156,6 +1170,14 @@ namespace rtsp_stream { return; } + if (session.enable_mic && + !(config.encryptionFlagsEnabled & SS_ENC_MICROPHONE)) { + BOOST_LOG(warning) << "Disabling microphone redirection for ["sv << session.device_name + << "] because the client did not negotiate microphone encryption"; + audio::mic_debug_on_session_stop("Microphone redirection requires encrypted transport. This client negotiated plaintext microphone packets, so mic passthrough was disabled for the session."); + session.enable_mic = false; + } + auto stream_session = stream::session::alloc(config, session); server->insert(stream_session); diff --git a/src/rtsp.h b/src/rtsp.h index c426bd936..80cf96c39 100644 --- a/src/rtsp.h +++ b/src/rtsp.h @@ -40,6 +40,7 @@ namespace rtsp_stream { bool input_only; bool host_audio; + bool enable_mic; int width; int height; int fps; diff --git a/src/stream.cpp b/src/stream.cpp index bceeb93d3..bb39ce1bb 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -88,7 +88,8 @@ namespace stream { enum class socket_e : int { video, ///< Video - audio ///< Audio + audio, ///< Audio + microphone ///< Microphone }; #pragma pack(push, 1) @@ -238,6 +239,14 @@ namespace stream { AUDIO_FEC_HEADER fecHeader; }; + struct mic_packet_header_t { + std::uint8_t flags; + std::uint8_t packetType; + boost::endian::little_uint16_at sequenceNumber; + boost::endian::little_uint32_at timestamp; + boost::endian::little_uint32_at ssrc; + }; + #pragma pack(pop) constexpr std::size_t 
round_to_pkcs7_padded(std::size_t size) { @@ -245,7 +254,6 @@ namespace stream { } constexpr std::size_t MAX_AUDIO_PACKET_SIZE = 1400; - using audio_aes_t = std::array; using av_session_id_t = std::variant; // IP address or SS-Ping-Payload from RTSP handshake @@ -335,6 +343,7 @@ namespace stream { message_queue_queue_t message_queue_queue; std::thread recv_thread; + std::thread mic_thread; std::thread video_thread; std::thread audio_thread; std::thread control_thread; @@ -343,6 +352,7 @@ namespace stream { udp::socket video_sock {io_context}; udp::socket audio_sock {io_context}; + udp::socket mic_sock {io_context}; control_server_t control_server; }; @@ -393,6 +403,8 @@ namespace stream { audio_fec_packet_t fec_packet; std::unique_ptr qos; + bool enable_mic; + bool first_mic_packet_logged; } audio; struct { @@ -1324,6 +1336,100 @@ namespace stream { } } + session_t *find_mic_session(broadcast_ctx_t &ctx, const udp::endpoint &peer) { + auto lg = ctx.control_server._sessions.lock(); + for (auto *stream_session : *ctx.control_server._sessions) { + if (!stream_session->audio.enable_mic) { + continue; + } + + if (stream_session->state.load(std::memory_order_relaxed) != stream::session::state_e::RUNNING) { + continue; + } + + if (stream_session->audio.peer.address() == peer.address()) { + return stream_session; + } + } + + return nullptr; + } + + void micRecvThread(broadcast_ctx_t &ctx) { + auto broadcast_shutdown_event = mail::man->event(mail::broadcast_shutdown); + std::array buf {}; + udp::endpoint peer; + + while (!broadcast_shutdown_event->peek()) { + boost::system::error_code ec; + auto bytes = ctx.mic_sock.receive_from(asio::buffer(buf), peer, 0, ec); + + if (broadcast_shutdown_event->peek()) { + break; + } + + if (ec) { + if (ec == boost::asio::error::operation_aborted || + ec == boost::asio::error::bad_descriptor || + ec == boost::asio::error::connection_refused || + ec == boost::asio::error::connection_reset) { + continue; + } + + BOOST_LOG(debug) << "Couldn't 
receive microphone packet: "sv << ec.message(); + continue; + } + + if (bytes <= sizeof(mic_packet_header_t)) { + continue; + } + + auto *header = reinterpret_cast(buf.data()); + if (header->packetType != MIC_PACKET_TYPE_OPUS || header->ssrc != MIC_PACKET_MAGIC) { + continue; + } + + auto *session = find_mic_session(ctx, peer); + if (session == nullptr) { + continue; + } + + const auto sequence_number = static_cast(header->sequenceNumber); + const auto timestamp = static_cast(header->timestamp); + const auto payload_len = bytes - sizeof(mic_packet_header_t); + const auto *payload = reinterpret_cast(buf.data() + sizeof(mic_packet_header_t)); + audio::mic_debug_on_packet_received(sequence_number, payload_len); + + if (!session->audio.first_mic_packet_logged) { + session->audio.first_mic_packet_logged = true; + BOOST_LOG(info) << "Received first client microphone packet for ["sv << session->device_name + << "] from ["sv << peer.address().to_string() << ':' << peer.port() + << "] with payload "sv << payload_len << " bytes"; + } + + std::vector decrypted_payload; + if (session->config.encryptionFlagsEnabled & SS_ENC_MICROPHONE) { + crypto::aes_t iv(16); + *(std::uint32_t *) iv.data() = util::endian::big(session->audio.avRiKeyId + sequence_number); + + if (session->audio.cipher.decrypt(std::string_view {reinterpret_cast(payload), payload_len}, decrypted_payload, &iv) != 0) { + BOOST_LOG(warning) << "Dropping encrypted microphone packet with invalid payload for ["sv << session->device_name + << "] sequence "sv << sequence_number; + audio::mic_debug_on_packet_decrypt_error(sequence_number, "Encrypted microphone packet could not be decrypted"); + continue; + } + + payload = decrypted_payload.data(); + } + + const auto decoded_payload_len = decrypted_payload.empty() ? 
payload_len : decrypted_payload.size(); + if (audio::write_mic_data(reinterpret_cast(payload), decoded_payload_len, sequence_number, timestamp) < 0) { + BOOST_LOG(debug) << "Dropping microphone packet for ["sv << session->device_name << ']'; + audio::mic_debug_on_packet_dropped(sequence_number, "Host microphone render path rejected the packet"); + } + } + } + void videoBroadcastThread(udp::socket &sock) { auto shutdown_event = mail::man->event(mail::broadcast_shutdown); auto packets = mail::man->queue(mail::video_packets); @@ -1758,6 +1864,7 @@ namespace stream { auto control_port = net::map_port(CONTROL_PORT); auto video_port = net::map_port(VIDEO_STREAM_PORT); auto audio_port = net::map_port(AUDIO_STREAM_PORT); + auto mic_port = net::map_port(MIC_STREAM_PORT); if (ctx.control_server.bind(address_family, control_port)) { BOOST_LOG(error) << "Couldn't bind Control server to port ["sv << control_port << "], likely another process already bound to the port"sv; @@ -1801,6 +1908,20 @@ namespace stream { return -1; } + if (config::audio.stream_mic) { + ctx.mic_sock.open(protocol, ec); + if (ec) { + BOOST_LOG(fatal) << "Couldn't open socket for Microphone server: "sv << ec.message(); + return -1; + } + + ctx.mic_sock.bind(udp::endpoint(protocol, mic_port), ec); + if (ec) { + BOOST_LOG(fatal) << "Couldn't bind Microphone server to port ["sv << mic_port << "]: "sv << ec.message(); + return -1; + } + } + ctx.message_queue_queue = std::make_shared(30); ctx.video_thread = std::thread {videoBroadcastThread, std::ref(ctx.video_sock)}; @@ -1808,6 +1929,9 @@ namespace stream { ctx.control_thread = std::thread {controlBroadcastThread, &ctx.control_server}; ctx.recv_thread = std::thread {recvThread, std::ref(ctx)}; + if (config::audio.stream_mic) { + ctx.mic_thread = std::thread {micRecvThread, std::ref(ctx)}; + } return 0; } @@ -1829,6 +1953,9 @@ namespace stream { ctx.video_sock.close(); ctx.audio_sock.close(); + if (ctx.mic_sock.is_open()) { + ctx.mic_sock.close(); + } 
video_packets.reset(); audio_packets.reset(); @@ -1841,6 +1968,10 @@ namespace stream { ctx.audio_thread.join(); BOOST_LOG(debug) << "Waiting for main control thread to end..."sv; ctx.control_thread.join(); + if (ctx.mic_thread.joinable()) { + BOOST_LOG(debug) << "Waiting for main microphone thread to end..."sv; + ctx.mic_thread.join(); + } BOOST_LOG(debug) << "All broadcasting threads ended"sv; broadcast_shutdown_event->reset(); @@ -1943,6 +2074,7 @@ namespace stream { namespace session { std::atomic_uint running_sessions; + std::atomic_uint running_mic_sessions; state_e state(session_t &session) { return session.state.load(std::memory_order_relaxed); @@ -2066,6 +2198,11 @@ namespace stream { exec_thread.detach(); } + if (session.audio.enable_mic && running_mic_sessions.fetch_sub(1, std::memory_order_acq_rel) == 1) { + audio::release_mic_redirect_device(); + audio::mic_debug_on_session_stop("Remote microphone session ended"); + } + // If this is the last session, invoke the platform callbacks if (--running_sessions == 0) { bool revert_display_config {config::video.dd.config_revert_on_disconnect}; @@ -2110,6 +2247,27 @@ namespace stream { session.audio.peer.address(addr); session.audio.peer.port(0); + if (session.audio.enable_mic) { + audio::mic_debug_on_session_start(session.device_name, (session.config.encryptionFlagsEnabled & SS_ENC_MICROPHONE) != 0); + if (running_mic_sessions.fetch_add(1, std::memory_order_acq_rel) == 0) { + if (audio::init_mic_redirect_device() != 0) { + running_mic_sessions.fetch_sub(1, std::memory_order_acq_rel); + session.audio.enable_mic = false; + audio::mic_debug_on_backend_error("Microphone backend could not initialize on the host"); + audio::mic_debug_on_session_stop("Microphone redirection requested, but the host backend could not initialize"); + BOOST_LOG(warning) << "Client microphone redirection is unavailable for ["sv << session.device_name << ']'; + } else { + BOOST_LOG(info) << "Client microphone redirection requested for ["sv 
<< session.device_name + << "] with encryption "sv + << ((session.config.encryptionFlagsEnabled & SS_ENC_MICROPHONE) ? "enabled"sv : "disabled"sv); + } + } else { + BOOST_LOG(info) << "Client microphone redirection requested for ["sv << session.device_name + << "] with encryption "sv + << ((session.config.encryptionFlagsEnabled & SS_ENC_MICROPHONE) ? "enabled"sv : "disabled"sv); + } + } + session.pingTimeout = std::chrono::steady_clock::now() + config::stream.ping_timeout; session.audioThread = std::thread {audioThread, &session}; @@ -2214,6 +2372,8 @@ namespace stream { session->audio.avRiKeyId = util::endian::big(*(std::uint32_t *) launch_session.iv.data()); session->audio.sequenceNumber = 0; session->audio.timestamp = 0; + session->audio.enable_mic = launch_session.enable_mic && config::audio.stream_mic; + session->audio.first_mic_packet_logged = false; session->control.peer = nullptr; session->state.store(state_e::STOPPED, std::memory_order_relaxed); diff --git a/src/stream.h b/src/stream.h index 1aa08f42e..3c312250a 100644 --- a/src/stream.h +++ b/src/stream.h @@ -19,6 +19,7 @@ namespace stream { constexpr auto VIDEO_STREAM_PORT = 9; constexpr auto CONTROL_PORT = 10; constexpr auto AUDIO_STREAM_PORT = 11; + constexpr auto MIC_STREAM_PORT = 12; struct session_t; diff --git a/src_assets/common/assets/web/config.html b/src_assets/common/assets/web/config.html index 1c638b6fa..d602a2960 100644 --- a/src_assets/common/assets/web/config.html +++ b/src_assets/common/assets/web/config.html @@ -188,11 +188,13 @@

{{ $t('config.configuration') }}

options: { "audio_sink": "", "virtual_sink": "", + "mic_backend": "steam_streaming_microphone", + "mic_device": "", "stream_audio": "enabled", + "stream_mic": "disabled", "install_steam_audio_drivers": "enabled", "keep_sink_default": "enabled", "auto_capture_sink": "enabled", - "stream_audio": "enabled", "adapter_name": "", "output_name": "", "fallback_mode": "", diff --git a/src_assets/common/assets/web/configs/tabs/AudioVideo.vue b/src_assets/common/assets/web/configs/tabs/AudioVideo.vue index 41eeab11a..55bba8590 100644 --- a/src_assets/common/assets/web/configs/tabs/AudioVideo.vue +++ b/src_assets/common/assets/web/configs/tabs/AudioVideo.vue @@ -28,7 +28,6 @@ const sudovdaStatus = { const currentDriverStatus = computed(() => sudovdaStatus[props.vdisplay]) const config = ref(props.config) - const validateFallbackMode = (event) => { const value = event.target.value; if (!value.match(/^\d+x\d+x\d+(\.\d+)?$/)) { @@ -108,6 +107,47 @@ const validateFallbackMode = (event) => { default="true" > + + +
+ + +
{{ $t('config.mic_backend_desc_windows') }}
+
+ +
+ + +
+ {{ $tp('config.mic_device_desc') }}
+ + + + + +
+
+ {{ $t('troubleshooting.dd_reset') }} + +
+
+

Remote Microphone

+
+

Use Moonlight's microphone preview first, then follow the host-side validation stages below.

+
+ {{ micDebug && micDebug.state ? micDebug.state : 'No active remote microphone session' }} +
+
+
+
+
{{ stage.label }}
+ + {{ micStageStateLabel(stage.state) }} + +
+
{{ stage.detail }}
+
+
+ Loading microphone status... +
+
+
+
@@ -151,6 +181,8 @@

{{ $t('troubleshooting.logs') }}

logs: 'Loading...', logFilter: null, logInterval: null, + micDebug: null, + micDebugInterval: null, serverRestarting: false, serverQuitting: false, serverQuit: false, @@ -158,18 +190,132 @@

{{ $t('troubleshooting.logs') }}

}; }, computed: { + combinedLogs() { + const sections = []; + + if (this.platform === 'windows' && this.micDebug && this.micDebug.recentEvents && this.micDebug.recentEvents.length > 0) { + sections.push(`=== Recent Microphone Events ===\n${this.micDebug.recentEvents.join("\n")}`); + } + + sections.push(this.logs); + return sections.join("\n\n"); + }, actualLogs() { - if (!this.logFilter) return this.logs; - let lines = this.logs.split("\n"); + if (!this.logFilter) return this.combinedLogs; + let lines = this.combinedLogs.split("\n"); lines = lines.filter(x => x.indexOf(this.logFilter) !== -1); return lines.join("\n"); - } + }, + micStages() { + if (!this.micDebug) { + return []; + } + + const debug = this.micDebug; + const isFreshAge = (ageMs) => ageMs >= 0 && ageMs < 3000; + + const captureState = !debug.sessionActive ? 'idle' : (debug.firstPacketReceived ? 'success' : 'warning'); + const captureDetail = !debug.sessionActive + ? 'Start a remote session with microphone passthrough enabled.' + : (debug.firstPacketReceived + ? 'Moonlight is sending microphone audio to Apollo. Confirm the local source with the Moonlight preview.' 
+ : 'Apollo negotiated microphone passthrough, but Moonlight has not sent microphone audio yet.'); + + let packetState = 'idle'; + let packetDetail = 'No active microphone session.'; + if (debug.sessionActive) { + if (!debug.firstPacketReceived) { + packetState = 'warning'; + packetDetail = 'Waiting for the first microphone packet from Moonlight.'; + } else if (isFreshAge(debug.lastPacketAgeMs)) { + packetState = 'success'; + packetDetail = `Packets are arriving from Moonlight (${debug.lastPacketAgeMs} ms ago).`; + } else { + packetState = 'warning'; + packetDetail = 'Packets arrived earlier, but Apollo has not seen a fresh microphone packet recently.'; + } + } + + let decodeState = 'idle'; + let decodeDetail = 'No decoded microphone audio on the host yet.'; + if (debug.sessionActive) { + if (debug.decodeErrors > 0 && !debug.decodeActive) { + decodeState = 'danger'; + decodeDetail = 'Apollo received microphone packets but could not decode them.'; + } else if (debug.decodeActive && isFreshAge(debug.lastDecodeAgeMs)) { + decodeState = 'success'; + decodeDetail = `Apollo decoded microphone audio successfully (${debug.lastDecodeAgeMs} ms ago).`; + } else if (debug.packetsReceived > 0) { + decodeState = 'warning'; + decodeDetail = 'Apollo is receiving packets, but decoded microphone audio has not been confirmed yet.'; + } + } + + let renderState = 'idle'; + let renderDetail = 'Steam Streaming Microphone rendering has not started.'; + if (debug.sessionActive) { + if (debug.renderErrors > 0 && !debug.renderActive) { + renderState = 'danger'; + renderDetail = 'Apollo decoded microphone audio, but writing it into Steam Streaming Microphone failed.'; + } else if (debug.renderActive && isFreshAge(debug.lastRenderAgeMs)) { + renderState = 'success'; + renderDetail = `Apollo is rendering microphone audio into ${debug.targetDeviceName || 'Steam Streaming Microphone'} (${debug.lastRenderAgeMs} ms ago).`; + } else if (debug.decodeActive) { + renderState = 'warning'; + renderDetail 
= 'Apollo decoded microphone audio, but the Steam Streaming Microphone render stage has not completed yet.'; + } + } + + let signalState = 'idle'; + let signalDetail = 'No decoded microphone signal is available yet.'; + if (debug.sessionActive) { + if (debug.signalDetected) { + signalState = 'success'; + signalDetail = 'Apollo is detecting non-silent microphone audio from Moonlight.'; + } else if (debug.decodeActive) { + signalState = 'warning'; + signalDetail = 'Decoded microphone audio is currently silent or below the signal threshold.'; + } else if (debug.firstPacketReceived) { + signalState = 'warning'; + signalDetail = 'Packets are arriving, but Apollo has not decoded usable microphone audio yet.'; + } + } + + return [ + { key: 'capture', label: 'Moonlight capture/send', state: captureState, detail: captureDetail }, + { key: 'packets', label: 'Packets arriving', state: packetState, detail: packetDetail }, + { key: 'decode', label: 'Decoded on host', state: decodeState, detail: decodeDetail }, + { key: 'render', label: 'Rendered into Steam Streaming Microphone', state: renderState, detail: renderDetail }, + { key: 'signal', label: 'Live signal detected', state: signalState, detail: signalDetail }, + ]; + }, + micStatusClass() { + if (!this.micDebug) { + return 'alert-secondary'; + } + + if (this.micDebug.renderActive) { + return 'alert-success'; + } + + if (this.micDebug.sessionActive) { + return 'alert-warning'; + } + + return 'alert-secondary'; + }, }, created() { fetch("/api/config") .then((r) => r.json()) .then((r) => { this.platform = r.platform; + if (this.platform === 'windows') { + this.refreshMicDebug(); + this.micDebugInterval = setInterval(() => { + this.refreshMicDebug(); + }, 1000); + } }); this.logInterval = setInterval(() => { @@ -177,10 +323,35 @@

{{ $t('troubleshooting.logs') }}

}, 5000); this.refreshLogs(); }, - beforeDestroy() { + beforeUnmount() { clearInterval(this.logInterval); + clearInterval(this.micDebugInterval); }, methods: { + micStageClass(state) { + return { + success: 'bg-success-subtle border-success-subtle text-success-emphasis', + warning: 'bg-warning-subtle border-warning-subtle text-warning-emphasis', + danger: 'bg-danger-subtle border-danger-subtle text-danger-emphasis', + idle: 'bg-secondary-subtle border-secondary-subtle text-secondary-emphasis', + }[state] || 'bg-secondary-subtle border-secondary-subtle text-secondary-emphasis'; + }, + micStageBadgeClass(state) { + return { + success: 'text-bg-success', + warning: 'text-bg-warning', + danger: 'text-bg-danger', + idle: 'text-bg-secondary', + }[state] || 'text-bg-secondary'; + }, + micStageStateLabel(state) { + return { + success: 'OK', + warning: 'Waiting', + danger: 'Error', + idle: 'Idle', + }[state] || 'Idle'; + }, refreshLogs() { fetch("./api/logs", { credentials: 'include' @@ -203,6 +374,28 @@

{{ $t('troubleshooting.logs') }}

}) .catch(error => console.error("Error fetching logs:", error)); }, + refreshMicDebug() { + if (this.platform !== 'windows') { + return; + } + + fetch('./api/audio-debug', { + credentials: 'include', + }) + .then((response) => { + if (!response.ok) { + return null; + } + + return response.json(); + }) + .then((data) => { + if (data) { + this.micDebug = data; + } + }) + .catch((error) => console.error('Error fetching microphone debug status:', error)); + }, closeApp() { this.closeAppPressed = true; fetch("./api/apps/close", { diff --git a/third-party/moonlight-common-c b/third-party/moonlight-common-c index c99943685..784fa1d0f 160000 --- a/third-party/moonlight-common-c +++ b/third-party/moonlight-common-c @@ -1 +1 @@ -Subproject commit c999436858471dfefa7617af3b7dc03ec1644ce4 +Subproject commit 784fa1d0f501155ab01fea7cefe8a0e9c9628b77