From 8bf35696e018ee9217678e3f8525cddda02c5ab6 Mon Sep 17 00:00:00 2001
From: Juan Pablo Bustamante
Date: Tue, 23 Dec 2025 17:07:06 -0300
Subject: [PATCH 01/16] added CMAF support with relay

---
 justfile                         |  12 ++-
 rs/hang-cli/src/publish.rs       |   6 +-
 rs/hang/src/catalog/audio/mod.rs |   7 ++
 rs/hang/src/catalog/container.rs |  24 +++++
 rs/hang/src/catalog/mod.rs       |   2 +
 rs/hang/src/catalog/video/mod.rs |   7 ++
 rs/hang/src/import/aac.rs        |   1 +
 rs/hang/src/import/avc3.rs       |   1 +
 rs/hang/src/import/fmp4.rs       | 163 ++++++++++++++++++++++++++++---
 rs/hang/src/import/hls.rs        |  26 ++++-
 10 files changed, 230 insertions(+), 19 deletions(-)
 create mode 100644 rs/hang/src/catalog/container.rs

diff --git a/justfile b/justfile
index 3907fc39b..2e908b1da 100644
--- a/justfile
+++ b/justfile
@@ -147,7 +147,7 @@ pub name url="http://localhost:4443/anon" *args:
	cargo run --bin hang -- publish --url "{{url}}" --name "{{name}}" fmp4 {{args}}

 # Generate and ingest an HLS stream from a video file.
-pub-hls name relay="http://localhost:4443/anon":
+pub-hls name passthrough="false" relay="http://localhost:4443/anon":
	#!/usr/bin/env bash
	set -euo pipefail
@@ -200,7 +200,13 @@ pub-hls name relay="http://localhost:4443/anon":
		exit 1
	fi

-	echo ">>> Starting HLS ingest from disk: $OUT_DIR/master.m3u8"
+	if [ "{{passthrough}}" = "true" ]; then
+		echo ">>> Starting HLS ingest from disk with passthrough mode: $OUT_DIR/master.m3u8"
+		PASSTHROUGH_FLAG="--passthrough"
+	else
+		echo ">>> Starting HLS ingest from disk: $OUT_DIR/master.m3u8"
+		PASSTHROUGH_FLAG=""
+	fi

	# Trap to clean up ffmpeg on exit
	cleanup() {
@@ -211,7 +217,7 @@ pub-hls name relay="http://localhost:4443/anon":
	trap cleanup SIGINT SIGTERM

	# Run hang to ingest from local files
-	cargo run --bin hang -- publish --url "{{relay}}" --name "{{name}}" hls --playlist "$OUT_DIR/master.m3u8"
+	cargo run --bin hang -- publish --url "{{relay}}" --name "{{name}}" hls --playlist "$OUT_DIR/master.m3u8" $PASSTHROUGH_FLAG

 # Publish a video using H.264 Annex B format to the localhost relay server
 pub-h264 name url="http://localhost:4443/anon" *args:
diff --git a/rs/hang-cli/src/publish.rs b/rs/hang-cli/src/publish.rs
index eb12d8771..12154e64e 100644
--- a/rs/hang-cli/src/publish.rs
+++ b/rs/hang-cli/src/publish.rs
@@ -16,6 +16,9 @@ pub enum PublishFormat {
		/// URL or file path of an HLS playlist to ingest.
		#[arg(long)]
		playlist: String,
+		/// Enable passthrough mode to transport complete CMAF fragments (moof+mdat) without decomposing.
+		#[arg(long)]
+		passthrough: bool,
	},
 }
@@ -45,12 +48,13 @@ impl Publish {
				let stream = Decoder::new(broadcast.clone(), format);
				PublishDecoder::Decoder(Box::new(stream))
			}
-			PublishFormat::Hls { playlist } => {
+			PublishFormat::Hls { playlist, passthrough } => {
				let hls = hang::import::Hls::new(
					broadcast.clone(),
					hang::import::HlsConfig {
						playlist: playlist.clone(),
						client: None,
+						passthrough: *passthrough,
					},
				)?;
				PublishDecoder::Hls(Box::new(hls))
diff --git a/rs/hang/src/catalog/audio/mod.rs b/rs/hang/src/catalog/audio/mod.rs
index 7322bc307..b74b5b6a1 100644
--- a/rs/hang/src/catalog/audio/mod.rs
+++ b/rs/hang/src/catalog/audio/mod.rs
@@ -11,6 +11,8 @@ use bytes::Bytes;
 use serde::{Deserialize, Serialize};
 use serde_with::{hex::Hex, DisplayFromStr};

+use crate::catalog::container::Container;
+
 /// Information about an audio track in the catalog.
 ///
 /// This struct contains a map of renditions (different quality/codec options)
@@ -60,4 +62,9 @@ pub struct AudioConfig {
	#[serde(default)]
	#[serde_as(as = "Option<Hex>")]
	pub description: Option<Bytes>,
+
+	/// Container format for frame encoding.
+	/// Defaults to "legacy" for backward compatibility.
+	#[serde(default)]
+	pub container: Container,
 }
diff --git a/rs/hang/src/catalog/container.rs b/rs/hang/src/catalog/container.rs
new file mode 100644
index 000000000..edafa44e8
--- /dev/null
+++ b/rs/hang/src/catalog/container.rs
@@ -0,0 +1,24 @@
+use serde::{Deserialize, Serialize};
+
+/// Container format for frame timestamp encoding and frame payload structure.
+///
+/// - "legacy": Uses QUIC VarInt encoding (1-8 bytes, variable length), raw frame payloads
+/// - "raw": Uses fixed u64 encoding (8 bytes, big-endian), raw frame payloads
+/// - "fmp4": Fragmented MP4 container - frames contain complete moof+mdat fragments
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "camelCase")]
+pub enum Container {
+	#[serde(rename = "legacy")]
+	Legacy,
+	#[serde(rename = "raw")]
+	Raw,
+	#[serde(rename = "fmp4")]
+	Fmp4,
+}
+
+impl Default for Container {
+	fn default() -> Self {
+		Container::Legacy
+	}
+}
+
diff --git a/rs/hang/src/catalog/mod.rs b/rs/hang/src/catalog/mod.rs
index 69602dc26..4d8673bd5 100644
--- a/rs/hang/src/catalog/mod.rs
+++ b/rs/hang/src/catalog/mod.rs
@@ -7,6 +7,7 @@
 mod audio;
 mod chat;
+mod container;
 mod preview;
 mod root;
 mod track;
@@ -15,6 +16,7 @@ mod video;
 pub use audio::*;
 pub use chat::*;
+pub use container::*;
 pub use preview::*;
 pub use root::*;
 pub use track::*;
diff --git a/rs/hang/src/catalog/video/mod.rs b/rs/hang/src/catalog/video/mod.rs
index 4af6de7f0..1670fabb3 100644
--- a/rs/hang/src/catalog/video/mod.rs
+++ b/rs/hang/src/catalog/video/mod.rs
@@ -16,6 +16,8 @@ use bytes::Bytes;
 use serde::{Deserialize, Serialize};
 use serde_with::{hex::Hex, DisplayFromStr};

+use crate::catalog::container::Container;
+
 /// Information about a video track in the catalog.
 ///
 /// This struct contains a map of renditions (different quality/codec options)
@@ -109,4 +111,9 @@ pub struct VideoConfig {
	/// Default: true
	#[serde(default)]
	pub optimize_for_latency: Option<bool>,
+
+	/// Container format for frame encoding.
+	/// Defaults to "legacy" for backward compatibility.
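+	///
+	/// Illustrative serialized form (hypothetical values), assuming the
+	/// catalog's usual serde_json encoding of this struct:
+	///
+	/// ```json
+	/// { "codec": "avc1.640028", "container": "fmp4" }
+	/// ```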
+	#[serde(default)]
+	pub container: Container,
 }
diff --git a/rs/hang/src/import/aac.rs b/rs/hang/src/import/aac.rs
index 065282374..5997ee658 100644
--- a/rs/hang/src/import/aac.rs
+++ b/rs/hang/src/import/aac.rs
@@ -107,6 +107,7 @@ impl Aac {
			channel_count,
			bitrate: None,
			description: None,
+			container: hang::catalog::Container::Legacy,
		};

		tracing::debug!(name = ?track.name, ?config, "starting track");
diff --git a/rs/hang/src/import/avc3.rs b/rs/hang/src/import/avc3.rs
index 9027473ec..67872e486 100644
--- a/rs/hang/src/import/avc3.rs
+++ b/rs/hang/src/import/avc3.rs
@@ -64,6 +64,7 @@ impl Avc3 {
			display_ratio_width: None,
			display_ratio_height: None,
			optimize_for_latency: None,
+			container: hang::catalog::Container::Legacy,
		};

		if let Some(old) = &self.config {
diff --git a/rs/hang/src/import/fmp4.rs b/rs/hang/src/import/fmp4.rs
index 96d5f9432..0f13849c6 100644
--- a/rs/hang/src/import/fmp4.rs
+++ b/rs/hang/src/import/fmp4.rs
@@ -1,4 +1,4 @@
-use crate::catalog::{AudioCodec, AudioConfig, CatalogProducer, VideoCodec, VideoConfig, AAC, AV1, H264, H265, VP9};
+use crate::catalog::{AudioCodec, AudioConfig, CatalogProducer, Container, VideoCodec, VideoConfig, AAC, AV1, H264, H265, VP9};
 use crate::{self as hang, Timestamp};
 use anyhow::Context;
 use bytes::{Buf, Bytes, BytesMut};
@@ -44,6 +44,13 @@ pub struct Fmp4 {
	// The latest moof header
	moof: Option<Moof>,
	moof_size: usize,
+
+	/// When true, transport CMAF fragments directly (passthrough mode)
+	/// When false, decompose fragments into individual samples (current behavior)
+	passthrough_mode: bool,
+
+	/// When passthrough_mode is enabled, store raw bytes of moof
+	moof_bytes: Option<Bytes>,
 }

 impl Fmp4 {
@@ -61,12 +68,31 @@ impl Fmp4 {
			moov: None,
			moof: None,
			moof_size: 0,
+			passthrough_mode: false,
+			moof_bytes: None,
		}
	}

+	/// Set passthrough mode for CMAF fragment transport.
+	///
+	/// When enabled, complete fMP4 fragments (moof+mdat) are transported directly
+	/// instead of being decomposed into individual samples.
+	pub fn set_passthrough_mode(&mut self, enabled: bool) {
+		self.passthrough_mode = enabled;
+	}
+
	pub fn decode<T: Buf + AsRef<[u8]>>(&mut self, buf: &mut T) -> anyhow::Result<()> {
+		// If passthrough mode, we need to extract raw bytes before parsing.
+		let available_bytes = if self.passthrough_mode && buf.has_remaining() {
+			let chunk = buf.chunk();
+			Some(Bytes::copy_from_slice(chunk))
+		} else {
+			None
+		};
+
		let mut cursor = std::io::Cursor::new(buf);
		let mut position = 0;
+		let mut bytes_offset = 0;

		while let Some(atom) = mp4_atom::Any::decode_maybe(&mut cursor)? {
			// Process the parsed atom.
@@ -89,17 +115,51 @@
					self.moof = Some(moof);
					self.moof_size = size;
+
+					// If passthrough mode, extract and store raw bytes of moof
+					if let Some(ref bytes) = available_bytes {
+						if bytes_offset + size <= bytes.len() {
+							self.moof_bytes = Some(bytes.slice(bytes_offset..bytes_offset + size));
+						}
+					}
				}
				Any::Mdat(mdat) => {
-					// Extract the samples from the mdat atom.
-					let header_size = size - mdat.data.len();
-					self.extract(mdat, header_size)?;
+					if self.passthrough_mode {
+						// Transport complete fragment
+						let moof = self.moof.take().context("missing moof box")?;
+						let moof_bytes = self.moof_bytes.take().context("missing moof bytes")?;
+
+						// Extract mdat bytes
+						let mdat_bytes = if let Some(ref bytes) = available_bytes {
+							if bytes_offset + size <= bytes.len() {
+								bytes.slice(bytes_offset..bytes_offset + size)
+							} else {
+								anyhow::bail!("invalid buffer position for mdat");
+							}
+						} else {
+							anyhow::bail!("missing available bytes in passthrough mode");
+						};
+
+						// Combine moof + mdat into complete fragment
+						let mut fragment_bytes = BytesMut::with_capacity(moof_bytes.len() + mdat_bytes.len());
+						fragment_bytes.extend_from_slice(&moof_bytes);
+						fragment_bytes.extend_from_slice(&mdat_bytes);
+						let fragment = fragment_bytes.freeze();
+
+						self.transport_fragment(fragment, moof)?;
+					} else {
+						// Extract the samples from the mdat atom (existing behavior)
+						let header_size = size - mdat.data.len();
+						self.extract(mdat, header_size)?;
+					}
				}
				_ => {
					// Skip unknown atoms
					tracing::warn!(?atom, "skipping")
				}
			}
+
+			bytes_offset += size;
		}

		// Advance the buffer by the amount of data that was processed.
@@ -113,6 +173,7 @@
	}

	fn init(&mut self, moov: Moov) -> anyhow::Result<()> {
+		let passthrough_mode = self.passthrough_mode;
		let mut catalog = self.catalog.lock();

		for trak in &moov.trak {
@@ -121,7 +182,7 @@
			let track = match handler.as_ref() {
				b"vide" => {
-					let config = Self::init_video(trak)?;
+					let config = Self::init_video_static(trak, passthrough_mode)?;

					let track = moq::Track {
						name: self.broadcast.track_name("video"),
@@ -138,7 +199,7 @@
					track.producer
				}
				b"soun" => {
-					let config = Self::init_audio(trak)?;
+					let config = Self::init_audio_static(trak, passthrough_mode)?;

					let track = moq::Track {
						name: self.broadcast.track_name("audio"),
@@ -166,7 +227,7 @@
		Ok(())
	}

-	fn init_video(trak: &Trak) -> anyhow::Result<VideoConfig> {
+	fn init_video_static(trak: &Trak, passthrough_mode: bool) -> anyhow::Result<VideoConfig> {
		let stsd = &trak.mdia.minf.stbl.stsd;

		let codec = match stsd.codecs.len() {
@@ -199,10 +260,11 @@
					display_ratio_width: None,
					display_ratio_height: None,
					optimize_for_latency: None,
+					container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy },
				}
			}
-			mp4_atom::Codec::Hev1(hev1) => Self::init_h265(true, &hev1.hvcc, &hev1.visual)?,
-			mp4_atom::Codec::Hvc1(hvc1) => Self::init_h265(false, &hvc1.hvcc, &hvc1.visual)?,
+			mp4_atom::Codec::Hev1(hev1) => Self::init_h265_static(true, &hev1.hvcc, &hev1.visual, passthrough_mode)?,
+			mp4_atom::Codec::Hvc1(hvc1) => Self::init_h265_static(false, &hvc1.hvcc, &hvc1.visual, passthrough_mode)?,
			mp4_atom::Codec::Vp08(vp08) => VideoConfig {
				codec: VideoCodec::VP8,
				description: Default::default(),
@@ -214,6 +276,7 @@
				display_ratio_width: None,
				display_ratio_height: None,
				optimize_for_latency: None,
+				container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy },
			},
			mp4_atom::Codec::Vp09(vp09) => {
				// https://github.com/gpac/mp4box.js/blob/325741b592d910297bf609bc7c400fc76101077b/src/box-codecs.js#L238
@@ -240,6 +303,7 @@
					optimize_for_latency: None,
					bitrate: None,
					framerate: None,
+					container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy },
				}
			}
			mp4_atom::Codec::Av01(av01) => {
@@ -272,6 +336,7 @@
					optimize_for_latency: None,
					bitrate: None,
					framerate: None,
+					container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy },
				}
			}
			mp4_atom::Codec::Unknown(unknown) => anyhow::bail!("unknown codec: {:?}", unknown),
@@ -282,7 +347,7 @@
	}

	// There's two almost identical hvcc atoms in the wild.
-	fn init_h265(in_band: bool, hvcc: &mp4_atom::Hvcc, visual: &mp4_atom::Visual) -> anyhow::Result<VideoConfig> {
+	fn init_h265_static(in_band: bool, hvcc: &mp4_atom::Hvcc, visual: &mp4_atom::Visual, passthrough_mode: bool) -> anyhow::Result<VideoConfig> {
		let mut description = BytesMut::new();
		hvcc.encode_body(&mut description)?;
@@ -306,10 +371,11 @@
			display_ratio_width: None,
			display_ratio_height: None,
			optimize_for_latency: None,
+			container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy },
		})
	}

-	fn init_audio(trak: &Trak) -> anyhow::Result<AudioConfig> {
+	fn init_audio_static(trak: &Trak, passthrough_mode: bool) -> anyhow::Result<AudioConfig> {
		let stsd = &trak.mdia.minf.stbl.stsd;

		let codec = match stsd.codecs.len() {
@@ -338,6 +404,7 @@
				channel_count: mp4a.audio.channel_count as _,
				bitrate: Some(bitrate.into()),
				description: None, // TODO?
+				container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy },
			}
		}
		mp4_atom::Codec::Opus(opus) => {
@@ -347,6 +414,7 @@
				channel_count: opus.audio.channel_count as _,
				bitrate: None,
				description: None, // TODO?
+				container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy },
			}
		}
		mp4_atom::Codec::Unknown(unknown) => anyhow::bail!("unknown codec: {:?}", unknown),
@@ -491,6 +559,79 @@
		Ok(())
	}
+
+	// Transport a complete CMAF fragment (moof+mdat) directly without decomposing.
+	fn transport_fragment(&mut self, fragment: Bytes, moof: Moof) -> anyhow::Result<()> {
+		let moov = self.moov.as_ref().context("missing moov box")?;
+
+		// Loop over all of the traf boxes in the moof.
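+		// Layout sketch of what gets forwarded here (standard CMAF/fMP4 structure):
+		//   moof
+		//     mfhd (fragment sequence number)
+		//     traf, one per track:
+		//       tfhd (track id, sample defaults)
+		//       tfdt (base_media_decode_time)
+		//       trun (per-sample sizes/flags)
+		//   mdat (sample bytes)
+		// The tfdt below supplies the decode time, rescaled to microseconds;
+		// e.g. timescale = 90_000 and base_media_decode_time = 180_000 gives
+		// 180_000 * 1_000_000 / 90_000 = 2_000_000 us (2 seconds).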
+ for traf in &moof.traf { + let track_id = traf.tfhd.track_id; + let track = self.tracks.get_mut(&track_id).context("unknown track")?; + + // Find the track information in the moov + let trak = moov + .trak + .iter() + .find(|trak| trak.tkhd.track_id == track_id) + .context("unknown track")?; + + let tfdt = traf.tfdt.as_ref().context("missing tfdt box")?; + let dts = tfdt.base_media_decode_time; + let timescale = trak.mdia.mdhd.timescale as u64; + + // Convert timestamp from track timescale to microseconds + let micros = (dts as u128 * 1_000_000 / timescale as u128) as u64; + let timestamp = hang::Timestamp::from_micros(micros)?; + + // Determine keyframe status (reuse logic from extract()) + let keyframe = if trak.mdia.hdlr.handler == b"vide".into() { + // For video, check sample flags in trun entries + let mut is_keyframe = false; + if let Some(trun) = traf.trun.first() { + if let Some(entry) = trun.entries.first() { + let tfhd = &traf.tfhd; + let flags = entry + .flags + .unwrap_or(tfhd.default_sample_flags.unwrap_or_default()); + // https://chromium.googlesource.com/chromium/src/media/+/master/formats/mp4/track_run_iterator.cc#177 + let keyframe_flag = (flags >> 24) & 0x3 == 0x2; // kSampleDependsOnNoOther + let non_sync = (flags >> 16) & 0x1 == 0x1; // kSampleIsNonSyncSample + is_keyframe = keyframe_flag && !non_sync; + + if is_keyframe { + // Force an audio keyframe on video keyframes + for audio in moov.trak.iter().filter(|t| t.mdia.hdlr.handler == b"soun".into()) { + self.last_keyframe.remove(&audio.tkhd.track_id); + } + } + } + } + is_keyframe + } else { + // For audio, force keyframe every 10 seconds or at video keyframes + match self.last_keyframe.get(&track_id) { + Some(prev) => timestamp - *prev > Timestamp::from_secs(10).unwrap(), + None => true, + } + }; + + if keyframe { + self.last_keyframe.insert(track_id, timestamp); + } + + // Create frame with entire fragment as payload + let frame = hang::Frame { + timestamp, + keyframe, + payload: fragment.clone().into(), + }; + + track.write(frame)?; + } + + Ok(()) + } } impl Drop for Fmp4 { diff --git a/rs/hang/src/import/hls.rs b/rs/hang/src/import/hls.rs index 992822be4..8b1ed480f 100644 --- a/rs/hang/src/import/hls.rs +++ b/rs/hang/src/import/hls.rs @@ -31,11 +31,20 @@ pub struct HlsConfig { /// An optional HTTP client to use for fetching the playlist and segments. /// If not provided, a default client will be created. pub client: Option, + + /// Enable passthrough mode for CMAF fragment transport. + /// When enabled, complete fMP4 fragments (moof+mdat) are transported directly + /// instead of being decomposed into individual samples. + pub passthrough: bool, } impl HlsConfig { pub fn new(playlist: String) -> Self { - Self { playlist, client: None } + Self { + playlist, + client: None, + passthrough: false, + } } /// Parse the playlist string into a URL. @@ -86,6 +95,8 @@ pub struct Hls { video: Vec, /// Optional audio track shared across variants. audio: Option, + /// Passthrough mode setting for fMP4 importers. + passthrough: bool, } #[derive(Debug, Clone, Copy)] @@ -120,9 +131,11 @@ impl Hls { .build() .unwrap() }); + let passthrough = cfg.passthrough; Ok(Self { broadcast, video_importers: Vec::new(), + passthrough, audio_importer: None, client, base_url, @@ -403,7 +416,8 @@ impl Hls { /// independent while still contributing to the same shared catalog. 
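+	/// Minimal sketch of the intended call pattern (`variant_index` and
+	/// `segment_bytes` are hypothetical names; segment bytes come from the
+	/// HLS fetcher):
+	///
+	/// ```ignore
+	/// let importer = self.ensure_video_importer_for(variant_index);
+	/// importer.decode(&mut segment_bytes)?;
+	/// ```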
	fn ensure_video_importer_for(&mut self, index: usize) -> &mut Fmp4 {
		while self.video_importers.len() <= index {
-			let importer = Fmp4::new(self.broadcast.clone());
+			let mut importer = Fmp4::new(self.broadcast.clone());
+			importer.set_passthrough_mode(self.passthrough);
			self.video_importers.push(importer);
		}
@@ -412,8 +426,12 @@
	/// Create or retrieve the fMP4 importer for the audio rendition.
	fn ensure_audio_importer(&mut self) -> &mut Fmp4 {
-		self.audio_importer
-			.get_or_insert_with(|| Fmp4::new(self.broadcast.clone()))
+		let importer = self.audio_importer.get_or_insert_with(|| {
+			let mut imp = Fmp4::new(self.broadcast.clone());
+			imp.set_passthrough_mode(self.passthrough);
+			imp
+		});
+		importer
	}

	#[cfg(test)]

From 58c1ffe621fe05f61d41b326feaba4b36e4c9215 Mon Sep 17 00:00:00 2001
From: Juan Pablo Bustamante
Date: Fri, 2 Jan 2026 12:40:40 -0300
Subject: [PATCH 02/16] MSE support

---
 js/hang/src/container/codec.ts        |  18 +-
 js/hang/src/frame.ts                  |  87 +++-
 js/hang/src/util/mime.ts              |  53 +++
 js/hang/src/watch/audio/emitter.ts    |  50 ++-
 js/hang/src/watch/audio/source-mse.ts | 447 ++++++++++++++++++
 js/hang/src/watch/audio/source.ts     |  97 +++-
 js/hang/src/watch/video/source-mse.ts | 625 ++++++++++++++++++++++++++
 js/hang/src/watch/video/source.ts     |  58 +++
 justfile                              |  50 ++-
 rs/hang-cli/src/publish.rs            |   1 +
 rs/hang/src/catalog/audio/mod.rs      |   1 -
 rs/hang/src/catalog/root.rs           |  16 +
 rs/hang/src/catalog/video/mod.rs      |   1 -
 rs/hang/src/import/fmp4.rs            | 170 ++++++-
 rs/hang/src/import/hls.rs             | 140 +++++-
 rs/hang/src/model/track.rs            |  28 +-
 16 files changed, 1775 insertions(+), 67 deletions(-)
 create mode 100644 js/hang/src/util/mime.ts
 create mode 100644 js/hang/src/watch/audio/source-mse.ts
 create mode 100644 js/hang/src/watch/video/source-mse.ts

diff --git a/js/hang/src/container/codec.ts b/js/hang/src/container/codec.ts
index 57a1119cc..179e1fc38 100644
--- a/js/hang/src/container/codec.ts
+++ b/js/hang/src/container/codec.ts
@@ -15,8 +15,10 @@ export function encodeTimestamp(timestamp: Time.Micro, container: Catalog.Contai
		case "legacy":
			return encodeVarInt(timestamp);
		case "raw":
			return encodeU64(timestamp);
-		case "fmp4":
-			throw new Error("fmp4 container not yet implemented");
+		case "fmp4": {
+			// For CMAF fragments, write a fixed u64 (8 bytes) timestamp header.
+			// Note the asymmetry: decodeTimestamp below expects a VarInt header
+			// from the Rust relay.
+			return encodeU64(timestamp);
+		}
	}
 }
@@ -40,8 +42,14 @@ export function decodeTimestamp(
		const [value, remaining] = decodeU64(buffer);
		return [value as Time.Micro, remaining];
	}
-	case "fmp4":
-		throw new Error("fmp4 container not yet implemented");
+	case "fmp4": {
+		// For CMAF fragments, the media timestamp lives in the moof atom; this
+		// header is decoded only to reach the fragment payload. The server uses
+		// VarInt encoding (same as legacy) for the timestamp header, so VarInt
+		// decoding is used here, even though encodeTimestamp above writes a
+		// fixed u64 - the two paths are asymmetric today.
+		// The actual media timestamp will be extracted by MSE from the moof.
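+		// VarInt refresher (QUIC RFC 9000): the two high bits of the first
+		// byte select the width (00=1, 01=2, 10=4, 11=8 bytes), so the bytes
+		// 0x80 0x00 0x00 0x01 decode to the value 1 with a 4-byte encoding.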
+ const [value, remaining] = decodeVarInt(buffer); + return [value as Time.Micro, remaining]; + } } } @@ -59,7 +67,7 @@ export function getTimestampSize(container: Catalog.Container = DEFAULT_CONTAINE case "raw": return 8; // u64 fixed size case "fmp4": - throw new Error("fmp4 container not yet implemented"); + return 8; // VarInt maximum size (same as legacy) } } diff --git a/js/hang/src/frame.ts b/js/hang/src/frame.ts index a8209fb37..17b024c0c 100644 --- a/js/hang/src/frame.ts +++ b/js/hang/src/frame.ts @@ -112,21 +112,57 @@ export class Consumer { async #run() { // Start fetching groups in the background + // For live streams (fmp4), start from the first group we receive (which should be the most recent available) + // The init segment will be detected from the first frame of the active group + for (;;) { + console.log(`[Frame.Consumer] Waiting for next group, current active=${this.#active ?? 'undefined'}, totalGroups=${this.#groups.length}`); const consumer = await this.#track.nextGroup(); - if (!consumer) break; - - // To improve TTV, we always start with the first group. - // For higher latencies we might need to figure something else out, as its racey. - if (this.#active === undefined) { - this.#active = consumer.sequence; + if (!consumer) { + console.log(`[Frame.Consumer] No more groups available (nextGroup returned null), breaking`); + break; } - if (consumer.sequence < this.#active) { - console.warn(`skipping old group: ${consumer.sequence} < ${this.#active}`); - // Skip old groups. - consumer.close(); - continue; + console.log(`[Frame.Consumer] Received group: sequence=${consumer.sequence}, active=${this.#active ?? 'undefined'}, container=${this.#container ?? 'undefined'}, totalGroups=${this.#groups.length}`); + + // For fmp4 container (live streams), start from the first group we receive + // This should be the most recent group available when we subscribe + if (this.#container === "fmp4") { + if (this.#active === undefined) { + // First group - start from here (this is a live stream, so start from the most recent available) + this.#active = consumer.sequence; + console.log(`[Frame.Consumer] Starting from first received group (live stream): sequence=${consumer.sequence}, setting active=${this.#active}`); + } else if (consumer.sequence < this.#active) { + // Skip old groups (but accept groups equal to or greater than active) + console.log(`[Frame.Consumer] Skipping old group in live stream: sequence=${consumer.sequence} < active=${this.#active}`); + consumer.close(); + continue; + } else if (consumer.sequence === this.#active && this.#groups.some(g => g.consumer.sequence === consumer.sequence)) { + // Skip duplicate group (same sequence and already in groups) + console.log(`[Frame.Consumer] Skipping duplicate group in live stream: sequence=${consumer.sequence} == active=${this.#active} and already in groups`); + consumer.close(); + continue; + } else { + // New group or same sequence but not in groups yet - accept it and update active + if (consumer.sequence > this.#active) { + console.log(`[Frame.Consumer] New group in live stream: sequence=${consumer.sequence} > active=${this.#active}, accepting and updating active`); + this.#active = consumer.sequence; + } else { + console.log(`[Frame.Consumer] Accepting group with same sequence as active: sequence=${consumer.sequence} == active=${this.#active} (not in groups yet)`); + } + } + } else { + // For non-fmp4 containers, use standard logic + if (this.#active === undefined) { + this.#active = consumer.sequence; + 
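+					// As with the legacy path this replaces (see the deleted comment
+					// above): starting at the first observed group improves
+					// time-to-video, at the cost of being racy at higher latencies.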
console.log(`[Frame.Consumer] First group received: sequence=${consumer.sequence}, setting active=${this.#active}`); + } + + if (consumer.sequence < this.#active) { + console.warn(`[Frame.Consumer] Skipping old group: sequence=${consumer.sequence} < active=${this.#active}`); + consumer.close(); + continue; + } } const group = { @@ -147,11 +183,19 @@ export class Consumer { async #runGroup(group: Group) { try { let keyframe = true; + let frameCount = 0; + + console.log(`[Frame.Consumer] Starting to read frames from group ${group.consumer.sequence}, active=${this.#active ?? 'undefined'}`); for (;;) { + console.log(`[Frame.Consumer] Calling readFrame() on group ${group.consumer.sequence}, frameCount=${frameCount}`); const next = await group.consumer.readFrame(); - if (!next) break; + if (!next) { + console.log(`[Frame.Consumer] Group ${group.consumer.sequence} finished (readFrame returned null), read ${frameCount} frames total, active=${this.#active ?? 'undefined'}`); + break; + } + frameCount++; const { data, timestamp } = decode(next, this.#container); const frame = { data, @@ -160,6 +204,8 @@ export class Consumer { group: group.consumer.sequence, }; + console.log(`[Frame.Consumer] Read frame ${frameCount} from group ${group.consumer.sequence}: timestamp=${timestamp}, size=${data.byteLength}, keyframe=${keyframe}, active=${this.#active ?? 'undefined'}`); + keyframe = false; group.frames.push(frame); @@ -169,6 +215,7 @@ export class Consumer { } if (group.consumer.sequence === this.#active) { + console.log(`[Frame.Consumer] Notifying decoder that frame is available from active group ${group.consumer.sequence}`); this.#notify?.(); this.#notify = undefined; } else { @@ -181,7 +228,9 @@ export class Consumer { } finally { if (group.consumer.sequence === this.#active) { // Advance to the next group. + const oldActive = this.#active; this.#active += 1; + console.log(`[Frame.Consumer] Group ${oldActive} finished, advancing active to ${this.#active}, totalGroups=${this.#groups.length}`); this.#notify?.(); this.#notify = undefined; @@ -246,10 +295,14 @@ export class Consumer { this.#groups[0].consumer.sequence <= this.#active ) { const frame = this.#groups[0].frames.shift(); - if (frame) return frame; + if (frame) { + console.log(`[Frame.Consumer] Returning frame from group ${this.#groups[0].consumer.sequence}, remaining frames in group=${this.#groups[0].frames.length}, active=${this.#active}`); + return frame; + } // Check if the group is done and then remove it. if (this.#active > this.#groups[0].consumer.sequence) { + console.log(`[Frame.Consumer] Group ${this.#groups[0].consumer.sequence} is done (active=${this.#active}), removing from groups`); this.#groups.shift(); continue; } @@ -259,13 +312,19 @@ export class Consumer { throw new Error("multiple calls to decode not supported"); } + console.log(`[Frame.Consumer] No frames available, waiting for notify. active=${this.#active ?? 'undefined'}, groups=${this.#groups.length}, groupSequences=[${this.#groups.map(g => g.consumer.sequence).join(', ')}]`); + const wait = new Promise((resolve) => { this.#notify = resolve; - }).then(() => true); + }).then(() => { + console.log(`[Frame.Consumer] Notified, checking for frames again. active=${this.#active ?? 'undefined'}, groups=${this.#groups.length}`); + return true; + }); if (!(await Promise.race([wait, this.#signals.closed]))) { this.#notify = undefined; // Consumer was closed while waiting for a new frame. 
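+			// (The race above resolves falsy only when #signals.closed wins;
+			// a buffered frame resolves the notify promise truthy and the
+			// loop retries.)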
+ console.log(`[Frame.Consumer] Consumer closed while waiting`); return undefined; } } diff --git a/js/hang/src/util/mime.ts b/js/hang/src/util/mime.ts new file mode 100644 index 000000000..3d745044e --- /dev/null +++ b/js/hang/src/util/mime.ts @@ -0,0 +1,53 @@ +import type * as Catalog from "../catalog"; + +/** + * Builds a MIME type string for MediaSource from a codec string. + * + * @param codec - The codec string from the catalog (e.g., "avc1.42E01E", "mp4a.40.2") + * @param type - "video" or "audio" + * @returns MIME type string (e.g., "video/mp4; codecs=\"avc1.42E01E\"") + */ +export function buildMimeType(codec: string, type: "video" | "audio"): string { + // For MP4 containers, we use the standard MIME type format + // Most codecs are already in the correct format for MSE + return `${type}/mp4; codecs="${codec}"`; +} + +/** + * Checks if a MIME type is supported by MediaSource. + * + * @param mimeType - The MIME type to check + * @returns true if supported, false otherwise + */ +export function isMimeTypeSupported(mimeType: string): boolean { + return MediaSource.isTypeSupported(mimeType); +} + +/** + * Builds and validates a MIME type for video from catalog config. + * + * @param config - Video configuration from catalog + * @returns MIME type string or undefined if not supported + */ +export function buildVideoMimeType(config: Catalog.VideoConfig): string | undefined { + const mimeType = buildMimeType(config.codec, "video"); + if (isMimeTypeSupported(mimeType)) { + return mimeType; + } + return undefined; +} + +/** + * Builds and validates a MIME type for audio from catalog config. + * + * @param config - Audio configuration from catalog + * @returns MIME type string or undefined if not supported + */ +export function buildAudioMimeType(config: Catalog.AudioConfig): string | undefined { + const mimeType = buildMimeType(config.codec, "audio"); + if (isMimeTypeSupported(mimeType)) { + return mimeType; + } + return undefined; +} + diff --git a/js/hang/src/watch/audio/emitter.ts b/js/hang/src/watch/audio/emitter.ts index 79be52da9..464aabf18 100644 --- a/js/hang/src/watch/audio/emitter.ts +++ b/js/hang/src/watch/audio/emitter.ts @@ -46,7 +46,10 @@ export class Emitter { }); this.#signals.effect((effect) => { - const enabled = !effect.get(this.paused) && !effect.get(this.muted); + const paused = effect.get(this.paused); + const muted = effect.get(this.muted); + const enabled = !paused && !muted; + console.log(`[Audio Emitter] Setting source.enabled=${enabled} (paused=${paused}, muted=${muted})`); this.source.enabled.set(enabled); }); @@ -56,7 +59,49 @@ export class Emitter { this.muted.set(volume === 0); }); + // Handle MSE path (HTMLAudioElement) vs WebCodecs path (AudioWorklet) this.#signals.effect((effect) => { + const mseAudio = effect.get(this.source.mseAudioElement); + if (mseAudio) { + console.log("[Audio Emitter] MSE audio element found, setting up volume/mute/pause control"); + // MSE path: control HTMLAudioElement directly + effect.effect(() => { + const volume = effect.get(this.volume); + const muted = effect.get(this.muted); + const paused = effect.get(this.paused); + console.log(`[Audio Emitter] Setting volume=${volume}, muted=${muted}, paused=${paused}`); + mseAudio.volume = volume; + mseAudio.muted = muted; + + // Control play/pause state + if (paused && !mseAudio.paused) { + console.log("[Audio Emitter] Pausing audio element (paused=true)"); + mseAudio.pause(); + } else if (!paused && mseAudio.paused) { + // Resume if paused - try to play even if readyState is low + 
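+					// Note: play() can be rejected without a user gesture (browser
+					// autoplay policy); the catch inside tryPlay below only logs,
+					// and a later effect run may retry.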
const tryPlay = () => { + if (!paused && mseAudio.paused) { + console.log("[Audio Emitter] Resuming audio element (paused=false)", { + readyState: mseAudio.readyState, + buffered: mseAudio.buffered.length > 0 ? `${mseAudio.buffered.length} ranges` : "no ranges", + }); + mseAudio.play().catch(err => console.error("[Audio Emitter] Failed to resume audio:", err)); + } + }; + + // Try to play if we have metadata (HAVE_METADATA = 1), browser will start when ready + if (mseAudio.readyState >= HTMLMediaElement.HAVE_METADATA) { + tryPlay(); + } else { + // Wait for loadedmetadata event if not ready yet + mseAudio.addEventListener("loadedmetadata", tryPlay, { once: true }); + } + } + }); + return; + } + + // WebCodecs path: use AudioWorklet with GainNode const root = effect.get(this.source.root); if (!root) return; @@ -76,9 +121,10 @@ export class Emitter { }); }); + // Only apply gain transitions for WebCodecs path (when gain node exists) this.#signals.effect((effect) => { const gain = effect.get(this.#gain); - if (!gain) return; + if (!gain) return; // MSE path doesn't use gain node // Cancel any scheduled transitions on change. effect.cleanup(() => gain.gain.cancelScheduledValues(gain.context.currentTime)); diff --git a/js/hang/src/watch/audio/source-mse.ts b/js/hang/src/watch/audio/source-mse.ts new file mode 100644 index 000000000..32acb9f5c --- /dev/null +++ b/js/hang/src/watch/audio/source-mse.ts @@ -0,0 +1,447 @@ +import type * as Moq from "@moq/lite"; +import { Effect, type Getter, Signal } from "@moq/signals"; +import type * as Catalog from "../../catalog"; +import * as Frame from "../../frame"; +import type * as Time from "../../time"; +import * as Mime from "../../util/mime"; + +export interface AudioStats { + bytesReceived: number; +} + +/** + * MSE-based audio source for CMAF/fMP4 fragments. + * Uses Media Source Extensions to handle complete moof+mdat fragments. + * The browser handles decoding and playback directly from the HTMLAudioElement. 
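+ *
+ * Typical wiring, as a sketch of this class's own API:
+ * ```ts
+ * const source = new SourceMSE(latency);
+ * await source.runTrack(effect, broadcast, "audio", config);
+ * ```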
+ */ +export class SourceMSE { + #audio?: HTMLAudioElement; + #mediaSource?: MediaSource; + #sourceBuffer?: SourceBuffer; + + // Signal to expose audio element for volume/mute control + #audioElement = new Signal(undefined); + readonly audioElement = this.#audioElement as Getter; + + // Cola de fragmentos esperando ser añadidos + // Límite máximo para evitar crecimiento infinito en live streaming + #appendQueue: Uint8Array[] = []; + static readonly MAX_QUEUE_SIZE = 10; // Máximo de fragmentos en cola + + #stats = new Signal(undefined); + readonly stats = this.#stats; + + readonly latency: Signal; + + #signals = new Effect(); + + constructor(latency: Signal) { + this.latency = latency; + } + + async initialize(config: Catalog.AudioConfig): Promise { + // Build MIME type from codec + const mimeType = Mime.buildAudioMimeType(config); + if (!mimeType) { + throw new Error(`Unsupported codec for MSE: ${config.codec}`); + } + + // Create hidden audio element + this.#audio = document.createElement("audio"); + this.#audio.style.display = "none"; + this.#audio.muted = false; // Allow audio playback + this.#audio.volume = 1.0; // Set initial volume to 1.0 + document.body.appendChild(this.#audio); + + console.log("[MSE Audio] Audio element created:", { + muted: this.#audio.muted, + volume: this.#audio.volume, + readyState: this.#audio.readyState, + }); + + // Don't auto-play here - let Emitter control play/pause state + // The initial play() call is handled in runTrack() after data is buffered + + // Expose audio element via Signal for Emitter to control volume/mute + this.#audioElement.set(this.#audio); + + // Create MediaSource + this.#mediaSource = new MediaSource(); + const url = URL.createObjectURL(this.#mediaSource); + this.#audio.src = url; + + // Set initial time to 0 to ensure playback starts from the beginning + this.#audio.currentTime = 0; + + // Wait for sourceopen event + await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + reject(new Error("MediaSource sourceopen timeout")); + }, 5000); + + this.#mediaSource!.addEventListener( + "sourceopen", + () => { + clearTimeout(timeout); + try { + // Create SourceBuffer + this.#sourceBuffer = this.#mediaSource!.addSourceBuffer(mimeType); + this.#setupSourceBuffer(); + resolve(); + } catch (error) { + reject(error); + } + }, + { once: true }, + ); + + this.#mediaSource!.addEventListener("error", (e) => { + clearTimeout(timeout); + reject(new Error(`MediaSource error: ${e}`)); + }); + }); + } + + #setupSourceBuffer(): void { + if (!this.#sourceBuffer) return; + + // Procesar la cola cuando termine la operación actual + this.#sourceBuffer.addEventListener("updateend", () => { + this.#processAppendQueue(); + // Don't auto-resume here - let Emitter control play/pause state + }); + + this.#sourceBuffer.addEventListener("error", (e) => { + console.error("SourceBuffer error:", e); + }); + } + + async appendFragment(fragment: Uint8Array): Promise { + if (!this.#sourceBuffer || !this.#mediaSource) { + throw new Error("SourceBuffer not initialized"); + } + + // Si la cola está llena, descartar el fragmento más antiguo (FIFO) + // Esto mantiene baja la latencia en live streaming + if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { + const discarded = this.#appendQueue.shift(); + console.warn(`[MSE Audio] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 
0} bytes)`); + } + + // Añadir a la cola en lugar de esperar + // Crear una copia con ArrayBuffer real (no SharedArrayBuffer) + const copy = new Uint8Array(fragment); + this.#appendQueue.push(copy); + + // Intentar procesar inmediatamente si está disponible + this.#processAppendQueue(); + } + + #concatenateFragments(fragments: Uint8Array[]): Uint8Array { + if (fragments.length === 1) { + return fragments[0]; + } + + // Calculate total size + const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0); + + // Concatenate all fragments into a single Uint8Array + const result = new Uint8Array(totalSize); + let offset = 0; + for (const fragment of fragments) { + result.set(fragment, offset); + offset += fragment.byteLength; + } + + return result; + } + + #processAppendQueue(): void { + if (!this.#sourceBuffer || this.#sourceBuffer.updating || this.#appendQueue.length === 0) { + return; + } + + if (this.#mediaSource?.readyState !== "open") { + console.error(`[MSE Audio] MediaSource not open: ${this.#mediaSource?.readyState}`); + return; + } + + const fragment = this.#appendQueue.shift()!; + + try { + // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) + this.#sourceBuffer.appendBuffer(fragment as BufferSource); + + // Update stats + this.#stats.update((current) => ({ + bytesReceived: (current?.bytesReceived ?? 0) + fragment.byteLength, + })); + } catch (error) { + console.error("[MSE Audio] Error appending fragment:", error); + // No reintentamos - el fragmento se descarta + } + } + + async runTrack( + effect: Effect, + broadcast: Moq.Broadcast, + name: string, + config: Catalog.AudioConfig, + ): Promise { + // Initialize MSE + await this.initialize(config); + + const catalog = { priority: 128 }; // TODO: Get from actual catalog + const sub = broadcast.subscribe(name, catalog.priority); + effect.cleanup(() => sub.close()); + + // Create consumer for CMAF fragments + const consumer = new Frame.Consumer(sub, { + latency: this.latency, + container: "fmp4", // CMAF fragments + }); + effect.cleanup(() => consumer.close()); + + console.log("[MSE Audio] Consumer created, waiting for frames..."); + + // Initial play attempt when we have data buffered + // After this, Emitter controls play/pause state + effect.spawn(async () => { + if (!this.#audio) return; + + // Wait for some data to be buffered, then attempt to play + await new Promise((resolve) => { + let checkCount = 0; + const maxChecks = 100; // 10 seconds max wait + + let hasSeeked = false; + const checkReady = () => { + checkCount++; + if (this.#audio && this.#sourceBuffer) { + const bufferedRanges = this.#sourceBuffer.buffered; + const audioBuffered = this.#audio.buffered; + const hasBufferedData = bufferedRanges.length > 0; + const bufferedInfo = hasBufferedData + ? 
`${bufferedRanges.length} ranges, last: ${bufferedRanges.start(bufferedRanges.length - 1).toFixed(2)}-${bufferedRanges.end(bufferedRanges.length - 1).toFixed(2)}` + : "no ranges"; + console.log(`[MSE Audio] Audio readyState: ${this.#audio.readyState}, buffered: ${bufferedInfo}, checkCount: ${checkCount}`); + + // Check if currentTime is within buffered range + if (hasBufferedData && audioBuffered && audioBuffered.length > 0 && !hasSeeked) { + const currentTime = this.#audio.currentTime; + const isTimeBuffered = audioBuffered.start(0) <= currentTime && currentTime < audioBuffered.end(audioBuffered.length - 1); + + // If we have buffered data but current time is not in range, seek immediately + if (!isTimeBuffered) { + const seekTime = audioBuffered.start(0); + console.log(`[MSE Audio] Seeking to buffered start time: ${seekTime.toFixed(3)} (currentTime=${currentTime.toFixed(3)})`); + this.#audio.currentTime = seekTime; + hasSeeked = true; + // Continue checking after seek + setTimeout(checkReady, 100); + return; + } + } + + // Try to play if we have buffered data, even if readyState is low + // The browser will start playing when it's ready + if (hasBufferedData && this.#audio.readyState >= HTMLMediaElement.HAVE_METADATA) { + console.log("[MSE Audio] Audio has buffered data, attempting initial play...", { + readyState: this.#audio.readyState, + muted: this.#audio.muted, + volume: this.#audio.volume, + paused: this.#audio.paused, + hasBufferedData, + currentTime: this.#audio.currentTime, + }); + this.#audio.play().then(() => { + console.log("[MSE Audio] Audio play() succeeded (initial)"); + resolve(); + }).catch((error) => { + console.error("[MSE Audio] Audio play() failed (initial):", error); + // Continue checking, might succeed later + if (checkCount < maxChecks) { + setTimeout(checkReady, 200); + } else { + resolve(); + } + }); + } else if (checkCount >= maxChecks) { + console.warn("[MSE Audio] Audio did not get buffered data after 10 seconds"); + resolve(); + } else { + setTimeout(checkReady, 100); + } + } else if (checkCount >= maxChecks) { + resolve(); + } else { + setTimeout(checkReady, 100); + } + }; + checkReady(); + }); + }); + + // Track if we've received the init segment (moov) + let initSegmentReceived = false; + + // Helper function to detect moov atom in the buffer + // This searches for "moov" atom at any position, not just at the start + // The init segment may have other atoms before "moov" (e.g., "ftyp") + function hasMoovAtom(data: Uint8Array): boolean { + let offset = 0; + const len = data.length; + + while (offset + 8 <= len) { + // tamaño del atom (big endian) + const size = + (data[offset] << 24) | + (data[offset + 1] << 16) | + (data[offset + 2] << 8) | + data[offset + 3]; + + const type = String.fromCharCode( + data[offset + 4], + data[offset + 5], + data[offset + 6], + data[offset + 7], + ); + + if (type === "moov") return true; + + // Evitar loops infinitos si el size viene roto + if (size < 8) break; + offset += size; + } + + return false; + } + + // Read fragments and append to SourceBuffer + // MSE works better when appending complete groups (GOPs for video, sample groups for audio) + // We group fragments by MOQ group before appending + effect.spawn(async () => { + let frameCount = 0; + let currentGroup: number | undefined = undefined; + let groupFragments: Uint8Array[] = []; // Accumulate fragments for current group + + console.log("[MSE Audio] Starting to read frames from consumer..."); + + for (;;) { + const frame = await Promise.race([consumer.decode(), 
effect.cancel]); + if (!frame) { + // Append any remaining group fragments before finishing + if (groupFragments.length > 0 && initSegmentReceived) { + const groupData = this.#concatenateFragments(groupFragments); + console.log(`[MSE Audio] Appending final group (group ${currentGroup}): ${groupFragments.length} fragments, total size=${groupData.byteLength}`); + await this.appendFragment(groupData); + groupFragments = []; + } + console.log(`[MSE Audio] No more frames, total frames processed: ${frameCount}`); + break; + } + frameCount++; + console.log(`[MSE Audio] Received frame ${frameCount}: timestamp=${frame.timestamp}, size=${frame.data.byteLength}, group=${frame.group}, keyframe=${frame.keyframe}`); + + // Check if this is the init segment (moov) + const isMoovAtom = hasMoovAtom(frame.data); + const isInitSegment = isMoovAtom && !initSegmentReceived; + + if (isInitSegment) { + // Append any pending group before processing init segment + if (groupFragments.length > 0 && initSegmentReceived) { + const groupData = this.#concatenateFragments(groupFragments); + console.log(`[MSE Audio] Appending group (group ${currentGroup}) before init segment: ${groupFragments.length} fragments`); + await this.appendFragment(groupData); + groupFragments = []; + } + + // This is the init segment (moov), append it first + await this.appendFragment(frame.data); + initSegmentReceived = true; + console.log("[MSE Audio] Init segment (moov) received and appended"); + continue; + } + + // This is a regular fragment (moof+mdat) + if (!initSegmentReceived) { + console.warn("[MSE Audio] Received fragment before init segment, skipping"); + continue; + } + + // Check if we're starting a new group + if (currentGroup !== undefined && frame.group !== currentGroup) { + // Append the complete group from previous group + if (groupFragments.length > 0) { + const groupData = this.#concatenateFragments(groupFragments); + console.log(`[MSE Audio] Appending complete group (group ${currentGroup}): ${groupFragments.length} fragments, total size=${groupData.byteLength}`); + await this.appendFragment(groupData); + groupFragments = []; + } + } + + // If this is the first fragment of a new group, start accumulating + if (currentGroup === undefined || frame.group !== currentGroup) { + currentGroup = frame.group; + groupFragments = []; + } + + groupFragments.push(frame.data); + console.log(`[MSE Audio] Accumulating fragment for group (group ${frame.group}): timestamp=${frame.timestamp}, size=${frame.data.byteLength}, total fragments in group=${groupFragments.length}`); + + // For live streaming: append immediately if we have at least one fragment + // This ensures we don't wait indefinitely for more fragments + // We'll still group by MOQ group, but append more aggressively + if (groupFragments.length >= 1) { + // Append immediately - MSE can handle single fragments if they're complete + const groupData = this.#concatenateFragments(groupFragments); + console.log(`[MSE Audio] Appending group immediately (group ${currentGroup}): ${groupFragments.length} fragments, total size=${groupData.byteLength}`); + await this.appendFragment(groupData); + groupFragments = []; + } + } + }); + } + + close(): void { + // Limpiar la cola + this.#appendQueue = []; + + // Clear audio element Signal + this.#audioElement.set(undefined); + + // Clean up SourceBuffer + if (this.#sourceBuffer && this.#mediaSource) { + try { + if (this.#sourceBuffer.updating) { + this.#sourceBuffer.abort(); + } + // Don't call endOfStream() here - let it be called only once for 
+			} catch (error) {
+				console.error("Error closing SourceBuffer:", error);
+			}
+		}
+
+		// Clean up MediaSource
+		if (this.#mediaSource) {
+			try {
+				if (this.#mediaSource.readyState === "open") {
+					this.#mediaSource.endOfStream();
+				}
+				URL.revokeObjectURL(this.#audio?.src || "");
+			} catch (error) {
+				console.error("Error closing MediaSource:", error);
+			}
+		}
+
+		// Remove audio element
+		if (this.#audio) {
+			this.#audio.pause();
+			this.#audio.src = "";
+			this.#audio.remove();
+		}
+
+		this.#signals.close();
+	}
+}
+
diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts
index ca542b780..7bd6a9373 100644
--- a/js/hang/src/watch/audio/source.ts
+++ b/js/hang/src/watch/audio/source.ts
@@ -40,6 +40,10 @@ export class Source {
 	#worklet = new Signal<AudioWorkletNode | undefined>(undefined);
 	// Downcast to AudioNode so it matches Publish.Audio
 	readonly root = this.#worklet as Getter<AudioNode | undefined>;
+
+	// For MSE path, expose the HTMLAudioElement for direct control
+	#mseAudioElement = new Signal<HTMLAudioElement | undefined>(undefined);
+	readonly mseAudioElement = this.#mseAudioElement as Getter<HTMLAudioElement | undefined>;

 	#sampleRate = new Signal<number | undefined>(undefined);
 	readonly sampleRate: Getter<number | undefined> = this.#sampleRate;
@@ -74,6 +78,11 @@
 		if (audio?.renditions) {
 			const first = Object.entries(audio.renditions).at(0);
 			if (first) {
+				console.log(`[Audio Source] Rendition ${first[0]} from catalog:`, {
+					codec: first[1].codec,
+					container: first[1].container,
+					hasContainer: "container" in first[1],
+				});
 				effect.set(this.active, first[0]);
 				effect.set(this.config, first[1]);
 			}
@@ -95,6 +104,13 @@
 		const config = effect.get(this.config);
 		if (!config) return;

+		// Don't create worklet for MSE (fmp4) - browser handles playback directly
+		// The worklet is only needed for WebCodecs path
+		if (config.container === "fmp4") {
+			console.log("[Audio Source] Skipping worklet creation for MSE (fmp4) - browser handles playback directly");
+			return;
+		}
+
 		const sampleRate = config.sampleRate;
 		const channelCount = config.numberOfChannels;
@@ -149,21 +165,90 @@
 	#runDecoder(effect: Effect): void {
 		const enabled = effect.get(this.enabled);
-		if (!enabled) return;
+		console.log(`[Audio Source] #runDecoder: enabled=${enabled}`);
+		if (!enabled) {
+			console.log(`[Audio Source] #runDecoder: skipping because enabled=false`);
+			return;
+		}

 		const catalog = effect.get(this.catalog);
-		if (!catalog) return;
+		if (!catalog) {
+			console.log(`[Audio Source] #runDecoder: skipping because catalog is undefined`);
+			return;
+		}

 		const broadcast = effect.get(this.broadcast);
-		if (!broadcast) return;
+		if (!broadcast) {
+			console.log(`[Audio Source] #runDecoder: skipping because broadcast is undefined`);
+			return;
+		}

 		const config = effect.get(this.config);
-		if (!config) return;
+		if (!config) {
+			console.log(`[Audio Source] #runDecoder: skipping because config is undefined`);
+			return;
+		}

 		const active = effect.get(this.active);
-		if (!active) return;
+		if (!active) {
+			console.log(`[Audio Source] #runDecoder: skipping because active is undefined`);
+			return;
+		}
+
+		console.log(`[Audio Source] #runDecoder: subscribing to track="${active}", container="${config.container}"`);

+		// Route to MSE for CMAF, WebCodecs for legacy/raw
+		if (config.container === "fmp4") {
+			this.#runMSEPath(effect, broadcast, active, config, catalog);
+		} else {
+			this.#runWebCodecsPath(effect, broadcast, active, config, catalog);
+		}
+	}
+
+	#runMSEPath(
+		effect: Effect,
+		broadcast: Moq.Broadcast,
+		name: string,
+		config: Catalog.AudioConfig,
+		catalog: Catalog.Audio,
Catalog.Audio, + ): void { + // Import MSE source dynamically + effect.spawn(async () => { + const { SourceMSE } = await import("./source-mse.js"); + const mseSource = new SourceMSE(this.latency); + effect.cleanup(() => mseSource.close()); + + // Expose HTMLAudioElement for Emitter to control volume/mute + // Use effect to reactively get the audio element when it's ready + this.#signals.effect((eff) => { + const audioElement = eff.get(mseSource.audioElement); + eff.set(this.#mseAudioElement, audioElement); + }); + + // Forward stats + this.#signals.effect((eff) => { + const stats = eff.get(mseSource.stats); + eff.set(this.#stats, stats); + }); + + // Run MSE track - no worklet needed, browser handles everything + try { + await mseSource.runTrack(effect, broadcast, name, config); + } catch (error) { + console.error("MSE path error, falling back to WebCodecs:", error); + // Fallback to WebCodecs + this.#runWebCodecsPath(effect, broadcast, name, config, catalog); + } + }); + } - const sub = broadcast.subscribe(active, catalog.priority); + #runWebCodecsPath( + effect: Effect, + broadcast: Moq.Broadcast, + name: string, + config: Catalog.AudioConfig, + catalog: Catalog.Audio, + ): void { + const sub = broadcast.subscribe(name, catalog.priority); effect.cleanup(() => sub.close()); // Create consumer with slightly less latency than the render worklet to avoid underflowing. diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts new file mode 100644 index 000000000..cb4557950 --- /dev/null +++ b/js/hang/src/watch/video/source-mse.ts @@ -0,0 +1,625 @@ +import type * as Moq from "@moq/lite"; +import { Effect, Signal } from "@moq/signals"; +import type * as Catalog from "../../catalog"; +import * as Frame from "../../frame"; +import { PRIORITY } from "../../publish/priority"; +import type * as Time from "../../time"; +import * as Mime from "../../util/mime"; + +// The types in VideoDecoderConfig that cause a hard reload. +type RequiredDecoderConfig = Omit; + +type BufferStatus = { state: "empty" | "filled" }; + +type SyncStatus = { + state: "ready" | "wait"; + bufferDuration?: number; +}; + +export interface VideoStats { + frameCount: number; + timestamp: number; + bytesReceived: number; +} + +/** + * MSE-based video source for CMAF/fMP4 fragments. + * Uses Media Source Extensions to handle complete moof+mdat fragments. + */ +export class SourceMSE { + #video?: HTMLVideoElement; + #mediaSource?: MediaSource; + #sourceBuffer?: SourceBuffer; + + // Cola de fragmentos esperando ser añadidos + // Límite máximo para evitar crecimiento infinito en live streaming + #appendQueue: Uint8Array[] = []; + static readonly MAX_QUEUE_SIZE = 10; // Máximo de fragmentos en cola + + // Expose the current frame to render as a signal + frame = new Signal(undefined); + + // The target latency in milliseconds. + latency: Signal; + + // The display size of the video in pixels. + display = new Signal<{ width: number; height: number } | undefined>(undefined); + + // Whether to flip the video horizontally. 
+ flip = new Signal(undefined); + + bufferStatus = new Signal({ state: "empty" }); + syncStatus = new Signal({ state: "ready" }); + + #stats = new Signal(undefined); + + #signals = new Effect(); + #frameCallbackId?: number; + + constructor(latency: Signal) { + this.latency = latency; + } + + async initialize(config: RequiredDecoderConfig): Promise { + // Build MIME type from codec + const mimeType = Mime.buildVideoMimeType(config); + if (!mimeType) { + throw new Error(`Unsupported codec for MSE: ${config.codec}`); + } + console.log(`[MSE] Initializing with MIME type: ${mimeType}, codec: ${config.codec}`); + + // Create hidden video element + this.#video = document.createElement("video"); + this.#video.style.display = "none"; + this.#video.playsInline = true; + this.#video.muted = true; // Required for autoplay + document.body.appendChild(this.#video); + + // Listen for stalled event (when video runs out of data) + this.#video.addEventListener("waiting", () => { + if (!this.#video) return; + const buffered = this.#sourceBuffer?.buffered; + const videoBuffered = this.#video.buffered; + const current = this.#video.currentTime; + const sourceBufferInfo = buffered && buffered.length > 0 + ? `${buffered.length} ranges, last: ${buffered.end(buffered.length - 1).toFixed(2)}s` + : "no ranges"; + const videoBufferedInfo = videoBuffered && videoBuffered.length > 0 + ? `${videoBuffered.length} ranges, last: ${videoBuffered.end(videoBuffered.length - 1).toFixed(2)}s` + : "no ranges"; + console.warn(`[MSE] Video waiting for data (stalled) at ${current.toFixed(2)}s, SourceBuffer: ${sourceBufferInfo}, Video: ${videoBufferedInfo}`); + }); + + // Listen for ended event + this.#video.addEventListener("ended", () => { + if (!this.#video) return; + const buffered = this.#sourceBuffer?.buffered; + const videoBuffered = this.#video.buffered; + const current = this.#video.currentTime; + const sourceBufferInfo = buffered && buffered.length > 0 + ? `${buffered.length} ranges, last: ${buffered.end(buffered.length - 1).toFixed(2)}s` + : "no ranges"; + const videoBufferedInfo = videoBuffered && videoBuffered.length > 0 + ? 
`${videoBuffered.length} ranges, last: ${videoBuffered.end(videoBuffered.length - 1).toFixed(2)}s` + : "no ranges"; + console.warn(`[MSE] Video ended at ${current.toFixed(2)}s - SourceBuffer: ${sourceBufferInfo}, Video: ${videoBufferedInfo}`); + // For live streams, try to resume playback if we have buffered data + if (videoBuffered && videoBuffered.length > 0) { + const lastRange = videoBuffered.length - 1; + const end = videoBuffered.end(lastRange); + if (current < end) { + console.warn(`[MSE] Video ended but has buffered data up to ${end.toFixed(2)}s, seeking to current time`); + this.#video.currentTime = current; + this.#video.play().catch(err => console.error("[MSE] Failed to resume after ended:", err)); + } + } + }); + + // Listen for timeupdate to monitor playback + this.#video.addEventListener("timeupdate", () => { + if (!this.#video) return; + const buffered = this.#sourceBuffer?.buffered; + const videoBuffered = this.#video.buffered; + const current = this.#video.currentTime; + // Check video buffered ranges (more accurate for playback) + if (videoBuffered && videoBuffered.length > 0) { + const lastRange = videoBuffered.length - 1; + const end = videoBuffered.end(lastRange); + const remaining = end - current; + // Log warning if we're getting close to the end of buffered data + if (remaining < 1.0 && remaining > 0) { + console.warn(`[MSE] Video approaching end of buffered data: ${remaining.toFixed(2)}s remaining (current: ${current.toFixed(2)}s, buffered up to: ${end.toFixed(2)}s)`); + } + // If we've reached the end and video is paused, try to resume + if (remaining <= 0.1 && this.#video.paused) { + console.warn(`[MSE] Video reached end of buffered data, attempting to resume...`); + this.#video.play().catch(err => console.error("[MSE] Failed to resume playback:", err)); + } + } else if (buffered && buffered.length > 0) { + // SourceBuffer has data but video doesn't see it - this is a problem + const lastRange = buffered.length - 1; + const end = buffered.end(lastRange); + const remaining = end - current; + if (remaining < 1.0 && remaining > 0) { + console.warn(`[MSE] Video approaching end of SourceBuffer data (video doesn't see it): ${remaining.toFixed(2)}s remaining`); + } + } + }); + + // Create MediaSource + this.#mediaSource = new MediaSource(); + const url = URL.createObjectURL(this.#mediaSource); + this.#video.src = url; + + // Set initial time to 0 to ensure playback starts from the beginning + this.#video.currentTime = 0; + + // Wait for sourceopen event + await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + reject(new Error("MediaSource sourceopen timeout")); + }, 5000); + + this.#mediaSource!.addEventListener( + "sourceopen", + () => { + clearTimeout(timeout); + try { + // Create SourceBuffer + this.#sourceBuffer = this.#mediaSource!.addSourceBuffer(mimeType); + this.#setupSourceBuffer(); + resolve(); + } catch (error) { + reject(error); + } + }, + { once: true }, + ); + + this.#mediaSource!.addEventListener("error", (e) => { + clearTimeout(timeout); + reject(new Error(`MediaSource error: ${e}`)); + }); + }); + + // Start capturing frames from video element + this.#startFrameCapture(); + } + + #setupSourceBuffer(): void { + if (!this.#sourceBuffer) return; + + // Handle updateend events + this.#sourceBuffer.addEventListener("updateend", () => { + // SourceBuffer is ready for more data + if (this.#sourceBuffer && this.#sourceBuffer.buffered.length > 0) { + const lastRange = this.#sourceBuffer.buffered.length - 1; + const start = 
this.#sourceBuffer.buffered.start(lastRange);
+				const end = this.#sourceBuffer.buffered.end(lastRange);
+				console.log(`[MSE] SourceBuffer buffered after updateend: ${start.toFixed(2)}s-${end.toFixed(2)}s`);
+			} else {
+				console.log("[MSE] SourceBuffer buffered: 0 ranges (no data buffered yet)");
+			}
+			if (this.#video) {
+				console.log(`[MSE] Video readyState after updateend: ${this.#video.readyState} (HAVE_METADATA=${HTMLMediaElement.HAVE_METADATA}, HAVE_FUTURE_DATA=${HTMLMediaElement.HAVE_FUTURE_DATA})`);
+			}
+
+			// Process the queue once the current append operation finishes
+			this.#processAppendQueue();
+		});
+
+		this.#sourceBuffer.addEventListener("error", (e) => {
+			console.error("SourceBuffer error:", e);
+		});
+	}
+
+	#startFrameCapture(): void {
+		if (!this.#video) return;
+
+		// Use requestVideoFrameCallback to capture frames
+		const captureFrame = () => {
+			if (!this.#video) return;
+
+			try {
+				// Create VideoFrame from video element
+				const frame = new VideoFrame(this.#video, {
+					timestamp: this.#video.currentTime * 1_000_000, // Convert to microseconds
+				});
+
+				// Update stats
+				this.#stats.update((current) => ({
+					frameCount: (current?.frameCount ?? 0) + 1,
+					timestamp: frame.timestamp,
+					bytesReceived: current?.bytesReceived ?? 0,
+				}));
+
+				// Update frame signal
+				this.frame.update((prev) => {
+					prev?.close();
+					return frame;
+				});
+
+				// Update display size
+				if (this.#video.videoWidth && this.#video.videoHeight) {
+					this.display.set({
+						width: this.#video.videoWidth,
+						height: this.#video.videoHeight,
+					});
+				}
+
+				// Update buffer status
+				if (this.#video.readyState >= HTMLMediaElement.HAVE_CURRENT_DATA) {
+					this.bufferStatus.set({ state: "filled" });
+				}
+			} catch (error) {
+				console.error("Error capturing frame:", error);
+			}
+
+			// Schedule next frame capture
+			if (this.#video.requestVideoFrameCallback) {
+				this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame);
+			} else {
+				// Fallback: use requestAnimationFrame
+				this.#frameCallbackId = requestAnimationFrame(captureFrame) as unknown as number;
+			}
+		};
+
+		// Start capturing
+		if (this.#video.requestVideoFrameCallback) {
+			this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame);
+		} else {
+			this.#frameCallbackId = requestAnimationFrame(captureFrame) as unknown as number;
+		}
+	}
+
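+	// Note: MSE allows only one appendBuffer() at a time; calling it while
+	// `updating` is true throws an InvalidStateError. appendFragment() therefore
+	// queues fragments and #processAppendQueue() drains them one at a time from
+	// the `updateend` handler above.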
+	async appendFragment(fragment: Uint8Array): Promise<void> {
+		if (!this.#sourceBuffer || !this.#mediaSource) {
+			throw new Error("SourceBuffer not initialized");
+		}
+
+		// If the queue is full, discard the oldest fragment (FIFO).
+		// This keeps latency low for live streaming.
+		if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) {
+			const discarded = this.#appendQueue.shift();
+			console.warn(`[MSE] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`);
+		}
+
+		// Enqueue instead of waiting.
+		// Create a copy backed by a real ArrayBuffer (not a SharedArrayBuffer)
+		const copy = new Uint8Array(fragment);
+		this.#appendQueue.push(copy);
+
+		// Try to process immediately if the SourceBuffer is available
+		this.#processAppendQueue();
+	}
+
+	#concatenateFragments(fragments: Uint8Array[]): Uint8Array {
+		if (fragments.length === 1) {
+			return fragments[0];
+		}
+
+		// Calculate total size
+		const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0);
+
+		// Concatenate all fragments into a single Uint8Array
+		const result = new Uint8Array(totalSize);
+		let offset = 0;
+		for (const fragment of fragments) {
+			result.set(fragment, offset);
+			offset += fragment.byteLength;
+		}
+
+		return result;
+	}
+
+	#processAppendQueue(): void {
+		if (!this.#sourceBuffer || this.#sourceBuffer.updating || this.#appendQueue.length === 0) {
+			return;
+		}
+
+		if (this.#mediaSource?.readyState !== "open") {
+			console.error(`[MSE] MediaSource not open: ${this.#mediaSource?.readyState}`);
+			return;
+		}
+
+		const fragment = this.#appendQueue.shift()!;
+
+		try {
+			// appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView)
+			this.#sourceBuffer.appendBuffer(fragment as BufferSource);
+
+			// Update stats
+			this.#stats.update((current) => ({
+				frameCount: current?.frameCount ?? 0,
+				timestamp: current?.timestamp ?? 0,
+				bytesReceived: (current?.bytesReceived ?? 0) + fragment.byteLength,
+			}));
+		} catch (error) {
+			console.error("[MSE] Error appending fragment:", error);
+			console.error("[MSE] SourceBuffer state:", {
+				updating: this.#sourceBuffer.updating,
+				buffered: this.#sourceBuffer.buffered.length,
+			});
+			console.error("[MSE] MediaSource state:", {
+				readyState: this.#mediaSource.readyState,
+				duration: this.#mediaSource.duration,
+			});
+			// No retry - the fragment is simply dropped
+		}
+	}
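+
+	// Drives the whole MSE path for one video track: initialize() sets up the
+	// MediaSource and frame capture, then CMAF fragments pulled from the MoQ
+	// subscription are appended below, while #startFrameCapture() lifts the
+	// decoded frames back out of the hidden <video> element as VideoFrames.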
+	async runTrack(
+		effect: Effect,
+		broadcast: Moq.Broadcast,
+		name: string,
+		config: RequiredDecoderConfig,
+	): Promise<void> {
+		// Initialize MSE
+		await this.initialize(config);
+
+		const sub = broadcast.subscribe(name, PRIORITY.video);
+		effect.cleanup(() => sub.close());
+
+		// Create consumer for CMAF fragments
+		const consumer = new Frame.Consumer(sub, {
+			latency: this.latency,
+			container: "fmp4", // CMAF fragments
+		});
+		effect.cleanup(() => consumer.close());
+
+		// Start playing video when we have enough data
+		effect.spawn(async () => {
+			if (!this.#video) return;
+
+			// Wait for some data to be buffered
+			await new Promise<void>((resolve) => {
+				let checkCount = 0;
+				const maxChecks = 100; // 10 seconds max wait
+				let hasSeeked = false;
+
+				const checkReady = () => {
+					checkCount++;
+					if (this.#video) {
+						const bufferedRanges = this.#sourceBuffer?.buffered;
+						const videoBuffered = this.#video.buffered;
+						const sourceBufferInfo = bufferedRanges && bufferedRanges.length > 0
+							? `${bufferedRanges.length} ranges, last: ${bufferedRanges.start(bufferedRanges.length - 1).toFixed(2)}-${bufferedRanges.end(bufferedRanges.length - 1).toFixed(2)}`
+							: "no ranges";
+						const videoBufferedInfo = videoBuffered && videoBuffered.length > 0
+							? `${videoBuffered.length} ranges, last: ${videoBuffered.start(videoBuffered.length - 1).toFixed(2)}-${videoBuffered.end(videoBuffered.length - 1).toFixed(2)}`
+							: "no ranges";
+						console.log(`[MSE] Video readyState: ${this.#video.readyState}, SourceBuffer buffered: ${sourceBufferInfo}, Video buffered: ${videoBufferedInfo}, checkCount: ${checkCount}`);
+
+						// Check if we have buffered data and if the current time is within the buffered range.
+						// Use video.buffered instead of sourceBuffer.buffered for checking if the video can play.
+						const hasBufferedData = videoBuffered && videoBuffered.length > 0;
+						const currentTime = this.#video.currentTime;
+						const isTimeBuffered = hasBufferedData && videoBuffered.start(0) <= currentTime && currentTime < videoBuffered.end(videoBuffered.length - 1);
+
+						// If we have buffered data but the current time is not in range, seek immediately
+						if (hasBufferedData && !isTimeBuffered && !hasSeeked) {
+							const seekTime = videoBuffered.start(0);
+							this.#video.currentTime = seekTime;
+							hasSeeked = true;
+							// Continue checking after seek
+							setTimeout(checkReady, 100);
+							return;
+						}
+
+						if (this.#video.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) {
+							console.log("[MSE] Video has enough data, attempting to play...");
+							this.#video.play().then(() => {
+								resolve();
+							}).catch((error) => {
+								console.error("[MSE] Video play() failed:", error);
+								resolve();
+							});
+						} else if (hasBufferedData && checkCount >= 10) {
+							// If we have buffered data but readyState hasn't advanced, try playing anyway after 1 second
+							console.warn("[MSE] Video has buffered data but readyState hasn't advanced, attempting to play...");
+							this.#video.play().then(() => {
+								resolve();
+							}).catch((error) => {
+								console.error("[MSE] Video play() failed:", error);
+								// Continue checking
+								if (checkCount < maxChecks) {
+									setTimeout(checkReady, 100);
+								} else {
+									resolve();
+								}
+							});
+						} else if (checkCount >= maxChecks) {
+							console.warn("[MSE] Video did not reach HAVE_FUTURE_DATA after 10 seconds, attempting to play anyway...");
+							this.#video.play().then(() => {
+								resolve();
+							}).catch((error) => {
+								console.error("[MSE] Video play() failed:", error);
+								resolve();
+							});
+						} else {
+							setTimeout(checkReady, 100);
+						}
+					}
+				};
+				checkReady();
+			});
+		});
+
+		// Track if we've received the init segment (ftyp+moov or moov)
+		let initSegmentReceived = false;
+
+		// Helper function to detect an init segment (ftyp or moov atom).
+		// The init segment may start with "ftyp" followed by "moov", or just "moov".
+		// Note: this assumes 32-bit box sizes; a 64-bit largesize (size == 1) or
+		// size == 0 (box extends to end of file) simply ends the scan.
+		function isInitSegmentData(data: Uint8Array): boolean {
+			if (data.length < 8) return false;
+
+			let offset = 0;
+			const len = data.length;
+
+			while (offset + 8 <= len) {
+				// Atom size (big endian)
+				const size =
+					(data[offset] << 24) |
+					(data[offset + 1] << 16) |
+					(data[offset + 2] << 8) |
+					data[offset + 3];
+
+				const type = String.fromCharCode(
+					data[offset + 4],
+					data[offset + 5],
+					data[offset + 6],
+					data[offset + 7],
+				);
+
+				// Init segment contains either "ftyp" or "moov" atoms
+				if (type === "ftyp" || type === "moov") return true;
+
+				// Avoid infinite loops if the size is broken
+				if (size < 8 || size === 0) break;
+				offset += size;
+			}
+
+			return false;
+		}
+
+		// Read fragments and append them to the SourceBuffer.
+		// Fragments are tracked per MOQ group (one group per GOP), but to keep
+		// latency low each complete moof+mdat fragment is appended as soon as it
+		// arrives rather than waiting for the whole GOP.
+		effect.spawn(async () => {
+			let frameCount = 0;
+			let currentGroup: number | undefined = undefined;
+			let gopFragments: Uint8Array[] = []; // Accumulate fragments for current GOP
+
+			for (;;)
{ + const frame = await Promise.race([consumer.decode(), effect.cancel]); + if (!frame) { + // Append any remaining GOP fragments before finishing + if (gopFragments.length > 0 && initSegmentReceived) { + const gopData = this.#concatenateFragments(gopFragments); + await this.appendFragment(gopData); + gopFragments = []; + } + console.log(`[MSE] No more frames, total frames processed: ${frameCount}`); + break; + } + frameCount++; + console.log(`[MSE] Received frame ${frameCount}: timestamp=${frame.timestamp}, size=${frame.data.byteLength}, group=${frame.group}, keyframe=${frame.keyframe}`); + + // Check if this is the init segment (ftyp+moov or just moov) + const containsInitSegmentData = isInitSegmentData(frame.data); + const isInitSegment = containsInitSegmentData && !initSegmentReceived; + + if (isInitSegment) { + // Append any pending GOP before processing init segment + if (gopFragments.length > 0 && initSegmentReceived) { + const gopData = this.#concatenateFragments(gopFragments); + await this.appendFragment(gopData); + gopFragments = []; + } + + // This is the init segment (moov), append it first + console.log("[MSE] Appending init segment..."); + await this.appendFragment(frame.data); + initSegmentReceived = true; + console.log("[MSE] Init segment (moov) received and appended"); + continue; + } + + // This is a regular fragment (moof+mdat) + if (!initSegmentReceived) { + console.warn(`[MSE] Received fragment before init segment (timestamp=${frame.timestamp}), skipping`); + continue; + } + + // Check if we're starting a new group (new GOP) + if (currentGroup !== undefined && frame.group !== currentGroup) { + // Append the complete GOP from previous group + if (gopFragments.length > 0) { + const gopData = this.#concatenateFragments(gopFragments); + console.log(`[MSE] Appending complete GOP (group ${currentGroup}): ${gopFragments.length} fragments, total size=${gopData.byteLength}`); + await this.appendFragment(gopData); + gopFragments = []; + } + } + + // If this is the first fragment of a new group, start accumulating + if (currentGroup === undefined || frame.group !== currentGroup) { + currentGroup = frame.group; + gopFragments = []; + } + + gopFragments.push(frame.data); + console.log(`[MSE] Accumulating fragment for GOP (group ${frame.group}): timestamp=${frame.timestamp}, size=${frame.data.byteLength}, total fragments in GOP=${gopFragments.length}`); + + // For live streaming: append immediately if we have at least one fragment + // This ensures we don't wait indefinitely for more fragments + // We'll still group by MOQ group, but append more aggressively + if (gopFragments.length >= 1) { + // Append immediately - MSE can handle single fragments if they're complete GOPs + const gopData = this.#concatenateFragments(gopFragments); + await this.appendFragment(gopData); + gopFragments = []; + } + } + }); + } + + close(): void { + this.#appendQueue = []; + + // Cancel frame capture + if (this.#frameCallbackId !== undefined) { + if (this.#video?.requestVideoFrameCallback) { + this.#video.cancelVideoFrameCallback(this.#frameCallbackId); + } else { + cancelAnimationFrame(this.#frameCallbackId); + } + } + + // Close current frame + this.frame.update((prev) => { + prev?.close(); + return undefined; + }); + + // Clean up SourceBuffer + if (this.#sourceBuffer && this.#mediaSource) { + try { + if (this.#sourceBuffer.updating) { + this.#sourceBuffer.abort(); + } + if (this.#mediaSource.readyState === "open") { + this.#mediaSource.endOfStream(); + } + } catch (error) { + console.error("Error 
closing SourceBuffer:", error); + } + } + + // Clean up MediaSource + if (this.#mediaSource) { + try { + if (this.#mediaSource.readyState === "open") { + this.#mediaSource.endOfStream(); + } + URL.revokeObjectURL(this.#video?.src || ""); + } catch (error) { + console.error("Error closing MediaSource:", error); + } + } + + // Remove video element + if (this.#video) { + this.#video.pause(); + this.#video.src = ""; + this.#video.remove(); + } + + this.#signals.close(); + } + + get stats() { + return this.#stats; + } +} + diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index a7fd2c923..c2d52e39e 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -126,6 +126,11 @@ export class Source { const supported: Record = {}; for (const [name, rendition] of Object.entries(renditions)) { + console.log(`[Video Source] Rendition ${name} from catalog:`, { + codec: rendition.codec, + container: rendition.container, + hasContainer: "container" in rendition, + }); const description = rendition.description ? Hex.toBytes(rendition.description) : undefined; const { supported: valid } = await VideoDecoder.isConfigSupported({ @@ -192,6 +197,59 @@ export class Source { } #runTrack(effect: Effect, broadcast: Moq.Broadcast, name: string, config: RequiredDecoderConfig): void { + // Route to MSE for CMAF, WebCodecs for legacy/raw + if (config.container === "fmp4") { + this.#runMSEPath(effect, broadcast, name, config); + } else { + this.#runWebCodecsPath(effect, broadcast, name, config); + } + } + + #runMSEPath(effect: Effect, broadcast: Moq.Broadcast, name: string, config: RequiredDecoderConfig): void { + // Import MSE source dynamically to avoid loading if not needed + effect.spawn(async () => { + const { SourceMSE } = await import("./source-mse.js"); + const mseSource = new SourceMSE(this.latency); + effect.cleanup(() => mseSource.close()); + + // Forward signals using effects + this.#signals.effect((eff) => { + const frame = eff.get(mseSource.frame); + eff.set(this.frame, frame); + }); + + this.#signals.effect((eff) => { + const display = eff.get(mseSource.display); + eff.set(this.display, display); + }); + + this.#signals.effect((eff) => { + const status = eff.get(mseSource.bufferStatus); + eff.set(this.bufferStatus, status, { state: "empty" }); + }); + + this.#signals.effect((eff) => { + const status = eff.get(mseSource.syncStatus); + eff.set(this.syncStatus, status, { state: "ready" }); + }); + + this.#signals.effect((eff) => { + const stats = eff.get(mseSource.stats); + eff.set(this.#stats, stats); + }); + + // Run MSE track + try { + await mseSource.runTrack(effect, broadcast, name, config); + } catch (error) { + console.error("MSE path error, falling back to WebCodecs:", error); + // Fallback to WebCodecs + this.#runWebCodecsPath(effect, broadcast, name, config); + } + }); + } + + #runWebCodecsPath(effect: Effect, broadcast: Moq.Broadcast, name: string, config: RequiredDecoderConfig): void { const sub = broadcast.subscribe(name, PRIORITY.video); // TODO use priority from catalog effect.cleanup(() => sub.close()); diff --git a/justfile b/justfile index 2e908b1da..53e7971b2 100644 --- a/justfile +++ b/justfile @@ -147,7 +147,7 @@ pub name url="http://localhost:4443/anon" *args: - | cargo run --bin hang -- publish --url "{{url}}" --name "{{name}}" fmp4 {{args}} # Generate and ingest an HLS stream from a video file. 
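+# Pass `true` as the second argument to forward whole CMAF fragments (moof+mdat) via --passthrough.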
-pub-hls name passthrough="false" relay="http://localhost:4443/anon":
+pub-hls name passthrough='false' relay="http://localhost:4443/anon":
     #!/usr/bin/env bash
     set -euo pipefail
 
@@ -174,7 +174,7 @@ pub-hls name passthrough="false" relay="http://localhost:4443/anon":
         -c:v:1 libx264 -profile:v:1 high -level:v:1 4.1 -pix_fmt:v:1 yuv420p -tag:v:1 avc1 \
         -b:v:1 300k -maxrate:v:1 330k -bufsize:v:1 600k \
         -c:a aac -b:a 128k \
-        -f hls -hls_time 2 -hls_list_size 12 \
+        -f hls -hls_time 2 -hls_list_size 6 \
         -hls_flags independent_segments+delete_segments \
         -hls_segment_type fmp4 \
         -master_pl_name master.m3u8 \
@@ -200,24 +200,60 @@ pub-hls name passthrough="false" relay="http://localhost:4443/anon":
         exit 1
     fi
 
-    if [ "{{passthrough}}" = "true" ]; then
+    # Wait for individual playlists to be generated (they're referenced in master.m3u8)
+    # Give ffmpeg a bit more time to generate the variant playlists
+    echo ">>> Waiting for variant playlists..."
+    sleep 2
+    for _ in {1..20}; do
+        # Check if at least one variant playlist exists
+        if [ -f "$OUT_DIR/v0/stream.m3u8" ] || [ -f "$OUT_DIR/v720/stream.m3u8" ] || [ -f "$OUT_DIR/v144/stream.m3u8" ] || [ -f "$OUT_DIR/vaudio/stream.m3u8" ]; then
+            break
+        fi
+        sleep 0.5
+    done
+
+    echo ">>> Passthrough parameter value: '{{passthrough}}'"
+    # `just` may pass the argument through as "passthrough=true" rather than a bare "true",
+    # so check that it contains "true" (case-insensitive) and is not exactly "false"
+    if echo "{{passthrough}}" | grep -qi "true" && [ "{{passthrough}}" != "false" ]; then
         echo ">>> Starting HLS ingest from disk with passthrough mode: $OUT_DIR/master.m3u8"
         PASSTHROUGH_FLAG="--passthrough"
     else
-        echo ">>> Starting HLS ingest from disk: $OUT_DIR/master.m3u8"
+        echo ">>> Starting HLS ingest from disk (non-passthrough mode): $OUT_DIR/master.m3u8"
        PASSTHROUGH_FLAG=""
     fi
 
     # Trap to clean up ffmpeg on exit
+    CLEANUP_CALLED=false
     cleanup() {
+        if [ "$CLEANUP_CALLED" = "true" ]; then
+            return
+        fi
+        CLEANUP_CALLED=true
         echo "Shutting down..."
         kill $FFMPEG_PID 2>/dev/null || true
-        exit 0
+        # Wait a bit for ffmpeg to finish
+        sleep 0.5
+        # Force kill if still running
+        kill -9 $FFMPEG_PID 2>/dev/null || true
     }
-    trap cleanup SIGINT SIGTERM
+    trap cleanup SIGINT SIGTERM EXIT
 
     # Run hang to ingest from local files
-    cargo run --bin hang -- publish --url "{{relay}}" --name "{{name}}" hls --playlist "$OUT_DIR/master.m3u8" $PASSTHROUGH_FLAG
+    # Capture the exit code with `||` so `set -e` can't abort before we read it
+    EXIT_CODE=0
+    if [ -n "$PASSTHROUGH_FLAG" ]; then
+        echo ">>> Running with --passthrough flag"
+        cargo run --bin hang -- publish --url "{{relay}}" --name "{{name}}" hls --playlist "$OUT_DIR/master.m3u8" --passthrough || EXIT_CODE=$?
+    else
+        echo ">>> Running without --passthrough flag"
+        cargo run --bin hang -- publish --url "{{relay}}" --name "{{name}}" hls --playlist "$OUT_DIR/master.m3u8" || EXIT_CODE=$?
+    fi
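+
+    # Note: cleanup also fires via the EXIT trap; the CLEANUP_CALLED guard above
+    # keeps the explicit call below idempotent with the trap.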
+ + # Cleanup after cargo run completes (success or failure) + cleanup + + # Exit with the same code as cargo run + exit $EXIT_CODE # Publish a video using H.264 Annex B format to the localhost relay server pub-h264 name url="http://localhost:4443/anon" *args: diff --git a/rs/hang-cli/src/publish.rs b/rs/hang-cli/src/publish.rs index 12154e64e..44fd012ee 100644 --- a/rs/hang-cli/src/publish.rs +++ b/rs/hang-cli/src/publish.rs @@ -49,6 +49,7 @@ impl Publish { PublishDecoder::Decoder(Box::new(stream)) } PublishFormat::Hls { playlist, passthrough } => { + tracing::info!(passthrough = *passthrough, "HLS publish with passthrough flag"); let hls = hang::import::Hls::new( broadcast.clone(), hang::import::HlsConfig { diff --git a/rs/hang/src/catalog/audio/mod.rs b/rs/hang/src/catalog/audio/mod.rs index b74b5b6a1..1869ebb56 100644 --- a/rs/hang/src/catalog/audio/mod.rs +++ b/rs/hang/src/catalog/audio/mod.rs @@ -65,6 +65,5 @@ pub struct AudioConfig { /// Container format for frame encoding. /// Defaults to "legacy" for backward compatibility. - #[serde(default)] pub container: Container, } diff --git a/rs/hang/src/catalog/root.rs b/rs/hang/src/catalog/root.rs index 2defc55cb..590dc2fcb 100644 --- a/rs/hang/src/catalog/root.rs +++ b/rs/hang/src/catalog/root.rs @@ -205,6 +205,22 @@ impl Drop for CatalogGuard<'_> { // TODO decide if this should return an error, or be impossible to fail let frame = self.catalog.to_string().expect("invalid catalog"); + + // Log the catalog JSON to verify container field is included + if let Some(video) = &self.catalog.video { + for (name, config) in &video.renditions { + tracing::info!(track = name, container = ?config.container, "publishing catalog with container"); + } + } + if let Some(audio) = &self.catalog.audio { + for (name, config) in &audio.renditions { + tracing::info!(track = name, container = ?config.container, "publishing catalog with container"); + } + } + + // Log the full catalog JSON to debug serialization + tracing::debug!(catalog_json = %frame, "publishing catalog JSON"); + group.write_frame(frame); group.close(); } diff --git a/rs/hang/src/catalog/video/mod.rs b/rs/hang/src/catalog/video/mod.rs index 1670fabb3..af6c1a627 100644 --- a/rs/hang/src/catalog/video/mod.rs +++ b/rs/hang/src/catalog/video/mod.rs @@ -114,6 +114,5 @@ pub struct VideoConfig { /// Container format for frame encoding. /// Defaults to "legacy" for backward compatibility. 
-	#[serde(default)]
 	pub container: Container,
 }
diff --git a/rs/hang/src/import/fmp4.rs b/rs/hang/src/import/fmp4.rs
index 0f13849c6..66f38885a 100644
--- a/rs/hang/src/import/fmp4.rs
+++ b/rs/hang/src/import/fmp4.rs
@@ -51,6 +51,16 @@ pub struct Fmp4 {
 
 	/// When passthrough_mode is enabled, store raw bytes of moof
 	moof_bytes: Option<Bytes>,
+
+	/// When passthrough_mode is enabled, store raw bytes of ftyp (file type box)
+	ftyp_bytes: Option<Bytes>,
+
+	/// When passthrough_mode is enabled, store raw bytes of moov (init segment)
+	moov_bytes: Option<Bytes>,
+
+	/// When passthrough_mode is enabled, store a copy of init segment (ftyp+moov) to send with each keyframe
+	/// This ensures new subscribers can receive the init segment even if group 0 is not available
+	init_segment_bytes_for_keyframes: Option<Bytes>,
 }
 
 impl Fmp4 {
@@ -70,6 +80,9 @@ impl Fmp4 {
 			moof_size: 0,
 			passthrough_mode: false,
 			moof_bytes: None,
+			ftyp_bytes: None,
+			moov_bytes: None,
+			init_segment_bytes_for_keyframes: None,
 		}
 	}
 
@@ -101,9 +114,35 @@ impl Fmp4 {
 
 		match atom {
 			Any::Ftyp(_) | Any::Styp(_) => {
-				// Skip
+				// If passthrough mode, capture raw bytes of ftyp (file type box)
+				if self.passthrough_mode {
+					if let Some(ref bytes) = available_bytes {
+						if bytes_offset + size <= bytes.len() {
+							self.ftyp_bytes = Some(bytes.slice(bytes_offset..bytes_offset + size));
+							tracing::debug!(ftyp_size = size, bytes_offset, "captured ftyp bytes for init segment");
+						} else {
+							tracing::warn!(bytes_offset, size, available_len = bytes.len(), "ftyp bytes out of range");
+						}
+					} else {
+						tracing::warn!("passthrough mode but available_bytes is None when processing ftyp");
+					}
+				}
+				// Skip ftyp/styp atoms in normal processing
 			}
 			Any::Moov(moov) => {
+				// If passthrough mode, capture raw bytes of moov (init segment)
+				if self.passthrough_mode {
+					if let Some(ref bytes) = available_bytes {
+						if bytes_offset + size <= bytes.len() {
+							self.moov_bytes = Some(bytes.slice(bytes_offset..bytes_offset + size));
+							tracing::debug!(moov_size = size, bytes_offset, "captured moov bytes for init segment");
+						} else {
+							tracing::warn!(bytes_offset, size, available_len = bytes.len(), "moov bytes out of range");
+						}
+					} else {
+						tracing::warn!("passthrough mode but available_bytes is None when processing moov");
+					}
+				}
 				// Create the broadcast.
self.init(moov)?; } @@ -146,7 +185,9 @@ impl Fmp4 { fragment_bytes.extend_from_slice(&mdat_bytes); let fragment = fragment_bytes.freeze(); + tracing::info!(moof_size = moof_bytes.len(), mdat_size = mdat_bytes.len(), total_fragment_size = fragment.len(), "processing CMAF fragment (moof+mdat)"); self.transport_fragment(fragment, moof)?; + tracing::info!("finished processing CMAF fragment, ready for next fragment"); } else { // Extract the samples from the mdat atom (existing behavior) let header_size = size - mdat.data.len(); @@ -154,8 +195,9 @@ impl Fmp4 { } } _ => { - // Skip unknown atoms - tracing::warn!(?atom, "skipping") + // Skip unknown atoms (e.g., sidx, which is optional and used for segment indexing) + // These are safe to ignore and don't affect playback + tracing::debug!(?atom, "skipping optional atom") } } @@ -174,6 +216,7 @@ impl Fmp4 { fn init(&mut self, moov: Moov) -> anyhow::Result<()> { let passthrough_mode = self.passthrough_mode; + tracing::info!(passthrough_mode, "initializing fMP4 with passthrough mode"); let mut catalog = self.catalog.lock(); for trak in &moov.trak { @@ -183,6 +226,7 @@ impl Fmp4 { let track = match handler.as_ref() { b"vide" => { let config = Self::init_video_static(trak, passthrough_mode)?; + tracing::info!(container = ?config.container, "created video config with container"); let track = moq::Track { name: self.broadcast.track_name("video"), @@ -200,6 +244,7 @@ impl Fmp4 { } b"soun" => { let config = Self::init_audio_static(trak, passthrough_mode)?; + tracing::info!(container = ?config.container, "created audio config with container"); let track = moq::Track { name: self.broadcast.track_name("audio"), @@ -224,6 +269,52 @@ impl Fmp4 { self.moov = Some(moov); + // In passthrough mode, send the init segment (ftyp+moov) as a special frame + // This must be sent before any fragments for MSE to work + // NOTE: We send this AFTER creating tracks so that the tracks exist + // when we try to write to them. The init segment will create the first + // group (sequence 0), and fragments will create subsequent groups. 
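+		// Resulting per-track group layout in passthrough mode (see transport_fragment):
+		//   group 0: [ftyp+moov]               - init segment only, kept open
+		//   group N: [ftyp+moov][moof+mdat]... - init re-sent at each keyframe
+		// so a subscriber can start decoding from the head of whichever group it joins.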
+ if passthrough_mode { + if let Some(moov_bytes) = self.moov_bytes.take() { + let timestamp = hang::Timestamp::from_micros(0)?; + + // Build init segment: ftyp (if available) + moov + let mut init_segment = BytesMut::new(); + if let Some(ref ftyp_bytes) = self.ftyp_bytes { + init_segment.extend_from_slice(ftyp_bytes); + tracing::debug!(ftyp_size = ftyp_bytes.len(), "including ftyp in init segment"); + } + init_segment.extend_from_slice(&moov_bytes); + let init_segment_bytes = init_segment.freeze(); + + tracing::info!(tracks = self.tracks.len(), init_segment_size = init_segment_bytes.len(), ftyp_included = self.ftyp_bytes.is_some(), "sending init segment to all tracks"); + + // Verify moov atom signature + let moov_offset = self.ftyp_bytes.as_ref().map(|f| f.len()).unwrap_or(0); + if moov_offset + 8 <= init_segment_bytes.len() { + let atom_type = String::from_utf8_lossy(&init_segment_bytes[moov_offset + 4..moov_offset + 8]); + tracing::info!(atom_type = %atom_type, "verifying moov atom signature in init segment"); + } + + // Store a copy for sending with keyframes + self.init_segment_bytes_for_keyframes = Some(init_segment_bytes.clone()); + + // Send init segment to all tracks - this creates the first group (sequence 0) + for (_track_id, track) in &mut self.tracks { + let frame = hang::Frame { + timestamp, + keyframe: true, // Init segment is always a keyframe - this creates a new group + payload: init_segment_bytes.clone().into(), + }; + track.write(frame)?; + tracing::debug!(track_id = ?_track_id, timestamp = ?timestamp, "wrote init segment frame to track"); + } + tracing::info!("init segment (ftyp+moov) sent to all tracks - should create groups with sequence 0"); + } else { + tracing::warn!("passthrough mode enabled but moov_bytes is None - init segment will not be sent"); + } + } + Ok(()) } @@ -562,7 +653,21 @@ impl Fmp4 { // Transport a complete CMAF fragment (moof+mdat) directly without decomposing. fn transport_fragment(&mut self, fragment: Bytes, moof: Moof) -> anyhow::Result<()> { - let moov = self.moov.as_ref().context("missing moov box")?; + // Verify that init segment was sent before fragments + if self.moov_bytes.is_some() { + tracing::warn!("transporting fragment but moov_bytes is still set - init segment may not have been sent"); + } + + // Verify fragment starts with moof atom + if fragment.len() >= 8 { + let atom_type = String::from_utf8_lossy(&fragment[4..8]); + tracing::info!(atom_type = %atom_type, fragment_size = fragment.len(), passthrough_mode = self.passthrough_mode, "transporting fragment"); + } + + // Ensure moov is available (init segment must be processed first) + let moov = self.moov.as_ref().ok_or_else(|| { + anyhow::anyhow!("missing moov box - init segment must be processed before fragments. Make sure ensure_init_segment() is called first.") + })?; // Loop over all of the traf boxes in the moof. 
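+		// Each traf maps to one track; per CMAF fragment this writes at most one
+		// init-segment frame (on passthrough keyframes) plus one moof+mdat frame.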
for traf in &moof.traf { @@ -585,7 +690,7 @@ impl Fmp4 { let timestamp = hang::Timestamp::from_micros(micros)?; // Determine keyframe status (reuse logic from extract()) - let keyframe = if trak.mdia.hdlr.handler == b"vide".into() { + let is_keyframe = if trak.mdia.hdlr.handler == b"vide".into() { // For video, check sample flags in trun entries let mut is_keyframe = false; if let Some(trun) = traf.trun.first() { @@ -616,18 +721,55 @@ impl Fmp4 { } }; - if keyframe { + if is_keyframe { self.last_keyframe.insert(track_id, timestamp); } - // Create frame with entire fragment as payload - let frame = hang::Frame { - timestamp, - keyframe, - payload: fragment.clone().into(), - }; - - track.write(frame)?; + // In passthrough mode, create new groups periodically (every keyframe) to allow + // new subscribers to join at the most recent point. Each group starts with init segment. + // This makes it behave like a live stream where new subscribers start from recent content. + if self.passthrough_mode { + // For keyframes, send init segment to create a new group (every keyframe creates a new group) + // This allows new subscribers to receive the init segment and start from recent content + if is_keyframe { + tracing::info!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "KEYFRAME DETECTED - creating new group"); + if let Some(ref init_segment_bytes) = self.init_segment_bytes_for_keyframes { + let init_frame = hang::Frame { + timestamp, + keyframe: true, // Send as keyframe to create a new group + payload: init_segment_bytes.clone().into(), + }; + track.write(init_frame)?; + tracing::info!(track_id, timestamp = ?timestamp, init_segment_size = init_segment_bytes.len(), "sent init segment as first frame of new group (keyframe) for live stream"); + } else { + tracing::warn!(track_id, "is_keyframe=true but init_segment_bytes_for_keyframes is None"); + } + } else { + tracing::debug!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "non-keyframe fragment in passthrough mode"); + } + + // Send fragment as non-keyframe (in same group as init segment if keyframe, or current group if not) + let frame = hang::Frame { + timestamp, + keyframe: false, // Send as non-keyframe so it goes in the same group as init segment (if keyframe) or current group + payload: fragment.clone().into(), + }; + track.write(frame)?; + if is_keyframe { + tracing::info!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "sent keyframe fragment in passthrough mode (new group created)"); + } else { + tracing::debug!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "sent non-keyframe fragment in passthrough mode"); + } + } else { + // For non-passthrough mode, just write the frame normally + let frame = hang::Frame { + timestamp, + keyframe: is_keyframe, + payload: fragment.clone().into(), + }; + track.write(frame)?; + tracing::info!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), is_keyframe = is_keyframe, "sent fragment (non-passthrough mode)"); + } } Ok(()) diff --git a/rs/hang/src/import/hls.rs b/rs/hang/src/import/hls.rs index 8b1ed480f..6b03e3665 100644 --- a/rs/hang/src/import/hls.rs +++ b/rs/hang/src/import/hls.rs @@ -163,9 +163,10 @@ impl Hls { let outcome = self.step().await?; let delay = self.refresh_delay(outcome.target_duration, outcome.wrote_segments); - debug!( - wrote = outcome.wrote_segments, - delay = ?delay, + info!( + wrote_segments = outcome.wrote_segments, + target_duration = ?outcome.target_duration, + delay_secs = 
delay.as_secs_f32(), "HLS ingest step complete" ); @@ -178,6 +179,7 @@ impl Hls { self.ensure_tracks().await?; let mut buffered = 0usize; + const MAX_INIT_SEGMENTS: usize = 3; // Only process a few segments during init to avoid getting ahead of live stream // Prime all discovered video variants. // @@ -187,7 +189,7 @@ impl Hls { for (index, mut track) in video_tracks.into_iter().enumerate() { let playlist = self.fetch_media_playlist(track.playlist.clone()).await?; let count = self - .consume_segments(TrackKind::Video(index), &mut track, &playlist) + .consume_segments_limited(TrackKind::Video(index), &mut track, &playlist, MAX_INIT_SEGMENTS) .await?; buffered += count; self.video.push(track); @@ -196,7 +198,7 @@ impl Hls { // Prime the shared audio track, if any. if let Some(mut track) = self.audio.take() { let playlist = self.fetch_media_playlist(track.playlist.clone()).await?; - let count = self.consume_segments(TrackKind::Audio, &mut track, &playlist).await?; + let count = self.consume_segments_limited(TrackKind::Audio, &mut track, &playlist, MAX_INIT_SEGMENTS).await?; buffered += count; self.audio = Some(track); } @@ -315,6 +317,49 @@ impl Hls { Ok(()) } + async fn consume_segments_limited( + &mut self, + kind: TrackKind, + track: &mut TrackState, + playlist: &MediaPlaylist, + max_segments: usize, + ) -> anyhow::Result { + // Calculate segments to process + let next_seq = track.next_sequence.unwrap_or(0); + let playlist_seq = playlist.media_sequence; + let total_segments = playlist.segments.len(); + + let last_playlist_seq = playlist_seq + total_segments as u64; + + let skip = if next_seq > last_playlist_seq { + total_segments + } else if next_seq < playlist_seq { + track.next_sequence = None; + 0 + } else { + (next_seq - playlist_seq) as usize + }; + + let available = if skip < total_segments { + total_segments - skip + } else { + 0 + }; + + // Limit how many segments we process + let to_process = available.min(max_segments); + + if to_process > 0 { + let base_seq = playlist_seq + skip as u64; + for (i, segment) in playlist.segments[skip..skip+to_process].iter().enumerate() { + self.push_segment(kind, track, segment, base_seq + i as u64).await?; + } + info!(?kind, processed = to_process, available = available, "processed limited segments during init"); + } + + Ok(to_process) + } + async fn consume_segments( &mut self, kind: TrackKind, @@ -323,19 +368,67 @@ impl Hls { ) -> anyhow::Result { self.ensure_init_segment(kind, track, playlist).await?; - // Skip segments we've already seen - let skip = track.next_sequence.unwrap_or(0).saturating_sub(playlist.media_sequence) as usize; - let base_seq = playlist.media_sequence + skip as u64; - for (i, segment) in playlist.segments[skip..].iter().enumerate() { - self.push_segment(kind, track, segment, base_seq + i as u64).await?; - } - let consumed = playlist.segments.len() - skip; - - if consumed == 0 { + // Calculate how many segments to skip (already processed) + let next_seq = track.next_sequence.unwrap_or(0); + let playlist_seq = playlist.media_sequence; + let total_segments = playlist.segments.len(); + + // Calculate the last sequence number in the playlist + let last_playlist_seq = playlist_seq + total_segments as u64; + + // If we've already processed beyond what's in the playlist, wait for new segments + let skip = if next_seq > last_playlist_seq { + // We're ahead of the playlist - wait for ffmpeg to generate more segments + warn!( + ?kind, + next_sequence = next_seq, + playlist_sequence = playlist_seq, + last_playlist_sequence = 
last_playlist_seq, + "imported ahead of playlist, waiting for new segments" + ); + total_segments // Skip all segments in playlist + } else if next_seq < playlist_seq { + // We're behind - reset and start from the beginning of the playlist + warn!( + ?kind, + next_sequence = next_seq, + playlist_sequence = playlist_seq, + "next_sequence behind playlist, resetting to start of playlist" + ); + track.next_sequence = None; + 0 + } else { + // Normal case: next_seq is within playlist range + (next_seq - playlist_seq) as usize + }; + + let fresh_segments = if skip < total_segments { + total_segments - skip + } else { + 0 + }; + + info!( + ?kind, + playlist_sequence = playlist_seq, + next_sequence = next_seq, + skip = skip, + total_segments = total_segments, + fresh_segments = fresh_segments, + "consuming HLS segments" + ); + + if fresh_segments > 0 { + let base_seq = playlist_seq + skip as u64; + for (i, segment) in playlist.segments[skip..].iter().enumerate() { + self.push_segment(kind, track, segment, base_seq + i as u64).await?; + } + info!(?kind, consumed = fresh_segments, "consumed HLS segments"); + } else { debug!(?kind, "no fresh HLS segments available"); } - Ok(consumed) + Ok(fresh_segments) } async fn ensure_init_segment( @@ -382,11 +475,28 @@ impl Hls { let url = resolve_uri(&track.playlist, &segment.uri)?; let mut bytes = self.fetch_bytes(url).await?; + // Ensure the importer is initialized before processing fragments + // Use track.init_ready to avoid borrowing issues + if !track.init_ready { + // Try to ensure init segment is processed + let playlist = self.fetch_media_playlist(track.playlist.clone()).await?; + self.ensure_init_segment(kind, track, &playlist).await?; + } + + // Get importer after ensuring init segment let importer = match kind { TrackKind::Video(index) => self.ensure_video_importer_for(index), TrackKind::Audio => self.ensure_audio_importer(), }; + // Final check after ensuring init segment + if !importer.is_initialized() { + return Err(anyhow::anyhow!( + "importer not initialized for {:?} after ensure_init_segment - init segment processing failed", + kind + )); + } + importer.decode(&mut bytes).context("failed to parse media segment")?; track.next_sequence = Some(sequence + 1); diff --git a/rs/hang/src/model/track.rs b/rs/hang/src/model/track.rs index cd092571a..9c797b4fd 100644 --- a/rs/hang/src/model/track.rs +++ b/rs/hang/src/model/track.rs @@ -23,6 +23,9 @@ pub struct TrackProducer { pub inner: moq_lite::TrackProducer, group: Option, keyframe: Option, + /// Track if the current group is the init segment group (timestamp 0) + /// We keep this group open so new subscribers can receive the init segment + is_init_segment_group: bool, } impl TrackProducer { @@ -32,6 +35,7 @@ impl TrackProducer { inner, group: None, keyframe: None, + is_init_segment_group: false, } } @@ -52,7 +56,16 @@ impl TrackProducer { if frame.keyframe { if let Some(group) = self.group.take() { - group.close(); + // Don't close the init segment group - keep it open for new subscribers + if self.is_init_segment_group { + tracing::debug!("keeping init segment group open for new subscribers"); + // Don't close it, just drop it (the group remains open) + drop(group); + } else { + tracing::info!(timestamp = ?frame.timestamp, "closing group and creating new one for keyframe"); + group.close(); + } + self.is_init_segment_group = false; } // Make sure this frame's timestamp doesn't go backwards relative to the last keyframe. 
@@ -68,7 +81,18 @@ impl TrackProducer {
 
 		let mut group = match self.group.take() {
 			Some(group) => group,
-			None if frame.keyframe => self.inner.append_group(),
+			None if frame.keyframe => {
+				let new_group = self.inner.append_group();
+				// Log when creating a new group, especially for init segment (timestamp 0)
+				if frame.timestamp.as_micros() == 0 {
+					tracing::info!(timestamp = ?frame.timestamp, "creating new group for init segment (timestamp 0)");
+					// Mark this as the init segment group so we can keep it open
+					self.is_init_segment_group = true;
+				} else {
+					tracing::info!(timestamp = ?frame.timestamp, "creating new group for keyframe");
+				}
+				new_group
+			},
 			// The first frame must be a keyframe.
 			None => return Err(Error::MissingKeyframe),
 		};

From 30139cd8ae399b76c0f9053e4e3ee780cf6dd026 Mon Sep 17 00:00:00 2001
From: Juan Pablo Bustamante
Date: Tue, 6 Jan 2026 14:13:34 -0300
Subject: [PATCH 03/16] adjusting logs and comments

---
 js/hang/src/frame.ts                  |  68 ++----------
 js/hang/src/watch/audio/emitter.ts    |   8 --
 js/hang/src/watch/audio/source-mse.ts |  99 ++----------------
 js/hang/src/watch/audio/source.ts     |  30 +++---
 js/hang/src/watch/video/source-mse.ts | 144 ++------------------------
 js/hang/src/watch/video/source.ts     |  28 +++--
 6 files changed, 60 insertions(+), 317 deletions(-)

diff --git a/js/hang/src/frame.ts b/js/hang/src/frame.ts
index 17b024c0c..b0581e656 100644
--- a/js/hang/src/frame.ts
+++ b/js/hang/src/frame.ts
@@ -112,57 +112,20 @@ export class Consumer {
 
 	async #run() {
 		// Start fetching groups in the background
-		// For live streams (fmp4), start from the first group we receive (which should be the most recent available)
-		// The init segment will be detected from the first frame of the active group
 		for (;;) {
-			console.log(`[Frame.Consumer] Waiting for next group, current active=${this.#active ?? 'undefined'}, totalGroups=${this.#groups.length}`);
 			const consumer = await this.#track.nextGroup();
 			if (!consumer) {
-				console.log(`[Frame.Consumer] No more groups available (nextGroup returned null), breaking`);
 				break;
 			}
 
-			console.log(`[Frame.Consumer] Received group: sequence=${consumer.sequence}, active=${this.#active ?? 'undefined'}, container=${this.#container ?? 
'undefined'}, totalGroups=${this.#groups.length}`); - - // For fmp4 container (live streams), start from the first group we receive - // This should be the most recent group available when we subscribe - if (this.#container === "fmp4") { - if (this.#active === undefined) { - // First group - start from here (this is a live stream, so start from the most recent available) - this.#active = consumer.sequence; - console.log(`[Frame.Consumer] Starting from first received group (live stream): sequence=${consumer.sequence}, setting active=${this.#active}`); - } else if (consumer.sequence < this.#active) { - // Skip old groups (but accept groups equal to or greater than active) - console.log(`[Frame.Consumer] Skipping old group in live stream: sequence=${consumer.sequence} < active=${this.#active}`); - consumer.close(); - continue; - } else if (consumer.sequence === this.#active && this.#groups.some(g => g.consumer.sequence === consumer.sequence)) { - // Skip duplicate group (same sequence and already in groups) - console.log(`[Frame.Consumer] Skipping duplicate group in live stream: sequence=${consumer.sequence} == active=${this.#active} and already in groups`); - consumer.close(); - continue; - } else { - // New group or same sequence but not in groups yet - accept it and update active - if (consumer.sequence > this.#active) { - console.log(`[Frame.Consumer] New group in live stream: sequence=${consumer.sequence} > active=${this.#active}, accepting and updating active`); - this.#active = consumer.sequence; - } else { - console.log(`[Frame.Consumer] Accepting group with same sequence as active: sequence=${consumer.sequence} == active=${this.#active} (not in groups yet)`); - } - } - } else { - // For non-fmp4 containers, use standard logic - if (this.#active === undefined) { - this.#active = consumer.sequence; - console.log(`[Frame.Consumer] First group received: sequence=${consumer.sequence}, setting active=${this.#active}`); - } + if (this.#active === undefined) { + this.#active = consumer.sequence; + } - if (consumer.sequence < this.#active) { - console.warn(`[Frame.Consumer] Skipping old group: sequence=${consumer.sequence} < active=${this.#active}`); - consumer.close(); - continue; - } + if (consumer.sequence < this.#active) { + consumer.close(); + continue; } const group = { @@ -183,19 +146,13 @@ export class Consumer { async #runGroup(group: Group) { try { let keyframe = true; - let frameCount = 0; - - console.log(`[Frame.Consumer] Starting to read frames from group ${group.consumer.sequence}, active=${this.#active ?? 'undefined'}`); for (;;) { - console.log(`[Frame.Consumer] Calling readFrame() on group ${group.consumer.sequence}, frameCount=${frameCount}`); const next = await group.consumer.readFrame(); if (!next) { - console.log(`[Frame.Consumer] Group ${group.consumer.sequence} finished (readFrame returned null), read ${frameCount} frames total, active=${this.#active ?? 'undefined'}`); break; } - frameCount++; const { data, timestamp } = decode(next, this.#container); const frame = { data, @@ -204,8 +161,6 @@ export class Consumer { group: group.consumer.sequence, }; - console.log(`[Frame.Consumer] Read frame ${frameCount} from group ${group.consumer.sequence}: timestamp=${timestamp}, size=${data.byteLength}, keyframe=${keyframe}, active=${this.#active ?? 
'undefined'}`); - keyframe = false; group.frames.push(frame); @@ -215,7 +170,6 @@ export class Consumer { } if (group.consumer.sequence === this.#active) { - console.log(`[Frame.Consumer] Notifying decoder that frame is available from active group ${group.consumer.sequence}`); this.#notify?.(); this.#notify = undefined; } else { @@ -228,9 +182,7 @@ export class Consumer { } finally { if (group.consumer.sequence === this.#active) { // Advance to the next group. - const oldActive = this.#active; this.#active += 1; - console.log(`[Frame.Consumer] Group ${oldActive} finished, advancing active to ${this.#active}, totalGroups=${this.#groups.length}`); this.#notify?.(); this.#notify = undefined; @@ -272,8 +224,6 @@ export class Consumer { if (this.#active !== undefined && first.consumer.sequence <= this.#active) { this.#groups.shift(); - console.warn(`skipping slow group: ${first.consumer.sequence} < ${this.#groups[0]?.consumer.sequence}`); - first.consumer.close(); first.frames.length = 0; } @@ -296,13 +246,11 @@ export class Consumer { ) { const frame = this.#groups[0].frames.shift(); if (frame) { - console.log(`[Frame.Consumer] Returning frame from group ${this.#groups[0].consumer.sequence}, remaining frames in group=${this.#groups[0].frames.length}, active=${this.#active}`); return frame; } // Check if the group is done and then remove it. if (this.#active > this.#groups[0].consumer.sequence) { - console.log(`[Frame.Consumer] Group ${this.#groups[0].consumer.sequence} is done (active=${this.#active}), removing from groups`); this.#groups.shift(); continue; } @@ -312,19 +260,15 @@ export class Consumer { throw new Error("multiple calls to decode not supported"); } - console.log(`[Frame.Consumer] No frames available, waiting for notify. active=${this.#active ?? 'undefined'}, groups=${this.#groups.length}, groupSequences=[${this.#groups.map(g => g.consumer.sequence).join(', ')}]`); - const wait = new Promise((resolve) => { this.#notify = resolve; }).then(() => { - console.log(`[Frame.Consumer] Notified, checking for frames again. active=${this.#active ?? 'undefined'}, groups=${this.#groups.length}`); return true; }); if (!(await Promise.race([wait, this.#signals.closed]))) { this.#notify = undefined; // Consumer was closed while waiting for a new frame. 
- console.log(`[Frame.Consumer] Consumer closed while waiting`); return undefined; } } diff --git a/js/hang/src/watch/audio/emitter.ts b/js/hang/src/watch/audio/emitter.ts index 464aabf18..6caa825ca 100644 --- a/js/hang/src/watch/audio/emitter.ts +++ b/js/hang/src/watch/audio/emitter.ts @@ -49,7 +49,6 @@ export class Emitter { const paused = effect.get(this.paused); const muted = effect.get(this.muted); const enabled = !paused && !muted; - console.log(`[Audio Emitter] Setting source.enabled=${enabled} (paused=${paused}, muted=${muted})`); this.source.enabled.set(enabled); }); @@ -63,28 +62,21 @@ export class Emitter { this.#signals.effect((effect) => { const mseAudio = effect.get(this.source.mseAudioElement); if (mseAudio) { - console.log("[Audio Emitter] MSE audio element found, setting up volume/mute/pause control"); // MSE path: control HTMLAudioElement directly effect.effect(() => { const volume = effect.get(this.volume); const muted = effect.get(this.muted); const paused = effect.get(this.paused); - console.log(`[Audio Emitter] Setting volume=${volume}, muted=${muted}, paused=${paused}`); mseAudio.volume = volume; mseAudio.muted = muted; // Control play/pause state if (paused && !mseAudio.paused) { - console.log("[Audio Emitter] Pausing audio element (paused=true)"); mseAudio.pause(); } else if (!paused && mseAudio.paused) { // Resume if paused - try to play even if readyState is low const tryPlay = () => { if (!paused && mseAudio.paused) { - console.log("[Audio Emitter] Resuming audio element (paused=false)", { - readyState: mseAudio.readyState, - buffered: mseAudio.buffered.length > 0 ? `${mseAudio.buffered.length} ranges` : "no ranges", - }); mseAudio.play().catch(err => console.error("[Audio Emitter] Failed to resume audio:", err)); } }; diff --git a/js/hang/src/watch/audio/source-mse.ts b/js/hang/src/watch/audio/source-mse.ts index 32acb9f5c..d32faac1a 100644 --- a/js/hang/src/watch/audio/source-mse.ts +++ b/js/hang/src/watch/audio/source-mse.ts @@ -23,10 +23,8 @@ export class SourceMSE { #audioElement = new Signal(undefined); readonly audioElement = this.#audioElement as Getter; - // Cola de fragmentos esperando ser añadidos - // Límite máximo para evitar crecimiento infinito en live streaming #appendQueue: Uint8Array[] = []; - static readonly MAX_QUEUE_SIZE = 10; // Máximo de fragmentos en cola + static readonly MAX_QUEUE_SIZE = 10; #stats = new Signal(undefined); readonly stats = this.#stats; @@ -40,40 +38,24 @@ export class SourceMSE { } async initialize(config: Catalog.AudioConfig): Promise { - // Build MIME type from codec const mimeType = Mime.buildAudioMimeType(config); if (!mimeType) { throw new Error(`Unsupported codec for MSE: ${config.codec}`); } - // Create hidden audio element this.#audio = document.createElement("audio"); this.#audio.style.display = "none"; this.#audio.muted = false; // Allow audio playback this.#audio.volume = 1.0; // Set initial volume to 1.0 document.body.appendChild(this.#audio); - console.log("[MSE Audio] Audio element created:", { - muted: this.#audio.muted, - volume: this.#audio.volume, - readyState: this.#audio.readyState, - }); - - // Don't auto-play here - let Emitter control play/pause state - // The initial play() call is handled in runTrack() after data is buffered - - // Expose audio element via Signal for Emitter to control volume/mute this.#audioElement.set(this.#audio); - // Create MediaSource this.#mediaSource = new MediaSource(); const url = URL.createObjectURL(this.#mediaSource); this.#audio.src = url; - - // Set initial time to 
0 to ensure playback starts from the beginning this.#audio.currentTime = 0; - // Wait for sourceopen event await new Promise((resolve, reject) => { const timeout = setTimeout(() => { reject(new Error("MediaSource sourceopen timeout")); @@ -84,7 +66,6 @@ export class SourceMSE { () => { clearTimeout(timeout); try { - // Create SourceBuffer this.#sourceBuffer = this.#mediaSource!.addSourceBuffer(mimeType); this.#setupSourceBuffer(); resolve(); @@ -105,10 +86,9 @@ export class SourceMSE { #setupSourceBuffer(): void { if (!this.#sourceBuffer) return; - // Procesar la cola cuando termine la operación actual this.#sourceBuffer.addEventListener("updateend", () => { this.#processAppendQueue(); - // Don't auto-resume here - let Emitter control play/pause state + }); this.#sourceBuffer.addEventListener("error", (e) => { @@ -121,19 +101,15 @@ export class SourceMSE { throw new Error("SourceBuffer not initialized"); } - // Si la cola está llena, descartar el fragmento más antiguo (FIFO) - // Esto mantiene baja la latencia en live streaming + if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { const discarded = this.#appendQueue.shift(); console.warn(`[MSE Audio] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`); } - // Añadir a la cola en lugar de esperar - // Crear una copia con ArrayBuffer real (no SharedArrayBuffer) const copy = new Uint8Array(fragment); this.#appendQueue.push(copy); - // Intentar procesar inmediatamente si está disponible this.#processAppendQueue(); } @@ -141,11 +117,8 @@ export class SourceMSE { if (fragments.length === 1) { return fragments[0]; } - - // Calculate total size + const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0); - - // Concatenate all fragments into a single Uint8Array const result = new Uint8Array(totalSize); let offset = 0; for (const fragment of fragments) { @@ -172,13 +145,11 @@ export class SourceMSE { // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) this.#sourceBuffer.appendBuffer(fragment as BufferSource); - // Update stats this.#stats.update((current) => ({ bytesReceived: (current?.bytesReceived ?? 0) + fragment.byteLength, })); } catch (error) { console.error("[MSE Audio] Error appending fragment:", error); - // No reintentamos - el fragmento se descarta } } @@ -188,28 +159,21 @@ export class SourceMSE { name: string, config: Catalog.AudioConfig, ): Promise { - // Initialize MSE await this.initialize(config); const catalog = { priority: 128 }; // TODO: Get from actual catalog const sub = broadcast.subscribe(name, catalog.priority); effect.cleanup(() => sub.close()); - // Create consumer for CMAF fragments const consumer = new Frame.Consumer(sub, { latency: this.latency, container: "fmp4", // CMAF fragments }); effect.cleanup(() => consumer.close()); - console.log("[MSE Audio] Consumer created, waiting for frames..."); - - // Initial play attempt when we have data buffered - // After this, Emitter controls play/pause state effect.spawn(async () => { if (!this.#audio) return; - // Wait for some data to be buffered, then attempt to play await new Promise((resolve) => { let checkCount = 0; const maxChecks = 100; // 10 seconds max wait @@ -221,23 +185,15 @@ export class SourceMSE { const bufferedRanges = this.#sourceBuffer.buffered; const audioBuffered = this.#audio.buffered; const hasBufferedData = bufferedRanges.length > 0; - const bufferedInfo = hasBufferedData - ? 
`${bufferedRanges.length} ranges, last: ${bufferedRanges.start(bufferedRanges.length - 1).toFixed(2)}-${bufferedRanges.end(bufferedRanges.length - 1).toFixed(2)}` - : "no ranges"; - console.log(`[MSE Audio] Audio readyState: ${this.#audio.readyState}, buffered: ${bufferedInfo}, checkCount: ${checkCount}`); - // Check if currentTime is within buffered range if (hasBufferedData && audioBuffered && audioBuffered.length > 0 && !hasSeeked) { const currentTime = this.#audio.currentTime; const isTimeBuffered = audioBuffered.start(0) <= currentTime && currentTime < audioBuffered.end(audioBuffered.length - 1); - // If we have buffered data but current time is not in range, seek immediately if (!isTimeBuffered) { const seekTime = audioBuffered.start(0); - console.log(`[MSE Audio] Seeking to buffered start time: ${seekTime.toFixed(3)} (currentTime=${currentTime.toFixed(3)})`); this.#audio.currentTime = seekTime; hasSeeked = true; - // Continue checking after seek setTimeout(checkReady, 100); return; } @@ -246,20 +202,10 @@ export class SourceMSE { // Try to play if we have buffered data, even if readyState is low // The browser will start playing when it's ready if (hasBufferedData && this.#audio.readyState >= HTMLMediaElement.HAVE_METADATA) { - console.log("[MSE Audio] Audio has buffered data, attempting initial play...", { - readyState: this.#audio.readyState, - muted: this.#audio.muted, - volume: this.#audio.volume, - paused: this.#audio.paused, - hasBufferedData, - currentTime: this.#audio.currentTime, - }); this.#audio.play().then(() => { - console.log("[MSE Audio] Audio play() succeeded (initial)"); resolve(); }).catch((error) => { console.error("[MSE Audio] Audio play() failed (initial):", error); - // Continue checking, might succeed later if (checkCount < maxChecks) { setTimeout(checkReady, 200); } else { @@ -267,7 +213,6 @@ export class SourceMSE { } }); } else if (checkCount >= maxChecks) { - console.warn("[MSE Audio] Audio did not get buffered data after 10 seconds"); resolve(); } else { setTimeout(checkReady, 100); @@ -282,7 +227,6 @@ export class SourceMSE { }); }); - // Track if we've received the init segment (moov) let initSegmentReceived = false; // Helper function to detect moov atom in the buffer @@ -293,7 +237,7 @@ export class SourceMSE { const len = data.length; while (offset + 8 <= len) { - // tamaño del atom (big endian) + // Atom size (big endian) const size = (data[offset] << 24) | (data[offset + 1] << 16) | @@ -309,7 +253,7 @@ export class SourceMSE { if (type === "moov") return true; - // Evitar loops infinitos si el size viene roto + // Avoid infinite loops if size is broken if (size < 8) break; offset += size; } @@ -325,104 +269,78 @@ export class SourceMSE { let currentGroup: number | undefined = undefined; let groupFragments: Uint8Array[] = []; // Accumulate fragments for current group - console.log("[MSE Audio] Starting to read frames from consumer..."); - for (;;) { const frame = await Promise.race([consumer.decode(), effect.cancel]); if (!frame) { - // Append any remaining group fragments before finishing if (groupFragments.length > 0 && initSegmentReceived) { const groupData = this.#concatenateFragments(groupFragments); - console.log(`[MSE Audio] Appending final group (group ${currentGroup}): ${groupFragments.length} fragments, total size=${groupData.byteLength}`); await this.appendFragment(groupData); groupFragments = []; } - console.log(`[MSE Audio] No more frames, total frames processed: ${frameCount}`); break; } frameCount++; - console.log(`[MSE Audio] 
Received frame ${frameCount}: timestamp=${frame.timestamp}, size=${frame.data.byteLength}, group=${frame.group}, keyframe=${frame.keyframe}`); - // Check if this is the init segment (moov) const isMoovAtom = hasMoovAtom(frame.data); const isInitSegment = isMoovAtom && !initSegmentReceived; if (isInitSegment) { - // Append any pending group before processing init segment if (groupFragments.length > 0 && initSegmentReceived) { const groupData = this.#concatenateFragments(groupFragments); - console.log(`[MSE Audio] Appending group (group ${currentGroup}) before init segment: ${groupFragments.length} fragments`); await this.appendFragment(groupData); groupFragments = []; } - // This is the init segment (moov), append it first await this.appendFragment(frame.data); initSegmentReceived = true; - console.log("[MSE Audio] Init segment (moov) received and appended"); continue; } - // This is a regular fragment (moof+mdat) if (!initSegmentReceived) { - console.warn("[MSE Audio] Received fragment before init segment, skipping"); continue; } - // Check if we're starting a new group if (currentGroup !== undefined && frame.group !== currentGroup) { - // Append the complete group from previous group if (groupFragments.length > 0) { const groupData = this.#concatenateFragments(groupFragments); - console.log(`[MSE Audio] Appending complete group (group ${currentGroup}): ${groupFragments.length} fragments, total size=${groupData.byteLength}`); await this.appendFragment(groupData); groupFragments = []; } } - // If this is the first fragment of a new group, start accumulating if (currentGroup === undefined || frame.group !== currentGroup) { currentGroup = frame.group; groupFragments = []; } groupFragments.push(frame.data); - console.log(`[MSE Audio] Accumulating fragment for group (group ${frame.group}): timestamp=${frame.timestamp}, size=${frame.data.byteLength}, total fragments in group=${groupFragments.length}`); - // For live streaming: append immediately if we have at least one fragment - // This ensures we don't wait indefinitely for more fragments - // We'll still group by MOQ group, but append more aggressively if (groupFragments.length >= 1) { - // Append immediately - MSE can handle single fragments if they're complete + const groupData = this.#concatenateFragments(groupFragments); - console.log(`[MSE Audio] Appending group immediately (group ${currentGroup}): ${groupFragments.length} fragments, total size=${groupData.byteLength}`); await this.appendFragment(groupData); groupFragments = []; + } } }); } close(): void { - // Limpiar la cola this.#appendQueue = []; - // Clear audio element Signal this.#audioElement.set(undefined); - // Clean up SourceBuffer if (this.#sourceBuffer && this.#mediaSource) { try { if (this.#sourceBuffer.updating) { this.#sourceBuffer.abort(); } - // Don't call endOfStream() here - let it be called only once for MediaSource } catch (error) { console.error("Error closing SourceBuffer:", error); } } - // Clean up MediaSource if (this.#mediaSource) { try { if (this.#mediaSource.readyState === "open") { @@ -434,7 +352,6 @@ export class SourceMSE { } } - // Remove audio element if (this.#audio) { this.#audio.pause(); this.#audio.src = ""; diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts index 7bd6a9373..8e277c2df 100644 --- a/js/hang/src/watch/audio/source.ts +++ b/js/hang/src/watch/audio/source.ts @@ -74,15 +74,10 @@ export class Source { this.#signals.effect((effect) => { const audio = effect.get(catalog)?.audio; this.catalog.set(audio); - + if 
(audio?.renditions) { const first = Object.entries(audio.renditions).at(0); if (first) { - console.log(`[Audio Source] Rendition ${first[0]} from catalog:`, { - codec: first[1].codec, - container: first[1].container, - hasContainer: "container" in first[1], - }); effect.set(this.active, first[0]); effect.set(this.config, first[1]); } @@ -107,7 +102,6 @@ export class Source { // Don't create worklet for MSE (fmp4) - browser handles playback directly // The worklet is only needed for WebCodecs path if (config.container === "fmp4") { - console.log("[Audio Source] Skipping worklet creation for MSE (fmp4) - browser handles playback directly"); return; } @@ -165,37 +159,30 @@ export class Source { #runDecoder(effect: Effect): void { const enabled = effect.get(this.enabled); - console.log(`[Audio Source] #runDecoder: enabled=${enabled}`); if (!enabled) { - console.log(`[Audio Source] #runDecoder: skipping because enabled=false`); return; } const catalog = effect.get(this.catalog); if (!catalog) { - console.log(`[Audio Source] #runDecoder: skipping because catalog is undefined`); return; } const broadcast = effect.get(this.broadcast); if (!broadcast) { - console.log(`[Audio Source] #runDecoder: skipping because broadcast is undefined`); return; } const config = effect.get(this.config); if (!config) { - console.log(`[Audio Source] #runDecoder: skipping because config is undefined`); return; } const active = effect.get(this.active); if (!active) { - console.log(`[Audio Source] #runDecoder: skipping because active is undefined`); return; } - console.log(`[Audio Source] #runDecoder: subscribing to track="${active}", container="${config.container}"`); // Route to MSE for CMAF, WebCodecs for legacy/raw if (config.container === "fmp4") { this.#runMSEPath(effect, broadcast, active, config, catalog); @@ -211,6 +198,13 @@ export class Source { config: Catalog.AudioConfig, catalog: Catalog.Audio, ): void { + console.log("[Audio Stream] Subscribing to track", { + name, + codec: config.codec, + container: config.container, + sampleRate: config.sampleRate, + channels: config.numberOfChannels, + }); // Import MSE source dynamically effect.spawn(async () => { const { SourceMSE } = await import("./source-mse.js"); @@ -248,12 +242,18 @@ export class Source { config: Catalog.AudioConfig, catalog: Catalog.Audio, ): void { + console.log("[Audio Stream] Subscribing to track", { + name, + codec: config.codec, + container: config.container, + sampleRate: config.sampleRate, + channels: config.numberOfChannels, + }); const sub = broadcast.subscribe(name, catalog.priority); effect.cleanup(() => sub.close()); // Create consumer with slightly less latency than the render worklet to avoid underflowing. 
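	// A minimal sketch of the container-based routing rule introduced in this series,
	// assuming only the catalog's `container` field; `pickAudioPath` is a hypothetical
	// helper name used purely for illustration, not part of the codebase.
	type Container = "legacy" | "raw" | "fmp4";
	function pickAudioPath(container: Container): "mse" | "webcodecs" {
		// CMAF fragments (moof+mdat) are appended to MediaSource Extensions;
		// legacy/raw frames are decoded via WebCodecs and rendered by the worklet.
		return container === "fmp4" ? "mse" : "webcodecs";
	}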
// Container defaults to "legacy" via Zod schema for backward compatibility - console.log(`[Audio Subscriber] Using container format: ${config.container}`); const consumer = new Frame.Consumer(sub, { latency: Math.max(this.latency.peek() - JITTER_UNDERHEAD, 0) as Time.Milli, container: config.container, diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts index cb4557950..df7defe91 100644 --- a/js/hang/src/watch/video/source-mse.ts +++ b/js/hang/src/watch/video/source-mse.ts @@ -31,10 +31,10 @@ export class SourceMSE { #mediaSource?: MediaSource; #sourceBuffer?: SourceBuffer; - // Cola de fragmentos esperando ser añadidos - // Límite máximo para evitar crecimiento infinito en live streaming + // Queue of fragments waiting to be added + // Maximum limit to prevent infinite growth in live streaming #appendQueue: Uint8Array[] = []; - static readonly MAX_QUEUE_SIZE = 10; // Máximo de fragmentos en cola + static readonly MAX_QUEUE_SIZE = 10; // Maximum fragments in queue // Expose the current frame to render as a signal frame = new Signal(undefined); @@ -61,100 +61,56 @@ export class SourceMSE { } async initialize(config: RequiredDecoderConfig): Promise { - // Build MIME type from codec const mimeType = Mime.buildVideoMimeType(config); if (!mimeType) { throw new Error(`Unsupported codec for MSE: ${config.codec}`); } - console.log(`[MSE] Initializing with MIME type: ${mimeType}, codec: ${config.codec}`); - // Create hidden video element this.#video = document.createElement("video"); this.#video.style.display = "none"; this.#video.playsInline = true; this.#video.muted = true; // Required for autoplay document.body.appendChild(this.#video); - // Listen for stalled event (when video runs out of data) + // Note: In live streaming, "waiting" events are common and normal as the video waits for new data this.#video.addEventListener("waiting", () => { - if (!this.#video) return; - const buffered = this.#sourceBuffer?.buffered; - const videoBuffered = this.#video.buffered; - const current = this.#video.currentTime; - const sourceBufferInfo = buffered && buffered.length > 0 - ? `${buffered.length} ranges, last: ${buffered.end(buffered.length - 1).toFixed(2)}s` - : "no ranges"; - const videoBufferedInfo = videoBuffered && videoBuffered.length > 0 - ? `${videoBuffered.length} ranges, last: ${videoBuffered.end(videoBuffered.length - 1).toFixed(2)}s` - : "no ranges"; - console.warn(`[MSE] Video waiting for data (stalled) at ${current.toFixed(2)}s, SourceBuffer: ${sourceBufferInfo}, Video: ${videoBufferedInfo}`); + // Silently handle - this is expected in live streaming }); - // Listen for ended event this.#video.addEventListener("ended", () => { if (!this.#video) return; - const buffered = this.#sourceBuffer?.buffered; const videoBuffered = this.#video.buffered; const current = this.#video.currentTime; - const sourceBufferInfo = buffered && buffered.length > 0 - ? `${buffered.length} ranges, last: ${buffered.end(buffered.length - 1).toFixed(2)}s` - : "no ranges"; - const videoBufferedInfo = videoBuffered && videoBuffered.length > 0 - ? 
`${videoBuffered.length} ranges, last: ${videoBuffered.end(videoBuffered.length - 1).toFixed(2)}s` - : "no ranges"; - console.warn(`[MSE] Video ended at ${current.toFixed(2)}s - SourceBuffer: ${sourceBufferInfo}, Video: ${videoBufferedInfo}`); - // For live streams, try to resume playback if we have buffered data + if (videoBuffered && videoBuffered.length > 0) { const lastRange = videoBuffered.length - 1; const end = videoBuffered.end(lastRange); if (current < end) { - console.warn(`[MSE] Video ended but has buffered data up to ${end.toFixed(2)}s, seeking to current time`); this.#video.currentTime = current; this.#video.play().catch(err => console.error("[MSE] Failed to resume after ended:", err)); } } }); - // Listen for timeupdate to monitor playback this.#video.addEventListener("timeupdate", () => { if (!this.#video) return; - const buffered = this.#sourceBuffer?.buffered; const videoBuffered = this.#video.buffered; const current = this.#video.currentTime; - // Check video buffered ranges (more accurate for playback) if (videoBuffered && videoBuffered.length > 0) { const lastRange = videoBuffered.length - 1; const end = videoBuffered.end(lastRange); const remaining = end - current; - // Log warning if we're getting close to the end of buffered data - if (remaining < 1.0 && remaining > 0) { - console.warn(`[MSE] Video approaching end of buffered data: ${remaining.toFixed(2)}s remaining (current: ${current.toFixed(2)}s, buffered up to: ${end.toFixed(2)}s)`); - } - // If we've reached the end and video is paused, try to resume if (remaining <= 0.1 && this.#video.paused) { - console.warn(`[MSE] Video reached end of buffered data, attempting to resume...`); this.#video.play().catch(err => console.error("[MSE] Failed to resume playback:", err)); } - } else if (buffered && buffered.length > 0) { - // SourceBuffer has data but video doesn't see it - this is a problem - const lastRange = buffered.length - 1; - const end = buffered.end(lastRange); - const remaining = end - current; - if (remaining < 1.0 && remaining > 0) { - console.warn(`[MSE] Video approaching end of SourceBuffer data (video doesn't see it): ${remaining.toFixed(2)}s remaining`); - } } }); - // Create MediaSource this.#mediaSource = new MediaSource(); const url = URL.createObjectURL(this.#mediaSource); this.#video.src = url; - - // Set initial time to 0 to ensure playback starts from the beginning this.#video.currentTime = 0; - // Wait for sourceopen event await new Promise((resolve, reject) => { const timeout = setTimeout(() => { reject(new Error("MediaSource sourceopen timeout")); @@ -165,7 +121,6 @@ export class SourceMSE { () => { clearTimeout(timeout); try { - // Create SourceBuffer this.#sourceBuffer = this.#mediaSource!.addSourceBuffer(mimeType); this.#setupSourceBuffer(); resolve(); @@ -182,28 +137,13 @@ export class SourceMSE { }); }); - // Start capturing frames from video element this.#startFrameCapture(); } #setupSourceBuffer(): void { if (!this.#sourceBuffer) return; - // Handle updateend events this.#sourceBuffer.addEventListener("updateend", () => { - // SourceBuffer is ready for more data - if (this.#sourceBuffer && this.#sourceBuffer.buffered.length > 0) { - const lastRange = this.#sourceBuffer.buffered.length - 1; - const start = this.#sourceBuffer.buffered.start(lastRange); - const end = this.#sourceBuffer.buffered.end(lastRange); - } else { - console.log("[MSE] SourceBuffer buffered: 0 ranges (no data buffered yet)"); - } - if (this.#video) { - console.log(`[MSE] Video readyState after updateend: 
${this.#video.readyState} (HAVE_METADATA=${HTMLMediaElement.HAVE_METADATA}, HAVE_FUTURE_DATA=${HTMLMediaElement.HAVE_FUTURE_DATA})`); - } - - // Procesar la cola cuando termine la operación actual this.#processAppendQueue(); }); @@ -215,30 +155,25 @@ export class SourceMSE { #startFrameCapture(): void { if (!this.#video) return; - // Use requestVideoFrameCallback to capture frames const captureFrame = () => { if (!this.#video) return; try { - // Create VideoFrame from video element const frame = new VideoFrame(this.#video, { timestamp: this.#video.currentTime * 1_000_000, // Convert to microseconds }); - // Update stats this.#stats.update((current) => ({ frameCount: (current?.frameCount ?? 0) + 1, timestamp: frame.timestamp, bytesReceived: current?.bytesReceived ?? 0, })); - // Update frame signal this.frame.update((prev) => { prev?.close(); return frame; }); - // Update display size if (this.#video.videoWidth && this.#video.videoHeight) { this.display.set({ width: this.#video.videoWidth, @@ -246,7 +181,6 @@ export class SourceMSE { }); } - // Update buffer status if (this.#video.readyState >= HTMLMediaElement.HAVE_CURRENT_DATA) { this.bufferStatus.set({ state: "filled" }); } @@ -254,7 +188,6 @@ export class SourceMSE { console.error("Error capturing frame:", error); } - // Schedule next frame capture if (this.#video.requestVideoFrameCallback) { this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame); } else { @@ -263,7 +196,6 @@ export class SourceMSE { } }; - // Start capturing if (this.#video.requestVideoFrameCallback) { this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame); } else { @@ -275,20 +207,14 @@ export class SourceMSE { if (!this.#sourceBuffer || !this.#mediaSource) { throw new Error("SourceBuffer not initialized"); } - - // Si la cola está llena, descartar el fragmento más antiguo (FIFO) - // Esto mantiene baja la latencia en live streaming if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { const discarded = this.#appendQueue.shift(); console.warn(`[MSE] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`); } - // Añadir a la cola en lugar de esperar - // Crear una copia con ArrayBuffer real (no SharedArrayBuffer) const copy = new Uint8Array(fragment); this.#appendQueue.push(copy); - - // Intentar procesar inmediatamente si está disponible + this.#processAppendQueue(); } @@ -297,10 +223,7 @@ export class SourceMSE { return fragments[0]; } - // Calculate total size const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0); - - // Concatenate all fragments into a single Uint8Array const result = new Uint8Array(totalSize); let offset = 0; for (const fragment of fragments) { @@ -327,7 +250,6 @@ export class SourceMSE { // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) this.#sourceBuffer.appendBuffer(fragment as BufferSource); - // Update stats this.#stats.update((current) => ({ frameCount: current?.frameCount ?? 0, timestamp: current?.timestamp ?? 
0, @@ -343,7 +265,6 @@ export class SourceMSE { readyState: this.#mediaSource.readyState, duration: this.#mediaSource.duration, }); - // No reintentamos - el fragmento se descarta } } @@ -353,25 +274,20 @@ export class SourceMSE { name: string, config: RequiredDecoderConfig, ): Promise { - // Initialize MSE await this.initialize(config); const sub = broadcast.subscribe(name, PRIORITY.video); effect.cleanup(() => sub.close()); - // Create consumer for CMAF fragments const consumer = new Frame.Consumer(sub, { latency: this.latency, container: "fmp4", // CMAF fragments }); effect.cleanup(() => consumer.close()); - - // Start playing video when we have enough data effect.spawn(async () => { if (!this.#video) return; - // Wait for some data to be buffered await new Promise((resolve) => { let checkCount = 0; const maxChecks = 100; // 10 seconds max wait @@ -380,34 +296,20 @@ export class SourceMSE { const checkReady = () => { checkCount++; if (this.#video) { - const bufferedRanges = this.#sourceBuffer?.buffered; const videoBuffered = this.#video.buffered; - const sourceBufferInfo = bufferedRanges && bufferedRanges.length > 0 - ? `${bufferedRanges.length} ranges, last: ${bufferedRanges.start(bufferedRanges.length - 1).toFixed(2)}-${bufferedRanges.end(bufferedRanges.length - 1).toFixed(2)}` - : "no ranges"; - const videoBufferedInfo = videoBuffered && videoBuffered.length > 0 - ? `${videoBuffered.length} ranges, last: ${videoBuffered.start(videoBuffered.length - 1).toFixed(2)}-${videoBuffered.end(videoBuffered.length - 1).toFixed(2)}` - : "no ranges"; - console.log(`[MSE] Video readyState: ${this.#video.readyState}, SourceBuffer buffered: ${sourceBufferInfo}, Video buffered: ${videoBufferedInfo}, checkCount: ${checkCount}`); - - // Check if we have buffered data and if the current time is within the buffered range - // Use video.buffered instead of sourceBuffer.buffered for checking if video can play const hasBufferedData = videoBuffered && videoBuffered.length > 0; const currentTime = this.#video.currentTime; const isTimeBuffered = hasBufferedData && videoBuffered.start(0) <= currentTime && currentTime < videoBuffered.end(videoBuffered.length - 1); - // If we have buffered data but current time is not in range, seek immediately if (hasBufferedData && !isTimeBuffered && !hasSeeked) { const seekTime = videoBuffered.start(0); this.#video.currentTime = seekTime; hasSeeked = true; - // Continue checking after seek setTimeout(checkReady, 100); return; } if (this.#video.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) { - console.log("[MSE] Video has enough data, attempting to play..."); this.#video.play().then(() => { resolve(); }).catch((error) => { @@ -416,12 +318,10 @@ export class SourceMSE { }); } else if (hasBufferedData && checkCount >= 10) { // If we have buffered data but readyState hasn't advanced, try playing anyway after 1 second - console.warn("[MSE] Video has buffered data but readyState hasn't advanced, attempting to play..."); this.#video.play().then(() => { resolve(); }).catch((error) => { console.error("[MSE] Video play() failed:", error); - // Continue checking if (checkCount < maxChecks) { setTimeout(checkReady, 100); } else { @@ -429,10 +329,9 @@ export class SourceMSE { } }); } else if (checkCount >= maxChecks) { - console.warn("[MSE] Video did not reach HAVE_FUTURE_DATA after 10 seconds, attempting to play anyway..."); this.#video.play().then(() => { resolve(); - }).catch((error) => { + }).catch(() => { resolve(); }); } else { @@ -456,7 +355,7 @@ export class SourceMSE { const 
len = data.length; while (offset + 8 <= len) { - // tamaño del atom (big endian) + // Atom size (big endian) const size = (data[offset] << 24) | (data[offset + 1] << 16) | @@ -473,7 +372,6 @@ export class SourceMSE { // Init segment contains either "ftyp" or "moov" atoms if (type === "ftyp" || type === "moov") return true; - // Evitar loops infinitos si el size viene roto if (size < 8 || size === 0) break; offset += size; } @@ -493,69 +391,50 @@ export class SourceMSE { for (;;) { const frame = await Promise.race([consumer.decode(), effect.cancel]); if (!frame) { - // Append any remaining GOP fragments before finishing if (gopFragments.length > 0 && initSegmentReceived) { const gopData = this.#concatenateFragments(gopFragments); await this.appendFragment(gopData); gopFragments = []; } - console.log(`[MSE] No more frames, total frames processed: ${frameCount}`); break; } frameCount++; - console.log(`[MSE] Received frame ${frameCount}: timestamp=${frame.timestamp}, size=${frame.data.byteLength}, group=${frame.group}, keyframe=${frame.keyframe}`); - // Check if this is the init segment (ftyp+moov or just moov) const containsInitSegmentData = isInitSegmentData(frame.data); const isInitSegment = containsInitSegmentData && !initSegmentReceived; if (isInitSegment) { - // Append any pending GOP before processing init segment if (gopFragments.length > 0 && initSegmentReceived) { const gopData = this.#concatenateFragments(gopFragments); await this.appendFragment(gopData); gopFragments = []; } - // This is the init segment (moov), append it first - console.log("[MSE] Appending init segment..."); await this.appendFragment(frame.data); initSegmentReceived = true; - console.log("[MSE] Init segment (moov) received and appended"); continue; } - // This is a regular fragment (moof+mdat) if (!initSegmentReceived) { - console.warn(`[MSE] Received fragment before init segment (timestamp=${frame.timestamp}), skipping`); continue; } - // Check if we're starting a new group (new GOP) if (currentGroup !== undefined && frame.group !== currentGroup) { - // Append the complete GOP from previous group if (gopFragments.length > 0) { const gopData = this.#concatenateFragments(gopFragments); - console.log(`[MSE] Appending complete GOP (group ${currentGroup}): ${gopFragments.length} fragments, total size=${gopData.byteLength}`); await this.appendFragment(gopData); gopFragments = []; } } - // If this is the first fragment of a new group, start accumulating if (currentGroup === undefined || frame.group !== currentGroup) { currentGroup = frame.group; gopFragments = []; } gopFragments.push(frame.data); - console.log(`[MSE] Accumulating fragment for GOP (group ${frame.group}): timestamp=${frame.timestamp}, size=${frame.data.byteLength}, total fragments in GOP=${gopFragments.length}`); - // For live streaming: append immediately if we have at least one fragment - // This ensures we don't wait indefinitely for more fragments - // We'll still group by MOQ group, but append more aggressively if (gopFragments.length >= 1) { - // Append immediately - MSE can handle single fragments if they're complete GOPs const gopData = this.#concatenateFragments(gopFragments); await this.appendFragment(gopData); gopFragments = []; @@ -567,7 +446,6 @@ export class SourceMSE { close(): void { this.#appendQueue = []; - // Cancel frame capture if (this.#frameCallbackId !== undefined) { if (this.#video?.requestVideoFrameCallback) { this.#video.cancelVideoFrameCallback(this.#frameCallbackId); @@ -576,13 +454,11 @@ export class SourceMSE { } } - // 
Close current frame this.frame.update((prev) => { prev?.close(); return undefined; }); - // Clean up SourceBuffer if (this.#sourceBuffer && this.#mediaSource) { try { if (this.#sourceBuffer.updating) { @@ -596,7 +472,6 @@ export class SourceMSE { } } - // Clean up MediaSource if (this.#mediaSource) { try { if (this.#mediaSource.readyState === "open") { @@ -608,7 +483,6 @@ export class SourceMSE { } } - // Remove video element if (this.#video) { this.#video.pause(); this.#video.src = ""; diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index c2d52e39e..b73ddc849 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -110,6 +110,14 @@ export class Source { const c = effect.get(catalog)?.video; effect.set(this.catalog, c); effect.set(this.flip, c?.flip); + + if (c) { + console.log("[Video Catalog]", { + renditions: Object.keys(c.renditions ?? {}), + renditionCount: Object.keys(c.renditions ?? {}).length, + flip: c.flip, + }); + } }); this.#signals.effect(this.#runSupported.bind(this)); @@ -126,11 +134,6 @@ export class Source { const supported: Record = {}; for (const [name, rendition] of Object.entries(renditions)) { - console.log(`[Video Source] Rendition ${name} from catalog:`, { - codec: rendition.codec, - container: rendition.container, - hasContainer: "container" in rendition, - }); const description = rendition.description ? Hex.toBytes(rendition.description) : undefined; const { supported: valid } = await VideoDecoder.isConfigSupported({ @@ -206,6 +209,13 @@ export class Source { } #runMSEPath(effect: Effect, broadcast: Moq.Broadcast, name: string, config: RequiredDecoderConfig): void { + console.log("[Video Stream] Subscribing to track", { + name, + codec: config.codec, + container: config.container, + width: config.codedWidth, + height: config.codedHeight, + }); // Import MSE source dynamically to avoid loading if not needed effect.spawn(async () => { const { SourceMSE } = await import("./source-mse.js"); @@ -250,12 +260,18 @@ export class Source { } #runWebCodecsPath(effect: Effect, broadcast: Moq.Broadcast, name: string, config: RequiredDecoderConfig): void { + console.log("[Video Stream] Subscribing to track", { + name, + codec: config.codec, + container: config.container, + width: config.codedWidth, + height: config.codedHeight, + }); const sub = broadcast.subscribe(name, PRIORITY.video); // TODO use priority from catalog effect.cleanup(() => sub.close()); // Create consumer that reorders groups/frames up to the provided latency. 
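	// A standalone sketch of the MP4 box walk the MSE sources use to spot init
	// segments (ftyp/moov), under the same assumptions as hasMoovAtom and
	// isInitSegmentData above; `containsBox` is a hypothetical name for illustration.
	function containsBox(data: Uint8Array, wanted: string): boolean {
		let offset = 0;
		while (offset + 8 <= data.length) {
			// Each box starts with a 32-bit big-endian size and a 4-character type.
			const size =
				(data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3];
			const type = String.fromCharCode(data[offset + 4], data[offset + 5], data[offset + 6], data[offset + 7]);
			if (type === wanted) return true;
			if (size < 8) break; // malformed size; avoid an infinite loop
			offset += size;
		}
		return false;
	}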
// Container defaults to "legacy" via Zod schema for backward compatibility - console.log(`[Video Subscriber] Using container format: ${config.container}`); const consumer = new Frame.Consumer(sub, { latency: this.latency, container: config.container, From deec2a9ce3805b2f7de3b08e7836d83a63e942ea Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Wed, 7 Jan 2026 17:38:12 -0300 Subject: [PATCH 04/16] Just commands: updated flags --- js/hang/src/watch/audio/source-mse.ts | 1 - justfile | 8 +++----- rs/hang/examples/video.rs | 1 + rs/hang/src/catalog/root.rs | 4 +++- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/js/hang/src/watch/audio/source-mse.ts index d32faac1a..26383bc33 100644 --- a/js/hang/src/watch/audio/source-mse.ts +++ b/js/hang/src/watch/audio/source-mse.ts @@ -262,7 +262,6 @@ export class SourceMSE { } // Read fragments and append to SourceBuffer - // MSE works better when appending complete groups (GOPs for video, sample groups for audio) // We group fragments by MOQ group before appending effect.spawn(async () => { let frameCount = 0; diff --git a/justfile b/justfile index 53e7971b2..09d61ed9c 100644 --- a/justfile +++ b/justfile @@ -147,7 +147,7 @@ pub name url="http://localhost:4443/anon" *args: - | cargo run --bin hang -- publish --url "{{url}}" --name "{{name}}" fmp4 {{args}} # Generate and ingest an HLS stream from a video file. -pub-hls name passthrough='false' relay="http://localhost:4443/anon": +pub-hls name passthrough='' relay="http://localhost:4443/anon": #!/usr/bin/env bash set -euo pipefail @@ -212,10 +212,8 @@ pub-hls name passthrough='false' relay="http://localhost:4443/anon": sleep 0.5 done - echo ">>> Passthrough parameter value: '{{passthrough}}'" - # Just may pass the parameter as "passthrough=true" when using passthrough="true" - # So we check if it contains "true" (case-insensitive) and is not exactly "false" - if echo "{{passthrough}}" | grep -qi "true" && [ "{{passthrough}}" != "false" ]; then + # Check if passthrough flag is provided (boolean parameter) + if [ -n "{{passthrough}}" ]; then echo ">>> Starting HLS ingest from disk with passthrough mode: $OUT_DIR/master.m3u8" PASSTHROUGH_FLAG="--passthrough" else diff --git a/rs/hang/examples/video.rs b/rs/hang/examples/video.rs index 9ad11e9b2..cdea3d973 100644 --- a/rs/hang/examples/video.rs +++ b/rs/hang/examples/video.rs @@ -72,6 +72,7 @@ fn create_track(broadcast: &mut moq_lite::BroadcastProducer) -> hang::TrackProdu display_ratio_width: None, display_ratio_height: None, optimize_for_latency: None, + container: hang::catalog::Container::Legacy, }; // Create a map of video renditions diff --git a/rs/hang/src/catalog/root.rs b/rs/hang/src/catalog/root.rs index 590dc2fcb..8850aeb56 100644 --- a/rs/hang/src/catalog/root.rs +++ b/rs/hang/src/catalog/root.rs @@ -285,7 +285,7 @@ impl From for CatalogConsumer { mod test { use std::collections::BTreeMap; - use crate::catalog::{AudioCodec::Opus, AudioConfig, VideoConfig, H264}; + use crate::catalog::{AudioCodec::Opus, AudioConfig, VideoConfig, H264, Container}; use super::*; @@ -339,6 +339,7 @@ mod test { bitrate: Some(6_000_000), framerate: Some(30.0), optimize_for_latency: None, + container: Container::Legacy, }, ); @@ -351,6 +352,7 @@ mod test { channel_count: 2, bitrate: Some(128_000), description: None, + container: Container::Legacy, }, ); From 6ed6e219e1fe49fbc19d6069e80b6871d1df503f Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Thu, 8 Jan 2026 09:51:10 -0300 Subject: [PATCH 05/16] 
catalog on source-mse --- js/hang/src/watch/audio/source-mse.ts | 2 +- js/hang/src/watch/audio/source.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/js/hang/src/watch/audio/source-mse.ts b/js/hang/src/watch/audio/source-mse.ts index 26383bc33..e031f80f8 100644 --- a/js/hang/src/watch/audio/source-mse.ts +++ b/js/hang/src/watch/audio/source-mse.ts @@ -158,10 +158,10 @@ export class SourceMSE { broadcast: Moq.Broadcast, name: string, config: Catalog.AudioConfig, + catalog: Catalog.Audio, ): Promise { await this.initialize(config); - const catalog = { priority: 128 }; // TODO: Get from actual catalog const sub = broadcast.subscribe(name, catalog.priority); effect.cleanup(() => sub.close()); diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts index 8e277c2df..f5d94fc3b 100644 --- a/js/hang/src/watch/audio/source.ts +++ b/js/hang/src/watch/audio/source.ts @@ -226,7 +226,7 @@ export class Source { // Run MSE track - no worklet needed, browser handles everything try { - await mseSource.runTrack(effect, broadcast, name, config); + await mseSource.runTrack(effect, broadcast, name, config, catalog); } catch (error) { console.error("MSE path error, falling back to WebCodecs:", error); // Fallback to WebCodecs From 834d7783bc26bf2cc54f60d72d3fb31eaf150f65 Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Thu, 8 Jan 2026 10:57:24 -0300 Subject: [PATCH 06/16] group logic MSE --- js/hang/src/watch/audio/source-mse.ts | 60 ++++++++++++++++----------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/js/hang/src/watch/audio/source-mse.ts b/js/hang/src/watch/audio/source-mse.ts index e031f80f8..f50b325e5 100644 --- a/js/hang/src/watch/audio/source-mse.ts +++ b/js/hang/src/watch/audio/source-mse.ts @@ -18,13 +18,13 @@ export class SourceMSE { #audio?: HTMLAudioElement; #mediaSource?: MediaSource; #sourceBuffer?: SourceBuffer; - + // Signal to expose audio element for volume/mute control #audioElement = new Signal(undefined); readonly audioElement = this.#audioElement as Getter; #appendQueue: Uint8Array[] = []; - static readonly MAX_QUEUE_SIZE = 10; + static readonly MAX_QUEUE_SIZE = 10; #stats = new Signal(undefined); readonly stats = this.#stats; @@ -48,7 +48,7 @@ export class SourceMSE { this.#audio.muted = false; // Allow audio playback this.#audio.volume = 1.0; // Set initial volume to 1.0 document.body.appendChild(this.#audio); - + this.#audioElement.set(this.#audio); this.#mediaSource = new MediaSource(); @@ -88,7 +88,7 @@ export class SourceMSE { this.#sourceBuffer.addEventListener("updateend", () => { this.#processAppendQueue(); - + }); this.#sourceBuffer.addEventListener("error", (e) => { @@ -101,6 +101,10 @@ export class SourceMSE { throw new Error("SourceBuffer not initialized"); } + // Don't queue fragments if MediaSource is closed + if (this.#mediaSource.readyState === "closed") { + return; + } if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { const discarded = this.#appendQueue.shift(); @@ -125,7 +129,7 @@ export class SourceMSE { result.set(fragment, offset); offset += fragment.byteLength; } - + return result; } @@ -135,16 +139,15 @@ export class SourceMSE { } if (this.#mediaSource?.readyState !== "open") { - console.error(`[MSE Audio] MediaSource not open: ${this.#mediaSource?.readyState}`); return; } const fragment = this.#appendQueue.shift()!; - + try { // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) this.#sourceBuffer.appendBuffer(fragment as BufferSource); - + 
this.#stats.update((current) => ({ bytesReceived: (current?.bytesReceived ?? 0) + fragment.byteLength, })); @@ -177,19 +180,23 @@ export class SourceMSE { await new Promise((resolve) => { let checkCount = 0; const maxChecks = 100; // 10 seconds max wait - + let hasSeeked = false; const checkReady = () => { checkCount++; if (this.#audio && this.#sourceBuffer) { - const bufferedRanges = this.#sourceBuffer.buffered; const audioBuffered = this.#audio.buffered; - const hasBufferedData = bufferedRanges.length > 0; - + const hasBufferedData = this.#sourceBuffer.buffered.length > 0; + if (hasBufferedData && audioBuffered && audioBuffered.length > 0 && !hasSeeked) { const currentTime = this.#audio.currentTime; - const isTimeBuffered = audioBuffered.start(0) <= currentTime && currentTime < audioBuffered.end(audioBuffered.length - 1); - + let isTimeBuffered = false; + for (let i = 0; i < audioBuffered.length; i++) { + if (audioBuffered.start(i) <= currentTime && currentTime < audioBuffered.end(i)) { + isTimeBuffered = true; + break; + } + } if (!isTimeBuffered) { const seekTime = audioBuffered.start(0); this.#audio.currentTime = seekTime; @@ -198,7 +205,7 @@ export class SourceMSE { return; } } - + // Try to play if we have buffered data, even if readyState is low // The browser will start playing when it's ready if (hasBufferedData && this.#audio.readyState >= HTMLMediaElement.HAVE_METADATA) { @@ -268,23 +275,29 @@ export class SourceMSE { let currentGroup: number | undefined = undefined; let groupFragments: Uint8Array[] = []; // Accumulate fragments for current group - for (;;) { + for (; ;) { const frame = await Promise.race([consumer.decode(), effect.cancel]); if (!frame) { - if (groupFragments.length > 0 && initSegmentReceived) { + if (groupFragments.length > 0 && initSegmentReceived && this.#mediaSource?.readyState === "open") { const groupData = this.#concatenateFragments(groupFragments); await this.appendFragment(groupData); groupFragments = []; } break; } + + // Stop processing if MediaSource is closed + if (this.#mediaSource?.readyState === "closed") { + break; + } + frameCount++; const isMoovAtom = hasMoovAtom(frame.data); const isInitSegment = isMoovAtom && !initSegmentReceived; - + if (isInitSegment) { - if (groupFragments.length > 0 && initSegmentReceived) { + if (groupFragments.length > 0 && initSegmentReceived && this.#mediaSource?.readyState === "open") { const groupData = this.#concatenateFragments(groupFragments); await this.appendFragment(groupData); groupFragments = []; @@ -300,7 +313,7 @@ export class SourceMSE { } if (currentGroup !== undefined && frame.group !== currentGroup) { - if (groupFragments.length > 0) { + if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") { const groupData = this.#concatenateFragments(groupFragments); await this.appendFragment(groupData); groupFragments = []; @@ -314,12 +327,11 @@ export class SourceMSE { groupFragments.push(frame.data); - if (groupFragments.length >= 1) { - + // Append immediately for low latency audio sync + if (groupFragments.length >= 1 && this.#mediaSource?.readyState === "open") { const groupData = this.#concatenateFragments(groupFragments); await this.appendFragment(groupData); groupFragments = []; - } } }); @@ -327,7 +339,7 @@ export class SourceMSE { close(): void { this.#appendQueue = []; - + this.#audioElement.set(undefined); if (this.#sourceBuffer && this.#mediaSource) { From 75d85df98a4416ce432a7dbfeec0c19c31e42177 Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Thu, 8 Jan 2026 14:20:32 
-0300 Subject: [PATCH 07/16] rust and typescript fixes --- js/hang/src/frame.ts | 2 +- js/hang/src/util/mime.ts | 9 +- js/hang/src/watch/audio/emitter.ts | 8 +- js/hang/src/watch/audio/source-mse.ts | 54 ++++++------ js/hang/src/watch/audio/source.ts | 4 +- js/hang/src/watch/video/source-mse.ts | 118 +++++++++++++++----------- js/hang/src/watch/video/source.ts | 4 +- rs/hang/src/catalog/container.rs | 10 +-- rs/hang/src/catalog/root.rs | 8 +- rs/hang/src/import/fmp4.rs | 110 ++++++++++++++++++------ rs/hang/src/import/hls.rs | 47 +++++----- rs/hang/src/model/track.rs | 2 +- 12 files changed, 223 insertions(+), 153 deletions(-) diff --git a/js/hang/src/frame.ts b/js/hang/src/frame.ts index b0581e656..84904e5f5 100644 --- a/js/hang/src/frame.ts +++ b/js/hang/src/frame.ts @@ -112,7 +112,7 @@ export class Consumer { async #run() { // Start fetching groups in the background - + for (;;) { const consumer = await this.#track.nextGroup(); if (!consumer) { diff --git a/js/hang/src/util/mime.ts b/js/hang/src/util/mime.ts index 3d745044e..bd7b32907 100644 --- a/js/hang/src/util/mime.ts +++ b/js/hang/src/util/mime.ts @@ -2,7 +2,7 @@ import type * as Catalog from "../catalog"; /** * Builds a MIME type string for MediaSource from a codec string. - * + * * @param codec - The codec string from the catalog (e.g., "avc1.42E01E", "mp4a.40.2") * @param type - "video" or "audio" * @returns MIME type string (e.g., "video/mp4; codecs=\"avc1.42E01E\"") @@ -15,7 +15,7 @@ export function buildMimeType(codec: string, type: "video" | "audio"): string { /** * Checks if a MIME type is supported by MediaSource. - * + * * @param mimeType - The MIME type to check * @returns true if supported, false otherwise */ @@ -25,7 +25,7 @@ export function isMimeTypeSupported(mimeType: string): boolean { /** * Builds and validates a MIME type for video from catalog config. - * + * * @param config - Video configuration from catalog * @returns MIME type string or undefined if not supported */ @@ -39,7 +39,7 @@ export function buildVideoMimeType(config: Catalog.VideoConfig): string | undefi /** * Builds and validates a MIME type for audio from catalog config. 
- * + * * @param config - Audio configuration from catalog * @returns MIME type string or undefined if not supported */ @@ -50,4 +50,3 @@ export function buildAudioMimeType(config: Catalog.AudioConfig): string | undefi } return undefined; } - diff --git a/js/hang/src/watch/audio/emitter.ts b/js/hang/src/watch/audio/emitter.ts index 6caa825ca..6a1b91a4e 100644 --- a/js/hang/src/watch/audio/emitter.ts +++ b/js/hang/src/watch/audio/emitter.ts @@ -69,7 +69,7 @@ export class Emitter { const paused = effect.get(this.paused); mseAudio.volume = volume; mseAudio.muted = muted; - + // Control play/pause state if (paused && !mseAudio.paused) { mseAudio.pause(); @@ -77,10 +77,12 @@ export class Emitter { // Resume if paused - try to play even if readyState is low const tryPlay = () => { if (!paused && mseAudio.paused) { - mseAudio.play().catch(err => console.error("[Audio Emitter] Failed to resume audio:", err)); + mseAudio + .play() + .catch((err) => console.error("[Audio Emitter] Failed to resume audio:", err)); } }; - + // Try to play if we have metadata (HAVE_METADATA = 1), browser will start when ready if (mseAudio.readyState >= HTMLMediaElement.HAVE_METADATA) { tryPlay(); diff --git a/js/hang/src/watch/audio/source-mse.ts b/js/hang/src/watch/audio/source-mse.ts index f50b325e5..50476c199 100644 --- a/js/hang/src/watch/audio/source-mse.ts +++ b/js/hang/src/watch/audio/source-mse.ts @@ -61,12 +61,16 @@ export class SourceMSE { reject(new Error("MediaSource sourceopen timeout")); }, 5000); - this.#mediaSource!.addEventListener( + this.#mediaSource?.addEventListener( "sourceopen", () => { clearTimeout(timeout); try { - this.#sourceBuffer = this.#mediaSource!.addSourceBuffer(mimeType); + this.#sourceBuffer = this.#mediaSource?.addSourceBuffer(mimeType); + if (!this.#sourceBuffer) { + reject(new Error("Failed to create SourceBuffer")); + return; + } this.#setupSourceBuffer(); resolve(); } catch (error) { @@ -76,7 +80,7 @@ export class SourceMSE { { once: true }, ); - this.#mediaSource!.addEventListener("error", (e) => { + this.#mediaSource?.addEventListener("error", (e) => { clearTimeout(timeout); reject(new Error(`MediaSource error: ${e}`)); }); @@ -88,7 +92,6 @@ export class SourceMSE { this.#sourceBuffer.addEventListener("updateend", () => { this.#processAppendQueue(); - }); this.#sourceBuffer.addEventListener("error", (e) => { @@ -108,12 +111,14 @@ export class SourceMSE { if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { const discarded = this.#appendQueue.shift(); - console.warn(`[MSE Audio] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`); + console.warn( + `[MSE Audio] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 
0} bytes)`, + ); } const copy = new Uint8Array(fragment); this.#appendQueue.push(copy); - + this.#processAppendQueue(); } @@ -142,7 +147,8 @@ export class SourceMSE { return; } - const fragment = this.#appendQueue.shift()!; + const fragment = this.#appendQueue.shift(); + if (!fragment) return; try { // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) @@ -209,16 +215,19 @@ export class SourceMSE { // Try to play if we have buffered data, even if readyState is low // The browser will start playing when it's ready if (hasBufferedData && this.#audio.readyState >= HTMLMediaElement.HAVE_METADATA) { - this.#audio.play().then(() => { - resolve(); - }).catch((error) => { - console.error("[MSE Audio] Audio play() failed (initial):", error); - if (checkCount < maxChecks) { - setTimeout(checkReady, 200); - } else { + this.#audio + .play() + .then(() => { resolve(); - } - }); + }) + .catch((error) => { + console.error("[MSE Audio] Audio play() failed (initial):", error); + if (checkCount < maxChecks) { + setTimeout(checkReady, 200); + } else { + resolve(); + } + }); } else if (checkCount >= maxChecks) { resolve(); } else { @@ -246,10 +255,7 @@ export class SourceMSE { while (offset + 8 <= len) { // Atom size (big endian) const size = - (data[offset] << 24) | - (data[offset + 1] << 16) | - (data[offset + 2] << 8) | - data[offset + 3]; + (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; const type = String.fromCharCode( data[offset + 4], @@ -271,11 +277,10 @@ export class SourceMSE { // Read fragments and append to SourceBuffer // We group fragments by MOQ group before appending effect.spawn(async () => { - let frameCount = 0; - let currentGroup: number | undefined = undefined; + let currentGroup: number | undefined; let groupFragments: Uint8Array[] = []; // Accumulate fragments for current group - for (; ;) { + for (;;) { const frame = await Promise.race([consumer.decode(), effect.cancel]); if (!frame) { if (groupFragments.length > 0 && initSegmentReceived && this.#mediaSource?.readyState === "open") { @@ -291,8 +296,6 @@ export class SourceMSE { break; } - frameCount++; - const isMoovAtom = hasMoovAtom(frame.data); const isInitSegment = isMoovAtom && !initSegmentReceived; @@ -372,4 +375,3 @@ export class SourceMSE { this.#signals.close(); } } - diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts index f5d94fc3b..da09401ea 100644 --- a/js/hang/src/watch/audio/source.ts +++ b/js/hang/src/watch/audio/source.ts @@ -40,7 +40,7 @@ export class Source { #worklet = new Signal(undefined); // Downcast to AudioNode so it matches Publish.Audio readonly root = this.#worklet as Getter; - + // For MSE path, expose the HTMLAudioElement for direct control #mseAudioElement = new Signal(undefined); readonly mseAudioElement = this.#mseAudioElement as Getter; @@ -74,7 +74,7 @@ export class Source { this.#signals.effect((effect) => { const audio = effect.get(catalog)?.audio; this.catalog.set(audio); - + if (audio?.renditions) { const first = Object.entries(audio.renditions).at(0); if (first) { diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts index df7defe91..60ac15bdb 100644 --- a/js/hang/src/watch/video/source-mse.ts +++ b/js/hang/src/watch/video/source-mse.ts @@ -7,7 +7,11 @@ import type * as Time from "../../time"; import * as Mime from "../../util/mime"; // The types in VideoDecoderConfig that cause a hard reload. -type RequiredDecoderConfig = Omit; +// ex. 
codedWidth/Height are optional and can be changed in-band, so we don't want to trigger a reload. +// This way we can keep the current subscription active. +// Note: We keep codedWidth/Height as optional for logging, but set them to undefined to avoid reloads. +type RequiredDecoderConfig = Omit & + Partial>; type BufferStatus = { state: "empty" | "filled" }; @@ -81,13 +85,13 @@ export class SourceMSE { if (!this.#video) return; const videoBuffered = this.#video.buffered; const current = this.#video.currentTime; - + if (videoBuffered && videoBuffered.length > 0) { const lastRange = videoBuffered.length - 1; const end = videoBuffered.end(lastRange); if (current < end) { this.#video.currentTime = current; - this.#video.play().catch(err => console.error("[MSE] Failed to resume after ended:", err)); + this.#video.play().catch((err) => console.error("[MSE] Failed to resume after ended:", err)); } } }); @@ -101,7 +105,7 @@ export class SourceMSE { const end = videoBuffered.end(lastRange); const remaining = end - current; if (remaining <= 0.1 && this.#video.paused) { - this.#video.play().catch(err => console.error("[MSE] Failed to resume playback:", err)); + this.#video.play().catch((err) => console.error("[MSE] Failed to resume playback:", err)); } } }); @@ -116,12 +120,16 @@ export class SourceMSE { reject(new Error("MediaSource sourceopen timeout")); }, 5000); - this.#mediaSource!.addEventListener( + this.#mediaSource?.addEventListener( "sourceopen", () => { clearTimeout(timeout); try { - this.#sourceBuffer = this.#mediaSource!.addSourceBuffer(mimeType); + this.#sourceBuffer = this.#mediaSource?.addSourceBuffer(mimeType); + if (!this.#sourceBuffer) { + reject(new Error("Failed to create SourceBuffer")); + return; + } this.#setupSourceBuffer(); resolve(); } catch (error) { @@ -131,7 +139,7 @@ export class SourceMSE { { once: true }, ); - this.#mediaSource!.addEventListener("error", (e) => { + this.#mediaSource?.addEventListener("error", (e) => { clearTimeout(timeout); reject(new Error(`MediaSource error: ${e}`)); }); @@ -209,12 +217,14 @@ export class SourceMSE { } if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { const discarded = this.#appendQueue.shift(); - console.warn(`[MSE] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`); + console.warn( + `[MSE] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`, + ); } const copy = new Uint8Array(fragment); this.#appendQueue.push(copy); - + this.#processAppendQueue(); } @@ -222,7 +232,7 @@ export class SourceMSE { if (fragments.length === 1) { return fragments[0]; } - + const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0); const result = new Uint8Array(totalSize); let offset = 0; @@ -230,7 +240,7 @@ export class SourceMSE { result.set(fragment, offset); offset += fragment.byteLength; } - + return result; } @@ -244,12 +254,13 @@ export class SourceMSE { return; } - const fragment = this.#appendQueue.shift()!; - + const fragment = this.#appendQueue.shift(); + if (!fragment) return; + try { // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) this.#sourceBuffer.appendBuffer(fragment as BufferSource); - + this.#stats.update((current) => ({ frameCount: current?.frameCount ?? 0, timestamp: current?.timestamp ?? 
0, @@ -292,15 +303,18 @@ export class SourceMSE { let checkCount = 0; const maxChecks = 100; // 10 seconds max wait let hasSeeked = false; - + const checkReady = () => { checkCount++; if (this.#video) { const videoBuffered = this.#video.buffered; const hasBufferedData = videoBuffered && videoBuffered.length > 0; const currentTime = this.#video.currentTime; - const isTimeBuffered = hasBufferedData && videoBuffered.start(0) <= currentTime && currentTime < videoBuffered.end(videoBuffered.length - 1); - + const isTimeBuffered = + hasBufferedData && + videoBuffered.start(0) <= currentTime && + currentTime < videoBuffered.end(videoBuffered.length - 1); + if (hasBufferedData && !isTimeBuffered && !hasSeeked) { const seekTime = videoBuffered.start(0); this.#video.currentTime = seekTime; @@ -308,32 +322,41 @@ export class SourceMSE { setTimeout(checkReady, 100); return; } - + if (this.#video.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) { - this.#video.play().then(() => { - resolve(); - }).catch((error) => { - console.error("[MSE] Video play() failed:", error); - resolve(); - }); + this.#video + .play() + .then(() => { + resolve(); + }) + .catch((error) => { + console.error("[MSE] Video play() failed:", error); + resolve(); + }); } else if (hasBufferedData && checkCount >= 10) { // If we have buffered data but readyState hasn't advanced, try playing anyway after 1 second - this.#video.play().then(() => { - resolve(); - }).catch((error) => { - console.error("[MSE] Video play() failed:", error); - if (checkCount < maxChecks) { - setTimeout(checkReady, 100); - } else { + this.#video + .play() + .then(() => { resolve(); - } - }); + }) + .catch((error) => { + console.error("[MSE] Video play() failed:", error); + if (checkCount < maxChecks) { + setTimeout(checkReady, 100); + } else { + resolve(); + } + }); } else if (checkCount >= maxChecks) { - this.#video.play().then(() => { - resolve(); - }).catch(() => { - resolve(); - }); + this.#video + .play() + .then(() => { + resolve(); + }) + .catch(() => { + resolve(); + }); } else { setTimeout(checkReady, 100); } @@ -350,17 +373,14 @@ export class SourceMSE { // The init segment may start with "ftyp" followed by "moov", or just "moov" function isInitSegmentData(data: Uint8Array): boolean { if (data.length < 8) return false; - + let offset = 0; const len = data.length; while (offset + 8 <= len) { // Atom size (big endian) const size = - (data[offset] << 24) | - (data[offset + 1] << 16) | - (data[offset + 2] << 8) | - data[offset + 3]; + (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; const type = String.fromCharCode( data[offset + 4], @@ -378,16 +398,14 @@ export class SourceMSE { return false; } - + // Read fragments and append to SourceBuffer // MSE requires complete GOPs to be appended in a single operation // We group fragments by MOQ group (which corresponds to GOPs) before appending effect.spawn(async () => { - let frameCount = 0; - let currentGroup: number | undefined = undefined; + let currentGroup: number | undefined; let gopFragments: Uint8Array[] = []; // Accumulate fragments for current GOP - for (;;) { const frame = await Promise.race([consumer.decode(), effect.cancel]); if (!frame) { @@ -398,11 +416,10 @@ export class SourceMSE { } break; } - frameCount++; const containsInitSegmentData = isInitSegmentData(frame.data); const isInitSegment = containsInitSegmentData && !initSegmentReceived; - + if (isInitSegment) { if (gopFragments.length > 0 && initSegmentReceived) { const gopData = 
this.#concatenateFragments(gopFragments); @@ -438,7 +455,7 @@ export class SourceMSE { const gopData = this.#concatenateFragments(gopFragments); await this.appendFragment(gopData); gopFragments = []; - } + } } }); } @@ -496,4 +513,3 @@ export class SourceMSE { return this.#stats; } } - diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index b73ddc849..ba8b48381 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -27,7 +27,9 @@ export type Target = { // The types in VideoDecoderConfig that cause a hard reload. // ex. codedWidth/Height are optional and can be changed in-band, so we don't want to trigger a reload. // This way we can keep the current subscription active. -type RequiredDecoderConfig = Omit; +// Note: We keep codedWidth/Height as optional for logging, but set them to undefined to avoid reloads. +type RequiredDecoderConfig = Omit & + Partial>; type BufferStatus = { state: "empty" | "filled" }; diff --git a/rs/hang/src/catalog/container.rs b/rs/hang/src/catalog/container.rs index edafa44e8..f9a1e6805 100644 --- a/rs/hang/src/catalog/container.rs +++ b/rs/hang/src/catalog/container.rs @@ -5,20 +5,14 @@ use serde::{Deserialize, Serialize}; /// - "legacy": Uses QUIC VarInt encoding (1-8 bytes, variable length), raw frame payloads /// - "raw": Uses fixed u64 encoding (8 bytes, big-endian), raw frame payloads /// - "fmp4": Fragmented MP4 container - frames contain complete moof+mdat fragments -#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)] #[serde(rename_all = "camelCase")] pub enum Container { #[serde(rename = "legacy")] + #[default] Legacy, #[serde(rename = "raw")] Raw, #[serde(rename = "fmp4")] Fmp4, } - -impl Default for Container { - fn default() -> Self { - Container::Legacy - } -} - diff --git a/rs/hang/src/catalog/root.rs b/rs/hang/src/catalog/root.rs index f2a36ac81..c902d9594 100644 --- a/rs/hang/src/catalog/root.rs +++ b/rs/hang/src/catalog/root.rs @@ -205,7 +205,7 @@ impl Drop for CatalogGuard<'_> { // TODO decide if this should return an error, or be impossible to fail let frame = self.catalog.to_string().expect("invalid catalog"); - + // Log the catalog JSON to verify container field is included if let Some(video) = &self.catalog.video { for (name, config) in &video.renditions { @@ -217,10 +217,10 @@ impl Drop for CatalogGuard<'_> { tracing::info!(track = name, container = ?config.container, "publishing catalog with container"); } } - + // Log the full catalog JSON to debug serialization tracing::debug!(catalog_json = %frame, "publishing catalog JSON"); - + group.write_frame(frame); group.close(); } @@ -285,7 +285,7 @@ impl From for CatalogConsumer { mod test { use std::collections::BTreeMap; - use crate::catalog::{AudioCodec::Opus, AudioConfig, VideoConfig, H264, Container}; + use crate::catalog::{AudioCodec::Opus, AudioConfig, Container, VideoConfig, H264}; use super::*; diff --git a/rs/hang/src/import/fmp4.rs b/rs/hang/src/import/fmp4.rs index 66f38885a..514a08484 100644 --- a/rs/hang/src/import/fmp4.rs +++ b/rs/hang/src/import/fmp4.rs @@ -1,4 +1,6 @@ -use crate::catalog::{AudioCodec, AudioConfig, CatalogProducer, Container, VideoCodec, VideoConfig, AAC, AV1, H264, H265, VP9}; +use crate::catalog::{ + AudioCodec, AudioConfig, CatalogProducer, Container, VideoCodec, VideoConfig, AAC, AV1, H264, H265, VP9, +}; use crate::{self as hang, Timestamp}; use anyhow::Context; use bytes::{Buf, Bytes, BytesMut}; 
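// A minimal sketch of the passthrough init-segment assembly implemented below,
// where captured ftyp bytes (when present) must precede moov; `build_init_segment`
// is a hypothetical free function used only for illustration.
fn build_init_segment(ftyp: Option<&Bytes>, moov: &Bytes) -> Bytes {
	let mut init = BytesMut::with_capacity(ftyp.map_or(0, |f| f.len()) + moov.len());
	if let Some(ftyp) = ftyp {
		init.extend_from_slice(ftyp); // ftyp first, per CMAF init-segment layout
	}
	init.extend_from_slice(moov);
	init.freeze()
}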
@@ -54,10 +56,10 @@ pub struct Fmp4 { /// When passthrough_mode is enabled, store raw bytes of ftyp (file type box) ftyp_bytes: Option, - + /// When passthrough_mode is enabled, store raw bytes of moov (init segment) moov_bytes: Option, - + /// When passthrough_mode is enabled, store a copy of init segment (ftyp+moov) to send with each keyframe /// This ensures new subscribers can receive the init segment even if group 0 is not available init_segment_bytes_for_keyframes: Option, @@ -121,7 +123,12 @@ impl Fmp4 { self.ftyp_bytes = Some(bytes.slice(bytes_offset..bytes_offset + size)); tracing::debug!(ftyp_size = size, bytes_offset, "captured ftyp bytes for init segment"); } else { - tracing::warn!(bytes_offset, size, available_len = bytes.len(), "ftyp bytes out of range"); + tracing::warn!( + bytes_offset, + size, + available_len = bytes.len(), + "ftyp bytes out of range" + ); } } else { tracing::warn!("passthrough mode but available_bytes is None when processing ftyp"); @@ -137,7 +144,12 @@ impl Fmp4 { self.moov_bytes = Some(bytes.slice(bytes_offset..bytes_offset + size)); tracing::debug!(moov_size = size, bytes_offset, "captured moov bytes for init segment"); } else { - tracing::warn!(bytes_offset, size, available_len = bytes.len(), "moov bytes out of range"); + tracing::warn!( + bytes_offset, + size, + available_len = bytes.len(), + "moov bytes out of range" + ); } } else { tracing::warn!("passthrough mode but available_bytes is None when processing moov"); @@ -185,7 +197,12 @@ impl Fmp4 { fragment_bytes.extend_from_slice(&mdat_bytes); let fragment = fragment_bytes.freeze(); - tracing::info!(moof_size = moof_bytes.len(), mdat_size = mdat_bytes.len(), total_fragment_size = fragment.len(), "processing CMAF fragment (moof+mdat)"); + tracing::info!( + moof_size = moof_bytes.len(), + mdat_size = mdat_bytes.len(), + total_fragment_size = fragment.len(), + "processing CMAF fragment (moof+mdat)" + ); self.transport_fragment(fragment, moof)?; tracing::info!("finished processing CMAF fragment, ready for next fragment"); } else { @@ -277,7 +294,7 @@ impl Fmp4 { if passthrough_mode { if let Some(moov_bytes) = self.moov_bytes.take() { let timestamp = hang::Timestamp::from_micros(0)?; - + // Build init segment: ftyp (if available) + moov let mut init_segment = BytesMut::new(); if let Some(ref ftyp_bytes) = self.ftyp_bytes { @@ -286,19 +303,24 @@ impl Fmp4 { } init_segment.extend_from_slice(&moov_bytes); let init_segment_bytes = init_segment.freeze(); - - tracing::info!(tracks = self.tracks.len(), init_segment_size = init_segment_bytes.len(), ftyp_included = self.ftyp_bytes.is_some(), "sending init segment to all tracks"); - + + tracing::info!( + tracks = self.tracks.len(), + init_segment_size = init_segment_bytes.len(), + ftyp_included = self.ftyp_bytes.is_some(), + "sending init segment to all tracks" + ); + // Verify moov atom signature let moov_offset = self.ftyp_bytes.as_ref().map(|f| f.len()).unwrap_or(0); if moov_offset + 8 <= init_segment_bytes.len() { let atom_type = String::from_utf8_lossy(&init_segment_bytes[moov_offset + 4..moov_offset + 8]); tracing::info!(atom_type = %atom_type, "verifying moov atom signature in init segment"); } - + // Store a copy for sending with keyframes self.init_segment_bytes_for_keyframes = Some(init_segment_bytes.clone()); - + // Send init segment to all tracks - this creates the first group (sequence 0) for (_track_id, track) in &mut self.tracks { let frame = hang::Frame { @@ -351,7 +373,11 @@ impl Fmp4 { display_ratio_width: None, display_ratio_height: None, 
optimize_for_latency: None, - container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy }, + container: if passthrough_mode { + Container::Fmp4 + } else { + Container::Legacy + }, } } mp4_atom::Codec::Hev1(hev1) => Self::init_h265_static(true, &hev1.hvcc, &hev1.visual, passthrough_mode)?, @@ -367,7 +393,11 @@ impl Fmp4 { display_ratio_width: None, display_ratio_height: None, optimize_for_latency: None, - container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy }, + container: if passthrough_mode { + Container::Fmp4 + } else { + Container::Legacy + }, }, mp4_atom::Codec::Vp09(vp09) => { // https://github.com/gpac/mp4box.js/blob/325741b592d910297bf609bc7c400fc76101077b/src/box-codecs.js#L238 @@ -394,7 +424,11 @@ impl Fmp4 { optimize_for_latency: None, bitrate: None, framerate: None, - container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy }, + container: if passthrough_mode { + Container::Fmp4 + } else { + Container::Legacy + }, } } mp4_atom::Codec::Av01(av01) => { @@ -427,7 +461,11 @@ impl Fmp4 { optimize_for_latency: None, bitrate: None, framerate: None, - container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy }, + container: if passthrough_mode { + Container::Fmp4 + } else { + Container::Legacy + }, } } mp4_atom::Codec::Unknown(unknown) => anyhow::bail!("unknown codec: {:?}", unknown), @@ -438,7 +476,12 @@ impl Fmp4 { } // There's two almost identical hvcc atoms in the wild. - fn init_h265_static(in_band: bool, hvcc: &mp4_atom::Hvcc, visual: &mp4_atom::Visual, passthrough_mode: bool) -> anyhow::Result { + fn init_h265_static( + in_band: bool, + hvcc: &mp4_atom::Hvcc, + visual: &mp4_atom::Visual, + passthrough_mode: bool, + ) -> anyhow::Result { let mut description = BytesMut::new(); hvcc.encode_body(&mut description)?; @@ -462,7 +505,11 @@ impl Fmp4 { display_ratio_width: None, display_ratio_height: None, optimize_for_latency: None, - container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy }, + container: if passthrough_mode { + Container::Fmp4 + } else { + Container::Legacy + }, }) } @@ -495,7 +542,11 @@ impl Fmp4 { channel_count: mp4a.audio.channel_count as _, bitrate: Some(bitrate.into()), description: None, // TODO? - container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy }, + container: if passthrough_mode { + Container::Fmp4 + } else { + Container::Legacy + }, } } mp4_atom::Codec::Opus(opus) => { @@ -505,7 +556,11 @@ impl Fmp4 { channel_count: opus.audio.channel_count as _, bitrate: None, description: None, // TODO? - container: if passthrough_mode { Container::Fmp4 } else { Container::Legacy }, + container: if passthrough_mode { + Container::Fmp4 + } else { + Container::Legacy + }, } } mp4_atom::Codec::Unknown(unknown) => anyhow::bail!("unknown codec: {:?}", unknown), @@ -657,13 +712,13 @@ impl Fmp4 { if self.moov_bytes.is_some() { tracing::warn!("transporting fragment but moov_bytes is still set - init segment may not have been sent"); } - + // Verify fragment starts with moof atom if fragment.len() >= 8 { let atom_type = String::from_utf8_lossy(&fragment[4..8]); tracing::info!(atom_type = %atom_type, fragment_size = fragment.len(), passthrough_mode = self.passthrough_mode, "transporting fragment"); } - + // Ensure moov is available (init segment must be processed first) let moov = self.moov.as_ref().ok_or_else(|| { anyhow::anyhow!("missing moov box - init segment must be processed before fragments. 
Make sure ensure_init_segment() is called first.") @@ -696,9 +751,7 @@ impl Fmp4 { if let Some(trun) = traf.trun.first() { if let Some(entry) = trun.entries.first() { let tfhd = &traf.tfhd; - let flags = entry - .flags - .unwrap_or(tfhd.default_sample_flags.unwrap_or_default()); + let flags = entry.flags.unwrap_or(tfhd.default_sample_flags.unwrap_or_default()); // https://chromium.googlesource.com/chromium/src/media/+/master/formats/mp4/track_run_iterator.cc#177 let keyframe_flag = (flags >> 24) & 0x3 == 0x2; // kSampleDependsOnNoOther let non_sync = (flags >> 16) & 0x1 == 0x1; // kSampleIsNonSyncSample @@ -742,12 +795,15 @@ impl Fmp4 { track.write(init_frame)?; tracing::info!(track_id, timestamp = ?timestamp, init_segment_size = init_segment_bytes.len(), "sent init segment as first frame of new group (keyframe) for live stream"); } else { - tracing::warn!(track_id, "is_keyframe=true but init_segment_bytes_for_keyframes is None"); + tracing::warn!( + track_id, + "is_keyframe=true but init_segment_bytes_for_keyframes is None" + ); } } else { tracing::debug!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "non-keyframe fragment in passthrough mode"); } - + // Send fragment as non-keyframe (in same group as init segment if keyframe, or current group if not) let frame = hang::Frame { timestamp, diff --git a/rs/hang/src/import/hls.rs b/rs/hang/src/import/hls.rs index 6b03e3665..53cdf453a 100644 --- a/rs/hang/src/import/hls.rs +++ b/rs/hang/src/import/hls.rs @@ -198,7 +198,9 @@ impl Hls { // Prime the shared audio track, if any. if let Some(mut track) = self.audio.take() { let playlist = self.fetch_media_playlist(track.playlist.clone()).await?; - let count = self.consume_segments_limited(TrackKind::Audio, &mut track, &playlist, MAX_INIT_SEGMENTS).await?; + let count = self + .consume_segments_limited(TrackKind::Audio, &mut track, &playlist, MAX_INIT_SEGMENTS) + .await?; buffered += count; self.audio = Some(track); } @@ -328,9 +330,9 @@ impl Hls { let next_seq = track.next_sequence.unwrap_or(0); let playlist_seq = playlist.media_sequence; let total_segments = playlist.segments.len(); - + let last_playlist_seq = playlist_seq + total_segments as u64; - + let skip = if next_seq > last_playlist_seq { total_segments } else if next_seq < playlist_seq { @@ -339,24 +341,25 @@ impl Hls { } else { (next_seq - playlist_seq) as usize }; - - let available = if skip < total_segments { - total_segments - skip - } else { - 0 - }; - + + let available = total_segments.saturating_sub(skip); + // Limit how many segments we process let to_process = available.min(max_segments); - + if to_process > 0 { let base_seq = playlist_seq + skip as u64; - for (i, segment) in playlist.segments[skip..skip+to_process].iter().enumerate() { + for (i, segment) in playlist.segments[skip..skip + to_process].iter().enumerate() { self.push_segment(kind, track, segment, base_seq + i as u64).await?; } - info!(?kind, processed = to_process, available = available, "processed limited segments during init"); + info!( + ?kind, + processed = to_process, + available = available, + "processed limited segments during init" + ); } - + Ok(to_process) } @@ -372,10 +375,10 @@ impl Hls { let next_seq = track.next_sequence.unwrap_or(0); let playlist_seq = playlist.media_sequence; let total_segments = playlist.segments.len(); - + // Calculate the last sequence number in the playlist let last_playlist_seq = playlist_seq + total_segments as u64; - + // If we've already processed beyond what's in the playlist, wait for new segments let skip 
= if next_seq > last_playlist_seq { // We're ahead of the playlist - wait for ffmpeg to generate more segments @@ -401,13 +404,9 @@ impl Hls { // Normal case: next_seq is within playlist range (next_seq - playlist_seq) as usize }; - - let fresh_segments = if skip < total_segments { - total_segments - skip - } else { - 0 - }; - + + let fresh_segments = total_segments.saturating_sub(skip); + info!( ?kind, playlist_sequence = playlist_seq, @@ -417,7 +416,7 @@ impl Hls { fresh_segments = fresh_segments, "consuming HLS segments" ); - + if fresh_segments > 0 { let base_seq = playlist_seq + skip as u64; for (i, segment) in playlist.segments[skip..].iter().enumerate() { diff --git a/rs/hang/src/model/track.rs b/rs/hang/src/model/track.rs index 9c797b4fd..8a92d7c86 100644 --- a/rs/hang/src/model/track.rs +++ b/rs/hang/src/model/track.rs @@ -92,7 +92,7 @@ impl TrackProducer { tracing::info!(timestamp = ?frame.timestamp, "creating new group for keyframe"); } new_group - }, + } // The first frame must be a keyframe. None => return Err(Error::MissingKeyframe), }; From f14e058e82d0c360ff33f3e866d741760f95abd9 Mon Sep 17 00:00:00 2001 From: jpbusta10 <103268894+jpbusta10@users.noreply.github.com> Date: Fri, 9 Jan 2026 08:54:04 -0300 Subject: [PATCH 08/16] Update rs/hang-cli/src/publish.rs Co-authored-by: Emil Santurio --- rs/hang-cli/src/publish.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/hang-cli/src/publish.rs b/rs/hang-cli/src/publish.rs index 44fd012ee..734a673fe 100644 --- a/rs/hang-cli/src/publish.rs +++ b/rs/hang-cli/src/publish.rs @@ -49,7 +49,7 @@ impl Publish { PublishDecoder::Decoder(Box::new(stream)) } PublishFormat::Hls { playlist, passthrough } => { - tracing::info!(passthrough = *passthrough, "HLS publish with passthrough flag"); + tracing::info!(passthrough = *passthrough, "HLS publish preserving original container format."); let hls = hang::import::Hls::new( broadcast.clone(), hang::import::HlsConfig { From 807db9e7d6003078e8e42671a3bd3fa6d77a43b5 Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Fri, 9 Jan 2026 09:07:36 -0300 Subject: [PATCH 09/16] cargo formatting issue --- rs/hang-cli/src/publish.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rs/hang-cli/src/publish.rs b/rs/hang-cli/src/publish.rs index 734a673fe..44f322424 100644 --- a/rs/hang-cli/src/publish.rs +++ b/rs/hang-cli/src/publish.rs @@ -49,7 +49,10 @@ impl Publish { PublishDecoder::Decoder(Box::new(stream)) } PublishFormat::Hls { playlist, passthrough } => { - tracing::info!(passthrough = *passthrough, "HLS publish preserving original container format."); + tracing::info!( + passthrough = *passthrough, + "HLS publish preserving original container format." + ); let hls = hang::import::Hls::new( broadcast.clone(), hang::import::HlsConfig { From 326cafeb2265c8b2c7e9d7eadfc936a24a6fac41 Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Fri, 9 Jan 2026 13:17:55 -0300 Subject: [PATCH 10/16] audio sync issues --- js/hang/src/watch/broadcast.ts | 8 +++++++ js/hang/src/watch/video/source-mse.ts | 33 +++++++++++++++++++++++++++ js/hang/src/watch/video/source.ts | 6 +++++ 3 files changed, 47 insertions(+) diff --git a/js/hang/src/watch/broadcast.ts b/js/hang/src/watch/broadcast.ts index c8fe30a1a..7caf4ccb7 100644 --- a/js/hang/src/watch/broadcast.ts +++ b/js/hang/src/watch/broadcast.ts @@ -64,6 +64,14 @@ export class Broadcast { this.reload = Signal.from(props?.reload ?? 
true); this.audio = new Audio.Source(this.#broadcast, this.#catalog, props?.audio); this.video = new Video.Source(this.#broadcast, this.#catalog, props?.video); + + // Connect audio element to video source for synchronization + this.signals.effect((eff) => { + const audioElement = eff.get(this.audio.mseAudioElement); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (this.video as any).setAudioSync?.(audioElement); + }); + this.location = new Location.Root(this.#broadcast, this.#catalog, props?.location); this.chat = new Chat(this.#broadcast, this.#catalog, props?.chat); this.preview = new Preview(this.#broadcast, this.#catalog, props?.preview); diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts index 60ac15bdb..649c3f953 100644 --- a/js/hang/src/watch/video/source-mse.ts +++ b/js/hang/src/watch/video/source-mse.ts @@ -59,11 +59,18 @@ export class SourceMSE { #signals = new Effect(); #frameCallbackId?: number; + #audioElement?: HTMLAudioElement; + #lastSyncTime = 0; constructor(latency: Signal) { this.latency = latency; } + setAudioSync(audioElement: HTMLAudioElement | undefined): void { + this.#audioElement = audioElement; + this.#lastSyncTime = 0; // Reset sync timer when audio element changes + } + async initialize(config: RequiredDecoderConfig): Promise { const mimeType = Mime.buildVideoMimeType(config); if (!mimeType) { @@ -108,6 +115,32 @@ export class SourceMSE { this.#video.play().catch((err) => console.error("[MSE] Failed to resume playback:", err)); } } + + // Sync audio to video (very conservative to minimize choppiness) + if (this.#audioElement && this.#audioElement.readyState >= HTMLMediaElement.HAVE_METADATA) { + const now = performance.now(); + // Only check sync every 5 seconds to minimize seeks + if (now - this.#lastSyncTime < 5000) { + return; + } + + const audioTime = this.#audioElement.currentTime; + const diff = Math.abs(current - audioTime); + // Only sync if difference is very large (>500ms) to avoid choppiness + // This allows some drift but prevents major desync + if (diff > 0.5) { + const audioBuffered = this.#audioElement.buffered; + if (audioBuffered && audioBuffered.length > 0) { + for (let i = 0; i < audioBuffered.length; i++) { + if (audioBuffered.start(i) <= current && current <= audioBuffered.end(i)) { + this.#audioElement.currentTime = current; + this.#lastSyncTime = now; + break; + } + } + } + } + } }); this.#mediaSource = new MediaSource(); diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index ba8b48381..c5dfd3bb6 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -250,6 +250,12 @@ export class Source { eff.set(this.#stats, stats); }); + // Expose method to set audio element for synchronization + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (this as any).setAudioSync = (audioElement: HTMLAudioElement | undefined) => { + mseSource.setAudioSync(audioElement); + }; + // Run MSE track try { await mseSource.runTrack(effect, broadcast, name, config); From a73fd9cbe9a8004068a29103d887907e2884afef Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Fri, 9 Jan 2026 15:15:16 -0300 Subject: [PATCH 11/16] comments cleanup --- js/hang/src/watch/video/source-mse.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts index 649c3f953..1ad282117 100644 --- a/js/hang/src/watch/video/source-mse.ts +++
b/js/hang/src/watch/video/source-mse.ts @@ -7,9 +7,6 @@ import type * as Time from "../../time"; import * as Mime from "../../util/mime"; // The types in VideoDecoderConfig that cause a hard reload. -// ex. codedWidth/Height are optional and can be changed in-band, so we don't want to trigger a reload. -// This way we can keep the current subscription active. -// Note: We keep codedWidth/Height as optional for logging, but set them to undefined to avoid reloads. type RequiredDecoderConfig = Omit & Partial>; @@ -83,9 +80,7 @@ export class SourceMSE { this.#video.muted = true; // Required for autoplay document.body.appendChild(this.#video); - // Note: In live streaming, "waiting" events are common and normal as the video waits for new data this.#video.addEventListener("waiting", () => { - // Silently handle - this is expected in live streaming }); this.#video.addEventListener("ended", () => { @@ -126,7 +121,6 @@ export class SourceMSE { const audioTime = this.#audioElement.currentTime; const diff = Math.abs(current - audioTime); - // Only sync if difference is very large (>500ms) to avoid choppiness // This allows some drift but prevents major desync if (diff > 0.5) { const audioBuffered = this.#audioElement.buffered; From ec4a1a0c7463ba55a08e85091b3fa0ccc4b34974 Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Fri, 9 Jan 2026 15:25:18 -0300 Subject: [PATCH 12/16] variable names: legacy->native fmp4->cmaf --- js/hang/src/catalog/audio.ts | 2 +- js/hang/src/catalog/container.ts | 10 +++++----- js/hang/src/catalog/video.ts | 2 +- js/hang/src/container/codec.ts | 16 +++++++-------- js/hang/src/watch/audio/source-mse.ts | 2 +- js/hang/src/watch/audio/source.ts | 10 +++++----- js/hang/src/watch/video/source-mse.ts | 5 ++--- js/hang/src/watch/video/source.ts | 6 +++--- rs/hang/examples/video.rs | 2 +- rs/hang/src/catalog/audio/mod.rs | 2 +- rs/hang/src/catalog/container.rs | 12 ++++++------ rs/hang/src/catalog/root.rs | 8 ++++---- rs/hang/src/catalog/video/mod.rs | 2 +- rs/hang/src/import/aac.rs | 2 +- rs/hang/src/import/avc3.rs | 2 +- rs/hang/src/import/fmp4.rs | 28 +++++++++++++-------------- rs/hang/src/import/hev1.rs | 2 +- rs/hang/src/import/opus.rs | 2 +- 18 files changed, 57 insertions(+), 58 deletions(-) diff --git a/js/hang/src/catalog/audio.ts b/js/hang/src/catalog/audio.ts index e57710bf2..5dfa9cff6 100644 --- a/js/hang/src/catalog/audio.ts +++ b/js/hang/src/catalog/audio.ts @@ -15,7 +15,7 @@ export const AudioConfigSchema = z.object({ codec: z.string(), // Container format for timestamp encoding - // Defaults to "legacy" when not specified in catalog (backward compatibility) + // Defaults to "native" when not specified in catalog (backward compatibility) container: ContainerSchema.default(DEFAULT_CONTAINER), // The description is used for some codecs. diff --git a/js/hang/src/catalog/container.ts b/js/hang/src/catalog/container.ts index 05b0e81db..6ec563e9f 100644 --- a/js/hang/src/catalog/container.ts +++ b/js/hang/src/catalog/container.ts @@ -3,16 +3,16 @@ import { z } from "zod"; /** * Container format for frame timestamp encoding. 
* - * - "legacy": Uses QUIC VarInt encoding (1-8 bytes, variable length) + * - "native": Uses QUIC VarInt encoding (1-8 bytes, variable length) * - "raw": Uses fixed u64 encoding (8 bytes, big-endian) - * - "fmp4": Fragmented MP4 container (future) + * - "cmaf": Fragmented MP4 container (future) */ -export const ContainerSchema = z.enum(["legacy", "raw", "fmp4"]); +export const ContainerSchema = z.enum(["native", "raw", "cmaf"]); export type Container = z.infer; /** * Default container format when not specified. - * Set to legacy for backward compatibility. + * Set to native for backward compatibility. */ -export const DEFAULT_CONTAINER: Container = "legacy"; +export const DEFAULT_CONTAINER: Container = "native"; diff --git a/js/hang/src/catalog/video.ts b/js/hang/src/catalog/video.ts index b8b77883f..3dc5ae4e9 100644 --- a/js/hang/src/catalog/video.ts +++ b/js/hang/src/catalog/video.ts @@ -14,7 +14,7 @@ export const VideoConfigSchema = z.object({ codec: z.string(), // Container format for timestamp encoding - // Defaults to "legacy" when not specified in catalog (backward compatibility) + // Defaults to "native" when not specified in catalog (backward compatibility) container: ContainerSchema.default(DEFAULT_CONTAINER), // The description is used for some codecs. diff --git a/js/hang/src/container/codec.ts b/js/hang/src/container/codec.ts index 179e1fc38..5a049e571 100644 --- a/js/hang/src/container/codec.ts +++ b/js/hang/src/container/codec.ts @@ -11,11 +11,11 @@ import type * as Time from "../time"; */ export function encodeTimestamp(timestamp: Time.Micro, container: Catalog.Container = DEFAULT_CONTAINER): Uint8Array { switch (container) { - case "legacy": + case "native": return encodeVarInt(timestamp); case "raw": return encodeU64(timestamp); - case "fmp4": { + case "cmaf": { // For CMAF fragments, use raw encoding (8 bytes) for timestamp header return encodeU64(timestamp); } @@ -34,7 +34,7 @@ export function decodeTimestamp( container: Catalog.Container = DEFAULT_CONTAINER, ): [Time.Micro, Uint8Array] { switch (container) { - case "legacy": { + case "native": { const [value, remaining] = decodeVarInt(buffer); return [value as Time.Micro, remaining]; } @@ -42,9 +42,9 @@ export function decodeTimestamp( const [value, remaining] = decodeU64(buffer); return [value as Time.Micro, remaining]; } - case "fmp4": { + case "cmaf": { // For CMAF fragments, timestamp is in the moof atom, but we still need to decode - // the header to get to the fragment. The server uses VarInt encoding (same as legacy) + // the header to get to the fragment. The server uses VarInt encoding (same as native) // for the timestamp header, so we use VarInt decoding here. // The actual media timestamp will be extracted by MSE from the moof. 
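`encodeVarInt`/`decodeVarInt` are not shown in this diff; given the catalog's description ("1-8 bytes, variable length") and the QUIC context, they are presumably the variable-length integer from RFC 9000 §16, where a 2-bit prefix selects a 1-, 2-, 4-, or 8-byte encoding. A sketch of the encoder under that assumption (the decoder mirrors it by reading the prefix first):

```typescript
// QUIC varint: the top two bits of the first byte encode the total length
// (00 -> 1 byte, 01 -> 2, 10 -> 4, 11 -> 8); the remaining bits are the value.
function encodeVarInt(value: bigint): Uint8Array {
	if (value < 0n) throw new Error("varint must be non-negative");
	let length: number;
	let prefix: bigint;
	if (value < 1n << 6n) { length = 1; prefix = 0n; }
	else if (value < 1n << 14n) { length = 2; prefix = 1n; }
	else if (value < 1n << 30n) { length = 4; prefix = 2n; }
	else if (value < 1n << 62n) { length = 8; prefix = 3n; }
	else throw new Error("value exceeds 62 bits");
	const out = new Uint8Array(length);
	let v = value | (prefix << BigInt(length * 8 - 2));
	for (let i = length - 1; i >= 0; i--) {
		out[i] = Number(v & 0xffn);
		v >>= 8n;
	}
	return out;
}
```

This is also why `getTimestampSize` below can report a flat 8 for "native": it is the worst-case header size, not the actual encoded length.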
const [value, remaining] = decodeVarInt(buffer); @@ -62,12 +62,12 @@ export function decodeTimestamp( */ export function getTimestampSize(container: Catalog.Container = DEFAULT_CONTAINER): number { switch (container) { - case "legacy": + case "native": return 8; // VarInt maximum size case "raw": return 8; // u64 fixed size - case "fmp4": - return 8; // VarInt maximum size (same as legacy) + case "cmaf": + return 8; // VarInt maximum size (same as native) } } diff --git a/js/hang/src/watch/audio/source-mse.ts b/js/hang/src/watch/audio/source-mse.ts index 50476c199..f34dc3853 100644 --- a/js/hang/src/watch/audio/source-mse.ts +++ b/js/hang/src/watch/audio/source-mse.ts @@ -176,7 +176,7 @@ export class SourceMSE { const consumer = new Frame.Consumer(sub, { latency: this.latency, - container: "fmp4", // CMAF fragments + container: "cmaf", // CMAF fragments }); effect.cleanup(() => consumer.close()); diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts index da09401ea..d90d76e70 100644 --- a/js/hang/src/watch/audio/source.ts +++ b/js/hang/src/watch/audio/source.ts @@ -99,9 +99,9 @@ export class Source { const config = effect.get(this.config); if (!config) return; - // Don't create worklet for MSE (fmp4) - browser handles playback directly + // Don't create worklet for MSE (cmaf) - browser handles playback directly // The worklet is only needed for WebCodecs path - if (config.container === "fmp4") { + if (config.container === "cmaf") { return; } @@ -183,8 +183,8 @@ export class Source { return; } - // Route to MSE for CMAF, WebCodecs for legacy/raw - if (config.container === "fmp4") { + // Route to MSE for CMAF, WebCodecs for native/raw + if (config.container === "cmaf") { this.#runMSEPath(effect, broadcast, active, config, catalog); } else { this.#runWebCodecsPath(effect, broadcast, active, config, catalog); @@ -253,7 +253,7 @@ export class Source { effect.cleanup(() => sub.close()); // Create consumer with slightly less latency than the render worklet to avoid underflowing. 
- // Container defaults to "legacy" via Zod schema for backward compatibility + // Container defaults to "native" via Zod schema for backward compatibility const consumer = new Frame.Consumer(sub, { latency: Math.max(this.latency.peek() - JITTER_UNDERHEAD, 0) as Time.Milli, container: config.container, diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts index 1ad282117..8f5e6d02b 100644 --- a/js/hang/src/watch/video/source-mse.ts +++ b/js/hang/src/watch/video/source-mse.ts @@ -80,8 +80,7 @@ export class SourceMSE { this.#video.muted = true; // Required for autoplay document.body.appendChild(this.#video); - this.#video.addEventListener("waiting", () => { - }); + this.#video.addEventListener("waiting", () => {}); this.#video.addEventListener("ended", () => { if (!this.#video) return; @@ -319,7 +318,7 @@ export class SourceMSE { const consumer = new Frame.Consumer(sub, { latency: this.latency, - container: "fmp4", // CMAF fragments + container: "cmaf", // CMAF fragments }); effect.cleanup(() => consumer.close()); diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index c5dfd3bb6..812bd6e87 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -202,8 +202,8 @@ export class Source { } #runTrack(effect: Effect, broadcast: Moq.Broadcast, name: string, config: RequiredDecoderConfig): void { - // Route to MSE for CMAF, WebCodecs for legacy/raw - if (config.container === "fmp4") { + // Route to MSE for CMAF, WebCodecs for native/raw + if (config.container === "cmaf") { this.#runMSEPath(effect, broadcast, name, config); } else { this.#runWebCodecsPath(effect, broadcast, name, config); @@ -279,7 +279,7 @@ export class Source { effect.cleanup(() => sub.close()); // Create consumer that reorders groups/frames up to the provided latency. - // Container defaults to "legacy" via Zod schema for backward compatibility + // Container defaults to "native" via Zod schema for backward compatibility const consumer = new Frame.Consumer(sub, { latency: this.latency, container: config.container, diff --git a/rs/hang/examples/video.rs b/rs/hang/examples/video.rs index 9418960da..64f88135b 100644 --- a/rs/hang/examples/video.rs +++ b/rs/hang/examples/video.rs @@ -70,7 +70,7 @@ fn create_track(broadcast: &mut moq_lite::BroadcastProducer) -> hang::TrackProdu display_ratio_width: None, display_ratio_height: None, optimize_for_latency: None, - container: hang::catalog::Container::Legacy, + container: hang::catalog::Container::Native, }; // Create a map of video renditions diff --git a/rs/hang/src/catalog/audio/mod.rs b/rs/hang/src/catalog/audio/mod.rs index 1869ebb56..e7e7d79ba 100644 --- a/rs/hang/src/catalog/audio/mod.rs +++ b/rs/hang/src/catalog/audio/mod.rs @@ -64,6 +64,6 @@ pub struct AudioConfig { pub description: Option, /// Container format for frame encoding. - /// Defaults to "legacy" for backward compatibility. + /// Defaults to "native" for backward compatibility. pub container: Container, } diff --git a/rs/hang/src/catalog/container.rs b/rs/hang/src/catalog/container.rs index f9a1e6805..cecee5d57 100644 --- a/rs/hang/src/catalog/container.rs +++ b/rs/hang/src/catalog/container.rs @@ -2,17 +2,17 @@ use serde::{Deserialize, Serialize}; /// Container format for frame timestamp encoding and frame payload structure. 
/// -/// - "legacy": Uses QUIC VarInt encoding (1-8 bytes, variable length), raw frame payloads +/// - "native": Uses QUIC VarInt encoding (1-8 bytes, variable length), raw frame payloads /// - "raw": Uses fixed u64 encoding (8 bytes, big-endian), raw frame payloads -/// - "fmp4": Fragmented MP4 container - frames contain complete moof+mdat fragments +/// - "cmaf": Fragmented MP4 container - frames contain complete moof+mdat fragments #[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default)] #[serde(rename_all = "camelCase")] pub enum Container { - #[serde(rename = "legacy")] + #[serde(rename = "native")] #[default] - Legacy, + Native, #[serde(rename = "raw")] Raw, - #[serde(rename = "fmp4")] - Fmp4, + #[serde(rename = "cmaf")] + Cmaf, } diff --git a/rs/hang/src/catalog/root.rs b/rs/hang/src/catalog/root.rs index c902d9594..f0256cb88 100644 --- a/rs/hang/src/catalog/root.rs +++ b/rs/hang/src/catalog/root.rs @@ -300,7 +300,7 @@ mod test { "codedHeight": 720, "bitrate": 6000000, "framerate": 30.0, - "container": "legacy" + "container": "native" } }, "priority": 1 @@ -312,7 +312,7 @@ mod test { "sampleRate": 48000, "numberOfChannels": 2, "bitrate": 128000, - "container": "legacy" + "container": "native" } }, "priority": 2 @@ -341,7 +341,7 @@ mod test { bitrate: Some(6_000_000), framerate: Some(30.0), optimize_for_latency: None, - container: Container::Legacy, + container: Container::Native, }, ); @@ -354,7 +354,7 @@ mod test { channel_count: 2, bitrate: Some(128_000), description: None, - container: Container::Legacy, + container: Container::Native, }, ); diff --git a/rs/hang/src/catalog/video/mod.rs b/rs/hang/src/catalog/video/mod.rs index af6c1a627..ada42c950 100644 --- a/rs/hang/src/catalog/video/mod.rs +++ b/rs/hang/src/catalog/video/mod.rs @@ -113,6 +113,6 @@ pub struct VideoConfig { pub optimize_for_latency: Option, /// Container format for frame encoding. - /// Defaults to "legacy" for backward compatibility. + /// Defaults to "native" for backward compatibility. 
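The serde `#[default]` above has a mirror on the TypeScript side: the Zod schemas give `container` a default of "native", which is what keeps catalogs written before this field existed parseable. A small sketch of that behavior (schema abbreviated to the relevant fields; the real AudioConfigSchema/VideoConfigSchema carry many more):

```typescript
import { z } from "zod";

const ContainerSchema = z.enum(["native", "raw", "cmaf"]);
const ConfigSketch = z.object({
	codec: z.string(),
	container: ContainerSchema.default("native"),
});

// An old catalog entry with no "container" key parses as native:
const legacy = ConfigSketch.parse({ codec: "opus" });
console.log(legacy.container); // "native"
```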
pub container: Container, } diff --git a/rs/hang/src/import/aac.rs b/rs/hang/src/import/aac.rs index 5997ee658..38d28508e 100644 --- a/rs/hang/src/import/aac.rs +++ b/rs/hang/src/import/aac.rs @@ -107,7 +107,7 @@ impl Aac { channel_count, bitrate: None, description: None, - container: hang::catalog::Container::Legacy, + container: hang::catalog::Container::Native, }; tracing::debug!(name = ?track.name, ?config, "starting track"); diff --git a/rs/hang/src/import/avc3.rs b/rs/hang/src/import/avc3.rs index 5e934a48c..dea2555a9 100644 --- a/rs/hang/src/import/avc3.rs +++ b/rs/hang/src/import/avc3.rs @@ -62,7 +62,7 @@ impl Avc3 { display_ratio_width: None, display_ratio_height: None, optimize_for_latency: None, - container: hang::catalog::Container::Legacy, + container: hang::catalog::Container::Native, }; if let Some(old) = &self.config { diff --git a/rs/hang/src/import/fmp4.rs b/rs/hang/src/import/fmp4.rs index 514a08484..42ca1948a 100644 --- a/rs/hang/src/import/fmp4.rs +++ b/rs/hang/src/import/fmp4.rs @@ -374,9 +374,9 @@ impl Fmp4 { display_ratio_height: None, optimize_for_latency: None, container: if passthrough_mode { - Container::Fmp4 + Container::Cmaf } else { - Container::Legacy + Container::Native }, } } @@ -394,9 +394,9 @@ impl Fmp4 { display_ratio_height: None, optimize_for_latency: None, container: if passthrough_mode { - Container::Fmp4 + Container::Cmaf } else { - Container::Legacy + Container::Native }, }, mp4_atom::Codec::Vp09(vp09) => { @@ -425,9 +425,9 @@ impl Fmp4 { bitrate: None, framerate: None, container: if passthrough_mode { - Container::Fmp4 + Container::Cmaf } else { - Container::Legacy + Container::Native }, } } @@ -462,9 +462,9 @@ impl Fmp4 { bitrate: None, framerate: None, container: if passthrough_mode { - Container::Fmp4 + Container::Cmaf } else { - Container::Legacy + Container::Native }, } } @@ -506,9 +506,9 @@ impl Fmp4 { display_ratio_height: None, optimize_for_latency: None, container: if passthrough_mode { - Container::Fmp4 + Container::Cmaf } else { - Container::Legacy + Container::Native }, }) } @@ -543,9 +543,9 @@ impl Fmp4 { bitrate: Some(bitrate.into()), description: None, // TODO? container: if passthrough_mode { - Container::Fmp4 + Container::Cmaf } else { - Container::Legacy + Container::Native }, } } @@ -557,9 +557,9 @@ impl Fmp4 { bitrate: None, description: None, // TODO? 
container: if passthrough_mode { - Container::Fmp4 + Container::Cmaf } else { - Container::Legacy + Container::Native }, } } diff --git a/rs/hang/src/import/hev1.rs b/rs/hang/src/import/hev1.rs index 70d3d329c..0e01c7ffd 100644 --- a/rs/hang/src/import/hev1.rs +++ b/rs/hang/src/import/hev1.rs @@ -62,7 +62,7 @@ impl Hev1 { display_ratio_width: vui_data.display_ratio_width, display_ratio_height: vui_data.display_ratio_height, optimize_for_latency: None, - container: hang::catalog::Container::Legacy, + container: hang::catalog::Container::Native, }; if let Some(old) = &self.config { diff --git a/rs/hang/src/import/opus.rs b/rs/hang/src/import/opus.rs index 4f935cc67..ac46ba9b2 100644 --- a/rs/hang/src/import/opus.rs +++ b/rs/hang/src/import/opus.rs @@ -53,7 +53,7 @@ impl Opus { channel_count, bitrate: None, description: None, - container: hang::catalog::Container::Legacy, + container: hang::catalog::Container::Native, }; tracing::debug!(name = ?track.name, ?config, "starting track"); From 3b20c4398a54032d9f0d6a44455aaff0f992a8be Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Fri, 9 Jan 2026 15:39:21 -0300 Subject: [PATCH 13/16] optional setAudioSync --- js/hang/src/watch/broadcast.ts | 3 +-- js/hang/src/watch/video/source-mse.ts | 1 - js/hang/src/watch/video/source.ts | 6 ++++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/js/hang/src/watch/broadcast.ts b/js/hang/src/watch/broadcast.ts index 7caf4ccb7..fa7f7063e 100644 --- a/js/hang/src/watch/broadcast.ts +++ b/js/hang/src/watch/broadcast.ts @@ -68,8 +68,7 @@ export class Broadcast { // Connect audio element to video source for synchronization this.signals.effect((eff) => { const audioElement = eff.get(this.audio.mseAudioElement); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (this.video as any).setAudioSync?.(audioElement); + this.video.setAudioSync?.(audioElement); }); this.location = new Location.Root(this.#broadcast, this.#catalog, props?.location); diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts index 8f5e6d02b..eedd5931e 100644 --- a/js/hang/src/watch/video/source-mse.ts +++ b/js/hang/src/watch/video/source-mse.ts @@ -81,7 +81,6 @@ export class SourceMSE { document.body.appendChild(this.#video); this.#video.addEventListener("waiting", () => {}); - this.#video.addEventListener("ended", () => { if (!this.#video) return; const videoBuffered = this.#video.buffered; diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index 812bd6e87..45dea5944 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -99,6 +99,9 @@ export class Source { #signals = new Effect(); + // Optional method set by MSE path for audio synchronization + setAudioSync?(audioElement: HTMLAudioElement | undefined): void; + constructor( broadcast: Signal, catalog: Signal, @@ -251,8 +254,7 @@ export class Source { }); // Expose method to set audio element for synchronization - // eslint-disable-next-line @typescript-eslint/no-explicit-any - (this as any).setAudioSync = (audioElement: HTMLAudioElement | undefined) => { + this.setAudioSync = (audioElement: HTMLAudioElement | undefined) => { mseSource.setAudioSync(audioElement); }; From 8cbbf40d692636e461debd6dba63a5122bc7bc5c Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Thu, 15 Jan 2026 11:22:43 -0300 Subject: [PATCH 14/16] init segment on catalog - timestamp fix - AV sync on MSE --- js/hang/src/catalog/audio.ts | 6 + js/hang/src/catalog/video.ts | 6 +
js/hang/src/container/codec.ts | 11 +- js/hang/src/frame.ts | 10 + js/hang/src/watch/audio/emitter.ts | 2 +- js/hang/src/watch/audio/source-mse.ts | 377 ------ js/hang/src/watch/audio/source.ts | 211 +++- js/hang/src/watch/broadcast.ts | 13 +- js/hang/src/watch/source-mse.ts | 1530 +++++++++++++++++++++++++ js/hang/src/watch/video/source-mse.ts | 540 --------- js/hang/src/watch/video/source.ts | 28 +- rs/hang/examples/video.rs | 1 + rs/hang/src/catalog/audio/mod.rs | 12 +- rs/hang/src/catalog/root.rs | 2 + rs/hang/src/catalog/video/mod.rs | 16 +- rs/hang/src/import/aac.rs | 1 + rs/hang/src/import/avc3.rs | 1 + rs/hang/src/import/fmp4.rs | 202 +++- rs/hang/src/import/hev1.rs | 1 + rs/hang/src/import/opus.rs | 1 + rs/hang/src/model/track.rs | 17 +- 21 files changed, 1952 insertions(+), 1036 deletions(-) delete mode 100644 js/hang/src/watch/audio/source-mse.ts create mode 100644 js/hang/src/watch/source-mse.ts delete mode 100644 js/hang/src/watch/video/source-mse.ts diff --git a/js/hang/src/catalog/audio.ts b/js/hang/src/catalog/audio.ts index 5dfa9cff6..dee81c3eb 100644 --- a/js/hang/src/catalog/audio.ts +++ b/js/hang/src/catalog/audio.ts @@ -32,6 +32,12 @@ export const AudioConfigSchema = z.object({ // The bitrate of the audio in bits per second // TODO: Support up to Number.MAX_SAFE_INTEGER bitrate: u53Schema.optional(), + + // Init segment (ftyp+moov) for CMAF/fMP4 containers. + // This is the initialization segment needed for MSE playback. + // Stored as base64-encoded bytes. If not provided, init segments + // will be sent over the data track (legacy behavior). + initSegment: z.string().optional(), // base64-encoded }); export const AudioSchema = z diff --git a/js/hang/src/catalog/video.ts b/js/hang/src/catalog/video.ts index 3dc5ae4e9..fbb460de5 100644 --- a/js/hang/src/catalog/video.ts +++ b/js/hang/src/catalog/video.ts @@ -43,6 +43,12 @@ export const VideoConfigSchema = z.object({ // If true, the decoder will optimize for latency. // Default: true optimizeForLatency: z.boolean().optional(), + + // Init segment (ftyp+moov) for CMAF/fMP4 containers. + // This is the initialization segment needed for MSE playback. + // Stored as base64-encoded bytes. If not provided, init segments + // will be sent over the data track (legacy behavior). + initSegment: z.string().optional(), // base64-encoded }); // Mirrors VideoDecoderConfig diff --git a/js/hang/src/container/codec.ts b/js/hang/src/container/codec.ts index 5a049e571..d6196a73f 100644 --- a/js/hang/src/container/codec.ts +++ b/js/hang/src/container/codec.ts @@ -16,8 +16,8 @@ export function encodeTimestamp(timestamp: Time.Micro, container: Catalog.Contai case "raw": return encodeU64(timestamp); case "cmaf": { - // For CMAF fragments, use raw encoding (8 bytes) for timestamp header - return encodeU64(timestamp); + // CMAF fragments contain timestamps in moof atoms, no header needed + return new Uint8Array(0); } } } @@ -43,12 +43,7 @@ export function decodeTimestamp( return [value as Time.Micro, remaining]; } case "cmaf": { - // For CMAF fragments, timestamp is in the moof atom, but we still need to decode - // the header to get to the fragment. The server uses VarInt encoding (same as native) - // for the timestamp header, so we use VarInt decoding here. - // The actual media timestamp will be extracted by MSE from the moof. 
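Publishing the init segment through the catalog means a player can prime its SourceBuffer before the first media frame arrives, instead of waiting for group 0 or a keyframe-prefixed copy. A sketch of the consuming side, assuming the field is standard base64 of a complete ftyp+moov as the schema comments state, and a SourceBuffer already attached to an open MediaSource (helper names are illustrative, not from the patch):

```typescript
// Decode the catalog's base64 initSegment field into raw ftyp+moov bytes.
function decodeInitSegment(initSegment: string): Uint8Array {
	const binary = atob(initSegment);
	const bytes = new Uint8Array(binary.length);
	for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
	return bytes;
}

// Append the init segment and wait for the buffer to settle before fragments.
async function primeSourceBuffer(buffer: SourceBuffer, initSegment: string): Promise<void> {
	buffer.appendBuffer(decodeInitSegment(initSegment));
	await new Promise<void>((resolve) => buffer.addEventListener("updateend", () => resolve(), { once: true }));
}
```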
- const [value, remaining] = decodeVarInt(buffer); - return [value as Time.Micro, remaining]; + return [0 as Time.Micro, buffer]; } } } diff --git a/js/hang/src/frame.ts b/js/hang/src/frame.ts index 84904e5f5..945d872d4 100644 --- a/js/hang/src/frame.ts +++ b/js/hang/src/frame.ts @@ -20,6 +20,16 @@ export function encode(source: Uint8Array | Source, timestamp: Time.Micro, conta // Encode timestamp using the specified container format const timestampBytes = Container.encodeTimestamp(timestamp, container); + // For CMAF, timestampBytes will be empty, so we just return the source + if (container === "cmaf") { + if (source instanceof Uint8Array) { + return source; + } + const data = new Uint8Array(source.byteLength); + source.copyTo(data); + return data; + } + // Allocate buffer for timestamp + payload const payloadSize = source instanceof Uint8Array ? source.byteLength : source.byteLength; const data = new Uint8Array(timestampBytes.byteLength + payloadSize); diff --git a/js/hang/src/watch/audio/emitter.ts b/js/hang/src/watch/audio/emitter.ts index 6a1b91a4e..a859ce8c3 100644 --- a/js/hang/src/watch/audio/emitter.ts +++ b/js/hang/src/watch/audio/emitter.ts @@ -48,7 +48,7 @@ export class Emitter { this.#signals.effect((effect) => { const paused = effect.get(this.paused); const muted = effect.get(this.muted); - const enabled = !paused && !muted; + const enabled = !paused; this.source.enabled.set(enabled); }); diff --git a/js/hang/src/watch/audio/source-mse.ts b/js/hang/src/watch/audio/source-mse.ts deleted file mode 100644 index f34dc3853..000000000 --- a/js/hang/src/watch/audio/source-mse.ts +++ /dev/null @@ -1,377 +0,0 @@ -import type * as Moq from "@moq/lite"; -import { Effect, type Getter, Signal } from "@moq/signals"; -import type * as Catalog from "../../catalog"; -import * as Frame from "../../frame"; -import type * as Time from "../../time"; -import * as Mime from "../../util/mime"; - -export interface AudioStats { - bytesReceived: number; -} - -/** - * MSE-based audio source for CMAF/fMP4 fragments. - * Uses Media Source Extensions to handle complete moof+mdat fragments. - * The browser handles decoding and playback directly from the HTMLAudioElement. 
- */ -export class SourceMSE { - #audio?: HTMLAudioElement; - #mediaSource?: MediaSource; - #sourceBuffer?: SourceBuffer; - - // Signal to expose audio element for volume/mute control - #audioElement = new Signal(undefined); - readonly audioElement = this.#audioElement as Getter; - - #appendQueue: Uint8Array[] = []; - static readonly MAX_QUEUE_SIZE = 10; - - #stats = new Signal(undefined); - readonly stats = this.#stats; - - readonly latency: Signal; - - #signals = new Effect(); - - constructor(latency: Signal) { - this.latency = latency; - } - - async initialize(config: Catalog.AudioConfig): Promise { - const mimeType = Mime.buildAudioMimeType(config); - if (!mimeType) { - throw new Error(`Unsupported codec for MSE: ${config.codec}`); - } - - this.#audio = document.createElement("audio"); - this.#audio.style.display = "none"; - this.#audio.muted = false; // Allow audio playback - this.#audio.volume = 1.0; // Set initial volume to 1.0 - document.body.appendChild(this.#audio); - - this.#audioElement.set(this.#audio); - - this.#mediaSource = new MediaSource(); - const url = URL.createObjectURL(this.#mediaSource); - this.#audio.src = url; - this.#audio.currentTime = 0; - - await new Promise((resolve, reject) => { - const timeout = setTimeout(() => { - reject(new Error("MediaSource sourceopen timeout")); - }, 5000); - - this.#mediaSource?.addEventListener( - "sourceopen", - () => { - clearTimeout(timeout); - try { - this.#sourceBuffer = this.#mediaSource?.addSourceBuffer(mimeType); - if (!this.#sourceBuffer) { - reject(new Error("Failed to create SourceBuffer")); - return; - } - this.#setupSourceBuffer(); - resolve(); - } catch (error) { - reject(error); - } - }, - { once: true }, - ); - - this.#mediaSource?.addEventListener("error", (e) => { - clearTimeout(timeout); - reject(new Error(`MediaSource error: ${e}`)); - }); - }); - } - - #setupSourceBuffer(): void { - if (!this.#sourceBuffer) return; - - this.#sourceBuffer.addEventListener("updateend", () => { - this.#processAppendQueue(); - }); - - this.#sourceBuffer.addEventListener("error", (e) => { - console.error("SourceBuffer error:", e); - }); - } - - async appendFragment(fragment: Uint8Array): Promise { - if (!this.#sourceBuffer || !this.#mediaSource) { - throw new Error("SourceBuffer not initialized"); - } - - // Don't queue fragments if MediaSource is closed - if (this.#mediaSource.readyState === "closed") { - return; - } - - if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { - const discarded = this.#appendQueue.shift(); - console.warn( - `[MSE Audio] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 
0} bytes)`, - ); - } - - const copy = new Uint8Array(fragment); - this.#appendQueue.push(copy); - - this.#processAppendQueue(); - } - - #concatenateFragments(fragments: Uint8Array[]): Uint8Array { - if (fragments.length === 1) { - return fragments[0]; - } - - const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0); - const result = new Uint8Array(totalSize); - let offset = 0; - for (const fragment of fragments) { - result.set(fragment, offset); - offset += fragment.byteLength; - } - - return result; - } - - #processAppendQueue(): void { - if (!this.#sourceBuffer || this.#sourceBuffer.updating || this.#appendQueue.length === 0) { - return; - } - - if (this.#mediaSource?.readyState !== "open") { - return; - } - - const fragment = this.#appendQueue.shift(); - if (!fragment) return; - - try { - // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) - this.#sourceBuffer.appendBuffer(fragment as BufferSource); - - this.#stats.update((current) => ({ - bytesReceived: (current?.bytesReceived ?? 0) + fragment.byteLength, - })); - } catch (error) { - console.error("[MSE Audio] Error appending fragment:", error); - } - } - - async runTrack( - effect: Effect, - broadcast: Moq.Broadcast, - name: string, - config: Catalog.AudioConfig, - catalog: Catalog.Audio, - ): Promise { - await this.initialize(config); - - const sub = broadcast.subscribe(name, catalog.priority); - effect.cleanup(() => sub.close()); - - const consumer = new Frame.Consumer(sub, { - latency: this.latency, - container: "cmaf", // CMAF fragments - }); - effect.cleanup(() => consumer.close()); - - effect.spawn(async () => { - if (!this.#audio) return; - - await new Promise((resolve) => { - let checkCount = 0; - const maxChecks = 100; // 10 seconds max wait - - let hasSeeked = false; - const checkReady = () => { - checkCount++; - if (this.#audio && this.#sourceBuffer) { - const audioBuffered = this.#audio.buffered; - const hasBufferedData = this.#sourceBuffer.buffered.length > 0; - - if (hasBufferedData && audioBuffered && audioBuffered.length > 0 && !hasSeeked) { - const currentTime = this.#audio.currentTime; - let isTimeBuffered = false; - for (let i = 0; i < audioBuffered.length; i++) { - if (audioBuffered.start(i) <= currentTime && currentTime < audioBuffered.end(i)) { - isTimeBuffered = true; - break; - } - } - if (!isTimeBuffered) { - const seekTime = audioBuffered.start(0); - this.#audio.currentTime = seekTime; - hasSeeked = true; - setTimeout(checkReady, 100); - return; - } - } - - // Try to play if we have buffered data, even if readyState is low - // The browser will start playing when it's ready - if (hasBufferedData && this.#audio.readyState >= HTMLMediaElement.HAVE_METADATA) { - this.#audio - .play() - .then(() => { - resolve(); - }) - .catch((error) => { - console.error("[MSE Audio] Audio play() failed (initial):", error); - if (checkCount < maxChecks) { - setTimeout(checkReady, 200); - } else { - resolve(); - } - }); - } else if (checkCount >= maxChecks) { - resolve(); - } else { - setTimeout(checkReady, 100); - } - } else if (checkCount >= maxChecks) { - resolve(); - } else { - setTimeout(checkReady, 100); - } - }; - checkReady(); - }); - }); - - let initSegmentReceived = false; - - // Helper function to detect moov atom in the buffer - // This searches for "moov" atom at any position, not just at the start - // The init segment may have other atoms before "moov" (e.g., "ftyp") - function hasMoovAtom(data: Uint8Array): boolean { - let offset = 0; - const len = data.length; - - while 
(offset + 8 <= len) { - // Atom size (big endian) - const size = - (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; - - const type = String.fromCharCode( - data[offset + 4], - data[offset + 5], - data[offset + 6], - data[offset + 7], - ); - - if (type === "moov") return true; - - // Avoid infinite loops if size is broken - if (size < 8) break; - offset += size; - } - - return false; - } - - // Read fragments and append to SourceBuffer - // We group fragments by MOQ group before appending - effect.spawn(async () => { - let currentGroup: number | undefined; - let groupFragments: Uint8Array[] = []; // Accumulate fragments for current group - - for (;;) { - const frame = await Promise.race([consumer.decode(), effect.cancel]); - if (!frame) { - if (groupFragments.length > 0 && initSegmentReceived && this.#mediaSource?.readyState === "open") { - const groupData = this.#concatenateFragments(groupFragments); - await this.appendFragment(groupData); - groupFragments = []; - } - break; - } - - // Stop processing if MediaSource is closed - if (this.#mediaSource?.readyState === "closed") { - break; - } - - const isMoovAtom = hasMoovAtom(frame.data); - const isInitSegment = isMoovAtom && !initSegmentReceived; - - if (isInitSegment) { - if (groupFragments.length > 0 && initSegmentReceived && this.#mediaSource?.readyState === "open") { - const groupData = this.#concatenateFragments(groupFragments); - await this.appendFragment(groupData); - groupFragments = []; - } - - await this.appendFragment(frame.data); - initSegmentReceived = true; - continue; - } - - if (!initSegmentReceived) { - continue; - } - - if (currentGroup !== undefined && frame.group !== currentGroup) { - if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") { - const groupData = this.#concatenateFragments(groupFragments); - await this.appendFragment(groupData); - groupFragments = []; - } - } - - if (currentGroup === undefined || frame.group !== currentGroup) { - currentGroup = frame.group; - groupFragments = []; - } - - groupFragments.push(frame.data); - - // Append immediately for low latency audio sync - if (groupFragments.length >= 1 && this.#mediaSource?.readyState === "open") { - const groupData = this.#concatenateFragments(groupFragments); - await this.appendFragment(groupData); - groupFragments = []; - } - } - }); - } - - close(): void { - this.#appendQueue = []; - - this.#audioElement.set(undefined); - - if (this.#sourceBuffer && this.#mediaSource) { - try { - if (this.#sourceBuffer.updating) { - this.#sourceBuffer.abort(); - } - } catch (error) { - console.error("Error closing SourceBuffer:", error); - } - } - - if (this.#mediaSource) { - try { - if (this.#mediaSource.readyState === "open") { - this.#mediaSource.endOfStream(); - } - URL.revokeObjectURL(this.#audio?.src || ""); - } catch (error) { - console.error("Error closing MediaSource:", error); - } - } - - if (this.#audio) { - this.#audio.pause(); - this.#audio.src = ""; - this.#audio.remove(); - } - - this.#signals.close(); - } -} diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts index d90d76e70..9779d0e9d 100644 --- a/js/hang/src/watch/audio/source.ts +++ b/js/hang/src/watch/audio/source.ts @@ -6,6 +6,7 @@ import type * as Time from "../../time"; import * as Hex from "../../util/hex"; import * as libav from "../../util/libav"; import type * as Render from "./render"; +import * as Video from "../video"; // We want some extra overhead to avoid starving the render worklet. 
// The default Opus frame duration is 20ms. @@ -62,6 +63,9 @@ export class Source { #signals = new Effect(); + // Reference to video source for coordination + video?: Video.Source; + constructor( broadcast: Getter, catalog: Getter, @@ -159,7 +163,20 @@ export class Source { #runDecoder(effect: Effect): void { const enabled = effect.get(this.enabled); - if (!enabled) { + const config = effect.get(this.config); + + // For CMAF, we need to add the SourceBuffer even if audio is disabled + // This ensures the MediaSource has both SourceBuffers before video starts appending + // We'll just not append audio data if disabled + if (config?.container === "cmaf") { + // Always initialize MSE for CMAF, even if disabled + // The SourceBuffer needs to be added before video starts appending + } else if (!enabled) { + // For non-CMAF, if disabled, don't initialize + return; + } + + if (!enabled && config?.container !== "cmaf") { return; } @@ -173,7 +190,6 @@ export class Source { return; } - const config = effect.get(this.config); if (!config) { return; } @@ -184,10 +200,20 @@ export class Source { } // Route to MSE for CMAF, WebCodecs for native/raw + // For CMAF, ALWAYS initialize MSE (even if disabled) to add SourceBuffer + // This ensures MediaSource has both SourceBuffers before video starts appending + // The SourceBuffer will be added, but fragments won't be appended if disabled + console.log(`[Audio Source] Routing audio: container=${config.container}, enabled=${enabled}`); if (config.container === "cmaf") { + // Always initialize for CMAF - SourceBuffer must be added before video starts + console.log("[Audio Source] Using MSE path for CMAF"); this.#runMSEPath(effect, broadcast, active, config, catalog); } else { - this.#runWebCodecsPath(effect, broadcast, active, config, catalog); + // For non-CMAF, only run if enabled + console.log(`[Audio Source] Using WebCodecs path (container=${config.container})`); + if (enabled) { + this.#runWebCodecsPath(effect, broadcast, active, config, catalog); + } } } @@ -198,39 +224,168 @@ export class Source { config: Catalog.AudioConfig, catalog: Catalog.Audio, ): void { - console.log("[Audio Stream] Subscribing to track", { - name, - codec: config.codec, - container: config.container, - sampleRate: config.sampleRate, - channels: config.numberOfChannels, - }); - // Import MSE source dynamically + // Use the unified SourceMSE from video - it manages both video and audio SourceBuffers + // Use a reactive effect to always get the latest SourceMSE instance effect.spawn(async () => { - const { SourceMSE } = await import("./source-mse.js"); - const mseSource = new SourceMSE(this.latency); - effect.cleanup(() => mseSource.close()); + // Wait for video's MSE source to be available + // Video creates it asynchronously, and may recreate it when restarting + // So we need to get it reactively each time + let videoMseSource: any; + if (this.video?.mseSource) { + // Wait up to 2 seconds for video MSE source to be available + const maxWait = 2000; + const startTime = Date.now(); + while (!videoMseSource && (Date.now() - startTime) < maxWait) { + videoMseSource = effect.get(this.video.mseSource); + if (!videoMseSource) { + await new Promise(resolve => setTimeout(resolve, 50)); // Check more frequently + } + } + } + + if (!videoMseSource) { + console.error("[Audio Source] Video MSE source not available, falling back to WebCodecs"); + this.#runWebCodecsPath(effect, broadcast, name, config, catalog); + return; + } - // Expose HTMLAudioElement for Emitter to control volume/mute - // 
Use effect to reactively get the audio element when it's ready + // For MSE path, audio plays through the video element + // Expose video element as "audioElement" for compatibility with emitter + // Use reactive effect to always get the latest video element this.#signals.effect((eff) => { - const audioElement = eff.get(mseSource.audioElement); - eff.set(this.#mseAudioElement, audioElement); + // Get latest SourceMSE instance in case video restarted + const latestMseSource = this.video?.mseSource ? eff.get(this.video.mseSource) : undefined; + const mseSource = latestMseSource || videoMseSource; + const videoElement = mseSource?.videoElement ? eff.get(mseSource.videoElement) : undefined; + // Expose as audioElement for emitter compatibility (HTMLVideoElement works the same as HTMLAudioElement for volume/mute) + eff.set(this.#mseAudioElement, videoElement as HTMLAudioElement | undefined); }); - // Forward stats + // Forward stats (audio stats are not currently tracked in unified SourceMSE, but we can add them later) + // For now, just set empty stats this.#signals.effect((eff) => { - const stats = eff.get(mseSource.stats); - eff.set(this.#stats, stats); + eff.set(this.#stats, { bytesReceived: 0 }); }); - // Run MSE track - no worklet needed, browser handles everything - try { - await mseSource.runTrack(effect, broadcast, name, config, catalog); - } catch (error) { - console.error("MSE path error, falling back to WebCodecs:", error); - // Fallback to WebCodecs - this.#runWebCodecsPath(effect, broadcast, name, config, catalog); + // Check if audio is enabled + const isEnabled = effect.get(this.enabled); + + // Only subscribe to track and initialize SourceBuffer if enabled + // When disabled, we don't need to do anything - video can play without audio + if (!isEnabled) { + console.log(`[Audio Source] Audio disabled, skipping SourceBuffer initialization and track subscription - video will play without audio`); + return; + } + + // Audio is enabled - subscribe to track and initialize SourceBuffer + // Wait a bit for video to stabilize if it's restarting + // Get the latest SourceMSE instance and verify it's stable + let latestMseSource: any; + let retryCount = 0; + const maxRetries = 3; + + while (retryCount < maxRetries) { + // Get the latest SourceMSE instance (in case video restarted) + latestMseSource = this.video?.mseSource ? effect.get(this.video.mseSource) : videoMseSource; + if (!latestMseSource) { + // Wait a bit for video to create SourceMSE + await new Promise(resolve => setTimeout(resolve, 100)); + retryCount++; + continue; + } + + // Check if MediaSource is ready (not closed) + const mediaSource = latestMseSource.mediaSource ? 
effect.get(latestMseSource.mediaSource) : undefined; + if (mediaSource && typeof mediaSource === "object" && "readyState" in mediaSource && (mediaSource as MediaSource).readyState === "closed") { + // MediaSource is closed, video might be restarting - wait and retry + console.log("[Audio Source] MediaSource is closed, waiting for video to stabilize"); + await new Promise(resolve => setTimeout(resolve, 200)); + retryCount++; + continue; + } + + // SourceMSE instance looks good, proceed + break; + } + + if (!latestMseSource) { + console.warn("[Audio Source] SourceMSE instance not available after retries, skipping audio"); + return; + } + + console.log("[Audio Stream] Subscribing to track", { + name, + codec: config.codec, + container: config.container, + sampleRate: config.sampleRate, + channels: config.numberOfChannels, + }); + + // Retry a few times for transient MSE states / QuotaExceeded + for (let attempt = 0; attempt < 5; attempt++) { + try { + // Resolve freshest SourceMSE and wait for MediaSource to be open (up to ~5s). + const resolveOpenMediaSource = async (): Promise => { + const start = Date.now(); + let current = latestMseSource; + for (;;) { + // Follow any video restart by re-reading the signal + const candidate = this.video?.mseSource ? effect.get(this.video.mseSource) : current; + if (candidate && candidate !== current) { + console.log("[Audio Source] Video restarted, using new SourceMSE instance"); + current = candidate; + } + + const ms = current?.mediaSource ? effect.get(current.mediaSource) : undefined; + if (ms && typeof ms === "object" && "readyState" in ms && (ms as MediaSource).readyState === "open") { + return current; + } + + if (Date.now() - start > 5000) { + throw new Error("MediaSource not ready for audio SourceBuffer"); + } + await new Promise(resolve => setTimeout(resolve, 50)); + } + }; + + const readyMseSource = await resolveOpenMediaSource(); + latestMseSource = readyMseSource; + + console.log(`[Audio Source] Initializing audio SourceBuffer on unified SourceMSE (attempt ${attempt + 1})`); + await latestMseSource.initializeAudio(config); + + // Verify we're still using the current instance after initialization + const verifyMseSource = this.video?.mseSource ? effect.get(this.video.mseSource) : latestMseSource; + if (verifyMseSource !== latestMseSource) { + // Video restarted during initialization, get new instance and retry + console.log("[Audio Source] Video restarted during initialization, retrying with new instance"); + await verifyMseSource.initializeAudio(config); + latestMseSource = verifyMseSource; + } + + console.log(`[Audio Source] Audio SourceBuffer initialization completed`); + + // Get latest instance again before running track (video might have restarted) + const finalMseSource = this.video?.mseSource ? 
effect.get(this.video.mseSource) : latestMseSource; + if (!finalMseSource) { + throw new Error("SourceMSE instance not available"); + } + + // Run audio track - use the latest instance + console.log(`[Audio Source] Starting MSE track on unified SourceMSE`); + await finalMseSource.runAudioTrack(effect, broadcast, name, config, catalog, this.enabled); + console.log("[Audio Source] MSE track completed successfully"); + return; // success + } catch (error) { + const retriable = error instanceof DOMException && error.name === "QuotaExceededError"; + if (!retriable || attempt === 4) { + console.warn("[Audio Source] Failed to initialize audio SourceBuffer, video will continue without audio:", error); + return; + } + const delay = 150 + attempt * 150; + console.warn(`[Audio Source] Audio init attempt ${attempt + 1} failed (${(error as Error).message}); retrying in ${delay}ms`); + await new Promise(resolve => setTimeout(resolve, delay)); + } } }); } diff --git a/js/hang/src/watch/broadcast.ts b/js/hang/src/watch/broadcast.ts index fa7f7063e..f8473c347 100644 --- a/js/hang/src/watch/broadcast.ts +++ b/js/hang/src/watch/broadcast.ts @@ -62,14 +62,15 @@ export class Broadcast { this.path = Signal.from(props?.path); this.enabled = Signal.from(props?.enabled ?? false); this.reload = Signal.from(props?.reload ?? true); - this.audio = new Audio.Source(this.#broadcast, this.#catalog, props?.audio); + + // Create video first so audio can use its MediaSource this.video = new Video.Source(this.#broadcast, this.#catalog, props?.video); + + // Create audio and pass video reference for coordination + this.audio = new Audio.Source(this.#broadcast, this.#catalog, props?.audio); + this.audio.video = this.video; // Pass video reference for coordination - // Connect audio element to video source for synchronization - this.signals.effect((eff) => { - const audioElement = eff.get(this.audio.mseAudioElement); - this.video.setAudioSync?.(audioElement); - }); + this.location = new Location.Root(this.#broadcast, this.#catalog, props?.location); this.chat = new Chat(this.#broadcast, this.#catalog, props?.chat); diff --git a/js/hang/src/watch/source-mse.ts b/js/hang/src/watch/source-mse.ts new file mode 100644 index 000000000..bab894632 --- /dev/null +++ b/js/hang/src/watch/source-mse.ts @@ -0,0 +1,1530 @@ +import type * as Moq from "@moq/lite"; +import { Effect, type Getter, Signal } from "@moq/signals"; +import type * as Catalog from "../catalog"; +import * as Frame from "../frame"; +import { PRIORITY } from "../publish/priority"; +import type * as Time from "../time"; +import * as Mime from "../util/mime"; + +// The types in VideoDecoderConfig that cause a hard reload. +type RequiredDecoderConfig = Omit & + Partial>; + +type BufferStatus = { state: "empty" | "filled" }; + +type SyncStatus = { + state: "ready" | "wait"; + bufferDuration?: number; +}; + +export interface VideoStats { + frameCount: number; + timestamp: number; + bytesReceived: number; +} + +/** + * MSE-based video source for CMAF/fMP4 fragments. + * Uses Media Source Extensions to handle complete moof+mdat fragments. 
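+ * A single MediaSource is shared across tracks: initializeVideo() creates it and adds
+ * the video SourceBuffer on "sourceopen"; initializeAudio() later attaches a second
+ * SourceBuffer to the same MediaSource.
+ *
+ * Typical wiring (a sketch; assumes the video track is started before the audio track,
+ * and the variable names are placeholders):
+ *   const source = new SourceMSE(latency);
+ *   await source.runTrack(effect, broadcast, videoName, videoConfig);
+ *   await source.runAudioTrack(effect, broadcast, audioName, audioConfig, audioCatalog);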
+ */ +export class SourceMSE { + #video?: HTMLVideoElement; + #mediaSource?: MediaSource; + #videoSourceBuffer?: SourceBuffer; + #audioSourceBuffer?: SourceBuffer; + #audioSourceBufferSetup = false; // Track if the audio SourceBuffer has been set up + + readonly mediaSource = new Signal<MediaSource | undefined>(undefined); + + // Expose the video element for audio control (audio plays through the video element) + readonly videoElement = new Signal<HTMLVideoElement | undefined>(undefined); + + // Queue of fragments waiting to be added for video + #videoAppendQueue: Uint8Array[] = []; + // Queue of fragments waiting to be added for audio + #audioAppendQueue: Uint8Array[] = []; + static readonly MAX_QUEUE_SIZE = 10; // Maximum fragments in each queue + + // Expose the current frame to render as a signal + frame = new Signal<VideoFrame | undefined>(undefined); + + // The target latency in milliseconds. + latency: Signal<Time.Milli>; + + // The display size of the video in pixels. + display = new Signal<{ width: number; height: number } | undefined>(undefined); + + // Whether to flip the video horizontally. + flip = new Signal<boolean | undefined>(undefined); + + bufferStatus = new Signal<BufferStatus>({ state: "empty" }); + syncStatus = new Signal<SyncStatus>({ state: "ready" }); + + #stats = new Signal<VideoStats | undefined>(undefined); + + #signals = new Effect(); + #frameCallbackId?: number; + + constructor(latency: Signal<Time.Milli>) { + this.latency = latency; + } + + /** + * Check whether any SourceBuffer is currently updating. + */ + #isBufferUpdating(): boolean { + if (!this.#mediaSource) return false; + const buffers = this.#mediaSource.sourceBuffers; + for (let i = 0; i < buffers.length; i++) { + if (buffers[i].updating) { + return true; + } + } + return false; + } + + async initializeVideo(config: RequiredDecoderConfig): Promise<void> { + const mimeType = Mime.buildVideoMimeType(config); + if (!mimeType) { + throw new Error(`Unsupported codec for MSE: ${config.codec}`); + } + + console.log("[MSE] Initializing video, MIME type:", mimeType); + + // Create the video element + this.#video = document.createElement("video"); + this.#video.style.display = "none"; + this.#video.playsInline = true; + this.#video.muted = false; // Don't mute - audio plays through the video element + document.body.appendChild(this.#video); + + // Expose the video element + this.videoElement.set(this.#video); + + // Create the MediaSource + this.#mediaSource = new MediaSource(); + this.mediaSource.set(this.#mediaSource); + console.log("[MSE] Video initialization: MediaSource signal set, state:", this.#mediaSource.readyState); + + // Attach the MediaSource to the video element + const url = URL.createObjectURL(this.#mediaSource); + this.#video.src = url; + console.log("[MSE] MediaSource created and attached to video element"); + + // Wait for the sourceopen event + await new Promise<void>((resolve, reject) => { + const timeout = setTimeout(() => { + reject(new Error("MediaSource sourceopen timeout")); + }, 5000); + + this.#mediaSource?.addEventListener( + "sourceopen", + () => { + clearTimeout(timeout); + console.log("[MSE] MediaSource sourceopen event fired"); + // Update the signal to ensure audio sees the open MediaSource + if (this.#mediaSource) { + this.mediaSource.set(this.#mediaSource); + } + try { + this.#videoSourceBuffer = this.#mediaSource?.addSourceBuffer(mimeType); + if (!this.#videoSourceBuffer) { + reject(new Error("Failed to create video SourceBuffer")); + return; + } + console.log("[MSE] Video SourceBuffer created successfully"); + this.#setupVideoSourceBuffer(); + resolve(); + } catch (error) { + console.error("[MSE] Error creating video SourceBuffer:", error); + reject(error); + } + }, + { once: true }, + ); +
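+ // Note: the "error" listener below covers attach failures; the 5s timeout above
+ // covers the case where neither "sourceopen" nor "error" ever fires.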
this.#mediaSource?.addEventListener("error", (e) => { + clearTimeout(timeout); + console.error("[MSE] MediaSource error event:", e); + reject(new Error(`MediaSource error: ${e}`)); + }, { once: true }); + }); + + console.log("[MSE] Video initialization complete, starting frame capture"); + this.#startFrameCapture(); + } + + async initializeAudio(config: Catalog.AudioConfig): Promise { + // Early return if already initialized + if (this.#audioSourceBuffer && this.#audioSourceBufferSetup) { + console.log("[MSE] Audio SourceBuffer already initialized, skipping"); + return; + } + + const mimeType = Mime.buildAudioMimeType(config); + if (!mimeType) { + throw new Error(`Unsupported codec for MSE: ${config.codec}`); + } + + console.log("[MSE] Initializing audio, MIME type:", mimeType); + + // Get MediaSource from signal (most up-to-date) + // Use a small delay to ensure signal updates have propagated + await new Promise(resolve => setTimeout(resolve, 10)); + let mediaSource = this.mediaSource.peek(); + console.log("[MSE] Audio initialization: MediaSource from signal:", mediaSource ? `readyState=${mediaSource.readyState}` : "not set"); + + // Also check private field as fallback + if (!mediaSource && this.#mediaSource) { + console.log("[MSE] Audio initialization: Using private MediaSource field, state:", this.#mediaSource.readyState); + mediaSource = this.#mediaSource; + } + + // Quick check: if MediaSource is ready, proceed immediately + if (mediaSource && mediaSource.readyState === "open") { + console.log("[MSE] Audio initialization: MediaSource is already open, proceeding"); + this.#mediaSource = mediaSource; + } else { + console.log("[MSE] Audio initialization: MediaSource not ready, waiting..."); + // Wait for MediaSource to be created and open (video initialization is async) + // Use a longer timeout to allow video to restart properly + await new Promise((resolve, reject) => { + const maxWait = 5000; // 5 seconds max wait + const startTime = Date.now(); + const checkInterval = 50; // Check every 50ms for responsiveness + + const timeout = setTimeout(() => { + const waited = ((Date.now() - startTime) / 1000).toFixed(1); + reject(new Error(`MediaSource not ready after ${waited}s (current state: ${mediaSource?.readyState || "not created"})`)); + }, maxWait); + + const checkReady = () => { + // Get latest MediaSource from signal (always get fresh value) + const signalValue = this.mediaSource.peek(); + mediaSource = signalValue; + + // Also check private field if signal is not set + if (!mediaSource && this.#mediaSource) { + mediaSource = this.#mediaSource; + } + + // Check if MediaSource exists and is open + if (mediaSource && mediaSource.readyState === "open") { + clearTimeout(timeout); + this.#mediaSource = mediaSource; + const elapsed = ((Date.now() - startTime) / 1000).toFixed(2); + console.log(`[MSE] Audio initialization: MediaSource is ready (waited ${elapsed}s)`); + resolve(); + return; + } + + // Log progress for debugging (every 0.5 seconds) + const elapsed = Date.now() - startTime; + if (elapsed % 500 < checkInterval) { + const signalState = this.mediaSource.peek()?.readyState || "not set"; + const privateState = this.#mediaSource?.readyState || "not set"; + console.log(`[MSE] Audio initialization: Waiting for MediaSource (${(elapsed / 1000).toFixed(1)}s, signal: ${signalState}, private: ${privateState})`); + } + + // If MediaSource exists but is closed, it's from an old instance - wait for new one + if (mediaSource && mediaSource.readyState === "closed") { + // Reset private field + 
if (this.#mediaSource === mediaSource) { + this.#mediaSource = undefined; + } + } + + // Continue checking if we haven't exceeded max wait time + if (elapsed < maxWait) { + setTimeout(checkReady, checkInterval); + } else { + clearTimeout(timeout); + const waited = (elapsed / 1000).toFixed(1); + const finalSignalState = this.mediaSource.peek()?.readyState || "not set"; + const finalPrivateState = this.#mediaSource?.readyState || "not set"; + reject(new Error(`MediaSource not ready after ${waited}s (signal: ${finalSignalState}, private: ${finalPrivateState})`)); + } + }; + + checkReady(); + }); + } + + // Final check - ensure we have a MediaSource + mediaSource = this.mediaSource.peek() || this.#mediaSource; + if (!mediaSource || mediaSource.readyState !== "open") { + throw new Error(`MediaSource not ready (state: ${mediaSource?.readyState || "not created"})`); + } + + // Update private field + this.#mediaSource = mediaSource; + + // Check if MediaSource already has an audio SourceBuffer + // (could be added by a previous call to initializeAudio) + if (this.#mediaSource.sourceBuffers.length >= 2) { + const sourceBuffers = Array.from(this.#mediaSource.sourceBuffers); + + // If we already have an audio SourceBuffer set, use it + if (this.#audioSourceBuffer && sourceBuffers.includes(this.#audioSourceBuffer)) { + return; // Already have it + } + + // If we have exactly 2 SourceBuffers and one is video, the other must be audio + if (sourceBuffers.length === 2 && this.#videoSourceBuffer) { + const otherBuffer = sourceBuffers.find( + (sb) => sb !== this.#videoSourceBuffer + ); + if (otherBuffer) { + // This must be the audio SourceBuffer + this.#audioSourceBuffer = otherBuffer; + if (!this.#audioSourceBufferSetup) { + this.#setupAudioSourceBuffer(); + } + return; + } + } + + // Fallback: If we have 2 SourceBuffers but don't know which is video + // Assume the second one is audio (video is usually added first) + if (sourceBuffers.length === 2 && !this.#videoSourceBuffer) { + console.log("[MSE] Video SourceBuffer not set yet, using fallback: assuming second SourceBuffer is audio"); + this.#audioSourceBuffer = sourceBuffers[1]; + if (!this.#audioSourceBufferSetup) { + this.#setupAudioSourceBuffer(); + } + return; + } + + // MediaSource has 2 SourceBuffers but we can't identify which is audio + // This shouldn't happen, but handle gracefully + throw new Error("MediaSource already has maximum SourceBuffers and cannot identify audio SourceBuffer"); + } + + // Double-check audio SourceBuffer wasn't set while we were waiting + if (this.#audioSourceBuffer) { + return; + } + + // Wait for video SourceBuffer to finish if updating + if (this.#videoSourceBuffer?.updating) { + console.log("[MSE] Waiting for video SourceBuffer to finish updating before adding audio"); + await new Promise((resolve) => { + this.#videoSourceBuffer!.addEventListener("updateend", () => { + console.log("[MSE] Video SourceBuffer finished updating"); + resolve(); + }, { once: true }); + }); + } + + // Final check before adding + if (this.#audioSourceBuffer) { + return; + } + + // Check again if MediaSource now has 2 SourceBuffers (race condition) + if (this.#mediaSource.sourceBuffers.length >= 2) { + const sourceBuffers = Array.from(this.#mediaSource.sourceBuffers); + + // If we already have audio SourceBuffer set, use it + if (this.#audioSourceBuffer && sourceBuffers.includes(this.#audioSourceBuffer)) { + return; + } + + // If we have exactly 2 and one is video, use the other + if (sourceBuffers.length === 2 && this.#videoSourceBuffer) 
{ + const otherBuffer = sourceBuffers.find( + (sb) => sb !== this.#videoSourceBuffer + ); + if (otherBuffer) { + this.#audioSourceBuffer = otherBuffer; + if (!this.#audioSourceBufferSetup) { + this.#setupAudioSourceBuffer(); + } + return; + } + } + + // Fallback: If we have 2 SourceBuffers but don't know which is video + if (sourceBuffers.length === 2 && !this.#videoSourceBuffer) { + console.log("[MSE] Race condition: Video SourceBuffer not set yet, using fallback"); + this.#audioSourceBuffer = sourceBuffers[1]; // Assume second is audio + if (!this.#audioSourceBufferSetup) { + this.#setupAudioSourceBuffer(); + } + return; + } + + throw new Error("MediaSource already has maximum SourceBuffers and cannot identify audio SourceBuffer"); + } + + // Final check before adding - verify MediaSource is still open + if (this.#mediaSource.readyState !== "open") { + throw new Error(`MediaSource readyState changed to "${this.#mediaSource.readyState}" before adding audio SourceBuffer`); + } + + // Ensure we're using the MediaSource from signal (most up-to-date) + mediaSource = this.mediaSource.peek() || this.#mediaSource; + if (!mediaSource) { + throw new Error("MediaSource is not available"); + } + + // Update private field to match signal + this.#mediaSource = mediaSource; + + // Wait for video SourceBuffer to finish updating before adding audio SourceBuffer + // Only wait if it's actually updating (should be rare) + if (this.#videoSourceBuffer?.updating) { + console.log("[MSE] Video SourceBuffer is updating, waiting briefly"); + await new Promise((resolve) => { + const timeout = setTimeout(() => { + // Don't wait too long - proceed anyway + console.log("[MSE] Video SourceBuffer update timeout, proceeding"); + resolve(); + }, 500); // Only wait 500ms max + + this.#videoSourceBuffer!.addEventListener("updateend", () => { + clearTimeout(timeout); + resolve(); + }, { once: true }); + }); + } + + // Log state before adding + const sourceBuffers = Array.from(mediaSource.sourceBuffers); + console.log("[MSE] About to add audio SourceBuffer", { + audioMimeType: mimeType, + sourceBufferCount: sourceBuffers.length, + videoSourceBufferUpdating: this.#videoSourceBuffer?.updating, + readyState: mediaSource.readyState, + isAudioMimeTypeSupported: MediaSource.isTypeSupported(mimeType), + }); + + // Double-check MIME type is supported + if (!MediaSource.isTypeSupported(mimeType)) { + throw new Error(`Audio MIME type not supported: ${mimeType}`); + } + + // Some browsers have quirks - try to add SourceBuffer and handle errors gracefully + try { + // Check if we can actually add another SourceBuffer + // Some browsers might report 1 SourceBuffer but actually be at limit + if (sourceBuffers.length >= 2) { + console.warn("[MSE] MediaSource already has 2 SourceBuffers, cannot add audio"); + throw new Error("MediaSource already has maximum SourceBuffers"); + } + + this.#audioSourceBuffer = mediaSource.addSourceBuffer(mimeType); + if (!this.#audioSourceBuffer) { + throw new Error("Failed to create audio SourceBuffer"); + } + console.log("[MSE] Audio SourceBuffer created successfully"); + this.#setupAudioSourceBuffer(); + } catch (error) { + // If QuotaExceededError, check if another call added the audio SourceBuffer + if (error instanceof DOMException && error.name === "QuotaExceededError") { + const sourceBuffers = Array.from(mediaSource.sourceBuffers); + const readyState = mediaSource.readyState; + console.log("[MSE] QuotaExceededError - MediaSource has", sourceBuffers.length, "SourceBuffers", { + readyState, + 
videoSourceBufferSet: !!this.#videoSourceBuffer, + audioSourceBufferSet: !!this.#audioSourceBuffer, + }); + + // If MediaSource is not open, that's the problem + if (readyState !== "open") { + throw new Error(`MediaSource readyState is "${readyState}", cannot add SourceBuffers`); + } + + // If we already have audio SourceBuffer set, use it + if (this.#audioSourceBuffer && sourceBuffers.includes(this.#audioSourceBuffer)) { + console.log("[MSE] Found existing audio SourceBuffer reference"); + return; // Success - silently return + } + + // If we have exactly 2 SourceBuffers and one is video, the other must be audio + if (sourceBuffers.length === 2 && this.#videoSourceBuffer) { + const otherBuffer = sourceBuffers.find( + (sb) => sb !== this.#videoSourceBuffer + ); + if (otherBuffer) { + console.log("[MSE] Found audio SourceBuffer by exclusion (other than video)"); + this.#audioSourceBuffer = otherBuffer; + if (!this.#audioSourceBufferSetup) { + this.#setupAudioSourceBuffer(); + } + return; // Success - silently return + } + } + + // If we have 2 SourceBuffers but don't know which is video, try to identify by checking if one is already set + // This handles the case where video SourceBuffer isn't set yet + if (sourceBuffers.length === 2) { + // If we don't have video SourceBuffer set, we can't reliably identify which is audio + // But if one of them was added by a previous call to initializeAudio, we should use it + // For now, if we have 2 SourceBuffers and can't identify, assume the first non-video one is audio + // This is a fallback - ideally video should initialize first + const nonVideoBuffer = this.#videoSourceBuffer + ? sourceBuffers.find(sb => sb !== this.#videoSourceBuffer) + : sourceBuffers[1]; // If video not set, assume second one is audio (video is usually first) + + if (nonVideoBuffer) { + console.log("[MSE] Using fallback: assuming non-video SourceBuffer is audio"); + this.#audioSourceBuffer = nonVideoBuffer; + if (!this.#audioSourceBufferSetup) { + this.#setupAudioSourceBuffer(); + } + return; // Success - silently return + } + } + + // If we have only 1 SourceBuffer and get QuotaExceededError, this is unusual + // It might mean the video SourceBuffer is updating or MediaSource is in a transitional state + // Wait briefly and retry once + if (sourceBuffers.length === 1 && this.#videoSourceBuffer) { + console.log("[MSE] QuotaExceededError with only 1 SourceBuffer - retrying once", { + readyState: mediaSource.readyState, + videoSourceBufferUpdating: this.#videoSourceBuffer.updating, + }); + + // Wait for video SourceBuffer to finish if it's updating (with timeout) + if (this.#videoSourceBuffer.updating) { + await new Promise((resolve) => { + const timeout = setTimeout(() => resolve(), 200); // Max 200ms wait + this.#videoSourceBuffer!.addEventListener("updateend", () => { + clearTimeout(timeout); + resolve(); + }, { once: true }); + }); + } else { + // Brief wait for MediaSource to stabilize + await new Promise(resolve => setTimeout(resolve, 10)); + } + + // Quick retry - check if another call added it first + const currentSourceBuffers = Array.from(mediaSource.sourceBuffers); + if (currentSourceBuffers.length >= 2) { + const otherBuffer = currentSourceBuffers.find(sb => sb !== this.#videoSourceBuffer); + if (otherBuffer) { + console.log("[MSE] Found audio SourceBuffer after retry"); + this.#audioSourceBuffer = otherBuffer; + if (!this.#audioSourceBufferSetup) { + this.#setupAudioSourceBuffer(); + } + return; + } + } + + // Try adding again + try { + if (mediaSource.readyState 
!== "open") { + throw new Error(`MediaSource readyState is "${mediaSource.readyState}"`); + } + this.#audioSourceBuffer = mediaSource.addSourceBuffer(mimeType); + if (!this.#audioSourceBuffer) { + throw new Error("Failed to create audio SourceBuffer"); + } + console.log("[MSE] Audio SourceBuffer created successfully after retry"); + this.#setupAudioSourceBuffer(); + return; // Success + } catch (retryError) { + // If retry also fails, allow video-only playback (don't delay further) + console.warn("[MSE] Retry failed, allowing video-only playback", retryError); + return; + } + } + + // If we still can't find it, log details and rethrow + console.warn("[MSE] QuotaExceededError but couldn't find audio SourceBuffer in MediaSource", { + sourceBufferCount: sourceBuffers.length, + readyState: mediaSource.readyState, + hasVideoSourceBuffer: !!this.#videoSourceBuffer, + hasAudioSourceBuffer: !!this.#audioSourceBuffer, + }); + } + console.error("[MSE] Error adding audio SourceBuffer:", error); + throw error; + } + } + + #setupVideoSourceBuffer(): void { + if (!this.#videoSourceBuffer) return; + + const SEEK_HYSTERESIS = 0.1; // seconds to avoid re-seek loops on tiny drift + this.#videoSourceBuffer.addEventListener("updateend", () => { + // Check if we have buffered data and try to play + const video = this.#video; + const sourceBuffer = this.#videoSourceBuffer; + if (video && sourceBuffer && sourceBuffer.buffered.length > 0) { + const buffered = sourceBuffer.buffered; + const start = buffered.start(0); + const end = buffered.end(0); + + // Seek to start of buffered range if needed + if ( + video.currentTime + SEEK_HYSTERESIS < start || + video.currentTime >= end - SEEK_HYSTERESIS || + isNaN(video.currentTime) + ) { + console.log(`[MSE] Seeking video to buffered range start: ${start.toFixed(2)}`); + video.currentTime = start; + } + + // Try to play if paused + if (video.paused && video.readyState >= HTMLMediaElement.HAVE_METADATA) { + console.log("[MSE] Attempting to play video after SourceBuffer updateend"); + video.play().catch((err) => { + console.warn("[MSE] Autoplay blocked:", err); + }); + } + } + + this.#processVideoQueue(); + }); + + this.#videoSourceBuffer.addEventListener("error", (e) => { + console.error("[MSE] Video SourceBuffer error:", e); + }); + } + + #setupAudioSourceBuffer(): void { + if (!this.#audioSourceBuffer || this.#audioSourceBufferSetup) return; + + this.#audioSourceBuffer.addEventListener("updateend", () => { + this.#processAudioQueue(); + }); + + this.#audioSourceBuffer.addEventListener("error", (e) => { + console.error("[MSE] Audio SourceBuffer error:", e); + }); + + this.#audioSourceBufferSetup = true; + } + + #startFrameCapture(): void { + if (!this.#video) return; + + let captureCount = 0; + const captureFrame = () => { + if (!this.#video) return; + + try { + const frame = new VideoFrame(this.#video, { + timestamp: this.#video.currentTime * 1_000_000, // Convert to microseconds + }); + + captureCount++; + if (captureCount === 1 || captureCount % 30 === 0) { + console.log(`[MSE] Captured frame ${captureCount}, currentTime: ${this.#video.currentTime.toFixed(2)}, readyState: ${this.#video.readyState}, paused: ${this.#video.paused}, buffered: ${this.#video.buffered.length > 0 ? `${this.#video.buffered.start(0).toFixed(2)}-${this.#video.buffered.end(0).toFixed(2)}` : "none"}`); + } + + this.#stats.update((current) => ({ + frameCount: (current?.frameCount ?? 0) + 1, + timestamp: frame.timestamp, + bytesReceived: current?.bytesReceived ?? 
0, + })); + + this.frame.update((prev) => { + prev?.close(); + return frame; + }); + + if (this.#video.videoWidth && this.#video.videoHeight) { + this.display.set({ + width: this.#video.videoWidth, + height: this.#video.videoHeight, + }); + } + + if (this.#video.readyState >= HTMLMediaElement.HAVE_CURRENT_DATA) { + this.bufferStatus.set({ state: "filled" }); + // Try to play if paused and we have data + if (this.#video.paused && this.#video.readyState >= HTMLMediaElement.HAVE_CURRENT_DATA) { + this.#video.play().catch((err) => { + if (captureCount <= 5) { + console.log("[MSE] Attempted to play video, result:", err); + } + }); + } + } + } catch (error) { + console.error("Error capturing frame:", error); + } + + if (this.#video.requestVideoFrameCallback) { + this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame); + } else { + this.#frameCallbackId = requestAnimationFrame(captureFrame) as unknown as number; + } + }; + + if (this.#video.requestVideoFrameCallback) { + this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame); + } else { + this.#frameCallbackId = requestAnimationFrame(captureFrame) as unknown as number; + } + } + + async appendVideoFragment(fragment: Uint8Array): Promise { + if (!this.#videoSourceBuffer || !this.#mediaSource) { + throw new Error("Video SourceBuffer not initialized"); + } + + if (this.#videoAppendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { + const discarded = this.#videoAppendQueue.shift(); + console.warn( + `[MSE] Video queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`, + ); + } + + const copy = new Uint8Array(fragment); + this.#videoAppendQueue.push(copy); + this.#processVideoQueue(); + } + + async appendAudioFragment(fragment: Uint8Array): Promise { + // If audio SourceBuffer doesn't exist, silently return (video-only playback) + if (!this.#audioSourceBuffer || !this.#mediaSource) { + return; + } + + if (this.#mediaSource.readyState === "closed") { + return; + } + + if (this.#audioAppendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { + const discarded = this.#audioAppendQueue.shift(); + console.warn( + `[MSE] Audio queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`, + ); + } + + const copy = new Uint8Array(fragment); + this.#audioAppendQueue.push(copy); + this.#processAudioQueue(); + } + + /** + * Extracts a track-specific init segment from a full init segment. + * MSE requires track-specific init segments for each SourceBuffer. 
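+ * For example, a muxed CMAF init segment laid out as
+ *   [ftyp][moov [mvhd][trak(vide)][trak(soun)][mvex [trex(1)][trex(2)]]]
+ * is rebuilt for the video SourceBuffer as
+ *   [ftyp][moov [mvhd][trak(vide)][mvex [trex(1)]]]
+ * i.e. only the matching trak box and its trex entry survive.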
+ */ + #extractTrackInitSegment(fullInitSegment: Uint8Array, trackType: "video" | "audio"): Uint8Array { + let offset = 0; + let ftypAtom: Uint8Array | null = null; + let moovOffset = 0; + let moovSize = 0; + + // Find the ftyp and moov atoms + while (offset + 8 <= fullInitSegment.length) { + const size = (fullInitSegment[offset] << 24) | + (fullInitSegment[offset + 1] << 16) | + (fullInitSegment[offset + 2] << 8) | + fullInitSegment[offset + 3]; + const type = String.fromCharCode( + fullInitSegment[offset + 4], + fullInitSegment[offset + 5], + fullInitSegment[offset + 6], + fullInitSegment[offset + 7], + ); + + // Every box header is 8 bytes, so a smaller size is malformed; bail out before it can loop forever. + if (size < 8) break; + + if (type === "ftyp") { + ftypAtom = fullInitSegment.slice(offset, offset + size); + offset += size; + } else if (type === "moov") { + moovOffset = offset; + moovSize = size; + break; + } else { + offset += size; + } + } + + if (moovSize === 0) { + throw new Error("moov atom not found in init segment"); + } + + // Parse the moov atom to find the relevant track + const moovAtom = fullInitSegment.slice(moovOffset, moovOffset + moovSize); + const targetHandler = trackType === "video" ? "vide" : "soun"; + + // Count the tracks in moov + let moovTrackCount = 0; + let moovScanOffset = 8; + while (moovScanOffset + 8 <= moovAtom.length) { + const size = (moovAtom[moovScanOffset] << 24) | + (moovAtom[moovScanOffset + 1] << 16) | + (moovAtom[moovScanOffset + 2] << 8) | + moovAtom[moovScanOffset + 3]; + const type = String.fromCharCode( + moovAtom[moovScanOffset + 4], + moovAtom[moovScanOffset + 5], + moovAtom[moovScanOffset + 6], + moovAtom[moovScanOffset + 7], + ); + if (type === "trak") { + moovTrackCount++; + } + if (size < 8) break; + moovScanOffset += size; + } + + // If there is only one track, use the init segment directly + if (moovTrackCount === 1) { + return fullInitSegment; + } + + // Multiple tracks - extract the one we need + const trakAtom = this.#findTrackInMoov(moovAtom, targetHandler); + if (!trakAtom) { + // Try alternative handler spellings (the primary handler was already tried above) + const alternatives = trackType === "video" ? ["vid ", "avc1"] : ["mp4a"]; + for (const alt of alternatives) { + const altTrak = this.#findTrackInMoov(moovAtom, alt); + if (altTrak) { + return this.#extractTrackInitSegmentWithHandler(fullInitSegment, ftypAtom, moovAtom, alt); + } + } + + const foundTracks = this.#getAllTracksInMoov(moovAtom); + const foundHandlers = foundTracks.map(t => t.handler || "unknown").join(", "); + throw new Error( + `${trackType} track not found in moov atom. ` + + `Looking for handler: "${targetHandler}", but found: [${foundHandlers}].
` + + `The init segment should contain all tracks.` + ); + } + + // Reconstruct moov atom with only the target track + const newMoov = this.#rebuildMoovWithSingleTrack(moovAtom, trakAtom, targetHandler); + + // Combine ftyp (if present) + new moov + const result: Uint8Array[] = []; + if (ftypAtom) { + result.push(ftypAtom); + } + result.push(newMoov); + + const totalSize = result.reduce((sum, arr) => sum + arr.length, 0); + const combined = new Uint8Array(totalSize); + let writeOffset = 0; + for (const arr of result) { + combined.set(arr, writeOffset); + writeOffset += arr.length; + } + + return combined; + } + + #extractTrackInitSegmentWithHandler(_fullInitSegment: Uint8Array, ftypAtom: Uint8Array | null, moovAtom: Uint8Array, handlerType: string): Uint8Array { + const trakAtom = this.#findTrackInMoov(moovAtom, handlerType); + if (!trakAtom) { + throw new Error(`Track with handler "${handlerType}" not found`); + } + + const newMoov = this.#rebuildMoovWithSingleTrack(moovAtom, trakAtom, handlerType); + + const result: Uint8Array[] = []; + if (ftypAtom) { + result.push(ftypAtom); + } + result.push(newMoov); + + const totalSize = result.reduce((sum, arr) => sum + arr.length, 0); + const combined = new Uint8Array(totalSize); + let writeOffset = 0; + for (const arr of result) { + combined.set(arr, writeOffset); + writeOffset += arr.length; + } + + return combined; + } + + #getAllTracksInMoov(moovAtom: Uint8Array): Array<{handler: string | null}> { + const tracks: Array<{handler: string | null}> = []; + let offset = 8; // Skip moov header + + while (offset + 8 <= moovAtom.length) { + const size = (moovAtom[offset] << 24) | + (moovAtom[offset + 1] << 16) | + (moovAtom[offset + 2] << 8) | + moovAtom[offset + 3]; + const type = String.fromCharCode( + moovAtom[offset + 4], + moovAtom[offset + 5], + moovAtom[offset + 6], + moovAtom[offset + 7], + ); + + if (type === "trak") { + const trakAtom = moovAtom.slice(offset, offset + size); + const handler = this.#getHandlerType(trakAtom); + tracks.push({handler: handler || null}); + } + + if (size < 8 || size === 0) break; + offset += size; + } + + return tracks; + } + + #getHandlerType(trakAtom: Uint8Array): string | null { + let offset = 8; // Skip trak header + + while (offset + 8 <= trakAtom.length) { + const size = (trakAtom[offset] << 24) | + (trakAtom[offset + 1] << 16) | + (trakAtom[offset + 2] << 8) | + trakAtom[offset + 3]; + const type = String.fromCharCode( + trakAtom[offset + 4], + trakAtom[offset + 5], + trakAtom[offset + 6], + trakAtom[offset + 7], + ); + + if (type === "mdia") { + const mdiaAtom = trakAtom.slice(offset, offset + size); + let mdiaOffset = 8; + while (mdiaOffset + 8 <= mdiaAtom.length) { + const hdlrSize = (mdiaAtom[mdiaOffset] << 24) | + (mdiaAtom[mdiaOffset + 1] << 16) | + (mdiaAtom[mdiaOffset + 2] << 8) | + mdiaAtom[mdiaOffset + 3]; + const hdlrType = String.fromCharCode( + mdiaAtom[mdiaOffset + 4], + mdiaAtom[mdiaOffset + 5], + mdiaAtom[mdiaOffset + 6], + mdiaAtom[mdiaOffset + 7], + ); + + if (hdlrType === "hdlr") { + if (mdiaOffset + 24 <= mdiaAtom.length) { + const handlerTypeBytes = String.fromCharCode( + mdiaAtom[mdiaOffset + 16], + mdiaAtom[mdiaOffset + 17], + mdiaAtom[mdiaOffset + 18], + mdiaAtom[mdiaOffset + 19], + ); + return handlerTypeBytes; + } + } + + if (hdlrSize < 8 || hdlrSize === 0) break; + mdiaOffset += hdlrSize; + } + } + + if (size < 8 || size === 0) break; + offset += size; + } + + return null; + } + + #findTrackInMoov(moovAtom: Uint8Array, handlerType: string): Uint8Array | null { + let offset = 8; 
// Skip moov header + + while (offset + 8 <= moovAtom.length) { + const size = (moovAtom[offset] << 24) | + (moovAtom[offset + 1] << 16) | + (moovAtom[offset + 2] << 8) | + moovAtom[offset + 3]; + const type = String.fromCharCode( + moovAtom[offset + 4], + moovAtom[offset + 5], + moovAtom[offset + 6], + moovAtom[offset + 7], + ); + + if (type === "trak") { + const trakAtom = moovAtom.slice(offset, offset + size); + if (this.#trakHasHandler(trakAtom, handlerType)) { + return trakAtom; + } + } + + if (size < 8 || size === 0) break; + offset += size; + } + + return null; + } + + #trakHasHandler(trakAtom: Uint8Array, handlerType: string): boolean { + const foundHandler = this.#getHandlerType(trakAtom); + return foundHandler === handlerType; + } + + #rebuildMoovWithSingleTrack(moovAtom: Uint8Array, trakAtom: Uint8Array, targetHandler: string): Uint8Array { + const parts: Uint8Array[] = []; + let offset = 8; // Skip moov header + + const trackId = this.#getTrackId(trakAtom); + + while (offset + 8 <= moovAtom.length) { + const size = (moovAtom[offset] << 24) | + (moovAtom[offset + 1] << 16) | + (moovAtom[offset + 2] << 8) | + moovAtom[offset + 3]; + const type = String.fromCharCode( + moovAtom[offset + 4], + moovAtom[offset + 5], + moovAtom[offset + 6], + moovAtom[offset + 7], + ); + + if (type === "mvhd") { + parts.push(moovAtom.slice(offset, offset + size)); + } else if (type === "trak") { + const trak = moovAtom.slice(offset, offset + size); + if (this.#trakHasHandler(trak, targetHandler)) { + parts.push(trak); + } + } else if (type === "mvex") { + const mvexAtom = moovAtom.slice(offset, offset + size); + const rebuiltMvex = this.#rebuildMvexWithSingleTrack(mvexAtom, trackId); + if (rebuiltMvex) { + parts.push(rebuiltMvex); + } + } + + if (size < 8 || size === 0) break; + offset += size; + } + + const totalSize = 8 + parts.reduce((sum, arr) => sum + arr.length, 0); + const newMoov = new Uint8Array(totalSize); + + newMoov[0] = (totalSize >>> 24) & 0xFF; + newMoov[1] = (totalSize >>> 16) & 0xFF; + newMoov[2] = (totalSize >>> 8) & 0xFF; + newMoov[3] = totalSize & 0xFF; + newMoov[4] = 0x6D; // 'm' + newMoov[5] = 0x6F; // 'o' + newMoov[6] = 0x6F; // 'o' + newMoov[7] = 0x76; // 'v' + + let writeOffset = 8; + for (const part of parts) { + newMoov.set(part, writeOffset); + writeOffset += part.length; + } + + return newMoov; + } + + #getTrackId(trakAtom: Uint8Array): number { + let offset = 8; // Skip trak header + + while (offset + 8 <= trakAtom.length) { + const size = (trakAtom[offset] << 24) | + (trakAtom[offset + 1] << 16) | + (trakAtom[offset + 2] << 8) | + trakAtom[offset + 3]; + const type = String.fromCharCode( + trakAtom[offset + 4], + trakAtom[offset + 5], + trakAtom[offset + 6], + trakAtom[offset + 7], + ); + + if (type === "tkhd") { + const version = trakAtom[offset + 8]; + const trackIdOffset = version === 1 ? 
24 : 16; + if (offset + trackIdOffset + 4 <= trakAtom.length) { + return (trakAtom[offset + trackIdOffset] << 24) | + (trakAtom[offset + trackIdOffset + 1] << 16) | + (trakAtom[offset + trackIdOffset + 2] << 8) | + trakAtom[offset + trackIdOffset + 3]; + } + } + + if (size < 8 || size === 0) break; + offset += size; + } + + return 0; + } + + #rebuildMvexWithSingleTrack(mvexAtom: Uint8Array, trackId: number): Uint8Array | null { + const parts: Uint8Array[] = []; + let offset = 8; // Skip mvex header + + while (offset + 8 <= mvexAtom.length) { + const size = (mvexAtom[offset] << 24) | + (mvexAtom[offset + 1] << 16) | + (mvexAtom[offset + 2] << 8) | + mvexAtom[offset + 3]; + const type = String.fromCharCode( + mvexAtom[offset + 4], + mvexAtom[offset + 5], + mvexAtom[offset + 6], + mvexAtom[offset + 7], + ); + + if (type === "trex") { + if (offset + 16 <= mvexAtom.length) { + const trexTrackId = (mvexAtom[offset + 12] << 24) | + (mvexAtom[offset + 13] << 16) | + (mvexAtom[offset + 14] << 8) | + mvexAtom[offset + 15]; + if (trexTrackId === trackId) { + parts.push(mvexAtom.slice(offset, offset + size)); + } + } + } + + if (size < 8 || size === 0) break; + offset += size; + } + + if (parts.length === 0) { + return null; + } + + const totalSize = 8 + parts.reduce((sum, arr) => sum + arr.length, 0); + const newMvex = new Uint8Array(totalSize); + + newMvex[0] = (totalSize >>> 24) & 0xFF; + newMvex[1] = (totalSize >>> 16) & 0xFF; + newMvex[2] = (totalSize >>> 8) & 0xFF; + newMvex[3] = totalSize & 0xFF; + newMvex[4] = 0x6D; // 'm' + newMvex[5] = 0x76; // 'v' + newMvex[6] = 0x65; // 'e' + newMvex[7] = 0x78; // 'x' + + let writeOffset = 8; + for (const part of parts) { + newMvex.set(part, writeOffset); + writeOffset += part.length; + } + + return newMvex; + } + + #concatenateFragments(fragments: Uint8Array[]): Uint8Array { + if (fragments.length === 1) { + return fragments[0]; + } + + const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0); + const result = new Uint8Array(totalSize); + let offset = 0; + for (const fragment of fragments) { + result.set(fragment, offset); + offset += fragment.byteLength; + } + + return result; + } + + #processVideoQueue(): void { + if (!this.#videoSourceBuffer || this.#videoSourceBuffer.updating || this.#videoAppendQueue.length === 0) { + return; + } + + if (this.#mediaSource?.readyState !== "open") { + return; + } + + // Wait if any SourceBuffer is updating (dash.js pattern) + if (this.#isBufferUpdating()) { + return; + } + + const fragment = this.#videoAppendQueue.shift(); + if (!fragment) return; + + try { + this.#videoSourceBuffer.appendBuffer(fragment as BufferSource); + this.#stats.update((current) => { + const newCount = (current?.frameCount ?? 0) + 1; + if (newCount === 1 || newCount % 10 === 0) { + console.log(`[MSE] Appended video fragment ${newCount}, size: ${fragment.byteLength} bytes`); + } + return { + frameCount: newCount, + timestamp: current?.timestamp ?? 0, + bytesReceived: (current?.bytesReceived ?? 
0) + fragment.byteLength, + }; + }); + } catch (error) { + // Let browser handle buffer management - just log the error + if (error instanceof DOMException && error.name === "QuotaExceededError") { + console.warn("[MSE] QuotaExceededError - browser will manage buffer automatically"); + // Put fragment back in queue to retry later + this.#videoAppendQueue.unshift(fragment); + } else { + console.error("[MSE] Error appending video fragment:", error); + } + } + } + + #processAudioQueue(): void { + if (!this.#audioSourceBuffer || this.#audioSourceBuffer.updating || this.#audioAppendQueue.length === 0) { + return; + } + + if (this.#mediaSource?.readyState !== "open") { + return; + } + + // Wait if any SourceBuffer is updating (dash.js pattern) + if (this.#isBufferUpdating()) { + return; + } + + const fragment = this.#audioAppendQueue.shift(); + if (!fragment) return; + + try { + this.#audioSourceBuffer.appendBuffer(fragment as BufferSource); + } catch (error) { + // Let browser handle buffer management - just log the error + if (error instanceof DOMException && error.name === "QuotaExceededError") { + console.warn("[MSE] QuotaExceededError for audio - browser will manage buffer automatically"); + // Put fragment back in queue to retry later + this.#audioAppendQueue.unshift(fragment); + } else { + console.error("[MSE] Error appending audio fragment:", error); + } + } + } + + // Backward compatibility - delegates to appendVideoFragment + async appendFragment(fragment: Uint8Array): Promise { + return this.appendVideoFragment(fragment); + } + + async runTrack( + effect: Effect, + broadcast: Moq.Broadcast, + name: string, + config: RequiredDecoderConfig, + ): Promise { + await this.initializeVideo(config); + + // Briefly wait for audio SourceBuffer so we don't hit Chrome's quota race. 
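+ // (addSourceBuffer() can throw QuotaExceededError when a second SourceBuffer is
+ // attached while the first is still mid-update; see the retry logic in initializeAudio.)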
+ console.log("[MSE] Checking if audio SourceBuffer will be added..."); + for (let i = 0; i < 10; i++) { // up to ~1s + if (this.#audioSourceBuffer || (this.#mediaSource && this.#mediaSource.sourceBuffers.length >= 2)) { + console.log("[MSE] Audio SourceBuffer detected, proceeding with video"); + break; + } + await new Promise(resolve => setTimeout(resolve, 100)); + } + + const sub = broadcast.subscribe(name, PRIORITY.video); + effect.cleanup(() => sub.close()); + + const consumer = new Frame.Consumer(sub, { + latency: this.latency, + container: "cmaf", + }); + effect.cleanup(() => consumer.close()); + + // Init segment must be in catalog for CMAF + if (!config.initSegment) { + throw new Error("Init segment is required in catalog for CMAF playback"); + } + + // Decode base64 string to Uint8Array + const binaryString = atob(config.initSegment); + const fullInitSegment = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + fullInitSegment[i] = binaryString.charCodeAt(i); + } + + // Extract video-specific init segment + const videoInitSegment = this.#extractTrackInitSegment(fullInitSegment, "video"); + + // Append init segment and wait for completion + if (!this.#videoSourceBuffer) { + throw new Error("Video SourceBuffer not available"); + } + + console.log("[MSE] Appending video init segment, size:", videoInitSegment.byteLength, "bytes"); + await new Promise((resolve, reject) => { + const onUpdateEnd = () => { + videoSourceBuffer.removeEventListener("updateend", onUpdateEnd); + videoSourceBuffer.removeEventListener("error", onError); + console.log("[MSE] Video init segment appended successfully"); + resolve(); + }; + + const onError = (e: Event) => { + videoSourceBuffer.removeEventListener("updateend", onUpdateEnd); + videoSourceBuffer.removeEventListener("error", onError); + const error = e as ErrorEvent; + console.error("[MSE] Video SourceBuffer error appending init segment:", error); + reject(new Error(`Video SourceBuffer error: ${error.message || "unknown error"}`)); + }; + + const videoSourceBuffer = this.#videoSourceBuffer!; + videoSourceBuffer.addEventListener("updateend", onUpdateEnd, { once: true }); + videoSourceBuffer.addEventListener("error", onError, { once: true }); + + try { + videoSourceBuffer.appendBuffer(videoInitSegment as BufferSource); + } catch (error) { + videoSourceBuffer.removeEventListener("updateend", onUpdateEnd); + videoSourceBuffer.removeEventListener("error", onError); + console.error("[MSE] Error calling appendBuffer on video init segment:", error); + reject(error); + } + }); + + // Helper function to detect init segment + function isInitSegmentData(data: Uint8Array): boolean { + if (data.length < 8) return false; + + let offset = 0; + const len = data.length; + + while (offset + 8 <= len) { + const size = + (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; + + const type = String.fromCharCode( + data[offset + 4], + data[offset + 5], + data[offset + 6], + data[offset + 7], + ); + + if (type === "ftyp" || type === "moov") return true; + + if (size < 8 || size === 0) break; + offset += size; + } + + return false; + } + + // Read fragments and append to SourceBuffer + console.log("[MSE] Starting to read video fragments from track"); + effect.spawn(async () => { + let currentGroup: number | undefined; + let gopFragments: Uint8Array[] = []; + let frameCount = 0; + + for (;;) { + const frame = await Promise.race([consumer.decode(), effect.cancel]); + if (!frame) { + console.log(`[MSE] 
Video track ended, processed ${frameCount} frames`); + if (gopFragments.length > 0) { + const gopData = this.#concatenateFragments(gopFragments); + await this.appendVideoFragment(gopData); + } + break; + } + + frameCount++; + if (frameCount === 1 || frameCount % 10 === 0) { + console.log(`[MSE] Processing video frame ${frameCount}, group: ${frame.group}`); + } + + // Skip any init segments that might come from track + if (isInitSegmentData(frame.data)) { + continue; + } + + // Check if we've moved to a new group + if (currentGroup !== undefined && frame.group !== currentGroup) { + if (gopFragments.length > 0) { + const gopData = this.#concatenateFragments(gopFragments); + await this.appendVideoFragment(gopData); + gopFragments = []; + } + currentGroup = frame.group; + } + + if (currentGroup === undefined) { + currentGroup = frame.group; + } + + gopFragments.push(frame.data); + } + }); + } + + async runAudioTrack( + effect: Effect, + broadcast: Moq.Broadcast, + name: string, + config: Catalog.AudioConfig, + catalog: Catalog.Audio, + enabled?: Getter, + ): Promise { + // Check if audio SourceBuffer was initialized + // If not, allow video-only playback + if (!this.#audioSourceBuffer) { + console.log("[MSE] Audio SourceBuffer not available, skipping audio track (video-only playback)"); + return; + } + + // Init segment must be in catalog for CMAF + if (!config.initSegment) { + throw new Error("Init segment is required in catalog for CMAF audio playback"); + } + + // Decode base64 string to Uint8Array + const binaryString = atob(config.initSegment); + const fullInitSegment = new Uint8Array(binaryString.length); + for (let i = 0; i < binaryString.length; i++) { + fullInitSegment[i] = binaryString.charCodeAt(i); + } + + // Extract audio-specific init segment + const audioInitSegment = this.#extractTrackInitSegment(fullInitSegment, "audio"); + + // Append init segment + await this.appendAudioFragment(audioInitSegment); + + // Check if enabled + const isEnabled = enabled ? 
effect.get(enabled) : true; + if (!isEnabled) { + return; + } + + const sub = broadcast.subscribe(name, catalog.priority); + effect.cleanup(() => sub.close()); + + const consumer = new Frame.Consumer(sub, { + latency: this.latency, + container: "cmaf", + }); + effect.cleanup(() => consumer.close()); + + function hasMoovAtom(data: Uint8Array): boolean { + let offset = 0; + const len = data.length; + while (offset + 8 <= len) { + const size = (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; + const type = String.fromCharCode(data[offset + 4], data[offset + 5], data[offset + 6], data[offset + 7]); + if (type === "moov") return true; + if (size < 8 || size === 0) break; + offset += size; + } + return false; + } + + effect.spawn(async () => { + let currentGroup: number | undefined; + let groupFragments: Uint8Array[] = []; + let frameCount = 0; + + for (;;) { + const frame = await Promise.race([consumer.decode(), effect.cancel]); + if (!frame) { + if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") { + const groupData = this.#concatenateFragments(groupFragments); + await this.appendAudioFragment(groupData); + } + break; + } + + frameCount++; + + if (this.#mediaSource?.readyState === "closed") { + break; + } + + // Skip any init segments + if (hasMoovAtom(frame.data)) { + continue; + } + + if (currentGroup !== undefined && frame.group !== currentGroup) { + if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") { + const groupData = this.#concatenateFragments(groupFragments); + await this.appendAudioFragment(groupData); + groupFragments = []; + } + currentGroup = frame.group; + } + + if (currentGroup === undefined) { + currentGroup = frame.group; + } + + groupFragments.push(frame.data); + } + }); + } + + close(): void { + this.#videoAppendQueue = []; + this.#audioAppendQueue = []; + this.#audioSourceBufferSetup = false; + + // Store references before resetting + const audioSourceBuffer = this.#audioSourceBuffer; + const videoSourceBuffer = this.#videoSourceBuffer; + const mediaSource = this.#mediaSource; + + this.#audioSourceBuffer = undefined; // Reset audio SourceBuffer reference + + this.mediaSource.set(undefined); + + if (this.#frameCallbackId !== undefined) { + if (this.#video?.requestVideoFrameCallback) { + this.#video.cancelVideoFrameCallback(this.#frameCallbackId); + } else { + cancelAnimationFrame(this.#frameCallbackId); + } + } + + this.frame.update((prev) => { + prev?.close(); + return undefined; + }); + + if (videoSourceBuffer && mediaSource) { + try { + if (videoSourceBuffer.updating) { + videoSourceBuffer.abort(); + } + } catch (error) { + console.error("Error closing video SourceBuffer:", error); + } + } + + if (audioSourceBuffer && mediaSource) { + try { + if (audioSourceBuffer.updating) { + audioSourceBuffer.abort(); + } + } catch (error) { + console.error("Error closing audio SourceBuffer:", error); + } + } + + if (this.#mediaSource) { + try { + if (this.#mediaSource.readyState === "open") { + this.#mediaSource.endOfStream(); + } + URL.revokeObjectURL(this.#video?.src || ""); + } catch (error) { + console.error("Error closing MediaSource:", error); + } + } + + if (this.#video) { + this.#video.pause(); + this.#video.src = ""; + this.#video.remove(); + } + + this.#signals.close(); + } + + get stats() { + return this.#stats; + } +} diff --git a/js/hang/src/watch/video/source-mse.ts b/js/hang/src/watch/video/source-mse.ts deleted file mode 100644 index eedd5931e..000000000 --- 
a/js/hang/src/watch/video/source-mse.ts +++ /dev/null @@ -1,540 +0,0 @@ -import type * as Moq from "@moq/lite"; -import { Effect, Signal } from "@moq/signals"; -import type * as Catalog from "../../catalog"; -import * as Frame from "../../frame"; -import { PRIORITY } from "../../publish/priority"; -import type * as Time from "../../time"; -import * as Mime from "../../util/mime"; - -// The types in VideoDecoderConfig that cause a hard reload. -type RequiredDecoderConfig = Omit & - Partial>; - -type BufferStatus = { state: "empty" | "filled" }; - -type SyncStatus = { - state: "ready" | "wait"; - bufferDuration?: number; -}; - -export interface VideoStats { - frameCount: number; - timestamp: number; - bytesReceived: number; -} - -/** - * MSE-based video source for CMAF/fMP4 fragments. - * Uses Media Source Extensions to handle complete moof+mdat fragments. - */ -export class SourceMSE { - #video?: HTMLVideoElement; - #mediaSource?: MediaSource; - #sourceBuffer?: SourceBuffer; - - // Queue of fragments waiting to be added - // Maximum limit to prevent infinite growth in live streaming - #appendQueue: Uint8Array[] = []; - static readonly MAX_QUEUE_SIZE = 10; // Maximum fragments in queue - - // Expose the current frame to render as a signal - frame = new Signal(undefined); - - // The target latency in milliseconds. - latency: Signal; - - // The display size of the video in pixels. - display = new Signal<{ width: number; height: number } | undefined>(undefined); - - // Whether to flip the video horizontally. - flip = new Signal(undefined); - - bufferStatus = new Signal({ state: "empty" }); - syncStatus = new Signal({ state: "ready" }); - - #stats = new Signal(undefined); - - #signals = new Effect(); - #frameCallbackId?: number; - #audioElement?: HTMLAudioElement; - #lastSyncTime = 0; - - constructor(latency: Signal) { - this.latency = latency; - } - - setAudioSync(audioElement: HTMLAudioElement | undefined): void { - this.#audioElement = audioElement; - this.#lastSyncTime = 0; // Reset sync timer when audio element changes - } - - async initialize(config: RequiredDecoderConfig): Promise { - const mimeType = Mime.buildVideoMimeType(config); - if (!mimeType) { - throw new Error(`Unsupported codec for MSE: ${config.codec}`); - } - - this.#video = document.createElement("video"); - this.#video.style.display = "none"; - this.#video.playsInline = true; - this.#video.muted = true; // Required for autoplay - document.body.appendChild(this.#video); - - this.#video.addEventListener("waiting", () => {}); - this.#video.addEventListener("ended", () => { - if (!this.#video) return; - const videoBuffered = this.#video.buffered; - const current = this.#video.currentTime; - - if (videoBuffered && videoBuffered.length > 0) { - const lastRange = videoBuffered.length - 1; - const end = videoBuffered.end(lastRange); - if (current < end) { - this.#video.currentTime = current; - this.#video.play().catch((err) => console.error("[MSE] Failed to resume after ended:", err)); - } - } - }); - - this.#video.addEventListener("timeupdate", () => { - if (!this.#video) return; - const videoBuffered = this.#video.buffered; - const current = this.#video.currentTime; - if (videoBuffered && videoBuffered.length > 0) { - const lastRange = videoBuffered.length - 1; - const end = videoBuffered.end(lastRange); - const remaining = end - current; - if (remaining <= 0.1 && this.#video.paused) { - this.#video.play().catch((err) => console.error("[MSE] Failed to resume playback:", err)); - } - } - - // Sync audio to video (very conservative 
to minimize choppiness) - if (this.#audioElement && this.#audioElement.readyState >= HTMLMediaElement.HAVE_METADATA) { - const now = performance.now(); - // Only check sync every 5 seconds to minimize seeks - if (now - this.#lastSyncTime < 5000) { - return; - } - - const audioTime = this.#audioElement.currentTime; - const diff = Math.abs(current - audioTime); - // This allows some drift but prevents major desync - if (diff > 0.5) { - const audioBuffered = this.#audioElement.buffered; - if (audioBuffered && audioBuffered.length > 0) { - for (let i = 0; i < audioBuffered.length; i++) { - if (audioBuffered.start(i) <= current && current <= audioBuffered.end(i)) { - this.#audioElement.currentTime = current; - this.#lastSyncTime = now; - break; - } - } - } - } - } - }); - - this.#mediaSource = new MediaSource(); - const url = URL.createObjectURL(this.#mediaSource); - this.#video.src = url; - this.#video.currentTime = 0; - - await new Promise((resolve, reject) => { - const timeout = setTimeout(() => { - reject(new Error("MediaSource sourceopen timeout")); - }, 5000); - - this.#mediaSource?.addEventListener( - "sourceopen", - () => { - clearTimeout(timeout); - try { - this.#sourceBuffer = this.#mediaSource?.addSourceBuffer(mimeType); - if (!this.#sourceBuffer) { - reject(new Error("Failed to create SourceBuffer")); - return; - } - this.#setupSourceBuffer(); - resolve(); - } catch (error) { - reject(error); - } - }, - { once: true }, - ); - - this.#mediaSource?.addEventListener("error", (e) => { - clearTimeout(timeout); - reject(new Error(`MediaSource error: ${e}`)); - }); - }); - - this.#startFrameCapture(); - } - - #setupSourceBuffer(): void { - if (!this.#sourceBuffer) return; - - this.#sourceBuffer.addEventListener("updateend", () => { - this.#processAppendQueue(); - }); - - this.#sourceBuffer.addEventListener("error", (e) => { - console.error("SourceBuffer error:", e); - }); - } - - #startFrameCapture(): void { - if (!this.#video) return; - - const captureFrame = () => { - if (!this.#video) return; - - try { - const frame = new VideoFrame(this.#video, { - timestamp: this.#video.currentTime * 1_000_000, // Convert to microseconds - }); - - this.#stats.update((current) => ({ - frameCount: (current?.frameCount ?? 0) + 1, - timestamp: frame.timestamp, - bytesReceived: current?.bytesReceived ?? 
0, - })); - - this.frame.update((prev) => { - prev?.close(); - return frame; - }); - - if (this.#video.videoWidth && this.#video.videoHeight) { - this.display.set({ - width: this.#video.videoWidth, - height: this.#video.videoHeight, - }); - } - - if (this.#video.readyState >= HTMLMediaElement.HAVE_CURRENT_DATA) { - this.bufferStatus.set({ state: "filled" }); - } - } catch (error) { - console.error("Error capturing frame:", error); - } - - if (this.#video.requestVideoFrameCallback) { - this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame); - } else { - // Fallback: use requestAnimationFrame - this.#frameCallbackId = requestAnimationFrame(captureFrame) as unknown as number; - } - }; - - if (this.#video.requestVideoFrameCallback) { - this.#frameCallbackId = this.#video.requestVideoFrameCallback(captureFrame); - } else { - this.#frameCallbackId = requestAnimationFrame(captureFrame) as unknown as number; - } - } - - async appendFragment(fragment: Uint8Array): Promise { - if (!this.#sourceBuffer || !this.#mediaSource) { - throw new Error("SourceBuffer not initialized"); - } - if (this.#appendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { - const discarded = this.#appendQueue.shift(); - console.warn( - `[MSE] Queue full (${SourceMSE.MAX_QUEUE_SIZE}), discarding oldest fragment (${discarded?.byteLength ?? 0} bytes)`, - ); - } - - const copy = new Uint8Array(fragment); - this.#appendQueue.push(copy); - - this.#processAppendQueue(); - } - - #concatenateFragments(fragments: Uint8Array[]): Uint8Array { - if (fragments.length === 1) { - return fragments[0]; - } - - const totalSize = fragments.reduce((sum, frag) => sum + frag.byteLength, 0); - const result = new Uint8Array(totalSize); - let offset = 0; - for (const fragment of fragments) { - result.set(fragment, offset); - offset += fragment.byteLength; - } - - return result; - } - - #processAppendQueue(): void { - if (!this.#sourceBuffer || this.#sourceBuffer.updating || this.#appendQueue.length === 0) { - return; - } - - if (this.#mediaSource?.readyState !== "open") { - console.error(`[MSE] MediaSource not open: ${this.#mediaSource?.readyState}`); - return; - } - - const fragment = this.#appendQueue.shift(); - if (!fragment) return; - - try { - // appendBuffer accepts BufferSource (ArrayBuffer or ArrayBufferView) - this.#sourceBuffer.appendBuffer(fragment as BufferSource); - - this.#stats.update((current) => ({ - frameCount: current?.frameCount ?? 0, - timestamp: current?.timestamp ?? 0, - bytesReceived: (current?.bytesReceived ?? 
0) + fragment.byteLength, - })); - } catch (error) { - console.error("[MSE] Error appending fragment:", error); - console.error("[MSE] SourceBuffer state:", { - updating: this.#sourceBuffer.updating, - buffered: this.#sourceBuffer.buffered.length, - }); - console.error("[MSE] MediaSource state:", { - readyState: this.#mediaSource.readyState, - duration: this.#mediaSource.duration, - }); - } - } - - async runTrack( - effect: Effect, - broadcast: Moq.Broadcast, - name: string, - config: RequiredDecoderConfig, - ): Promise { - await this.initialize(config); - - const sub = broadcast.subscribe(name, PRIORITY.video); - effect.cleanup(() => sub.close()); - - const consumer = new Frame.Consumer(sub, { - latency: this.latency, - container: "cmaf", // CMAF fragments - }); - effect.cleanup(() => consumer.close()); - - effect.spawn(async () => { - if (!this.#video) return; - - await new Promise((resolve) => { - let checkCount = 0; - const maxChecks = 100; // 10 seconds max wait - let hasSeeked = false; - - const checkReady = () => { - checkCount++; - if (this.#video) { - const videoBuffered = this.#video.buffered; - const hasBufferedData = videoBuffered && videoBuffered.length > 0; - const currentTime = this.#video.currentTime; - const isTimeBuffered = - hasBufferedData && - videoBuffered.start(0) <= currentTime && - currentTime < videoBuffered.end(videoBuffered.length - 1); - - if (hasBufferedData && !isTimeBuffered && !hasSeeked) { - const seekTime = videoBuffered.start(0); - this.#video.currentTime = seekTime; - hasSeeked = true; - setTimeout(checkReady, 100); - return; - } - - if (this.#video.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) { - this.#video - .play() - .then(() => { - resolve(); - }) - .catch((error) => { - console.error("[MSE] Video play() failed:", error); - resolve(); - }); - } else if (hasBufferedData && checkCount >= 10) { - // If we have buffered data but readyState hasn't advanced, try playing anyway after 1 second - this.#video - .play() - .then(() => { - resolve(); - }) - .catch((error) => { - console.error("[MSE] Video play() failed:", error); - if (checkCount < maxChecks) { - setTimeout(checkReady, 100); - } else { - resolve(); - } - }); - } else if (checkCount >= maxChecks) { - this.#video - .play() - .then(() => { - resolve(); - }) - .catch(() => { - resolve(); - }); - } else { - setTimeout(checkReady, 100); - } - } - }; - checkReady(); - }); - }); - - // Track if we've received the init segment (ftyp+moov or moov) - let initSegmentReceived = false; - - // Helper function to detect init segment (ftyp or moov atom) - // The init segment may start with "ftyp" followed by "moov", or just "moov" - function isInitSegmentData(data: Uint8Array): boolean { - if (data.length < 8) return false; - - let offset = 0; - const len = data.length; - - while (offset + 8 <= len) { - // Atom size (big endian) - const size = - (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; - - const type = String.fromCharCode( - data[offset + 4], - data[offset + 5], - data[offset + 6], - data[offset + 7], - ); - - // Init segment contains either "ftyp" or "moov" atoms - if (type === "ftyp" || type === "moov") return true; - - if (size < 8 || size === 0) break; - offset += size; - } - - return false; - } - - // Read fragments and append to SourceBuffer - // MSE requires complete GOPs to be appended in a single operation - // We group fragments by MOQ group (which corresponds to GOPs) before appending - effect.spawn(async () => { - let currentGroup: number | 
undefined; - let gopFragments: Uint8Array[] = []; // Accumulate fragments for current GOP - - for (;;) { - const frame = await Promise.race([consumer.decode(), effect.cancel]); - if (!frame) { - if (gopFragments.length > 0 && initSegmentReceived) { - const gopData = this.#concatenateFragments(gopFragments); - await this.appendFragment(gopData); - gopFragments = []; - } - break; - } - - const containsInitSegmentData = isInitSegmentData(frame.data); - const isInitSegment = containsInitSegmentData && !initSegmentReceived; - - if (isInitSegment) { - if (gopFragments.length > 0 && initSegmentReceived) { - const gopData = this.#concatenateFragments(gopFragments); - await this.appendFragment(gopData); - gopFragments = []; - } - - await this.appendFragment(frame.data); - initSegmentReceived = true; - continue; - } - - if (!initSegmentReceived) { - continue; - } - - if (currentGroup !== undefined && frame.group !== currentGroup) { - if (gopFragments.length > 0) { - const gopData = this.#concatenateFragments(gopFragments); - await this.appendFragment(gopData); - gopFragments = []; - } - } - - if (currentGroup === undefined || frame.group !== currentGroup) { - currentGroup = frame.group; - gopFragments = []; - } - - gopFragments.push(frame.data); - - if (gopFragments.length >= 1) { - const gopData = this.#concatenateFragments(gopFragments); - await this.appendFragment(gopData); - gopFragments = []; - } - } - }); - } - - close(): void { - this.#appendQueue = []; - - if (this.#frameCallbackId !== undefined) { - if (this.#video?.requestVideoFrameCallback) { - this.#video.cancelVideoFrameCallback(this.#frameCallbackId); - } else { - cancelAnimationFrame(this.#frameCallbackId); - } - } - - this.frame.update((prev) => { - prev?.close(); - return undefined; - }); - - if (this.#sourceBuffer && this.#mediaSource) { - try { - if (this.#sourceBuffer.updating) { - this.#sourceBuffer.abort(); - } - if (this.#mediaSource.readyState === "open") { - this.#mediaSource.endOfStream(); - } - } catch (error) { - console.error("Error closing SourceBuffer:", error); - } - } - - if (this.#mediaSource) { - try { - if (this.#mediaSource.readyState === "open") { - this.#mediaSource.endOfStream(); - } - URL.revokeObjectURL(this.#video?.src || ""); - } catch (error) { - console.error("Error closing MediaSource:", error); - } - } - - if (this.#video) { - this.#video.pause(); - this.#video.src = ""; - this.#video.remove(); - } - - this.#signals.close(); - } - - get stats() { - return this.#stats; - } -} diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index 45dea5944..ff3799ec2 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -99,8 +99,14 @@ export class Source { #signals = new Effect(); - // Optional method set by MSE path for audio synchronization - setAudioSync?(audioElement: HTMLAudioElement | undefined): void; + // Expose MediaSource for audio to use + #mseMediaSource = new Signal(undefined); + readonly mseMediaSource = this.#mseMediaSource as Getter; + + // Expose mseSource instance for audio to access coordination methods + #mseSource = new Signal(undefined); + readonly mseSource = this.#mseSource as Getter; + constructor( broadcast: Signal, @@ -223,7 +229,7 @@ export class Source { }); // Import MSE source dynamically to avoid loading if not needed effect.spawn(async () => { - const { SourceMSE } = await import("./source-mse.js"); + const { SourceMSE } = await import("../source-mse.js"); const mseSource = new SourceMSE(this.latency); 
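
The signal-based handoff above replaces the old `setAudioSync` callback because of a basic MSE constraint: a media element can be attached to only one MediaSource, and each elementary stream needs its own SourceBuffer on that single instance, so the audio path must reach the MediaSource that the video path owns. A minimal standalone sketch of that topology, with illustrative codec strings (the real MIME types are built from the catalog configs; `createSharedMediaSource` is a hypothetical name, not part of this patch):

```ts
// One hidden <video> element, one MediaSource, two SourceBuffers.
// Audio cannot create a second MediaSource for the same element, which is
// why the diff exposes the video-owned SourceMSE instance through a signal.
function createSharedMediaSource(videoInit: Uint8Array, audioInit: Uint8Array): HTMLVideoElement {
	const video = document.createElement("video");
	const mediaSource = new MediaSource();
	video.src = URL.createObjectURL(mediaSource);

	mediaSource.addEventListener(
		"sourceopen",
		() => {
			// Illustrative codec strings; real ones come from the catalog.
			const videoBuffer = mediaSource.addSourceBuffer('video/mp4; codecs="avc1.64001f"');
			const audioBuffer = mediaSource.addSourceBuffer('audio/mp4; codecs="mp4a.40.2"');
			// Init segments go first; moof+mdat fragments follow on each buffer.
			videoBuffer.appendBuffer(videoInit);
			audioBuffer.appendBuffer(audioInit);
		},
		{ once: true },
	);
	return video;
}
```
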
effect.cleanup(() => mseSource.close()); @@ -248,16 +254,20 @@ export class Source { eff.set(this.syncStatus, status, { state: "ready" }); }); + this.#signals.effect((eff) => { + const mediaSource = eff.get(mseSource.mediaSource); + eff.set(this.#mseMediaSource, mediaSource); + }); + + // Expose mseSource for audio to access + this.#signals.effect((eff) => { + eff.set(this.#mseSource, mseSource); + }); + this.#signals.effect((eff) => { const stats = eff.get(mseSource.stats); eff.set(this.#stats, stats); }); - - // Expose method to set audio element for synchronization - this.setAudioSync = (audioElement: HTMLAudioElement | undefined) => { - mseSource.setAudioSync(audioElement); - }; - // Run MSE track try { await mseSource.runTrack(effect, broadcast, name, config); diff --git a/rs/hang/examples/video.rs b/rs/hang/examples/video.rs index 64f88135b..112e00a8b 100644 --- a/rs/hang/examples/video.rs +++ b/rs/hang/examples/video.rs @@ -71,6 +71,7 @@ fn create_track(broadcast: &mut moq_lite::BroadcastProducer) -> hang::TrackProdu display_ratio_height: None, optimize_for_latency: None, container: hang::catalog::Container::Native, + init_segment: None, }; // Create a map of video renditions diff --git a/rs/hang/src/catalog/audio/mod.rs b/rs/hang/src/catalog/audio/mod.rs index e7e7d79ba..271fcb515 100644 --- a/rs/hang/src/catalog/audio/mod.rs +++ b/rs/hang/src/catalog/audio/mod.rs @@ -9,7 +9,7 @@ use std::collections::BTreeMap; use bytes::Bytes; use serde::{Deserialize, Serialize}; -use serde_with::{hex::Hex, DisplayFromStr}; +use serde_with::{base64::Base64, hex::Hex, DisplayFromStr}; use crate::catalog::container::Container; @@ -66,4 +66,14 @@ pub struct AudioConfig { /// Container format for frame encoding. /// Defaults to "native" for backward compatibility. pub container: Container, + + /// Init segment (ftyp+moov) for CMAF/fMP4 containers. + /// + /// This is the initialization segment needed for MSE playback. + /// Stored as base64-encoded bytes and embedded in the catalog (as suggested + /// in feedback). Init segments should not be sent over data tracks or at the + /// start of each group. + #[serde(default)] + #[serde_as(as = "Option")] + pub init_segment: Option, } diff --git a/rs/hang/src/catalog/root.rs b/rs/hang/src/catalog/root.rs index f0256cb88..608458416 100644 --- a/rs/hang/src/catalog/root.rs +++ b/rs/hang/src/catalog/root.rs @@ -342,6 +342,7 @@ mod test { framerate: Some(30.0), optimize_for_latency: None, container: Container::Native, + init_segment: None, }, ); @@ -355,6 +356,7 @@ mod test { bitrate: Some(128_000), description: None, container: Container::Native, + init_segment: None, }, ); diff --git a/rs/hang/src/catalog/video/mod.rs b/rs/hang/src/catalog/video/mod.rs index ada42c950..615934f72 100644 --- a/rs/hang/src/catalog/video/mod.rs +++ b/rs/hang/src/catalog/video/mod.rs @@ -14,7 +14,7 @@ use std::collections::BTreeMap; use bytes::Bytes; use serde::{Deserialize, Serialize}; -use serde_with::{hex::Hex, DisplayFromStr}; +use serde_with::{base64::Base64, hex::Hex, DisplayFromStr}; use crate::catalog::container::Container; @@ -115,4 +115,18 @@ pub struct VideoConfig { /// Container format for frame encoding. /// Defaults to "native" for backward compatibility. pub container: Container, + + /// Init segment (ftyp+moov) for CMAF/fMP4 containers. + /// + /// This is the initialization segment needed for MSE playback. + /// Stored as base64-encoded bytes and embedded in the catalog (as suggested + /// in feedback). 
Init segments should not be sent over data tracks or at the + /// start of each group. + /// + /// Note: A future optimization could build init segments from the description + /// field (e.g., avcC box for H.264) along with other catalog metadata, but + /// for now we store the complete init segment for simplicity and correctness. + #[serde(default)] + #[serde_as(as = "Option")] + pub init_segment: Option, } diff --git a/rs/hang/src/import/aac.rs b/rs/hang/src/import/aac.rs index 38d28508e..fe79f652e 100644 --- a/rs/hang/src/import/aac.rs +++ b/rs/hang/src/import/aac.rs @@ -108,6 +108,7 @@ impl Aac { bitrate: None, description: None, container: hang::catalog::Container::Native, + init_segment: None, }; tracing::debug!(name = ?track.name, ?config, "starting track"); diff --git a/rs/hang/src/import/avc3.rs b/rs/hang/src/import/avc3.rs index dea2555a9..b2b14e8de 100644 --- a/rs/hang/src/import/avc3.rs +++ b/rs/hang/src/import/avc3.rs @@ -63,6 +63,7 @@ impl Avc3 { display_ratio_height: None, optimize_for_latency: None, container: hang::catalog::Container::Native, + init_segment: None, }; if let Some(old) = &self.config { diff --git a/rs/hang/src/import/fmp4.rs b/rs/hang/src/import/fmp4.rs index 42ca1948a..be1f2da1f 100644 --- a/rs/hang/src/import/fmp4.rs +++ b/rs/hang/src/import/fmp4.rs @@ -40,6 +40,9 @@ pub struct Fmp4 { // The timestamp of the last keyframe for each track last_keyframe: HashMap, + // Track if we've sent the first frame for each track (needed for passthrough mode) + first_frame_sent: HashMap, + // The moov atom at the start of the file. moov: Option, @@ -59,10 +62,6 @@ pub struct Fmp4 { /// When passthrough_mode is enabled, store raw bytes of moov (init segment) moov_bytes: Option, - - /// When passthrough_mode is enabled, store a copy of init segment (ftyp+moov) to send with each keyframe - /// This ensures new subscribers can receive the init segment even if group 0 is not available - init_segment_bytes_for_keyframes: Option, } impl Fmp4 { @@ -77,6 +76,7 @@ impl Fmp4 { catalog, tracks: HashMap::default(), last_keyframe: HashMap::default(), + first_frame_sent: HashMap::default(), moov: None, moof: None, moof_size: 0, @@ -84,7 +84,6 @@ impl Fmp4 { moof_bytes: None, ftyp_bytes: None, moov_bytes: None, - init_segment_bytes_for_keyframes: None, } } @@ -252,12 +251,12 @@ impl Fmp4 { tracing::debug!(name = ?track.name, ?config, "starting track"); - let video = catalog.insert_video(track.name.clone(), config); + let video = catalog.insert_video(track.name.clone(), config.clone()); video.priority = 1; let track = track.produce(); self.broadcast.insert_track(track.consumer); - track.producer + hang::TrackProducer::new(track.producer, config.container) } b"soun" => { let config = Self::init_audio_static(trak, passthrough_mode)?; @@ -270,45 +269,60 @@ impl Fmp4 { tracing::debug!(name = ?track.name, ?config, "starting track"); - let audio = catalog.insert_audio(track.name.clone(), config); + let audio = catalog.insert_audio(track.name.clone(), config.clone()); audio.priority = 2; let track = track.produce(); self.broadcast.insert_track(track.consumer); - track.producer + hang::TrackProducer::new(track.producer, config.container) } b"sbtl" => anyhow::bail!("subtitle tracks are not supported"), handler => anyhow::bail!("unknown track type: {:?}", handler), }; - self.tracks.insert(track_id, track.into()); + self.tracks.insert(track_id, track); } + // Verify that the moov atom contains all expected tracks BEFORE moving it + let moov_track_count = moov.trak.len(); + let has_video = 
moov.trak.iter().any(|t| t.mdia.hdlr.handler.as_ref() == b"vide"); + let has_audio = moov.trak.iter().any(|t| t.mdia.hdlr.handler.as_ref() == b"soun"); + self.moov = Some(moov); - // In passthrough mode, send the init segment (ftyp+moov) as a special frame - // This must be sent before any fragments for MSE to work - // NOTE: We send this AFTER creating tracks so that the tracks exist - // when we try to write to them. The init segment will create the first - // group (sequence 0), and fragments will create subsequent groups. + // In passthrough mode, store the init segment (ftyp+moov) in the catalog + // instead of sending it over the data tracks. This allows clients to + // reconstruct init segments from the catalog. + // + // Note: Init segments are embedded in the catalog. + // A future optimization could build init segments from the description field + // (e.g., avcC box for H.264) along with other catalog metadata, but for now + // we store the complete init segment for simplicity and correctness. if passthrough_mode { - if let Some(moov_bytes) = self.moov_bytes.take() { - let timestamp = hang::Timestamp::from_micros(0)?; - + if let Some(moov_bytes) = self.moov_bytes.as_ref() { // Build init segment: ftyp (if available) + moov let mut init_segment = BytesMut::new(); if let Some(ref ftyp_bytes) = self.ftyp_bytes { init_segment.extend_from_slice(ftyp_bytes); tracing::debug!(ftyp_size = ftyp_bytes.len(), "including ftyp in init segment"); } - init_segment.extend_from_slice(&moov_bytes); + init_segment.extend_from_slice(moov_bytes); let init_segment_bytes = init_segment.freeze(); + // Verify that the moov atom contains all expected tracks + let expected_video_tracks = catalog.video.as_ref().map(|v| v.renditions.len()).unwrap_or(0); + let expected_audio_tracks = catalog.audio.as_ref().map(|a| a.renditions.len()).unwrap_or(0); + tracing::info!( - tracks = self.tracks.len(), + tracks_in_moov = moov_track_count, + expected_video = expected_video_tracks, + expected_audio = expected_audio_tracks, + tracks_processed = self.tracks.len(), init_segment_size = init_segment_bytes.len(), ftyp_included = self.ftyp_bytes.is_some(), - "sending init segment to all tracks" + has_video = has_video, + has_audio = has_audio, + "storing init segment in catalog" ); // Verify moov atom signature @@ -318,22 +332,96 @@ impl Fmp4 { tracing::info!(atom_type = %atom_type, "verifying moov atom signature in init segment"); } - // Store a copy for sending with keyframes - self.init_segment_bytes_for_keyframes = Some(init_segment_bytes.clone()); + // Warn if moov doesn't contain expected tracks. + // For HLS, inits are per-track (video-only or audio-only), so skip cross-track warnings. + let video_only = has_video && !has_audio; + let audio_only = has_audio && !has_video; + if expected_video_tracks > 0 && !has_video && !audio_only { + tracing::error!( + "moov atom does not contain video track but video configs exist! This will cause client-side errors." + ); + } + if expected_audio_tracks > 0 && !has_audio && !video_only { + tracing::error!( + "moov atom does not contain audio track but audio configs exist! This will cause client-side errors." 
+ ); + } - // Send init segment to all tracks - this creates the first group (sequence 0) - for (_track_id, track) in &mut self.tracks { - let frame = hang::Frame { - timestamp, - keyframe: true, // Init segment is always a keyframe - this creates a new group - payload: init_segment_bytes.clone().into(), - }; - track.write(frame)?; - tracing::debug!(track_id = ?_track_id, timestamp = ?timestamp, "wrote init segment frame to track"); + // Store init segment in catalog for the relevant track type + // For HLS, each track has its own init segment (video init segment only has video, + // audio init segment only has audio). For direct fMP4 files, the init segment + // contains all tracks. We store track-specific init segments in their respective configs. + + if has_video { + if let Some(video) = catalog.video.as_mut() { + for (name, config) in video.renditions.iter_mut() { + config.init_segment = Some(init_segment_bytes.clone()); + tracing::debug!( + video_track = %name, + init_segment_size = init_segment_bytes.len(), + has_video_track = has_video, + has_audio_track = has_audio, + "stored init segment in video config" + ); + } + } + } + + if has_audio { + if let Some(audio) = catalog.audio.as_mut() { + for (name, config) in audio.renditions.iter_mut() { + config.init_segment = Some(init_segment_bytes.clone()); + tracing::debug!( + audio_track = %name, + init_segment_size = init_segment_bytes.len(), + has_video_track = has_video, + has_audio_track = has_audio, + "stored init segment in audio config" + ); + } + } + } + + // If the init segment contains both tracks (e.g., from a direct fMP4 file), + // also store it in the other config type for convenience + if has_video && has_audio { + // Full init segment with both tracks - store in both configs + if let Some(video) = catalog.video.as_mut() { + for (name, config) in video.renditions.iter_mut() { + if config.init_segment.is_none() { + config.init_segment = Some(init_segment_bytes.clone()); + tracing::debug!( + video_track = %name, + "stored full init segment (with both tracks) in video config" + ); + } + } + } + if let Some(audio) = catalog.audio.as_mut() { + for (name, config) in audio.renditions.iter_mut() { + if config.init_segment.is_none() { + config.init_segment = Some(init_segment_bytes.clone()); + tracing::debug!( + audio_track = %name, + "stored full init segment (with both tracks) in audio config" + ); + } + } + } } - tracing::info!("init segment (ftyp+moov) sent to all tracks - should create groups with sequence 0"); + + tracing::info!( + has_video = has_video, + has_audio = has_audio, + tracks_in_moov = moov_track_count, + "init segment (ftyp+moov) stored in catalog for all tracks" + ); + + // Init has been stored; clear cached moov/ftyp to avoid repeated warnings later. 
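
On the client, the counterpart of this change is small: the init segment now arrives in the catalog instead of on the data track. A hedged sketch of consuming it, assuming the config is delivered as JSON with a base64-encoded `initSegment` field (the Rust side serializes `init_segment: Option<Bytes>` via serde_with's Base64; the exact camelCase key name is an assumption here):

```ts
// Decode the catalog-embedded init segment and hold it as raw bytes so it
// can be appended to the SourceBuffer before any moof+mdat fragment.
function decodeInitSegment(config: { initSegment?: string }): Uint8Array | undefined {
	if (!config.initSegment) return undefined;
	const binary = atob(config.initSegment);
	const bytes = new Uint8Array(binary.length);
	for (let i = 0; i < binary.length; i++) {
		bytes[i] = binary.charCodeAt(i);
	}
	return bytes;
}

// Usage sketch: feed the init segment to the SourceBuffer first.
// const init = decodeInitSegment(videoConfig);
// if (init) sourceBuffer.appendBuffer(init);
```
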
+ self.moov_bytes = None; + self.ftyp_bytes = None; } else { - tracing::warn!("passthrough mode enabled but moov_bytes is None - init segment will not be sent"); + tracing::warn!("passthrough mode enabled but moov_bytes is None - init segment will not be stored in catalog"); } } @@ -378,6 +466,7 @@ impl Fmp4 { } else { Container::Native }, + init_segment: None, } } mp4_atom::Codec::Hev1(hev1) => Self::init_h265_static(true, &hev1.hvcc, &hev1.visual, passthrough_mode)?, @@ -398,6 +487,7 @@ impl Fmp4 { } else { Container::Native }, + init_segment: None, }, mp4_atom::Codec::Vp09(vp09) => { // https://github.com/gpac/mp4box.js/blob/325741b592d910297bf609bc7c400fc76101077b/src/box-codecs.js#L238 @@ -429,6 +519,7 @@ impl Fmp4 { } else { Container::Native }, + init_segment: None, } } mp4_atom::Codec::Av01(av01) => { @@ -466,6 +557,7 @@ impl Fmp4 { } else { Container::Native }, + init_segment: None, } } mp4_atom::Codec::Unknown(unknown) => anyhow::bail!("unknown codec: {:?}", unknown), @@ -501,6 +593,7 @@ impl Fmp4 { coded_height: Some(visual.height as _), // TODO: populate these fields bitrate: None, + init_segment: None, framerate: None, display_ratio_width: None, display_ratio_height: None, @@ -547,6 +640,7 @@ impl Fmp4 { } else { Container::Native }, + init_segment: None, } } mp4_atom::Codec::Opus(opus) => { @@ -561,6 +655,7 @@ impl Fmp4 { } else { Container::Native }, + init_segment: None, } } mp4_atom::Codec::Unknown(unknown) => anyhow::bail!("unknown codec: {:?}", unknown), @@ -778,41 +873,26 @@ impl Fmp4 { self.last_keyframe.insert(track_id, timestamp); } - // In passthrough mode, create new groups periodically (every keyframe) to allow - // new subscribers to join at the most recent point. Each group starts with init segment. - // This makes it behave like a live stream where new subscribers start from recent content. 
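
The replacement logic in the hunk below drops per-keyframe init-segment injection in favor of one rule: the first fragment on a track must open a group, and after that fragments keep their real keyframe flag so each GOP maps to one MoQ group. Transliterated to TypeScript for illustration (the authoritative version is the Rust that follows; names here are illustrative):

```ts
// Mirror of the passthrough grouping rule: a MoQ group must start with a
// keyframe, so the very first fragment is promoted even if the encoder did
// not flag it; later fragments use their actual keyframe status.
const firstFrameSent = new Map<number, boolean>();

function passthroughKeyframe(trackId: number, isKeyframe: boolean): boolean {
	const isFirst = !firstFrameSent.get(trackId);
	if (isFirst) firstFrameSent.set(trackId, true);
	return isFirst || isKeyframe;
}
```
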
+ // In passthrough mode, send fragments directly without init segments + // Init segments are stored in the catalog and reconstructed on the client side if self.passthrough_mode { - // For keyframes, send init segment to create a new group (every keyframe creates a new group) - // This allows new subscribers to receive the init segment and start from recent content - if is_keyframe { - tracing::info!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "KEYFRAME DETECTED - creating new group"); - if let Some(ref init_segment_bytes) = self.init_segment_bytes_for_keyframes { - let init_frame = hang::Frame { - timestamp, - keyframe: true, // Send as keyframe to create a new group - payload: init_segment_bytes.clone().into(), - }; - track.write(init_frame)?; - tracing::info!(track_id, timestamp = ?timestamp, init_segment_size = init_segment_bytes.len(), "sent init segment as first frame of new group (keyframe) for live stream"); - } else { - tracing::warn!( - track_id, - "is_keyframe=true but init_segment_bytes_for_keyframes is None" - ); - } - } else { - tracing::debug!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "non-keyframe fragment in passthrough mode"); + // The first frame must be a keyframe to create the initial group + // After that, we can send fragments based on their actual keyframe status + let is_first_frame = !self.first_frame_sent.get(&track_id).copied().unwrap_or(false); + let should_be_keyframe = is_first_frame || is_keyframe; + + if is_first_frame { + self.first_frame_sent.insert(track_id, true); } - // Send fragment as non-keyframe (in same group as init segment if keyframe, or current group if not) let frame = hang::Frame { timestamp, - keyframe: false, // Send as non-keyframe so it goes in the same group as init segment (if keyframe) or current group + keyframe: should_be_keyframe, payload: fragment.clone().into(), }; track.write(frame)?; - if is_keyframe { - tracing::info!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "sent keyframe fragment in passthrough mode (new group created)"); + if should_be_keyframe { + tracing::info!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), is_first = is_first_frame, "sent fragment in passthrough mode (keyframe - creates group)"); } else { tracing::debug!(track_id, timestamp = ?timestamp, fragment_size = fragment.len(), "sent non-keyframe fragment in passthrough mode"); } diff --git a/rs/hang/src/import/hev1.rs b/rs/hang/src/import/hev1.rs index 0e01c7ffd..5f3372fa9 100644 --- a/rs/hang/src/import/hev1.rs +++ b/rs/hang/src/import/hev1.rs @@ -63,6 +63,7 @@ impl Hev1 { display_ratio_height: vui_data.display_ratio_height, optimize_for_latency: None, container: hang::catalog::Container::Native, + init_segment: None, }; if let Some(old) = &self.config { diff --git a/rs/hang/src/import/opus.rs b/rs/hang/src/import/opus.rs index ac46ba9b2..497a20cd6 100644 --- a/rs/hang/src/import/opus.rs +++ b/rs/hang/src/import/opus.rs @@ -54,6 +54,7 @@ impl Opus { bitrate: None, description: None, container: hang::catalog::Container::Native, + init_segment: None, }; tracing::debug!(name = ?track.name, ?config, "starting track"); diff --git a/rs/hang/src/model/track.rs b/rs/hang/src/model/track.rs index 8a92d7c86..e2f3f69eb 100644 --- a/rs/hang/src/model/track.rs +++ b/rs/hang/src/model/track.rs @@ -1,6 +1,7 @@ use std::collections::VecDeque; use std::ops::Deref; +use crate::catalog::Container; use crate::model::{Frame, GroupConsumer, Timestamp}; use crate::Error; use 
futures::{stream::FuturesUnordered, StreamExt}; @@ -26,16 +27,18 @@ pub struct TrackProducer { /// Track if the current group is the init segment group (timestamp 0) /// We keep this group open so new subscribers can receive the init segment is_init_segment_group: bool, + container: Container } impl TrackProducer { /// Create a new TrackProducer wrapping the given moq-lite producer. - pub fn new(inner: moq_lite::TrackProducer) -> Self { + pub fn new(inner: moq_lite::TrackProducer, container: Container) -> Self { Self { inner, group: None, keyframe: None, is_init_segment_group: false, + container, } } @@ -52,7 +55,9 @@ impl TrackProducer { tracing::trace!(?frame, "write frame"); let mut header = BytesMut::new(); - frame.timestamp.as_micros().encode(&mut header, lite::Version::Draft02); + if self.container != Container::Cmaf { + frame.timestamp.as_micros().encode(&mut header, lite::Version::Draft02); + } if frame.keyframe { if let Some(group) = self.group.take() { @@ -100,10 +105,14 @@ let size = header.len() + frame.payload.remaining(); let mut chunked = group.create_frame(size.into()); - chunked.write_chunk(header.freeze()); + if !header.is_empty() { + chunked.write_chunk(header.freeze()); + } + for chunk in frame.payload { chunked.write_chunk(chunk); } + chunked.close(); self.group.replace(group); @@ -122,7 +131,7 @@ impl From for TrackProducer { fn from(inner: moq_lite::TrackProducer) -> Self { - Self::new(inner) + Self::new(inner, Container::Native) } } From 3fa2bc801c1577f0e43208f82f9e4e78e5d091a4 Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Thu, 15 Jan 2026 11:24:27 -0300 Subject: [PATCH 15/16] base64 to cargo --- rs/hang/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/hang/Cargo.toml b/rs/hang/Cargo.toml index 75928bc1f..8e7500a4a 100644 --- a/rs/hang/Cargo.toml +++ b/rs/hang/Cargo.toml @@ -32,7 +32,7 @@ reqwest = { version = "0.12", default-features = false, features = [ scuffle-h265 = "0.2.2" serde = { workspace = true } serde_json = "1" -serde_with = { version = "3", features = ["hex"] } +serde_with = { version = "3", features = ["hex", "base64"] } thiserror = "2" tokio = { workspace = true, features = ["macros", "fs"] } tracing = "0.1" From 3857fb2f79527ced0006dc695b42f364f21fe1ca Mon Sep 17 00:00:00 2001 From: Juan Pablo Bustamante Date: Thu, 15 Jan 2026 15:39:20 -0300 Subject: [PATCH 16/16] fix minor issues - avoid playback by group --- js/hang/src/frame.ts | 2 +- js/hang/src/watch/audio/emitter.ts | 1 - js/hang/src/watch/audio/source.ts | 92 +++-- js/hang/src/watch/broadcast.ts | 6 +- js/hang/src/watch/source-mse.ts | 528 +++++++++++++++-------------- js/hang/src/watch/video/source.ts | 8 +- rs/hang/src/import/fmp4.rs | 88 ++--- rs/hang/src/model/track.rs | 4 +- 8 files changed, 375 insertions(+), 354 deletions(-) diff --git a/js/hang/src/frame.ts b/js/hang/src/frame.ts index 945d872d4..fa7d2b4da 100644 --- a/js/hang/src/frame.ts +++ b/js/hang/src/frame.ts @@ -24,7 +24,7 @@ export function encode(source: Uint8Array | Source, timestamp: Time.Micro, conta if (container === "cmaf") { if (source instanceof Uint8Array) { return source; - } + } const data = new Uint8Array(source.byteLength); source.copyTo(data); return data; diff --git a/js/hang/src/watch/audio/emitter.ts b/js/hang/src/watch/audio/emitter.ts index a859ce8c3..2bdbb9870 100644 --- a/js/hang/src/watch/audio/emitter.ts +++ b/js/hang/src/watch/audio/emitter.ts @@ -47,7 +47,6 @@ export class Emitter { this.#signals.effect((effect) => {
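
Earlier in this patch, `TrackProducer::write` skips the varint timestamp header for CMAF tracks and `frame.ts` returns CMAF payloads untouched, since a moof+mdat fragment already carries its own timing (tfdt). A sketch of the resulting container-dependent framing, with a hypothetical `writeVarint` standing in for the QUIC VarInt encoder used on the Rust side:

```ts
// QUIC variable-length integer: the two most significant bits of the first
// byte encode the total length (1, 2, 4, or 8 bytes), big-endian payload.
function writeVarint(value: bigint): Uint8Array {
	let size = 8;
	let prefix = 3n;
	if (value < 64n) {
		size = 1;
		prefix = 0n;
	} else if (value < 16384n) {
		size = 2;
		prefix = 1n;
	} else if (value < 1073741824n) {
		size = 4;
		prefix = 2n;
	}
	let v = value | (prefix << BigInt(size * 8 - 2));
	const out = new Uint8Array(size);
	for (let i = size - 1; i >= 0; i--) {
		out[i] = Number(v & 0xffn);
		v >>= 8n;
	}
	return out;
}

// Native frames get a timestamp header; CMAF frames pass through raw.
function encodeFrame(payload: Uint8Array, timestampMicros: bigint, container: "native" | "cmaf"): Uint8Array {
	if (container === "cmaf") {
		return payload; // complete moof+mdat fragment, untouched
	}
	const header = writeVarint(timestampMicros);
	const framed = new Uint8Array(header.length + payload.length);
	framed.set(header, 0);
	framed.set(payload, header.length);
	return framed;
}
```
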
const paused = effect.get(this.paused); - const muted = effect.get(this.muted); const enabled = !paused; this.source.enabled.set(enabled); }); diff --git a/js/hang/src/watch/audio/source.ts b/js/hang/src/watch/audio/source.ts index 9779d0e9d..f482e675c 100644 --- a/js/hang/src/watch/audio/source.ts +++ b/js/hang/src/watch/audio/source.ts @@ -5,8 +5,9 @@ import * as Frame from "../../frame"; import type * as Time from "../../time"; import * as Hex from "../../util/hex"; import * as libav from "../../util/libav"; +import type { SourceMSE } from "../source-mse"; +import type * as Video from "../video"; import type * as Render from "./render"; -import * as Video from "../video"; // We want some extra overhead to avoid starving the render worklet. // The default Opus frame duration is 20ms. @@ -164,7 +165,7 @@ export class Source { #runDecoder(effect: Effect): void { const enabled = effect.get(this.enabled); const config = effect.get(this.config); - + // For CMAF, we need to add the SourceBuffer even if audio is disabled // This ensures the MediaSource has both SourceBuffers before video starts appending // We'll just not append audio data if disabled @@ -175,7 +176,7 @@ export class Source { // For non-CMAF, if disabled, don't initialize return; } - + if (!enabled && config?.container !== "cmaf") { return; } @@ -230,19 +231,19 @@ export class Source { // Wait for video's MSE source to be available // Video creates it asynchronously, and may recreate it when restarting // So we need to get it reactively each time - let videoMseSource: any; + let videoMseSource: SourceMSE | undefined; if (this.video?.mseSource) { // Wait up to 2 seconds for video MSE source to be available const maxWait = 2000; const startTime = Date.now(); - while (!videoMseSource && (Date.now() - startTime) < maxWait) { + while (!videoMseSource && Date.now() - startTime < maxWait) { videoMseSource = effect.get(this.video.mseSource); if (!videoMseSource) { - await new Promise(resolve => setTimeout(resolve, 50)); // Check more frequently + await new Promise((resolve) => setTimeout(resolve, 50)); // Check more frequently } } } - + if (!videoMseSource) { console.error("[Audio Source] Video MSE source not available, falling back to WebCodecs"); this.#runWebCodecsPath(effect, broadcast, name, config, catalog); @@ -269,50 +270,57 @@ export class Source { // Check if audio is enabled const isEnabled = effect.get(this.enabled); - + // Only subscribe to track and initialize SourceBuffer if enabled // When disabled, we don't need to do anything - video can play without audio if (!isEnabled) { - console.log(`[Audio Source] Audio disabled, skipping SourceBuffer initialization and track subscription - video will play without audio`); + console.log( + `[Audio Source] Audio disabled, skipping SourceBuffer initialization and track subscription - video will play without audio`, + ); return; } // Audio is enabled - subscribe to track and initialize SourceBuffer // Wait a bit for video to stabilize if it's restarting // Get the latest SourceMSE instance and verify it's stable - let latestMseSource: any; + let latestMseSource: SourceMSE | undefined; let retryCount = 0; const maxRetries = 3; - + while (retryCount < maxRetries) { // Get the latest SourceMSE instance (in case video restarted) latestMseSource = this.video?.mseSource ? 
effect.get(this.video.mseSource) : videoMseSource; if (!latestMseSource) { // Wait a bit for video to create SourceMSE - await new Promise(resolve => setTimeout(resolve, 100)); + await new Promise((resolve) => setTimeout(resolve, 100)); retryCount++; continue; } - + // Check if MediaSource is ready (not closed) const mediaSource = latestMseSource.mediaSource ? effect.get(latestMseSource.mediaSource) : undefined; - if (mediaSource && typeof mediaSource === "object" && "readyState" in mediaSource && (mediaSource as MediaSource).readyState === "closed") { + if ( + mediaSource && + typeof mediaSource === "object" && + "readyState" in mediaSource && + (mediaSource as MediaSource).readyState === "closed" + ) { // MediaSource is closed, video might be restarting - wait and retry console.log("[Audio Source] MediaSource is closed, waiting for video to stabilize"); - await new Promise(resolve => setTimeout(resolve, 200)); + await new Promise((resolve) => setTimeout(resolve, 200)); retryCount++; continue; } - + // SourceMSE instance looks good, proceed break; } - + if (!latestMseSource) { console.warn("[Audio Source] SourceMSE instance not available after retries, skipping audio"); return; } - + console.log("[Audio Stream] Subscribing to track", { name, codec: config.codec, @@ -320,12 +328,12 @@ export class Source { sampleRate: config.sampleRate, channels: config.numberOfChannels, }); - + // Retry a few times for transient MSE states / QuotaExceeded for (let attempt = 0; attempt < 5; attempt++) { try { // Resolve freshest SourceMSE and wait for MediaSource to be open (up to ~5s). - const resolveOpenMediaSource = async (): Promise => { + const resolveOpenMediaSource = async (): Promise => { const start = Date.now(); let current = latestMseSource; for (;;) { @@ -336,41 +344,56 @@ export class Source { current = candidate; } - const ms = current?.mediaSource ? effect.get(current.mediaSource) : undefined; - if (ms && typeof ms === "object" && "readyState" in ms && (ms as MediaSource).readyState === "open") { + if (!current) { + if (Date.now() - start > 5000) { + throw new Error("SourceMSE not available"); + } + await new Promise((resolve) => setTimeout(resolve, 50)); + continue; + } + + const ms = current.mediaSource ? effect.get(current.mediaSource) : undefined; + if ( + ms && + typeof ms === "object" && + "readyState" in ms && + (ms as MediaSource).readyState === "open" + ) { return current; } if (Date.now() - start > 5000) { throw new Error("MediaSource not ready for audio SourceBuffer"); } - await new Promise(resolve => setTimeout(resolve, 50)); + await new Promise((resolve) => setTimeout(resolve, 50)); } }; const readyMseSource = await resolveOpenMediaSource(); latestMseSource = readyMseSource; - console.log(`[Audio Source] Initializing audio SourceBuffer on unified SourceMSE (attempt ${attempt + 1})`); + console.log( + `[Audio Source] Initializing audio SourceBuffer on unified SourceMSE (attempt ${attempt + 1})`, + ); await latestMseSource.initializeAudio(config); - + // Verify we're still using the current instance after initialization const verifyMseSource = this.video?.mseSource ? 
effect.get(this.video.mseSource) : latestMseSource; - if (verifyMseSource !== latestMseSource) { + if (verifyMseSource && verifyMseSource !== latestMseSource) { // Video restarted during initialization, get new instance and retry console.log("[Audio Source] Video restarted during initialization, retrying with new instance"); await verifyMseSource.initializeAudio(config); latestMseSource = verifyMseSource; } - + console.log(`[Audio Source] Audio SourceBuffer initialization completed`); - + // Get latest instance again before running track (video might have restarted) const finalMseSource = this.video?.mseSource ? effect.get(this.video.mseSource) : latestMseSource; if (!finalMseSource) { throw new Error("SourceMSE instance not available"); } - + // Run audio track - use the latest instance console.log(`[Audio Source] Starting MSE track on unified SourceMSE`); await finalMseSource.runAudioTrack(effect, broadcast, name, config, catalog, this.enabled); @@ -379,12 +402,17 @@ export class Source { } catch (error) { const retriable = error instanceof DOMException && error.name === "QuotaExceededError"; if (!retriable || attempt === 4) { - console.warn("[Audio Source] Failed to initialize audio SourceBuffer, video will continue without audio:", error); + console.warn( + "[Audio Source] Failed to initialize audio SourceBuffer, video will continue without audio:", + error, + ); return; } const delay = 150 + attempt * 150; - console.warn(`[Audio Source] Audio init attempt ${attempt + 1} failed (${(error as Error).message}); retrying in ${delay}ms`); - await new Promise(resolve => setTimeout(resolve, delay)); + console.warn( + `[Audio Source] Audio init attempt ${attempt + 1} failed (${(error as Error).message}); retrying in ${delay}ms`, + ); + await new Promise((resolve) => setTimeout(resolve, delay)); } } }); diff --git a/js/hang/src/watch/broadcast.ts b/js/hang/src/watch/broadcast.ts index f8473c347..5d28a94b6 100644 --- a/js/hang/src/watch/broadcast.ts +++ b/js/hang/src/watch/broadcast.ts @@ -62,16 +62,14 @@ export class Broadcast { this.path = Signal.from(props?.path); this.enabled = Signal.from(props?.enabled ?? false); this.reload = Signal.from(props?.reload ?? 
true); - + // Create video first so audio can use its MediaSource this.video = new Video.Source(this.#broadcast, this.#catalog, props?.video); - + // Create audio and pass video reference for coordination this.audio = new Audio.Source(this.#broadcast, this.#catalog, props?.audio); this.audio.video = this.video; // Pass video reference for coordination - - this.location = new Location.Root(this.#broadcast, this.#catalog, props?.location); this.chat = new Chat(this.#broadcast, this.#catalog, props?.chat); this.preview = new Preview(this.#broadcast, this.#catalog, props?.preview); diff --git a/js/hang/src/watch/source-mse.ts b/js/hang/src/watch/source-mse.ts index bab894632..0e19b2b82 100644 --- a/js/hang/src/watch/source-mse.ts +++ b/js/hang/src/watch/source-mse.ts @@ -35,7 +35,7 @@ export class SourceMSE { #audioSourceBufferSetup = false; // Track if audio SourceBuffer has been set up readonly mediaSource = new Signal(undefined); - + // Expose video element for audio control (audio plays through video element) readonly videoElement = new Signal(undefined); @@ -83,41 +83,40 @@ export class SourceMSE { return false; } - async initializeVideo(config: RequiredDecoderConfig): Promise { const mimeType = Mime.buildVideoMimeType(config); if (!mimeType) { throw new Error(`Unsupported codec for MSE: ${config.codec}`); } - + console.log("[MSE] Initializing video, MIME type:", mimeType); - + // Create video element this.#video = document.createElement("video"); this.#video.style.display = "none"; this.#video.playsInline = true; this.#video.muted = false; // Don't mute - audio plays through video element document.body.appendChild(this.#video); - + // Expose video element this.videoElement.set(this.#video); - + // Create MediaSource this.#mediaSource = new MediaSource(); this.mediaSource.set(this.#mediaSource); console.log("[MSE] Video initialization: MediaSource signal set, state:", this.#mediaSource.readyState); - + // Attach MediaSource to video element const url = URL.createObjectURL(this.#mediaSource); this.#video.src = url; console.log("[MSE] MediaSource created and attached to video element"); - + // Wait for sourceopen event await new Promise((resolve, reject) => { const timeout = setTimeout(() => { reject(new Error("MediaSource sourceopen timeout")); }, 5000); - + this.#mediaSource?.addEventListener( "sourceopen", () => { @@ -143,14 +142,18 @@ export class SourceMSE { }, { once: true }, ); - - this.#mediaSource?.addEventListener("error", (e) => { - clearTimeout(timeout); - console.error("[MSE] MediaSource error event:", e); - reject(new Error(`MediaSource error: ${e}`)); - }, { once: true }); + + this.#mediaSource?.addEventListener( + "error", + (e) => { + clearTimeout(timeout); + console.error("[MSE] MediaSource error event:", e); + reject(new Error(`MediaSource error: ${e}`)); + }, + { once: true }, + ); }); - + console.log("[MSE] Video initialization complete, starting frame capture"); this.#startFrameCapture(); } @@ -168,19 +171,25 @@ export class SourceMSE { } console.log("[MSE] Initializing audio, MIME type:", mimeType); - + // Get MediaSource from signal (most up-to-date) // Use a small delay to ensure signal updates have propagated - await new Promise(resolve => setTimeout(resolve, 10)); + await new Promise((resolve) => setTimeout(resolve, 10)); let mediaSource = this.mediaSource.peek(); - console.log("[MSE] Audio initialization: MediaSource from signal:", mediaSource ? 
`readyState=${mediaSource.readyState}` : "not set"); - + console.log( + "[MSE] Audio initialization: MediaSource from signal:", + mediaSource ? `readyState=${mediaSource.readyState}` : "not set", + ); + // Also check private field as fallback if (!mediaSource && this.#mediaSource) { - console.log("[MSE] Audio initialization: Using private MediaSource field, state:", this.#mediaSource.readyState); + console.log( + "[MSE] Audio initialization: Using private MediaSource field, state:", + this.#mediaSource.readyState, + ); mediaSource = this.#mediaSource; } - + // Quick check: if MediaSource is ready, proceed immediately if (mediaSource && mediaSource.readyState === "open") { console.log("[MSE] Audio initialization: MediaSource is already open, proceeding"); @@ -193,22 +202,26 @@ export class SourceMSE { const maxWait = 5000; // 5 seconds max wait const startTime = Date.now(); const checkInterval = 50; // Check every 50ms for responsiveness - + const timeout = setTimeout(() => { const waited = ((Date.now() - startTime) / 1000).toFixed(1); - reject(new Error(`MediaSource not ready after ${waited}s (current state: ${mediaSource?.readyState || "not created"})`)); + reject( + new Error( + `MediaSource not ready after ${waited}s (current state: ${mediaSource?.readyState || "not created"})`, + ), + ); }, maxWait); const checkReady = () => { // Get latest MediaSource from signal (always get fresh value) const signalValue = this.mediaSource.peek(); mediaSource = signalValue; - + // Also check private field if signal is not set if (!mediaSource && this.#mediaSource) { mediaSource = this.#mediaSource; } - + // Check if MediaSource exists and is open if (mediaSource && mediaSource.readyState === "open") { clearTimeout(timeout); @@ -224,7 +237,9 @@ export class SourceMSE { if (elapsed % 500 < checkInterval) { const signalState = this.mediaSource.peek()?.readyState || "not set"; const privateState = this.#mediaSource?.readyState || "not set"; - console.log(`[MSE] Audio initialization: Waiting for MediaSource (${(elapsed / 1000).toFixed(1)}s, signal: ${signalState}, private: ${privateState})`); + console.log( + `[MSE] Audio initialization: Waiting for MediaSource (${(elapsed / 1000).toFixed(1)}s, signal: ${signalState}, private: ${privateState})`, + ); } // If MediaSource exists but is closed, it's from an old instance - wait for new one @@ -243,7 +258,11 @@ export class SourceMSE { const waited = (elapsed / 1000).toFixed(1); const finalSignalState = this.mediaSource.peek()?.readyState || "not set"; const finalPrivateState = this.#mediaSource?.readyState || "not set"; - reject(new Error(`MediaSource not ready after ${waited}s (signal: ${finalSignalState}, private: ${finalPrivateState})`)); + reject( + new Error( + `MediaSource not ready after ${waited}s (signal: ${finalSignalState}, private: ${finalPrivateState})`, + ), + ); } }; @@ -256,7 +275,7 @@ export class SourceMSE { if (!mediaSource || mediaSource.readyState !== "open") { throw new Error(`MediaSource not ready (state: ${mediaSource?.readyState || "not created"})`); } - + // Update private field this.#mediaSource = mediaSource; @@ -264,7 +283,7 @@ export class SourceMSE { // (could be added by a previous call to initializeAudio) if (this.#mediaSource.sourceBuffers.length >= 2) { const sourceBuffers = Array.from(this.#mediaSource.sourceBuffers); - + // If we already have an audio SourceBuffer set, use it if (this.#audioSourceBuffer && sourceBuffers.includes(this.#audioSourceBuffer)) { return; // Already have it @@ -272,9 +291,7 @@ export class 
SourceMSE { // If we have exactly 2 SourceBuffers and one is video, the other must be audio if (sourceBuffers.length === 2 && this.#videoSourceBuffer) { - const otherBuffer = sourceBuffers.find( - (sb) => sb !== this.#videoSourceBuffer - ); + const otherBuffer = sourceBuffers.find((sb) => sb !== this.#videoSourceBuffer); if (otherBuffer) { // This must be the audio SourceBuffer this.#audioSourceBuffer = otherBuffer; @@ -288,7 +305,9 @@ export class SourceMSE { // Fallback: If we have 2 SourceBuffers but don't know which is video // Assume the second one is audio (video is usually added first) if (sourceBuffers.length === 2 && !this.#videoSourceBuffer) { - console.log("[MSE] Video SourceBuffer not set yet, using fallback: assuming second SourceBuffer is audio"); + console.log( + "[MSE] Video SourceBuffer not set yet, using fallback: assuming second SourceBuffer is audio", + ); this.#audioSourceBuffer = sourceBuffers[1]; if (!this.#audioSourceBufferSetup) { this.#setupAudioSourceBuffer(); @@ -310,10 +329,18 @@ export class SourceMSE { if (this.#videoSourceBuffer?.updating) { console.log("[MSE] Waiting for video SourceBuffer to finish updating before adding audio"); await new Promise((resolve) => { - this.#videoSourceBuffer!.addEventListener("updateend", () => { - console.log("[MSE] Video SourceBuffer finished updating"); + if (!this.#videoSourceBuffer) { resolve(); - }, { once: true }); + return; + } + this.#videoSourceBuffer.addEventListener( + "updateend", + () => { + console.log("[MSE] Video SourceBuffer finished updating"); + resolve(); + }, + { once: true }, + ); }); } @@ -325,7 +352,7 @@ export class SourceMSE { // Check again if MediaSource now has 2 SourceBuffers (race condition) if (this.#mediaSource.sourceBuffers.length >= 2) { const sourceBuffers = Array.from(this.#mediaSource.sourceBuffers); - + // If we already have audio SourceBuffer set, use it if (this.#audioSourceBuffer && sourceBuffers.includes(this.#audioSourceBuffer)) { return; @@ -333,9 +360,7 @@ export class SourceMSE { // If we have exactly 2 and one is video, use the other if (sourceBuffers.length === 2 && this.#videoSourceBuffer) { - const otherBuffer = sourceBuffers.find( - (sb) => sb !== this.#videoSourceBuffer - ); + const otherBuffer = sourceBuffers.find((sb) => sb !== this.#videoSourceBuffer); if (otherBuffer) { this.#audioSourceBuffer = otherBuffer; if (!this.#audioSourceBufferSetup) { @@ -360,7 +385,9 @@ export class SourceMSE { // Final check before adding - verify MediaSource is still open if (this.#mediaSource.readyState !== "open") { - throw new Error(`MediaSource readyState changed to "${this.#mediaSource.readyState}" before adding audio SourceBuffer`); + throw new Error( + `MediaSource readyState changed to "${this.#mediaSource.readyState}" before adding audio SourceBuffer`, + ); } // Ensure we're using the MediaSource from signal (most up-to-date) @@ -368,7 +395,7 @@ export class SourceMSE { if (!mediaSource) { throw new Error("MediaSource is not available"); } - + // Update private field to match signal this.#mediaSource = mediaSource; @@ -382,11 +409,20 @@ export class SourceMSE { console.log("[MSE] Video SourceBuffer update timeout, proceeding"); resolve(); }, 500); // Only wait 500ms max - - this.#videoSourceBuffer!.addEventListener("updateend", () => { + + if (!this.#videoSourceBuffer) { clearTimeout(timeout); resolve(); - }, { once: true }); + return; + } + this.#videoSourceBuffer.addEventListener( + "updateend", + () => { + clearTimeout(timeout); + resolve(); + }, + { once: true }, + ); }); } @@ 
-444,9 +480,7 @@ export class SourceMSE { // If we have exactly 2 SourceBuffers and one is video, the other must be audio if (sourceBuffers.length === 2 && this.#videoSourceBuffer) { - const otherBuffer = sourceBuffers.find( - (sb) => sb !== this.#videoSourceBuffer - ); + const otherBuffer = sourceBuffers.find((sb) => sb !== this.#videoSourceBuffer); if (otherBuffer) { console.log("[MSE] Found audio SourceBuffer by exclusion (other than video)"); this.#audioSourceBuffer = otherBuffer; @@ -464,10 +498,10 @@ export class SourceMSE { // But if one of them was added by a previous call to initializeAudio, we should use it // For now, if we have 2 SourceBuffers and can't identify, assume the first non-video one is audio // This is a fallback - ideally video should initialize first - const nonVideoBuffer = this.#videoSourceBuffer - ? sourceBuffers.find(sb => sb !== this.#videoSourceBuffer) + const nonVideoBuffer = this.#videoSourceBuffer + ? sourceBuffers.find((sb) => sb !== this.#videoSourceBuffer) : sourceBuffers[1]; // If video not set, assume second one is audio (video is usually first) - + if (nonVideoBuffer) { console.log("[MSE] Using fallback: assuming non-video SourceBuffer is audio"); this.#audioSourceBuffer = nonVideoBuffer; @@ -486,25 +520,34 @@ export class SourceMSE { readyState: mediaSource.readyState, videoSourceBufferUpdating: this.#videoSourceBuffer.updating, }); - + // Wait for video SourceBuffer to finish if it's updating (with timeout) if (this.#videoSourceBuffer.updating) { await new Promise((resolve) => { const timeout = setTimeout(() => resolve(), 200); // Max 200ms wait - this.#videoSourceBuffer!.addEventListener("updateend", () => { + if (!this.#videoSourceBuffer) { clearTimeout(timeout); resolve(); - }, { once: true }); + return; + } + this.#videoSourceBuffer.addEventListener( + "updateend", + () => { + clearTimeout(timeout); + resolve(); + }, + { once: true }, + ); }); } else { // Brief wait for MediaSource to stabilize - await new Promise(resolve => setTimeout(resolve, 10)); + await new Promise((resolve) => setTimeout(resolve, 10)); } - + // Quick retry - check if another call added it first const currentSourceBuffers = Array.from(mediaSource.sourceBuffers); if (currentSourceBuffers.length >= 2) { - const otherBuffer = currentSourceBuffers.find(sb => sb !== this.#videoSourceBuffer); + const otherBuffer = currentSourceBuffers.find((sb) => sb !== this.#videoSourceBuffer); if (otherBuffer) { console.log("[MSE] Found audio SourceBuffer after retry"); this.#audioSourceBuffer = otherBuffer; @@ -514,7 +557,7 @@ export class SourceMSE { return; } } - + // Try adding again try { if (mediaSource.readyState !== "open") { @@ -559,17 +602,17 @@ export class SourceMSE { const buffered = sourceBuffer.buffered; const start = buffered.start(0); const end = buffered.end(0); - + // Seek to start of buffered range if needed if ( video.currentTime + SEEK_HYSTERESIS < start || video.currentTime >= end - SEEK_HYSTERESIS || - isNaN(video.currentTime) + Number.isNaN(video.currentTime) ) { console.log(`[MSE] Seeking video to buffered range start: ${start.toFixed(2)}`); video.currentTime = start; } - + // Try to play if paused if (video.paused && video.readyState >= HTMLMediaElement.HAVE_METADATA) { console.log("[MSE] Attempting to play video after SourceBuffer updateend"); @@ -578,7 +621,7 @@ export class SourceMSE { }); } } - + this.#processVideoQueue(); }); @@ -615,7 +658,9 @@ export class SourceMSE { captureCount++; if (captureCount === 1 || captureCount % 30 === 0) { - console.log(`[MSE] 
Captured frame ${captureCount}, currentTime: ${this.#video.currentTime.toFixed(2)}, readyState: ${this.#video.readyState}, paused: ${this.#video.paused}, buffered: ${this.#video.buffered.length > 0 ? `${this.#video.buffered.start(0).toFixed(2)}-${this.#video.buffered.end(0).toFixed(2)}` : "none"}`); + console.log( + `[MSE] Captured frame ${captureCount}, currentTime: ${this.#video.currentTime.toFixed(2)}, readyState: ${this.#video.readyState}, paused: ${this.#video.paused}, buffered: ${this.#video.buffered.length > 0 ? `${this.#video.buffered.start(0).toFixed(2)}-${this.#video.buffered.end(0).toFixed(2)}` : "none"}`, + ); } this.#stats.update((current) => ({ @@ -669,7 +714,7 @@ export class SourceMSE { if (!this.#videoSourceBuffer || !this.#mediaSource) { throw new Error("Video SourceBuffer not initialized"); } - + if (this.#videoAppendQueue.length >= SourceMSE.MAX_QUEUE_SIZE) { const discarded = this.#videoAppendQueue.shift(); console.warn( @@ -713,20 +758,21 @@ export class SourceMSE { let ftypAtom: Uint8Array | null = null; let moovOffset = 0; let moovSize = 0; - + // Find ftyp and moov atoms while (offset + 8 <= fullInitSegment.length) { - const size = (fullInitSegment[offset] << 24) | - (fullInitSegment[offset + 1] << 16) | - (fullInitSegment[offset + 2] << 8) | - fullInitSegment[offset + 3]; + const size = + (fullInitSegment[offset] << 24) | + (fullInitSegment[offset + 1] << 16) | + (fullInitSegment[offset + 2] << 8) | + fullInitSegment[offset + 3]; const type = String.fromCharCode( fullInitSegment[offset + 4], fullInitSegment[offset + 5], fullInitSegment[offset + 6], fullInitSegment[offset + 7], ); - + if (type === "ftyp") { ftypAtom = fullInitSegment.slice(offset, offset + size); offset += size; @@ -739,23 +785,24 @@ export class SourceMSE { offset += size; } } - + if (moovSize === 0) { throw new Error("moov atom not found in init segment"); } - + // Parse moov atom to find the relevant track const moovAtom = fullInitSegment.slice(moovOffset, moovOffset + moovSize); const targetHandler = trackType === "video" ? "vide" : "soun"; - + // Count tracks in moov let moov_track_count = 0; let moov_offset_temp = 8; while (moov_offset_temp + 8 <= moovAtom.length) { - const size = (moovAtom[moov_offset_temp] << 24) | - (moovAtom[moov_offset_temp + 1] << 16) | - (moovAtom[moov_offset_temp + 2] << 8) | - moovAtom[moov_offset_temp + 3]; + const size = + (moovAtom[moov_offset_temp] << 24) | + (moovAtom[moov_offset_temp + 1] << 16) | + (moovAtom[moov_offset_temp + 2] << 8) | + moovAtom[moov_offset_temp + 3]; const type = String.fromCharCode( moovAtom[moov_offset_temp + 4], moovAtom[moov_offset_temp + 5], @@ -768,12 +815,12 @@ export class SourceMSE { if (size < 8 || size === 0) break; moov_offset_temp += size; } - + // If only one track, use directly if (moov_track_count === 1) { return fullInitSegment; } - + // Multiple tracks - need to extract const trakAtom = this.#findTrackInMoov(moovAtom, targetHandler); if (!trakAtom) { @@ -785,26 +832,26 @@ export class SourceMSE { return this.#extractTrackInitSegmentWithHandler(fullInitSegment, ftypAtom, moovAtom, alt); } } - + const foundTracks = this.#getAllTracksInMoov(moovAtom); - const foundHandlers = foundTracks.map(t => t.handler || "unknown").join(", "); + const foundHandlers = foundTracks.map((t) => t.handler || "unknown").join(", "); throw new Error( `${trackType} track not found in moov atom. ` + - `Looking for handler: "${targetHandler}", but found: [${foundHandlers}]. 
` + - `The init segment should contain all tracks.` + `Looking for handler: "${targetHandler}", but found: [${foundHandlers}]. ` + + `The init segment should contain all tracks.`, ); } - + // Reconstruct moov atom with only the target track const newMoov = this.#rebuildMoovWithSingleTrack(moovAtom, trakAtom, targetHandler); - + // Combine ftyp (if present) + new moov const result: Uint8Array[] = []; if (ftypAtom) { result.push(ftypAtom); } result.push(newMoov); - + const totalSize = result.reduce((sum, arr) => sum + arr.length, 0); const combined = new Uint8Array(totalSize); let writeOffset = 0; @@ -812,24 +859,29 @@ export class SourceMSE { combined.set(arr, writeOffset); writeOffset += arr.length; } - + return combined; } - - #extractTrackInitSegmentWithHandler(_fullInitSegment: Uint8Array, ftypAtom: Uint8Array | null, moovAtom: Uint8Array, handlerType: string): Uint8Array { + + #extractTrackInitSegmentWithHandler( + _fullInitSegment: Uint8Array, + ftypAtom: Uint8Array | null, + moovAtom: Uint8Array, + handlerType: string, + ): Uint8Array { const trakAtom = this.#findTrackInMoov(moovAtom, handlerType); if (!trakAtom) { throw new Error(`Track with handler "${handlerType}" not found`); } - + const newMoov = this.#rebuildMoovWithSingleTrack(moovAtom, trakAtom, handlerType); - + const result: Uint8Array[] = []; if (ftypAtom) { result.push(ftypAtom); } result.push(newMoov); - + const totalSize = result.reduce((sum, arr) => sum + arr.length, 0); const combined = new Uint8Array(totalSize); let writeOffset = 0; @@ -837,69 +889,72 @@ export class SourceMSE { combined.set(arr, writeOffset); writeOffset += arr.length; } - + return combined; } - - #getAllTracksInMoov(moovAtom: Uint8Array): Array<{handler: string | null}> { - const tracks: Array<{handler: string | null}> = []; + + #getAllTracksInMoov(moovAtom: Uint8Array): Array<{ handler: string | null }> { + const tracks: Array<{ handler: string | null }> = []; let offset = 8; // Skip moov header - + while (offset + 8 <= moovAtom.length) { - const size = (moovAtom[offset] << 24) | - (moovAtom[offset + 1] << 16) | - (moovAtom[offset + 2] << 8) | - moovAtom[offset + 3]; + const size = + (moovAtom[offset] << 24) | + (moovAtom[offset + 1] << 16) | + (moovAtom[offset + 2] << 8) | + moovAtom[offset + 3]; const type = String.fromCharCode( moovAtom[offset + 4], moovAtom[offset + 5], moovAtom[offset + 6], moovAtom[offset + 7], ); - + if (type === "trak") { const trakAtom = moovAtom.slice(offset, offset + size); const handler = this.#getHandlerType(trakAtom); - tracks.push({handler: handler || null}); + tracks.push({ handler: handler || null }); } - + if (size < 8 || size === 0) break; offset += size; } - + return tracks; } - + #getHandlerType(trakAtom: Uint8Array): string | null { let offset = 8; // Skip trak header - + while (offset + 8 <= trakAtom.length) { - const size = (trakAtom[offset] << 24) | - (trakAtom[offset + 1] << 16) | - (trakAtom[offset + 2] << 8) | - trakAtom[offset + 3]; + const size = + (trakAtom[offset] << 24) | + (trakAtom[offset + 1] << 16) | + (trakAtom[offset + 2] << 8) | + trakAtom[offset + 3]; const type = String.fromCharCode( trakAtom[offset + 4], trakAtom[offset + 5], trakAtom[offset + 6], trakAtom[offset + 7], ); - + if (type === "mdia") { const mdiaAtom = trakAtom.slice(offset, offset + size); let mdiaOffset = 8; while (mdiaOffset + 8 <= mdiaAtom.length) { - const hdlrSize = (mdiaAtom[mdiaOffset] << 24) | - (mdiaAtom[mdiaOffset + 1] << 16) | - (mdiaAtom[mdiaOffset + 2] << 8) | - mdiaAtom[mdiaOffset + 3]; + const hdlrSize = + 
 	#getHandlerType(trakAtom: Uint8Array): string | null {
 		let offset = 8; // Skip trak header
-		
+
 		while (offset + 8 <= trakAtom.length) {
-			const size = (trakAtom[offset] << 24) |
-				(trakAtom[offset + 1] << 16) |
-				(trakAtom[offset + 2] << 8) |
-				trakAtom[offset + 3];
+			const size =
+				(trakAtom[offset] << 24) |
+				(trakAtom[offset + 1] << 16) |
+				(trakAtom[offset + 2] << 8) |
+				trakAtom[offset + 3];
 			const type = String.fromCharCode(
 				trakAtom[offset + 4],
 				trakAtom[offset + 5],
 				trakAtom[offset + 6],
 				trakAtom[offset + 7],
 			);
-			
+
 			if (type === "mdia") {
 				const mdiaAtom = trakAtom.slice(offset, offset + size);
 				let mdiaOffset = 8;
 				while (mdiaOffset + 8 <= mdiaAtom.length) {
-					const hdlrSize = (mdiaAtom[mdiaOffset] << 24) |
-						(mdiaAtom[mdiaOffset + 1] << 16) |
-						(mdiaAtom[mdiaOffset + 2] << 8) |
-						mdiaAtom[mdiaOffset + 3];
+					const hdlrSize =
+						(mdiaAtom[mdiaOffset] << 24) |
+						(mdiaAtom[mdiaOffset + 1] << 16) |
+						(mdiaAtom[mdiaOffset + 2] << 8) |
+						mdiaAtom[mdiaOffset + 3];
 					const hdlrType = String.fromCharCode(
 						mdiaAtom[mdiaOffset + 4],
 						mdiaAtom[mdiaOffset + 5],
 						mdiaAtom[mdiaOffset + 6],
 						mdiaAtom[mdiaOffset + 7],
 					);
-					
+
 					if (hdlrType === "hdlr") {
 						if (mdiaOffset + 24 <= mdiaAtom.length) {
 							const handlerTypeBytes = String.fromCharCode(
@@ -911,71 +966,73 @@ export class SourceMSE {
 								return handlerTypeBytes;
 							}
 						}
-					
+
 					if (hdlrSize < 8 || hdlrSize === 0) break;
 					mdiaOffset += hdlrSize;
 				}
 			}
-			
+
 			if (size < 8 || size === 0) break;
 			offset += size;
 		}
-		
+
 		return null;
 	}
-	
+
 	#findTrackInMoov(moovAtom: Uint8Array, handlerType: string): Uint8Array | null {
 		let offset = 8; // Skip moov header
-		
+
 		while (offset + 8 <= moovAtom.length) {
-			const size = (moovAtom[offset] << 24) |
-				(moovAtom[offset + 1] << 16) |
-				(moovAtom[offset + 2] << 8) |
-				moovAtom[offset + 3];
+			const size =
+				(moovAtom[offset] << 24) |
+				(moovAtom[offset + 1] << 16) |
+				(moovAtom[offset + 2] << 8) |
+				moovAtom[offset + 3];
 			const type = String.fromCharCode(
 				moovAtom[offset + 4],
 				moovAtom[offset + 5],
 				moovAtom[offset + 6],
 				moovAtom[offset + 7],
 			);
-			
+
 			if (type === "trak") {
 				const trakAtom = moovAtom.slice(offset, offset + size);
 				if (this.#trakHasHandler(trakAtom, handlerType)) {
 					return trakAtom;
 				}
 			}
-			
+
 			if (size < 8 || size === 0) break;
 			offset += size;
 		}
-		
+
 		return null;
 	}
-	
+
 	#trakHasHandler(trakAtom: Uint8Array, handlerType: string): boolean {
 		const foundHandler = this.#getHandlerType(trakAtom);
 		return foundHandler === handlerType;
 	}
-	
+
 	#rebuildMoovWithSingleTrack(moovAtom: Uint8Array, trakAtom: Uint8Array, targetHandler: string): Uint8Array {
 		const parts: Uint8Array[] = [];
 		let offset = 8; // Skip moov header
-		
+
 		const trackId = this.#getTrackId(trakAtom);
-		
+
 		while (offset + 8 <= moovAtom.length) {
-			const size = (moovAtom[offset] << 24) |
-				(moovAtom[offset + 1] << 16) |
-				(moovAtom[offset + 2] << 8) |
-				moovAtom[offset + 3];
+			const size =
+				(moovAtom[offset] << 24) |
+				(moovAtom[offset + 1] << 16) |
+				(moovAtom[offset + 2] << 8) |
+				moovAtom[offset + 3];
 			const type = String.fromCharCode(
 				moovAtom[offset + 4],
 				moovAtom[offset + 5],
 				moovAtom[offset + 6],
 				moovAtom[offset + 7],
 			);
-			
+
 			if (type === "mvhd") {
 				parts.push(moovAtom.slice(offset, offset + size));
 			} else if (type === "trak") {
@@ -990,136 +1047,125 @@ export class SourceMSE {
 					parts.push(rebuiltMvex);
 				}
 			}
-			
+
 			if (size < 8 || size === 0) break;
 			offset += size;
 		}
-		
+
 		const totalSize = 8 + parts.reduce((sum, arr) => sum + arr.length, 0);
 		const newMoov = new Uint8Array(totalSize);
-		
-		newMoov[0] = (totalSize >>> 24) & 0xFF;
-		newMoov[1] = (totalSize >>> 16) & 0xFF;
-		newMoov[2] = (totalSize >>> 8) & 0xFF;
-		newMoov[3] = totalSize & 0xFF;
-		newMoov[4] = 0x6D; // 'm'
-		newMoov[5] = 0x6F; // 'o'
-		newMoov[6] = 0x6F; // 'o'
+
+		newMoov[0] = (totalSize >>> 24) & 0xff;
+		newMoov[1] = (totalSize >>> 16) & 0xff;
+		newMoov[2] = (totalSize >>> 8) & 0xff;
+		newMoov[3] = totalSize & 0xff;
+		newMoov[4] = 0x6d; // 'm'
+		newMoov[5] = 0x6f; // 'o'
+		newMoov[6] = 0x6f; // 'o'
 		newMoov[7] = 0x76; // 'v'
-		
+
 		let writeOffset = 8;
 		for (const part of parts) {
 			newMoov.set(part, writeOffset);
 			writeOffset += part.length;
 		}
-		
+
 		return newMoov;
 	}
-	
+
 	#getTrackId(trakAtom: Uint8Array): number {
 		let offset = 8; // Skip trak header
-		
+
 		while (offset + 8 <= trakAtom.length) {
-			const size = (trakAtom[offset] << 24) |
-				(trakAtom[offset + 1] << 16) |
-				(trakAtom[offset + 2] << 8) |
-				trakAtom[offset + 3];
+			const size =
+				(trakAtom[offset] << 24) |
+				(trakAtom[offset + 1] << 16) |
+				(trakAtom[offset + 2] << 8) |
+				trakAtom[offset + 3];
 			const type = String.fromCharCode(
 				trakAtom[offset + 4],
 				trakAtom[offset + 5],
 				trakAtom[offset + 6],
 				trakAtom[offset + 7],
 			);
-			
+
 			if (type === "tkhd") {
 				const version = trakAtom[offset + 8];
 				const trackIdOffset = version === 1 ? 24 : 16;
 				if (offset + trackIdOffset + 4 <= trakAtom.length) {
-					return (trakAtom[offset + trackIdOffset] << 24) |
-						(trakAtom[offset + trackIdOffset + 1] << 16) |
-						(trakAtom[offset + trackIdOffset + 2] << 8) |
-						trakAtom[offset + trackIdOffset + 3];
+					return (
+						(trakAtom[offset + trackIdOffset] << 24) |
+							(trakAtom[offset + trackIdOffset + 1] << 16) |
+							(trakAtom[offset + trackIdOffset + 2] << 8) |
+							trakAtom[offset + trackIdOffset + 3]
+					);
 				}
 			}
-			
+
 			if (size < 8 || size === 0) break;
 			offset += size;
 		}
-		
+
 		return 0;
 	}
-	
+
 	#rebuildMvexWithSingleTrack(mvexAtom: Uint8Array, trackId: number): Uint8Array | null {
 		const parts: Uint8Array[] = [];
 		let offset = 8; // Skip mvex header
-		
+
 		while (offset + 8 <= mvexAtom.length) {
-			const size = (mvexAtom[offset] << 24) |
-				(mvexAtom[offset + 1] << 16) |
-				(mvexAtom[offset + 2] << 8) |
-				mvexAtom[offset + 3];
+			const size =
+				(mvexAtom[offset] << 24) |
+				(mvexAtom[offset + 1] << 16) |
+				(mvexAtom[offset + 2] << 8) |
+				mvexAtom[offset + 3];
 			const type = String.fromCharCode(
 				mvexAtom[offset + 4],
 				mvexAtom[offset + 5],
 				mvexAtom[offset + 6],
 				mvexAtom[offset + 7],
 			);
-			
+
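+			// trex ("track extends") is a full box: 4-byte size, 4-byte fourcc, 4 bytes of
+			// version/flags, then the 4-byte track_ID, hence the read at offset + 12 below.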
console.log("[MSE] Checking if audio SourceBuffer will be added..."); - for (let i = 0; i < 10; i++) { // up to ~1s + for (let i = 0; i < 10; i++) { + // up to ~1s if (this.#audioSourceBuffer || (this.#mediaSource && this.#mediaSource.sourceBuffers.length >= 2)) { console.log("[MSE] Audio SourceBuffer detected, proceeding with video"); break; } - await new Promise(resolve => setTimeout(resolve, 100)); + await new Promise((resolve) => setTimeout(resolve, 100)); } const sub = broadcast.subscribe(name, PRIORITY.video); @@ -1247,6 +1294,7 @@ export class SourceMSE { throw new Error("Video SourceBuffer not available"); } + const videoSourceBuffer = this.#videoSourceBuffer; console.log("[MSE] Appending video init segment, size:", videoInitSegment.byteLength, "bytes"); await new Promise((resolve, reject) => { const onUpdateEnd = () => { @@ -1255,7 +1303,7 @@ export class SourceMSE { console.log("[MSE] Video init segment appended successfully"); resolve(); }; - + const onError = (e: Event) => { videoSourceBuffer.removeEventListener("updateend", onUpdateEnd); videoSourceBuffer.removeEventListener("error", onError); @@ -1263,11 +1311,10 @@ export class SourceMSE { console.error("[MSE] Video SourceBuffer error appending init segment:", error); reject(new Error(`Video SourceBuffer error: ${error.message || "unknown error"}`)); }; - - const videoSourceBuffer = this.#videoSourceBuffer!; + videoSourceBuffer.addEventListener("updateend", onUpdateEnd, { once: true }); videoSourceBuffer.addEventListener("error", onError, { once: true }); - + try { videoSourceBuffer.appendBuffer(videoInitSegment as BufferSource); } catch (error) { @@ -1306,20 +1353,16 @@ export class SourceMSE { } // Read fragments and append to SourceBuffer + // Each fragment is already a complete CMAF segment (moof+mdat), so we can append individually + // This reduces latency and memory usage compared to batching by group console.log("[MSE] Starting to read video fragments from track"); effect.spawn(async () => { - let currentGroup: number | undefined; - let gopFragments: Uint8Array[] = []; let frameCount = 0; for (;;) { const frame = await Promise.race([consumer.decode(), effect.cancel]); if (!frame) { console.log(`[MSE] Video track ended, processed ${frameCount} frames`); - if (gopFragments.length > 0) { - const gopData = this.#concatenateFragments(gopFragments); - await this.appendVideoFragment(gopData); - } break; } @@ -1333,21 +1376,8 @@ export class SourceMSE { continue; } - // Check if we've moved to a new group - if (currentGroup !== undefined && frame.group !== currentGroup) { - if (gopFragments.length > 0) { - const gopData = this.#concatenateFragments(gopFragments); - await this.appendVideoFragment(gopData); - gopFragments = []; - } - currentGroup = frame.group; - } - - if (currentGroup === undefined) { - currentGroup = frame.group; - } - - gopFragments.push(frame.data); + // Append fragment immediately - each fragment is a complete CMAF segment + await this.appendVideoFragment(frame.data); } }); } @@ -1404,8 +1434,14 @@ export class SourceMSE { let offset = 0; const len = data.length; while (offset + 8 <= len) { - const size = (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; - const type = String.fromCharCode(data[offset + 4], data[offset + 5], data[offset + 6], data[offset + 7]); + const size = + (data[offset] << 24) | (data[offset + 1] << 16) | (data[offset + 2] << 8) | data[offset + 3]; + const type = String.fromCharCode( + data[offset + 4], + data[offset + 5], + data[offset + 
6], + data[offset + 7], + ); if (type === "moov") return true; if (size < 8 || size === 0) break; offset += size; @@ -1414,22 +1450,12 @@ export class SourceMSE { } effect.spawn(async () => { - let currentGroup: number | undefined; - let groupFragments: Uint8Array[] = []; - let frameCount = 0; - for (;;) { const frame = await Promise.race([consumer.decode(), effect.cancel]); if (!frame) { - if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") { - const groupData = this.#concatenateFragments(groupFragments); - await this.appendAudioFragment(groupData); - } break; } - frameCount++; - if (this.#mediaSource?.readyState === "closed") { break; } @@ -1439,20 +1465,10 @@ export class SourceMSE { continue; } - if (currentGroup !== undefined && frame.group !== currentGroup) { - if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") { - const groupData = this.#concatenateFragments(groupFragments); - await this.appendAudioFragment(groupData); - groupFragments = []; - } - currentGroup = frame.group; + // Append fragment immediately - each fragment is a complete CMAF segment + if (this.#mediaSource?.readyState === "open") { + await this.appendAudioFragment(frame.data); } - - if (currentGroup === undefined) { - currentGroup = frame.group; - } - - groupFragments.push(frame.data); } }); } @@ -1461,12 +1477,12 @@ export class SourceMSE { this.#videoAppendQueue = []; this.#audioAppendQueue = []; this.#audioSourceBufferSetup = false; - + // Store references before resetting const audioSourceBuffer = this.#audioSourceBuffer; const videoSourceBuffer = this.#videoSourceBuffer; const mediaSource = this.#mediaSource; - + this.#audioSourceBuffer = undefined; // Reset audio SourceBuffer reference this.mediaSource.set(undefined); diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts index ff3799ec2..1ecaa010c 100644 --- a/js/hang/src/watch/video/source.ts +++ b/js/hang/src/watch/video/source.ts @@ -5,6 +5,7 @@ import * as Frame from "../../frame"; import { PRIORITY } from "../../publish/priority"; import type * as Time from "../../time"; import * as Hex from "../../util/hex"; +import type { SourceMSE } from "../source-mse"; export type SourceProps = { enabled?: boolean | Signal; @@ -104,9 +105,8 @@ export class Source { readonly mseMediaSource = this.#mseMediaSource as Getter; // Expose mseSource instance for audio to access coordination methods - #mseSource = new Signal(undefined); - readonly mseSource = this.#mseSource as Getter; - + #mseSource = new Signal(undefined); + readonly mseSource = this.#mseSource as Getter; constructor( broadcast: Signal, @@ -263,7 +263,7 @@ export class Source { this.#signals.effect((eff) => { eff.set(this.#mseSource, mseSource); }); - + this.#signals.effect((eff) => { const stats = eff.get(mseSource.stats); eff.set(this.#stats, stats); diff --git a/rs/hang/src/import/fmp4.rs b/rs/hang/src/import/fmp4.rs index be1f2da1f..4e476e081 100644 --- a/rs/hang/src/import/fmp4.rs +++ b/rs/hang/src/import/fmp4.rs @@ -235,6 +235,10 @@ impl Fmp4 { tracing::info!(passthrough_mode, "initializing fMP4 with passthrough mode"); let mut catalog = self.catalog.lock(); + // Track which specific tracks were created in this init call + let mut created_video_tracks = Vec::new(); + let mut created_audio_tracks = Vec::new(); + for trak in &moov.trak { let track_id = trak.tkhd.track_id; let handler = &trak.mdia.hdlr.handler; @@ -254,6 +258,9 @@ impl Fmp4 { let video = catalog.insert_video(track.name.clone(), config.clone()); video.priority = 
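+			// An init segment is identified by a top-level moov box; CMAF media segments
+			// carry styp/moof+mdat instead and never match here.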
 			if (type === "moov") return true;
 			if (size < 8 || size === 0) break;
 			offset += size;
 		}
@@ -1414,22 +1450,12 @@ export class SourceMSE {
 	}
 
 		effect.spawn(async () => {
-			let currentGroup: number | undefined;
-			let groupFragments: Uint8Array[] = [];
-			let frameCount = 0;
-
 			for (;;) {
 				const frame = await Promise.race([consumer.decode(), effect.cancel]);
 				if (!frame) {
-					if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") {
-						const groupData = this.#concatenateFragments(groupFragments);
-						await this.appendAudioFragment(groupData);
-					}
 					break;
 				}
 
-				frameCount++;
-
 				if (this.#mediaSource?.readyState === "closed") {
 					break;
 				}
@@ -1439,20 +1465,10 @@ export class SourceMSE {
 					continue;
 				}
 
-				if (currentGroup !== undefined && frame.group !== currentGroup) {
-					if (groupFragments.length > 0 && this.#mediaSource?.readyState === "open") {
-						const groupData = this.#concatenateFragments(groupFragments);
-						await this.appendAudioFragment(groupData);
-						groupFragments = [];
-					}
-					currentGroup = frame.group;
+				// Append fragment immediately - each fragment is a complete CMAF segment
+				if (this.#mediaSource?.readyState === "open") {
+					await this.appendAudioFragment(frame.data);
 				}
-
-				if (currentGroup === undefined) {
-					currentGroup = frame.group;
-				}
-
-				groupFragments.push(frame.data);
 			}
 		});
 	}
@@ -1461,12 +1477,12 @@ export class SourceMSE {
 		this.#videoAppendQueue = [];
 		this.#audioAppendQueue = [];
 		this.#audioSourceBufferSetup = false;
-		
+
 		// Store references before resetting
 		const audioSourceBuffer = this.#audioSourceBuffer;
 		const videoSourceBuffer = this.#videoSourceBuffer;
 		const mediaSource = this.#mediaSource;
-		
+
 		this.#audioSourceBuffer = undefined;
 
 		// Reset audio SourceBuffer reference
 		this.mediaSource.set(undefined);
diff --git a/js/hang/src/watch/video/source.ts b/js/hang/src/watch/video/source.ts
index ff3799ec2..1ecaa010c 100644
--- a/js/hang/src/watch/video/source.ts
+++ b/js/hang/src/watch/video/source.ts
@@ -5,6 +5,7 @@ import * as Frame from "../../frame";
 import { PRIORITY } from "../../publish/priority";
 import type * as Time from "../../time";
 import * as Hex from "../../util/hex";
+import type { SourceMSE } from "../source-mse";
 
 export type SourceProps = {
 	enabled?: boolean | Signal<boolean>;
@@ -104,9 +105,8 @@ export class Source {
 	readonly mseMediaSource = this.#mseMediaSource as Getter<MediaSource | undefined>;
 
 	// Expose mseSource instance for audio to access coordination methods
-	#mseSource = new Signal<any>(undefined);
-	readonly mseSource = this.#mseSource as Getter<any>;
-	
+	#mseSource = new Signal<SourceMSE | undefined>(undefined);
+	readonly mseSource = this.#mseSource as Getter<SourceMSE | undefined>;
 
 	constructor(
 		broadcast: Signal<Moq.BroadcastConsumer | undefined>,
@@ -263,7 +263,7 @@ export class Source {
 		this.#signals.effect((eff) => {
 			eff.set(this.#mseSource, mseSource);
 		});
-		
+
 		this.#signals.effect((eff) => {
 			const stats = eff.get(mseSource.stats);
 			eff.set(this.#stats, stats);
diff --git a/rs/hang/src/import/fmp4.rs b/rs/hang/src/import/fmp4.rs
index be1f2da1f..4e476e081 100644
--- a/rs/hang/src/import/fmp4.rs
+++ b/rs/hang/src/import/fmp4.rs
@@ -235,6 +235,10 @@ impl Fmp4 {
 		tracing::info!(passthrough_mode, "initializing fMP4 with passthrough mode");
 		let mut catalog = self.catalog.lock();
 
+		// Track which specific tracks were created in this init call
+		let mut created_video_tracks = Vec::new();
+		let mut created_audio_tracks = Vec::new();
+
 		for trak in &moov.trak {
 			let track_id = trak.tkhd.track_id;
 			let handler = &trak.mdia.hdlr.handler;
@@ -254,6 +258,9 @@ impl Fmp4 {
 					let video = catalog.insert_video(track.name.clone(), config.clone());
 					video.priority = 1;
 
+					// Record this track name
+					created_video_tracks.push(track.name.clone());
+
 					let track = track.produce();
 					self.broadcast.insert_track(track.consumer);
 					hang::TrackProducer::new(track.producer, config.container)
@@ -272,6 +279,9 @@ impl Fmp4 {
 					let audio = catalog.insert_audio(track.name.clone(), config.clone());
 					audio.priority = 2;
 
+					// Record this track name
+					created_audio_tracks.push(track.name.clone());
+
 					let track = track.produce();
 					self.broadcast.insert_track(track.consumer);
 					hang::TrackProducer::new(track.producer, config.container)
@@ -287,7 +297,7 @@ impl Fmp4 {
 		let moov_track_count = moov.trak.len();
 		let has_video = moov.trak.iter().any(|t| t.mdia.hdlr.handler.as_ref() == b"vide");
 		let has_audio = moov.trak.iter().any(|t| t.mdia.hdlr.handler.as_ref() == b"soun");
-		
+
 		self.moov = Some(moov);
 
 		// In passthrough mode, store the init segment (ftyp+moov) in the catalog
@@ -312,7 +322,7 @@ impl Fmp4 {
 			// Verify that the moov atom contains all expected tracks
 			let expected_video_tracks = catalog.video.as_ref().map(|v| v.renditions.len()).unwrap_or(0);
 			let expected_audio_tracks = catalog.audio.as_ref().map(|a| a.renditions.len()).unwrap_or(0);
-			
+
 			tracing::info!(
 				tracks_in_moov = moov_track_count,
 				expected_video = expected_video_tracks,
@@ -350,78 +360,48 @@ impl Fmp4 {
 			// Store init segment in catalog for the relevant track type
 			// For HLS, each track has its own init segment (video init segment only has video,
 			// audio init segment only has audio). For direct fMP4 files, the init segment
-			// contains all tracks. We store track-specific init segments in their respective configs.
-			
+			// contains all tracks. We store track-specific init segments only in the tracks
+			// created in this init call, not all renditions of that type.
+
 			if has_video {
 				if let Some(video) = catalog.video.as_mut() {
-					for (name, config) in video.renditions.iter_mut() {
-						config.init_segment = Some(init_segment_bytes.clone());
-						tracing::debug!(
-							video_track = %name,
-							init_segment_size = init_segment_bytes.len(),
-							has_video_track = has_video,
-							has_audio_track = has_audio,
-							"stored init segment in video config"
-						);
-					}
-				}
-			}
-
-			if has_audio {
-				if let Some(audio) = catalog.audio.as_mut() {
-					for (name, config) in audio.renditions.iter_mut() {
-						config.init_segment = Some(init_segment_bytes.clone());
-						tracing::debug!(
-							audio_track = %name,
-							init_segment_size = init_segment_bytes.len(),
-							has_video_track = has_video,
-							has_audio_track = has_audio,
-							"stored init segment in audio config"
-						);
-					}
-				}
-			}
-
-			// If the init segment contains both tracks (e.g., from a direct fMP4 file),
-			// also store it in the other config type for convenience
-			if has_video && has_audio {
-				// Full init segment with both tracks - store in both configs
-				if let Some(video) = catalog.video.as_mut() {
-					for (name, config) in video.renditions.iter_mut() {
-						if config.init_segment.is_none() {
+					for track_name in &created_video_tracks {
+						if let Some(config) = video.renditions.get_mut(track_name) {
 							config.init_segment = Some(init_segment_bytes.clone());
 							tracing::debug!(
-								video_track = %name,
-								"stored full init segment (with both tracks) in video config"
+								video_track = %track_name,
+								init_segment_size = init_segment_bytes.len(),
+								has_audio_track = has_audio,
+								"stored init segment in video config"
 							);
 						}
 					}
 				}
 			}
+
+			if has_audio {
 				if let Some(audio) = catalog.audio.as_mut() {
-					for (name, config) in audio.renditions.iter_mut() {
-						if config.init_segment.is_none() {
+					for track_name in &created_audio_tracks {
+						if let Some(config) = audio.renditions.get_mut(track_name) {
 							config.init_segment = Some(init_segment_bytes.clone());
 							tracing::debug!(
-								audio_track = %name,
-								"stored full init segment (with both tracks) in audio config"
+								audio_track = %track_name,
+								init_segment_size = init_segment_bytes.len(),
+								has_video_track = has_video,
+								"stored init segment in audio config"
 							);
 						}
 					}
 				}
 			}
 
-			tracing::info!(
-				has_video = has_video,
-				has_audio = has_audio,
-				tracks_in_moov = moov_track_count,
-				"init segment (ftyp+moov) stored in catalog for all tracks"
-			);
-
 			// Init has been stored; clear cached moov/ftyp to avoid repeated warnings later.
 			self.moov_bytes = None;
 			self.ftyp_bytes = None;
 		} else {
-			tracing::warn!("passthrough mode enabled but moov_bytes is None - init segment will not be stored in catalog");
+			tracing::warn!(
+				"passthrough mode enabled but moov_bytes is None - init segment will not be stored in catalog"
+			);
 		}
 	}
@@ -880,7 +860,7 @@ impl Fmp4 {
 		// After that, we can send fragments based on their actual keyframe status
 		let is_first_frame = !self.first_frame_sent.get(&track_id).copied().unwrap_or(false);
 		let should_be_keyframe = is_first_frame || is_keyframe;
-		
+
 		if is_first_frame {
 			self.first_frame_sent.insert(track_id, true);
 		}
diff --git a/rs/hang/src/model/track.rs b/rs/hang/src/model/track.rs
index e2f3f69eb..0819911c4 100644
--- a/rs/hang/src/model/track.rs
+++ b/rs/hang/src/model/track.rs
@@ -27,7 +27,7 @@ pub struct TrackProducer {
 	/// Track if the current group is the init segment group (timestamp 0)
 	/// We keep this group open so new subscribers can receive the init segment
 	is_init_segment_group: bool,
-	container: Container
+	container: Container,
 }
 
 impl TrackProducer {
@@ -112,7 +112,7 @@ impl TrackProducer {
 		for chunk in frame.payload {
 			chunked.write_chunk(chunk);
 		}
-		
+
 		chunked.close();
 		self.group.replace(group);
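+		// The group is replaced rather than closed so it stays open; new subscribers
+		// can still read earlier frames from it (e.g. the init segment group).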