diff --git a/API.md b/API.md index 6eaa88a..5771f13 100644 --- a/API.md +++ b/API.md @@ -10,7 +10,9 @@ let session = RTSPClientSession( credentials: Credentials(username: "admin", password: "pass")) let desc = try await session.start() -// desc.videoCodec, desc.resolution, desc.audioCodec, etc. +// desc.video?.codec / .resolution / .sps / .pps / .vps / .clockRate +// desc.audio?.codec / .sampleRate / .channels / .extraData +// desc.metadataEncoding (e.g. "vnd.onvif.metadata") for try await item in session.frames() { switch item { @@ -18,6 +20,8 @@ for try await item in session.frames() { // frame.nalus — AVCC NAL units for VideoToolbox case .audio(let frame): // frame.data — raw audio (PCMA/PCMU/AAC/etc.) + case .metadata(let frame): + // frame.data — ONVIF analytics XML (possibly GZIP-compressed) case .rtcp: break } @@ -82,19 +86,29 @@ enum Transport: Sendable { Returned by `start()` with stream metadata parsed from SDP. +At least one of `video`, `audio`, or `metadataEncoding` is non-`nil`; a session with zero usable streams is rejected at `start()`. + ```swift struct SessionDescription: Sendable { - let videoCodec: VideoCodec - let sps: Data - let pps: Data + let video: VideoStream? + let audio: AudioStream? + let metadataEncoding: String? // e.g. "vnd.onvif.metadata" +} + +struct VideoStream: Sendable { + let codec: VideoCodec + let clockRate: UInt32 + let sps: Data? // nil until parameters observed + let pps: Data? let vps: Data? // H.265 only let resolution: (width: Int, height: Int)? - let clockRate: UInt32 +} - let audioCodec: PublicAudioCodec? - let audioSampleRate: UInt32? - let audioChannels: UInt16? - let audioExtraData: Data? // e.g. AudioSpecificConfig for AAC +struct AudioStream: Sendable { + let codec: PublicAudioCodec + let sampleRate: UInt32 // Hz + let channels: UInt16? + let extraData: Data? // e.g. 
AudioSpecificConfig for AAC } ``` @@ -129,6 +143,7 @@ enum PublicAudioCodec: Sendable { enum PublicCodecItem: Sendable { case video(PublicVideoFrame) case audio(PublicAudioFrame) + case metadata(PublicMetadataFrame) case rtcp(PublicRTCPPacket) } ``` @@ -164,6 +179,19 @@ struct PublicAudioFrame: Sendable { } ``` +### PublicMetadataFrame + +Raw payload from an analytics-metadata RTP stream (e.g. ONVIF XML, possibly GZIP-compressed). Consumers handle decoding based on `encodingName`. + +```swift +struct PublicMetadataFrame: Sendable { + let data: Data // Raw payload (depacketized across RTP fragments) + let timestamp: Double // Presentation timestamp in seconds + let encodingName: String // e.g. "vnd.onvif.metadata" + let loss: UInt16 // RTP packets lost before this frame +} +``` + ### PublicRTCPPacket ```swift diff --git a/CHANGELOG.md b/CHANGELOG.md index bbf31cd..86bdd59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,22 @@ ## Upcoming +### Breaking changes + +- `SessionDescription` now groups video and audio fields into `VideoStream?` and `AudioStream?` substructs. Replaces the flat `videoCodec` / `sps` / `pps` / `clockRate` / `audioCodec` / `audioSampleRate` / `audioChannels` / `audioExtraData` fields. `sps` and `pps` are now optional (`Data?`) on `VideoStream`: they are `nil`, rather than empty `Data`, when no parameter sets are known at `start()` time. A session is valid as long as any one of video, audio, or analytics metadata is set up — audio-only and metadata-only RTSP configurations (e.g. Axis cameras with `video=0`) now work end-to-end. Consumers branch on `desc.video != nil` (or `desc.audio != nil`) before configuring their decoders. + +### New + +- ONVIF analytics metadata stream support (`vnd.onvif.metadata` per the ONVIF Streaming Specification). Surfaced as `PublicCodecItem.metadata(PublicMetadataFrame)` in the `session.frames()` stream, with discoverability via `SessionDescription.metadataEncoding`. Best-effort: malformed metadata SDP or a failed application SETUP degrades to a diagnostic without aborting video/audio. 
+ ### Improvements - Add visionOS 1.0 to supported platforms +### Fixes + +- Audio depacketizer init failures now null out the audio stream state (index, encoding name, clock rate, channels), mirroring the metadata-init failure path. Previously the indices stayed set while the depacketizer was nil; packets on that channel were silently dropped by the dispatch loop but `SessionDescription` could still claim the stream existed. Required to keep the "at least one usable stream" guard honest in audio-only sessions. + ## 0.2.0 ### Breaking changes diff --git a/Examples/CameraViewer/main.swift b/Examples/CameraViewer/main.swift index cff1c25..7db579c 100644 --- a/Examples/CameraViewer/main.swift +++ b/Examples/CameraViewer/main.swift @@ -282,23 +282,25 @@ final class CameraViewerDelegate: NSObject, NSApplicationDelegate { do { let desc = try await session.start() - let res = desc.resolution.map { "\($0.width)×\($0.height)" } ?? "?" - log("Connected: \(desc.videoCodec) \(res)") + let res = desc.video?.resolution.map { "\($0.width)×\($0.height)" } ?? "?" + let codecLabel = desc.video.map { "\($0.codec)" } ?? "no video" + log("Connected: \(codecLabel) \(res)") await MainActor.run { - window.title = "IPCamKit — \(desc.videoCodec) \(res)" + window.title = "IPCamKit — \(codecLabel) \(res)" } - if let audioCodec = desc.audioCodec, let audioRate = desc.audioSampleRate { + if let audio = desc.audio { audioPlayer.start( - codec: audioCodec, sampleRate: Double(audioRate), - channels: UInt32(desc.audioChannels ?? 1)) + codec: audio.codec, sampleRate: Double(audio.sampleRate), + channels: UInt32(audio.channels ?? 1)) } - var fmtDesc = try makeFormatDescription( - codec: desc.videoCodec, - sps: desc.sps, pps: desc.pps, vps: desc.vps - ) + var fmtDesc: CMVideoFormatDescription? 
+ if let video = desc.video, let sps = video.sps, let pps = video.pps { + fmtDesc = try makeFormatDescription( + codec: video.codec, sps: sps, pps: pps, vps: video.vps) + } let layer = layerRef.layer var receivedKeyframe = false @@ -311,9 +313,10 @@ final class CameraViewerDelegate: NSObject, NSApplicationDelegate { audioPlayer.enqueue(audioFrame) case .video(let frame): + guard let video = desc.video else { continue } if let newSPS = frame.sps, let newPPS = frame.pps { fmtDesc = try makeFormatDescription( - codec: desc.videoCodec, + codec: video.codec, sps: newSPS, pps: newPPS, vps: frame.vps ) } @@ -323,9 +326,11 @@ final class CameraViewerDelegate: NSObject, NSApplicationDelegate { receivedKeyframe = true } - if let sample = buildSampleBuffer( - frame, codec: desc.videoCodec, formatDescription: fmtDesc - ) { + if let fmt = fmtDesc, + let sample = buildSampleBuffer( + frame, codec: video.codec, formatDescription: fmt + ) + { if layer.status == .failed { layer.flush() } layer.enqueue(sample) } diff --git a/README.md b/README.md index 96f3cf8..98fc370 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ A pure-Swift RTSP client library for streaming live video and audio from IP came - **H.264 and H.265/HEVC video** — depacketized to AVCC format, ready for VideoToolbox - **Audio** — AAC, PCMU, PCMA, G.722, G.726, L16, G.723.1 - **ONVIF analytics metadata** — raw XML documents from the camera's `application` RTSP stream +- **Optional streams** — any combination of video / audio / metadata is supported; audio-only or metadata-only sessions (e.g. 
Axis `video=0`) work end-to-end - **Zero dependencies** — only Foundation, Network, and CryptoKit - **Swift 6** — strict concurrency with async/await and AsyncThrowingStream @@ -45,9 +46,13 @@ let session = RTSPClientSession( // Connect and get stream metadata let desc = try await session.start() -// desc.videoCodec, desc.resolution, desc.sps, desc.pps, desc.vps -// desc.audioCodec, desc.audioSampleRate, desc.audioChannels -// desc.metadataEncoding — non-nil if an ONVIF metadata stream is active +// desc.video, desc.audio, desc.metadataEncoding — at least one is non-nil +// desc.video?.codec / .clockRate / .sps / .pps / .vps / .resolution +// desc.audio?.codec / .sampleRate / .channels / .extraData + +if let video = desc.video { + // configure a video decoder (VideoToolbox, etc.) +} // Consume depacketized frames for try await item in session.frames() { diff --git a/Sources/IPCamKit/Client/RTSPSession.swift b/Sources/IPCamKit/Client/RTSPSession.swift index eb9c251..cdccdf0 100644 --- a/Sources/IPCamKit/Client/RTSPSession.swift +++ b/Sources/IPCamKit/Client/RTSPSession.swift @@ -50,24 +50,41 @@ public struct RTSPDiagnostic: Sendable { } } -/// Parsed session description returned from `start()`. -public struct SessionDescription: Sendable { - public let videoCodec: VideoCodec - public let sps: Data - public let pps: Data - /// VPS data (H.265 only, nil for H.264). +/// Video stream details, surfaced when a supported video stream is active. +/// +/// `codec` and `clockRate` are always populated. `sps`/`pps`/`vps`/`resolution` +/// are a snapshot taken when `start()` returns and are never updated: cameras +/// that ship the parameter sets in SDP `fmtp` will have them set; for most +/// cameras they arrive in-band on the first packet, so they are `nil` here and are delivered on `PublicVideoFrame` (`sps`/`pps`/`vps`) instead. +public struct VideoStream: Sendable { + public let codec: VideoCodec + public let clockRate: UInt32 + public let sps: Data? + public let pps: Data? + /// H.265 only; nil for H.264. public let vps: Data? 
public let resolution: (width: Int, height: Int)? - public let clockRate: UInt32 +} - /// Audio codec, if an audio stream was found. - public let audioCodec: PublicAudioCodec? - /// Audio sample rate in Hz, if an audio stream was found. - public let audioSampleRate: UInt32? - /// Audio channel count, if known. - public let audioChannels: UInt16? +/// Audio stream details, surfaced when a supported audio stream is active. +/// +/// `codec` and `sampleRate` are always populated. `channels` and `extraData` +/// reflect what the camera advertised; they are codec-dependent. +public struct AudioStream: Sendable { + public let codec: PublicAudioCodec + public let sampleRate: UInt32 + public let channels: UInt16? /// Codec-specific extra data (e.g. AudioSpecificConfig for AAC). - public let audioExtraData: Data? + public let extraData: Data? +} + +/// Parsed session description returned from `start()`. +/// +/// At least one of `video`, `audio`, or `metadataEncoding` is non-`nil` — +/// a session with zero usable streams is rejected at `start()`. +public struct SessionDescription: Sendable { + public let video: VideoStream? + public let audio: AudioStream? /// SDP encoding name of the analytics-metadata stream if one was set up /// (e.g. `vnd.onvif.metadata`), or `nil` if no metadata stream is active. @@ -316,41 +333,42 @@ actor SessionState { var presMut = try parseDescribe(requestURL: url, response: describeResp) presentation = presMut - // Find first H.264 or H.265 video stream - guard - let videoIdx = presMut.streams.firstIndex(where: { - $0.media == "video" && ($0.encodingName == "h264" || $0.encodingName == "h265") - }) - else { - throw RTSPError.sessionSetupFailed( - statusCode: 0, reason: "No H.264/H.265 video stream found") - } - - let stream = presMut.streams[videoIdx] self.url = url - self.videoStreamIndex = videoIdx - - // SETUP - let setupURL = stream.control ?? 
url - var setupHeaders: [(String, String)] = [] - if transport == .tcp { - let channelId = channelMappings.nextUnassigned() ?? 0 - setupHeaders.append( - ( - "Transport", - "RTP/AVP/TCP;unicast;interleaved=\(channelId)-\(channelId + 1)" - )) - try channelMappings.assign(channelId: channelId, streamIndex: videoIdx) - } else { - setupHeaders.append(("Transport", "RTP/AVP;unicast")) - } - let setupResp = try await sendRequest( - method: .setup, url: setupURL, extraHeaders: setupHeaders) - let setup = try parseSetup(response: setupResp) - sessionId = setup.session.id - presMut.streams[videoIdx].state = .setup( - StreamStateInit(ssrc: setup.ssrc, initialSeq: nil, initialRtptime: nil, ctx: .dummy)) + // Find first H.264 or H.265 video stream — optional. Cameras can be + // configured to expose audio-only or metadata-only RTSP sessions + // (e.g. Axis with `video=0`), in which case we proceed without video + // and require at least one of audio/metadata to be set up. + let videoIdx = presMut.streams.firstIndex(where: { + $0.media == "video" && isVideoEncodingSupported($0.encodingName) + }) + var videoSetupSSRC: UInt32? + + if let videoIdx = videoIdx { + let stream = presMut.streams[videoIdx] + let setupURL = stream.control ?? url + var setupHeaders: [(String, String)] = [] + if transport == .tcp { + let channelId = channelMappings.nextUnassigned() ?? 
0 + setupHeaders.append( + ( + "Transport", + "RTP/AVP/TCP;unicast;interleaved=\(channelId)-\(channelId + 1)" + )) + try channelMappings.assign(channelId: channelId, streamIndex: videoIdx) + } else { + setupHeaders.append(("Transport", "RTP/AVP;unicast")) + } + + let setupResp = try await sendRequest( + method: .setup, url: setupURL, extraHeaders: setupHeaders) + let setup = try parseSetup(response: setupResp) + sessionId = setup.session.id + videoSetupSSRC = setup.ssrc + presMut.streams[videoIdx].state = .setup( + StreamStateInit(ssrc: setup.ssrc, initialSeq: nil, initialRtptime: nil, ctx: .dummy)) + self.videoStreamIndex = videoIdx + } // Find and SETUP audio stream (optional, best-effort) let audioIdx = presMut.streams.firstIndex(where: { s in @@ -462,6 +480,20 @@ actor SessionState { } } + // Require at least one usable stream — a session with no video, audio, + // or metadata is degenerate (DESCRIBE succeeded but nothing is carrying + // payload), and sending PLAY would just open the door to packets we + // can't route. + if videoStreamIndex == nil && audioStreamIndex == nil && applicationStreamIndex == nil { + let offered = presMut.streams.map { "\($0.media)/\($0.encodingName)" } + .joined(separator: ", ") + throw RTSPError.sessionSetupFailed( + statusCode: 0, + reason: + "No supported video, audio, or metadata stream was set up " + + "(offered: \(offered.isEmpty ? 
"none" : offered)).") + } + // PLAY var playHeaders: [(String, String)] = [] if let sid = sessionId { @@ -475,36 +507,42 @@ actor SessionState { try parsePlay(response: playResp, presentation: &presMut) presentation = presMut - // Initialize video depacketizer - if stream.encodingName == "h265" { - depacketizer = .h265( - try H265Depacketizer( - clockRate: stream.clockRateHz, - formatSpecificParams: stream.formatSpecificParams)) - } else { - depacketizer = .h264( - try H264Depacketizer( - clockRate: stream.clockRateHz, - formatSpecificParams: stream.formatSpecificParams)) - } + // Initialize video depacketizer + timeline + inorder parser. Conditional + // on a successful video SETUP — when no video stream was set up, + // `videoStreamIndex` is nil and we skip the entire video pipeline. + var videoClockRate: UInt32? + if let videoIdx = videoStreamIndex { + let stream = presMut.streams[videoIdx] + if stream.encodingName == "h265" { + depacketizer = .h265( + try H265Depacketizer( + clockRate: stream.clockRateHz, + formatSpecificParams: stream.formatSpecificParams)) + } else { + depacketizer = .h264( + try H264Depacketizer( + clockRate: stream.clockRateHz, + formatSpecificParams: stream.formatSpecificParams)) + } + videoClockRate = stream.clockRateHz - // Initialize video timeline and inorder parser - var videoStart: UInt32? - var videoSeq: UInt16? - var videoSsrc: UInt32? = setup.ssrc + var videoStart: UInt32? + var videoSeq: UInt16? + var videoSsrc: UInt32? 
= videoSetupSSRC - if case .setup(let init_) = presMut.streams[videoIdx].state { - videoStart = init_.initialRtptime - if let seq = init_.initialSeq, seq != 0, seq != 1 { - videoSeq = seq + if case .setup(let init_) = presMut.streams[videoIdx].state { + videoStart = init_.initialRtptime + if let seq = init_.initialSeq, seq != 0, seq != 1 { + videoSeq = seq + } + if let s = init_.ssrc { videoSsrc = s } } - if let s = init_.ssrc { videoSsrc = s } - } - let timeline = try Timeline(start: videoStart, clockRate: stream.clockRateHz) - inorderParsers[videoIdx] = InorderParser( - ssrc: videoSsrc, nextSeq: videoSeq, isTcp: transport == .tcp, - timeline: timeline, onDiagnostic: onDiagnostic) + let timeline = try Timeline(start: videoStart, clockRate: stream.clockRateHz) + inorderParsers[videoIdx] = InorderParser( + ssrc: videoSsrc, nextSeq: videoSeq, isTcp: transport == .tcp, + timeline: timeline, onDiagnostic: onDiagnostic) + } // Initialize audio depacketizer and inorder parser var resolvedAudioCodec: PublicAudioCodec? @@ -543,6 +581,22 @@ actor SessionState { from: audioStream.encodingName) resolvedAudioRate = audioStream.clockRateHz resolvedAudioChannels = audioStream.channels + } else { + // Audio SETUP succeeded but the depacketizer rejected the format + // (e.g. malformed AAC fmtp). Null the audio state so packets on + // that interleaved channel are silently dropped instead of + // misrouted, and so `SessionDescription.audio` is `nil` (matching + // reality). Mirrors the metadata-init failure path below. + onDiagnostic?( + RTSPDiagnostic( + severity: .warning, + message: + "Audio depacketizer init failed for " + + "\(audioStream.encodingName); audio will not be delivered.")) + audioStreamIndex = nil + audioEncodingName = nil + audioClockRate = nil + audioChannels = nil } } @@ -584,46 +638,70 @@ actor SessionState { } } + // Post-init gate. 
The pre-PLAY gate above caught the case where no + // stream was supported, but audio and metadata depacketizer init run + // *after* PLAY and can null their own indices on failure (malformed + // AAC fmtp, broken Timeline clock rate, etc.). Re-check here so the + // documented "at least one usable stream" invariant holds at the + // return site too. PLAY has already been sent; the caller will tear + // down via `stop()` after we throw. + if videoStreamIndex == nil && audioStreamIndex == nil && applicationStreamIndex == nil { + throw RTSPError.sessionSetupFailed( + statusCode: 0, + reason: + "All stream depacketizers failed to initialize after PLAY; " + + "session has no usable streams.") + } + isPlaying = true - // Build session description - let isH265 = stream.encodingName == "h265" - let sps: Data - let pps: Data - var vps: Data? - let dims: (width: UInt16, height: UInt16)? - if let depkt = depacketizer { + // Build session description. `video` is nil when no video stream was + // set up; consumers branch on `desc.video != nil` to discover availability. + // `depacketizer` and `videoClockRate` are set in lock-step inside the + // video-init block above, so the outer `if let` enforces that invariant. + let video: VideoStream? + if let depkt = depacketizer, let clockRate = videoClockRate { switch depkt { case .h264(let d): - sps = d.parameters?.spsNAL ?? Data() - pps = d.parameters?.ppsNAL ?? Data() - dims = d.parameters?.genericParameters.pixelDimensions + let dims = d.parameters?.genericParameters.pixelDimensions + video = VideoStream( + codec: .h264, + clockRate: clockRate, + sps: d.parameters?.spsNAL, + pps: d.parameters?.ppsNAL, + vps: nil, + resolution: dims.map { (width: Int($0.width), height: Int($0.height)) } + ) case .h265(let d): - sps = d.parameters?.spsNAL ?? Data() - pps = d.parameters?.ppsNAL ?? 
Data() - vps = d.parameters?.vpsNAL - dims = d.parameters?.genericParameters.pixelDimensions + let dims = d.parameters?.genericParameters.pixelDimensions + video = VideoStream( + codec: .h265, + clockRate: clockRate, + sps: d.parameters?.spsNAL, + pps: d.parameters?.ppsNAL, + vps: d.parameters?.vpsNAL, + resolution: dims.map { (width: Int($0.width), height: Int($0.height)) } + ) } } else { - sps = Data() - pps = Data() - dims = nil + video = nil } - let resolution = dims.map { - (width: Int($0.width), height: Int($0.height)) + + let audio: AudioStream? + if let codec = resolvedAudioCodec, let rate = resolvedAudioRate { + audio = AudioStream( + codec: codec, + sampleRate: rate, + channels: resolvedAudioChannels, + extraData: audioDepacketizer?.audioParameters?.extraData + ) + } else { + audio = nil } return SessionDescription( - videoCodec: isH265 ? .h265 : .h264, - sps: sps, - pps: pps, - vps: vps, - resolution: resolution, - clockRate: stream.clockRateHz, - audioCodec: resolvedAudioCodec, - audioSampleRate: resolvedAudioRate, - audioChannels: resolvedAudioChannels, - audioExtraData: audioDepacketizer?.audioParameters?.extraData, + video: video, + audio: audio, metadataEncoding: applicationEncodingName ) } @@ -878,25 +956,6 @@ actor SessionState { ) } - private func isAudioEncodingSupported(_ name: String) -> Bool { - switch name { - case "mpeg4-generic", "pcmu", "pcma", "l16", "g722", "g723", - "u8", "dvi4", "g726-16", "g726-24", "g726-32", "g726-40": - return true - default: - return false - } - } - - private func isApplicationEncodingSupported(_ name: String) -> Bool { - switch name { - case "vnd.onvif.metadata": - return true - default: - return false - } - } - private func publicAudioCodec(from encoding: String) -> PublicAudioCodec { switch encoding { case "mpeg4-generic": return .aac @@ -909,3 +968,39 @@ actor SessionState { } } } + +// MARK: - Encoding-support predicates (free functions, testable) + +/// True iff `RTSPClientSession` can depacketize a video 
stream advertising +/// this SDP `a=rtpmap` encoding name. +func isVideoEncodingSupported(_ name: String) -> Bool { + switch name { + case "h264", "h265": + return true + default: + return false + } +} + +/// True iff `RTSPClientSession` can depacketize an audio stream advertising +/// this SDP `a=rtpmap` encoding name. +func isAudioEncodingSupported(_ name: String) -> Bool { + switch name { + case "mpeg4-generic", "pcmu", "pcma", "l16", "g722", "g723", + "u8", "dvi4", "g726-16", "g726-24", "g726-32", "g726-40": + return true + default: + return false + } +} + +/// True iff `RTSPClientSession` can depacketize an analytics-metadata stream +/// advertising this SDP `a=rtpmap` encoding name. +func isApplicationEncodingSupported(_ name: String) -> Bool { + switch name { + case "vnd.onvif.metadata": + return true + default: + return false + } +} diff --git a/Tests/IPCamKitTests/DescribeParserTests.swift b/Tests/IPCamKitTests/DescribeParserTests.swift index 303fbe6..8793d85 100644 --- a/Tests/IPCamKitTests/DescribeParserTests.swift +++ b/Tests/IPCamKitTests/DescribeParserTests.swift @@ -686,4 +686,130 @@ struct DescribeParserTests { #expect(setup.source == nil) #expect(setup.serverPort == 49152) } + + // MARK: - Video-less stream configurations (Axis `video=0`) + + @Test("Axis audio-only SDP (no video stream)") + func axisAudioOnlySDP() throws { + let p = try loadDescribe(url: "rtsp://127.0.0.1/", filename: "axis_audio_only_sdp.txt") + #expect(p.streams.count == 1) + let s0 = p.streams[0] + #expect(s0.media == "audio") + #expect(s0.encodingName == "mpeg4-generic") + #expect(s0.clockRateHz == 16000) + #expect(s0.channels == 1) + #expect(p.streams.contains(where: { $0.media == "video" }) == false) + } + + @Test("Axis metadata-only SDP (no video or audio)") + func axisMetadataOnlySDP() throws { + let p = try loadDescribe(url: "rtsp://127.0.0.1/", filename: "axis_metadata_only_sdp.txt") + #expect(p.streams.count == 1) + let s0 = p.streams[0] + #expect(s0.media == 
"application") + #expect(s0.encodingName == "vnd.onvif.metadata") + #expect(s0.clockRateHz == 90000) + #expect(p.streams.contains(where: { $0.media == "video" }) == false) + #expect(p.streams.contains(where: { $0.media == "audio" }) == false) + } + + @Test("Axis audio + metadata SDP (no video)") + func axisAudioMetadataSDP() throws { + let p = try loadDescribe(url: "rtsp://127.0.0.1/", filename: "axis_audio_metadata_sdp.txt") + #expect(p.streams.count == 2) + #expect(p.streams[0].media == "audio") + #expect(p.streams[0].encodingName == "mpeg4-generic") + #expect(p.streams[1].media == "application") + #expect(p.streams[1].encodingName == "vnd.onvif.metadata") + #expect(p.streams.contains(where: { $0.media == "video" }) == false) + } + + // MARK: - Encoding-support predicates + + @Test("Video encoding support predicate") + func videoEncodingSupport() { + #expect(isVideoEncodingSupported("h264")) + #expect(isVideoEncodingSupported("h265")) + #expect(!isVideoEncodingSupported("jpeg")) + #expect(!isVideoEncodingSupported("vp8")) + #expect(!isVideoEncodingSupported("")) + } + + @Test("Audio encoding support predicate") + func audioEncodingSupport() { + #expect(isAudioEncodingSupported("mpeg4-generic")) + #expect(isAudioEncodingSupported("pcma")) + #expect(isAudioEncodingSupported("pcmu")) + #expect(isAudioEncodingSupported("l16")) + #expect(isAudioEncodingSupported("g722")) + #expect(isAudioEncodingSupported("g723")) + #expect(isAudioEncodingSupported("g726-32")) + #expect(!isAudioEncodingSupported("opus")) + #expect(!isAudioEncodingSupported("speex")) + #expect(!isAudioEncodingSupported("")) + } + + @Test("Application encoding support predicate") + func applicationEncodingSupport() { + #expect(isApplicationEncodingSupported("vnd.onvif.metadata")) + #expect(!isApplicationEncodingSupported("vnd.axis.metadata")) + #expect(!isApplicationEncodingSupported("vnd.hikvision.metadata")) + #expect(!isApplicationEncodingSupported("")) + } + + /// Mirrors the `firstIndex(where:)` 
filters at the top of `start()`. + /// When all three return `nil`, the pre-PLAY "at least one usable stream" + /// gate throws `sessionSetupFailed`. + private func discoverUsableStreams( + _ p: Presentation + ) -> (video: Int?, audio: Int?, metadata: Int?) { + let v = p.streams.firstIndex { + $0.media == "video" && isVideoEncodingSupported($0.encodingName) + } + let a = p.streams.firstIndex { + $0.media == "audio" && isAudioEncodingSupported($0.encodingName) + } + let m = p.streams.firstIndex { + $0.media == "application" && isApplicationEncodingSupported($0.encodingName) + } + return (v, a, m) + } + + @Test("SDP with only unsupported encodings leaves all stream slots empty") + func unsupportedEncodingsSDP() throws { + let p = try loadDescribe( + url: "rtsp://127.0.0.1/", filename: "unsupported_encodings_sdp.txt") + // Parser tolerates the SDP (3 streams advertised, all unsupported). + #expect(p.streams.count == 3) + #expect(p.streams[0].encodingName == "jpeg") + #expect(p.streams[1].encodingName == "opus") + #expect(p.streams[2].encodingName == "vnd.axis.metadata") + + // None of the three would survive the encoding-support filters in + // `start()` — so the pre-PLAY gate would throw `sessionSetupFailed`. 
+ let usable = discoverUsableStreams(p) + #expect(usable.video == nil) + #expect(usable.audio == nil) + #expect(usable.metadata == nil) + } + + @Test("Axis audio-only SDP yields only the audio slot") + func axisAudioOnlyUsableStreams() throws { + let p = try loadDescribe( + url: "rtsp://127.0.0.1/", filename: "axis_audio_only_sdp.txt") + let usable = discoverUsableStreams(p) + #expect(usable.video == nil) + #expect(usable.audio != nil) + #expect(usable.metadata == nil) + } + + @Test("Axis metadata-only SDP yields only the metadata slot") + func axisMetadataOnlyUsableStreams() throws { + let p = try loadDescribe( + url: "rtsp://127.0.0.1/", filename: "axis_metadata_only_sdp.txt") + let usable = discoverUsableStreams(p) + #expect(usable.video == nil) + #expect(usable.audio == nil) + #expect(usable.metadata != nil) + } } diff --git a/Tests/IPCamKitTests/TestData/axis_audio_metadata_sdp.txt b/Tests/IPCamKitTests/TestData/axis_audio_metadata_sdp.txt new file mode 100644 index 0000000..913a69a --- /dev/null +++ b/Tests/IPCamKitTests/TestData/axis_audio_metadata_sdp.txt @@ -0,0 +1,20 @@ +v=0 +o=- 1620251477190769 1620251477190769 IN IP4 192.168.1.50 +s=Media Presentation +e=NONE +c=IN IP4 0.0.0.0 +b=AS:82 +t=0 0 +a=control:rtsp://192.168.1.50:554/axis-media/media.amp/?video=0&event=on +a=range:npt=0- +m=audio 0 RTP/AVP 97 +b=AS:32 +a=recvonly +a=control:rtsp://192.168.1.50:554/axis-media/media.amp/trackID=2 +a=rtpmap:97 mpeg4-generic/16000/1 +a=fmtp:97 streamtype=5; profile-level-id=2; mode=AAC-hbr; config=1408; sizeLength=13; indexLength=3; indexDeltaLength=3; profile=1; bitrate=32000 +m=application 0 RTP/AVP 107 +b=AS:50 +a=recvonly +a=control:rtsp://192.168.1.50:554/axis-media/media.amp/trackID=3 +a=rtpmap:107 vnd.onvif.metadata/90000 diff --git a/Tests/IPCamKitTests/TestData/axis_audio_only_sdp.txt b/Tests/IPCamKitTests/TestData/axis_audio_only_sdp.txt new file mode 100644 index 0000000..d60eb38 --- /dev/null +++ b/Tests/IPCamKitTests/TestData/axis_audio_only_sdp.txt 
@@ -0,0 +1,15 @@ +v=0 +o=- 1620251477190769 1620251477190769 IN IP4 192.168.1.50 +s=Media Presentation +e=NONE +c=IN IP4 0.0.0.0 +b=AS:32 +t=0 0 +a=control:rtsp://192.168.1.50:554/axis-media/media.amp/?video=0 +a=range:npt=0- +m=audio 0 RTP/AVP 97 +b=AS:32 +a=recvonly +a=control:rtsp://192.168.1.50:554/axis-media/media.amp/trackID=2 +a=rtpmap:97 mpeg4-generic/16000/1 +a=fmtp:97 streamtype=5; profile-level-id=2; mode=AAC-hbr; config=1408; sizeLength=13; indexLength=3; indexDeltaLength=3; profile=1; bitrate=32000 diff --git a/Tests/IPCamKitTests/TestData/axis_metadata_only_sdp.txt b/Tests/IPCamKitTests/TestData/axis_metadata_only_sdp.txt new file mode 100644 index 0000000..e0232e4 --- /dev/null +++ b/Tests/IPCamKitTests/TestData/axis_metadata_only_sdp.txt @@ -0,0 +1,14 @@ +v=0 +o=- 1620251477190769 1620251477190769 IN IP4 192.168.1.50 +s=Media Presentation +e=NONE +c=IN IP4 0.0.0.0 +b=AS:50 +t=0 0 +a=control:rtsp://192.168.1.50:554/axis-media/media.amp/?video=0&audio=0&event=on +a=range:npt=0- +m=application 0 RTP/AVP 107 +b=AS:50 +a=recvonly +a=control:rtsp://192.168.1.50:554/axis-media/media.amp/trackID=3 +a=rtpmap:107 vnd.onvif.metadata/90000 diff --git a/Tests/IPCamKitTests/TestData/unsupported_encodings_sdp.txt b/Tests/IPCamKitTests/TestData/unsupported_encodings_sdp.txt new file mode 100644 index 0000000..a285907 --- /dev/null +++ b/Tests/IPCamKitTests/TestData/unsupported_encodings_sdp.txt @@ -0,0 +1,24 @@ +v=0 +o=- 1620251477190769 1620251477190769 IN IP4 192.168.1.50 +s=Media Presentation +e=NONE +c=IN IP4 0.0.0.0 +b=AS:200 +t=0 0 +a=control:rtsp://192.168.1.50:554/unsupported/ +a=range:npt=0- +m=video 0 RTP/AVP 26 +b=AS:150 +a=recvonly +a=control:rtsp://192.168.1.50:554/unsupported/trackID=1 +a=rtpmap:26 JPEG/90000 +m=audio 0 RTP/AVP 96 +b=AS:48 +a=recvonly +a=control:rtsp://192.168.1.50:554/unsupported/trackID=2 +a=rtpmap:96 opus/48000/2 +m=application 0 RTP/AVP 107 +b=AS:50 +a=recvonly +a=control:rtsp://192.168.1.50:554/unsupported/trackID=3 +a=rtpmap:107 
vnd.axis.metadata/90000