Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 37 additions & 9 deletions API.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,18 @@ let session = RTSPClientSession(
credentials: Credentials(username: "admin", password: "pass"))

let desc = try await session.start()
// desc.videoCodec, desc.resolution, desc.audioCodec, etc.
// desc.video?.codec / .resolution / .sps / .pps / .vps / .clockRate
// desc.audio?.codec / .sampleRate / .channels / .extraData
// desc.metadataEncoding (e.g. "vnd.onvif.metadata")

for try await item in session.frames() {
switch item {
case .video(let frame):
// frame.nalus — AVCC NAL units for VideoToolbox
case .audio(let frame):
// frame.data — raw audio (PCMA/PCMU/AAC/etc.)
case .metadata(let frame):
// frame.data — ONVIF analytics XML (possibly GZIP-compressed)
case .rtcp:
break
}
Expand Down Expand Up @@ -82,19 +86,29 @@ enum Transport: Sendable {

Returned by `start()` with stream metadata parsed from SDP.

At least one of `video`, `audio`, or `metadataEncoding` is non-`nil`; a session with zero usable streams is rejected at `start()`.

```swift
struct SessionDescription: Sendable {
let videoCodec: VideoCodec
let sps: Data
let pps: Data
let video: VideoStream?
let audio: AudioStream?
let metadataEncoding: String? // e.g. "vnd.onvif.metadata"
}

struct VideoStream: Sendable {
let codec: VideoCodec
let clockRate: UInt32
let sps: Data? // nil until parameters observed
let pps: Data?
let vps: Data? // H.265 only
let resolution: (width: Int, height: Int)?
let clockRate: UInt32
}

let audioCodec: PublicAudioCodec?
let audioSampleRate: UInt32?
let audioChannels: UInt16?
let audioExtraData: Data? // e.g. AudioSpecificConfig for AAC
struct AudioStream: Sendable {
let codec: PublicAudioCodec
let sampleRate: UInt32 // Hz
let channels: UInt16?
let extraData: Data? // e.g. AudioSpecificConfig for AAC
}
```

Expand Down Expand Up @@ -129,6 +143,7 @@ enum PublicAudioCodec: Sendable {
enum PublicCodecItem: Sendable {
case video(PublicVideoFrame)
case audio(PublicAudioFrame)
case metadata(PublicMetadataFrame)
case rtcp(PublicRTCPPacket)
}
```
Expand Down Expand Up @@ -164,6 +179,19 @@ struct PublicAudioFrame: Sendable {
}
```

### PublicMetadataFrame

Raw payload from an analytics-metadata RTP stream (e.g. ONVIF XML, possibly GZIP-compressed). Consumers handle decoding based on `encodingName`.

```swift
struct PublicMetadataFrame: Sendable {
let data: Data // Raw payload (depacketized across RTP fragments)
let timestamp: Double // Presentation timestamp in seconds
let encodingName: String // e.g. "vnd.onvif.metadata"
let loss: UInt16 // RTP packets lost before this frame
}
```

### PublicRTCPPacket

```swift
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,22 @@

## Upcoming

### Breaking changes

- `SessionDescription` now groups video and audio fields into `VideoStream?` and `AudioStream?` substructs, replacing the flat `videoCodec` / `sps` / `pps` / `vps` / `resolution` / `clockRate` / `audioCodec` / `audioSampleRate` / `audioChannels` / `audioExtraData` fields. A session is valid as long as any one of video, audio, or analytics metadata is set up — audio-only and metadata-only RTSP configurations (e.g. Axis cameras with `video=0`) now work end-to-end. Consumers should check `desc.video != nil` (or `desc.audio != nil`) before configuring their decoders.

### New

- ONVIF analytics metadata stream support (`vnd.onvif.metadata` per the ONVIF Streaming Specification). Surfaced as `PublicCodecItem.metadata(PublicMetadataFrame)` in the `session.frames()` stream, with discoverability via `SessionDescription.metadataEncoding`. Best-effort: malformed metadata SDP or a failed application SETUP degrades to a diagnostic without aborting video/audio.

### Improvements

- Add visionOS 1.0 to supported platforms

### Fixes

- Audio depacketizer init failures now null out the audio stream state (index, encoding name, clock rate, channels), mirroring the metadata-init failure path. Previously that state stayed set while the depacketizer was nil; packets on that channel were silently dropped by the dispatch loop, but `SessionDescription` could still claim the stream existed. This is required to keep the "at least one usable stream" guard honest in audio-only sessions.

## 0.2.0

### Breaking changes
Expand Down
33 changes: 19 additions & 14 deletions Examples/CameraViewer/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -282,23 +282,25 @@ final class CameraViewerDelegate: NSObject, NSApplicationDelegate {

do {
let desc = try await session.start()
let res = desc.resolution.map { "\($0.width)×\($0.height)" } ?? "?"
log("Connected: \(desc.videoCodec) \(res)")
let res = desc.video?.resolution.map { "\($0.width)×\($0.height)" } ?? "?"
let codecLabel = desc.video.map { "\($0.codec)" } ?? "no video"
log("Connected: \(codecLabel) \(res)")

await MainActor.run {
window.title = "IPCamKit — \(desc.videoCodec) \(res)"
window.title = "IPCamKit — \(codecLabel) \(res)"
}

if let audioCodec = desc.audioCodec, let audioRate = desc.audioSampleRate {
if let audio = desc.audio {
audioPlayer.start(
codec: audioCodec, sampleRate: Double(audioRate),
channels: UInt32(desc.audioChannels ?? 1))
codec: audio.codec, sampleRate: Double(audio.sampleRate),
channels: UInt32(audio.channels ?? 1))
}

var fmtDesc = try makeFormatDescription(
codec: desc.videoCodec,
sps: desc.sps, pps: desc.pps, vps: desc.vps
)
var fmtDesc: CMVideoFormatDescription?
if let video = desc.video, let sps = video.sps, let pps = video.pps {
fmtDesc = try makeFormatDescription(
codec: video.codec, sps: sps, pps: pps, vps: video.vps)
}

let layer = layerRef.layer
var receivedKeyframe = false
Expand All @@ -311,9 +313,10 @@ final class CameraViewerDelegate: NSObject, NSApplicationDelegate {
audioPlayer.enqueue(audioFrame)

case .video(let frame):
guard let video = desc.video else { continue }
if let newSPS = frame.sps, let newPPS = frame.pps {
fmtDesc = try makeFormatDescription(
codec: desc.videoCodec,
codec: video.codec,
sps: newSPS, pps: newPPS, vps: frame.vps
)
}
Expand All @@ -323,9 +326,11 @@ final class CameraViewerDelegate: NSObject, NSApplicationDelegate {
receivedKeyframe = true
}

if let sample = buildSampleBuffer(
frame, codec: desc.videoCodec, formatDescription: fmtDesc
) {
if let fmt = fmtDesc,
let sample = buildSampleBuffer(
frame, codec: video.codec, formatDescription: fmt
)
{
if layer.status == .failed { layer.flush() }
layer.enqueue(sample)
}
Expand Down
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ A pure-Swift RTSP client library for streaming live video and audio from IP came
- **H.264 and H.265/HEVC video** — depacketized to AVCC format, ready for VideoToolbox
- **Audio** — AAC, PCMU, PCMA, G.722, G.726, L16, G.723.1
- **ONVIF analytics metadata** — raw payloads (ONVIF XML, possibly GZIP-compressed) from the camera's `application` RTSP stream
- **Optional streams** — any combination of video / audio / metadata is supported; audio-only or metadata-only sessions (e.g. Axis `video=0`) work end-to-end
- **Zero dependencies** — only Foundation, Network, and CryptoKit
- **Swift 6** — strict concurrency with async/await and AsyncThrowingStream

Expand Down Expand Up @@ -45,9 +46,13 @@ let session = RTSPClientSession(

// Connect and get stream metadata
let desc = try await session.start()
// desc.videoCodec, desc.resolution, desc.sps, desc.pps, desc.vps
// desc.audioCodec, desc.audioSampleRate, desc.audioChannels
// desc.metadataEncoding — non-nil if an ONVIF metadata stream is active
// desc.video, desc.audio, desc.metadataEncoding — at least one is non-nil
// desc.video?.codec / .clockRate / .sps / .pps / .vps / .resolution
// desc.audio?.codec / .sampleRate / .channels / .extraData

if let video = desc.video {
// configure a video decoder (VideoToolbox, etc.)
}

// Consume depacketized frames
for try await item in session.frames() {
Expand Down
Loading