Skip to content

Commit 0bcdd4b

Browse files
tsightler (Tom Sightler) and dgreif
authored
Simplify Opus RTP timestamp mangling (#1563)
* Simplify Opus RTP timestamp mangling

  This improves the Opus RTP packet handling to reduce complexity and more directly follow the HAP spec.

* Add changelog

---------

Co-authored-by: Tom Sightler <tom.sightler@veeam.com>
Co-authored-by: dgreif <dustin.greif@gmail.com>
1 parent 93a9f6b commit 0bcdd4b

File tree

3 files changed

+37
-250
lines changed

3 files changed

+37
-250
lines changed

.changeset/dry-moles-wave.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'homebridge-ring': minor
3+
---
4+
5+
Simplified handling of Opus audio packets, which should lead to better audio when streaming

packages/homebridge-ring/camera-source.ts

Lines changed: 32 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import type { RingCamera } from 'ring-client-api'
22
import { hap } from './hap.ts'
33
import type { SrtpOptions } from '@homebridge/camera-utils'
44
import {
5-
doesFfmpegSupportCodec,
65
generateSrtpOptions,
76
ReturnAudioTranscoder,
87
RtpSplitter,
@@ -38,7 +37,6 @@ import {
3837
SrtcpSession,
3938
} from 'werift'
4039
import type { StreamingSession } from 'ring-client-api/streaming/streaming-session'
41-
import { OpusRepacketizer } from './opus-repacketizer.ts'
4240
import path from 'node:path'
4341

4442
const __dirname = new URL('.', import.meta.url).pathname,
@@ -70,7 +68,7 @@ class StreamingSessionWrapper {
7068
videoSrtp = generateSrtpOptions()
7169
audioSplitter = new RtpSplitter()
7270
videoSplitter = new RtpSplitter()
73-
repacketizeAudioSplitter = new RtpSplitter()
71+
transcodedAudioSplitter = new RtpSplitter()
7472

7573
constructor(
7674
public streamingSession: StreamingSession,
@@ -142,57 +140,26 @@ class StreamingSessionWrapper {
142140
targetAddress,
143141
audio: { port: audioPort },
144142
} = this.prepareStreamRequest,
145-
{
146-
audio: {
147-
codec: audioCodec,
148-
sample_rate: audioSampleRate,
149-
packet_time: audioPacketTime,
150-
},
151-
} = startStreamRequest,
152-
// Repacketize the audio stream after it's been transcoded
153-
opusRepacketizer = new OpusRepacketizer(audioPacketTime / 20),
154-
audioIntervalScale = ((audioSampleRate / 8) * audioPacketTime) / 20,
143+
timestampIncrement =
144+
startStreamRequest.audio.sample_rate *
145+
startStreamRequest.audio.packet_time,
155146
audioSrtpSession = new SrtpSession(getSessionConfig(this.audioSrtp))
156147

157-
let firstTimestamp: number,
158-
audioPacketCount = 0
159-
160-
this.repacketizeAudioSplitter.addMessageHandler(({ message }) => {
161-
let rtp: RtpPacket | undefined = RtpPacket.deSerialize(message)
148+
let runningTimestamp: number
162149

163-
if (audioCodec === AudioStreamingCodecType.OPUS) {
164-
// borrowed from scrypted
165-
// Original source: https://github.com/koush/scrypted/blob/c13ba09889c3e0d9d3724cb7d49253c9d787fb97/plugins/homekit/src/types/camera/camera-streaming-srtp-sender.ts#L124-L143
166-
rtp = opusRepacketizer.repacketize(rtp)
150+
this.transcodedAudioSplitter.addMessageHandler(({ message }) => {
151+
const rtp: RtpPacket | undefined = RtpPacket.deSerialize(message)
167152

168-
if (!rtp) {
169-
return null
170-
}
171-
172-
if (!firstTimestamp) {
173-
firstTimestamp = rtp.header.timestamp
174-
}
175-
176-
// from HAP spec:
177-
// RTP Payload Format for Opus Speech and Audio Codec RFC 7587 with an exception
178-
// that Opus audio RTP Timestamp shall be based on RFC 3550.
179-
// RFC 3550 indicates that PCM audio based with a sample rate of 8k and a packet
180-
// time of 20ms would have a monotonic interval of 8k / (1000 / 20) = 160.
181-
// So 24k audio would have a monotonic interval of (24k / 8k) * 160 = 480.
182-
// HAP spec also states that it may request packet times of 20, 30, 40, or 60.
183-
// In practice, HAP has been seen to request 20 on LAN and 60 over LTE.
184-
// So the RTP timestamp must scale accordingly.
185-
// Further investigation indicates that HAP doesn't care about the actual sample rate at all,
186-
// that's merely a suggestion. When encoding Opus, it can seemingly be an arbitrary sample rate,
187-
// audio will work so long as the rtp timestamps are created properly: which is a construct of the sample rate
188-
// HAP requests, and the packet time is respected,
189-
// opus 48khz will work just fine.
190-
rtp.header.timestamp =
191-
(firstTimestamp + audioPacketCount * 160 * audioIntervalScale) %
192-
0xffffffff
193-
audioPacketCount++
153+
// For some reason HAP uses RFC 3550 timestamps instead of following RTP Payload
154+
// Format for Opus Speech and Audio Codec from RFC 7587 like everyone else.
155+
// This calculates and replaces the timestamps before forwarding to HomeKit.
156+
if (!runningTimestamp) {
157+
runningTimestamp = rtp.header.timestamp
194158
}
195159

160+
rtp.header.timestamp = runningTimestamp % 0xffffffff
161+
runningTimestamp += timestampIncrement
162+
196163
// encrypt the packet
197164
const encryptedPacket = audioSrtpSession.encrypt(rtp.payload, rtp.header)
198165

@@ -246,36 +213,29 @@ class StreamingSessionWrapper {
246213
const transcodingPromise = this.streamingSession.startTranscoding({
247214
input: ['-vn'],
248215
audio: [
249-
'-map',
250-
'0:a',
251-
252-
// OPUS specific - it works, but audio is very choppy
253216
'-acodec',
254217
'libopus',
255-
'-frame_duration',
256-
request.audio.packet_time,
257218
'-application',
258219
'lowdelay',
259-
260-
// Shared options
220+
'-frame_duration',
221+
request.audio.packet_time.toString(),
261222
'-flags',
262223
'+global_header',
263-
'-ac',
264-
`${request.audio.channel}`,
265224
'-ar',
266225
`${request.audio.sample_rate}k`,
267226
'-b:a',
268227
`${request.audio.max_bit_rate}k`,
269228
'-bufsize',
270229
`${request.audio.max_bit_rate * 4}k`,
230+
'-ac',
231+
`${request.audio.channel}`,
271232
'-payload_type',
272233
request.audio.pt,
273234
'-ssrc',
274235
this.audioSsrc,
275236
'-f',
276237
'rtp',
277-
`rtp://127.0.0.1:${await this.repacketizeAudioSplitter
278-
.portPromise}?pkt_size=376`,
238+
`rtp://127.0.0.1:${await this.transcodedAudioSplitter.portPromise}`,
279239
],
280240
video: false,
281241
output: [],
@@ -309,16 +269,20 @@ class StreamingSessionWrapper {
309269
outputArgs: [
310270
'-acodec',
311271
'libopus',
312-
'-ac',
313-
'1',
314-
'-ar',
315-
'24k',
316-
'-b:a',
317-
'24k',
318272
'-application',
319273
'lowdelay',
274+
'-frame_duration',
275+
'60',
320276
'-flags',
321277
'+global_header',
278+
'-ar',
279+
'48k',
280+
'-b:a',
281+
'48k',
282+
'-bufsize',
283+
'192k',
284+
'-ac',
285+
'2',
322286
'-f',
323287
'rtp',
324288
`rtp://127.0.0.1:${await returnAudioTranscodedSplitter.portPromise}`,
@@ -344,7 +308,7 @@ class StreamingSessionWrapper {
344308

345309
stop() {
346310
this.audioSplitter.close()
347-
this.repacketizeAudioSplitter.close()
311+
this.transcodedAudioSplitter.close()
348312
this.videoSplitter.close()
349313
this.streamingSession.stop()
350314
}
@@ -363,6 +327,7 @@ export class CameraSource implements CameraStreamingDelegate {
363327
supportedCryptoSuites: [SRTPCryptoSuites.AES_CM_128_HMAC_SHA1_80],
364328
video: {
365329
resolutions: [
330+
[1920, 1024, 30],
366331
[1280, 720, 30],
367332
[1024, 768, 30],
368333
[640, 480, 30],

packages/homebridge-ring/opus-repacketizer.ts

Lines changed: 0 additions & 183 deletions
This file was deleted.

0 commit comments

Comments
 (0)