@@ -2,7 +2,6 @@ import type { RingCamera } from 'ring-client-api'
2
2
import { hap } from './hap.ts'
3
3
import type { SrtpOptions } from '@homebridge/camera-utils'
4
4
import {
5
- doesFfmpegSupportCodec ,
6
5
generateSrtpOptions ,
7
6
ReturnAudioTranscoder ,
8
7
RtpSplitter ,
@@ -38,7 +37,6 @@ import {
38
37
SrtcpSession ,
39
38
} from 'werift'
40
39
import type { StreamingSession } from 'ring-client-api/streaming/streaming-session'
41
- import { OpusRepacketizer } from './opus-repacketizer.ts'
42
40
import path from 'node:path'
43
41
44
42
const __dirname = new URL ( '.' , import . meta. url ) . pathname ,
@@ -70,7 +68,7 @@ class StreamingSessionWrapper {
70
68
videoSrtp = generateSrtpOptions ( )
71
69
audioSplitter = new RtpSplitter ( )
72
70
videoSplitter = new RtpSplitter ( )
73
- repacketizeAudioSplitter = new RtpSplitter ( )
71
+ transcodedAudioSplitter = new RtpSplitter ( )
74
72
75
73
constructor (
76
74
public streamingSession : StreamingSession ,
@@ -142,57 +140,26 @@ class StreamingSessionWrapper {
142
140
targetAddress,
143
141
audio : { port : audioPort } ,
144
142
} = this . prepareStreamRequest ,
145
- {
146
- audio : {
147
- codec : audioCodec ,
148
- sample_rate : audioSampleRate ,
149
- packet_time : audioPacketTime ,
150
- } ,
151
- } = startStreamRequest ,
152
- // Repacketize the audio stream after it's been transcoded
153
- opusRepacketizer = new OpusRepacketizer ( audioPacketTime / 20 ) ,
154
- audioIntervalScale = ( ( audioSampleRate / 8 ) * audioPacketTime ) / 20 ,
143
+ timestampIncrement =
144
+ startStreamRequest . audio . sample_rate *
145
+ startStreamRequest . audio . packet_time ,
155
146
audioSrtpSession = new SrtpSession ( getSessionConfig ( this . audioSrtp ) )
156
147
157
- let firstTimestamp : number ,
158
- audioPacketCount = 0
159
-
160
- this . repacketizeAudioSplitter . addMessageHandler ( ( { message } ) => {
161
- let rtp : RtpPacket | undefined = RtpPacket . deSerialize ( message )
148
+ let runningTimestamp : number
162
149
163
- if ( audioCodec === AudioStreamingCodecType . OPUS ) {
164
- // borrowed from scrypted
165
- // Original source: https://github.com/koush/scrypted/blob/c13ba09889c3e0d9d3724cb7d49253c9d787fb97/plugins/homekit/src/types/camera/camera-streaming-srtp-sender.ts#L124-L143
166
- rtp = opusRepacketizer . repacketize ( rtp )
150
+ this . transcodedAudioSplitter . addMessageHandler ( ( { message } ) => {
151
+ const rtp : RtpPacket | undefined = RtpPacket . deSerialize ( message )
167
152
168
- if ( ! rtp ) {
169
- return null
170
- }
171
-
172
- if ( ! firstTimestamp ) {
173
- firstTimestamp = rtp . header . timestamp
174
- }
175
-
176
- // from HAP spec:
177
- // RTP Payload Format for Opus Speech and Audio Codec RFC 7587 with an exception
178
- // that Opus audio RTP Timestamp shall be based on RFC 3550.
179
- // RFC 3550 indicates that PCM audio based with a sample rate of 8k and a packet
180
- // time of 20ms would have a monotonic interval of 8k / (1000 / 20) = 160.
181
- // So 24k audio would have a monotonic interval of (24k / 8k) * 160 = 480.
182
- // HAP spec also states that it may request packet times of 20, 30, 40, or 60.
183
- // In practice, HAP has been seen to request 20 on LAN and 60 over LTE.
184
- // So the RTP timestamp must scale accordingly.
185
- // Further investigation indicates that HAP doesn't care about the actual sample rate at all,
186
- // that's merely a suggestion. When encoding Opus, it can seemingly be an arbitrary sample rate,
187
- // audio will work so long as the rtp timestamps are created properly: which is a construct of the sample rate
188
- // HAP requests, and the packet time is respected,
189
- // opus 48khz will work just fine.
190
- rtp . header . timestamp =
191
- ( firstTimestamp + audioPacketCount * 160 * audioIntervalScale ) %
192
- 0xffffffff
193
- audioPacketCount ++
153
+ // For some reason HAP uses RFC 3550 timestamps instead of following RTP Payload
154
+ // Format for Opus Speech and Audio Codec from RFC 7587 like everyone else.
155
+ // This calculates and replaces the timestamps before forwarding to HomeKit.
156
+ if ( ! runningTimestamp ) {
157
+ runningTimestamp = rtp . header . timestamp
194
158
}
195
159
160
+ rtp . header . timestamp = runningTimestamp % 0xffffffff
161
+ runningTimestamp += timestampIncrement
162
+
196
163
// encrypt the packet
197
164
const encryptedPacket = audioSrtpSession . encrypt ( rtp . payload , rtp . header )
198
165
@@ -246,36 +213,29 @@ class StreamingSessionWrapper {
246
213
const transcodingPromise = this . streamingSession . startTranscoding ( {
247
214
input : [ '-vn' ] ,
248
215
audio : [
249
- '-map' ,
250
- '0:a' ,
251
-
252
- // OPUS specific - it works, but audio is very choppy
253
216
'-acodec' ,
254
217
'libopus' ,
255
- '-frame_duration' ,
256
- request . audio . packet_time ,
257
218
'-application' ,
258
219
'lowdelay' ,
259
-
260
- // Shared options
220
+ '-frame_duration' ,
221
+ request . audio . packet_time . toString ( ) ,
261
222
'-flags' ,
262
223
'+global_header' ,
263
- '-ac' ,
264
- `${ request . audio . channel } ` ,
265
224
'-ar' ,
266
225
`${ request . audio . sample_rate } k` ,
267
226
'-b:a' ,
268
227
`${ request . audio . max_bit_rate } k` ,
269
228
'-bufsize' ,
270
229
`${ request . audio . max_bit_rate * 4 } k` ,
230
+ '-ac' ,
231
+ `${ request . audio . channel } ` ,
271
232
'-payload_type' ,
272
233
request . audio . pt ,
273
234
'-ssrc' ,
274
235
this . audioSsrc ,
275
236
'-f' ,
276
237
'rtp' ,
277
- `rtp://127.0.0.1:${ await this . repacketizeAudioSplitter
278
- . portPromise } ?pkt_size=376`,
238
+ `rtp://127.0.0.1:${ await this . transcodedAudioSplitter . portPromise } ` ,
279
239
] ,
280
240
video : false ,
281
241
output : [ ] ,
@@ -309,16 +269,20 @@ class StreamingSessionWrapper {
309
269
outputArgs : [
310
270
'-acodec' ,
311
271
'libopus' ,
312
- '-ac' ,
313
- '1' ,
314
- '-ar' ,
315
- '24k' ,
316
- '-b:a' ,
317
- '24k' ,
318
272
'-application' ,
319
273
'lowdelay' ,
274
+ '-frame_duration' ,
275
+ '60' ,
320
276
'-flags' ,
321
277
'+global_header' ,
278
+ '-ar' ,
279
+ '48k' ,
280
+ '-b:a' ,
281
+ '48k' ,
282
+ '-bufsize' ,
283
+ '192k' ,
284
+ '-ac' ,
285
+ '2' ,
322
286
'-f' ,
323
287
'rtp' ,
324
288
`rtp://127.0.0.1:${ await returnAudioTranscodedSplitter . portPromise } ` ,
@@ -344,7 +308,7 @@ class StreamingSessionWrapper {
344
308
345
309
stop ( ) {
346
310
this . audioSplitter . close ( )
347
- this . repacketizeAudioSplitter . close ( )
311
+ this . transcodedAudioSplitter . close ( )
348
312
this . videoSplitter . close ( )
349
313
this . streamingSession . stop ( )
350
314
}
@@ -363,6 +327,7 @@ export class CameraSource implements CameraStreamingDelegate {
363
327
supportedCryptoSuites : [ SRTPCryptoSuites . AES_CM_128_HMAC_SHA1_80 ] ,
364
328
video : {
365
329
resolutions : [
330
+ [ 1920 , 1024 , 30 ] ,
366
331
[ 1280 , 720 , 30 ] ,
367
332
[ 1024 , 768 , 30 ] ,
368
333
[ 640 , 480 , 30 ] ,
0 commit comments