Skip to content

Commit

Permalink
audio latency #388
Browse files: browse the repository at this point in the history
  • Loading branch information
mrlt8 committed Aug 6, 2023
1 parent 068df73 commit c83829e
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 50 deletions.
18 changes: 9 additions & 9 deletions app/wyzebridge/ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ def get_ffmpeg_cmd(
- list of str: complete ffmpeg command that is ready to run as subprocess.
"""

flags = "-fflags +genpts+flush_packets+nobuffer+bitexact -flags +low_delay"
flags = "-fflags +flush_packets+nobuffer -flags +low_delay+global_header -use_wallclock_as_timestamps 1"
livestream = get_livestream_cmd(uri)
audio_in = "-f lavfi -i anullsrc=cl=mono" if livestream else ""
audio_out = "aac"
if audio and "codec" in audio:
audio_in = f"-thread_queue_size 100 -f {audio['codec']} -ar {audio['rate']} -i /tmp/{uri}.wav"
audio_in = f"-thread_queue_size 100 -f {audio['codec']} -ac 1 -ar {audio['rate']} -sample_fmt s16 -i /tmp/{uri}.wav"
audio_out = audio["codec_out"] or "copy"
a_filter = ["-filter:a"] + env_bool("AUDIO_FILTER", "volume=5").split()
a_filter = ["-filter:a", env_bool("AUDIO_FILTER", "volume=5")]
rtsp_transport = "udp" if "udp" in env_bool("MTX_PROTOCOLS") else "tcp"
rss_cmd = f"[{{}}f=rtsp:{rtsp_transport=:}:bsfs/v=dump_extra=freq=keyframe]rtsp://0.0.0.0:8554/{uri}"
rtsp_ss = rss_cmd.format("")
Expand All @@ -49,18 +49,18 @@ def get_ffmpeg_cmd(
).split() or (
["-hide_banner", "-loglevel", get_log_level()]
+ env_cam("FFMPEG_FLAGS", uri, flags).strip("'\"\n ").split()
+ ["-thread_queue_size", "100"]
+ ["-thread_queue_size", "100", "-analyzeduration", "50", "-probesize", "50"]
+ (["-hwaccel", h264_enc] if h264_enc in {"vaapi", "qsv"} else [])
+ ["-analyzeduration", "50", "-probesize", "50", "-f", vcodec, "-i", "pipe:"]
+ ["-f", vcodec, "-i", "pipe:"]
+ audio_in.split()
+ ["-flags", "+global_header", "-c:v"]
+ ["-c:v"]
+ re_encode_video(uri, is_vertical)
+ (["-c:a", audio_out] if audio_in else [])
+ (a_filter if audio and audio_out != "copy" else [])
+ ["-movflags", "+empty_moov+default_base_moof+frag_keyframe"]
+ ["-muxdelay", "0", "-muxpreload", "0"]
+ ["-vsync", "passthrough", "-rtbufsize", "100", "-flush_packets", "1"]
+ ["-muxdelay", "0", "-muxpreload", "0", "-max_delay", "0"]
+ ["-map", "0:v"]
+ (["-map", "1:a", "-async", "100"] if audio_in else [])
+ (["-map", "1:a"] if audio_in else [])
+ ["-f", "tee"]
+ [rtsp_ss + get_record_cmd(uri, audio_out, record) + livestream]
)
Expand Down
64 changes: 26 additions & 38 deletions app/wyzecam/iotc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import base64
import contextlib
import enum
import errno
import fcntl
import hashlib
import logging
import os
Expand Down Expand Up @@ -520,16 +522,6 @@ def recv_bridge_frame(self, timeout: int = 15, fps: int = 15) -> Iterator[bytes]
continue
if frame_info.is_keyframe:
last |= {"key_frame": frame_info.frame_no, "key_time": time.time()}
elif (
frame_info.frame_no - last["key_frame"] > fps * 3
and frame_info.frame_no - last["frame"] > fps
):
warnings.warn("Waiting for keyframe")
time.sleep((1 / (fps)) - 0.02)
continue
elif time.time() - frame_info.timestamp > timeout:
warnings.warn("frame too old")
continue

last |= {"frame": frame_info.frame_no, "time": time.time()}
yield frame_data
Expand Down Expand Up @@ -578,43 +570,39 @@ def recv_audio_frames(self, uri: str) -> None:
"""Write raw audio frames to a named pipe."""
FIFO = f"/tmp/{uri.lower()}.wav"
try:
os.mkfifo(FIFO, os.O_NONBLOCK)
os.mkfifo(FIFO)
except OSError as e:
if e.errno != 17:
raise e
tutav = self.tutk_platform_lib, self.av_chan_id

# sample_rate = self.get_audio_sample_rate()
# sleep_interval = 1 / (sample_rate / (320 if sample_rate <= 8000 else 640))
sleep_interval = 1 / 5
sleep_interval = 1 / 20
try:
with open(FIFO, "wb") as audio_pipe:
audio_fd = os.open(FIFO, os.O_RDWR | os.O_NONBLOCK | os.O_CREAT, 0o777)
fcntl.fcntl(audio_fd, fcntl.F_SETPIPE_SZ, 1024 * 512)

with os.fdopen(audio_fd, "wb") as audio_pipe:
while (
self.state == WyzeIOTCSessionState.AUTHENTICATION_SUCCEEDED
and self.stream_state.value > 1
):
if (buf := tutk.av_check_audio_buf(*tutav)) < 1:
if buf < 0:
raise tutk.TutkError(buf)
error_no, frame_data, _ = tutk.av_recv_audio_data(*tutav)
if error_no in {
tutk.AV_ER_DATA_NOREADY,
tutk.AV_ER_INCOMPLETE_FRAME,
tutk.AV_ER_LOSED_THIS_FRAME,
}:
time.sleep(sleep_interval)
continue
errno, frame_data, _ = tutk.av_recv_audio_data(*tutav)
if errno < 0:
if errno in (
tutk.AV_ER_DATA_NOREADY,
tutk.AV_ER_INCOMPLETE_FRAME,
tutk.AV_ER_LOSED_THIS_FRAME,
):
continue
warnings.warn(f"Error: {errno=}")
break

if error_no:
raise tutk.TutkError(error_no)

audio_pipe.write(frame_data)
audio_pipe.write(b"")
except tutk.TutkError as ex:

audio_pipe.write(b"")
except Exception as ex:
warnings.warn(str(ex))
except IOError as ex:
if ex.errno != 32: # Ignore errno.EPIPE - Broken pipe
warnings.warn(str(ex))
finally:
self.state = WyzeIOTCSessionState.CONNECTING_FAILED
os.unlink(FIFO)
Expand All @@ -631,10 +619,10 @@ def get_audio_codec(self, limit: int = 25) -> tuple[str, int]:
"""Identify audio codec."""
sample_rate = self.get_audio_sample_rate()
for _ in range(limit):
errno, _, frame_info = tutk.av_recv_audio_data(
error_no, _, frame_info = tutk.av_recv_audio_data(
self.tutk_platform_lib, self.av_chan_id
)
if errno == 0 and (codec_id := frame_info.codec_id):
if not error_no and (codec_id := frame_info.codec_id):
codec = False
if codec_id == 137: # MEDIA_CODEC_AUDIO_G711_ULAW
codec = "mulaw"
Expand Down Expand Up @@ -947,9 +935,9 @@ def _connect(
f"expected_chan={channel_id}"
)

tutk.av_client_set_recv_buf_size(
self.tutk_platform_lib, self.av_chan_id, max_buf_size
)
# tutk.av_client_set_recv_buf_size(
# self.tutk_platform_lib, self.av_chan_id, max_buf_size
# )

def get_auth_key(self) -> bytes:
"""Generate authkey using enr and mac address."""
Expand Down
11 changes: 8 additions & 3 deletions app/wyzecam/tutk/tutk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pathlib
from ctypes import (
CDLL,
POINTER,
Structure,
byref,
c_char,
Expand All @@ -12,6 +13,7 @@
c_uint8,
c_uint16,
c_uint32,
cast,
cdll,
create_string_buffer,
sizeof,
Expand Down Expand Up @@ -603,20 +605,23 @@ def av_recv_audio_data(tutk_platform_lib: CDLL, av_chan_id: c_int):
frame_info_max_size = 1024

audio_data = (c_char * audio_data_max_size)()
frame_info = FrameInfo3Struct()
frame_index = c_uint()
frame_info_buffer = (c_char * frame_info_max_size)()
frame_index = c_uint32()

frame_len = tutk_platform_lib.avRecvAudioData(
av_chan_id,
audio_data,
audio_data_max_size,
byref(frame_info),
frame_info_buffer,
frame_info_max_size,
byref(frame_index),
)

if frame_len < 0:
return frame_len, None, None

# frame_info = FrameInfo3Struct.from_buffer_copy(frame_info_buffer)
frame_info = cast(frame_info_buffer, POINTER(FrameInfo3Struct)).contents
return 0, audio_data[:frame_len], frame_info


Expand Down

0 comments on commit c83829e

Please sign in to comment.