audio latency #388

mrlt8 · Aug 6, 2023 · c83829e · c83829e
1 parent 068df73
commit c83829e
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 50 deletions.
diff --git a/app/wyzebridge/ffmpeg.py b/app/wyzebridge/ffmpeg.py
@@ -29,14 +29,14 @@ def get_ffmpeg_cmd(
     - list of str: complete ffmpeg command that is ready to run as subprocess.
     """
 
-    flags = "-fflags +genpts+flush_packets+nobuffer+bitexact -flags +low_delay"
+    flags = "-fflags +flush_packets+nobuffer -flags +low_delay+global_header -use_wallclock_as_timestamps 1"
     livestream = get_livestream_cmd(uri)
     audio_in = "-f lavfi -i anullsrc=cl=mono" if livestream else ""
     audio_out = "aac"
     if audio and "codec" in audio:
-        audio_in = f"-thread_queue_size 100 -f {audio['codec']} -ar {audio['rate']} -i /tmp/{uri}.wav"
+        audio_in = f"-thread_queue_size 100 -f {audio['codec']} -ac 1 -ar {audio['rate']} -sample_fmt s16 -i /tmp/{uri}.wav"
         audio_out = audio["codec_out"] or "copy"
-        a_filter = ["-filter:a"] + env_bool("AUDIO_FILTER", "volume=5").split()
+        a_filter = ["-filter:a", env_bool("AUDIO_FILTER", "volume=5")]
     rtsp_transport = "udp" if "udp" in env_bool("MTX_PROTOCOLS") else "tcp"
     rss_cmd = f"[{{}}f=rtsp:{rtsp_transport=:}:bsfs/v=dump_extra=freq=keyframe]rtsp://0.0.0.0:8554/{uri}"
     rtsp_ss = rss_cmd.format("")
@@ -49,18 +49,18 @@ def get_ffmpeg_cmd(
     ).split() or (
         ["-hide_banner", "-loglevel", get_log_level()]
         + env_cam("FFMPEG_FLAGS", uri, flags).strip("'\"\n ").split()
-        + ["-thread_queue_size", "100"]
+        + ["-thread_queue_size", "100", "-analyzeduration", "50", "-probesize", "50"]
         + (["-hwaccel", h264_enc] if h264_enc in {"vaapi", "qsv"} else [])
-        + ["-analyzeduration", "50", "-probesize", "50", "-f", vcodec, "-i", "pipe:"]
+        + ["-f", vcodec, "-i", "pipe:"]
         + audio_in.split()
-        + ["-flags", "+global_header", "-c:v"]
+        + ["-c:v"]
         + re_encode_video(uri, is_vertical)
         + (["-c:a", audio_out] if audio_in else [])
         + (a_filter if audio and audio_out != "copy" else [])
-        + ["-movflags", "+empty_moov+default_base_moof+frag_keyframe"]
-        + ["-muxdelay", "0", "-muxpreload", "0"]
+        + ["-vsync", "passthrough", "-rtbufsize", "100", "-flush_packets", "1"]
+        + ["-muxdelay", "0", "-muxpreload", "0", "-max_delay", "0"]
         + ["-map", "0:v"]
-        + (["-map", "1:a", "-async", "100"] if audio_in else [])
+        + (["-map", "1:a"] if audio_in else [])
         + ["-f", "tee"]
         + [rtsp_ss + get_record_cmd(uri, audio_out, record) + livestream]
     )

diff --git a/app/wyzecam/iotc.py b/app/wyzecam/iotc.py
@@ -1,6 +1,8 @@
 import base64
 import contextlib
 import enum
+import errno
+import fcntl
 import hashlib
 import logging
 import os
@@ -520,16 +522,6 @@ def recv_bridge_frame(self, timeout: int = 15, fps: int = 15) -> Iterator[bytes]
                 continue
             if frame_info.is_keyframe:
                 last |= {"key_frame": frame_info.frame_no, "key_time": time.time()}
-            elif (
-                frame_info.frame_no - last["key_frame"] > fps * 3
-                and frame_info.frame_no - last["frame"] > fps
-            ):
-                warnings.warn("Waiting for keyframe")
-                time.sleep((1 / (fps)) - 0.02)
-                continue
-            elif time.time() - frame_info.timestamp > timeout:
-                warnings.warn("frame too old")
-                continue
 
             last |= {"frame": frame_info.frame_no, "time": time.time()}
             yield frame_data
@@ -578,43 +570,39 @@ def recv_audio_frames(self, uri: str) -> None:
         """Write raw audio frames to a named pipe."""
         FIFO = f"/tmp/{uri.lower()}.wav"
         try:
-            os.mkfifo(FIFO, os.O_NONBLOCK)
+            os.mkfifo(FIFO)
         except OSError as e:
             if e.errno != 17:
                 raise e
         tutav = self.tutk_platform_lib, self.av_chan_id
 
-        # sample_rate = self.get_audio_sample_rate()
-        # sleep_interval = 1 / (sample_rate / (320 if sample_rate <= 8000 else 640))
-        sleep_interval = 1 / 5
+        sleep_interval = 1 / 20
         try:
-            with open(FIFO, "wb") as audio_pipe:
+            audio_fd = os.open(FIFO, os.O_RDWR | os.O_NONBLOCK | os.O_CREAT, 0o777)
+            fcntl.fcntl(audio_fd, fcntl.F_SETPIPE_SZ, 1024 * 512)
+
+            with os.fdopen(audio_fd, "wb") as audio_pipe:
                 while (
                     self.state == WyzeIOTCSessionState.AUTHENTICATION_SUCCEEDED
                     and self.stream_state.value > 1
                 ):
-                    if (buf := tutk.av_check_audio_buf(*tutav)) < 1:
-                        if buf < 0:
-                            raise tutk.TutkError(buf)
+                    error_no, frame_data, _ = tutk.av_recv_audio_data(*tutav)
+                    if error_no in {
+                        tutk.AV_ER_DATA_NOREADY,
+                        tutk.AV_ER_INCOMPLETE_FRAME,
+                        tutk.AV_ER_LOSED_THIS_FRAME,
+                    }:
                         time.sleep(sleep_interval)
                         continue
-                    errno, frame_data, _ = tutk.av_recv_audio_data(*tutav)
-                    if errno < 0:
-                        if errno in (
-                            tutk.AV_ER_DATA_NOREADY,
-                            tutk.AV_ER_INCOMPLETE_FRAME,
-                            tutk.AV_ER_LOSED_THIS_FRAME,
-                        ):
-                            continue
-                        warnings.warn(f"Error: {errno=}")
-                        break
+
+                    if error_no:
+                        raise tutk.TutkError(error_no)
+
                     audio_pipe.write(frame_data)
-                audio_pipe.write(b"")
-        except tutk.TutkError as ex:
+
+            audio_pipe.write(b"")
+        except Exception as ex:
             warnings.warn(str(ex))
-        except IOError as ex:
-            if ex.errno != 32:  # Ignore errno.EPIPE - Broken pipe
-                warnings.warn(str(ex))
         finally:
             self.state = WyzeIOTCSessionState.CONNECTING_FAILED
             os.unlink(FIFO)
@@ -631,10 +619,10 @@ def get_audio_codec(self, limit: int = 25) -> tuple[str, int]:
         """Identify audio codec."""
         sample_rate = self.get_audio_sample_rate()
         for _ in range(limit):
-            errno, _, frame_info = tutk.av_recv_audio_data(
+            error_no, _, frame_info = tutk.av_recv_audio_data(
                 self.tutk_platform_lib, self.av_chan_id
             )
-            if errno == 0 and (codec_id := frame_info.codec_id):
+            if not error_no and (codec_id := frame_info.codec_id):
                 codec = False
                 if codec_id == 137:  # MEDIA_CODEC_AUDIO_G711_ULAW
                     codec = "mulaw"
@@ -947,9 +935,9 @@ def _connect(
             f"expected_chan={channel_id}"
         )
 
-        tutk.av_client_set_recv_buf_size(
-            self.tutk_platform_lib, self.av_chan_id, max_buf_size
-        )
+        # tutk.av_client_set_recv_buf_size(
+        #     self.tutk_platform_lib, self.av_chan_id, max_buf_size
+        # )
 
     def get_auth_key(self) -> bytes:
         """Generate authkey using enr and mac address."""

diff --git a/app/wyzecam/tutk/tutk.py b/app/wyzecam/tutk/tutk.py
@@ -1,6 +1,7 @@
 import pathlib
 from ctypes import (
     CDLL,
+    POINTER,
     Structure,
     byref,
     c_char,
@@ -12,6 +13,7 @@
     c_uint8,
     c_uint16,
     c_uint32,
+    cast,
     cdll,
     create_string_buffer,
     sizeof,
@@ -603,20 +605,23 @@ def av_recv_audio_data(tutk_platform_lib: CDLL, av_chan_id: c_int):
     frame_info_max_size = 1024
 
     audio_data = (c_char * audio_data_max_size)()
-    frame_info = FrameInfo3Struct()
-    frame_index = c_uint()
+    frame_info_buffer = (c_char * frame_info_max_size)()
+    frame_index = c_uint32()
 
     frame_len = tutk_platform_lib.avRecvAudioData(
         av_chan_id,
         audio_data,
         audio_data_max_size,
-        byref(frame_info),
+        frame_info_buffer,
         frame_info_max_size,
         byref(frame_index),
     )
 
     if frame_len < 0:
         return frame_len, None, None
+
+    # frame_info = FrameInfo3Struct.from_buffer_copy(frame_info_buffer)
+    frame_info = cast(frame_info_buffer, POINTER(FrameInfo3Struct)).contents
     return 0, audio_data[:frame_len], frame_info