Skip to content

Commit

Permalink
Merge pull request #893 from luxonis/video_encoder_frame
Browse files Browse the repository at this point in the history
Video encoder frame
  • Loading branch information
asahtik authored Nov 13, 2023
2 parents 152bd06 + 83ead49 commit f80ce27
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 1 deletion.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ pybind11_add_module(${TARGET_NAME}
src/pipeline/datatype/ImageManipConfigBindings.cpp
src/pipeline/datatype/ImgDetectionsBindings.cpp
src/pipeline/datatype/ImgFrameBindings.cpp
src/pipeline/datatype/EncodedFrameBindings.cpp
src/pipeline/datatype/IMUDataBindings.cpp
src/pipeline/datatype/NNDataBindings.cpp
src/pipeline/datatype/SpatialImgDetectionsBindings.cpp
Expand Down
69 changes: 69 additions & 0 deletions examples/VideoEncoder/rgb_encoding_encodedframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

import depthai as dai

def frametype2str(ft):
    """Map a dai.EncodedFrame.FrameType value to a one-letter label.

    Returns "I", "P" or "B" for the known frame types. Any other value
    (e.g. FrameType.Unknown) yields "?" — previously the function fell
    through and returned None, which would crash the string join used
    when printing the run-length-encoded frame-type summary.
    """
    if ft == dai.EncodedFrame.FrameType.I:
        return "I"
    elif ft == dai.EncodedFrame.FrameType.P:
        return "P"
    elif ft == dai.EncodedFrame.FrameType.B:
        return "B"
    # Fallback for FrameType.Unknown or any future enum value
    return "?"

def compress(ls):
    """Run-length encode a sequence.

    Args:
        ls: A sequence of comparable items (here, frame-type strings).

    Returns:
        A list of ``(count, item)`` tuples, e.g.
        ``["I", "I", "P"] -> [(2, "I"), (1, "P")]``.
        An empty input returns ``[]`` — the original raised IndexError
        on ``ls[0]`` when no frames had been captured (e.g. immediate
        Ctrl+C before the first packet arrived).
    """
    res = []
    if not ls:
        return res
    curr = ls[0]
    count = 1
    for item in ls[1:]:
        if item == curr:
            count += 1
        else:
            res.append((count, curr))
            curr = item
            count = 1
    res.append((count, curr))
    return res


# Build the pipeline: color camera -> H.265 encoder -> XLink output
pipeline = dai.Pipeline()

# Nodes
colorCam = pipeline.create(dai.node.ColorCamera)
encoder = pipeline.create(dai.node.VideoEncoder)
xlinkOut = pipeline.create(dai.node.XLinkOut)

xlinkOut.setStreamName('h265')

# Camera / encoder configuration
colorCam.setBoardSocket(dai.CameraBoardSocket.CAM_A)
colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
encoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

# Wire the nodes together
colorCam.video.link(encoder.input)
encoder.out.link(xlinkOut.input)

frametypes = []
# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Queue delivering the encoded packets produced on the device
    encQueue = device.getOutputQueue(name="h265", maxSize=30, blocking=True)

    # The .h265 file is a raw stream file (not playable yet)
    with open('video.h265', 'wb') as outFile:
        print("Press Ctrl+C to stop encoding...")
        try:
            while True:
                # Blocking call, will wait until a new data has arrived
                packet = encQueue.get()
                # Record the frame type (I/P/B) for the summary printed at exit
                frametypes.append(frametype2str(packet.getFrameType()))
                # Appends the packet data to the opened file
                packet.getData().tofile(outFile)
        except KeyboardInterrupt:
            # Keyboard interrupt (Ctrl + C) detected
            pass

    print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using a command below:")
    print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

print(",".join([f"{c}{f}" for c, f in compress(frametypes)]))
3 changes: 3 additions & 0 deletions src/DatatypeBindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ void bind_featuretrackerconfig(pybind11::module& m, void* pCallstack);
void bind_imagemanipconfig(pybind11::module& m, void* pCallstack);
void bind_imgdetections(pybind11::module& m, void* pCallstack);
void bind_imgframe(pybind11::module& m, void* pCallstack);
void bind_encodedframe(pybind11::module& m, void* pCallstack);
void bind_imudata(pybind11::module& m, void* pCallstack);
void bind_nndata(pybind11::module& m, void* pCallstack);
void bind_spatialimgdetections(pybind11::module& m, void* pCallstack);
Expand All @@ -39,6 +40,7 @@ void DatatypeBindings::addToCallstack(std::deque<StackFunction>& callstack) {
callstack.push_front(bind_imagemanipconfig);
callstack.push_front(bind_imgdetections);
callstack.push_front(bind_imgframe);
callstack.push_front(bind_encodedframe);
callstack.push_front(bind_imudata);
callstack.push_front(bind_nndata);
callstack.push_front(bind_spatialimgdetections);
Expand Down Expand Up @@ -74,6 +76,7 @@ void DatatypeBindings::bind(pybind11::module& m, void* pCallstack){
datatypeEnum
.value("Buffer", DatatypeEnum::Buffer)
.value("ImgFrame", DatatypeEnum::ImgFrame)
.value("EncodedFrame", DatatypeEnum::EncodedFrame)
.value("NNData", DatatypeEnum::NNData)
.value("ImageManipConfig", DatatypeEnum::ImageManipConfig)
.value("CameraControl", DatatypeEnum::CameraControl)
Expand Down
135 changes: 135 additions & 0 deletions src/pipeline/datatype/EncodedFrameBindings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include "DatatypeBindings.hpp"
#include "depthai-shared/datatype/RawEncodedFrame.hpp"
#include "pipeline/CommonBindings.hpp"
#include <memory>
#include <unordered_map>

// depthai
#include "depthai/pipeline/datatype/EncodedFrame.hpp"

// pybind
#include <pybind11/chrono.h>
#include <pybind11/numpy.h>

/// Binds dai::RawEncodedFrame / dai::EncodedFrame (and their Profile /
/// FrameType enums) into the Python module. Registered on the datatype
/// callstack from DatatypeBindings::addToCallstack.
void bind_encodedframe(pybind11::module &m, void *pCallstack) {

  using namespace dai;

  // Declare the Python types up-front (empty shells) so datatypes bound
  // later in the callstack can already reference them in signatures.
  py::class_<RawEncodedFrame, RawBuffer, std::shared_ptr<RawEncodedFrame>>
      rawEncodedFrame(m, "RawEncodedFrame", DOC(dai, RawEncodedFrame));
  py::enum_<RawEncodedFrame::Profile> rawEncodedFrameProfile(rawEncodedFrame,
                                                             "Profile");
  py::enum_<RawEncodedFrame::FrameType> rawEncodedFrameType(
      rawEncodedFrame, "FrameType", DOC(dai, RawEncodedFrame, FrameType));
  py::class_<EncodedFrame, Buffer, std::shared_ptr<EncodedFrame>> encodedFrame(
      m, "EncodedFrame", DOC(dai, EncodedFrame));

  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////
  // Call the rest of the type defines, then perform the actual bindings
  Callstack *callstack = (Callstack *)pCallstack;
  auto cb = callstack->top();
  callstack->pop();
  cb(m, pCallstack);
  // Actual bindings
  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////

  // Metadata / raw

  rawEncodedFrame.def(py::init<>())
      .def_readwrite("quality", &RawEncodedFrame::quality)
      .def_readwrite("bitrate", &RawEncodedFrame::bitrate)
      .def_readwrite("profile", &RawEncodedFrame::profile)
      .def_readwrite("lossless", &RawEncodedFrame::lossless)
      .def_readwrite("type", &RawEncodedFrame::type)
      .def_readwrite("instanceNum", &RawEncodedFrame::instanceNum)
      .def_readwrite("sequenceNum", &RawEncodedFrame::sequenceNum)
      // Expose the split sec/nsec timestamps as a single float of seconds,
      // matching the convention used by the other raw datatype bindings
      .def_property(
          "ts",
          [](const RawEncodedFrame &o) {
            double ts = o.ts.sec + o.ts.nsec / 1000000000.0;
            return ts;
          },
          [](RawEncodedFrame &o, double ts) {
            o.ts.sec = ts;
            o.ts.nsec = (ts - o.ts.sec) * 1000000000.0;
          })
      .def_property(
          "tsDevice",
          [](const RawEncodedFrame &o) {
            double ts = o.tsDevice.sec + o.tsDevice.nsec / 1000000000.0;
            return ts;
          },
          [](RawEncodedFrame &o, double ts) {
            o.tsDevice.sec = ts;
            o.tsDevice.nsec = (ts - o.tsDevice.sec) * 1000000000.0;
          });

  rawEncodedFrameProfile.value("JPEG", EncodedFrame::Profile::JPEG)
      .value("AVC", EncodedFrame::Profile::AVC)
      .value("HEVC", EncodedFrame::Profile::HEVC);

  rawEncodedFrameType.value("I", EncodedFrame::FrameType::I)
      .value("P", EncodedFrame::FrameType::P)
      .value("B", EncodedFrame::FrameType::B)
      .value("Unknown", EncodedFrame::FrameType::Unknown);

  // Message
  encodedFrame
      .def(py::init<>())
      // getters
      .def("getTimestamp",
           py::overload_cast<>(&EncodedFrame::getTimestamp, py::const_),
           DOC(dai, EncodedFrame, getTimestamp))
      .def("getTimestampDevice",
           py::overload_cast<>(&EncodedFrame::getTimestampDevice, py::const_),
           DOC(dai, EncodedFrame, getTimestampDevice))
      .def("getInstanceNum", &EncodedFrame::getInstanceNum,
           DOC(dai, EncodedFrame, getInstanceNum))
      .def("getSequenceNum", &EncodedFrame::getSequenceNum,
           DOC(dai, EncodedFrame, getSequenceNum))
      .def("getExposureTime", &EncodedFrame::getExposureTime,
           DOC(dai, EncodedFrame, getExposureTime))
      .def("getSensitivity", &EncodedFrame::getSensitivity,
           DOC(dai, EncodedFrame, getSensitivity))
      .def("getColorTemperature", &EncodedFrame::getColorTemperature,
           DOC(dai, EncodedFrame, getColorTemperature))
      .def("getLensPosition", &EncodedFrame::getLensPosition,
           DOC(dai, EncodedFrame, getLensPosition))
      .def("getQuality", &EncodedFrame::getQuality,
           DOC(dai, EncodedFrame, getQuality))
      .def("getBitrate", &EncodedFrame::getBitrate,
           DOC(dai, EncodedFrame, getBitrate))
      .def("getFrameType", &EncodedFrame::getFrameType,
           DOC(dai, EncodedFrame, getFrameType))
      .def("getLossless", &EncodedFrame::getLossless,
           DOC(dai, EncodedFrame, getLossless))
      .def("getProfile", &EncodedFrame::getProfile,
           DOC(dai, EncodedFrame, getProfile))

      // setters — each cites its own docstring (the original copy-pasted
      // the getter DOC names here, attaching getter docs to the setters)
      .def("setTimestamp", &EncodedFrame::setTimestamp,
           DOC(dai, EncodedFrame, setTimestamp))
      .def("setTimestampDevice", &EncodedFrame::setTimestampDevice,
           DOC(dai, EncodedFrame, setTimestampDevice))
      .def("setSequenceNum", &EncodedFrame::setSequenceNum,
           DOC(dai, EncodedFrame, setSequenceNum))
      .def("setQuality", &EncodedFrame::setQuality,
           DOC(dai, EncodedFrame, setQuality))
      .def("setBitrate", &EncodedFrame::setBitrate,
           DOC(dai, EncodedFrame, setBitrate))
      .def("setFrameType", &EncodedFrame::setFrameType,
           DOC(dai, EncodedFrame, setFrameType))
      .def("setLossless", &EncodedFrame::setLossless,
           DOC(dai, EncodedFrame, setLossless))
      .def("setProfile", &EncodedFrame::setProfile,
           DOC(dai, EncodedFrame, setProfile));
  // Add aliases dai.EncodedFrame.FrameType and dai.EncodedFrame.Profile
  // (the enums live on RawEncodedFrame; mirror them on the message class)
  m.attr("EncodedFrame").attr("FrameType") =
      m.attr("RawEncodedFrame").attr("FrameType");
  m.attr("EncodedFrame").attr("Profile") =
      m.attr("RawEncodedFrame").attr("Profile");
}
1 change: 1 addition & 0 deletions src/pipeline/node/VideoEncoderBindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void bind_videoencoder(pybind11::module& m, void* pCallstack){
videoEncoder
.def_readonly("input", &VideoEncoder::input, DOC(dai, node, VideoEncoder, input), DOC(dai, node, VideoEncoder, input))
.def_readonly("bitstream", &VideoEncoder::bitstream, DOC(dai, node, VideoEncoder, bitstream), DOC(dai, node, VideoEncoder, bitstream))
.def_readonly("out", &VideoEncoder::out, DOC(dai, node, VideoEncoder, out), DOC(dai, node, VideoEncoder, out))
.def("setDefaultProfilePreset", static_cast<void(VideoEncoder::*)(float, VideoEncoderProperties::Profile)>(&VideoEncoder::setDefaultProfilePreset), py::arg("fps"), py::arg("profile"), DOC(dai, node, VideoEncoder, setDefaultProfilePreset))
.def("setDefaultProfilePreset", [](VideoEncoder& v, int width, int height, float fps, VideoEncoderProperties::Profile profile){
PyErr_WarnEx(PyExc_DeprecationWarning, "Input width/height no longer needed, automatically determined from first frame", 1);
Expand Down

0 comments on commit f80ce27

Please sign in to comment.