Skip to content

Commit

Permalink
Merge pull request #893 from luxonis/video_encoder_frame
Browse files Browse the repository at this point in the history
Video encoder frame
  • Loading branch information
asahtik authored Nov 13, 2023
2 parents 152bd06 + 83ead49 commit f80ce27
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 1 deletion.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ pybind11_add_module(${TARGET_NAME}
src/pipeline/datatype/ImageManipConfigBindings.cpp
src/pipeline/datatype/ImgDetectionsBindings.cpp
src/pipeline/datatype/ImgFrameBindings.cpp
src/pipeline/datatype/EncodedFrameBindings.cpp
src/pipeline/datatype/IMUDataBindings.cpp
src/pipeline/datatype/NNDataBindings.cpp
src/pipeline/datatype/SpatialImgDetectionsBindings.cpp
Expand Down
69 changes: 69 additions & 0 deletions examples/VideoEncoder/rgb_encoding_encodedframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

import depthai as dai

def frametype2str(ft):
    """Map a dai.EncodedFrame.FrameType value to a one-letter label.

    Returns "I", "P" or "B" for the known frame types. Any other value
    (e.g. FrameType.Unknown) yields "?" — previously the function fell
    through and returned None, which would crash the string join used
    when printing the run-length-encoded frame-type summary.
    """
    if ft == dai.EncodedFrame.FrameType.I:
        return "I"
    elif ft == dai.EncodedFrame.FrameType.P:
        return "P"
    elif ft == dai.EncodedFrame.FrameType.B:
        return "B"
    # Fallback for FrameType.Unknown or any future enum value
    return "?"

def compress(ls):
    """Run-length encode a sequence.

    Args:
        ls: A sequence of comparable items (here, frame-type strings).

    Returns:
        A list of ``(count, item)`` tuples, e.g.
        ``["I", "I", "P"] -> [(2, "I"), (1, "P")]``.
        An empty input returns ``[]`` — the original raised IndexError
        on ``ls[0]`` when no frames had been captured (e.g. immediate
        Ctrl+C before the first packet arrived).
    """
    res = []
    if not ls:
        return res
    curr = ls[0]
    count = 1
    for item in ls[1:]:
        if item == curr:
            count += 1
        else:
            res.append((count, curr))
            curr = item
            count = 1
    res.append((count, curr))
    return res


# Build the pipeline: color camera -> H.265 encoder -> XLink output
pipeline = dai.Pipeline()

# Nodes
colorCam = pipeline.create(dai.node.ColorCamera)
encoder = pipeline.create(dai.node.VideoEncoder)
xlinkOut = pipeline.create(dai.node.XLinkOut)

xlinkOut.setStreamName('h265')

# Camera / encoder configuration
colorCam.setBoardSocket(dai.CameraBoardSocket.CAM_A)
colorCam.setResolution(dai.ColorCameraProperties.SensorResolution.THE_4_K)
encoder.setDefaultProfilePreset(30, dai.VideoEncoderProperties.Profile.H265_MAIN)

# Wire the nodes together
colorCam.video.link(encoder.input)
encoder.out.link(xlinkOut.input)

frametypes = []
# Connect to device and start pipeline
with dai.Device(pipeline) as device:

    # Queue delivering the encoded packets produced on the device
    encQueue = device.getOutputQueue(name="h265", maxSize=30, blocking=True)

    # The .h265 file is a raw stream file (not playable yet)
    with open('video.h265', 'wb') as outFile:
        print("Press Ctrl+C to stop encoding...")
        try:
            while True:
                # Blocking call, will wait until a new data has arrived
                packet = encQueue.get()
                # Record the frame type (I/P/B) for the summary printed at exit
                frametypes.append(frametype2str(packet.getFrameType()))
                # Appends the packet data to the opened file
                packet.getData().tofile(outFile)
        except KeyboardInterrupt:
            # Keyboard interrupt (Ctrl + C) detected
            pass

    print("To view the encoded data, convert the stream file (.h265) into a video file (.mp4) using a command below:")
    print("ffmpeg -framerate 30 -i video.h265 -c copy video.mp4")

print(",".join([f"{c}{f}" for c, f in compress(frametypes)]))
3 changes: 3 additions & 0 deletions src/DatatypeBindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ void bind_featuretrackerconfig(pybind11::module& m, void* pCallstack);
void bind_imagemanipconfig(pybind11::module& m, void* pCallstack);
void bind_imgdetections(pybind11::module& m, void* pCallstack);
void bind_imgframe(pybind11::module& m, void* pCallstack);
void bind_encodedframe(pybind11::module& m, void* pCallstack);
void bind_imudata(pybind11::module& m, void* pCallstack);
void bind_nndata(pybind11::module& m, void* pCallstack);
void bind_spatialimgdetections(pybind11::module& m, void* pCallstack);
Expand All @@ -39,6 +40,7 @@ void DatatypeBindings::addToCallstack(std::deque<StackFunction>& callstack) {
callstack.push_front(bind_imagemanipconfig);
callstack.push_front(bind_imgdetections);
callstack.push_front(bind_imgframe);
callstack.push_front(bind_encodedframe);
callstack.push_front(bind_imudata);
callstack.push_front(bind_nndata);
callstack.push_front(bind_spatialimgdetections);
Expand Down Expand Up @@ -74,6 +76,7 @@ void DatatypeBindings::bind(pybind11::module& m, void* pCallstack){
datatypeEnum
.value("Buffer", DatatypeEnum::Buffer)
.value("ImgFrame", DatatypeEnum::ImgFrame)
.value("EncodedFrame", DatatypeEnum::EncodedFrame)
.value("NNData", DatatypeEnum::NNData)
.value("ImageManipConfig", DatatypeEnum::ImageManipConfig)
.value("CameraControl", DatatypeEnum::CameraControl)
Expand Down
135 changes: 135 additions & 0 deletions src/pipeline/datatype/EncodedFrameBindings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include "DatatypeBindings.hpp"
#include "depthai-shared/datatype/RawEncodedFrame.hpp"
#include "pipeline/CommonBindings.hpp"
#include <memory>
#include <unordered_map>

// depthai
#include "depthai/pipeline/datatype/EncodedFrame.hpp"

// pybind
#include <pybind11/chrono.h>
#include <pybind11/numpy.h>

/// Binds dai::RawEncodedFrame / dai::EncodedFrame (and their Profile /
/// FrameType enums) into the Python module. Registered on the datatype
/// callstack from DatatypeBindings::addToCallstack.
void bind_encodedframe(pybind11::module &m, void *pCallstack) {

  using namespace dai;

  // Declare the Python types up-front (empty shells) so datatypes bound
  // later in the callstack can already reference them in signatures.
  py::class_<RawEncodedFrame, RawBuffer, std::shared_ptr<RawEncodedFrame>>
      rawEncodedFrame(m, "RawEncodedFrame", DOC(dai, RawEncodedFrame));
  py::enum_<RawEncodedFrame::Profile> rawEncodedFrameProfile(rawEncodedFrame,
                                                             "Profile");
  py::enum_<RawEncodedFrame::FrameType> rawEncodedFrameType(
      rawEncodedFrame, "FrameType", DOC(dai, RawEncodedFrame, FrameType));
  py::class_<EncodedFrame, Buffer, std::shared_ptr<EncodedFrame>> encodedFrame(
      m, "EncodedFrame", DOC(dai, EncodedFrame));

  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////
  // Call the rest of the type defines, then perform the actual bindings
  Callstack *callstack = (Callstack *)pCallstack;
  auto cb = callstack->top();
  callstack->pop();
  cb(m, pCallstack);
  // Actual bindings
  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////

  // Metadata / raw

  rawEncodedFrame.def(py::init<>())
      .def_readwrite("quality", &RawEncodedFrame::quality)
      .def_readwrite("bitrate", &RawEncodedFrame::bitrate)
      .def_readwrite("profile", &RawEncodedFrame::profile)
      .def_readwrite("lossless", &RawEncodedFrame::lossless)
      .def_readwrite("type", &RawEncodedFrame::type)
      .def_readwrite("instanceNum", &RawEncodedFrame::instanceNum)
      .def_readwrite("sequenceNum", &RawEncodedFrame::sequenceNum)
      // Expose the split sec/nsec timestamps as a single float of seconds,
      // matching the convention used by the other raw datatype bindings
      .def_property(
          "ts",
          [](const RawEncodedFrame &o) {
            double ts = o.ts.sec + o.ts.nsec / 1000000000.0;
            return ts;
          },
          [](RawEncodedFrame &o, double ts) {
            o.ts.sec = ts;
            o.ts.nsec = (ts - o.ts.sec) * 1000000000.0;
          })
      .def_property(
          "tsDevice",
          [](const RawEncodedFrame &o) {
            double ts = o.tsDevice.sec + o.tsDevice.nsec / 1000000000.0;
            return ts;
          },
          [](RawEncodedFrame &o, double ts) {
            o.tsDevice.sec = ts;
            o.tsDevice.nsec = (ts - o.tsDevice.sec) * 1000000000.0;
          });

  rawEncodedFrameProfile.value("JPEG", EncodedFrame::Profile::JPEG)
      .value("AVC", EncodedFrame::Profile::AVC)
      .value("HEVC", EncodedFrame::Profile::HEVC);

  rawEncodedFrameType.value("I", EncodedFrame::FrameType::I)
      .value("P", EncodedFrame::FrameType::P)
      .value("B", EncodedFrame::FrameType::B)
      .value("Unknown", EncodedFrame::FrameType::Unknown);

  // Message
  encodedFrame
      .def(py::init<>())
      // getters
      .def("getTimestamp",
           py::overload_cast<>(&EncodedFrame::getTimestamp, py::const_),
           DOC(dai, EncodedFrame, getTimestamp))
      .def("getTimestampDevice",
           py::overload_cast<>(&EncodedFrame::getTimestampDevice, py::const_),
           DOC(dai, EncodedFrame, getTimestampDevice))
      .def("getInstanceNum", &EncodedFrame::getInstanceNum,
           DOC(dai, EncodedFrame, getInstanceNum))
      .def("getSequenceNum", &EncodedFrame::getSequenceNum,
           DOC(dai, EncodedFrame, getSequenceNum))
      .def("getExposureTime", &EncodedFrame::getExposureTime,
           DOC(dai, EncodedFrame, getExposureTime))
      .def("getSensitivity", &EncodedFrame::getSensitivity,
           DOC(dai, EncodedFrame, getSensitivity))
      .def("getColorTemperature", &EncodedFrame::getColorTemperature,
           DOC(dai, EncodedFrame, getColorTemperature))
      .def("getLensPosition", &EncodedFrame::getLensPosition,
           DOC(dai, EncodedFrame, getLensPosition))
      .def("getQuality", &EncodedFrame::getQuality,
           DOC(dai, EncodedFrame, getQuality))
      .def("getBitrate", &EncodedFrame::getBitrate,
           DOC(dai, EncodedFrame, getBitrate))
      .def("getFrameType", &EncodedFrame::getFrameType,
           DOC(dai, EncodedFrame, getFrameType))
      .def("getLossless", &EncodedFrame::getLossless,
           DOC(dai, EncodedFrame, getLossless))
      .def("getProfile", &EncodedFrame::getProfile,
           DOC(dai, EncodedFrame, getProfile))

      // setters — each cites its own docstring (the original copy-pasted
      // the getter DOC names here, attaching getter docs to the setters)
      .def("setTimestamp", &EncodedFrame::setTimestamp,
           DOC(dai, EncodedFrame, setTimestamp))
      .def("setTimestampDevice", &EncodedFrame::setTimestampDevice,
           DOC(dai, EncodedFrame, setTimestampDevice))
      .def("setSequenceNum", &EncodedFrame::setSequenceNum,
           DOC(dai, EncodedFrame, setSequenceNum))
      .def("setQuality", &EncodedFrame::setQuality,
           DOC(dai, EncodedFrame, setQuality))
      .def("setBitrate", &EncodedFrame::setBitrate,
           DOC(dai, EncodedFrame, setBitrate))
      .def("setFrameType", &EncodedFrame::setFrameType,
           DOC(dai, EncodedFrame, setFrameType))
      .def("setLossless", &EncodedFrame::setLossless,
           DOC(dai, EncodedFrame, setLossless))
      .def("setProfile", &EncodedFrame::setProfile,
           DOC(dai, EncodedFrame, setProfile));
  // Add aliases dai.EncodedFrame.FrameType and dai.EncodedFrame.Profile
  // (the enums live on RawEncodedFrame; mirror them on the message class)
  m.attr("EncodedFrame").attr("FrameType") =
      m.attr("RawEncodedFrame").attr("FrameType");
  m.attr("EncodedFrame").attr("Profile") =
      m.attr("RawEncodedFrame").attr("Profile");
}
1 change: 1 addition & 0 deletions src/pipeline/node/VideoEncoderBindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void bind_videoencoder(pybind11::module& m, void* pCallstack){
videoEncoder
.def_readonly("input", &VideoEncoder::input, DOC(dai, node, VideoEncoder, input), DOC(dai, node, VideoEncoder, input))
.def_readonly("bitstream", &VideoEncoder::bitstream, DOC(dai, node, VideoEncoder, bitstream), DOC(dai, node, VideoEncoder, bitstream))
.def_readonly("out", &VideoEncoder::out, DOC(dai, node, VideoEncoder, out), DOC(dai, node, VideoEncoder, out))
.def("setDefaultProfilePreset", static_cast<void(VideoEncoder::*)(float, VideoEncoderProperties::Profile)>(&VideoEncoder::setDefaultProfilePreset), py::arg("fps"), py::arg("profile"), DOC(dai, node, VideoEncoder, setDefaultProfilePreset))
.def("setDefaultProfilePreset", [](VideoEncoder& v, int width, int height, float fps, VideoEncoderProperties::Profile profile){
PyErr_WarnEx(PyExc_DeprecationWarning, "Input width/height no longer needed, automatically determined from first frame", 1);
Expand Down

0 comments on commit f80ce27

Please sign in to comment.