From e508ceb72553b1abcff97cfb17f7a8967125de3e Mon Sep 17 00:00:00 2001
From: aler9 <46489434+aler9@users.noreply.github.com>
Date: Mon, 21 Oct 2024 00:41:03 +0200
Subject: [PATCH] improve performance by using DMA buffers

this speeds up text drawing and software encoding.
---
 camera.cpp          | 89 +++++++++++++++++++++++++++++----------------
 encoder.c           |  4 +-
 encoder.h           |  5 +--
 encoder_hard_h264.c | 14 +++----
 encoder_hard_h264.h |  6 ++-
 encoder_soft_h264.c |  6 +--
 encoder_soft_h264.h |  4 +-
 main.c              | 27 ++++++++++----
 pipe.c              |  8 ++--
 pipe.h              |  2 +-
 10 files changed, 100 insertions(+), 65 deletions(-)

diff --git a/camera.cpp b/camera.cpp
index 9ecec38..78e3e0f 100644
--- a/camera.cpp
+++ b/camera.cpp
@@ -8,6 +8,9 @@
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -16,7 +19,6 @@
 #include
 #include
 #include
-#include
 
 #include "camera.h"
 
@@ -31,17 +33,35 @@ using libcamera::Orientation;
 using libcamera::PixelFormat;
 using libcamera::Rectangle;
 using libcamera::Request;
+using libcamera::SharedFD;
 using libcamera::Size;
 using libcamera::Span;
 using libcamera::Stream;
 using libcamera::StreamRole;
 using libcamera::StreamConfiguration;
 using libcamera::Transform;
+using libcamera::UniqueFD;
 
 namespace controls = libcamera::controls;
 namespace formats = libcamera::formats;
 namespace properties = libcamera::properties;
 
+static const char *heap_positions[] = {
+    "/dev/dma_heap/vidbuf_cached",
+    "/dev/dma_heap/linux,cma",
+};
+
+// https://github.com/raspberrypi/rpicam-apps/blob/6de1ab6a899df35f929b2a15c0831780bd8e750e/core/dma_heaps.cpp
+static int create_dma_allocator() {
+    for (unsigned int i = 0; i < sizeof(heap_positions) / sizeof(heap_positions[0]); i++) {
+        int fd = open(heap_positions[i], O_RDWR | O_CLOEXEC, 0);
+        if (fd >= 0) {
+            return fd;
+        }
+    }
+    return -1;
+}
+
 static char errbuf[256];
 
 static void set_error(const char *format, ...)
 {
@@ -80,10 +100,10 @@ struct CameraPriv {
     std::unique_ptr<CameraManager> camera_manager;
     std::shared_ptr<Camera> camera;
     Stream *video_stream;
-    std::unique_ptr<FrameBufferAllocator> allocator;
     std::vector<std::unique_ptr<Request>> requests;
     std::mutex ctrls_mutex;
     std::unique_ptr<ControlList> ctrls;
+    std::vector<std::unique_ptr<FrameBuffer>> frame_buffers;
     std::map<FrameBuffer *, uint8_t *> mapped_buffers;
     bool ts_initialized;
     uint64_t ts_start;
@@ -96,22 +116,6 @@ static int get_v4l2_colorspace(std::optional<ColorSpace> const &cs) {
     return V4L2_COLORSPACE_SMPTE170M;
 }
 
-// https://github.com/raspberrypi/libcamera-apps/blob/a5b5506a132056ac48ba22bc581cc394456da339/core/libcamera_app.cpp#L824
-static uint8_t *map_buffer(FrameBuffer *buffer) {
-    size_t buffer_size = 0;
-
-    for (unsigned i = 0; i < buffer->planes().size(); i++) {
-        const FrameBuffer::Plane &plane = buffer->planes()[i];
-        buffer_size += plane.length;
-
-        if (i == buffer->planes().size() - 1 || plane.fd.get() != buffer->planes()[i + 1].fd.get()) {
-            return (uint8_t *)mmap(NULL, buffer_size, PROT_READ | PROT_WRITE, MAP_SHARED, plane.fd.get(), 0);
-        }
-    }
-
-    return NULL;
-}
-
 // https://github.com/raspberrypi/libcamera-apps/blob/a6267d51949d0602eedf60f3ddf8c6685f652812/core/options.cpp#L101
 static void set_hdr(bool hdr) {
     bool ok = false;
@@ -175,7 +179,7 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
         return false;
     }
 
-    std::vector<libcamera::StreamRole> stream_roles = { StreamRole::VideoRecording };
+    std::vector<StreamRole> stream_roles = { StreamRole::VideoRecording };
 
     if (params->mode != NULL) {
        stream_roles.push_back(StreamRole::Raw);
     }
@@ -187,7 +191,7 @@
     }
 
     StreamConfiguration &video_stream_conf = conf->at(0);
-    video_stream_conf.size = libcamera::Size(params->width, params->height);
+    video_stream_conf.size = Size(params->width, params->height);
     video_stream_conf.pixelFormat = formats::YUV420;
     video_stream_conf.bufferCount = params->buffer_count;
 
     if (params->width >= 1280 || params->height >= 720) {
@@ -234,24 +238,45 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
         camp->requests.push_back(std::move(request));
     }
 
-    camp->allocator = std::make_unique<FrameBufferAllocator>(camp->camera);
+    // allocate DMA buffers manually instead of using default buffers provided by libcamera.
+    // this improves performance by a lot.
+    // https://forums.raspberrypi.com/viewtopic.php?t=352554
+    // https://github.com/raspberrypi/rpicam-apps/blob/6de1ab6a899df35f929b2a15c0831780bd8e750e/core/rpicam_app.cpp#L1012
+
+    int allocator_fd = create_dma_allocator();
+    if (allocator_fd < 0) {
+        set_error("failed to open dma heap allocator");
+        return false;
+    }
+
     for (StreamConfiguration &stream_conf : *conf) {
         Stream *stream = stream_conf.stream();
 
-        res = camp->allocator->allocate(stream);
-        if (res < 0) {
-            set_error("allocate() failed");
-            return false;
-        }
+        for (unsigned int i = 0; i < params->buffer_count; i++) {
+            struct dma_heap_allocation_data alloc = {};
+            alloc.len = stream_conf.frameSize;
+            alloc.fd_flags = O_CLOEXEC | O_RDWR;
+            int ret = ioctl(allocator_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);
+            if (ret < 0) {
+                set_error("failed to allocate buffer in dma heap");
+                return false;
+            }
+            UniqueFD fd(alloc.fd);
 
-        int i = 0;
-        for (const std::unique_ptr<FrameBuffer> &buffer : camp->allocator->buffers(stream)) {
-            // map buffer of the video stream only
+            std::vector<FrameBuffer::Plane> plane(1);
+            plane[0].fd = SharedFD(std::move(fd));
+            plane[0].offset = 0;
+            plane[0].length = stream_conf.frameSize;
+
+            camp->frame_buffers.push_back(std::make_unique<FrameBuffer>(plane));
+            FrameBuffer *fb = camp->frame_buffers.back().get();
+
+            // map buffers of the video stream only
             if (stream == video_stream_conf.stream()) {
-                camp->mapped_buffers[buffer.get()] = map_buffer(buffer.get());
+                camp->mapped_buffers[fb] = (uint8_t*)mmap(NULL, stream_conf.frameSize, PROT_READ | PROT_WRITE, MAP_SHARED, plane[0].fd.get(), 0);
             }
 
-            res = camp->requests.at(i++)->addBuffer(stream, buffer.get());
+            res = camp->requests.at(i)->addBuffer(stream, fb);
             if (res != 0) {
                 set_error("addBuffer() failed");
                 return false;
@@ -259,6 +284,8 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
         }
     }
 
+    close(allocator_fd);
+
     camp->params = params;
     camp->frame_cb = frame_cb;
     *cam = camp.release();
diff --git a/encoder.c b/encoder.c
index 1d48cc1..266709f 100644
--- a/encoder.c
+++ b/encoder.c
@@ -13,8 +13,6 @@
 #include "encoder_soft_h264.h"
 #include "encoder.h"
 
-#define HARDWARE_DEVICE "/dev/video11"
-
 static char errbuf[256];
 
 static void set_error(const char *format, ...) {
@@ -38,7 +36,7 @@
 } encoder_priv_t;
 
 static bool supports_hardware_h264() {
-    int fd = open(HARDWARE_DEVICE, O_RDWR, 0);
+    int fd = open(ENCODER_HARD_H264_DEVICE, O_RDWR, 0);
     if (fd < 0) {
         return false;
     }
diff --git a/encoder.h b/encoder.h
index 6f6e7af..713cc99 100644
--- a/encoder.h
+++ b/encoder.h
@@ -5,10 +5,7 @@
 
 typedef void encoder_t;
 
-typedef void (*encoder_output_cb)(
-    uint64_t ts,
-    const uint8_t *buf,
-    uint64_t size);
+typedef void (*encoder_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);
 
 const char *encoder_get_error();
 bool encoder_create(const parameters_t *params, int stride, int colorspace, encoder_output_cb output_cb, encoder_t **enc);
diff --git a/encoder_hard_h264.c b/encoder_hard_h264.c
index 6309b24..2c3e022 100644
--- a/encoder_hard_h264.c
+++ b/encoder_hard_h264.c
@@ -15,8 +15,6 @@
 #include "encoder_hard_h264.h"
 
-#define DEVICE "/dev/video11"
-
 static char errbuf[256];
 
 static void set_error(const char *format, ...)
 {
@@ -61,11 +59,11 @@ static void *output_thread(void *userdata) {
         exit(1);
     }
 
+    const uint8_t *mapped = (const uint8_t *)encp->capture_buffers[buf.index];
+    int size = buf.m.planes[0].bytesused;
     uint64_t ts = ((uint64_t)buf.timestamp.tv_sec * (uint64_t)1000000) + (uint64_t)buf.timestamp.tv_usec;
 
-    const uint8_t *buf_mem = (const uint8_t *)encp->capture_buffers[buf.index];
-    int buf_size = buf.m.planes[0].bytesused;
-    encp->output_cb(ts, buf_mem, buf_size);
+    encp->output_cb(mapped, size, ts);
 
     res = ioctl(encp->fd, VIDIOC_QBUF, &buf);
     if (res != 0) {
@@ -103,7 +101,7 @@ bool encoder_hard_h264_create(const parameters_t *params, int stride, int colors
     encoder_hard_h264_priv_t *encp = (encoder_hard_h264_priv_t *)(*enc);
     memset(encp, 0, sizeof(encoder_hard_h264_priv_t));
 
-    encp->fd = open(DEVICE, O_RDWR, 0);
+    encp->fd = open(ENCODER_HARD_H264_DEVICE, O_RDWR, 0);
     if (encp->fd < 0) {
         set_error("unable to open device");
         goto failed;
     }
@@ -266,7 +264,7 @@ bool encoder_hard_h264_create(const parameters_t *params, int stride, int colors
     return false;
 }
 
-void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts) {
+void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts) {
     encoder_hard_h264_priv_t *encp = (encoder_hard_h264_priv_t *)enc;
 
     int index = encp->cur_buffer++;
@@ -282,7 +280,7 @@ void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer,
     buf.timestamp.tv_sec = ts / 1000000;
     buf.timestamp.tv_usec = ts % 1000000;
     buf.m.planes = planes;
-    buf.m.planes[0].m.fd = buffer_fd;
+    buf.m.planes[0].m.fd = fd;
     buf.m.planes[0].bytesused = size;
     buf.m.planes[0].length = size;
     int res = ioctl(encp->fd, VIDIOC_QBUF, &buf);
diff --git a/encoder_hard_h264.h b/encoder_hard_h264.h
index ef2c9b6..dfaa92d 100644
--- a/encoder_hard_h264.h
+++ b/encoder_hard_h264.h
@@ -3,13 +3,15 @@
 
 #include "parameters.h"
 
+#define ENCODER_HARD_H264_DEVICE "/dev/video11"
+
 typedef void encoder_hard_h264_t;
 
-typedef void (*encoder_hard_h264_output_cb)(uint64_t ts, const uint8_t *buf, uint64_t size);
+typedef void (*encoder_hard_h264_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);
 
 const char *encoder_hard_h264_get_error();
 bool encoder_hard_h264_create(const parameters_t *params, int stride, int colorspace, encoder_hard_h264_output_cb output_cb, encoder_hard_h264_t **enc);
-void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts);
+void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts);
 void encoder_hard_h264_reload_params(encoder_hard_h264_t *enc, const parameters_t *params);
 
 #endif
diff --git a/encoder_soft_h264.c b/encoder_soft_h264.c
index 1e5ea00..96f031c 100644
--- a/encoder_soft_h264.c
+++ b/encoder_soft_h264.c
@@ -97,10 +97,10 @@ bool encoder_soft_h264_create(const parameters_t *params, int stride, int colors
     return false;
 }
 
-void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts) {
+void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts) {
     encoder_soft_h264_priv_t *encp = (encoder_soft_h264_priv_t *)enc;
 
-    encp->x_pic_in.img.plane[0] = mapped_buffer; // Y
+    encp->x_pic_in.img.plane[0] = mapped; // Y
     encp->x_pic_in.img.plane[1] = encp->x_pic_in.img.plane[0] + encp->x_pic_in.img.i_stride[0] * encp->params->height; // U
     encp->x_pic_in.img.plane[2] =
         encp->x_pic_in.img.plane[1] + (encp->x_pic_in.img.i_stride[0] / 2) * (encp->params->height / 2); // V
     encp->x_pic_in.i_pts = encp->next_pts++;
@@ -113,7 +113,7 @@ void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer,
 
     pthread_mutex_unlock(&encp->mutex);
 
-    encp->output_cb(ts, nal->p_payload, frame_size);
+    encp->output_cb(nal->p_payload, frame_size, ts);
 }
 
 void encoder_soft_h264_reload_params(encoder_soft_h264_t *enc, const parameters_t *params) {
diff --git a/encoder_soft_h264.h b/encoder_soft_h264.h
index 0e74e56..60c8834 100644
--- a/encoder_soft_h264.h
+++ b/encoder_soft_h264.h
@@ -5,11 +5,11 @@
 
 typedef void encoder_soft_h264_t;
 
-typedef void (*encoder_soft_h264_output_cb)(uint64_t ts, const uint8_t *buf, uint64_t size);
+typedef void (*encoder_soft_h264_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);
 
 const char *encoder_soft_h264_get_error();
 bool encoder_soft_h264_create(const parameters_t *params, int stride, int colorspace, encoder_soft_h264_output_cb output_cb, encoder_soft_h264_t **enc);
-void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts);
+void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts);
 void encoder_soft_h264_reload_params(encoder_soft_h264_t *enc, const parameters_t *params);
 
 #endif
diff --git a/main.c b/main.c
index c3337d1..110879f 100644
--- a/main.c
+++ b/main.c
@@ -5,6 +5,9 @@
 #include
 #include
 #include
+#include
+
+#include
 
 #include "parameters.h"
 #include "pipe.h"
@@ -18,17 +21,27 @@
 static text_t *text;
 static encoder_t *enc;
 
 static void on_frame(
-    uint8_t *mapped_buffer,
-    int buffer_fd,
+    uint8_t *mapped,
+    int fd,
     uint64_t size,
-    uint64_t timestamp) {
-    text_draw(text, mapped_buffer);
-    encoder_encode(enc, mapped_buffer, buffer_fd, size, timestamp);
+    uint64_t ts) {
+    // mapped DMA buffers require a DMA_BUF_IOCTL_SYNC before and after usage.
+    // https://forums.raspberrypi.com/viewtopic.php?t=352554
+    struct dma_buf_sync dma_sync = {0};
+    dma_sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
+    ioctl(fd, DMA_BUF_IOCTL_SYNC, &dma_sync);
+
+    text_draw(text, mapped);
+
+    dma_sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
+    ioctl(fd, DMA_BUF_IOCTL_SYNC, &dma_sync);
+
+    encoder_encode(enc, mapped, fd, size, ts);
 }
 
-static void on_encoder_output(uint64_t ts, const uint8_t *buf, uint64_t size) {
+static void on_encoder_output(const uint8_t *mapped, uint64_t size, uint64_t ts) {
     pthread_mutex_lock(&pipe_video_mutex);
-    pipe_write_buf(pipe_video_fd, ts, buf, size);
+    pipe_write_buf(pipe_video_fd, mapped, size, ts);
     pthread_mutex_unlock(&pipe_video_mutex);
 }
diff --git a/pipe.c b/pipe.c
index 38d2437..fd43d41 100644
--- a/pipe.c
+++ b/pipe.c
@@ -25,13 +25,13 @@ void pipe_write_ready(int fd) {
     write(fd, buf, n);
 }
 
-void pipe_write_buf(int fd, uint64_t ts, const uint8_t *buf, uint32_t n) {
+void pipe_write_buf(int fd, const uint8_t *mapped, uint32_t size, uint64_t ts) {
     char head[] = {'b'};
-    n += 1 + sizeof(uint64_t);
-    write(fd, &n, 4);
+    size += 1 + sizeof(uint64_t);
+    write(fd, &size, 4);
     write(fd, head, 1);
     write(fd, &ts, sizeof(uint64_t));
-    write(fd, buf, n - 1 - sizeof(uint64_t));
+    write(fd, mapped, size - 1 - sizeof(uint64_t));
 }
 
 uint32_t pipe_read(int fd, uint8_t **pbuf) {
diff --git a/pipe.h b/pipe.h
index 2663045..83b76b5 100644
--- a/pipe.h
+++ b/pipe.h
@@ -6,7 +6,7 @@
 void pipe_write_error(int fd, const char *format, ...);
 
 void pipe_write_ready(int fd);
-void pipe_write_buf(int fd, uint64_t ts, const uint8_t *buf, uint32_t n);
+void pipe_write_buf(int fd, const uint8_t *mapped, uint32_t size, uint64_t ts);
 uint32_t pipe_read(int fd, uint8_t **pbuf);
 
 #endif
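
Background on the interfaces used above: the patch allocates frame buffers from a Linux DMA heap (/dev/dma_heap/*, DMA_HEAP_IOCTL_ALLOC), wraps the returned dma-buf file descriptors in libcamera FrameBuffer objects, and brackets every CPU access to the mapped memory with DMA_BUF_IOCTL_SYNC so CPU caches stay coherent with the devices sharing the buffer. The cached heap mapping is the likely reason CPU-side work such as text drawing and x264 software encoding gets faster than with the default buffers. The following is a minimal standalone sketch of the same allocate/map/sync cycle, not part of the patch; the heap path, the 1 MiB size and the memset stand-in for text_draw()/encoding are illustrative assumptions, and error handling is reduced to early returns.

/* sketch: allocate one buffer from a DMA heap, map it, and bracket CPU access
   with DMA_BUF_IOCTL_SYNC. assumes a Linux system exposing /dev/dma_heap. */
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>

int main(void) {
    // "linux,cma" is used here as an example heap; the patch prefers
    // "vidbuf_cached" when it is available.
    int heap_fd = open("/dev/dma_heap/linux,cma", O_RDWR | O_CLOEXEC);
    if (heap_fd < 0) {
        return 1;
    }

    // ask the heap for a 1 MiB buffer; the kernel returns a dma-buf fd in alloc.fd.
    struct dma_heap_allocation_data alloc = {0};
    alloc.len = 1024 * 1024;
    alloc.fd_flags = O_RDWR | O_CLOEXEC;
    if (ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc) < 0) {
        close(heap_fd);
        return 1;
    }
    close(heap_fd); // the allocator fd is not needed once the buffer exists

    // map the dma-buf into the process, like camera.cpp does per frame buffer.
    uint8_t *mapped = mmap(NULL, alloc.len, PROT_READ | PROT_WRITE, MAP_SHARED, alloc.fd, 0);
    if (mapped == MAP_FAILED) {
        close(alloc.fd);
        return 1;
    }

    // CPU access must be bracketed by sync ioctls, as in on_frame() above.
    struct dma_buf_sync sync = {0};
    sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
    ioctl(alloc.fd, DMA_BUF_IOCTL_SYNC, &sync);

    memset(mapped, 0, alloc.len); // stand-in for text drawing / encoder input

    sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
    ioctl(alloc.fd, DMA_BUF_IOCTL_SYNC, &sync);

    munmap(mapped, alloc.len);
    close(alloc.fd);
    return 0;
}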