Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve performance by using DMA buffers #29

Merged
merged 1 commit into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 58 additions & 31 deletions camera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include <fcntl.h>
#include <unistd.h>

#include <linux/videodev2.h>
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
#include <libcamera/camera_manager.h>
#include <libcamera/camera.h>
#include <libcamera/formats.h>
Expand All @@ -16,7 +19,6 @@
#include <libcamera/framebuffer_allocator.h>
#include <libcamera/property_ids.h>
#include <libcamera/transform.h>
#include <linux/videodev2.h>

#include "camera.h"

Expand All @@ -31,17 +33,35 @@ using libcamera::Orientation;
using libcamera::PixelFormat;
using libcamera::Rectangle;
using libcamera::Request;
using libcamera::SharedFD;
using libcamera::Size;
using libcamera::Span;
using libcamera::Stream;
using libcamera::StreamRole;
using libcamera::StreamConfiguration;
using libcamera::Transform;
using libcamera::UniqueFD;

namespace controls = libcamera::controls;
namespace formats = libcamera::formats;
namespace properties = libcamera::properties;

// DMA heap device nodes, tried in the order listed.
static const char *heap_positions[] = {
    "/dev/dma_heap/vidbuf_cached",
    "/dev/dma_heap/linux,cma",
};

// https://github.com/raspberrypi/rpicam-apps/blob/6de1ab6a899df35f929b2a15c0831780bd8e750e/core/dma_heaps.cpp
// Opens the first entry of heap_positions that can be opened read/write
// (with O_CLOEXEC set).
//
// Returns the open file descriptor, which the caller must close(), or -1
// when none of the candidate paths could be opened.
static int create_dma_allocator() {
    // Iterate over the array elements directly. Bounding an index loop with
    // sizeof(heap_positions) would use the array size in bytes, not its
    // element count, and read past the end of the array.
    for (const char *heap_path : heap_positions) {
        int fd = open(heap_path, O_RDWR | O_CLOEXEC, 0);
        if (fd >= 0) {
            return fd;
        }
    }
    return -1;
}

static char errbuf[256];

static void set_error(const char *format, ...) {
Expand Down Expand Up @@ -80,10 +100,10 @@ struct CameraPriv {
std::unique_ptr<CameraManager> camera_manager;
std::shared_ptr<Camera> camera;
Stream *video_stream;
std::unique_ptr<FrameBufferAllocator> allocator;
std::vector<std::unique_ptr<Request>> requests;
std::mutex ctrls_mutex;
std::unique_ptr<ControlList> ctrls;
std::vector<std::unique_ptr<FrameBuffer>> frame_buffers;
std::map<FrameBuffer *, uint8_t *> mapped_buffers;
bool ts_initialized;
uint64_t ts_start;
Expand All @@ -96,22 +116,6 @@ static int get_v4l2_colorspace(std::optional<ColorSpace> const &cs) {
return V4L2_COLORSPACE_SMPTE170M;
}

// https://github.com/raspberrypi/libcamera-apps/blob/a5b5506a132056ac48ba22bc581cc394456da339/core/libcamera_app.cpp#L824
// Maps the leading run of planes that share one file descriptor into memory.
//
// Plane lengths are summed until the last plane is reached or the next plane
// uses a different file descriptor; that many bytes are then mapped
// read/write and shared, starting at offset 0 of the descriptor.
//
// Returns the mmap() result unchecked, or NULL when the buffer has no planes.
static uint8_t *map_buffer(FrameBuffer *buffer) {
    const auto &planes = buffer->planes();
    const size_t plane_count = planes.size();
    size_t total_length = 0;

    for (size_t idx = 0; idx < plane_count; idx++) {
        const FrameBuffer::Plane &current = planes[idx];
        total_length += current.length;

        // Stop accumulating at the end of the run of planes backed by the same fd.
        const bool is_last = (idx + 1 == plane_count);
        if (is_last || current.fd.get() != planes[idx + 1].fd.get()) {
            void *mapping = mmap(NULL, total_length, PROT_READ | PROT_WRITE, MAP_SHARED, current.fd.get(), 0);
            return (uint8_t *)mapping;
        }
    }

    return NULL;
}

// https://github.com/raspberrypi/libcamera-apps/blob/a6267d51949d0602eedf60f3ddf8c6685f652812/core/options.cpp#L101
static void set_hdr(bool hdr) {
bool ok = false;
Expand Down Expand Up @@ -175,7 +179,7 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
return false;
}

std::vector<libcamera::StreamRole> stream_roles = { StreamRole::VideoRecording };
std::vector<StreamRole> stream_roles = { StreamRole::VideoRecording };
if (params->mode != NULL) {
stream_roles.push_back(StreamRole::Raw);
}
Expand All @@ -187,7 +191,7 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
}

StreamConfiguration &video_stream_conf = conf->at(0);
video_stream_conf.size = libcamera::Size(params->width, params->height);
video_stream_conf.size = Size(params->width, params->height);
video_stream_conf.pixelFormat = formats::YUV420;
video_stream_conf.bufferCount = params->buffer_count;
if (params->width >= 1280 || params->height >= 720) {
Expand Down Expand Up @@ -234,31 +238,54 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
camp->requests.push_back(std::move(request));
}

camp->allocator = std::make_unique<FrameBufferAllocator>(camp->camera);
// allocate DMA buffers manually instead of using default buffers provided by libcamera.
// this improves performance by a lot.
// https://forums.raspberrypi.com/viewtopic.php?t=352554
// https://github.com/raspberrypi/rpicam-apps/blob/6de1ab6a899df35f929b2a15c0831780bd8e750e/core/rpicam_app.cpp#L1012

int allocator_fd = create_dma_allocator();
if (allocator_fd < 0) {
set_error("failed to open dma heap allocator");
return false;
}

for (StreamConfiguration &stream_conf : *conf) {
Stream *stream = stream_conf.stream();

res = camp->allocator->allocate(stream);
if (res < 0) {
set_error("allocate() failed");
return false;
}
for (unsigned int i = 0; i < params->buffer_count; i++) {
struct dma_heap_allocation_data alloc = {};
alloc.len = stream_conf.frameSize;
alloc.fd_flags = O_CLOEXEC | O_RDWR;
int ret = ioctl(allocator_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);
if (ret < 0) {
set_error("failed to allocate buffer in dma heap");
return false;
}
UniqueFD fd(alloc.fd);

int i = 0;
for (const std::unique_ptr<FrameBuffer> &buffer : camp->allocator->buffers(stream)) {
// map buffer of the video stream only
std::vector<FrameBuffer::Plane> plane(1);
plane[0].fd = SharedFD(std::move(fd));
plane[0].offset = 0;
plane[0].length = stream_conf.frameSize;

camp->frame_buffers.push_back(std::make_unique<FrameBuffer>(plane));
FrameBuffer *fb = camp->frame_buffers.back().get();

// map buffers of the video stream only
if (stream == video_stream_conf.stream()) {
camp->mapped_buffers[buffer.get()] = map_buffer(buffer.get());
camp->mapped_buffers[fb] = (uint8_t*)mmap(NULL, stream_conf.frameSize, PROT_READ | PROT_WRITE, MAP_SHARED, plane[0].fd.get(), 0);
}

res = camp->requests.at(i++)->addBuffer(stream, buffer.get());
res = camp->requests.at(i)->addBuffer(stream, fb);
if (res != 0) {
set_error("addBuffer() failed");
return false;
}
}
}

close(allocator_fd);

camp->params = params;
camp->frame_cb = frame_cb;
*cam = camp.release();
Expand Down
4 changes: 1 addition & 3 deletions encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include "encoder_soft_h264.h"
#include "encoder.h"

#define HARDWARE_DEVICE "/dev/video11"

static char errbuf[256];

static void set_error(const char *format, ...) {
Expand All @@ -38,7 +36,7 @@ typedef struct {
} encoder_priv_t;

static bool supports_hardware_h264() {
int fd = open(HARDWARE_DEVICE, O_RDWR, 0);
int fd = open(ENCODER_HARD_H264_DEVICE, O_RDWR, 0);
if (fd < 0) {
return false;
}
Expand Down
5 changes: 1 addition & 4 deletions encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@

typedef void encoder_t;

typedef void (*encoder_output_cb)(
uint64_t ts,
const uint8_t *buf,
uint64_t size);
typedef void (*encoder_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);

const char *encoder_get_error();
bool encoder_create(const parameters_t *params, int stride, int colorspace, encoder_output_cb output_cb, encoder_t **enc);
Expand Down
14 changes: 6 additions & 8 deletions encoder_hard_h264.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

#include "encoder_hard_h264.h"

#define DEVICE "/dev/video11"

static char errbuf[256];

static void set_error(const char *format, ...) {
Expand Down Expand Up @@ -61,11 +59,11 @@ static void *output_thread(void *userdata) {
exit(1);
}

const uint8_t *mapped = (const uint8_t *)encp->capture_buffers[buf.index];
int size = buf.m.planes[0].bytesused;
uint64_t ts = ((uint64_t)buf.timestamp.tv_sec * (uint64_t)1000000) + (uint64_t)buf.timestamp.tv_usec;

const uint8_t *buf_mem = (const uint8_t *)encp->capture_buffers[buf.index];
int buf_size = buf.m.planes[0].bytesused;
encp->output_cb(ts, buf_mem, buf_size);
encp->output_cb(mapped, size, ts);

res = ioctl(encp->fd, VIDIOC_QBUF, &buf);
if (res != 0) {
Expand Down Expand Up @@ -103,7 +101,7 @@ bool encoder_hard_h264_create(const parameters_t *params, int stride, int colors
encoder_hard_h264_priv_t *encp = (encoder_hard_h264_priv_t *)(*enc);
memset(encp, 0, sizeof(encoder_hard_h264_priv_t));

encp->fd = open(DEVICE, O_RDWR, 0);
encp->fd = open(ENCODER_HARD_H264_DEVICE, O_RDWR, 0);
if (encp->fd < 0) {
set_error("unable to open device");
goto failed;
Expand Down Expand Up @@ -266,7 +264,7 @@ bool encoder_hard_h264_create(const parameters_t *params, int stride, int colors
return false;
}

void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts) {
void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts) {
encoder_hard_h264_priv_t *encp = (encoder_hard_h264_priv_t *)enc;

int index = encp->cur_buffer++;
Expand All @@ -282,7 +280,7 @@ void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer,
buf.timestamp.tv_sec = ts / 1000000;
buf.timestamp.tv_usec = ts % 1000000;
buf.m.planes = planes;
buf.m.planes[0].m.fd = buffer_fd;
buf.m.planes[0].m.fd = fd;
buf.m.planes[0].bytesused = size;
buf.m.planes[0].length = size;
int res = ioctl(encp->fd, VIDIOC_QBUF, &buf);
Expand Down
6 changes: 4 additions & 2 deletions encoder_hard_h264.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

#include "parameters.h"

#define ENCODER_HARD_H264_DEVICE "/dev/video11"

typedef void encoder_hard_h264_t;

typedef void (*encoder_hard_h264_output_cb)(uint64_t ts, const uint8_t *buf, uint64_t size);
typedef void (*encoder_hard_h264_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);

const char *encoder_hard_h264_get_error();
bool encoder_hard_h264_create(const parameters_t *params, int stride, int colorspace, encoder_hard_h264_output_cb output_cb, encoder_hard_h264_t **enc);
void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts);
void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts);
void encoder_hard_h264_reload_params(encoder_hard_h264_t *enc, const parameters_t *params);

#endif
6 changes: 3 additions & 3 deletions encoder_soft_h264.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,10 @@ bool encoder_soft_h264_create(const parameters_t *params, int stride, int colors
return false;
}

void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts) {
void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts) {
encoder_soft_h264_priv_t *encp = (encoder_soft_h264_priv_t *)enc;

encp->x_pic_in.img.plane[0] = mapped_buffer; // Y
encp->x_pic_in.img.plane[0] = mapped; // Y
encp->x_pic_in.img.plane[1] = encp->x_pic_in.img.plane[0] + encp->x_pic_in.img.i_stride[0] * encp->params->height; // U
encp->x_pic_in.img.plane[2] = encp->x_pic_in.img.plane[1] + (encp->x_pic_in.img.i_stride[0] / 2) * (encp->params->height / 2); // V
encp->x_pic_in.i_pts = encp->next_pts++;
Expand All @@ -113,7 +113,7 @@ void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer,

pthread_mutex_unlock(&encp->mutex);

encp->output_cb(ts, nal->p_payload, frame_size);
encp->output_cb(nal->p_payload, frame_size, ts);
}

void encoder_soft_h264_reload_params(encoder_soft_h264_t *enc, const parameters_t *params) {
Expand Down
4 changes: 2 additions & 2 deletions encoder_soft_h264.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

typedef void encoder_soft_h264_t;

typedef void (*encoder_soft_h264_output_cb)(uint64_t ts, const uint8_t *buf, uint64_t size);
typedef void (*encoder_soft_h264_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);

const char *encoder_soft_h264_get_error();
bool encoder_soft_h264_create(const parameters_t *params, int stride, int colorspace, encoder_soft_h264_output_cb output_cb, encoder_soft_h264_t **enc);
void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts);
void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts);
void encoder_soft_h264_reload_params(encoder_soft_h264_t *enc, const parameters_t *params);

#endif
27 changes: 20 additions & 7 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <sys/ioctl.h>

#include <linux/dma-buf.h>

#include "parameters.h"
#include "pipe.h"
Expand All @@ -18,17 +21,27 @@ static text_t *text;
static encoder_t *enc;

static void on_frame(
uint8_t *mapped_buffer,
int buffer_fd,
uint8_t *mapped,
int fd,
uint64_t size,
uint64_t timestamp) {
text_draw(text, mapped_buffer);
encoder_encode(enc, mapped_buffer, buffer_fd, size, timestamp);
uint64_t ts) {
// mapped DMA buffers require a DMA_BUF_IOCTL_SYNC before and after usage.
// https://forums.raspberrypi.com/viewtopic.php?t=352554
struct dma_buf_sync dma_sync = {0};
dma_sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
ioctl(fd, DMA_BUF_IOCTL_SYNC, &dma_sync);

text_draw(text, mapped);

dma_sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
ioctl(fd, DMA_BUF_IOCTL_SYNC, &dma_sync);

encoder_encode(enc, mapped, fd, size, ts);
}

static void on_encoder_output(uint64_t ts, const uint8_t *buf, uint64_t size) {
static void on_encoder_output(const uint8_t *mapped, uint64_t size, uint64_t ts) {
pthread_mutex_lock(&pipe_video_mutex);
pipe_write_buf(pipe_video_fd, ts, buf, size);
pipe_write_buf(pipe_video_fd, mapped, size, ts);
pthread_mutex_unlock(&pipe_video_mutex);
}

Expand Down
8 changes: 4 additions & 4 deletions pipe.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ void pipe_write_ready(int fd) {
write(fd, buf, n);
}

void pipe_write_buf(int fd, uint64_t ts, const uint8_t *buf, uint32_t n) {
void pipe_write_buf(int fd, const uint8_t *mapped, uint32_t size, uint64_t ts) {
char head[] = {'b'};
n += 1 + sizeof(uint64_t);
write(fd, &n, 4);
size += 1 + sizeof(uint64_t);
write(fd, &size, 4);
write(fd, head, 1);
write(fd, &ts, sizeof(uint64_t));
write(fd, buf, n - 1 - sizeof(uint64_t));
write(fd, mapped, size - 1 - sizeof(uint64_t));
}

uint32_t pipe_read(int fd, uint8_t **pbuf) {
Expand Down
2 changes: 1 addition & 1 deletion pipe.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

void pipe_write_error(int fd, const char *format, ...);
void pipe_write_ready(int fd);
void pipe_write_buf(int fd, uint64_t ts, const uint8_t *buf, uint32_t n);
void pipe_write_buf(int fd, const uint8_t *mapped, uint32_t size, uint64_t ts);
uint32_t pipe_read(int fd, uint8_t **pbuf);

#endif