Skip to content

Commit

Permalink
working converter
Browse files Browse the repository at this point in the history
  • Loading branch information
mickel8 committed Aug 2, 2024
1 parent 407330e commit 09f0db6
Show file tree
Hide file tree
Showing 14 changed files with 198 additions and 113 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ XAV_SO = $(PRIV_DIR)/libxav.so
# uncomment to compile with debug logs
# XAV_DEBUG_LOGS = -DXAV_DEBUG=1

HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/utils.h
SOURCES = $(XAV_DIR)/xav_nif.c $(XAV_DIR)/reader.c $(XAV_DIR)/decoder.c $(XAV_DIR)/utils.c
HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/converter.h $(XAV_DIR)/utils.h
SOURCES = $(XAV_DIR)/xav_nif.c $(XAV_DIR)/reader.c $(XAV_DIR)/decoder.c $(XAV_DIR)/converter.c $(XAV_DIR)/utils.c

CFLAGS = $(XAV_DEBUG_LOGS) -fPIC -shared
IFLAGS = -I$(ERTS_INCLUDE_DIR) -I$(XAV_DIR)
Expand Down
66 changes: 66 additions & 0 deletions c_src/xav/converter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#include "converter.h"
#include <errno.h>
#include <libavutil/channel_layout.h>
#include <libavutil/error.h>
#include <libavutil/mem.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#include <stdint.h>

#include "utils.h"

/**
 * Prepares a converter that transforms audio from the given input
 * parameters (channel layout, sample rate, sample format) into the given
 * output parameters.
 *
 * The output sample rate, channel layout and sample format are also stored
 * in the converter because converter_convert() needs them to size its
 * output buffer.
 *
 * @return the result of swr_init() on success (non-negative), or a negative
 *         AVERROR code on failure. On failure the swresample context is
 *         released and c->swr_ctx is left NULL, so calling converter_free()
 *         afterwards is still safe.
 */
int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
                   enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlayout,
                   int out_sample_rate, enum AVSampleFormat out_sample_fmt) {
  int ret;

  c->swr_ctx = swr_alloc();
  if (c->swr_ctx == NULL) {
    XAV_LOG_DEBUG("Couldn't allocate swr context.");
    return AVERROR(ENOMEM);
  }

  c->in_sample_rate = in_sample_rate;
  c->out_sample_rate = out_sample_rate;
  c->out_chlayout = out_chlayout;
  c->out_sample_fmt = out_sample_fmt;

  // Every option setter can fail (e.g. invalid value); stop at the first
  // error instead of initializing a half-configured context.
  ret = av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_int(c->swr_ctx, "in_sample_rate", in_sample_rate, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_int(c->swr_ctx, "out_sample_rate", out_sample_rate, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_sample_fmt(c->swr_ctx, "in_sample_fmt", in_sample_fmt, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = av_opt_set_sample_fmt(c->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);
  if (ret < 0) {
    goto fail;
  }

  ret = swr_init(c->swr_ctx);
  if (ret < 0) {
    goto fail;
  }

  return ret;

fail:
  // swr_free() also resets c->swr_ctx to NULL.
  swr_free(&c->swr_ctx);
  return ret;
}

/**
 * Converts the samples of src_frame into the converter's output format.
 *
 * On success:
 *  - *out_data points to a newly allocated sample array (allocated with
 *    av_samples_alloc_array_and_samples()); the caller owns it,
 *  - *out_samples is the number of samples produced per channel,
 *  - *out_size is the number of bytes produced
 *    (out_samples * bytes_per_sample * out_channels).
 *
 * On conversion failure the temporary buffer is released, *out_data is set
 * to NULL and *out_samples / *out_size are set to 0, so nothing leaks and the
 * caller is never handed a buffer with undefined content.
 *
 * @return 0 on success, a negative value on failure.
 */
int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data,
                      int *out_samples, int *out_size) {
  uint8_t **out_data_tmp = NULL;
  int out_bytes_per_sample = av_get_bytes_per_sample(c->out_sample_fmt);
  int max_out_nb_samples = swr_get_out_samples(c->swr_ctx, src_frame->nb_samples);

  if (max_out_nb_samples < 0) {
    XAV_LOG_DEBUG("Couldn't estimate number of out samples: %d", max_out_nb_samples);
    return max_out_nb_samples;
  }

  // Some parts of ffmpeg require buffers to be divisible by 32
  // to use fast/aligned SIMD routines - this is what align option is used for.
  // See https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning
  // Because we return the binary straight to the Erlang, we can disable it.
  int ret = av_samples_alloc_array_and_samples(&out_data_tmp, NULL, c->out_chlayout.nb_channels,
                                               max_out_nb_samples, c->out_sample_fmt, 1);

  if (ret < 0) {
    XAV_LOG_DEBUG("Couldn't allocate array for out samples.");
    return ret;
  }

  int converted = swr_convert(c->swr_ctx, out_data_tmp, max_out_nb_samples,
                              (const uint8_t **)src_frame->data, src_frame->nb_samples);

  if (converted < 0) {
    XAV_LOG_DEBUG("Couldn't convert samples: %d", converted);
    // Release both the sample buffer and the pointer array that holds it.
    av_freep(&out_data_tmp[0]);
    av_freep(&out_data_tmp);
    *out_data = NULL;
    *out_samples = 0;
    *out_size = 0;
    return -1;
  }

  XAV_LOG_DEBUG("Converted %d samples per channel", converted);

  *out_data = out_data_tmp;
  *out_samples = converted;
  *out_size = converted * out_bytes_per_sample * c->out_chlayout.nb_channels;

  return 0;
}

void converter_free(struct Converter *c) { swr_free(&c->swr_ctx); }
21 changes: 21 additions & 0 deletions c_src/xav/converter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef CONVERTER_H
#define CONVERTER_H
#include <libavutil/channel_layout.h>
#include <libswresample/swresample.h>
#include <stdint.h>

// State of an audio converter built on top of libswresample.
// Filled in by converter_init() and released with converter_free().
struct Converter {
// swresample context; allocated in converter_init(), freed in converter_free().
SwrContext *swr_ctx;
// Sample rate of the input audio, as passed to converter_init().
int64_t in_sample_rate;
// Sample rate of the converted audio, as passed to converter_init().
int64_t out_sample_rate;
// Channel layout of the converted audio; its nb_channels is used by
// converter_convert() to size the output buffer.
AVChannelLayout out_chlayout;
// Sample format of the converted audio; used by converter_convert()
// to compute bytes per sample.
enum AVSampleFormat out_sample_fmt;
};

/**
 * Initializes the converter for the given input and output audio parameters
 * (channel layout, sample rate and sample format).
 *
 * @return a negative value on failure.
 */
int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
                   enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlayout,
                   int out_sample_rate, enum AVSampleFormat out_sample_fmt);

/**
 * Converts the samples of src_frame into the converter's output format.
 *
 * On success *out_data points to a newly allocated sample array, *out_samples
 * holds the number of converted samples per channel and *out_size holds the
 * size of the converted data in bytes.
 *
 * @return 0 on success, a negative value on failure.
 */
int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data,
                      int *out_samples, int *out_size);

/**
 * Releases the resources held by the converter.
 */
void converter_free(struct Converter *converter);
#endif
34 changes: 17 additions & 17 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

int decoder_init(struct Decoder *decoder, const char *codec) {
decoder->swr_ctx = NULL;
decoder->out_data = NULL;

if (strcmp(codec, "opus") == 0) {
decoder->media_type = AVMEDIA_TYPE_AUDIO;
Expand Down Expand Up @@ -30,6 +31,19 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
return -1;
}

if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
AVChannelLayout out_chlayout = decoder->c->ch_layout;
int out_sample_rate = decoder->c->sample_rate;
enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

int ret = converter_init(&decoder->converter, decoder->c->ch_layout, decoder->c->sample_rate,
decoder->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);

if (ret < 0) {
return ret;
}
}

return 0;
}

Expand Down Expand Up @@ -59,23 +73,9 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
decoder->frame_data = frame->data;
decoder->frame_linesize = frame->linesize;
}
} else if (decoder->media_type == AVMEDIA_TYPE_AUDIO &&
av_sample_fmt_is_planar(frame->format) == 1) {
if (decoder->swr_ctx == NULL) {
if (init_swr_ctx_from_frame(&decoder->swr_ctx, frame) != 0) {
return -1;
}
}

if (convert_to_interleaved(decoder->swr_ctx, frame, decoder->rgb_dst_data,
decoder->rgb_dst_linesize) != 0) {
return -1;
}

decoder->frame_data = decoder->rgb_dst_data;
decoder->frame_linesize = decoder->rgb_dst_linesize;
} else {
decoder->frame_data = frame->extended_data;
} else if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
return converter_convert(&decoder->converter, frame, &decoder->out_data, &decoder->out_samples,
&decoder->out_size);
}

return 0;
Expand Down
12 changes: 12 additions & 0 deletions c_src/xav/decoder.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include <libavcodec/avcodec.h>
#include <libswresample/swresample.h>

#include "converter.h"

struct Decoder {
enum AVMediaType media_type;
const AVCodec *codec;
Expand All @@ -14,6 +16,16 @@ struct Decoder {

uint8_t **frame_data;
int *frame_linesize;

struct Converter converter;
// Buffer where audio samples are written after conversion.
// We always convert to packed format, so only out_data[0] is set.
uint8_t **out_data;
// Number of samples in out_data buffer
int out_samples;
// Size of out_data buffer.
// This is the same as out_samples * bytes_per_sample(out_format) * out_channels.
int out_size;
};

int decoder_init(struct Decoder *decoder, const char *codec);
Expand Down
47 changes: 15 additions & 32 deletions c_src/xav/reader.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "reader.h"
#include "utils.h"
#include <libavutil/samplefmt.h>
#include <libavutil/version.h>

int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, int device_flag,
Expand All @@ -21,6 +22,7 @@ int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, in
reader->media_type = media_type;
reader->in_format_name = NULL;
reader->out_format_name = NULL;
reader->out_data = NULL;

if (device_flag == 1) {
avdevice_register_all();
Expand Down Expand Up @@ -69,23 +71,13 @@ int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, in
}

if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
reader->swr_ctx = swr_alloc();
enum AVSampleFormat out_sample_fmt = av_get_alt_sample_fmt(reader->c->sample_fmt, 0);

#if LIBAVUTIL_VERSION_MAJOR >= 58
av_opt_set_chlayout(reader->swr_ctx, "in_chlayout", &reader->c->ch_layout, 0);
av_opt_set_chlayout(reader->swr_ctx, "out_chlayout", &reader->c->ch_layout, 0);
#else
av_opt_set_channel_layout(reader->swr_ctx, "in_channel_layout", reader->c->channel_layout, 0);
av_opt_set_channel_layout(reader->swr_ctx, "out_channel_layout", reader->c->channel_layout, 0);
#endif

av_opt_set_int(reader->swr_ctx, "in_sample_rate", reader->c->sample_rate, 0);
av_opt_set_int(reader->swr_ctx, "out_sample_rate", reader->c->sample_rate, 0);
av_opt_set_sample_fmt(reader->swr_ctx, "in_sample_fmt", reader->c->sample_fmt, 0);
av_opt_set_sample_fmt(reader->swr_ctx, "out_sample_fmt", out_sample_fmt, 0);

ret = swr_init(reader->swr_ctx);
AVChannelLayout out_chlayout = AV_CHANNEL_LAYOUT_MONO;
int out_sample_rate = 16000;
enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;

int ret = converter_init(&reader->converter, reader->c->ch_layout, reader->c->sample_rate,
reader->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);

if (ret < 0) {
return ret;
}
Expand Down Expand Up @@ -155,7 +147,6 @@ int reader_next_frame(struct Reader *reader) {
ret = avcodec_receive_frame(reader->c, reader->frame);

if (ret == 0) {
XAV_LOG_DEBUG("Received frame");
frame_ready = 1;
} else if (ret == AVERROR_EOF) {
XAV_LOG_DEBUG("EOF");
Expand Down Expand Up @@ -193,26 +184,17 @@ int reader_next_frame(struct Reader *reader) {

fin:
if (reader->media_type == AVMEDIA_TYPE_VIDEO && reader->frame->format != AV_PIX_FMT_RGB24) {
XAV_LOG_DEBUG("Converting to RGB");
XAV_LOG_DEBUG("Converting video to RGB");
convert_to_rgb(reader->frame, reader->rgb_dst_data, reader->rgb_dst_linesize);
reader->frame_data = reader->rgb_dst_data;
reader->frame_linesize = reader->rgb_dst_linesize;
} else if (reader->media_type == AVMEDIA_TYPE_VIDEO) {
reader->frame_data = reader->frame->data;
reader->frame_linesize = reader->frame->linesize;
} else if (reader->media_type == AVMEDIA_TYPE_AUDIO &&
av_sample_fmt_is_planar(reader->frame->format) == 1) {
XAV_LOG_DEBUG("Converting to interleaved");

if (convert_to_interleaved(reader->swr_ctx, reader->frame, reader->rgb_dst_data,
reader->rgb_dst_linesize) != 0) {
return -1;
}

reader->frame_data = reader->rgb_dst_data;
reader->frame_linesize = reader->rgb_dst_linesize;
} else {
reader->frame_data = reader->frame->extended_data;
} else if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
XAV_LOG_DEBUG("Converting audio to desired out format");
return converter_convert(&reader->converter, reader->frame, &reader->out_data,
&reader->out_samples, &reader->out_size);
}

return 0;
Expand All @@ -226,6 +208,7 @@ void reader_free_frame(struct Reader *reader) {
reader->frame_data == reader->rgb_dst_data) {
av_freep(&reader->frame_data[0]);
}
free(reader->out_data);
}

void reader_free(struct Reader *reader) {
Expand Down
12 changes: 11 additions & 1 deletion c_src/xav/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "converter.h"
#include "utils.h"

struct Reader {
Expand Down Expand Up @@ -39,6 +39,16 @@ struct Reader {
// whether convertion to rgb was needed
uint8_t **frame_data;
int *frame_linesize;

struct Converter converter;
// Buffer where audio samples are written after conversion.
// We always convert to packed format, so only out_data[0] is set.
uint8_t **out_data;
// Number of samples in out_data buffer
int out_samples;
// Size of out_data buffer.
// This is the same as out_samples * bytes_per_sample(out_format) * out_channels.
int out_size;
};

int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, int device_flag,
Expand Down
50 changes: 10 additions & 40 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#include "utils.h"
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <stdint.h>

void print_supported_pix_fmts(AVCodec *codec) {
if (codec->pix_fmts == NULL) {
Expand Down Expand Up @@ -42,31 +45,6 @@ void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[])
src_frame->height, dst_data, dst_linesize);
}

int convert_to_interleaved(SwrContext *swr_ctx, AVFrame *src_frame, uint8_t **dst_data,
int *dst_linesize) {
#if LIBAVUTIL_VERSION_MAJOR >= 58
int channels = src_frame->ch_layout.nb_channels;
#else
int channels = src_frame->channels;
#endif

int samples_per_channel = src_frame->nb_samples;

int ret =
av_samples_alloc(dst_data, dst_linesize, channels, samples_per_channel, src_frame->format, 0);
if (ret < 0) {
return ret;
}

ret = swr_convert(swr_ctx, dst_data, samples_per_channel, (const uint8_t **)src_frame->data,
samples_per_channel);
if (ret < 0) {
return ret;
}

return 0;
}

ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term) {
ERL_NIF_TERM ok_term = enif_make_atom(env, "ok");
return enif_make_tuple(env, 2, ok_term, data_term);
Expand All @@ -83,24 +61,16 @@ ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg) {
return enif_raise_exception(env, reason);
}

ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, AVFrame *frame, unsigned char *data[],
const char *format_name) {
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts) {
ERL_NIF_TERM data_term;

#if LIBAVUTIL_VERSION_MAJOR >= 58
size_t unpadded_linesize =
frame->nb_samples * av_get_bytes_per_sample(frame->format) * frame->ch_layout.nb_channels;
#else
size_t unpadded_linesize =
frame->nb_samples * av_get_bytes_per_sample(frame->format) * frame->channels;
#endif

unsigned char *ptr = enif_make_new_binary(env, unpadded_linesize, &data_term);
memcpy(ptr, data[0], unpadded_linesize);
unsigned char *ptr = enif_make_new_binary(env, out_size, &data_term);
memcpy(ptr, out_data[0], out_size);

ERL_NIF_TERM samples_term = enif_make_int(env, frame->nb_samples);
ERL_NIF_TERM format_term = enif_make_atom(env, format_name);
ERL_NIF_TERM pts_term = enif_make_int(env, frame->pts);
ERL_NIF_TERM samples_term = enif_make_int(env, out_samples);
ERL_NIF_TERM format_term = enif_make_atom(env, out_format);
ERL_NIF_TERM pts_term = enif_make_int(env, pts);

return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term);
}
Expand Down
6 changes: 2 additions & 4 deletions c_src/xav/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,11 @@
void print_supported_pix_fmts(AVCodec *codec);
int init_swr_ctx_from_frame(SwrContext **swr_ctx, AVFrame *frame);
void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[]);
int convert_to_interleaved(SwrContext *swr_ctx, AVFrame *src_frame, uint8_t **dst_data,
int *dst_linesize);

ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term);
ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason);
ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, unsigned char *data[],
int *linesize, const char *out_format_name);
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, AVFrame *frame, unsigned char *data[],
const char *out_format_name);
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts);
Loading

0 comments on commit 09f0db6

Please sign in to comment.