From 42ba9626013343f240806fdb7531659d5c9f4bb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20=C5=9Aled=C5=BA?= <michalsledz34@gmail.com>
Date: Fri, 2 Aug 2024 19:23:13 +0200
Subject: [PATCH] Bring back support for older ffmpegs

---
 Makefile                   |  2 +-
 c_src/xav/channel_layout.h | 12 ++++++++++
 c_src/xav/converter.c      | 27 +++++++++++++++------
 c_src/xav/converter.h      | 10 ++++----
 c_src/xav/decoder.c        | 48 ++++++++++++++++++++++++++------------
 c_src/xav/decoder.h        |  2 +-
 c_src/xav/reader.c         | 13 +++++++++--
 c_src/xav/utils.c          | 20 ----------------
 c_src/xav/utils.h          |  1 -
 c_src/xav/xav_nif.c        |  3 ++-
 10 files changed, 86 insertions(+), 52 deletions(-)
 create mode 100644 c_src/xav/channel_layout.h

diff --git a/Makefile b/Makefile
index 4e2c93a..9fdb04d 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ XAV_SO = $(PRIV_DIR)/libxav.so
 # uncomment to compile with debug logs
 # XAV_DEBUG_LOGS = -DXAV_DEBUG=1
 
-HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/converter.h $(XAV_DIR)/utils.h
+HEADERS = $(XAV_DIR)/reader.h $(XAV_DIR)/decoder.h $(XAV_DIR)/converter.h $(XAV_DIR)/channel_layout.h $(XAV_DIR)/utils.h
 SOURCES = $(XAV_DIR)/xav_nif.c $(XAV_DIR)/reader.c $(XAV_DIR)/decoder.c $(XAV_DIR)/converter.c $(XAV_DIR)/utils.c
 
 CFLAGS = $(XAV_DEBUG_LOGS) -fPIC -shared
diff --git a/c_src/xav/channel_layout.h b/c_src/xav/channel_layout.h
new file mode 100644
index 0000000..5dd8685
--- /dev/null
+++ b/c_src/xav/channel_layout.h
@@ -0,0 +1,12 @@
+#ifndef CHANNEL_LAYOUT_H
+#define CHANNEL_LAYOUT_H
+#include <libavutil/channel_layout.h>
+
+struct ChannelLayout {
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  AVChannelLayout layout;
+#else
+  uint64_t layout;
+#endif
+};
+#endif
diff --git a/c_src/xav/converter.c b/c_src/xav/converter.c
index cecaf99..a7564aa 100644
--- a/c_src/xav/converter.c
+++ b/c_src/xav/converter.c
@@ -5,10 +5,11 @@
 #include <libswresample/swresample.h>
 #include <stdint.h>
 
+#include "channel_layout.h"
 #include "utils.h"
 
-int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
-                   enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlayout,
+int converter_init(struct Converter *c, struct ChannelLayout in_chlayout, int in_sample_rate,
+                   enum AVSampleFormat in_sample_fmt, struct ChannelLayout out_chlayout,
                    int out_sample_rate, enum AVSampleFormat out_sample_fmt) {
   c->swr_ctx = swr_alloc();
   c->in_sample_rate = in_sample_rate;
@@ -16,8 +17,13 @@ int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_samp
   c->out_chlayout = out_chlayout;
   c->out_sample_fmt = out_sample_fmt;
 
-  av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout, 0);
-  av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout, 0);
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout.layout, 0);
+  av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout.layout, 0);
+#else
+  av_opt_set_channel_layout(c->swr_ctx, "in_channel_layout", in_chlayout.layout, 0);
+  av_opt_set_channel_layout(c->swr_ctx, "out_channel_layout", out_chlayout.layout, 0);
+#endif
 
   av_opt_set_int(c->swr_ctx, "in_sample_rate", in_sample_rate, 0);
   av_opt_set_int(c->swr_ctx, "out_sample_rate", out_sample_rate, 0);
@@ -30,6 +36,13 @@ int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_samp
 
 int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data,
                       int *out_samples, int *out_size) {
+
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  int out_nb_channels = c->out_chlayout.layout.nb_channels;
+#else
+  int out_nb_channels = av_get_channel_layout_nb_channels(c->out_chlayout.layout);
+#endif
+
   uint8_t **out_data_tmp = NULL;
   int max_out_nb_samples = swr_get_out_samples(c->swr_ctx, src_frame->nb_samples);
   int out_bytes_per_sample = av_get_bytes_per_sample(c->out_sample_fmt);
@@ -38,7 +51,7 @@ int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_da
   // to use fast/aligned SIMD routines - this is what align option is used for.
   // See https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning
   // Because we return the binary straight to the Erlang, we can disable it.
-  int ret = av_samples_alloc_array_and_samples(&out_data_tmp, NULL, c->out_chlayout.nb_channels,
+  int ret = av_samples_alloc_array_and_samples(&out_data_tmp, NULL, out_nb_channels,
                                                max_out_nb_samples, c->out_sample_fmt, 1);
 
   if (ret < 0) {
@@ -58,9 +71,9 @@ int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_da
 
   XAV_LOG_DEBUG("Converted %d samples per channel", *out_samples);
 
-  *out_size = *out_samples * out_bytes_per_sample * c->out_chlayout.nb_channels;
+  *out_size = *out_samples * out_bytes_per_sample * out_nb_channels;
 
   return 0;
 }
 
-void converter_free(struct Converter *c) { swr_free(&c->swr_ctx); }
\ No newline at end of file
+void converter_free(struct Converter *c) { swr_free(&c->swr_ctx); }
diff --git a/c_src/xav/converter.h b/c_src/xav/converter.h
index f2d1ec2..4e99a5d 100644
--- a/c_src/xav/converter.h
+++ b/c_src/xav/converter.h
@@ -4,18 +4,20 @@
 #include <libswresample/swresample.h>
 #include <stdint.h>
 
+#include "channel_layout.h"
+
 struct Converter {
   SwrContext *swr_ctx;
   int64_t in_sample_rate;
   int64_t out_sample_rate;
-  AVChannelLayout out_chlayout;
+  struct ChannelLayout out_chlayout;
   enum AVSampleFormat out_sample_fmt;
 };
 
-int converter_init(struct Converter *c, AVChannelLayout in_chlayout, int in_sample_rate,
-                   enum AVSampleFormat in_sample_fmt, AVChannelLayout out_chlaout,
+int converter_init(struct Converter *c, struct ChannelLayout in_chlayout, int in_sample_rate,
+                   enum AVSampleFormat in_sample_fmt, struct ChannelLayout out_chlayout,
                    int out_sample_rate, enum AVSampleFormat out_sample_fmt);
 int converter_convert(struct Converter *c, AVFrame *src_frame, uint8_t ***out_data,
                       int *out_samples, int *out_size);
 void converter_free(struct Converter *converter);
-#endif
\ No newline at end of file
+#endif
diff --git a/c_src/xav/decoder.c b/c_src/xav/decoder.c
index 516c332..e466924 100644
--- a/c_src/xav/decoder.c
+++ b/c_src/xav/decoder.c
@@ -1,8 +1,11 @@
 #include "decoder.h"
 #include "utils.h"
 
+static int init_converter(struct Decoder *decoder);
+
 int decoder_init(struct Decoder *decoder, const char *codec) {
   decoder->swr_ctx = NULL;
+  decoder->converter = NULL;
   decoder->out_data = NULL;
 
   if (strcmp(codec, "opus") == 0) {
@@ -31,19 +34,6 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
     return -1;
   }
 
-  if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
-    AVChannelLayout out_chlayout = decoder->c->ch_layout;
-    int out_sample_rate = decoder->c->sample_rate;
-    enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;
-
-    int ret = converter_init(&decoder->converter, decoder->c->ch_layout, decoder->c->sample_rate,
-                             decoder->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);
-
-    if (ret < 0) {
-      return ret;
-    }
-  }
-
   return 0;
 }
 
@@ -74,7 +64,15 @@ int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) {
       decoder->frame_linesize = frame->linesize;
     }
   } else if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
-    return converter_convert(&decoder->converter, frame, &decoder->out_data, &decoder->out_samples,
+
+    if (decoder->converter == NULL) {
+      ret = init_converter(decoder);
+      if (ret < 0) {
+        return ret;
+      }
+    }
+
+    return converter_convert(decoder->converter, frame, &decoder->out_data, &decoder->out_samples,
                              &decoder->out_size);
   }
 
@@ -89,4 +87,48 @@ void decoder_free(struct Decoder *decoder) {
   if (decoder->c != NULL) {
     avcodec_free_context(&decoder->c);
   }
-}
\ No newline at end of file
+}
+
+// Allocates the audio converter for `decoder` and configures it from the codec
+// context: input and output share the codec's sample rate and channel layout,
+// and the output sample format is AV_SAMPLE_FMT_FLT.
+//
+// Returns the converter_init() result on success, or a negative value when the
+// allocation or converter_init() fails. On failure decoder->converter stays NULL,
+// so a later decoder_decode() call retries instead of using a half-initialized
+// converter.
+//
+// NOTE(review): the Converter is heap-allocated here; confirm that decoder_free()
+// releases it with converter_free() + free().
+static int init_converter(struct Decoder *decoder) {
+  struct Converter *converter = calloc(1, sizeof(*converter));
+  if (converter == NULL) {
+    return -1;
+  }
+
+  int out_sample_rate = decoder->c->sample_rate;
+  enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;
+
+  struct ChannelLayout in_chlayout, out_chlayout;
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+  in_chlayout.layout = decoder->c->ch_layout;
+  out_chlayout.layout = decoder->c->ch_layout;
+#else
+  in_chlayout.layout = decoder->c->channel_layout;
+  out_chlayout.layout = decoder->c->channel_layout;
+  // layout is a uint64_t mask; cast so the argument matches the format specifier.
+  XAV_LOG_DEBUG("in_chlayout %llu", (unsigned long long)in_chlayout.layout);
+  XAV_LOG_DEBUG("in nb_channels %d", av_get_channel_layout_nb_channels(in_chlayout.layout));
+#endif
+
+  int ret = converter_init(converter, in_chlayout, decoder->c->sample_rate,
+                           decoder->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);
+  if (ret < 0) {
+    converter_free(converter);
+    free(converter);
+    return ret;
+  }
+
+  decoder->converter = converter;
+  return ret;
+}
diff --git a/c_src/xav/decoder.h b/c_src/xav/decoder.h
index 20a5d58..f6710e9 100644
--- a/c_src/xav/decoder.h
+++ b/c_src/xav/decoder.h
@@ -17,7 +17,7 @@ struct Decoder {
   uint8_t **frame_data;
   int *frame_linesize;
 
-  struct Converter converter;
+  struct Converter *converter;
   // Buffer where audio samples are written after conversion.
   // We always convet to packed format, so only out_data[0] is set.
   uint8_t **out_data;
diff --git a/c_src/xav/reader.c b/c_src/xav/reader.c
index 61b06e5..ee4deaf 100644
--- a/c_src/xav/reader.c
+++ b/c_src/xav/reader.c
@@ -71,11 +71,19 @@ int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, in
   }
 
   if (reader->media_type == AVMEDIA_TYPE_AUDIO) {
-    AVChannelLayout out_chlayout = AV_CHANNEL_LAYOUT_MONO;
     int out_sample_rate = 16000;
     enum AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLT;
 
-    int ret = converter_init(&reader->converter, reader->c->ch_layout, reader->c->sample_rate,
+    struct ChannelLayout in_chlayout, out_chlayout;
+#if LIBAVUTIL_VERSION_MAJOR >= 58
+    in_chlayout.layout = reader->c->ch_layout;
+    av_channel_layout_from_mask(&out_chlayout.layout, AV_CH_LAYOUT_MONO);
+#else
+    in_chlayout.layout = reader->c->channel_layout;
+    out_chlayout.layout = AV_CH_LAYOUT_MONO;
+#endif
+
+    int ret = converter_init(&reader->converter, in_chlayout, reader->c->sample_rate,
                              reader->c->sample_fmt, out_chlayout, out_sample_rate, out_sample_fmt);
 
     if (ret < 0) {
@@ -211,6 +219,7 @@ void reader_free_frame(struct Reader *reader) {
 
   if (reader->out_data != NULL) {
     free(reader->out_data);
+    reader->out_data = NULL;
   }
 }
 
diff --git a/c_src/xav/utils.c b/c_src/xav/utils.c
index 78610d5..87772cd 100644
--- a/c_src/xav/utils.c
+++ b/c_src/xav/utils.c
@@ -13,26 +13,6 @@ void print_supported_pix_fmts(AVCodec *codec) {
   }
 }
 
-int init_swr_ctx_from_frame(SwrContext **swr_ctx, AVFrame *frame) {
-  *swr_ctx = swr_alloc();
-  enum AVSampleFormat out_sample_fmt = av_get_alt_sample_fmt(frame->format, 0);
-
-#if LIBAVUTIL_VERSION_MAJOR >= 58
-  av_opt_set_chlayout(*swr_ctx, "in_chlayout", &frame->ch_layout, 0);
-  av_opt_set_chlayout(*swr_ctx, "out_chlayout", &frame->ch_layout, 0);
-#else
-  av_opt_set_channel_layout(*swr_ctx, "in_channel_layout", frame->channel_layout, 0);
-  av_opt_set_channel_layout(*swr_ctx, "out_channel_layout", frame->channel_layout, 0);
-#endif
-
-  av_opt_set_int(*swr_ctx, "in_sample_rate", frame->sample_rate, 0);
-  av_opt_set_int(*swr_ctx, "out_sample_rate", frame->sample_rate, 0);
-  av_opt_set_sample_fmt(*swr_ctx, "in_sample_fmt", frame->format, 0);
-  av_opt_set_sample_fmt(*swr_ctx, "out_sample_fmt", out_sample_fmt, 0);
-
-  return swr_init(*swr_ctx);
-}
-
 void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[]) {
   struct SwsContext *sws_ctx =
       sws_getContext(src_frame->width, src_frame->height, src_frame->format, src_frame->width,
diff --git a/c_src/xav/utils.h b/c_src/xav/utils.h
index 7339cda..72e21dd 100644
--- a/c_src/xav/utils.h
+++ b/c_src/xav/utils.h
@@ -18,7 +18,6 @@
 #define XAV_FREE(X) enif_free(X)
 
 void print_supported_pix_fmts(AVCodec *codec);
-int init_swr_ctx_from_frame(SwrContext **swr_ctx, AVFrame *frame);
 void convert_to_rgb(AVFrame *src_frame, uint8_t *dst_data[], int dst_linesize[]);
 
 ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term);
diff --git a/c_src/xav/xav_nif.c b/c_src/xav/xav_nif.c
index 7f52e96..81cc63d 100644
--- a/c_src/xav/xav_nif.c
+++ b/c_src/xav/xav_nif.c
@@ -190,7 +190,7 @@ ERL_NIF_TERM decode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
                                              decoder->frame_linesize, "rgb");
 
   } else if (decoder->media_type == AVMEDIA_TYPE_AUDIO) {
-    const char *out_format = av_get_sample_fmt_name(decoder->converter.out_sample_fmt);
+    const char *out_format = av_get_sample_fmt_name(decoder->converter->out_sample_fmt);
 
     frame_term = xav_nif_audio_frame_to_term(env, decoder->out_data, decoder->out_samples,
                                              decoder->out_size, out_format, frame->pts);
@@ -204,6 +204,7 @@ ERL_NIF_TERM decode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
 
   if (decoder->out_data != NULL) {
     free(decoder->out_data);
+    decoder->out_data = NULL;
   }
 
   return term;