CasparCG · scriptorian · Jul 2, 2020 · Jul 2, 2020
diff --git a/src/core/frame/frame.cpp b/src/core/frame/frame.cpp
@@ -84,6 +84,7 @@ array<std::uint8_t>&       mutable_frame::image_data(std::size_t index) { return
 array<std::int32_t>&       mutable_frame::audio_data() { return impl_->audio_data_; }
 std::size_t                mutable_frame::width() const { return impl_->desc_.planes.at(0).width; }
 std::size_t                mutable_frame::height() const { return impl_->desc_.planes.at(0).height; }
+const void*                mutable_frame::stream_tag() const { return impl_->tag_; } // opaque stream tag; carried over when the frame is moved into a const_frame
 const frame_geometry&      mutable_frame::geometry() const { return impl_->geometry_; }
 frame_geometry&            mutable_frame::geometry() { return impl_->geometry_; }
 
@@ -92,13 +93,16 @@ struct const_frame::impl
     std::vector<array<const std::uint8_t>> image_data_;
     array<const std::int32_t>              audio_data_;
     core::pixel_format_desc                desc_     = pixel_format::invalid;
+    const void*                            tag_;
     frame_geometry                         geometry_ = frame_geometry::get_default();
     boost::any                             opaque_;
 
-    impl(std::vector<array<const std::uint8_t>> image_data,
+    impl(const void*                            tag,
+         std::vector<array<const std::uint8_t>> image_data,
          array<const std::int32_t>              audio_data,
          const core::pixel_format_desc&         desc)
-        : image_data_(std::move(image_data))
+        : tag_(tag)
+        , image_data_(std::move(image_data))
         , audio_data_(std::move(audio_data))
         , desc_(desc)
     {
@@ -107,10 +111,12 @@ struct const_frame::impl
         }
     }
 
-    impl(std::vector<array<std::uint8_t>>&& image_data,
+    impl(const void*                        tag,
+         std::vector<array<std::uint8_t>>&& image_data,
          array<const std::int32_t>          audio_data,
          const core::pixel_format_desc&     desc)
-        : image_data_(std::make_move_iterator(image_data.begin()), std::make_move_iterator(image_data.end()))
+        : tag_(tag)
+        , image_data_(std::make_move_iterator(image_data.begin()), std::make_move_iterator(image_data.end()))
         , audio_data_(std::move(audio_data))
         , desc_(desc)
     {
@@ -120,7 +126,8 @@ struct const_frame::impl
     }
 
     impl(mutable_frame&& other)
-        : image_data_(std::make_move_iterator(other.impl_->image_data_.begin()),
+        : tag_(other.stream_tag())
+        , image_data_(std::make_move_iterator(other.impl_->image_data_.begin()),
                       std::make_move_iterator(other.impl_->image_data_.end()))
         , audio_data_(std::move(other.impl_->audio_data_))
         , desc_(std::move(other.impl_->desc_))
@@ -147,7 +154,7 @@ const_frame::const_frame() {}
 const_frame::const_frame(std::vector<array<const std::uint8_t>> image_data,
                          array<const std::int32_t>              audio_data,
                          const core::pixel_format_desc&         desc)
-    : impl_(new impl(std::move(image_data), std::move(audio_data), desc))
+    : impl_(new impl(nullptr, std::move(image_data), std::move(audio_data), desc)) // nullptr: a frame built from raw buffers carries no stream tag
 {
 }
 const_frame::const_frame(mutable_frame&& other)
@@ -174,6 +181,15 @@ const array<const std::int32_t>& const_frame::audio_data() const { return impl_-
 std::size_t                      const_frame::width() const { return impl_->width(); }
 std::size_t                      const_frame::height() const { return impl_->height(); }
 std::size_t                      const_frame::size() const { return impl_->size(); }
+const void*                      const_frame::stream_tag() const { return impl_->tag_; }
+// Returns a copy of this frame whose stream_tag() is new_tag; an empty frame (no impl_) is returned untagged.
+// NOTE(review): the tag is written through impl_; if copies of a const_frame share impl_ this also retags
+// *this, so the source frame and the returned frame would not be distinguishable -- confirm against frame.h.
+const_frame                      const_frame::with_tag(const void* new_tag) const {
+    const_frame copy(*this);
+    if (copy.impl_) // impl_ may be null for a default-constructed frame (see operator bool)
+        copy.impl_->tag_ = new_tag;
+    return copy;
+}
 const frame_geometry&            const_frame::geometry() const { return impl_->geometry_; }
 const boost::any&                const_frame::opaque() const { return impl_->opaque_; }
 const_frame::operator bool() const { return impl_ != nullptr && impl_->desc_.format != core::pixel_format::invalid; }

diff --git a/src/core/frame/frame.h b/src/core/frame/frame.h
@@ -46,6 +46,8 @@ class mutable_frame final
 
     std::size_t height() const;
 
+    const void* stream_tag() const;
+
     class frame_geometry&       geometry();
     const class frame_geometry& geometry() const;
 
@@ -80,6 +82,9 @@ class const_frame final
 
     std::size_t size() const;
 
+    const void* stream_tag() const;
+    const_frame with_tag(const void* new_tag) const;
+
     const boost::any& opaque() const;
 
     const class frame_geometry& geometry() const;

diff --git a/src/core/mixer/audio/audio_mixer.cpp b/src/core/mixer/audio/audio_mixer.cpp
@@ -42,6 +42,7 @@ using namespace boost::container;
 
 struct audio_item
 {
+    const void*          tag = nullptr; // stream_tag() of the frame the samples came from; nullptr = no per-stream carry-over
     audio_transform      transform;
     array<const int32_t> samples;
 };
@@ -50,11 +51,13 @@ using audio_buffer_ps = std::vector<double>;
 
 struct audio_mixer::impl
 {
-    monitor::state                      state_;
-    std::stack<core::audio_transform>   transform_stack_;
-    std::vector<audio_item>             items_;
-    std::atomic<float>                  master_volume_{1.0f};
-    spl::shared_ptr<diagnostics::graph> graph_;
+    monitor::state                              state_;
+    std::stack<core::audio_transform>           transform_stack_;
+    std::vector<audio_item>                     items_;
+    std::map<const void*, std::vector<int32_t>> audio_streams_;
+    video_format_desc                           format_desc_;
+    std::atomic<float>                          master_volume_{1.0f};
+    spl::shared_ptr<diagnostics::graph>         graph_;
 
     impl(const impl&) = delete;
     impl& operator=(const impl&) = delete;
@@ -77,11 +80,7 @@ struct audio_mixer::impl
         if (transform_stack_.top().volume < 0.002 || !frame.audio_data())
             return;
 
-        audio_item item;
-        item.transform = transform_stack_.top();
-        item.samples   = frame.audio_data();
-
-        items_.push_back(std::move(item));
+        items_.push_back(std::move(audio_item{frame.stream_tag(), transform_stack_.top(), frame.audio_data()}));
     }
 
     void pop() { transform_stack_.pop(); }
@@ -92,25 +91,68 @@ struct audio_mixer::impl
 
     array<const int32_t> mix(const video_format_desc& format_desc, int nb_samples)
     {
+        if (format_desc_ != format_desc) {
+            audio_streams_.clear();
+            format_desc_ = format_desc;
+        }
+
         auto channels = format_desc.audio_channels;
         auto items    = std::move(items_);
-        auto result   = std::vector<int32_t>(nb_samples * channels, 0);
+        auto result   = std::vector<int32_t>(size_t(nb_samples) * channels, 0);
 
-        auto mixed = std::vector<double>(nb_samples * channels, 0.0f);
+        auto mixed = std::vector<double>(size_t(nb_samples) * channels, 0.0f);
 
+        std::map<const void*, std::vector<int32_t>> next_audio_streams;
+
+        //int i = 0;
         for (auto& item : items) {
-            auto ptr  = item.samples.data();
-            auto size = result.size();
-            for (auto n = 0; n < size; ++n) {
-                if (n < item.samples.size()) {
-                    mixed[n] = static_cast<double>(ptr[n]) * item.transform.volume + mixed[n];
+            auto ptr       = item.samples.data();
+            auto item_size = item.samples.size();
+            auto dst_size  = result.size();
+
+            size_t               last_size = 0;
+            const int32_t*       last_ptr  = nullptr;
+            std::vector<int32_t> silence; // owns the start-up padding; declared here so last_ptr stays valid while mixing
+            bool fix_1001 = (1001 == format_desc.framerate.denominator());
+            if (fix_1001) {
+                // Samples this stream left over from the previous mix() call are played first
+                auto audio_stream = audio_streams_.find(item.tag);
+                if (audio_stream != audio_streams_.end()) {
+                    last_size = audio_stream->second.size();
+                    last_ptr  = audio_stream->second.data();
+                } else if (nullptr != item.tag) {
+                    // First frame of a tagged stream: prepend one sample of silence per channel so that
+                    // e.g. dst_size 801 with item_size 800 is covered before any real source audio
+                    silence.assign(static_cast<size_t>(channels), 0);
+                    last_size = silence.size();
+                    last_ptr  = silence.data();
+                }
+            }
+
+            for (auto n = 0; n < dst_size; ++n) {
+                if (last_ptr && n < last_size) {
+                    mixed[n] = static_cast<double>(last_ptr[n]) * item.transform.volume + mixed[n];
+                } else if (n < last_size + item_size) {
+                    mixed[n] = static_cast<double>(ptr[n - last_size]) * item.transform.volume + mixed[n];
                 } else {
-                    auto offset = (item.samples.size()) - (channels - (n % channels));
+                    auto offset = int(item_size) - (channels - (n % channels));
                     mixed[n]    = static_cast<double>(ptr[offset]) * item.transform.volume + mixed[n];
                 }
             }
+
+            if (fix_1001 && item.tag) {
+                if (item_size + last_size > dst_size) {
+                    auto                 buf_size = item_size + last_size - dst_size;
+                    std::vector<int32_t> buf(buf_size);
+                    std::memcpy(buf.data(), item.samples.data() + dst_size - last_size, buf_size * sizeof(int32_t));
+                    next_audio_streams[item.tag] = std::move(buf);
+                } else
+                    next_audio_streams[item.tag] = std::vector<int32_t>();
+            }
         }
 
+        audio_streams_ = std::move(next_audio_streams);
+
         auto master_volume = master_volume_.load();
         for (auto n = 0; n < mixed.size(); ++n) {
             auto sample = mixed[n] * master_volume;

diff --git a/src/core/producer/route/route_producer.cpp b/src/core/producer/route/route_producer.cpp
@@ -23,8 +23,12 @@
 #include <common/diagnostics/graph.h>
 #include <common/param.h>
 #include <common/timer.h>
+#include <stack>
 
+#include <core/frame/frame.h>
 #include <core/frame/draw_frame.h>
+#include <core/frame/frame_visitor.h>
+#include <core/frame/frame_transform.h>
 #include <core/monitor/monitor.h>
 #include <core/producer/frame_producer.h>
 #include <core/video_channel.h>
@@ -37,6 +41,60 @@
 
 namespace caspar { namespace core {
 
+// Frame visitor that rebuilds a draw_frame tree with every const_frame leaf re-tagged with stream_tag
+// (const_frame::with_tag), keeping the frame_transform of each node. Instances are reusable, not copyable.
+class fix_stream_tag : public frame_visitor
+{
+    using level = std::pair<frame_transform, std::vector<draw_frame>>;
+
+    const void*                  stream_tag_;
+    std::stack<level>            frames_stack_; // bottom entry collects the finished top-level frame(s)
+    boost::optional<const_frame> upd_frame_;    // leaf seen by visit(), consumed by the next pop()
+
+  public:
+    explicit fix_stream_tag(const void* stream_tag)
+        : stream_tag_(stream_tag)
+    {
+        frames_stack_.emplace(frame_transform{}, std::vector<draw_frame>());
+    }
+
+    fix_stream_tag(const fix_stream_tag&) = delete;
+    fix_stream_tag& operator=(const fix_stream_tag&) = delete;
+
+    // Opens a new nesting level for a node with the given transform.
+    void push(const frame_transform& transform) override
+    {
+        frames_stack_.emplace(transform, std::vector<draw_frame>());
+    }
+
+    // Remembers the re-tagged leaf; it is attached to its parent in pop().
+    void visit(const const_frame& frame) override { upd_frame_ = frame.with_tag(stream_tag_); }
+
+    // Closes the current level: wraps either the pending leaf or the collected children
+    // in a draw_frame, restores the level's transform and appends it to the parent level.
+    void pop() override
+    {
+        auto popped = std::move(frames_stack_.top());
+        frames_stack_.pop();
+
+        auto new_frame = upd_frame_ ? draw_frame(std::move(*upd_frame_)) : draw_frame(std::move(popped.second));
+        upd_frame_            = boost::none;
+        new_frame.transform() = popped.first;
+        frames_stack_.top().second.push_back(std::move(new_frame));
+    }
+
+    // Re-tags frame and returns the rebuilt tree; the visitor is reset for the next call.
+    draw_frame operator()(draw_frame frame)
+    {
+        frame.accept(*this);
+
+        draw_frame result = std::move(frames_stack_.top().second);
+        frames_stack_     = std::stack<level>();
+        frames_stack_.emplace(frame_transform{}, std::vector<draw_frame>());
+        return result;
+    }
+};
+
 class route_producer
     : public frame_producer
     , public route_control
@@ -54,6 +112,7 @@ class route_producer
     core::draw_frame frame_;
     int              source_channel_;
     int              source_layer_;
+    fix_stream_tag   tag_fix_;
 
     int get_source_channel() const { return source_channel_; }
     int get_source_layer() const { return source_layer_; }
@@ -73,12 +132,15 @@ class route_producer
         : route_(route)
         , source_channel_(source_channel)
         , source_layer_(source_layer)
+        , tag_fix_(this)
         , connection_(route_->signal.connect([this](const core::draw_frame& frame) {
             auto frame2 = frame;
             if (!frame2) {
                 // We got a frame, so ensure it is a real frame (otherwise the layer gets confused)
                 frame2 = core::draw_frame::push(frame2);
             }
+            // Update the tag in the frame to allow the audio mixer to distinguish between the source frame and the routed frame
+            frame2 = tag_fix_(frame2);
             if (!buffer_.try_push(frame2)) {
                 graph_->set_tag(diagnostics::tag_severity::WARNING, "dropped-frame");
             }

diff --git a/src/modules/oal/consumer/oal_consumer.cpp b/src/modules/oal/consumer/oal_consumer.cpp
@@ -158,7 +158,7 @@ struct oal_consumer : public core::frame_consumer
         graph_->set_text(print());
 
         executor_.begin_invoke([=] {
-            duration_ = format_desc_.audio_cadence[0];
+            duration_ = *std::min_element(format_desc_.audio_cadence.begin(), format_desc_.audio_cadence.end());
             buffers_.resize(8);
             alGenBuffers(static_cast<ALsizei>(buffers_.size()), buffers_.data());
             alGenSources(1, &source_);