Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dropped and repeated audio samples for 1001-based standards #1326

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions src/core/frame/frame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ array<std::uint8_t>& mutable_frame::image_data(std::size_t index) { return
// Mutable access to the audio sample buffer stored in the frame's impl.
array<std::int32_t>& mutable_frame::audio_data() { return impl_->audio_data_; }
// Width of plane 0 of the frame's pixel format description.
std::size_t mutable_frame::width() const { return impl_->desc_.planes.at(0).width; }
// Height of plane 0 of the frame's pixel format description.
std::size_t mutable_frame::height() const { return impl_->desc_.planes.at(0).height; }
// Opaque tag stored in the impl; const_frame's move-from-mutable constructor
// copies it, and the audio mixer later uses it as a per-stream key.
const void* mutable_frame::stream_tag() const { return impl_->tag_; }
// Read-only access to the geometry stored in the impl.
const frame_geometry& mutable_frame::geometry() const { return impl_->geometry_; }
// Mutable access to the geometry stored in the impl.
frame_geometry& mutable_frame::geometry() { return impl_->geometry_; }

Expand All @@ -92,13 +93,16 @@ struct const_frame::impl
std::vector<array<const std::uint8_t>> image_data_;
array<const std::int32_t> audio_data_;
core::pixel_format_desc desc_ = pixel_format::invalid;
const void* tag_;
frame_geometry geometry_ = frame_geometry::get_default();
boost::any opaque_;

impl(std::vector<array<const std::uint8_t>> image_data,
impl(const void* tag,
std::vector<array<const std::uint8_t>> image_data,
array<const std::int32_t> audio_data,
const core::pixel_format_desc& desc)
: image_data_(std::move(image_data))
: tag_(tag)
, image_data_(std::move(image_data))
, audio_data_(std::move(audio_data))
, desc_(desc)
{
Expand All @@ -107,10 +111,12 @@ struct const_frame::impl
}
}

impl(std::vector<array<std::uint8_t>>&& image_data,
impl(const void* tag,
std::vector<array<std::uint8_t>>&& image_data,
array<const std::int32_t> audio_data,
const core::pixel_format_desc& desc)
: image_data_(std::make_move_iterator(image_data.begin()), std::make_move_iterator(image_data.end()))
: tag_(tag)
, image_data_(std::make_move_iterator(image_data.begin()), std::make_move_iterator(image_data.end()))
, audio_data_(std::move(audio_data))
, desc_(desc)
{
Expand All @@ -120,7 +126,8 @@ struct const_frame::impl
}

impl(mutable_frame&& other)
: image_data_(std::make_move_iterator(other.impl_->image_data_.begin()),
: tag_(other.stream_tag())
, image_data_(std::make_move_iterator(other.impl_->image_data_.begin()),
std::make_move_iterator(other.impl_->image_data_.end()))
, audio_data_(std::move(other.impl_->audio_data_))
, desc_(std::move(other.impl_->desc_))
Expand All @@ -147,7 +154,7 @@ const_frame::const_frame() {}
const_frame::const_frame(std::vector<array<const std::uint8_t>> image_data,
array<const std::int32_t> audio_data,
const core::pixel_format_desc& desc)
: impl_(new impl(std::move(image_data), std::move(audio_data), desc))
: impl_(new impl(nullptr, std::move(image_data), std::move(audio_data), desc))
{
}
const_frame::const_frame(mutable_frame&& other)
Expand All @@ -174,6 +181,12 @@ const array<const std::int32_t>& const_frame::audio_data() const { return impl_-
// The accessors below dereference impl_ unconditionally.
// NOTE(review): operator bool treats impl_ as possibly null, so callers
// presumably must check the frame before calling these - confirm.

// Forwards to impl::width().
std::size_t const_frame::width() const { return impl_->width(); }
// Forwards to impl::height().
std::size_t const_frame::height() const { return impl_->height(); }
// Forwards to impl::size().
std::size_t const_frame::size() const { return impl_->size(); }
// Opaque tag stored in the impl; the audio mixer uses it as a per-stream key.
const void* const_frame::stream_tag() const { return impl_->tag_; }
// Returns a frame equal to this one except that its stream tag is `new_tag`.
//
// The returned frame gets its own impl object. Copying a const_frame only
// copies the impl_ pointer, so writing the tag through the copy (as the
// previous implementation did) also retagged *this and every other frame
// sharing the same impl - defeating the purpose of giving a routed frame a tag
// that differs from its source.
//
// An empty frame (null impl_) is returned unchanged; there is nothing to tag.
//
// NOTE(review): assumes const_frame::impl is copy-constructible and that impl_
// is a standard smart pointer providing reset(T*) - confirm against frame.h.
const_frame const_frame::with_tag(const void* new_tag) const
{
    const_frame copy(*this);
    if (copy.impl_ == nullptr) {
        return copy;
    }

    // Detach from the shared impl before changing the tag.
    copy.impl_.reset(new impl(*impl_));
    copy.impl_->tag_ = new_tag;
    return copy;
}
// Read-only access to the geometry stored in the impl (impl_ is dereferenced
// without a null check).
const frame_geometry& const_frame::geometry() const { return impl_->geometry_; }
// Read-only access to the boost::any payload stored in the impl (impl_ is
// dereferenced without a null check).
const boost::any& const_frame::opaque() const { return impl_->opaque_; }
// A frame is "truthy" only when it has an impl AND that impl's pixel format is
// not pixel_format::invalid.
const_frame::operator bool() const { return impl_ != nullptr && impl_->desc_.format != core::pixel_format::invalid; }
Expand Down
5 changes: 5 additions & 0 deletions src/core/frame/frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class mutable_frame final

std::size_t height() const;

const void* stream_tag() const;

class frame_geometry& geometry();
const class frame_geometry& geometry() const;

Expand Down Expand Up @@ -80,6 +82,9 @@ class const_frame final

std::size_t size() const;

const void* stream_tag() const;
const_frame with_tag(const void* new_tag) const;

const boost::any& opaque() const;

const class frame_geometry& geometry() const;
Expand Down
78 changes: 60 additions & 18 deletions src/core/mixer/audio/audio_mixer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ using namespace boost::container;

// One queued audio contribution waiting to be mixed.
struct audio_item
{
// Stream tag of the frame the samples came from (frame.stream_tag()); the
// mixer uses it as the key into its per-stream carry-over buffers. nullptr
// means "no tag".
const void* tag = nullptr;
// Audio transform that was on top of the transform stack when the frame was
// visited; its volume scales the samples during mixing.
audio_transform transform;
// The frame's audio samples (frame.audio_data()).
array<const int32_t> samples;
};
Expand All @@ -50,11 +51,13 @@ using audio_buffer_ps = std::vector<double>;

struct audio_mixer::impl
{
monitor::state state_;
std::stack<core::audio_transform> transform_stack_;
std::vector<audio_item> items_;
std::atomic<float> master_volume_{1.0f};
spl::shared_ptr<diagnostics::graph> graph_;
monitor::state state_;
std::stack<core::audio_transform> transform_stack_;
std::vector<audio_item> items_;
std::map<const void*, std::vector<int32_t>> audio_streams_;
video_format_desc format_desc_;
std::atomic<float> master_volume_{1.0f};
spl::shared_ptr<diagnostics::graph> graph_;

impl(const impl&) = delete;
impl& operator=(const impl&) = delete;
Expand All @@ -77,11 +80,7 @@ struct audio_mixer::impl
if (transform_stack_.top().volume < 0.002 || !frame.audio_data())
return;

audio_item item;
item.transform = transform_stack_.top();
item.samples = frame.audio_data();

items_.push_back(std::move(item));
items_.push_back(std::move(audio_item{frame.stream_tag(), transform_stack_.top(), frame.audio_data()}));
}

void pop() { transform_stack_.pop(); }
Expand All @@ -92,25 +91,68 @@ struct audio_mixer::impl

array<const int32_t> mix(const video_format_desc& format_desc, int nb_samples)
{
if (format_desc_ != format_desc) {
audio_streams_.clear();
format_desc_ = format_desc;
}

auto channels = format_desc.audio_channels;
auto items = std::move(items_);
auto result = std::vector<int32_t>(nb_samples * channels, 0);
auto result = std::vector<int32_t>(size_t(nb_samples) * channels, 0);

auto mixed = std::vector<double>(nb_samples * channels, 0.0f);
auto mixed = std::vector<double>(size_t(nb_samples) * channels, 0.0f);

std::map<const void*, std::vector<int32_t>> next_audio_streams;

//int i = 0;
for (auto& item : items) {
auto ptr = item.samples.data();
auto size = result.size();
for (auto n = 0; n < size; ++n) {
if (n < item.samples.size()) {
mixed[n] = static_cast<double>(ptr[n]) * item.transform.volume + mixed[n];
auto ptr = item.samples.data();
auto item_size = item.samples.size();
auto dst_size = result.size();

// Resolve the samples that must be played *before* this item's own samples:
// either the tail carried over from the previous mix() call for the same
// stream tag, or (first time a tagged stream is seen) one frame of silence.
size_t last_size = 0;
const int32_t* last_ptr = nullptr;
// Owns the startup silence. It is declared at loop-body scope on purpose:
// last_ptr may point into it and is read by the mixing loop below, so the
// buffer has to outlive the branch that fills it (a branch-local vector left
// last_ptr dangling).
std::vector<int32_t> startup_silence;
bool fix_1001 = (1001 == format_desc.framerate.denominator());
if (fix_1001) {
    auto audio_stream = audio_streams_.find(item.tag);
    if (audio_stream != audio_streams_.end()) {
        last_size = audio_stream->second.size();
        last_ptr = audio_stream->second.data();
    } else if (nullptr != item.tag) {
        // Insert a sample of silence at startup
        // Covers the startup case where eg dst_size is 801 and item_size is 800
        // The sample of silence will be output before any valid audio data from the source
        last_size = channels;
        // assign() zero-fills every element; no separate memset is needed (the
        // previous memset also passed an element count as a byte count).
        startup_silence.assign(last_size, 0);
        last_ptr = startup_silence.data();
    }
}

for (auto n = 0; n < dst_size; ++n) {
if (last_ptr && n < last_size) {
mixed[n] = static_cast<double>(last_ptr[n]) * item.transform.volume + mixed[n];
} else if (n < last_size + item_size) {
mixed[n] = static_cast<double>(ptr[n - last_size]) * item.transform.volume + mixed[n];
} else {
auto offset = (item.samples.size()) - (channels - (n % channels));
auto offset = int(item_size) - (channels - (n % channels));
mixed[n] = static_cast<double>(ptr[offset]) * item.transform.volume + mixed[n];
}
}

if (fix_1001 && item.tag) {
if (item_size + last_size > dst_size) {
auto buf_size = item_size + last_size - dst_size;
std::vector<int32_t> buf(buf_size);
std::memcpy(buf.data(), item.samples.data() + dst_size - last_size, buf_size * sizeof(int32_t));
next_audio_streams[item.tag] = std::move(buf);
} else
next_audio_streams[item.tag] = std::vector<int32_t>();
}
}

audio_streams_ = std::move(next_audio_streams);

auto master_volume = master_volume_.load();
for (auto n = 0; n < mixed.size(); ++n) {
auto sample = mixed[n] * master_volume;
Expand Down
62 changes: 62 additions & 0 deletions src/core/producer/route/route_producer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@
#include <common/diagnostics/graph.h>
#include <common/param.h>
#include <common/timer.h>
#include <stack>

#include <core/frame/frame.h>
#include <core/frame/draw_frame.h>
#include <core/frame/frame_visitor.h>
#include <core/frame/frame_transform.h>
#include <core/monitor/monitor.h>
#include <core/producer/frame_producer.h>
#include <core/video_channel.h>
Expand All @@ -37,6 +41,60 @@

namespace caspar { namespace core {

class fix_stream_tag : public frame_visitor
{
const void* stream_tag_;
std::stack<std::pair<frame_transform, std::vector<draw_frame>>> frames_stack_;
boost::optional<const_frame> upd_frame_;

fix_stream_tag(const fix_stream_tag&);
fix_stream_tag& operator=(const fix_stream_tag&);

public:
fix_stream_tag(void* stream_tag)
: stream_tag_(stream_tag)
{
frames_stack_ = std::stack<std::pair<frame_transform, std::vector<draw_frame>>>();
frames_stack_.emplace(frame_transform{}, std::vector<draw_frame>());
}

void push(const frame_transform& transform) {
frames_stack_.emplace(transform, std::vector<core::draw_frame>());
}

void visit(const const_frame& frame) {
upd_frame_ = frame.with_tag(stream_tag_);
}

void pop() {
auto popped = frames_stack_.top();
frames_stack_.pop();

if (upd_frame_ != boost::none) {
auto new_frame = draw_frame(std::move(*upd_frame_));
upd_frame_ = boost::none;
new_frame.transform() = popped.first;
frames_stack_.top().second.push_back(std::move(new_frame));
} else {
auto new_frame = draw_frame(std::move(popped.second));
new_frame.transform() = popped.first;
frames_stack_.top().second.push_back(new_frame);
}
}

draw_frame operator()(draw_frame frame) {
frame.accept(*this);

auto popped = frames_stack_.top();
frames_stack_.pop();
draw_frame result = std::move(popped.second);

frames_stack_ = std::stack<std::pair<frame_transform, std::vector<draw_frame>>>();
frames_stack_.emplace(frame_transform{}, std::vector<draw_frame>());
return result;
}
};

class route_producer
: public frame_producer
, public route_control
Expand All @@ -54,6 +112,7 @@ class route_producer
core::draw_frame frame_;
int source_channel_;
int source_layer_;
fix_stream_tag tag_fix_;

int get_source_channel() const { return source_channel_; }
int get_source_layer() const { return source_layer_; }
Expand All @@ -73,12 +132,15 @@ class route_producer
: route_(route)
, source_channel_(source_channel)
, source_layer_(source_layer)
, tag_fix_(this)
, connection_(route_->signal.connect([this](const core::draw_frame& frame) {
auto frame2 = frame;
if (!frame2) {
// We got a frame, so ensure it is a real frame (otherwise the layer gets confused)
frame2 = core::draw_frame::push(frame2);
}
// Update the tag in the frame to allow the audio mixer to distinguish between the source frame and the routed frame
frame2 = tag_fix_(frame2);
if (!buffer_.try_push(frame2)) {
graph_->set_tag(diagnostics::tag_severity::WARNING, "dropped-frame");
}
Expand Down
2 changes: 1 addition & 1 deletion src/modules/oal/consumer/oal_consumer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ struct oal_consumer : public core::frame_consumer
graph_->set_text(print());

executor_.begin_invoke([=] {
duration_ = format_desc_.audio_cadence[0];
duration_ = *std::min_element(format_desc_.audio_cadence.begin(), format_desc_.audio_cadence.end());
buffers_.resize(8);
alGenBuffers(static_cast<ALsizei>(buffers_.size()), buffers_.data());
alGenSources(1, &source_);
Expand Down