Rename raw_samples to pcm_samples and wrap in std::optional.
  - These represent the original lossless audio that was used to encode the bitstream.
    - They mostly have functional importance for recon gain.
    - They are also currently relied on by certain tests that work with lossy codecs.
    - Certain paths, like one that decodes IAMF, would never know what the "original" lossless content was.
    - They can also be memory intensive, so it may be useful to destroy the data once it is no longer needed.
  - Signal in a few places where they are irrelevant by using `std::nullopt` (see the sketch after this list).
  - Rename in anticipation of unifying with `DecodedAudioFrame::decoded_samples`, which could also be called `pcm_samples`.
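
  A minimal sketch of how a consumer might handle the now-optional field. This is illustrative only, not code from this repository; `FrameSketch` and `FirstSampleOrZero` are hypothetical names.

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Sketch of a frame holding an optional PCM buffer, which may be absent,
    // e.g. on a pure decode path or after the samples have been released to
    // save memory.
    struct FrameSketch {
      std::optional<std::vector<std::vector<int32_t>>> pcm_samples;
    };

    // Callers that need the original lossless samples (e.g. for recon gain)
    // must check for presence before use.
    int32_t FirstSampleOrZero(const FrameSketch& frame) {
      if (!frame.pcm_samples.has_value() || frame.pcm_samples->empty() ||
          frame.pcm_samples->front().empty()) {
        return 0;
      }
      return frame.pcm_samples->front().front();
    }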

PiperOrigin-RevId: 690745328
jwcullen committed Nov 1, 2024
1 parent 77ad423 commit f154bd7
Showing 7 changed files with 51 additions and 19 deletions.
iamf/cli/audio_frame_with_data.h: 5 changes (4 additions, 1 deletion)
@@ -14,6 +14,7 @@
#define CLI_AUDIO_FRAME_WITH_DATA_H_

#include <cstdint>
#include <optional>
#include <vector>

#include "iamf/cli/audio_element_with_data.h"
@@ -32,7 +33,9 @@ struct AudioFrameWithData {
int32_t end_timestamp; // End time of this frame. Measured in ticks from the
// Global Timing Module.

std::vector<std::vector<int32_t>> raw_samples;
// The PCM samples to encode this audio frame, if known. This is useful to
// calculate recon gain.
std::optional<std::vector<std::vector<int32_t>>> pcm_samples;

// Down-mixing parameters used to create this audio frame.
DownMixingParams down_mixing_params;
iamf/cli/codec/tests/encoder_test_base.h: 4 changes (2 additions, 2 deletions)
@@ -46,7 +46,7 @@ class EncoderTestBase {
EXPECT_THAT(encoder_->Initialize(kValidateCodecDelay), IsOk());
}

void EncodeAudioFrame(const std::vector<std::vector<int32_t>>& raw_samples,
void EncodeAudioFrame(const std::vector<std::vector<int32_t>>& pcm_samples,
bool expected_encode_frame_is_ok = true) {
// `EncodeAudioFrame` only passes on most of the data in the input
// `AudioFrameWithData`. Simulate the timestamp to ensure frames are
@@ -69,7 +69,7 @@

// Encode the frame as requested.
EXPECT_EQ(encoder_
->EncodeAudioFrame(input_sample_size_, raw_samples,
->EncodeAudioFrame(input_sample_size_, pcm_samples,
std::move(partial_audio_frame_with_data))
.ok(),
expected_encode_frame_is_ok);
iamf/cli/demixing_module.cc: 21 changes (14 additions, 7 deletions)
@@ -601,14 +601,17 @@ uint32_t GetSubstreamId(const DecodedAudioFrame& audio_frame_with_data) {
return audio_frame_with_data.substream_id;
}

const std::vector<std::vector<int32_t>>& GetSamples(
const std::vector<std::vector<int32_t>>* GetSamples(
const AudioFrameWithData& audio_frame_with_data) {
return audio_frame_with_data.raw_samples;
if (!audio_frame_with_data.pcm_samples.has_value()) {
return nullptr;
}
return &audio_frame_with_data.pcm_samples.value();
}

const std::vector<std::vector<int32_t>>& GetSamples(
const std::vector<std::vector<int32_t>>* GetSamples(
const DecodedAudioFrame& audio_frame_with_data) {
return audio_frame_with_data.decoded_samples;
return &audio_frame_with_data.decoded_samples;
}

// NOOP function if the frame is not a DecodedAudioFrame.
@@ -676,16 +679,20 @@ absl::Status StoreSamplesForAudioElementId(
const auto& labels = substream_id_labels_iter->second;
int channel_index = 0;
for (const auto& label : labels) {
const auto& input_samples = GetSamples(audio_frame);
const size_t num_ticks = input_samples.size();
const auto* input_samples = GetSamples(audio_frame);
if (input_samples == nullptr) {
return absl::InvalidArgumentError(
"Input samples are not available for down-mixing.");
}
const size_t num_ticks = input_samples->size();

ConfigureLabeledFrame(audio_frame, labeled_frame);

auto& samples = labeled_frame.label_to_samples[label];
samples.resize(num_ticks, 0);
for (int t = 0; t < samples.size(); t++) {
samples[t] =
static_cast<InternalSampleType>(input_samples[t][channel_index]);
static_cast<InternalSampleType>((*input_samples)[t][channel_index]);
}
channel_index++;
}
iamf/cli/proto_to_obu/audio_frame_generator.cc: 2 changes (1 addition, 1 deletion)
@@ -503,7 +503,7 @@ absl::Status EncodeFramesForAudioElement(
substream_id, {}),
.start_timestamp = start_timestamp,
.end_timestamp = end_timestamp,
.raw_samples = samples_obu,
.pcm_samples = samples_obu,
.down_mixing_params = down_mixing_params,
.audio_element_with_data = &audio_element_with_data});

iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc: 5 changes (3 additions, 2 deletions)
@@ -1119,9 +1119,10 @@ TEST(AudioFrameGenerator, ManyFramesThreaded) {
const InternalSampleType expected_sample = all_samples[index][kFirstSample];
// The timestamp should count up by the number of samples in each frame.
EXPECT_EQ(audio_frame.start_timestamp, kFrameSize * index);
EXPECT_DOUBLE_EQ(audio_frame.raw_samples[kFirstSample][kLeftChannel],
ASSERT_TRUE(audio_frame.pcm_samples.has_value());
EXPECT_DOUBLE_EQ((*audio_frame.pcm_samples)[kFirstSample][kLeftChannel],
expected_sample);
EXPECT_DOUBLE_EQ(audio_frame.raw_samples[kFirstSample][kRightChannel],
EXPECT_DOUBLE_EQ((*audio_frame.pcm_samples)[kFirstSample][kRightChannel],
expected_sample);
index++;
}
iamf/cli/tests/demixing_module_test.cc: 29 changes (24 additions, 5 deletions)
@@ -16,6 +16,7 @@
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

Expand Down Expand Up @@ -940,7 +941,7 @@ class DemixingModuleTest : public DemixingModuleTestBase,
public:
void ConfigureLosslessAudioFrameAndDecodedAudioFrame(
const std::list<ChannelLabel::Label>& labels,
const std::vector<std::vector<int32_t>>& raw_samples,
const std::vector<std::vector<int32_t>>& pcm_samples,
DownMixingParams down_mixing_params = {
.alpha = 1, .beta = .866, .gamma = .866, .delta = .866, .w = 0.25}) {
// The substream ID itself does not matter. Generate a unique one.
Expand All @@ -953,7 +954,7 @@ class DemixingModuleTest : public DemixingModuleTestBase,
.obu = AudioFrameObu(ObuHeader(), substream_id, {}),
.start_timestamp = kStartTimestamp,
.end_timestamp = kEndTimestamp,
.raw_samples = raw_samples,
.pcm_samples = pcm_samples,
.down_mixing_params = down_mixing_params,
});

Expand All @@ -963,7 +964,7 @@ class DemixingModuleTest : public DemixingModuleTestBase,
.end_timestamp = kEndTimestamp,
.samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
.samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
.decoded_samples = raw_samples,
.decoded_samples = pcm_samples,
.down_mixing_params = down_mixing_params});

auto& expected_label_to_samples =
Expand All @@ -975,8 +976,8 @@ class DemixingModuleTest : public DemixingModuleTestBase,
for (int channel = 0; channel < labels.size(); ++channel) {
auto& samples_for_channel = expected_label_to_samples[*labels_iter];

samples_for_channel.reserve(raw_samples.size());
for (auto tick : raw_samples) {
samples_for_channel.reserve(pcm_samples.size());
for (auto tick : pcm_samples) {
samples_for_channel.push_back(tick[channel]);
}
labels_iter++;
@@ -1077,6 +1078,24 @@ TEST_F(DemixingModuleTest, S1ToS2Demixer) {
TestDemixing(1);
}

TEST_F(DemixingModuleTest,
DemixAudioSamplesReturnsErrorIfAudioFrameIsMissingPcmSamples) {
ConfigureAudioFrameMetadata("L2");
ConfigureAudioFrameMetadata("R2");
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kMono}, {{750}, {1500}});
ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2}, {{1000}, {2000}});
IdLabeledFrameMap unused_id_to_labeled_frame, id_to_labeled_decoded_frame;
TestCreateDemixingModule(1);
// Destroy the raw samples.
audio_frames_.back().pcm_samples = std::nullopt;

EXPECT_FALSE(demixing_module_
.DemixAudioSamples(audio_frames_, decoded_audio_frames_,
unused_id_to_labeled_frame,
id_to_labeled_decoded_frame)
.ok());
}

TEST_F(DemixingModuleTest, S2ToS3Demixer) {
// The highest layer is 3.1.2.
ConfigureAudioFrameMetadata("L3");
iamf/cli/tests/obu_sequencer_test.cc: 4 changes (3 additions, 1 deletion)
@@ -66,6 +66,8 @@ constexpr absl::string_view kOmitOutputIamfFile = "";
constexpr bool kIncludeTemporalDelimiters = true;
constexpr bool kDoNotIncludeTemporalDelimiters = false;

constexpr std::nullopt_t kOriginalSamplesAreIrrelevant = std::nullopt;

// TODO(b/302470464): Add test coverage `ObuSequencer::WriteTemporalUnit()` and
// `ObuSequencer::PickAndPlace()` configured with minimal and
// fixed-size leb generators.
@@ -81,7 +83,7 @@ void AddEmptyAudioFrameWithAudioElementIdSubstreamIdAndTimestamps(
.obu = AudioFrameObu(ObuHeader(), substream_id, {}),
.start_timestamp = start_timestamp,
.end_timestamp = end_timestamp,
.raw_samples = {},
.pcm_samples = kOriginalSamplesAreIrrelevant,
.down_mixing_params = {.in_bitstream = false},
.audio_element_with_data = &audio_elements.at(audio_element_id)});
}
