From f154bd71538da4430bac1715514d6a80ca04ef7b Mon Sep 17 00:00:00 2001 From: jwcullen Date: Mon, 28 Oct 2024 17:11:17 -0400 Subject: [PATCH] Rename `raw_samples` to `pcm_samples` and wrap in `std::optional`. - These represent the original lossless audio which was used to encode the bitstream. - These mostly have functional importance for recon gain. - And they currently are relied on for certain tests working with lossy codecs. - Certain paths, like one that decodes IAMF would never know what the "original" lossless content was. - These also can be memory intensive, so for memory efficiency it may be useful to destroy the data after it is needed. - Signal in a few places where they are irrelevant by using `std::nullopt`. - Rename in anticipation of unifying with `DecodedAudioFrame::decoded_samples` which also could be called `pcm_samples`. PiperOrigin-RevId: 690745328 --- iamf/cli/audio_frame_with_data.h | 5 +++- iamf/cli/codec/tests/encoder_test_base.h | 4 +-- iamf/cli/demixing_module.cc | 21 +++++++++----- .../cli/proto_to_obu/audio_frame_generator.cc | 2 +- .../tests/audio_frame_generator_test.cc | 5 ++-- iamf/cli/tests/demixing_module_test.cc | 29 +++++++++++++++---- iamf/cli/tests/obu_sequencer_test.cc | 4 ++- 7 files changed, 51 insertions(+), 19 deletions(-) diff --git a/iamf/cli/audio_frame_with_data.h b/iamf/cli/audio_frame_with_data.h index 5363278..6030006 100644 --- a/iamf/cli/audio_frame_with_data.h +++ b/iamf/cli/audio_frame_with_data.h @@ -14,6 +14,7 @@ #define CLI_AUDIO_FRAME_WITH_DATA_H_ #include +#include #include #include "iamf/cli/audio_element_with_data.h" @@ -32,7 +33,9 @@ struct AudioFrameWithData { int32_t end_timestamp; // End time of this frame. Measured in ticks from the // Global Timing Module. - std::vector> raw_samples; + // The PCM samples to encode this audio frame, if known. This is useful to + // calculate recon gain. + std::optional>> pcm_samples; // Down-mixing parameters used to create this audio frame. 
DownMixingParams down_mixing_params; diff --git a/iamf/cli/codec/tests/encoder_test_base.h b/iamf/cli/codec/tests/encoder_test_base.h index 36e66c0..d4c53df 100644 --- a/iamf/cli/codec/tests/encoder_test_base.h +++ b/iamf/cli/codec/tests/encoder_test_base.h @@ -46,7 +46,7 @@ class EncoderTestBase { EXPECT_THAT(encoder_->Initialize(kValidateCodecDelay), IsOk()); } - void EncodeAudioFrame(const std::vector>& raw_samples, + void EncodeAudioFrame(const std::vector>& pcm_samples, bool expected_encode_frame_is_ok = true) { // `EncodeAudioFrame` only passes on most of the data in the input // `AudioFrameWithData`. Simulate the timestamp to ensure frames are @@ -69,7 +69,7 @@ class EncoderTestBase { // Encode the frame as requested. EXPECT_EQ(encoder_ - ->EncodeAudioFrame(input_sample_size_, raw_samples, + ->EncodeAudioFrame(input_sample_size_, pcm_samples, std::move(partial_audio_frame_with_data)) .ok(), expected_encode_frame_is_ok); diff --git a/iamf/cli/demixing_module.cc b/iamf/cli/demixing_module.cc index dc9e3ba..cbd6bc4 100644 --- a/iamf/cli/demixing_module.cc +++ b/iamf/cli/demixing_module.cc @@ -601,14 +601,17 @@ uint32_t GetSubstreamId(const DecodedAudioFrame& audio_frame_with_data) { return audio_frame_with_data.substream_id; } -const std::vector>& GetSamples( +const std::vector>* GetSamples( const AudioFrameWithData& audio_frame_with_data) { - return audio_frame_with_data.raw_samples; + if (!audio_frame_with_data.pcm_samples.has_value()) { + return nullptr; + } + return &audio_frame_with_data.pcm_samples.value(); } -const std::vector>& GetSamples( +const std::vector>* GetSamples( const DecodedAudioFrame& audio_frame_with_data) { - return audio_frame_with_data.decoded_samples; + return &audio_frame_with_data.decoded_samples; } // NOOP function if the frame is not a DecodedAudioFrame. 
@@ -676,8 +679,12 @@ absl::Status StoreSamplesForAudioElementId( const auto& labels = substream_id_labels_iter->second; int channel_index = 0; for (const auto& label : labels) { - const auto& input_samples = GetSamples(audio_frame); - const size_t num_ticks = input_samples.size(); + const auto* input_samples = GetSamples(audio_frame); + if (input_samples == nullptr) { + return absl::InvalidArgumentError( + "Input samples are not available for down-mixing."); + } + const size_t num_ticks = input_samples->size(); ConfigureLabeledFrame(audio_frame, labeled_frame); @@ -685,7 +692,7 @@ absl::Status StoreSamplesForAudioElementId( samples.resize(num_ticks, 0); for (int t = 0; t < samples.size(); t++) { samples[t] = - static_cast(input_samples[t][channel_index]); + static_cast((*input_samples)[t][channel_index]); } channel_index++; } diff --git a/iamf/cli/proto_to_obu/audio_frame_generator.cc b/iamf/cli/proto_to_obu/audio_frame_generator.cc index 8c7e210..3c53aa4 100644 --- a/iamf/cli/proto_to_obu/audio_frame_generator.cc +++ b/iamf/cli/proto_to_obu/audio_frame_generator.cc @@ -503,7 +503,7 @@ absl::Status EncodeFramesForAudioElement( substream_id, {}), .start_timestamp = start_timestamp, .end_timestamp = end_timestamp, - .raw_samples = samples_obu, + .pcm_samples = samples_obu, .down_mixing_params = down_mixing_params, .audio_element_with_data = &audio_element_with_data}); diff --git a/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc b/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc index 2c10ca6..9f91b6b 100644 --- a/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc +++ b/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc @@ -1119,9 +1119,10 @@ TEST(AudioFrameGenerator, ManyFramesThreaded) { const InternalSampleType expected_sample = all_samples[index][kFirstSample]; // The timestamp should count up by the number of samples in each frame. 
EXPECT_EQ(audio_frame.start_timestamp, kFrameSize * index); - EXPECT_DOUBLE_EQ(audio_frame.raw_samples[kFirstSample][kLeftChannel], + ASSERT_TRUE(audio_frame.pcm_samples.has_value()); + EXPECT_DOUBLE_EQ((*audio_frame.pcm_samples)[kFirstSample][kLeftChannel], expected_sample); - EXPECT_DOUBLE_EQ(audio_frame.raw_samples[kFirstSample][kRightChannel], + EXPECT_DOUBLE_EQ((*audio_frame.pcm_samples)[kFirstSample][kRightChannel], expected_sample); index++; } diff --git a/iamf/cli/tests/demixing_module_test.cc b/iamf/cli/tests/demixing_module_test.cc index 226448e..534b363 100644 --- a/iamf/cli/tests/demixing_module_test.cc +++ b/iamf/cli/tests/demixing_module_test.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -940,7 +941,7 @@ class DemixingModuleTest : public DemixingModuleTestBase, public: void ConfigureLosslessAudioFrameAndDecodedAudioFrame( const std::list& labels, - const std::vector>& raw_samples, + const std::vector>& pcm_samples, DownMixingParams down_mixing_params = { .alpha = 1, .beta = .866, .gamma = .866, .delta = .866, .w = 0.25}) { // The substream ID itself does not matter. Generate a unique one. 
@@ -953,7 +954,7 @@ class DemixingModuleTest : public DemixingModuleTestBase, .obu = AudioFrameObu(ObuHeader(), substream_id, {}), .start_timestamp = kStartTimestamp, .end_timestamp = kEndTimestamp, - .raw_samples = raw_samples, + .pcm_samples = pcm_samples, .down_mixing_params = down_mixing_params, }); @@ -963,7 +964,7 @@ class DemixingModuleTest : public DemixingModuleTestBase, .end_timestamp = kEndTimestamp, .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd, .samples_to_trim_at_start = kZeroSamplesToTrimAtStart, - .decoded_samples = raw_samples, + .decoded_samples = pcm_samples, .down_mixing_params = down_mixing_params}); auto& expected_label_to_samples = @@ -975,8 +976,8 @@ class DemixingModuleTest : public DemixingModuleTestBase, for (int channel = 0; channel < labels.size(); ++channel) { auto& samples_for_channel = expected_label_to_samples[*labels_iter]; - samples_for_channel.reserve(raw_samples.size()); - for (auto tick : raw_samples) { + samples_for_channel.reserve(pcm_samples.size()); + for (auto tick : pcm_samples) { samples_for_channel.push_back(tick[channel]); } labels_iter++; @@ -1077,6 +1078,24 @@ TEST_F(DemixingModuleTest, S1ToS2Demixer) { TestDemixing(1); } +TEST_F(DemixingModuleTest, + DemixAudioSamplesReturnsErrorIfAudioFrameIsMissingPcmSamples) { + ConfigureAudioFrameMetadata("L2"); + ConfigureAudioFrameMetadata("R2"); + ConfigureLosslessAudioFrameAndDecodedAudioFrame({kMono}, {{750}, {1500}}); + ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2}, {{1000}, {2000}}); + IdLabeledFrameMap unused_id_to_labeled_frame, id_to_labeled_decoded_frame; + TestCreateDemixingModule(1); + // Destroy the PCM samples. + audio_frames_.back().pcm_samples = std::nullopt; + + EXPECT_FALSE(demixing_module_ + .DemixAudioSamples(audio_frames_, decoded_audio_frames_, + unused_id_to_labeled_frame, + id_to_labeled_decoded_frame) + .ok()); +} + TEST_F(DemixingModuleTest, S2ToS3Demixer) { // The highest layer is 3.1.2. 
ConfigureAudioFrameMetadata("L3"); diff --git a/iamf/cli/tests/obu_sequencer_test.cc b/iamf/cli/tests/obu_sequencer_test.cc index 4e4d3e3..7b2acee 100644 --- a/iamf/cli/tests/obu_sequencer_test.cc +++ b/iamf/cli/tests/obu_sequencer_test.cc @@ -66,6 +66,8 @@ constexpr absl::string_view kOmitOutputIamfFile = ""; constexpr bool kIncludeTemporalDelimiters = true; constexpr bool kDoNotIncludeTemporalDelimiters = false; +constexpr std::nullopt_t kOriginalSamplesAreIrrelevant = std::nullopt; + // TODO(b/302470464): Add test coverage `ObuSequencer::WriteTemporalUnit()` and // `ObuSequencer::PickAndPlace()` configured with minimal and // fixed-size leb generators. @@ -81,7 +83,7 @@ void AddEmptyAudioFrameWithAudioElementIdSubstreamIdAndTimestamps( .obu = AudioFrameObu(ObuHeader(), substream_id, {}), .start_timestamp = start_timestamp, .end_timestamp = end_timestamp, - .raw_samples = {}, + .pcm_samples = kOriginalSamplesAreIrrelevant, .down_mixing_params = {.in_bitstream = false}, .audio_element_with_data = &audio_elements.at(audio_element_id)}); }