From f154bd71538da4430bac1715514d6a80ca04ef7b Mon Sep 17 00:00:00 2001
From: jwcullen <jwcullen@google.com>
Date: Mon, 28 Oct 2024 17:11:17 -0400
Subject: [PATCH] Rename `raw_samples` to `pcm_samples` and wrap in
 `std::optional`.

  - These represent the original lossless audio which was used to encode the bitstream.
    - These mostly have functional importance for recon gain.
    - Certain tests that work with lossy codecs also currently rely on them.
    - Certain paths, such as one that decodes IAMF, would never know what the "original" lossless content was.
    - These can also be memory intensive, so for memory efficiency it may be useful to destroy the data once it is no longer needed.
  - Use `std::nullopt` to signal the few places where they are irrelevant.
  - Rename in anticipation of unifying with `DecodedAudioFrame::decoded_samples` which also could be called `pcm_samples`.

PiperOrigin-RevId: 690745328
---
 iamf/cli/audio_frame_with_data.h              |  5 +++-
 iamf/cli/codec/tests/encoder_test_base.h      |  4 +--
 iamf/cli/demixing_module.cc                   | 21 +++++++++-----
 .../cli/proto_to_obu/audio_frame_generator.cc |  2 +-
 .../tests/audio_frame_generator_test.cc       |  5 ++--
 iamf/cli/tests/demixing_module_test.cc        | 29 +++++++++++++++----
 iamf/cli/tests/obu_sequencer_test.cc          |  4 ++-
 7 files changed, 51 insertions(+), 19 deletions(-)
diff --git a/iamf/cli/audio_frame_with_data.h b/iamf/cli/audio_frame_with_data.h
index 5363278..6030006 100644
--- a/iamf/cli/audio_frame_with_data.h
+++ b/iamf/cli/audio_frame_with_data.h
@@ -14,6 +14,7 @@
 #define CLI_AUDIO_FRAME_WITH_DATA_H_
 
 #include <cstdint>
+#include <optional>
 #include <vector>
 
 #include "iamf/cli/audio_element_with_data.h"
@@ -32,7 +33,9 @@ struct AudioFrameWithData {
   int32_t end_timestamp;  // End time of this frame. Measured in ticks from the
                           // Global Timing Module.
 
-  std::vector<std::vector<int32_t>> raw_samples;
+  // The PCM samples used to encode this audio frame, if known. These are useful
+  // to calculate recon gain.
+  std::optional<std::vector<std::vector<int32_t>>> pcm_samples;
 
   // Down-mixing parameters used to create this audio frame.
   DownMixingParams down_mixing_params;
diff --git a/iamf/cli/codec/tests/encoder_test_base.h b/iamf/cli/codec/tests/encoder_test_base.h
index 36e66c0..d4c53df 100644
--- a/iamf/cli/codec/tests/encoder_test_base.h
+++ b/iamf/cli/codec/tests/encoder_test_base.h
@@ -46,7 +46,7 @@ class EncoderTestBase {
     EXPECT_THAT(encoder_->Initialize(kValidateCodecDelay), IsOk());
   }
 
-  void EncodeAudioFrame(const std::vector<std::vector<int32_t>>& raw_samples,
+  void EncodeAudioFrame(const std::vector<std::vector<int32_t>>& pcm_samples,
                         bool expected_encode_frame_is_ok = true) {
     // `EncodeAudioFrame` only passes on most of the data in the input
     // `AudioFrameWithData`. Simulate the timestamp to ensure frames are
@@ -69,7 +69,7 @@ class EncoderTestBase {
 
     // Encode the frame as requested.
     EXPECT_EQ(encoder_
-                  ->EncodeAudioFrame(input_sample_size_, raw_samples,
+                  ->EncodeAudioFrame(input_sample_size_, pcm_samples,
                                      std::move(partial_audio_frame_with_data))
                   .ok(),
               expected_encode_frame_is_ok);
diff --git a/iamf/cli/demixing_module.cc b/iamf/cli/demixing_module.cc
index dc9e3ba..cbd6bc4 100644
--- a/iamf/cli/demixing_module.cc
+++ b/iamf/cli/demixing_module.cc
@@ -601,14 +601,17 @@ uint32_t GetSubstreamId(const DecodedAudioFrame& audio_frame_with_data) {
   return audio_frame_with_data.substream_id;
 }
 
-const std::vector<std::vector<int32_t>>& GetSamples(
+const std::vector<std::vector<int32_t>>* GetSamples(
     const AudioFrameWithData& audio_frame_with_data) {
-  return audio_frame_with_data.raw_samples;
+  if (!audio_frame_with_data.pcm_samples.has_value()) {
+    return nullptr;
+  }
+  return &audio_frame_with_data.pcm_samples.value();
 }
 
-const std::vector<std::vector<int32_t>>& GetSamples(
+const std::vector<std::vector<int32_t>>* GetSamples(
     const DecodedAudioFrame& audio_frame_with_data) {
-  return audio_frame_with_data.decoded_samples;
+  return &audio_frame_with_data.decoded_samples;
 }
 
 // NOOP function if the frame is not a DecodedAudioFrame.
@@ -676,8 +679,12 @@ absl::Status StoreSamplesForAudioElementId(
     const auto& labels = substream_id_labels_iter->second;
     int channel_index = 0;
     for (const auto& label : labels) {
-      const auto& input_samples = GetSamples(audio_frame);
-      const size_t num_ticks = input_samples.size();
+      const auto* input_samples = GetSamples(audio_frame);
+      if (input_samples == nullptr) {
+        return absl::InvalidArgumentError(
+            "Input samples are not available for down-mixing.");
+      }
+      const size_t num_ticks = input_samples->size();
 
       ConfigureLabeledFrame(audio_frame, labeled_frame);
 
@@ -685,7 +692,7 @@ absl::Status StoreSamplesForAudioElementId(
       samples.resize(num_ticks, 0);
       for (int t = 0; t < samples.size(); t++) {
         samples[t] =
-            static_cast<InternalSampleType>(input_samples[t][channel_index]);
+            static_cast<InternalSampleType>((*input_samples)[t][channel_index]);
       }
       channel_index++;
     }
diff --git a/iamf/cli/proto_to_obu/audio_frame_generator.cc b/iamf/cli/proto_to_obu/audio_frame_generator.cc
index 8c7e210..3c53aa4 100644
--- a/iamf/cli/proto_to_obu/audio_frame_generator.cc
+++ b/iamf/cli/proto_to_obu/audio_frame_generator.cc
@@ -503,7 +503,7 @@ absl::Status EncodeFramesForAudioElement(
                   substream_id, {}),
               .start_timestamp = start_timestamp,
               .end_timestamp = end_timestamp,
-              .raw_samples = samples_obu,
+              .pcm_samples = samples_obu,
               .down_mixing_params = down_mixing_params,
               .audio_element_with_data = &audio_element_with_data});
 
diff --git a/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc b/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc
index 2c10ca6..9f91b6b 100644
--- a/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc
+++ b/iamf/cli/proto_to_obu/tests/audio_frame_generator_test.cc
@@ -1119,9 +1119,10 @@ TEST(AudioFrameGenerator, ManyFramesThreaded) {
     const InternalSampleType expected_sample = all_samples[index][kFirstSample];
     // The timestamp should count up by the number of samples in each frame.
     EXPECT_EQ(audio_frame.start_timestamp, kFrameSize * index);
-    EXPECT_DOUBLE_EQ(audio_frame.raw_samples[kFirstSample][kLeftChannel],
+    ASSERT_TRUE(audio_frame.pcm_samples.has_value());
+    EXPECT_DOUBLE_EQ((*audio_frame.pcm_samples)[kFirstSample][kLeftChannel],
                      expected_sample);
-    EXPECT_DOUBLE_EQ(audio_frame.raw_samples[kFirstSample][kRightChannel],
+    EXPECT_DOUBLE_EQ((*audio_frame.pcm_samples)[kFirstSample][kRightChannel],
                      expected_sample);
     index++;
   }
diff --git a/iamf/cli/tests/demixing_module_test.cc b/iamf/cli/tests/demixing_module_test.cc
index 226448e..534b363 100644
--- a/iamf/cli/tests/demixing_module_test.cc
+++ b/iamf/cli/tests/demixing_module_test.cc
@@ -16,6 +16,7 @@
 #include <cstdint>
 #include <iterator>
 #include <list>
+#include <optional>
 #include <utility>
 #include <vector>
 
@@ -940,7 +941,7 @@ class DemixingModuleTest : public DemixingModuleTestBase,
  public:
   void ConfigureLosslessAudioFrameAndDecodedAudioFrame(
       const std::list<ChannelLabel::Label>& labels,
-      const std::vector<std::vector<int32_t>>& raw_samples,
+      const std::vector<std::vector<int32_t>>& pcm_samples,
       DownMixingParams down_mixing_params = {
           .alpha = 1, .beta = .866, .gamma = .866, .delta = .866, .w = 0.25}) {
     // The substream ID itself does not matter. Generate a unique one.
@@ -953,7 +954,7 @@ class DemixingModuleTest : public DemixingModuleTestBase,
         .obu = AudioFrameObu(ObuHeader(), substream_id, {}),
         .start_timestamp = kStartTimestamp,
         .end_timestamp = kEndTimestamp,
-        .raw_samples = raw_samples,
+        .pcm_samples = pcm_samples,
         .down_mixing_params = down_mixing_params,
     });
 
@@ -963,7 +964,7 @@ class DemixingModuleTest : public DemixingModuleTestBase,
                           .end_timestamp = kEndTimestamp,
                           .samples_to_trim_at_end = kZeroSamplesToTrimAtEnd,
                           .samples_to_trim_at_start = kZeroSamplesToTrimAtStart,
-                          .decoded_samples = raw_samples,
+                          .decoded_samples = pcm_samples,
                           .down_mixing_params = down_mixing_params});
 
     auto& expected_label_to_samples =
@@ -975,8 +976,8 @@ class DemixingModuleTest : public DemixingModuleTestBase,
     for (int channel = 0; channel < labels.size(); ++channel) {
       auto& samples_for_channel = expected_label_to_samples[*labels_iter];
 
-      samples_for_channel.reserve(raw_samples.size());
-      for (auto tick : raw_samples) {
+      samples_for_channel.reserve(pcm_samples.size());
+      for (auto tick : pcm_samples) {
         samples_for_channel.push_back(tick[channel]);
       }
       labels_iter++;
@@ -1077,6 +1078,24 @@ TEST_F(DemixingModuleTest, S1ToS2Demixer) {
   TestDemixing(1);
 }
 
+TEST_F(DemixingModuleTest,
+       DemixAudioSamplesReturnsErrorIfAudioFrameIsMissingPcmSamples) {
+  ConfigureAudioFrameMetadata("L2");
+  ConfigureAudioFrameMetadata("R2");
+  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kMono}, {{750}, {1500}});
+  ConfigureLosslessAudioFrameAndDecodedAudioFrame({kL2}, {{1000}, {2000}});
+  IdLabeledFrameMap unused_id_to_labeled_frame, id_to_labeled_decoded_frame;
+  TestCreateDemixingModule(1);
+  // Destroy the PCM samples.
+  audio_frames_.back().pcm_samples = std::nullopt;
+
+  EXPECT_FALSE(demixing_module_
+                   .DemixAudioSamples(audio_frames_, decoded_audio_frames_,
+                                      unused_id_to_labeled_frame,
+                                      id_to_labeled_decoded_frame)
+                   .ok());
+}
+
 TEST_F(DemixingModuleTest, S2ToS3Demixer) {
   // The highest layer is 3.1.2.
   ConfigureAudioFrameMetadata("L3");
diff --git a/iamf/cli/tests/obu_sequencer_test.cc b/iamf/cli/tests/obu_sequencer_test.cc
index 4e4d3e3..7b2acee 100644
--- a/iamf/cli/tests/obu_sequencer_test.cc
+++ b/iamf/cli/tests/obu_sequencer_test.cc
@@ -66,6 +66,8 @@ constexpr absl::string_view kOmitOutputIamfFile = "";
 constexpr bool kIncludeTemporalDelimiters = true;
 constexpr bool kDoNotIncludeTemporalDelimiters = false;
 
+constexpr std::nullopt_t kOriginalSamplesAreIrrelevant = std::nullopt;
+
 // TODO(b/302470464): Add test coverage `ObuSequencer::WriteTemporalUnit()` and
 //                    `ObuSequencer::PickAndPlace()` configured with minimal and
 //                    fixed-size leb generators.
@@ -81,7 +83,7 @@ void AddEmptyAudioFrameWithAudioElementIdSubstreamIdAndTimestamps(
       .obu = AudioFrameObu(ObuHeader(), substream_id, {}),
       .start_timestamp = start_timestamp,
       .end_timestamp = end_timestamp,
-      .raw_samples = {},
+      .pcm_samples = kOriginalSamplesAreIrrelevant,
       .down_mixing_params = {.in_bitstream = false},
       .audio_element_with_data = &audio_elements.at(audio_element_id)});
 }