From d39c9d9a0a67055bbb116e8bc42efcb0ee6878f7 Mon Sep 17 00:00:00 2001
From: Jyrki Alakuijala <jyrki@google.com>
Date: Mon, 19 Feb 2024 11:01:26 +0100
Subject: [PATCH] unifying to sliding fft

---
 scripts/upmix.py               |   6 +-
 speaker_experiments/revolve.cc | 114 +++++++++++++++++----------------
 2 files changed, 64 insertions(+), 56 deletions(-)
diff --git a/scripts/upmix.py b/scripts/upmix.py
index 0a33fd7..26f0dcb 100644
--- a/scripts/upmix.py
+++ b/scripts/upmix.py
@@ -54,7 +54,9 @@ def run(str):
 
 #run("./build/two_to_three /tmp/down.wav /tmp/down3.wav")
 
-run("./build/angular /tmp/down-dry.wav /tmp/down-dry-angular.wav")
+#run("./build/angular /tmp/down-dry.wav /tmp/down-dry-angular.wav")
+
+run("./build/revolve /tmp/down-dry.wav /tmp/down-dry-angular.wav")
 
 # early reflections assume this bandpass filtering and reflection_volume
 # attenuation
@@ -78,7 +80,7 @@ def run(str):
 
 run("./build/driver_model /tmp/down3-20.wav /tmp/down3-mod.wav")
 
-run("sox /tmp/down3-mod.wav -b 24 /tmp/down3-norm.wav treble -4 10000 norm -40");
+run("sox /tmp/down3-mod.wav -b 24 /tmp/down3-norm.wav treble -4 10000 norm -22");
 
 run("amixer --card 2 cset numid=3,iface=MIXER,name='UMC1820 Output Playback Volume' 127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127")
 run("amixer --card 2 cset numid=1,iface=MIXER,name='UMC1820 Output Playback Switch' on,on,on,on,on,on,on,on,on,on,on,on,on,on,on,on")
diff --git a/speaker_experiments/revolve.cc b/speaker_experiments/revolve.cc
index 63156d2..e75fc10 100644
--- a/speaker_experiments/revolve.cc
+++ b/speaker_experiments/revolve.cc
@@ -26,12 +26,20 @@
 #include "absl/flags/parse.h"
 #include "absl/log/check.h"
 
+ABSL_FLAG(int, output_channels, 120, "number of output channels");
+ABSL_FLAG(double, distance_to_interval_ratio, 40,
+          "ratio of (distance between microphone and source array) / (distance "
+          "between each source); default = 40cm / 10cm = 4");
+
+
 namespace {
 
 constexpr int kSubSourcePrecision = 1000;
 
+float SquaredNorm(const std::complex<double> c) { return std::norm(c); }
+
 double MicrophoneResponse(const double angle) {
-  return 0.5f * (1.f + std::cos(angle));
+  return 0.5f * (1.25f + std::cos(angle));
 }
 
 double ExpectedLeftToRightRatio(const double angle) {
@@ -39,48 +47,27 @@ double ExpectedLeftToRightRatio(const double angle) {
          (1e-3 + MicrophoneResponse(angle - M_PI / 4));
 }
 
-double ActualLeftToRightRatio(const double squared_left,
-                              const double squared_right) {
-  return std::sqrt((1e-13 + squared_left) / (1e-13 + squared_right));
+float ActualLeftToRightRatio(const std::complex<double> left,
+                             const std::complex<double> right) {
+  return std::sqrt((1e-3 + SquaredNorm(left)) / (1e-3 + SquaredNorm(right)));
 }
 
 double FindMedian3xLeaker(double window) {
-  // return -2.58/log(window);  // Faster approximate function.
-  double half_way_to_total_sum = 1e300;
-  for (int k = 0; k < 2; ++k) {
-    double sum = 0;
-    double v0 = 1;
-    double v1 = 0;
-    double v2 = 0;
-    double prev_v2 = 0;
-    for (int i = 0; i < 65000; ++i) {
-      v1 += v0;
-      v2 += v1;
-      v0 *= window;
-      v1 *= window;
-      v2 *= window;
-      sum += v2;
-      prev_v2 = v2;
-      if (sum >= half_way_to_total_sum) {
-        return i;
-      }
-    }
-    half_way_to_total_sum = 0.45 * sum;  // Sounds better when not quite half.
-  }
-  return 65000;
+  return -2.32/log(window);  // Faster approximate function.
 }
 
+constexpr int64_t kNumRotators = 128;
+
 struct Rotator {
   std::complex<double> rot[4] = {{1, 0}, 0};
-  double window = 0.99963;  // at 200 Hz.
+  double window = std::pow(0.9996, 128.0/kNumRotators);  // at 40 Hz.
   double windowM1 = 1 - window;
   std::complex<double> exp_mia;
-  int ix = 0;
-  double advance = 0;
+  int advance = 0;
 
   Rotator(double frequency, const double sample_rate) {
-    window = pow(window, std::max(1.0, frequency / 200.0));
-    advance = 40000 - FindMedian3xLeaker(window);
+    window = pow(window, std::max(1.0, frequency / 40.0));
+    advance = 65000 - FindMedian3xLeaker(window);
     if (advance < 1) {
       advance = 1;
     }
@@ -88,7 +75,6 @@ struct Rotator {
       advance = 0xfff0;
     }
     windowM1 = 1.0 - window;
-    windowM1 = windowM1 * windowM1 * windowM1;
     frequency *= 2 * M_PI / sample_rate;
     exp_mia = {std::cos(frequency), -std::sin(frequency)};
   }
@@ -99,33 +85,35 @@ struct Rotator {
     rot[2] *= window;
     rot[3] *= window;
 
-    rot[1] += audio * rot[0];
-    rot[2] += rot[1];
+    audio *= 0.1;
+    rot[1] += windowM1 * audio * rot[0];
+    rot[2] += windowM1 * rot[1];
     rot[3] += windowM1 * rot[2];
-    ix++;
   }
   double GetSample() const {
     return rot[0].real() * rot[3].real() + rot[0].imag() * rot[3].imag();
   }
-
   double SquaredAmplitude() const { return std::norm(rot[3]); }
 };
 
 double BarkFreq(double v) {
-  constexpr double linlogsplit = 0.165;
+  constexpr double linlogsplit = 0.1;
   if (v < linlogsplit) {
-    return 20.0 + (v / linlogsplit) * 180.0;  // Linear 20-200 Hz.
+    return 20.0 + (v / linlogsplit) * 20.0;  // Linear 20-40 Hz.
   } else {
     float normalized_v = (v - linlogsplit) * (1.0 / (1.0 - linlogsplit));
-    return 200.0 * pow(100.0, normalized_v);  // Logarithmic 200-20000 Hz.
+    return 40.0 * pow(500.0, normalized_v);  // Logarithmic 40-20000 Hz.
   }
 }
 
-static constexpr int64_t kBlockSize = 16384;
-static const int kHistorySize = (1 << 17);
+
+static constexpr int64_t kBlockSize = 32768;
+static const int kHistorySize = (1 << 18);
 static const int kHistoryMask = kHistorySize - 1;
 
 class TaskExecutor {
+  std::vector<float> speaker_to_ratio_table;
+
  public:
   TaskExecutor(size_t num_threads, size_t output_channels)
       : thread_outputs_(num_threads) {
@@ -133,6 +121,17 @@ class TaskExecutor {
     for (std::vector<double>& output : thread_outputs_) {
       output.resize(output_channels * kBlockSize, 0.f);
     }
+    const double distance_to_interval_ratio =
+        absl::GetFlag(FLAGS_distance_to_interval_ratio);
+    speaker_to_ratio_table.reserve(kSubSourcePrecision * (output_channels - 1) +
+                                   1);
+    for (int i = 0; i < kSubSourcePrecision * (output_channels - 1) + 1; ++i) {
+      const float x_div_interval = static_cast<float>(i) / kSubSourcePrecision -
+                                   0.5f * (output_channels - 1);
+      const float x_div_distance = x_div_interval / distance_to_interval_ratio;
+      const float angle = std::atan(x_div_distance);
+      speaker_to_ratio_table.push_back(ExpectedLeftToRightRatio(angle));
+    }
   }
 
   void Execute(size_t num_tasks, size_t read, size_t total,
@@ -147,6 +146,7 @@ class TaskExecutor {
     std::vector<std::future<void>> futures;
     size_t num_threads = thread_outputs_.size();
     futures.reserve(num_threads);
+
     for (size_t i = 0; i < num_threads; ++i) {
       futures.push_back(
           std::async(std::launch::async, &TaskExecutor::Run, this, i));
@@ -167,14 +167,25 @@ class TaskExecutor {
         rot_left_[my_task].Increment(delayed_r);
         rot_right_[my_task].Increment(delayed_g);
 
-        double left = rot_left_[my_task].SquaredAmplitude(),
-               right = rot_right_[my_task].SquaredAmplitude();
+        const float ratio =
+            ActualLeftToRightRatio(rot_left_[my_task].rot[3],
+                                   rot_right_[my_task].rot[3]);
+        const int subspeaker_index =
+            std::lower_bound(speaker_to_ratio_table.begin(),
+                             speaker_to_ratio_table.end(), ratio,
+                             std::greater<>()) -
+            speaker_to_ratio_table.begin();
+
 
-        left = sqrt(left);
-        right = sqrt(right);
-        double index = 0.5 * ((output_channels_ - 1) +
-                              1.0 * (output_channels_ - 1) * (left - right) /
-                                  (left + right + 1e-10));
+        float distance_from_center = (subspeaker_index - 0.5 * (output_channels_ - 1));
+        float assumed_distance_to_line = 0.75 * (output_channels_ - 1);
+        float distance_to_virtual = sqrt(distance_from_center * distance_from_center +
+                                         assumed_distance_to_line * assumed_distance_to_line);
+        float dist_ratio = distance_to_virtual * (1.0f / assumed_distance_to_line);
+        float amp = dist_ratio * dist_ratio;
+
+        float index =
+            static_cast<float>(subspeaker_index) / kSubSourcePrecision;
 
         if (index < 0) {
           index = 0;
@@ -271,11 +282,6 @@ void Process(
 
 }  // namespace
 
-ABSL_FLAG(int, output_channels, 12, "number of output channels");
-ABSL_FLAG(double, distance_to_interval_ratio, 4,
-          "ratio of (distance between microphone and source array) / (distance "
-          "between each source); default = 40cm / 10cm = 4");
-
 int main(int argc, char** argv) {
   absl::ParseCommandLine(argc, argv);