Merge pull request #123 from jyrkialakuijala/tabuli

further simplifications
google · Aug 9, 2024 · fdcef96 · fdcef96
2 parents b112037 + 577f2fb
commit fdcef96
Show file tree

Hide file tree

Showing 4 changed files with 90 additions and 65 deletions.
diff --git a/cpp/zimt/fourier_bank.cc b/cpp/zimt/fourier_bank.cc
@@ -18,56 +18,78 @@
 
 namespace tabuli {
 
-float Loudness(int k, float val) {
-  static const float pars[30][3] = {
+float Loudness(float freq, float val) {
+  static const float pars[30][4] = {
     // ~[20*1.2589254117941673**i for i in range(31)]
-    { 0.635, -31.5, 78.1, }, // 20
-    { 0.602, -27.2, 68.7, }, // 25
-    { 0.569, -23.1, 59.5, }, // 31.5
-    { 0.537, -19.3, 51.1, }, // 40
-    { 0.509, -16.1, 44.0, }, // 50
-    { 0.482, -13.1, 37.5, }, // 63
-    { 0.456, -10.4, 31.5, }, // 80
-    { 0.433, -8.2, 26.5, }, // 100
-    { 0.412, -6.3, 22.1, }, // 125
-    { 0.391, -4.6, 17.9, }, // 160
-    { 0.373, -3.2, 14.4, }, // 200
-    { 0.357, -2.1, 11.4, }, // 250
-    { 0.343, -1.2, 8.6, }, // 315
-    { 0.330, -0.5, 6.2, }, // 400
-    { 0.320, 0.0, 4.4, }, // 500
-    { 0.311, 0.4, 3.0, }, // 630
-    { 0.303, 0.5, 2.2, }, // 800
-    { 0.300, 0.0, 2.4, }, // 1000
-    { 0.295, -2.7, 3.5, }, // 1250
-    { 0.292, -4.2, 1.7, }, // 1600
-    { 0.290, -1.2, -1.3, }, // 2000
-    { 0.290, 1.4, -4.2, }, // 2500
-    { 0.289, 2.3, -6.0, }, // 3150
-    { 0.289, 1.0, -5.4, }, // 4000
-    { 0.289, -2.3, -1.5, }, // 5000
-    { 0.293, -7.2, 6.0, }, // 6300
-    { 0.303, -11.2, 12.6, }, // 8000
-    { 0.323, -10.9, 13.9, }, // 10000
-    { 0.354, -3.5, 12.3, }, // 12500
+    { 0.635, -31.5, 78.1, 20 },
+    { 0.602, -27.2, 68.7, 25 },
+    { 0.569, -23.1, 59.5, 31.5 },
+    { 0.537, -19.3, 51.1, 40 },
+    { 0.509, -16.1, 44.0, 50 },
+    { 0.482, -13.1, 37.5, 63 },
+    { 0.456, -10.4, 31.5, 80 },
+    { 0.433, -8.2, 26.5, 100 },
+    { 0.412, -6.3, 22.1, 125 },
+    { 0.391, -4.6, 17.9, 160 },
+    { 0.373, -3.2, 14.4, 200 },
+    { 0.357, -2.1, 11.4, 250 },
+    { 0.343, -1.2, 8.6, 315 },
+    { 0.330, -0.5, 6.2, 400 },
+    { 0.320, 0.0, 4.4, 500 },
+    { 0.311, 0.4, 3.0, 630 },
+    { 0.303, 0.5, 2.2, 800 },
+    { 0.300, 0.0, 2.4, 1000 },
+    { 0.295, -2.7, 3.5, 1250 },
+    { 0.292, -4.2, 1.7, 1600 },
+    { 0.290, -1.2, -1.3, 2000 },
+    { 0.290, 1.4, -4.2, 2500 },
+    { 0.289, 2.3, -6.0, 3150 },
+    { 0.289, 1.0, -5.4, 4000 },
+    { 0.289, -2.3, -1.5, 5000 },
+    { 0.293, -7.2, 6.0, 6300 },
+    { 0.303, -11.2, 12.6, 8000 },
+    { 0.323, -10.9, 13.9, 10000 },
+    { 0.354, -3.5, 12.3, 12500 },
   };
-  const float *vals = &pars[k / 5][0];
-  static float constant1 = 48.70343225300608;
-  static float constant2 = 40.43827165462807;
-  static const float kMul = -2.7207126528654677;
-  val += kMul * vals[2];
-  val *= (constant1 + vals[1]) * (1.0 / constant2);
+  int low_ix = 0;
+  int high_ix = 0;
+  float interp = 0;
+  int start_kk = 0;
+  if (freq > 200) start_kk = 10;
+  if (freq > 2000) start_kk = 20;
+  for (int kk = start_kk; kk < 30; ++kk) {
+    if (freq >= pars[kk][3] && (kk == 29 || freq < pars[kk + 1][3])) {
+      low_ix = kk;
+      high_ix = std::min(kk + 1, 29);
+      if (low_ix == high_ix) {
+        interp = 0;
+      } else {
+        interp = (freq - pars[low_ix][3]) / (pars[high_ix][3] - pars[low_ix][3]);
+      }
+      break;
+    }
+  }
+  const float *vals = &pars[low_ix][0];
+  const float *valsNext = &pars[high_ix][0];
+  float vals1 = (1.0 - interp) * vals[1] + interp * valsNext[1];
+  float vals2 = (1.0 - interp) * vals[2] + interp * valsNext[2];
+  static const float constant1 = 45.82980211617118;
+  static const float constant2 = 80.64186299717754;
+  static const float kMul = 3.4191209497307895;
+  val *= (constant1 + vals1) * (1.0 / constant2);
+  val += kMul * vals2;
   return val;
 }
 
 float SimpleDb(float energy) {
   // ideally 78.3 db
-  static const float full_scale_sine_db = 77.39771094914877;
+  static const float full_scale_sine_db = 75.572775538058821;
   static const float exp_full_scale_sine_db = exp(full_scale_sine_db);
   // epsilon, but the biggest one you saw (~4.95e23)
   static const float epsilon = 1.0033294789821357e-09 * exp_full_scale_sine_db;
   // kMul allows faster log instead of log10 below, incorporating multiplying by 10 for decibel.
-  constexpr float kMul = 10.0/log(10);
+  static const float kMul0 = 7.7911366413917413;
+  static const float kMul = kMul0 / log(10);
   return kMul * log(energy + epsilon);
 }
 
@@ -85,18 +107,19 @@ void PrepareMasker(hwy::AlignedNDArray<float, 2>& channels,
       0.02009898726851852,
       0.27419898726851855,
 
-      -0.04009898726851849,
+      0.04009898726851849,
       0.3270268229166667,
       0.6400989872685185,
 
       0.36397005208333333,
-      0.6505010127314814,
-      0.8000989872685186,
+      0.65050101273148142,
+      1.4265898726851856,
 
-      -0.15930101273148148,
-      1.5483130497685185,
-      8.31009898726852,
+      0.1,
+      1.2929090497685181,
+      8.5348843872685212,
     };
+    static const double kMul = 0.92558468864724108;
     static const float div = 1.0 / (2*(c[0]+c[1]+c[2]+c[3]+c[4]+c[5]+c[6]+c[7]+c[8])+c[9]+c[10]+c[11]);
     for (int k = 0; k < kNumRotators; ++k) {
       int prev3 = std::max(0, k - 3);
@@ -119,20 +142,20 @@ void PrepareMasker(hwy::AlignedNDArray<float, 2>& channels,
           channels[{oi2}][next2] * c[3] + channels[{oi1}][next2] * c[4] + channels[{oi0}][next2] * c[5] +
           channels[{oi2}][next3] * c[0] + channels[{oi1}][next3] * c[1] + channels[{oi0}][next3] * c[2];
 
-      masker[k] = v * div;
+      masker[k] = v * div * kMul;
     }
   }
   static const double octaves_in_20_to_20000 = log(20000/20.)/log(2);
   static const double octaves_per_rot =
       octaves_in_20_to_20000 / float(kNumRotators - 1);
-  static const double masker_step_per_octave_up_0 = 20.54547806594578;
-  static const double masker_step_per_octave_up_1 = 24.608097753757256;
-  static const double masker_step_per_octave_up_2 = 6.0;
+  static const double masker_step_per_octave_up_0 = 20.530166862351951;
+  static const double masker_step_per_octave_up_1 = 24.71276102911699;
+  static const double masker_step_per_octave_up_2 = 1.5390804378846834;
   static const double masker_step_per_rot_up_0 = octaves_per_rot * masker_step_per_octave_up_0;
   static const double masker_step_per_rot_up_1 = octaves_per_rot * masker_step_per_octave_up_1;
   static const double masker_step_per_rot_up_2 = octaves_per_rot * masker_step_per_octave_up_2;
 
-  static const double masker_step_per_octave_down = 53.40075984772409;
+  static const double masker_step_per_octave_down = 22.474862790273633;
   static const double masker_step_per_rot_down = octaves_per_rot * masker_step_per_octave_down;
   // propagate masker up
   float mask = 0;
@@ -162,20 +185,20 @@ void PrepareMasker(hwy::AlignedNDArray<float, 2>& channels,
   }
 }
 
-void FinalizeDb(hwy::AlignedNDArray<float, 2>& channels, float mul,
+void FinalizeDb(std::vector<float> rotator_frequency,
+                hwy::AlignedNDArray<float, 2>& channels, float mul,
                 size_t out_ix) {
   float masker[kNumRotators];
   for (int k = 0; k < kNumRotators; ++k) {
     float v = SimpleDb(mul * channels[{out_ix}][k]);
-    channels[{out_ix}][k] = Loudness(k, v);
+    channels[{out_ix}][k] = Loudness(rotator_frequency[k], v);
   }
   PrepareMasker(channels, &masker[0], out_ix);
 
 
-  static const double masker_gap = 20.716199363425925;
-  static const float maskingStrength = 0.22591336897956596;
-
-  static const float min_limit = -11.3968870989223;
+  static const double masker_gap = 21.233031313853495;
+  static const float maskingStrength = 0.40144654396596807;
+  static const float min_limit = 0;
 
   // Scan frequencies from bottom to top, let lower frequencies to mask higher frequencies.
   // 'masker' maintains the masking envelope from one bin to next.
@@ -203,7 +226,7 @@ void Rotators::FilterAndDownsample(hwy::Span<const float> signal,
       int64_t input_ix = ii + zz;
       if (input_ix >= signal.size()) {
         if (out_ix < channels.shape()[0]) {
-          FinalizeDb(channels, scaling_for_downsampling, out_ix);
+          FinalizeDb(rotator_frequency, channels, scaling_for_downsampling, out_ix);
         }
         if (out_ix != channels.shape()[0] - 1) {
           fprintf(stderr,
@@ -229,7 +252,7 @@ void Rotators::FilterAndDownsample(hwy::Span<const float> signal,
         }
       }
     }
-    FinalizeDb(channels, scaling_for_downsampling, out_ix);
+    FinalizeDb(rotator_frequency, channels, scaling_for_downsampling, out_ix);
     ++out_ix;
     if (out_ix >= channels.shape()[0]) {
       return;
@@ -257,7 +280,7 @@ Rotators::Rotators(int num_channels, std::vector<float> frequency,
     window[i] = std::pow(kWindow, bw * kBandwidthMagic);
     float windowM1 = 1.0f - window[i];
     float f = frequency[i] * 2.0f * M_PI / sample_rate;
-    static const float full_scale_sine_db = exp(76.66488071851488);
+    static const float full_scale_sine_db = exp(76.63936108268561);
     const float gainer = sqrt(full_scale_sine_db);
     gain[i] = gainer * filter_gains[i] * pow(windowM1, 3.0);
     rot[0][i] = float(std::cos(f));

diff --git a/cpp/zimt/nsim.cc b/cpp/zimt/nsim.cc
@@ -199,8 +199,9 @@ float HwyNSIM(const hwy::AlignedNDArray<float, 2>& a,
         return Mul(delta_a, delta_b);
       });
   const Vec two = Set(d, 2.0);
-  const Vec C1 = Set(d, 0.19863327786546683);
-  const Vec C3 = Set(d, 0.17538360286546675);
+  const Vec C1 = Set(d, 64.820932997704162);
+  const Vec C3 = Set(d, 11.285860868315538);
+  const Vec C4 = Set(d, 1e-6 * 2.9262932000000021);
   float nsim_sum = 0.0;
   const Vec num_channels_vec = Set(d, num_channels);
   const Vec zero = Zero(d);
@@ -219,8 +220,9 @@ float HwyNSIM(const hwy::AlignedNDArray<float, 2>& a,
       const Vec intensity = Div(
           MulAdd(two, Mul(mean_a_vec, mean_b_vec), C1),
           MulAdd(mean_a_vec, mean_a_vec, MulAdd(mean_b_vec, mean_b_vec, C1)));
-      const Vec structure =
-          Div(Add(cov_vec, C3), MulAdd(std_a_vec, std_b_vec, C3));
+      const Vec structure_base = Div(Add(cov_vec, C3), MulAdd(std_a_vec, std_b_vec, C3));
+      const Vec structure_clamped = IfThenElse(Lt(structure_base, zero), zero, structure_base);
+      const Vec structure = Sqrt(Sqrt(Add(structure_clamped, C4)));
       const Vec channel_index_vec = Iota(d, channel_index);
       const Vec nsim = IfThenElse(Lt(channel_index_vec, num_channels_vec),
                                   Mul(intensity, structure), zero);

diff --git a/cpp/zimt/zimtohrli.h b/cpp/zimt/zimtohrli.h
@@ -279,7 +279,7 @@ struct Zimtohrli {
 
   // Sample rate corresponding to the human hearing sensitivity to timing
   // differences.
-  float perceptual_sample_rate = 85.0;
+  float perceptual_sample_rate = 97.969986924466824;
 
   // The filterbank used to separate the signal in frequency channels.
   std::optional<CamFilterbank> cam_filterbank;
@@ -288,7 +288,7 @@ struct Zimtohrli {
   size_t nsim_step_window = 8;
 
   // The window in channels when computing the NSIM.
-  size_t nsim_channel_window = 16;
+  size_t nsim_channel_window = 7;
 
   // The window of the dynamic time warp that matches audio signals.
   //

diff --git a/go/goohrli/goohrli.a b/go/goohrli/goohrli.a