Skip to content

Commit

Permalink
Merge pull request #123 from jyrkialakuijala/tabuli
Browse files Browse the repository at this point in the history
further simplifications
  • Loading branch information
jyrkialakuijala authored Aug 9, 2024
2 parents b112037 + 577f2fb commit fdcef96
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 65 deletions.
141 changes: 82 additions & 59 deletions cpp/zimt/fourier_bank.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,56 +18,78 @@

namespace tabuli {

float Loudness(int k, float val) {
static const float pars[30][3] = {
float Loudness(float freq, float val) {
static const float pars[30][4] = {
// ~[20*1.2589254117941673**i for i in range(31)]
{ 0.635, -31.5, 78.1, }, // 20
{ 0.602, -27.2, 68.7, }, // 25
{ 0.569, -23.1, 59.5, }, // 31.5
{ 0.537, -19.3, 51.1, }, // 40
{ 0.509, -16.1, 44.0, }, // 50
{ 0.482, -13.1, 37.5, }, // 63
{ 0.456, -10.4, 31.5, }, // 80
{ 0.433, -8.2, 26.5, }, // 100
{ 0.412, -6.3, 22.1, }, // 125
{ 0.391, -4.6, 17.9, }, // 160
{ 0.373, -3.2, 14.4, }, // 200
{ 0.357, -2.1, 11.4, }, // 250
{ 0.343, -1.2, 8.6, }, // 315
{ 0.330, -0.5, 6.2, }, // 400
{ 0.320, 0.0, 4.4, }, // 500
{ 0.311, 0.4, 3.0, }, // 630
{ 0.303, 0.5, 2.2, }, // 800
{ 0.300, 0.0, 2.4, }, // 1000
{ 0.295, -2.7, 3.5, }, // 1250
{ 0.292, -4.2, 1.7, }, // 1600
{ 0.290, -1.2, -1.3, }, // 2000
{ 0.290, 1.4, -4.2, }, // 2500
{ 0.289, 2.3, -6.0, }, // 3150
{ 0.289, 1.0, -5.4, }, // 4000
{ 0.289, -2.3, -1.5, }, // 5000
{ 0.293, -7.2, 6.0, }, // 6300
{ 0.303, -11.2, 12.6, }, // 8000
{ 0.323, -10.9, 13.9, }, // 10000
{ 0.354, -3.5, 12.3, }, // 12500
{ 0.635, -31.5, 78.1, 20 },
{ 0.602, -27.2, 68.7, 25 },
{ 0.569, -23.1, 59.5, 31.5 },
{ 0.537, -19.3, 51.1, 40 },
{ 0.509, -16.1, 44.0, 50 },
{ 0.482, -13.1, 37.5, 63 },
{ 0.456, -10.4, 31.5, 80 },
{ 0.433, -8.2, 26.5, 100 },
{ 0.412, -6.3, 22.1, 125 },
{ 0.391, -4.6, 17.9, 160 },
{ 0.373, -3.2, 14.4, 200 },
{ 0.357, -2.1, 11.4, 250 },
{ 0.343, -1.2, 8.6, 315 },
{ 0.330, -0.5, 6.2, 400 },
{ 0.320, 0.0, 4.4, 500 },
{ 0.311, 0.4, 3.0, 630 },
{ 0.303, 0.5, 2.2, 800 },
{ 0.300, 0.0, 2.4, 1000 },
{ 0.295, -2.7, 3.5, 1250 },
{ 0.292, -4.2, 1.7, 1600 },
{ 0.290, -1.2, -1.3, 2000 },
{ 0.290, 1.4, -4.2, 2500 },
{ 0.289, 2.3, -6.0, 3150 },
{ 0.289, 1.0, -5.4, 4000 },
{ 0.289, -2.3, -1.5, 5000 },
{ 0.293, -7.2, 6.0, 6300 },
{ 0.303, -11.2, 12.6, 8000 },
{ 0.323, -10.9, 13.9, 10000 },
{ 0.354, -3.5, 12.3, 12500 },
};
const float *vals = &pars[k / 5][0];
static float constant1 = 48.70343225300608;
static float constant2 = 40.43827165462807;
static const float kMul = -2.7207126528654677;
val += kMul * vals[2];
val *= (constant1 + vals[1]) * (1.0 / constant2);
int low_ix = 0;
int high_ix = 0;
float interp = 0;
int start_kk = 0;
if (freq > 200) start_kk = 10;
if (freq > 2000) start_kk = 20;
for (int kk = start_kk; kk < 30; ++kk) {
if (freq >= pars[kk][3] && (kk == 29 || freq < pars[kk + 1][3])) {
low_ix = kk;
high_ix = std::min(kk + 1, 29);
if (low_ix == high_ix) {
interp = 0;
} else {
interp = (freq - pars[low_ix][3]) / (pars[high_ix][3] - pars[low_ix][3]);
}
break;
}
}
const float *vals = &pars[low_ix][0];
const float *valsNext = &pars[high_ix][0];
float vals1 = (1.0 - interp) * vals[1] + interp * valsNext[1];
float vals2 = (1.0 - interp) * vals[2] + interp * valsNext[2];
static const float constant1 = 45.82980211617118;
static const float constant2 = 80.64186299717754;
static const float kMul = 3.4191209497307895;
val *= (constant1 + vals1) * (1.0 / constant2);
val += kMul * vals2;
return val;
}

float SimpleDb(float energy) {
// ideally 78.3 db
static const float full_scale_sine_db = 77.39771094914877;
static const float full_scale_sine_db = 75.572775538058821;
static const float exp_full_scale_sine_db = exp(full_scale_sine_db);
// epsilon, but the biggest one you saw (~4.95e23)
static const float epsilon = 1.0033294789821357e-09 * exp_full_scale_sine_db;
// kMul allows faster log instead of log10 below, incorporating multiplying by 10 for decibel.
constexpr float kMul = 10.0/log(10);
static const float kMul0 = 7.7911366413917413;
static const float kMul = kMul0 / log(10);
return kMul * log(energy + epsilon);
}

Expand All @@ -85,18 +107,19 @@ void PrepareMasker(hwy::AlignedNDArray<float, 2>& channels,
0.02009898726851852,
0.27419898726851855,

-0.04009898726851849,
0.04009898726851849,
0.3270268229166667,
0.6400989872685185,

0.36397005208333333,
0.6505010127314814,
0.8000989872685186,
0.65050101273148142,
1.4265898726851856,

-0.15930101273148148,
1.5483130497685185,
8.31009898726852,
0.1,
1.2929090497685181,
8.5348843872685212,
};
static const double kMul = 0.92558468864724108;
static const float div = 1.0 / (2*(c[0]+c[1]+c[2]+c[3]+c[4]+c[5]+c[6]+c[7]+c[8])+c[9]+c[10]+c[11]);
for (int k = 0; k < kNumRotators; ++k) {
int prev3 = std::max(0, k - 3);
Expand All @@ -119,20 +142,20 @@ void PrepareMasker(hwy::AlignedNDArray<float, 2>& channels,
channels[{oi2}][next2] * c[3] + channels[{oi1}][next2] * c[4] + channels[{oi0}][next2] * c[5] +
channels[{oi2}][next3] * c[0] + channels[{oi1}][next3] * c[1] + channels[{oi0}][next3] * c[2];

masker[k] = v * div;
masker[k] = v * div * kMul;
}
}
static const double octaves_in_20_to_20000 = log(20000/20.)/log(2);
static const double octaves_per_rot =
octaves_in_20_to_20000 / float(kNumRotators - 1);
static const double masker_step_per_octave_up_0 = 20.54547806594578;
static const double masker_step_per_octave_up_1 = 24.608097753757256;
static const double masker_step_per_octave_up_2 = 6.0;
static const double masker_step_per_octave_up_0 = 20.530166862351951;
static const double masker_step_per_octave_up_1 = 24.71276102911699;
static const double masker_step_per_octave_up_2 = 1.5390804378846834;
static const double masker_step_per_rot_up_0 = octaves_per_rot * masker_step_per_octave_up_0;
static const double masker_step_per_rot_up_1 = octaves_per_rot * masker_step_per_octave_up_1;
static const double masker_step_per_rot_up_2 = octaves_per_rot * masker_step_per_octave_up_2;

static const double masker_step_per_octave_down = 53.40075984772409;
static const double masker_step_per_octave_down = 22.474862790273633;
static const double masker_step_per_rot_down = octaves_per_rot * masker_step_per_octave_down;
// propagate masker up
float mask = 0;
Expand Down Expand Up @@ -162,20 +185,20 @@ void PrepareMasker(hwy::AlignedNDArray<float, 2>& channels,
}
}

void FinalizeDb(hwy::AlignedNDArray<float, 2>& channels, float mul,
void FinalizeDb(std::vector<float> rotator_frequency,
hwy::AlignedNDArray<float, 2>& channels, float mul,
size_t out_ix) {
float masker[kNumRotators];
for (int k = 0; k < kNumRotators; ++k) {
float v = SimpleDb(mul * channels[{out_ix}][k]);
channels[{out_ix}][k] = Loudness(k, v);
channels[{out_ix}][k] = Loudness(rotator_frequency[k], v);
}
PrepareMasker(channels, &masker[0], out_ix);


static const double masker_gap = 20.716199363425925;
static const float maskingStrength = 0.22591336897956596;

static const float min_limit = -11.3968870989223;
static const double masker_gap = 21.233031313853495;
static const float maskingStrength = 0.40144654396596807;
static const float min_limit = 0;

// Scan frequencies from bottom to top, let lower frequencies to mask higher frequencies.
// 'masker' maintains the masking envelope from one bin to next.
Expand Down Expand Up @@ -203,7 +226,7 @@ void Rotators::FilterAndDownsample(hwy::Span<const float> signal,
int64_t input_ix = ii + zz;
if (input_ix >= signal.size()) {
if (out_ix < channels.shape()[0]) {
FinalizeDb(channels, scaling_for_downsampling, out_ix);
FinalizeDb(rotator_frequency, channels, scaling_for_downsampling, out_ix);
}
if (out_ix != channels.shape()[0] - 1) {
fprintf(stderr,
Expand All @@ -229,7 +252,7 @@ void Rotators::FilterAndDownsample(hwy::Span<const float> signal,
}
}
}
FinalizeDb(channels, scaling_for_downsampling, out_ix);
FinalizeDb(rotator_frequency, channels, scaling_for_downsampling, out_ix);
++out_ix;
if (out_ix >= channels.shape()[0]) {
return;
Expand Down Expand Up @@ -257,7 +280,7 @@ Rotators::Rotators(int num_channels, std::vector<float> frequency,
window[i] = std::pow(kWindow, bw * kBandwidthMagic);
float windowM1 = 1.0f - window[i];
float f = frequency[i] * 2.0f * M_PI / sample_rate;
static const float full_scale_sine_db = exp(76.66488071851488);
static const float full_scale_sine_db = exp(76.63936108268561);
const float gainer = sqrt(full_scale_sine_db);
gain[i] = gainer * filter_gains[i] * pow(windowM1, 3.0);
rot[0][i] = float(std::cos(f));
Expand Down
10 changes: 6 additions & 4 deletions cpp/zimt/nsim.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,9 @@ float HwyNSIM(const hwy::AlignedNDArray<float, 2>& a,
return Mul(delta_a, delta_b);
});
const Vec two = Set(d, 2.0);
const Vec C1 = Set(d, 0.19863327786546683);
const Vec C3 = Set(d, 0.17538360286546675);
const Vec C1 = Set(d, 64.820932997704162);
const Vec C3 = Set(d, 11.285860868315538);
const Vec C4 = Set(d, 1e-6 * 2.9262932000000021);
float nsim_sum = 0.0;
const Vec num_channels_vec = Set(d, num_channels);
const Vec zero = Zero(d);
Expand All @@ -219,8 +220,9 @@ float HwyNSIM(const hwy::AlignedNDArray<float, 2>& a,
const Vec intensity = Div(
MulAdd(two, Mul(mean_a_vec, mean_b_vec), C1),
MulAdd(mean_a_vec, mean_a_vec, MulAdd(mean_b_vec, mean_b_vec, C1)));
const Vec structure =
Div(Add(cov_vec, C3), MulAdd(std_a_vec, std_b_vec, C3));
const Vec structure_base = Div(Add(cov_vec, C3), MulAdd(std_a_vec, std_b_vec, C3));
const Vec structure_clamped = IfThenElse(Lt(structure_base, zero), zero, structure_base);
const Vec structure = Sqrt(Sqrt(Add(structure_clamped, C4)));
const Vec channel_index_vec = Iota(d, channel_index);
const Vec nsim = IfThenElse(Lt(channel_index_vec, num_channels_vec),
Mul(intensity, structure), zero);
Expand Down
4 changes: 2 additions & 2 deletions cpp/zimt/zimtohrli.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ struct Zimtohrli {

// Sample rate corresponding to the human hearing sensitivity to timing
// differences.
float perceptual_sample_rate = 85.0;
float perceptual_sample_rate = 97.969986924466824;

// The filterbank used to separate the signal in frequency channels.
std::optional<CamFilterbank> cam_filterbank;
Expand All @@ -288,7 +288,7 @@ struct Zimtohrli {
size_t nsim_step_window = 8;

// The window in channels when computing the NSIM.
size_t nsim_channel_window = 16;
size_t nsim_channel_window = 7;

// The window of the dynamic time warp that matches audio signals.
//
Expand Down
Binary file modified go/goohrli/goohrli.a
Binary file not shown.

0 comments on commit fdcef96

Please sign in to comment.