From 433ca99e18040133aea32fef996051a953a3e6b7 Mon Sep 17 00:00:00 2001 From: Martin Bruse Date: Tue, 14 May 2024 14:56:03 +0200 Subject: [PATCH] Split identity sliding FFT into a fourier bank library. --- CMakeLists.txt | 12 +- speaker_experiments/angular.cc | 16 +- speaker_experiments/audio_closed_loop | 24 +- speaker_experiments/audio_diff.cc | 25 +- speaker_experiments/emphasizer.cc | 9 +- speaker_experiments/fourier_bank.cc | 281 +++++++++++++++ speaker_experiments/fourier_bank.h | 109 ++++++ speaker_experiments/identity_sliding_fft.cc | 375 ++------------------ speaker_experiments/revolve.cc | 206 +++++------ 9 files changed, 552 insertions(+), 505 deletions(-) mode change 100755 => 100644 speaker_experiments/audio_closed_loop create mode 100644 speaker_experiments/fourier_bank.cc create mode 100644 speaker_experiments/fourier_bank.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a219048..7de21d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,11 +13,21 @@ add_subdirectory(third_party/highway EXCLUDE_FROM_ALL) add_executable(driver_model driver_model/driver_model.cc) target_link_libraries(driver_model PkgConfig::SndFile) +add_library(fourier_bank speaker_experiments/fourier_bank.h speaker_experiments/fourier_bank.cc) +target_link_libraries(fourier_bank + PkgConfig::SndFile + absl::flags + absl::flags_parse + absl::log + absl::log_internal_check_impl +) + foreach (experiment IN ITEMS angular emphasizer revolve spectrum_similarity two_to_three virtual_speakers identity_sliding_fft audio_diff) add_executable(${experiment} speaker_experiments/${experiment}.cc) - target_link_libraries(${experiment} PkgConfig::SndFile absl::flags absl::flags_parse absl::log absl::log_internal_check_impl) + target_link_libraries(${experiment} PkgConfig::SndFile absl::flags absl::flags_parse absl::log absl::log_internal_check_impl fourier_bank) endforeach () + target_link_libraries(angular PkgConfig::FFTW3) target_link_libraries(spectrum_similarity PkgConfig::FFTW3) target_link_libraries(two_to_three PkgConfig::FFTW3) diff --git a/speaker_experiments/angular.cc b/speaker_experiments/angular.cc index 8f425f4..0d1acbf 100644 --- a/speaker_experiments/angular.cc +++ b/speaker_experiments/angular.cc @@ -125,14 +125,18 @@ void Process( std::greater<>()) - speaker_to_ratio_table.begin(); - // amp-kludge to make borders louder -- it is a virtual line array where the - // borders will be further away in rendering, so let's compensate for it here. + // amp-kludge to make borders louder -- it is a virtual line array where + // the borders will be further away in rendering, so let's compensate for + // it here. - float distance_from_center = (subspeaker_index - 0.5 * (output_channels - 1)); + float distance_from_center = + (subspeaker_index - 0.5 * (output_channels - 1)); float assumed_distance_to_line = 0.75 * (output_channels - 1); - float distance_to_virtual = sqrt(distance_from_center * distance_from_center + - assumed_distance_to_line * assumed_distance_to_line); - float dist_ratio = distance_to_virtual * (1.0f / assumed_distance_to_line); + float distance_to_virtual = + sqrt(distance_from_center * distance_from_center + + assumed_distance_to_line * assumed_distance_to_line); + float dist_ratio = + distance_to_virtual * (1.0f / assumed_distance_to_line); float amp = dist_ratio * dist_ratio; const float index = diff --git a/speaker_experiments/audio_closed_loop b/speaker_experiments/audio_closed_loop old mode 100755 new mode 100644 index 58bc89a..e28f77d --- a/speaker_experiments/audio_closed_loop +++ b/speaker_experiments/audio_closed_loop @@ -1,18 +1,18 @@ -#!/usr/bin/env python +#!/ usr / bin / env python -# Copyright 2024 Google LLC +#Copyright 2024 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +#Licensed under the Apache License, Version 2.0(the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +#https: // www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. """Usage: ./audio_closed_loop input.wav recorded.wav [ [ []]] """ @@ -44,7 +44,7 @@ def detect_offset(samples: np.ndarray, sample_rate: int) -> int: kernel = (-2.) ** -np.abs(np.arange(-24, 24 + 1)) samples = np.convolve(samples, kernel, mode='same') - # Require the beep to be at least 10dB above the background noise in amplitude +#Require the beep to be at least 10dB above the background noise in amplitude min_ratio = 10**(10/20) max_background = max(1e-7, np.max(np.abs(samples[:int(0.002 * sample_rate)]))) max_above = None diff --git a/speaker_experiments/audio_diff.cc b/speaker_experiments/audio_diff.cc index 720dcf3..62a576c 100644 --- a/speaker_experiments/audio_diff.cc +++ b/speaker_experiments/audio_diff.cc @@ -30,7 +30,7 @@ namespace { int FindMedian3xLeaker(double window) { - return static_cast(-2.32/log(window)); // Approximate filter delay. + return static_cast(-2.32 / log(window)); // Approximate filter delay. /* double windowM1 = 1.0 - window; double half_way_to_total_sum = 1e99; @@ -60,7 +60,7 @@ constexpr int64_t kNumRotators = 128; struct Rotator { std::complex rot[4] = {{1, 0}, 0}; - double window = std::pow(0.9996, 128.0/kNumRotators); // at 40 Hz. + double window = std::pow(0.9996, 128.0 / kNumRotators); // at 40 Hz. double windowM1 = 1 - window; std::complex exp_mia; int advance = 0; @@ -110,14 +110,13 @@ static const int kHistoryMask = kHistorySize - 1; class TaskExecutor { public: - TaskExecutor(size_t num_threads) - : thread_outputs_(num_threads) { - } + TaskExecutor(size_t num_threads) : thread_outputs_(num_threads) {} double error_ = 0; - void Execute(size_t num_tasks, - size_t read, size_t total, const double* history, Rotator* rot_left, Rotator* rot_right, - size_t read2, size_t total2, const double* history2, Rotator* rot_left2, Rotator* rot_right2) { + void Execute(size_t num_tasks, size_t read, size_t total, + const double* history, Rotator* rot_left, Rotator* rot_right, + size_t read2, size_t total2, const double* history2, + Rotator* rot_left2, Rotator* rot_right2) { read_ = read; total_ = total; history_ = history; @@ -184,9 +183,7 @@ class TaskExecutor { }; template -void Process( - In& input_stream, In& input_stream2, - double *error) { +void Process(In& input_stream, In& input_stream2, double* error) { std::vector history(input_stream.channels() * kHistorySize); std::vector input(input_stream.channels() * kBlockSize); @@ -231,9 +228,9 @@ void Process( if (read == 0) break; if (read2 == 0) break; - pool.Execute(kNumRotators, - read, total, history.data(), rot_left.data(), rot_right.data(), - read2, total, history2.data(), rot_left2.data(), rot_right2.data()); + pool.Execute(kNumRotators, read, total, history.data(), rot_left.data(), + rot_right.data(), read2, total, history2.data(), + rot_left2.data(), rot_right2.data()); total += read; } diff --git a/speaker_experiments/emphasizer.cc b/speaker_experiments/emphasizer.cc index 0b8fbf9..4414ac3 100644 --- a/speaker_experiments/emphasizer.cc +++ b/speaker_experiments/emphasizer.cc @@ -46,7 +46,7 @@ double ActualLeftToRightRatio(const double squared_left, } double FindMedian3xLeaker(double window) { - return static_cast(-2.32/log(window)); // Approximate filter delay. + return static_cast(-2.32 / log(window)); // Approximate filter delay. } double CalcReverbRatio(double frequency) { @@ -239,8 +239,7 @@ class TaskExecutor { template void Process( - const int output_channels, - In& input_stream, Out& output_stream, + const int output_channels, In& input_stream, Out& output_stream, const std::function& start_progress = [] {}, const std::function& set_progress = [](int64_t written) {}) { std::vector history(input_stream.channels() * kHistorySize); @@ -308,6 +307,6 @@ int main(int argc, char** argv) { /*channels=*/output_channels, /*samplerate=*/input_file.samplerate()); Process( - output_channels, input_file, output_file, - [] {}, [](const int64_t written) {}); + output_channels, input_file, output_file, [] {}, + [](const int64_t written) {}); } diff --git a/speaker_experiments/fourier_bank.cc b/speaker_experiments/fourier_bank.cc new file mode 100644 index 0000000..2ea268c --- /dev/null +++ b/speaker_experiments/fourier_bank.cc @@ -0,0 +1,281 @@ +#include "fourier_bank.h" + +#include +#include +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/strings/str_split.h" +#include "sndfile.hh" + +namespace tabuli { + +float GetRotatorGains(int i) { + static const float kRotatorGains[kNumRotators] = { + 1.050645, 1.948438, 3.050339, 3.967913, 4.818584, 5.303335, 5.560281, + 5.490826, 5.156689, 4.547374, 3.691308, 2.666868, 1.539254, 0.656948, + 0.345893, 0.327111, 0.985318, 1.223506, 0.447645, 0.830961, 1.075181, + 0.613335, 0.902695, 0.855391, 0.817774, 0.823359, 0.841483, 0.838562, + 0.831912, 0.808731, 0.865214, 0.808036, 0.850837, 0.821305, 0.839458, + 0.829195, 0.836373, 0.827271, 0.836018, 0.834514, 0.825624, 0.836999, + 0.833990, 0.832992, 0.830897, 0.832593, 0.846116, 0.824796, 0.829331, + 0.844509, 0.838830, 0.821733, 0.840738, 0.841735, 0.827570, 0.838581, + 0.837742, 0.834965, 0.842970, 0.832145, 0.847596, 0.840942, 0.830891, + 0.850632, 0.841468, 0.838383, 0.841493, 0.855118, 0.826750, 0.848000, + 0.874356, 0.812177, 0.849037, 0.893550, 0.832527, 0.827986, 0.877198, + 0.851760, 0.846317, 0.883044, 0.843178, 0.856925, 0.857045, 0.860695, + 0.894345, 0.870391, 0.839519, 0.870541, 0.870573, 0.902951, 0.871798, + 0.818328, 0.871413, 0.921101, 0.863915, 0.793014, 0.936519, 0.888107, + 0.856968, 0.821018, 0.987345, 0.904846, 0.783447, 0.973613, 0.903628, + 0.875688, 0.931024, 0.992087, 0.806914, 1.050332, 0.942569, 0.800870, + 1.210426, 0.916555, 0.817352, 1.126946, 0.985119, 0.922530, 0.994633, + 0.959602, 0.381419, 1.879201, 2.078451, 0.475196, 0.952731, 1.709305, + 1.383894, 1.557669, + }; + return kRotatorGains[i]; +} + +int Rotators::FindMedian3xLeaker(float window) { + // Approximate filter delay. TODO: optimize this value along with gain values. + // Recordings can sound better with -2.32 as it pushes the bass signals a bit + // earlier and likely compensates human hearing's deficiency for temporal + // separation. + const float kMagic = -2.2028003503591482; + const float kAlmostHalfForRounding = 0.4687; + return static_cast(kMagic / log(window) + kAlmostHalfForRounding); +} + +Rotators::Rotators(int num_channels, std::vector frequency, + std::vector filter_gains, const float sample_rate, + float global_gain) { + channel.resize(num_channels); + for (int i = 0; i < kNumRotators; ++i) { + // The parameter relates to the frequency shape overlap and window length + // of triple leaking integrator. + float kWindow = 0.9996; + float w40Hz = std::pow(kWindow, 128.0 / kNumRotators); // at 40 Hz. + window[i] = pow(w40Hz, std::max(1.0, frequency[i] / 40.0)); + delay[i] = FindMedian3xLeaker(window[i]); + float windowM1 = 1.0f - window[i]; + max_delay_ = std::max(max_delay_, delay[i]); + float f = frequency[i] * 2.0f * M_PI / sample_rate; + gain[i] = filter_gains[i] * global_gain * pow(windowM1, 3.0); + rot[0][i] = float(std::cos(f)); + rot[1][i] = float(-std::sin(f)); + rot[2][i] = sqrt(gain[i]); + rot[3][i] = 0.0f; + } + for (size_t i = 0; i < kNumRotators; ++i) { + advance[i] = max_delay_ - delay[i]; + } +} + +void Rotators::Increment(int c, int i, float audio) { + if (c == 0) { + float tr = rot[0][i] * rot[2][i] - rot[1][i] * rot[3][i]; + float tc = rot[0][i] * rot[3][i] + rot[1][i] * rot[2][i]; + rot[2][i] = tr; + rot[3][i] = tc; + } + channel[c].accu[0][i] *= window[i]; + channel[c].accu[1][i] *= window[i]; + channel[c].accu[2][i] *= window[i]; + channel[c].accu[3][i] *= window[i]; + channel[c].accu[4][i] *= window[i]; + channel[c].accu[5][i] *= window[i]; + channel[c].accu[0][i] += rot[2][i] * audio; + channel[c].accu[1][i] += rot[3][i] * audio; + channel[c].accu[2][i] += channel[c].accu[0][i]; + channel[c].accu[3][i] += channel[c].accu[1][i]; + channel[c].accu[4][i] += channel[c].accu[2][i]; + channel[c].accu[5][i] += channel[c].accu[3][i]; +} + +void Rotators::AddAudio(int c, int i, float audio) { + channel[c].accu[0][i] += rot[2][i] * audio; + channel[c].accu[1][i] += rot[3][i] * audio; +} +void Rotators::OccasionallyRenormalize() { + for (int i = 0; i < kNumRotators; ++i) { + float norm = + sqrt(gain[i] / (rot[2][i] * rot[2][i] + rot[3][i] * rot[3][i])); + rot[2][i] *= norm; + rot[3][i] *= norm; + } +} +void Rotators::IncrementAll() { + for (int i = 0; i < kNumRotators; i++) { + const float tr = rot[0][i] * rot[2][i] - rot[1][i] * rot[3][i]; + const float tc = rot[0][i] * rot[3][i] + rot[1][i] * rot[2][i]; + rot[2][i] = tr; + rot[3][i] = tc; + } + for (int c = 0; c < channel.size(); ++c) { + for (int i = 0; i < kNumRotators; i++) { + const float w = window[i]; + channel[c].accu[0][i] *= w; + channel[c].accu[1][i] *= w; + channel[c].accu[2][i] *= w; + channel[c].accu[3][i] *= w; + channel[c].accu[4][i] *= w; + channel[c].accu[5][i] *= w; + channel[c].accu[2][i] += channel[c].accu[0][i]; + channel[c].accu[3][i] += channel[c].accu[1][i]; + channel[c].accu[4][i] += channel[c].accu[2][i]; + channel[c].accu[5][i] += channel[c].accu[3][i]; + } + } +} +float Rotators::GetSampleAll(int c) { + float retval = 0; + for (int i = 0; i < kNumRotators; ++i) { + retval += + (rot[2][i] * channel[c].accu[4][i] + rot[3][i] * channel[c].accu[5][i]); + } + return retval; +} +float Rotators::GetSample(int c, int i, FilterMode mode) const { + return ( + mode == IDENTITY ? (rot[2][i] * channel[c].accu[4][i] + + rot[3][i] * channel[c].accu[5][i]) + : mode == AMPLITUDE + ? std::sqrt(gain[i] * (channel[c].accu[4][i] * channel[c].accu[4][i] + + channel[c].accu[5][i] * channel[c].accu[5][i])) + : std::atan2(channel[c].accu[4][i], channel[c].accu[5][i])); +} + +float BarkFreq(float v) { + constexpr float linlogsplit = 0.1; + if (v < linlogsplit) { + return 20.0 + (v / linlogsplit) * 20.0; // Linear 20-40 Hz. + } else { + float normalized_v = (v - linlogsplit) * (1.0 / (1.0 - linlogsplit)); + return 40.0 * pow(500.0, normalized_v); // Logarithmic 40-20000 Hz. + } +} + +float HardClip(float v) { return std::max(-1.0f, std::min(1.0f, v)); } + +RotatorFilterBank::RotatorFilterBank(size_t num_rotators, size_t num_channels, + size_t samplerate, size_t num_threads, + const std::vector &filter_gains, + float global_gain) { + num_rotators_ = num_rotators; + num_channels_ = num_channels; + num_threads_ = num_threads; + std::vector freqs(num_rotators); + for (size_t i = 0; i < num_rotators_; ++i) { + freqs[i] = BarkFreq(static_cast(i) / (num_rotators_ - 1)); + // printf("%d %g\n", i, freqs[i]); + } + rotators_.reset( + new Rotators(num_channels, freqs, filter_gains, samplerate, global_gain)); + + max_delay_ = rotators_->max_delay_; + QCHECK_LE(max_delay_, kBlockSize); + fprintf(stderr, "Rotator bank output delay: %zu\n", max_delay_); + filter_outputs_.resize(num_rotators); + for (std::vector &output : filter_outputs_) { + output.resize(num_channels_ * kBlockSize, 0.f); + } +} + +// TODO(jyrki): filter all at once in the generic case, filtering one +// is not memory friendly in this memory tabulation. +void RotatorFilterBank::FilterOne(size_t f_ix, const float *history, + int64_t total_in, int64_t len, + FilterMode mode, float *output) { + size_t out_ix = 0; + for (int64_t i = 0; i < len; ++i) { + int64_t delayed_ix = total_in + i - rotators_->advance[f_ix]; + size_t histo_ix = num_channels_ * (delayed_ix & kHistoryMask); + for (size_t c = 0; c < num_channels_; ++c) { + float delayed = history[histo_ix + c]; + rotators_->Increment(c, f_ix, delayed); + } + if (total_in + i >= max_delay_) { + for (size_t c = 0; c < num_channels_; ++c) { + output[out_ix * num_channels_ + c] = + rotators_->GetSample(c, f_ix, mode); + } + ++out_ix; + } + } +} + +int64_t RotatorFilterBank::FilterAllSingleThreaded(const float *history, + int64_t total_in, + int64_t len, FilterMode mode, + float *output, + size_t output_size) { + size_t out_ix = 0; + for (size_t c = 0; c < num_channels_; ++c) { + rotators_->OccasionallyRenormalize(); + } + for (int64_t i = 0; i < len; ++i) { + for (size_t c = 0; c < num_channels_; ++c) { + for (int k = 0; k < kNumRotators; ++k) { + int64_t delayed_ix = total_in + i - rotators_->advance[k]; + size_t histo_ix = num_channels_ * (delayed_ix & kHistoryMask); + float delayed = history[histo_ix + c]; + rotators_->AddAudio(c, k, delayed); + } + } + rotators_->IncrementAll(); + if (total_in + i >= max_delay_) { + for (size_t c = 0; c < num_channels_; ++c) { + output[out_ix * num_channels_ + c] = + HardClip(rotators_->GetSampleAll(c)); + } + ++out_ix; + } + } + size_t out_len = total_in < max_delay_ + ? std::max(0, len - (max_delay_ - total_in)) + : len; + return out_len; +} + +int64_t RotatorFilterBank::FilterAll(const float *history, int64_t total_in, + int64_t len, FilterMode mode, + float *output, size_t output_size) { + auto run = [&](size_t thread) { + while (true) { + size_t my_task = next_task_++; + if (my_task >= num_rotators_) return; + FilterOne(my_task, history, total_in, len, mode, + filter_outputs_[my_task].data()); + } + }; + next_task_ = 0; + std::vector> futures; + futures.reserve(num_threads_); + for (size_t i = 0; i < num_threads_; ++i) { + futures.push_back(std::async(std::launch::async, run, i)); + } + for (size_t i = 0; i < num_threads_; ++i) { + futures[i].get(); + } + size_t out_len = total_in < max_delay_ + ? std::max(0, len - (max_delay_ - total_in)) + : len; + for (size_t i = 0; i < out_len; ++i) { + for (size_t j = 0; j < num_rotators_; ++j) { + for (size_t c = 0; c < num_channels_; ++c) { + size_t out_idx = (i * num_rotators_ + j) * num_channels_ + c; + output[out_idx] = filter_outputs_[j][i * num_channels_ + c]; + } + } + } + return out_len; +} + +} // namespace tabuli \ No newline at end of file diff --git a/speaker_experiments/fourier_bank.h b/speaker_experiments/fourier_bank.h new file mode 100644 index 0000000..5fcffee --- /dev/null +++ b/speaker_experiments/fourier_bank.h @@ -0,0 +1,109 @@ +#ifndef _TABULI_FOURIER_BANK_H +#define _TABULI_FOURIER_BANK_H + +#include +#include +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include + +#include "absl/log/check.h" +#include "absl/strings/str_split.h" +#include "sndfile.hh" + +namespace tabuli { + +constexpr int64_t kNumRotators = 128; + +float GetRotatorGains(int i); + +enum FilterMode { + IDENTITY, + AMPLITUDE, + PHASE, +}; + +float BarkFreq(float v); + +struct PerChannel { + // [0..1] is for real and imag of 1st leaking accumulation + // [2..3] is for real and imag of 2nd leaking accumulation + // [4..5] is for real and imag of 3rd leaking accumulation + float accu[6][kNumRotators] = {0}; +}; + +struct Rotators { + // Four arrays of rotators. + // [0..1] is real and imag for rotation speed + // [2..3] is real and image for a frequency rotator of length sqrt(gain[i]) + // Values inserted into the rotators are multiplied with this rotator in both + // input and output, leading to a total gain multiplication if the length is + // at sqrt(gain). + float rot[4][kNumRotators] = {0}; + std::vector channel; + // Accu has the channel related data, everything else the same between + // channels. + float window[kNumRotators]; + float gain[kNumRotators]; + int16_t delay[kNumRotators] = {0}; + int16_t advance[kNumRotators] = {0}; + int16_t max_delay_ = 0; + + int FindMedian3xLeaker(float window); + + Rotators() = default; + Rotators(int num_channels, std::vector frequency, + std::vector filter_gains, const float sample_rate, + float global_gain); + + void Increment(int c, int i, float audio); + + void AddAudio(int c, int i, float audio); + void OccasionallyRenormalize(); + void IncrementAll(); + float GetSampleAll(int c); + float GetSample(int c, int i, FilterMode mode = IDENTITY) const; +}; + +static constexpr int64_t kBlockSize = 1 << 15; +static const int kHistorySize = (1 << 18); +static const int kHistoryMask = kHistorySize - 1; + +float HardClip(float v); + +struct RotatorFilterBank { + RotatorFilterBank(size_t num_rotators, size_t num_channels, size_t samplerate, + size_t num_threads, const std::vector &filter_gains, + float global_gain); + ~RotatorFilterBank() = default; + + // TODO(jyrki): filter all at once in the generic case, filtering one + // is not memory friendly in this memory tabulation. + void FilterOne(size_t f_ix, const float *history, int64_t total_in, + int64_t len, FilterMode mode, float *output); + + int64_t FilterAllSingleThreaded(const float *history, int64_t total_in, + int64_t len, FilterMode mode, float *output, + size_t output_size); + + int64_t FilterAll(const float *history, int64_t total_in, int64_t len, + FilterMode mode, float *output, size_t output_size); + + size_t num_rotators_; + size_t num_channels_; + size_t num_threads_; + std::unique_ptr rotators_; + int64_t max_delay_; + std::vector> filter_outputs_; + std::atomic next_task_{0}; +}; + +} // namespace tabuli + +#endif // _TABULI_FOURIER_BANK_H \ No newline at end of file diff --git a/speaker_experiments/identity_sliding_fft.cc b/speaker_experiments/identity_sliding_fft.cc index d9b75fe..2dcc352 100644 --- a/speaker_experiments/identity_sliding_fft.cc +++ b/speaker_experiments/identity_sliding_fft.cc @@ -28,6 +28,7 @@ #include "absl/flags/parse.h" #include "absl/log/check.h" #include "absl/strings/str_split.h" +#include "fourier_bank.h" #include "sndfile.hh" ABSL_FLAG(bool, plot_input, false, "If set, plots the input signal."); @@ -36,13 +37,12 @@ ABSL_FLAG(bool, plot_fft, false, "If set, plots fft of signal."); ABSL_FLAG(bool, ppm, false, "If set, outputs ppm plot."); ABSL_FLAG(int, plot_from, -1, "If non-negative, start plot from here."); ABSL_FLAG(int, plot_to, -1, "If non-negative, end plot here."); -ABSL_FLAG(int, select_rot, -1, "If set, use only one rotator."); ABSL_FLAG(double, gain, 1.0, "Global volume scaling."); ABSL_FLAG(std::string, filter_mode, "identity", "Filter mode."); -namespace { +namespace tabuli { -template +template std::vector> FFT(const std::vector& x) { size_t N = x.size(); QCHECK((N & (N - 1)) == 0) << "FFT length must be power of two"; @@ -62,8 +62,8 @@ std::vector> FFT(const std::vector& x) { } return r; }; - auto butterfly = [](const std::complex& m, - std::complex& a, std::complex& b) { + auto butterfly = [](const std::complex& m, std::complex& a, + std::complex& b) { std::complex A = a; std::complex B = m * b; a = A + B; @@ -79,7 +79,7 @@ std::vector> FFT(const std::vector& x) { std::complex mul = {std::cos(freq), -std::sin(freq)}; for (size_t k = 0; k < N; k += m) { std::complex omega = {1, 0}; - for (size_t j = 0; j < m/ 2; ++j) { + for (size_t j = 0; j < m / 2; ++j) { butterfly(omega, X[k + j], X[k + j + m / 2]); omega *= mul; } @@ -96,12 +96,6 @@ bool CheckPosition(int64_t pos) { return true; } -enum FilterMode { - IDENTITY, - AMPLITUDE, - PHASE, -}; - FilterMode GetFilterMode() { std::string desc = absl::GetFlag(FLAGS_filter_mode); if (desc == "identity") { @@ -114,329 +108,13 @@ FilterMode GetFilterMode() { QCHECK(0); } -constexpr int64_t kNumRotators = 128; - -float GetRotatorGains(int i) { - static const float kRotatorGains[kNumRotators] = { - 1.050645, 1.948438, 3.050339, 3.967913, - 4.818584, 5.303335, 5.560281, 5.490826, - 5.156689, 4.547374, 3.691308, 2.666868, - 1.539254, 0.656948, 0.345893, 0.327111, - 0.985318, 1.223506, 0.447645, 0.830961, - 1.075181, 0.613335, 0.902695, 0.855391, - 0.817774, 0.823359, 0.841483, 0.838562, - 0.831912, 0.808731, 0.865214, 0.808036, - 0.850837, 0.821305, 0.839458, 0.829195, - 0.836373, 0.827271, 0.836018, 0.834514, - 0.825624, 0.836999, 0.833990, 0.832992, - 0.830897, 0.832593, 0.846116, 0.824796, - 0.829331, 0.844509, 0.838830, 0.821733, - 0.840738, 0.841735, 0.827570, 0.838581, - 0.837742, 0.834965, 0.842970, 0.832145, - 0.847596, 0.840942, 0.830891, 0.850632, - 0.841468, 0.838383, 0.841493, 0.855118, - 0.826750, 0.848000, 0.874356, 0.812177, - 0.849037, 0.893550, 0.832527, 0.827986, - 0.877198, 0.851760, 0.846317, 0.883044, - 0.843178, 0.856925, 0.857045, 0.860695, - 0.894345, 0.870391, 0.839519, 0.870541, - 0.870573, 0.902951, 0.871798, 0.818328, - 0.871413, 0.921101, 0.863915, 0.793014, - 0.936519, 0.888107, 0.856968, 0.821018, - 0.987345, 0.904846, 0.783447, 0.973613, - 0.903628, 0.875688, 0.931024, 0.992087, - 0.806914, 1.050332, 0.942569, 0.800870, - 1.210426, 0.916555, 0.817352, 1.126946, - 0.985119, 0.922530, 0.994633, 0.959602, - 0.381419, 1.879201, 2.078451, 0.475196, - 0.952731, 1.709305, 1.383894, 1.557669, - }; - return kRotatorGains[i]; -} - -struct PerChannel { - // [0..1] is for real and imag of 1st leaking accumulation - // [2..3] is for real and imag of 2nd leaking accumulation - // [4..5] is for real and imag of 3rd leaking accumulation - float accu[6][kNumRotators] = { 0 }; -}; - -struct Rotators { - // Four arrays of rotators. - // [0..1] is real and imag for rotation speed - // [2..3] is real and image for a frequency rotator of length sqrt(gain[i]) - // Values inserted into the rotators are multiplied with this rotator in both input - // and output, leading to a total gain multiplication if the length is at sqrt(gain). - float rot[4][kNumRotators] = { 0 }; - std::vector channel; - // Accu has the channel related data, everything else the same between channels. - float window[kNumRotators]; - float gain[kNumRotators]; - int16_t delay[kNumRotators] = { 0 }; - int16_t advance[kNumRotators] = { 0 }; - int16_t max_delay_ = 0; - - int FindMedian3xLeaker(float window) { - // Approximate filter delay. TODO: optimize this value along with gain values. - // Recordings can sound better with -2.32 as it pushes the bass signals a bit earlier - // and likely compensates human hearing's deficiency for temporal separation. - const float kMagic = -2.2028003503591482; - const float kAlmostHalfForRounding = 0.4687; - return static_cast(kMagic/log(window) + kAlmostHalfForRounding); - } - - Rotators() { } - Rotators(int num_channels, std::vector frequency, - std::vector filter_gains, const float sample_rate) { - channel.resize(num_channels); - for (int i = 0; i < kNumRotators; ++i) { - // The parameter relates to the frequency shape overlap and window length - // of triple leaking integrator. - float kWindow = 0.9996; - float w40Hz = std::pow(kWindow, 128.0 / kNumRotators); // at 40 Hz. - window[i] = pow(w40Hz, std::max(1.0, frequency[i] / 40.0)); - delay[i] = FindMedian3xLeaker(window[i]); - float windowM1 = 1.0f - window[i]; - max_delay_ = std::max(max_delay_, delay[i]); - float f = frequency[i] * 2.0f * M_PI / sample_rate; - gain[i] = filter_gains[i] * absl::GetFlag(FLAGS_gain) * pow(windowM1, 3.0); - rot[0][i] = float(std::cos(f)); - rot[1][i] = float(-std::sin(f)); - rot[2][i] = sqrt(gain[i]); - rot[3][i] = 0.0f; - } - for (size_t i = 0; i < kNumRotators; ++i) { - advance[i] = max_delay_ - delay[i]; - } - } - - void Increment(int c, int i, float audio) { - if (c == 0) { - float tr = rot[0][i] * rot[2][i] - rot[1][i] * rot[3][i]; - float tc = rot[0][i] * rot[3][i] + rot[1][i] * rot[2][i]; - rot[2][i] = tr; - rot[3][i] = tc; - } - channel[c].accu[0][i] *= window[i]; - channel[c].accu[1][i] *= window[i]; - channel[c].accu[2][i] *= window[i]; - channel[c].accu[3][i] *= window[i]; - channel[c].accu[4][i] *= window[i]; - channel[c].accu[5][i] *= window[i]; - channel[c].accu[0][i] += rot[2][i] * audio; - channel[c].accu[1][i] += rot[3][i] * audio; - channel[c].accu[2][i] += channel[c].accu[0][i]; - channel[c].accu[3][i] += channel[c].accu[1][i]; - channel[c].accu[4][i] += channel[c].accu[2][i]; - channel[c].accu[5][i] += channel[c].accu[3][i]; - } - - void AddAudio(int c, int i, float audio) { - channel[c].accu[0][i] += rot[2][i] * audio; - channel[c].accu[1][i] += rot[3][i] * audio; - } - void OccasionallyRenormalize() { - for (int i = 0; i < kNumRotators; ++i) { - float norm = sqrt(gain[i] / (rot[2][i] * rot[2][i] + rot[3][i] * rot[3][i])); - rot[2][i] *= norm; - rot[3][i] *= norm; - } - } - void IncrementAll() { - for (int i = 0; i < kNumRotators; i++) { - const float tr = rot[0][i] * rot[2][i] - rot[1][i] * rot[3][i]; - const float tc = rot[0][i] * rot[3][i] + rot[1][i] * rot[2][i]; - rot[2][i] = tr; - rot[3][i] = tc; - } - for (int c = 0; c < channel.size(); ++c) { - for (int i = 0; i < kNumRotators; i++) { - const float w = window[i]; - channel[c].accu[0][i] *= w; - channel[c].accu[1][i] *= w; - channel[c].accu[2][i] *= w; - channel[c].accu[3][i] *= w; - channel[c].accu[4][i] *= w; - channel[c].accu[5][i] *= w; - channel[c].accu[2][i] += channel[c].accu[0][i]; - channel[c].accu[3][i] += channel[c].accu[1][i]; - channel[c].accu[4][i] += channel[c].accu[2][i]; - channel[c].accu[5][i] += channel[c].accu[3][i]; - } - } - } - float GetSampleAll(int c) { - float retval = 0; - for (int i = 0; i < kNumRotators; ++i) { - retval += (rot[2][i] * channel[c].accu[4][i] + rot[3][i] * channel[c].accu[5][i]); - } - return retval; - } - float GetSample(int c, int i, FilterMode mode = IDENTITY) const { - return (mode == IDENTITY ? - (rot[2][i] * channel[c].accu[4][i] + rot[3][i] * channel[c].accu[5][i]) : - mode == AMPLITUDE ? std::sqrt(gain[i] * (channel[c].accu[4][i] * channel[c].accu[4][i] + - channel[c].accu[5][i] * channel[c].accu[5][i])) : - std::atan2(channel[c].accu[4][i], channel[c].accu[5][i])); - } -}; - -float BarkFreq(float v) { - constexpr float linlogsplit = 0.1; - if (v < linlogsplit) { - return 20.0 + (v / linlogsplit) * 20.0; // Linear 20-40 Hz. - } else { - float normalized_v = (v - linlogsplit) * (1.0 / (1.0 - linlogsplit)); - return 40.0 * pow(500.0, normalized_v); // Logarithmic 40-20000 Hz. - } -} - -static constexpr int64_t kBlockSize = 1 << 15; -static const int kHistorySize = (1 << 18); -static const int kHistoryMask = kHistorySize - 1; - -float HardClip(float v) { - return std::max(-1.0f, std::min(1.0f, v)); -} - -struct RotatorFilterBank { - RotatorFilterBank(size_t num_rotators, size_t num_channels, - size_t samplerate, size_t num_threads, - const std::vector& filter_gains) { - num_rotators_ = num_rotators; - num_channels_ = num_channels; - num_threads_ = num_threads; - std::vector freqs(num_rotators); - for (size_t i = 0; i < num_rotators_; ++i) { - freqs[i] = BarkFreq(static_cast(i) / (num_rotators_ - 1)); - // printf("%d %g\n", i, freqs[i]); - } - rotators_ = new Rotators(num_channels, freqs, filter_gains, samplerate); - - max_delay_ = rotators_[0].max_delay_; - QCHECK_LE(max_delay_, kBlockSize); - fprintf(stderr, "Rotator bank output delay: %zu\n", max_delay_); - filter_outputs_.resize(num_rotators); - for (std::vector& output : filter_outputs_) { - output.resize(num_channels_ * kBlockSize, 0.f); - } - } - ~RotatorFilterBank() { - delete rotators_; - } - - // TODO(jyrki): filter all at once in the generic case, filtering one - // is not memory friendly in this memory tabulation. - void FilterOne(size_t f_ix, const float* history, int64_t total_in, - int64_t len, FilterMode mode, float* output) { - size_t out_ix = 0; - for (int64_t i = 0; i < len; ++i) { - int64_t delayed_ix = total_in + i - rotators_->advance[f_ix]; - size_t histo_ix = num_channels_ * (delayed_ix & kHistoryMask); - for (size_t c = 0; c < num_channels_; ++c) { - float delayed = history[histo_ix + c]; - rotators_->Increment(c, f_ix, delayed); - } - if (total_in + i >= max_delay_) { - for (size_t c = 0; c < num_channels_; ++c) { - output[out_ix * num_channels_ + c] = rotators_->GetSample(c, f_ix, mode); - } - ++out_ix; - } - } - } - - int64_t FilterAllSingleThreaded(const float* history, int64_t total_in, int64_t len, - FilterMode mode, float* output, size_t output_size) { - size_t out_ix = 0; - for (size_t c = 0; c < num_channels_; ++c) { - rotators_->OccasionallyRenormalize(); - } - for (int64_t i = 0; i < len; ++i) { - for (size_t c = 0; c < num_channels_; ++c) { - for (int k = 0; k < kNumRotators; ++k) { - int64_t delayed_ix = total_in + i - rotators_->advance[k]; - size_t histo_ix = num_channels_ * (delayed_ix & kHistoryMask); - float delayed = history[histo_ix + c]; - rotators_->AddAudio(c, k, delayed); - } - } - rotators_->IncrementAll(); - if (total_in + i >= max_delay_) { - for (size_t c = 0; c < num_channels_; ++c) { - output[out_ix * num_channels_ + c] = HardClip(rotators_->GetSampleAll(c)); - } - ++out_ix; - } - } - size_t out_len = total_in < max_delay_ ? - std::max(0, len - (max_delay_ - total_in)) : len; - return out_len; - } - - int64_t FilterAll(const float* history, int64_t total_in, int64_t len, - FilterMode mode, float* output, size_t output_size) { - int select_rot = absl::GetFlag(FLAGS_select_rot); - auto run = [&](size_t thread) { - while (true) { - size_t my_task = next_task_++; - if (my_task >= num_rotators_) return; - if (select_rot >= 0 && my_task != select_rot) { - continue; - } - FilterOne(my_task, history, total_in, len, mode, - filter_outputs_[my_task].data()); - } - }; - next_task_ = 0; - std::vector> futures; - futures.reserve(num_threads_); - for (size_t i = 0; i < num_threads_; ++i) { - futures.push_back(std::async(std::launch::async, run, i)); - } - for (size_t i = 0; i < num_threads_; ++i) { - futures[i].get(); - } - size_t out_len = total_in < max_delay_ ? - std::max(0, len - (max_delay_ - total_in)) : len; - if (mode == IDENTITY || select_rot >= 0) { - std::fill(output, output + output_size, 0); - for (std::vector& filter_output : filter_outputs_) { - for (int i = 0; i < filter_output.size(); ++i) { - output[i] += filter_output[i]; - filter_output[i] = 0.f; - } - } - } else { - for (size_t i = 0; i < out_len; ++i) { - for (size_t j = 0; j < num_rotators_; ++j) { - for (size_t c = 0; c < num_channels_; ++c) { - size_t out_idx = (i * num_rotators_ + j) * num_channels_ + c; - output[out_idx] = - filter_outputs_[j][i * num_channels_ + c]; - } - } - } - } - return out_len; - } - - size_t num_rotators_; - size_t num_channels_; - size_t num_threads_; - Rotators *rotators_; - int64_t max_delay_; - std::vector> filter_outputs_; - std::atomic next_task_{0}; -}; - float SquareError(const float* input_history, const float* output, - size_t num_channels, size_t total, size_t output_len) { + size_t num_channels, size_t total, size_t output_len) { float res = 0.0; for (size_t i = 0; i < output_len; ++i) { int input_ix = i + total; size_t histo_ix = num_channels * (input_ix & kHistoryMask); - for (size_t c = 0; c < num_channels; ++ c) { + for (size_t c = 0; c < num_channels; ++c) { float in = input_history[histo_ix + c]; float out = output[num_channels * i + c]; float diff = in - out; @@ -541,9 +219,10 @@ class OutputSignal { public: OutputSignal(size_t channels, size_t freq_channels, size_t samplerate, bool save_output) - : channels_(channels), freq_channels_(freq_channels), - samplerate_(samplerate), save_output_(save_output) { - } + : channels_(channels), + freq_channels_(freq_channels), + samplerate_(samplerate), + save_output_(save_output) {} void writef(const float* data, size_t nframes) { if (output_file_) { @@ -618,7 +297,7 @@ void Process( RotatorFilterBank rotbank(kNumRotators, num_channels, input_stream.samplerate(), /*num_threads=*/1, - filter_gains); + filter_gains, absl::GetFlag(FLAGS_gain)); start_progress(); int64_t total_in = 0; @@ -641,8 +320,8 @@ void Process( } int64_t output_len = 0; if (mode == IDENTITY) { - output_len = rotbank.FilterAllSingleThreaded(history.data(), total_in, read, mode, - output.data(), output.size()); + output_len = rotbank.FilterAllSingleThreaded( + history.data(), total_in, read, mode, output.data(), output.size()); } else { output_len = rotbank.FilterAll(history.data(), total_in, read, mode, output.data(), output.size()); @@ -674,7 +353,7 @@ void RecomputeFilterGains(std::vector& filter_gains) { size_t f0 = scaled_f; size_t f1 = f0 + 1; float gain = std::abs(fft[f0]) * (scaled_f - f0) + - std::abs(fft[f1]) * (f1 - scaled_f); + std::abs(fft[f1]) * (f1 - scaled_f); optsum += fabs(gain - 1.0); filter_gains[i] /= pow(gain, 0.8 - 0.7 * iter / 10000); filter_gains[i] = pow(filter_gains[i], 0.9999); @@ -705,14 +384,14 @@ void ValueToRgb(float val, float good_threshold, float bad_threshold, if (val < good_threshold) { val = (val / good_threshold) * 0.3; } else if (val < bad_threshold) { - val = 0.3 + - (val - good_threshold) / (bad_threshold - good_threshold) * 0.15; + val = + 0.3 + (val - good_threshold) / (bad_threshold - good_threshold) * 0.15; } else { val = 0.45 + (val - bad_threshold) / (bad_threshold * 12) * 0.5; } static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]); val = std::min(std::max(val * (kTableSize - 1), 0.0), - kTableSize - 2); + kTableSize - 2); int ix = static_cast(val); ix = std::min(std::max(0, ix), kTableSize - 2); // Handle NaN float mix = val - ix; @@ -769,7 +448,7 @@ void CreatePlot(InputSignal& input, OutputSignal& output, FilterMode mode) { fclose(f); return; } - std::vector > to_plot; + std::vector> to_plot; if (absl::GetFlag(FLAGS_plot_input)) { to_plot.push_back({"/tmp/input_signal.txt", "input"}); } @@ -782,7 +461,7 @@ void CreatePlot(InputSignal& input, OutputSignal& output, FilterMode mode) { output.DumpSignal(f); } fclose(f); - to_plot.push_back({fn,"output"}); + to_plot.push_back({fn, "output"}); } if (to_plot.empty()) { return; @@ -792,24 +471,24 @@ void CreatePlot(InputSignal& input, OutputSignal& output, FilterMode mode) { fprintf(f, "set output \"plot.png\"\n"); fprintf(f, "plot "); for (size_t i = 0; i < to_plot.size(); ++i) { - fprintf(f, "\"%s\" with lines title \"%s\"%s", - to_plot[i].first.c_str(), to_plot[i].second.c_str(), + fprintf(f, "\"%s\" with lines title \"%s\"%s", to_plot[i].first.c_str(), + to_plot[i].second.c_str(), i + 1 < to_plot.size() ? ", \\\n " : "\n"); } fclose(f); system("gnuplot /tmp/plot.txt"); } -} // namespace +} // namespace tabuli + +using namespace tabuli; int main(int argc, char** argv) { std::vector posargs = absl::ParseCommandLine(argc, argv); QCHECK_GE(posargs.size(), 2) << "Usage: " << argv[0] << " []"; FilterMode mode = GetFilterMode(); InputSignal input(posargs[1]); - size_t freq_channels = - mode == IDENTITY || absl::GetFlag(FLAGS_select_rot) >= 0 ? - 1 : kNumRotators; + size_t freq_channels = mode == IDENTITY ? 1 : kNumRotators; OutputSignal output(input.channels(), freq_channels, input.samplerate(), absl::GetFlag(FLAGS_plot_output)); if (posargs.size() > 2) { diff --git a/speaker_experiments/revolve.cc b/speaker_experiments/revolve.cc index b7d6d06..329a447 100644 --- a/speaker_experiments/revolve.cc +++ b/speaker_experiments/revolve.cc @@ -31,7 +31,6 @@ ABSL_FLAG(double, distance_to_interval_ratio, 8, "ratio of (distance between microphone and source array) / (distance " "between each source); default = 40cm / 10cm = 4"); - namespace { constexpr int kSubSourcePrecision = 1000; @@ -50,46 +49,31 @@ float ActualLeftToRightRatio(float left, float right) { } // Fast approximate function for temporal coherence of 3x exp leakers. -float FindMedian3xLeaker(float window) { - return -2.32/log(window); -} +float FindMedian3xLeaker(float window) { return -2.32 / log(window); } constexpr int64_t kNumRotators = 128; float GetFilterGains(int i) { static const float kFilterGains[kNumRotators] = { - 1.050645, 1.948438, 3.050339, 3.967913, - 4.818584, 5.303335, 5.560281, 5.490826, - 5.156689, 4.547374, 3.691308, 2.666868, - 1.539254, 0.656948, 0.345893, 0.327111, - 0.985318, 1.223506, 0.447645, 0.830961, - 1.075181, 0.613335, 0.902695, 0.855391, - 0.817774, 0.823359, 0.841483, 0.838562, - 0.831912, 0.808731, 0.865214, 0.808036, - 0.850837, 0.821305, 0.839458, 0.829195, - 0.836373, 0.827271, 0.836018, 0.834514, - 0.825624, 0.836999, 0.833990, 0.832992, - 0.830897, 0.832593, 0.846116, 0.824796, - 0.829331, 0.844509, 0.838830, 0.821733, - 0.840738, 0.841735, 0.827570, 0.838581, - 0.837742, 0.834965, 0.842970, 0.832145, - 0.847596, 0.840942, 0.830891, 0.850632, - 0.841468, 0.838383, 0.841493, 0.855118, - 0.826750, 0.848000, 0.874356, 0.812177, - 0.849037, 0.893550, 0.832527, 0.827986, - 0.877198, 0.851760, 0.846317, 0.883044, - 0.843178, 0.856925, 0.857045, 0.860695, - 0.894345, 0.870391, 0.839519, 0.870541, - 0.870573, 0.902951, 0.871798, 0.818328, - 0.871413, 0.921101, 0.863915, 0.793014, - 0.936519, 0.888107, 0.856968, 0.821018, - 0.987345, 0.904846, 0.783447, 0.973613, - 0.903628, 0.875688, 0.931024, 0.992087, - 0.806914, 1.050332, 0.942569, 0.800870, - 1.210426, 0.916555, 0.817352, 1.126946, - 0.985119, 0.922530, 0.994633, 0.959602, - 0.381419, 1.879201, 2.078451, 0.475196, - 0.952731, 1.709305, 1.383894, 1.557669, + 1.050645, 1.948438, 3.050339, 3.967913, 4.818584, 5.303335, 5.560281, + 5.490826, 5.156689, 4.547374, 3.691308, 2.666868, 1.539254, 0.656948, + 0.345893, 0.327111, 0.985318, 1.223506, 0.447645, 0.830961, 1.075181, + 0.613335, 0.902695, 0.855391, 0.817774, 0.823359, 0.841483, 0.838562, + 0.831912, 0.808731, 0.865214, 0.808036, 0.850837, 0.821305, 0.839458, + 0.829195, 0.836373, 0.827271, 0.836018, 0.834514, 0.825624, 0.836999, + 0.833990, 0.832992, 0.830897, 0.832593, 0.846116, 0.824796, 0.829331, + 0.844509, 0.838830, 0.821733, 0.840738, 0.841735, 0.827570, 0.838581, + 0.837742, 0.834965, 0.842970, 0.832145, 0.847596, 0.840942, 0.830891, + 0.850632, 0.841468, 0.838383, 0.841493, 0.855118, 0.826750, 0.848000, + 0.874356, 0.812177, 0.849037, 0.893550, 0.832527, 0.827986, 0.877198, + 0.851760, 0.846317, 0.883044, 0.843178, 0.856925, 0.857045, 0.860695, + 0.894345, 0.870391, 0.839519, 0.870541, 0.870573, 0.902951, 0.871798, + 0.818328, 0.871413, 0.921101, 0.863915, 0.793014, 0.936519, 0.888107, + 0.856968, 0.821018, 0.987345, 0.904846, 0.783447, 0.973613, 0.903628, + 0.875688, 0.931024, 0.992087, 0.806914, 1.050332, 0.942569, 0.800870, + 1.210426, 0.916555, 0.817352, 1.126946, 0.985119, 0.922530, 0.994633, + 0.959602, 0.381419, 1.879201, 2.078451, 0.475196, 0.952731, 1.709305, + 1.383894, 1.557669, }; return kFilterGains[i]; } @@ -98,7 +82,7 @@ struct PerChannel { // [0..1] is for real and imag of 1st leaking accumulation // [2..3] is for real and imag of 2nd leaking accumulation // [4..5] is for real and imag of 3rd leaking accumulation - float accu[6][kNumRotators] = { 0 }; + float accu[6][kNumRotators] = {0}; float LenSqr(size_t i) { return accu[4][i] * accu[4][i] + accu[5][i] * accu[5][i]; } @@ -108,27 +92,30 @@ struct Rotators { // Four arrays of rotators. // [0..1] is real and imag for rotation speed // [2..3] is real and image for a frequency rotator of length sqrt(gain[i]) - // Values inserted into the rotators are multiplied with this rotator in both input - // and output, leading to a total gain multiplication if the length is at sqrt(gain). - float rot[4][kNumRotators] = { 0 }; + // Values inserted into the rotators are multiplied with this rotator in both + // input and output, leading to a total gain multiplication if the length is + // at sqrt(gain). + float rot[4][kNumRotators] = {0}; std::vector channel; - // Accu has the channel related data, everything else the same between channels. + // Accu has the channel related data, everything else the same between + // channels. float window[kNumRotators]; float gain[kNumRotators]; - int16_t delay[kNumRotators] = { 0 }; - int16_t advance[kNumRotators] = { 0 }; + int16_t delay[kNumRotators] = {0}; + int16_t advance[kNumRotators] = {0}; int16_t max_delay_ = 0; int FindMedian3xLeaker(float window) { - // Approximate filter delay. TODO: optimize this value along with gain values. - // Recordings can sound better with -2.32 as it pushes the bass signals a bit earlier - // and likely compensates human hearing's deficiency for temporal separation. + // Approximate filter delay. TODO: optimize this value along with gain + // values. Recordings can sound better with -2.32 as it pushes the bass + // signals a bit earlier and likely compensates human hearing's deficiency + // for temporal separation. const float kMagic = -2.2028003503591482; const float kAlmostHalfForRounding = 0.4687; - return static_cast(kMagic/log(window) + kAlmostHalfForRounding); + return static_cast(kMagic / log(window) + kAlmostHalfForRounding); } - Rotators() { } + Rotators() {} Rotators(int num_channels, std::vector frequency, std::vector filter_gains, const float sample_rate) { channel.resize(num_channels); @@ -159,7 +146,8 @@ struct Rotators { } void OccasionallyRenormalize() { for (int i = 0; i < kNumRotators; ++i) { - float norm = sqrt(gain[i] / (rot[2][i] * rot[2][i] + rot[3][i] * rot[3][i])); + float norm = + sqrt(gain[i] / (rot[2][i] * rot[2][i] + rot[3][i] * rot[3][i])); rot[2][i] *= norm; rot[3][i] *= norm; } @@ -187,15 +175,9 @@ struct Rotators { } } } - void GetTriplet(float left_to_right_ratio, - int rot_ix, - float rightr, - float righti, - float leftr, - float lefti, - float &right, - float ¢er, - float &left) { + void GetTriplet(float left_to_right_ratio, int rot_ix, float rightr, + float righti, float leftr, float lefti, float &right, + float ¢er, float &left) { float aver = rightr + leftr; float avei = righti + lefti; @@ -226,9 +208,8 @@ static const int kHistorySize = (1 << 18); static const int kHistoryMask = kHistorySize - 1; struct RotatorFilterBank { - RotatorFilterBank(size_t num_rotators, size_t num_channels, - size_t samplerate, - const std::vector& filter_gains) { + RotatorFilterBank(size_t num_rotators, size_t num_channels, size_t samplerate, + const std::vector &filter_gains) { std::vector freqs(num_rotators); for (size_t i = 0; i < num_rotators; ++i) { freqs[i] = BarkFreq(static_cast(i) / (num_rotators - 1)); @@ -239,9 +220,7 @@ struct RotatorFilterBank { QCHECK_LE(max_delay_, kBlockSize); fprintf(stderr, "Rotator bank output delay: %zu\n", max_delay_); } - ~RotatorFilterBank() { - delete rotators_; - } + ~RotatorFilterBank() { delete rotators_; } Rotators *rotators_; int64_t max_delay_; }; @@ -253,12 +232,11 @@ float AngleEffect(float dy, float distance) { return cos_angle; } -float HardClip(float v) { - return std::max(-1.0f, std::min(1.0f, v)); -} +float HardClip(float v) { return std::max(-1.0f, std::min(1.0f, v)); } struct MultiChannelDriverModel { - std::vector ave; // For high pass filtering of input voltage (~20 Hz or so) + std::vector + ave; // For high pass filtering of input voltage (~20 Hz or so) std::vector pos; // Position of the driver membrane. std::vector dpos; // Velocity of the driver membrane. void Initialize(size_t n) { @@ -301,7 +279,7 @@ struct MultiChannelDriverModel { // Control delays for binaural experience. struct BinauralModel { size_t index = 0; - float channel[2][4096] = { 0 }; + float channel[2][4096] = {0}; void GetAndAdvance(float *left_arg, float *right_arg) { *left_arg = HardClip(channel[0][index & 0xfff]); *right_arg = HardClip(channel[1][index & 0xfff]); @@ -313,9 +291,7 @@ struct BinauralModel { channel[1][index & 0xfff] = 0.0; ++index; } - void Emit(float *p) { - GetAndAdvance(p, p + 1); - } + void Emit(float *p) { GetAndAdvance(p, p + 1); } void WriteWithDelay(size_t c, size_t delay, float v) { channel[c][(index + delay) & 0xfff] += v; } @@ -329,23 +305,9 @@ struct BinauralModel { float *GetBinauralTable() { static const float binau[16] = { - 1.4, - 1.3, - 1.2, - 1.1, - 1.0, - 0.9, - 0.8, - 0.7, - - 0.6, - 0.5, - 0.4, - 0.35, - 0.3, - 0.25, - 0.2, - 0.15, + 1.4, 1.3, 1.2, 1.1, 1.0, 0.9, 0.8, 0.7, + + 0.6, 0.5, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, }; static float table[kNumRotators * 16]; for (int i = 0; i < kNumRotators; ++i) { @@ -356,12 +318,10 @@ float *GetBinauralTable() { return table; } - - template -void Process( - const int output_channels, const double distance_to_interval_ratio, - In& input_stream, Out& output_stream, Out& binaural_output_stream) { +void Process(const int output_channels, const double distance_to_interval_ratio, + In &input_stream, Out &output_stream, + Out &binaural_output_stream) { std::vector history(input_stream.channels() * kHistorySize); std::vector input(input_stream.channels() * kBlockSize); std::vector output(output_channels * kBlockSize); @@ -389,7 +349,7 @@ void Process( 1); for (int i = 0; i < kSubSourcePrecision * (output_channels - 1) + 1; ++i) { const float x_div_interval = static_cast(i) / kSubSourcePrecision - - 0.5f * (output_channels - 1); + 0.5f * (output_channels - 1); const float x_div_distance = x_div_interval / distance_to_interval_ratio; const float angle = std::atan(x_div_distance); speaker_to_ratio_table.push_back(ExpectedLeftToRightRatio(angle)); @@ -440,44 +400,44 @@ void Process( (std::lower_bound(speaker_to_ratio_table.begin(), speaker_to_ratio_table.end(), ratio, std::greater<>()) - - speaker_to_ratio_table.begin()) * (1.0 / kSubSourcePrecision); + speaker_to_ratio_table.begin()) * + (1.0 / kSubSourcePrecision); if (subspeaker_index < 1.0) { subspeaker_index = 1.0; } if (subspeaker_index >= 14.0) { subspeaker_index = 14.0; } - float stage_size = 1.3; // meters - float distance_from_center = stage_size * (subspeaker_index - 0.5 * (output_channels - 1)) / (output_channels - 1); + float stage_size = 1.3; // meters + float distance_from_center = + stage_size * (subspeaker_index - 0.5 * (output_channels - 1)) / + (output_channels - 1); float assumed_distance_to_line = stage_size * 1.6; float right, center, left; - rfb.rotators_->GetTriplet(subspeaker_index / (output_channels - 1), - rot, + rfb.rotators_->GetTriplet(subspeaker_index / (output_channels - 1), rot, rfb.rotators_->channel[1].accu[4][rot], rfb.rotators_->channel[1].accu[5][rot], rfb.rotators_->channel[0].accu[4][rot], - rfb.rotators_->channel[0].accu[5][rot], - right, center, left); + rfb.rotators_->channel[0].accu[5][rot], right, + center, left); if (total_in + i >= rfb.max_delay_) { - #define BINAURAL #ifdef BINAURAL // left and right. { - //left *= 2.0; - //right *= 2.0; - // Some hacks here: 27 samples is roughly 19 cm which I use - // as an approximate for the added delay needed for this - // kind of left-right 'residual sound'. perhaps it is too - // much. perhaps having more than one delay would make sense. + // left *= 2.0; + // right *= 2.0; + // Some hacks here: 27 samples is roughly 19 cm which I use + // as an approximate for the added delay needed for this + // kind of left-right 'residual sound'. perhaps it is too + // much. perhaps having more than one delay would make sense. // - // This computation being left-right and with delay accross - // causes a relaxed feeling of space to emerge. + // This computation being left-right and with delay accross + // causes a relaxed feeling of space to emerge. float lbin = left * 2; float rbin = right * 2; size_t delay = 0; for (int i = 0; i < 5; ++i) { - binaural.WriteWithDelay(0, delay, lbin); binaural.WriteWithDelay(1, delay, rbin); @@ -529,19 +489,24 @@ void Process( } #endif - float speaker_offset_left = (2 - 7.5) * 0.1; float speaker_offset_right = (13 - 7.5) * 0.1; for (int kk = 0; kk < output_channels; ++kk) { float speaker_offset = (kk - 7.5) * 0.1; - float val = AngleEffect(speaker_offset + distance_from_center, assumed_distance_to_line) * center; + float val = AngleEffect(speaker_offset + distance_from_center, + assumed_distance_to_line) * + center; output[out_ix * output_channels + kk] += val; output[out_ix * output_channels + kk] += - AngleEffect(speaker_offset - speaker_offset_right, assumed_distance_to_line) * right; + AngleEffect(speaker_offset - speaker_offset_right, + assumed_distance_to_line) * + right; output[out_ix * output_channels + kk] += - AngleEffect(speaker_offset - speaker_offset_left, assumed_distance_to_line) * left; + AngleEffect(speaker_offset - speaker_offset_left, + assumed_distance_to_line) * + left; } } } @@ -563,14 +528,16 @@ void Process( } // namespace -int main(int argc, char** argv) { +int main(int argc, char **argv) { absl::ParseCommandLine(argc, argv); const int output_channels = absl::GetFlag(FLAGS_output_channels); const float distance_to_interval_ratio = absl::GetFlag(FLAGS_distance_to_interval_ratio); - QCHECK_EQ(argc, 4) << "Usage: " << argv[0] << " "; + QCHECK_EQ(argc, 4) + << "Usage: " << argv[0] + << " "; SndfileHandle input_file(argv[1]); QCHECK(input_file) << input_file.strError(); @@ -585,5 +552,6 @@ int main(int argc, char** argv) { argv[3], /*mode=*/SFM_WRITE, /*format=*/SF_FORMAT_WAV | SF_FORMAT_PCM_24, /*channels=*/2, /*samplerate=*/input_file.samplerate()); - Process(output_channels, distance_to_interval_ratio, input_file, output_file, binaural_output_file); + Process(output_channels, distance_to_interval_ratio, input_file, output_file, + binaural_output_file); }