From 271a9a156a35420bebba5c1a4cd1c6a257eb0de1 Mon Sep 17 00:00:00 2001 From: tnie Date: Sat, 4 Jan 2025 20:00:28 -0600 Subject: [PATCH] fftw::convolve support same mode --- include/fftconv/fftconv.hpp | 50 ++++++++----- include/fftconv/fftw.hpp | 23 +++--- py/test/test_fftconv.py | 23 ++++-- test/test_common.hpp | 15 +--- test/test_fftconv.cpp | 140 ++++++++++++++++++++---------------- test/test_fftw.cpp | 17 ++--- test/test_hilbert.cpp | 4 +- 7 files changed, 148 insertions(+), 124 deletions(-) diff --git a/include/fftconv/fftconv.hpp b/include/fftconv/fftconv.hpp index da71402..8eb6488 100644 --- a/include/fftconv/fftconv.hpp +++ b/include/fftconv/fftconv.hpp @@ -317,7 +317,7 @@ template struct FFTConvBuffer { // EngineFFTConv manages the memory of the forward and backward fft plans // and the fftw buffers -template +template struct FFTConvEngine : public fftw::cache_mixin> { using Plan = fftw::Plan; using Cx = fftw::Complex; @@ -331,9 +331,9 @@ struct FFTConvEngine : public fftw::cache_mixin> { // n is padded_length explicit FFTConvEngine(size_t n) : buf(n), forward(Plan::dft_r2c_1d(n, buf.real_ptr(), buf.cx1_ptr(), - fftw::FLAGS)), + PlannerFlag)), backward( - Plan::dft_c2r_1d(n, buf.cx1_ptr(), buf.real_ptr(), fftw::FLAGS)) {} + Plan::dft_c2r_1d(n, buf.cx1_ptr(), buf.real_ptr(), PlannerFlag)) {} // Get the fftconv_plans object for a specific kernel size static auto get_for_ksize(size_t ksize) -> FFTConvEngine & { @@ -341,34 +341,44 @@ struct FFTConvEngine : public fftw::cache_mixin> { return FFTConvEngine::get(fft_size); } + template void convolve(View a, View k, MutableView out) { std::fill(out.begin(), out.end(), static_cast(0)); // Copy input to buffer - internal::copy_to_padded_buffer(a, buf.real); - // A = fft(a) + internal::copy_to_padded_buffer(a, buf.real); forward.execute_dft_r2c(buf.real_ptr(), buf.cx1_ptr()); // Copy b to buffer - internal::copy_to_padded_buffer(k, buf.real); - // B = fft(b) + internal::copy_to_padded_buffer(k, buf.real); forward.execute_dft_r2c(buf.real_ptr(), buf.cx2_ptr()); // Complex elementwise multiple, A = A * B internal::multiply_cx(buf.cx1, buf.cx2, buf.cx1); + // a = ifft(A) const T fct = 1. / buf.real.size(); - if (isSIMDAligned<64>(out.data())) { - // a = ifft(A) - backward.execute_dft_c2r(buf.cx1_ptr(), out.data()); - fftw::normalize(out, fct); - } else { - // a = ifft(A) + if constexpr (Mode == ConvMode::Full) { + + if (isSIMDAligned<64>(out.data())) { + backward.execute_dft_c2r(buf.cx1_ptr(), out.data()); + fftw::normalize(out, fct); + } else { + backward.execute_dft_c2r(buf.cx1_ptr(), buf.real_ptr()); + fftw::normalize(buf.real, fct); + std::copy(buf.real.begin(), buf.real.end(), out.begin()); + } + + } else if constexpr (Mode == ConvMode::Same) { + const size_t padding = k.size() / 2; + backward.execute_dft_c2r(buf.cx1_ptr(), buf.real_ptr()); fftw::normalize(buf.real, fct); - std::copy(buf.real.begin(), buf.real.end(), out.begin()); + + const auto *const real_ptr = buf.real_ptr() + padding; + std::copy(real_ptr, real_ptr + out.size(), out.data()); } } @@ -442,13 +452,14 @@ struct FFTConvEngine : public fftw::cache_mixin> { // * Cache fftw_plan // * Reuse buffers (no malloc on second call to the same convolution size) // https://en.wikipedia.org/w/index.php?title=Convolution#Fast_convolution_algorithms -template +template void convolve_fftw(const std::span input, const std::span kernel, std::span output) { // length of the real arrays, including the final convolution output const size_t padded_length = input.size() + kernel.size() - 1; - auto &plan = FFTConvEngine::get(padded_length); - plan.convolve(input, kernel, output); + auto &plan = FFTConvEngine::get(padded_length); + plan.template convolve(input, kernel, output); } /** @@ -462,10 +473,11 @@ For "Same" mode, output_size == input_size 2. convolve with kernel using fft of length N. 3. add blocks together */ -template +template void oaconvolve_fftw(std::span input, std::span kernel, std::span output) { - auto &plan = FFTConvEngine::get_for_ksize(kernel.size()); + auto &plan = FFTConvEngine::get_for_ksize(kernel.size()); plan.template oaconvolve(input, kernel, output); } diff --git a/include/fftconv/fftw.hpp b/include/fftconv/fftw.hpp index 0da9102..bc62361 100644 --- a/include/fftconv/fftw.hpp +++ b/include/fftconv/fftw.hpp @@ -23,9 +23,6 @@ A C++ FFTW wrapper namespace fftw { -// const static unsigned int FLAGS = FFTW_ESTIMATE; -const static unsigned int FLAGS = FFTW_EXHAUSTIVE; - // Place this at the beginning of main() and RAII will take care of setting up // and tearing down FFTW3 (threads and wisdom) // NOLINTNEXTLINE(*-special-member-functions) @@ -561,19 +558,20 @@ template struct R2CSplitBuffer { } }; -template +template struct EngineDFT1D : public cache_mixin> { using Cx = fftw::Complex; using Plan = fftw::Plan; - C2CBuffer buf; + C2CBuffer buf; Plan plan_forward; Plan plan_backward; explicit EngineDFT1D(size_t n) - : buf(n), - plan_forward(Plan::dft_1d(n, buf.in, buf.out, FFTW_FORWARD, FLAGS)), - plan_backward(Plan::dft_1d(n, buf.out, buf.in, FFTW_BACKWARD, FLAGS)){}; + : buf(n), plan_forward(Plan::dft_1d(n, buf.in, buf.out, FFTW_FORWARD, + PlannerFlag)), + plan_backward( + Plan::dft_1d(n, buf.out, buf.in, FFTW_BACKWARD, PlannerFlag)){}; void forward() { plan_forward.execute(); } void forward(const Cx *in, Cx *out) const { plan_forward.execute(in, out); } @@ -581,7 +579,8 @@ struct EngineDFT1D : public cache_mixin> { void backward(const Cx *in, Cx *out) const { plan_backward.execute(in, out); } }; -template struct EngineR2C1D : public cache_mixin> { +template +struct EngineR2C1D : public cache_mixin> { using Cx = fftw::Complex; using Plan = fftw::Plan; @@ -591,9 +590,9 @@ template struct EngineR2C1D : public cache_mixin> { explicit EngineR2C1D(size_t n) : buf(n), plan_forward(Plan::dft_r2c_1d(static_cast(n), buf.in, - buf.out, FLAGS)), - plan_backward( - Plan::dft_c2r_1d(static_cast(n), buf.out, buf.in, FLAGS)) {} + buf.out, PlannerFlag)), + plan_backward(Plan::dft_c2r_1d(static_cast(n), buf.out, buf.in, + PlannerFlag)) {} void forward() { plan_forward.execute(); } void forward(const T *in, Cx *out) const { diff --git a/py/test/test_fftconv.py b/py/test/test_fftconv.py index 831c80e..a188a66 100644 --- a/py/test/test_fftconv.py +++ b/py/test/test_fftconv.py @@ -6,9 +6,8 @@ from pyfftconv import convolve, convolve_, hilbert, hilbert_, oaconvolve, oaconvolve_ -class TestFFTConv(unittest.TestCase): +class TestConvolve(unittest.TestCase): def setUp(self): - # Example test data self.a = np.array([1, 2, 3, 4, 5], dtype=np.float64) self.k = np.array([0.2, 0.5, 0.2], dtype=np.float64) self.out_full = np.empty(self.a.size + self.k.size - 1) @@ -24,6 +23,16 @@ def test_convolve_out_full(self): expected = np.convolve(self.a, self.k, mode="full") assert_allclose(self.out_full, expected, rtol=1e-5, atol=1e-8) + def test_convolve_same(self): + result = convolve(self.a, self.k, mode="same") + expected = np.convolve(self.a, self.k, mode="same") + assert_allclose(result, expected, rtol=1e-5, atol=1e-8) + + def test_convolve_out_same(self): + convolve_(self.a, self.k, self.out_same, mode="same") + expected = np.convolve(self.a, self.k, mode="same") + assert_allclose(self.out_same, expected, rtol=1e-5, atol=1e-8) + def test_oaconvolve_full(self): result = oaconvolve(self.a, self.k, mode="full") expected = np.convolve(self.a, self.k, mode="full") @@ -44,15 +53,21 @@ def test_oaconvolve_out_same(self): expected = np.convolve(self.a, self.k, mode="same") assert_allclose(self.out_same, expected, rtol=1e-5, atol=1e-8) + +class TestHilbert(unittest.TestCase): + def setUp(self): + self.a = np.array([1, 2, 3, 4, 5], dtype=np.float64) + self.out = np.empty(self.a.size) + def test_hilbert(self): result = hilbert(self.a) expected = np.abs(signal.hilbert(self.a)) assert_allclose(result, expected, rtol=1e-5, atol=1e-8) def test_hilbert_out(self): - hilbert_(self.a, self.out_same) + hilbert_(self.a, self.out) expected = np.abs(signal.hilbert(self.a)) - assert_allclose(self.out_same, expected, rtol=1e-5, atol=1e-8) + assert_allclose(self.out, expected, rtol=1e-5, atol=1e-8) if __name__ == "__main__": diff --git a/test/test_common.hpp b/test/test_common.hpp index a5fc63b..ca6d0e0 100644 --- a/test/test_common.hpp +++ b/test/test_common.hpp @@ -10,8 +10,8 @@ constexpr double DoubleTol{1e-9}; constexpr float FloatTol{1e-5}; template -void ExpectVectorsNear(const std::span expected, - const std::span actual, const T tolerance) { +void ExpectVectorsNear(const std::span actual, + const std::span expected, const T tolerance) { ASSERT_EQ(expected.size(), actual.size()) << "Vectors are of unequal length"; for (size_t i = 0; i < expected.size() && i < actual.size(); ++i) { @@ -19,14 +19,3 @@ void ExpectVectorsNear(const std::span expected, << "Vectors differ at index " << i; } } - -template -inline void ExpectArraysNear(const T *arr1, const T *arr2, size_t size, - T tolerance) { - for (size_t i = 0; i < size; ++i) { - if (std::abs(arr1[i] - arr2[i]) > tolerance) { - GTEST_FAIL() << "Arrays differ at index " << i << ": expected " << arr1[i] - << " but got " << arr2[i] << ", tolerance = " << tolerance; - } - } -} \ No newline at end of file diff --git a/test/test_fftconv.cpp b/test/test_fftconv.cpp index 7d0d64f..595afa1 100644 --- a/test/test_fftconv.cpp +++ b/test/test_fftconv.cpp @@ -98,115 +98,131 @@ template <> struct Tol { auto operator()() { return DoubleTol; } }; -TEST(FFTConvEngine, ExecutesOAConvFull) { +TEST(FFTConvEngine, OAConvFull) { using T = double; arma::Col kernel(95, arma::fill::randn); auto &plans = fftconv::FFTConvEngine::get_for_ksize(kernel.size()); - - // for (const auto arr_size : {1000, 2000, 3000}) { - for (const auto arr_size : {1000}) { + for (const auto arr_size : {1000, 2000, 3000}) { arma::Col arr(arr_size, arma::fill::randn); arma::Col expected = arma::conv(arr, kernel); arma::Col res(arr.size() + kernel.size() - 1, arma::fill::zeros); - plans.oaconvolve( - std::span(arr), std::span(kernel), std::span(res)); - - expected.save(fmt::format("full_expected_{}.bin", arr_size), - arma::raw_binary); - arr.save(fmt::format("full_arr_{}.bin", arr_size), arma::raw_binary); - kernel.save(fmt::format("full_kernel_{}.bin", arr_size), arma::raw_binary); - res.save(fmt::format("full_res_{}.bin", arr_size), arma::raw_binary); + plans.oaconvolve(arr, kernel, res); ExpectVectorsNear(std::span(res), std::span(expected), Tol()()); } } -TEST(FFTConvEngine, ExecutesOAConvSame) { +TEST(FFTConvEngine, OAConvSame) { using T = double; arma::Col kernel(95, arma::fill::randn); auto &plans = fftconv::FFTConvEngine::get_for_ksize(kernel.size()); - for (const auto arr_size : {1000, 2000, 3000}) { arma::Col arr(arr_size, arma::fill::randn); arma::Col expected = arma::conv(arr, kernel, "same"); arma::Col res(arr.size(), arma::fill::zeros); - plans.oaconvolve( - std::span(arr), std::span(kernel), std::span(res)); + plans.oaconvolve(arr, kernel, res); - expected.save(fmt::format("same_expected_{}.bin", arr_size), - arma::raw_binary); - arr.save(fmt::format("same_arr_{}.bin", arr_size), arma::raw_binary); - kernel.save(fmt::format("same_kernel_{}.bin", arr_size), arma::raw_binary); - res.save(fmt::format("same_res_{}.bin", arr_size), arma::raw_binary); + ExpectVectorsNear(res, expected, Tol()()); + } +} - for (size_t i = 0; i < expected.size(); ++i) { - ASSERT_NEAR(expected[i], res[i], DoubleTol) - << "Vectors differ at index " << i; +// Test convolution +template +void test_conv(Func conv_func) { + for (int real_size = 4; real_size < 100; real_size += 9) { + arma::Col a(real_size, arma::fill::randn); + arma::Col k(real_size, arma::fill::randn); + + size_t outSize{}; + arma::Col expected; + if constexpr (Mode == ConvMode::Full) { + outSize = a.size() + k.size() - 1; + expected = arma::conv(a, k, "full"); + } else if constexpr (Mode == ConvMode::Same) { + outSize = a.size(); + expected = arma::conv(a, k, "same"); } + + arma::Col out(outSize, arma::fill::zeros); + conv_func(a, k, out); + + ExpectVectorsNear(out, expected, Tol()()); } } // Test convolution -template -void execute_conv_full_correctly(Func conv_func) { - for (int real_size = 4; real_size < 100; real_size += 9) { - arma::Col arr1(real_size, arma::fill::randn); - arma::Col arr2(real_size, arma::fill::randn); - arma::Col expected = arma::conv(arr1, arr2); - arma::Col res(arr1.size() + arr2.size() - 1, arma::fill::zeros); +template +void test_oaconv(Func conv_func) { + const int ksize = 95; + const arma::Col kernel(ksize, arma::fill::randn); + + for (int real_size = 200; real_size < 501; real_size += 150) { + const arma::Col a(real_size, arma::fill::randn); + + size_t outSize{}; + arma::Col expected; + if constexpr (Mode == ConvMode::Full) { + outSize = a.size() + kernel.size() - 1; + expected = arma::conv(a, kernel, "full"); + } else if constexpr (Mode == ConvMode::Same) { + outSize = a.size(); + expected = arma::conv(a, kernel, "same"); + } - conv_func(std::span(arr1), std::span(arr2), - std::span(res)); + arma::Col res(outSize, arma::fill::zeros); + conv_func(a, kernel, res); ExpectVectorsNear(std::span(res), std::span(expected), Tol()()); } } -TEST(ConvolveFFTW, ExecuteCorrectly) { - execute_conv_full_correctly(fftconv::convolve_fftw); - - execute_conv_full_correctly(fftconv::convolve_fftw); +TEST(Convolve, Full) { + constexpr auto mode = ConvMode::Full; + test_conv(fftconv::convolve_fftw); + test_conv(fftconv::convolve_fftw); } -TEST(Convolution, ExecuteOAConvCorrectlyDifferentSizes) { - execute_conv_full_correctly( - fftconv::oaconvolve_fftw); - - execute_conv_full_correctly( - fftconv::oaconvolve_fftw); +TEST(Convolve, Same) { + constexpr auto mode = ConvMode::Same; + test_conv(fftconv::convolve_fftw); + test_conv(fftconv::convolve_fftw); } -// Test convolution -template -void execute_oaconv_same_correctly(Func conv_func) { - const int ksize = 65; - const arma::Col kernel(ksize, arma::fill::randn); - for (int real_size = 200; real_size < 501; real_size += 150) { - const arma::Col arr(real_size, arma::fill::randn); - const arma::Col expected = arma::conv(arr, kernel, "same"); +TEST(Convolve, PlannerFlag) { + constexpr auto mode = ConvMode::Full; + using T = double; + test_conv(fftconv::convolve_fftw); + test_conv(fftconv::convolve_fftw); + test_conv(fftconv::convolve_fftw); +} - arma::Col res(arr.size(), arma::fill::zeros); +TEST(OAConvolve, Full) { + constexpr auto mode = ConvMode::Full; + test_conv(fftconv::oaconvolve_fftw); + test_conv(fftconv::oaconvolve_fftw); +} - conv_func(std::span(arr), std::span(kernel), - std::span(res)); +TEST(OAConvolve, Same) { + constexpr auto mode = ConvMode::Same; - ExpectVectorsNear(std::span(res), std::span(expected), - Tol()()); - } + test_oaconv(fftconv::oaconvolve_fftw); + test_oaconv(fftconv::oaconvolve_fftw); } -TEST(OAConvolveSame, ExecuteCorrectly) { - execute_oaconv_same_correctly( - fftconv::oaconvolve_fftw); +TEST(OAConvolve, PlannerFlag) { + using T = double; + constexpr auto mode = ConvMode::Same; - execute_oaconv_same_correctly( - fftconv::oaconvolve_fftw); + test_oaconv(fftconv::oaconvolve_fftw); + test_oaconv(fftconv::oaconvolve_fftw); + test_oaconv(fftconv::oaconvolve_fftw); + test_oaconv(fftconv::oaconvolve_fftw); } // NOLINTEND(*-magic-numbers,*-array-index) diff --git a/test/test_fftw.cpp b/test/test_fftw.cpp index b88f375..99469c7 100644 --- a/test/test_fftw.cpp +++ b/test/test_fftw.cpp @@ -6,13 +6,6 @@ // NOLINTBEGIN(*-magic-numbers, *-pointer-arithmetic, *-non-private-member-*, // *-member-function, *-destructor) -template -void ExpectArraysNear(std::span arr1, std::span arr2, - T tolerance) { - assert(arr1.size() == arr2.size()); - return ExpectArraysNear(arr1.data(), arr2.data(), arr1.size(), tolerance); -} - class FFTWPlanCreateC2C : public testing::Test { protected: using T = double; @@ -301,8 +294,8 @@ TEST_F(FFTWPlanCreateC2CSplit, GuruPlanSplitCorrect) { ASSERT_NE(pf.plan, nullptr); pf.execute(); - ExpectArraysNear(ro, expect_ro, 1e-7); - ExpectArraysNear(io, expect_io, 1e-7); + ExpectVectorsNear(ro, expect_ro, 1e-7); + ExpectVectorsNear(io, expect_io, 1e-7); } TEST_F(FFTWPlanCreateR2C, GuruPlan) { @@ -383,8 +376,8 @@ TEST_F(FFTWPlanCreateR2CSplit, GuruPlanSplitCorrect) { ASSERT_NE(pf.plan, nullptr); pf.execute(); - ExpectArraysNear(expect_ro, ro, 1e-7); - ExpectArraysNear(expect_io, io, 1e-7); + ExpectVectorsNear(ro, expect_ro, 1e-7); + ExpectVectorsNear(io, expect_io, 1e-7); auto pb = fftw::Plan::guru_split_dft_c2r(rank, &dim, howmany, &howmany_dim, ro.data(), io.data(), in_.data(), @@ -396,7 +389,7 @@ TEST_F(FFTWPlanCreateR2CSplit, GuruPlanSplitCorrect) { for (double &v : in_) { v *= fct; } - ExpectArraysNear(in, in_, 1e-7); + ExpectVectorsNear(in, in_, 1e-7); } // NOLINTEND(*-magic-numbers, *-pointer-arithmetic, *-non-private-member-*, diff --git a/test/test_hilbert.cpp b/test/test_hilbert.cpp index 0f0fa31..8356a9a 100644 --- a/test/test_hilbert.cpp +++ b/test/test_hilbert.cpp @@ -3,7 +3,7 @@ #include #include -TEST(TestHilbertFFTW, Correct) { +TEST(HilbertFFTW, Correct) { const auto fn = [&]() { const std::array inp = { -0.999984, -0.736924, 0.511211, -0.0826997, 0.0655345, @@ -16,7 +16,7 @@ TEST(TestHilbertFFTW, Correct) { fftconv::hilbert(inp, out); - ExpectArraysNear(expect.data(), out.data(), expect.size(), 1e-6); + ExpectVectorsNear(out, expect, 1e-6); }; fn.template operator()();