Skip to content

Commit

Permalink
fftw::convolve support same mode
Browse files Browse the repository at this point in the history
  • Loading branch information
kwsp committed Jan 5, 2025
1 parent 26fa393 commit 271a9a1
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 124 deletions.
50 changes: 31 additions & 19 deletions include/fftconv/fftconv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ template <typename T> struct FFTConvBuffer {

// EngineFFTConv manages the memory of the forward and backward fft plans
// and the fftw buffers
template <typename T>
template <typename T, int PlannerFlag = FFTW_ESTIMATE>
struct FFTConvEngine : public fftw::cache_mixin<FFTConvEngine<T>> {
using Plan = fftw::Plan<T>;
using Cx = fftw::Complex<T>;
Expand All @@ -331,44 +331,54 @@ struct FFTConvEngine : public fftw::cache_mixin<FFTConvEngine<T>> {
// n is padded_length
explicit FFTConvEngine(size_t n)
: buf(n), forward(Plan::dft_r2c_1d(n, buf.real_ptr(), buf.cx1_ptr(),
fftw::FLAGS)),
PlannerFlag)),
backward(
Plan::dft_c2r_1d(n, buf.cx1_ptr(), buf.real_ptr(), fftw::FLAGS)) {}
Plan::dft_c2r_1d(n, buf.cx1_ptr(), buf.real_ptr(), PlannerFlag)) {}

// Get the fftconv_plans object for a specific kernel size
static auto get_for_ksize(size_t ksize) -> FFTConvEngine<T> & {
const auto fft_size = internal::get_optimal_fft_size(ksize);
return FFTConvEngine<T>::get(fft_size);
}

template <ConvMode Mode = ConvMode::Full>
void convolve(View a, View k, MutableView out) {
std::fill(out.begin(), out.end(), static_cast<T>(0));

// Copy input to buffer
internal::copy_to_padded_buffer<T>(a, buf.real);

// A = fft(a)
internal::copy_to_padded_buffer<T>(a, buf.real);
forward.execute_dft_r2c(buf.real_ptr(), buf.cx1_ptr());

// Copy b to buffer
internal::copy_to_padded_buffer<T>(k, buf.real);

// B = fft(b)
internal::copy_to_padded_buffer<T>(k, buf.real);
forward.execute_dft_r2c(buf.real_ptr(), buf.cx2_ptr());

// Complex elementwise multiple, A = A * B
internal::multiply_cx<T>(buf.cx1, buf.cx2, buf.cx1);

// a = ifft(A)
const T fct = 1. / buf.real.size();
if (isSIMDAligned<64>(out.data())) {
// a = ifft(A)
backward.execute_dft_c2r(buf.cx1_ptr(), out.data());
fftw::normalize<T>(out, fct);
} else {
// a = ifft(A)
if constexpr (Mode == ConvMode::Full) {

if (isSIMDAligned<64>(out.data())) {
backward.execute_dft_c2r(buf.cx1_ptr(), out.data());
fftw::normalize<T>(out, fct);
} else {
backward.execute_dft_c2r(buf.cx1_ptr(), buf.real_ptr());
fftw::normalize<T>(buf.real, fct);
std::copy(buf.real.begin(), buf.real.end(), out.begin());
}

} else if constexpr (Mode == ConvMode::Same) {
const size_t padding = k.size() / 2;

backward.execute_dft_c2r(buf.cx1_ptr(), buf.real_ptr());
fftw::normalize<T>(buf.real, fct);
std::copy(buf.real.begin(), buf.real.end(), out.begin());

const auto *const real_ptr = buf.real_ptr() + padding;
std::copy(real_ptr, real_ptr + out.size(), out.data());
}
}

Expand Down Expand Up @@ -442,13 +452,14 @@ struct FFTConvEngine : public fftw::cache_mixin<FFTConvEngine<T>> {
// * Cache fftw_plan
// * Reuse buffers (no malloc on second call to the same convolution size)
// https://en.wikipedia.org/w/index.php?title=Convolution#Fast_convolution_algorithms
template <Floating T, ConvMode Mode = ConvMode::Same>
template <Floating T, ConvMode Mode = ConvMode::Same,
int PlannerFlag = FFTW_ESTIMATE>
void convolve_fftw(const std::span<const T> input,
const std::span<const T> kernel, std::span<T> output) {
// length of the real arrays, including the final convolution output
const size_t padded_length = input.size() + kernel.size() - 1;
auto &plan = FFTConvEngine<T>::get(padded_length);
plan.convolve(input, kernel, output);
auto &plan = FFTConvEngine<T, PlannerFlag>::get(padded_length);
plan.template convolve<Mode>(input, kernel, output);
}

/**
Expand All @@ -462,10 +473,11 @@ For "Same" mode, output_size == input_size
2. convolve with kernel using fft of length N.
3. add blocks together
*/
template <Floating T, ConvMode Mode = ConvMode::Same>
template <Floating T, ConvMode Mode = ConvMode::Same,
int PlannerFlag = FFTW_ESTIMATE>
void oaconvolve_fftw(std::span<const T> input, std::span<const T> kernel,
std::span<T> output) {
auto &plan = FFTConvEngine<T>::get_for_ksize(kernel.size());
auto &plan = FFTConvEngine<T, PlannerFlag>::get_for_ksize(kernel.size());
plan.template oaconvolve<Mode>(input, kernel, output);
}

Expand Down
23 changes: 11 additions & 12 deletions include/fftconv/fftw.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ A C++ FFTW wrapper

namespace fftw {

// const static unsigned int FLAGS = FFTW_ESTIMATE;
const static unsigned int FLAGS = FFTW_EXHAUSTIVE;

// Place this at the beginning of main() and RAII will take care of setting up
// and tearing down FFTW3 (threads and wisdom)
// NOLINTNEXTLINE(*-special-member-functions)
Expand Down Expand Up @@ -561,27 +558,29 @@ template <typename T> struct R2CSplitBuffer {
}
};

template <Floating T, bool InPlace = false>
template <Floating T, int PlannerFlag = FFTW_ESTIMATE, bool InPlace = false>
struct EngineDFT1D : public cache_mixin<EngineDFT1D<T>> {
using Cx = fftw::Complex<T>;
using Plan = fftw::Plan<T>;

C2CBuffer<T> buf;
C2CBuffer<T, InPlace> buf;
Plan plan_forward;
Plan plan_backward;

explicit EngineDFT1D(size_t n)
: buf(n),
plan_forward(Plan::dft_1d(n, buf.in, buf.out, FFTW_FORWARD, FLAGS)),
plan_backward(Plan::dft_1d(n, buf.out, buf.in, FFTW_BACKWARD, FLAGS)){};
: buf(n), plan_forward(Plan::dft_1d(n, buf.in, buf.out, FFTW_FORWARD,
PlannerFlag)),
plan_backward(
Plan::dft_1d(n, buf.out, buf.in, FFTW_BACKWARD, PlannerFlag)){};

void forward() { plan_forward.execute(); }
void forward(const Cx *in, Cx *out) const { plan_forward.execute(in, out); }
void backward() { plan_backward.execute(); }
void backward(const Cx *in, Cx *out) const { plan_backward.execute(in, out); }
};

template <Floating T> struct EngineR2C1D : public cache_mixin<EngineR2C1D<T>> {
template <Floating T, int PlannerFlag = FFTW_ESTIMATE>
struct EngineR2C1D : public cache_mixin<EngineR2C1D<T>> {
using Cx = fftw::Complex<T>;
using Plan = fftw::Plan<T>;

Expand All @@ -591,9 +590,9 @@ template <Floating T> struct EngineR2C1D : public cache_mixin<EngineR2C1D<T>> {

explicit EngineR2C1D(size_t n)
: buf(n), plan_forward(Plan::dft_r2c_1d(static_cast<int>(n), buf.in,
buf.out, FLAGS)),
plan_backward(
Plan::dft_c2r_1d(static_cast<int>(n), buf.out, buf.in, FLAGS)) {}
buf.out, PlannerFlag)),
plan_backward(Plan::dft_c2r_1d(static_cast<int>(n), buf.out, buf.in,
PlannerFlag)) {}

void forward() { plan_forward.execute(); }
void forward(const T *in, Cx *out) const {
Expand Down
23 changes: 19 additions & 4 deletions py/test/test_fftconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
from pyfftconv import convolve, convolve_, hilbert, hilbert_, oaconvolve, oaconvolve_


class TestFFTConv(unittest.TestCase):
class TestConvolve(unittest.TestCase):
def setUp(self):
# Example test data
self.a = np.array([1, 2, 3, 4, 5], dtype=np.float64)
self.k = np.array([0.2, 0.5, 0.2], dtype=np.float64)
self.out_full = np.empty(self.a.size + self.k.size - 1)
Expand All @@ -24,6 +23,16 @@ def test_convolve_out_full(self):
expected = np.convolve(self.a, self.k, mode="full")
assert_allclose(self.out_full, expected, rtol=1e-5, atol=1e-8)

def test_convolve_same(self):
result = convolve(self.a, self.k, mode="same")
expected = np.convolve(self.a, self.k, mode="same")
assert_allclose(result, expected, rtol=1e-5, atol=1e-8)

def test_convolve_out_same(self):
convolve_(self.a, self.k, self.out_same, mode="same")
expected = np.convolve(self.a, self.k, mode="same")
assert_allclose(self.out_same, expected, rtol=1e-5, atol=1e-8)

def test_oaconvolve_full(self):
result = oaconvolve(self.a, self.k, mode="full")
expected = np.convolve(self.a, self.k, mode="full")
Expand All @@ -44,15 +53,21 @@ def test_oaconvolve_out_same(self):
expected = np.convolve(self.a, self.k, mode="same")
assert_allclose(self.out_same, expected, rtol=1e-5, atol=1e-8)


class TestHilbert(unittest.TestCase):
def setUp(self):
self.a = np.array([1, 2, 3, 4, 5], dtype=np.float64)
self.out = np.empty(self.a.size)

def test_hilbert(self):
result = hilbert(self.a)
expected = np.abs(signal.hilbert(self.a))
assert_allclose(result, expected, rtol=1e-5, atol=1e-8)

def test_hilbert_out(self):
hilbert_(self.a, self.out_same)
hilbert_(self.a, self.out)
expected = np.abs(signal.hilbert(self.a))
assert_allclose(self.out_same, expected, rtol=1e-5, atol=1e-8)
assert_allclose(self.out, expected, rtol=1e-5, atol=1e-8)


if __name__ == "__main__":
Expand Down
15 changes: 2 additions & 13 deletions test/test_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,12 @@ constexpr double DoubleTol{1e-9};
constexpr float FloatTol{1e-5};

template <fftconv::Floating T>
void ExpectVectorsNear(const std::span<const T> expected,
const std::span<const T> actual, const T tolerance) {
void ExpectVectorsNear(const std::span<const T> actual,
const std::span<const T> expected, const T tolerance) {
ASSERT_EQ(expected.size(), actual.size()) << "Vectors are of unequal length";

for (size_t i = 0; i < expected.size() && i < actual.size(); ++i) {
ASSERT_NEAR(expected[i], actual[i], tolerance)
<< "Vectors differ at index " << i;
}
}

template <typename T>
inline void ExpectArraysNear(const T *arr1, const T *arr2, size_t size,
T tolerance) {
for (size_t i = 0; i < size; ++i) {
if (std::abs(arr1[i] - arr2[i]) > tolerance) {
GTEST_FAIL() << "Arrays differ at index " << i << ": expected " << arr1[i]
<< " but got " << arr2[i] << ", tolerance = " << tolerance;
}
}
}
Loading

0 comments on commit 271a9a1

Please sign in to comment.