Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for f32, as well as f64 #17

Merged
merged 9 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/kernels.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::simd::{f32x8, f64x8};
use std::simd::{f32x16, f64x8};

use num_traits::Float;

Expand All @@ -12,7 +12,7 @@ macro_rules! fft_butterfly_n_simd {
dist: usize,
) {
let chunk_size = dist << 1;
assert!(chunk_size >= 16);
assert!(chunk_size >= $lanes * 2);
reals
.chunks_exact_mut(chunk_size)
.zip(imags.chunks_exact_mut(chunk_size))
Expand Down Expand Up @@ -49,7 +49,7 @@ macro_rules! fft_butterfly_n_simd {
}

fft_butterfly_n_simd!(fft_64_chunk_n_simd, f64, 8, f64x8);
fft_butterfly_n_simd!(fft_32_chunk_n_simd, f32, 8, f32x8);
fft_butterfly_n_simd!(fft_32_chunk_n_simd, f32, 16, f32x16);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Shnatsel Bumped the number of SIMD lanes for f32 from 8 to 16 (`f32x8` → `f32x16`), so a single f32 vector matches the 512-bit width of an AVX-512 register.

pub(crate) fn fft_chunk_n<T: Float>(
reals: &mut [T],
Expand Down
12 changes: 7 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl_fft_for!(fft_64, f64, Planner64, fft_64_with_opts_and_plan);
impl_fft_for!(fft_32, f32, Planner32, fft_32_with_opts_and_plan);

macro_rules! impl_fft_with_opts_and_plan_for {
($func_name:ident, $precision:ty, $planner:ty, $simd_butterfly_kernel:ident) => {
($func_name:ident, $precision:ty, $planner:ty, $simd_butterfly_kernel:ident, $lanes:literal) => {
/// Same as [fft], but also accepts [`Options`] that control optimization strategies, as well as
/// a [`Planner`] in the case that this FFT will need to be run multiple times.
///
Expand Down Expand Up @@ -102,7 +102,7 @@ macro_rules! impl_fft_with_opts_and_plan_for {
if t < n - 1 {
filter_twiddles(twiddles_re, twiddles_im);
}
if chunk_size >= 16 {
if chunk_size >= $lanes * 2 {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Shnatsel This check now compares `chunk_size` against the number of lanes × 2 (`$lanes * 2`) instead of the hard-coded 16.

$simd_butterfly_kernel(reals, imags, twiddles_re, twiddles_im, dist);
} else {
fft_chunk_n(reals, imags, twiddles_re, twiddles_im, dist);
Expand Down Expand Up @@ -131,23 +131,25 @@ impl_fft_with_opts_and_plan_for!(
fft_64_with_opts_and_plan,
f64,
Planner64,
fft_64_chunk_n_simd
fft_64_chunk_n_simd,
8
);

impl_fft_with_opts_and_plan_for!(
fft_32_with_opts_and_plan,
f32,
Planner32,
fft_32_chunk_n_simd
fft_32_chunk_n_simd,
16
);

#[cfg(test)]
mod tests {
use std::ops::Range;

use utilities::assert_float_closeness;
use utilities::rustfft::num_complex::Complex;
use utilities::rustfft::FftPlanner;
use utilities::rustfft::num_complex::Complex;

use super::*;

Expand Down
Loading