Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions to support interleaved complex num #27

Merged
merged 27 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
21d77db
Add a function to separate AoS to SoA
smu160 Apr 30, 2024
58374f1
Fix formatting
smu160 Apr 30, 2024
59de096
Add reverse separate fn and add test
smu160 May 1, 2024
a716cd7
Add fft_*_interleaved impls and tests
smu160 May 1, 2024
ecca75b
Fix formatting
smu160 May 1, 2024
23c1d21
Simplify separate_re_im
Shnatsel May 3, 2024
13fe64c
Update docs for interleaved fft
smu160 May 8, 2024
a0d2822
Transition `num-complex` dependency to optional
smu160 May 8, 2024
05713ff
Fix formatting
smu160 May 8, 2024
2923d53
Merge branch 'main' into feature/interleaved-complex-nums
smu160 May 8, 2024
1377ead
Fix formatting
smu160 May 8, 2024
9699f9e
Vectorize deinterleaving of AoS --> SoA
smu160 May 22, 2024
4d43190
Put macro definition begind feature flag
smu160 May 22, 2024
356e468
Fix formatting
smu160 May 22, 2024
fbb1bad
Remove index tracker from hot path
smu160 May 22, 2024
3957834
Add benchmark using criterion
smu160 May 22, 2024
b429a32
Fix formatting
smu160 May 22, 2024
b8affb2
Account for different signal sizes
smu160 May 22, 2024
c7856d0
Merge branch 'main' into feature/interleaved-complex-nums
smu160 May 22, 2024
f639900
Merge branch 'main' into feature/interleaved-complex-nums
smu160 Jun 27, 2024
5c09b89
Merge branch 'main' into feature/interleaved-complex-nums
smu160 Jul 14, 2024
98503e9
Add new python benchmarking "framework"
smu160 Jul 14, 2024
a201e2c
Update todo
smu160 Jul 14, 2024
403684b
Make examples output time elapsed in nanoseconds
smu160 Jul 14, 2024
1343268
Remove `array_chunks`
smu160 Jul 14, 2024
e94f2fc
Use new de-interleaving function
smu160 Jul 17, 2024
713b3a6
Update pre-commit hook to run clippy everywhere
smu160 Jul 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ jobs:
uses: actions-rs/cargo@v1
with:
command: test
args: --all-features

coverage:
runs-on: ubuntu-latest
Expand Down
8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ exclude = ["assets", "scripts", "benches"]
[dependencies]
num-traits = "0.2.18"
multiversion = "0.7"
num-complex = { version = "0.4.6", features = ["bytemuck"], optional = true }
bytemuck = { version = "1.16.0", optional = true }

[features]
default = []
complex-nums = ["dep:num-complex", "dep:bytemuck"]

[dev-dependencies]
utilities = { path = "utilities" }
Expand All @@ -27,3 +33,5 @@ panic = "abort"
inherits = "release"
debug = true

[package.metadata.docs.rs]
all-features = true
185 changes: 179 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
#![doc = include_str!("../README.md")]
#![warn(clippy::complexity)]
#![warn(missing_docs)]
#![warn(clippy::style)]
#![warn(clippy::correctness)]
#![warn(clippy::suspicious)]
#![warn(clippy::perf)]
#![warn(
missing_docs,
clippy::complexity,
clippy::perf,
clippy::style,
clippy::correctness,
clippy::suspicious
)]
#![forbid(unsafe_code)]
#![feature(portable_simd, avx512_target_feature)]

#[cfg(feature = "complex-nums")]
use std::simd::{f32x16, f64x8};

#[cfg(feature = "complex-nums")]
use bytemuck::cast_slice;
#[cfg(feature = "complex-nums")]
use num_complex::Complex;
#[cfg(feature = "complex-nums")]
use num_traits::Float;

use crate::cobra::cobra_apply;
use crate::kernels::{
fft_32_chunk_n_simd, fft_64_chunk_n_simd, fft_chunk_2, fft_chunk_4, fft_chunk_n,
Expand Down Expand Up @@ -83,6 +95,30 @@
impl_fft_for!(fft_64, f64, Planner64, fft_64_with_opts_and_plan);
impl_fft_for!(fft_32, f32, Planner32, fft_32_with_opts_and_plan);

#[cfg(feature = "complex-nums")]
macro_rules! impl_fft_interleaved_for {
($func_name:ident, $precision:ty, $fft_func:ident, $deinterleaving_func: ident) => {
/// FFT Interleaved -- this is an alternative to [`fft_64`]/[`fft_32`] in the case where
/// the input data is a array of [`Complex`].
///
/// The input should be provided in normal order, and then the modified input is
/// bit-reversed.
///
/// ## References
/// <https://inst.eecs.berkeley.edu/~ee123/sp15/Notes/Lecture08_FFT_and_SpectAnalysis.key.pdf>
pub fn $func_name(signal: &mut [Complex<$precision>], direction: Direction) {
let (mut reals, mut imags) = $deinterleaving_func(signal);
$fft_func(&mut reals, &mut imags, direction);
signal.copy_from_slice(&combine_re_im(&reals, &imags))
}
};
}

#[cfg(feature = "complex-nums")]
impl_fft_interleaved_for!(fft_32_interleaved, f32, fft_32, separate_re_im_f32);
#[cfg(feature = "complex-nums")]
impl_fft_interleaved_for!(fft_64_interleaved, f64, fft_64, separate_re_im_f64);

macro_rules! impl_fft_with_opts_and_plan_for {
($func_name:ident, $precision:ty, $planner:ty, $simd_butterfly_kernel:ident, $lanes:literal) => {
/// Same as [fft], but also accepts [`Options`] that control optimization strategies, as well as
Expand Down Expand Up @@ -172,6 +208,86 @@
16
);

macro_rules! impl_separate_re_im {

Check failure on line 211 in src/lib.rs

View workflow job for this annotation

GitHub Actions / Clippy

unused macro definition: `impl_separate_re_im`

Check failure on line 211 in src/lib.rs

View workflow job for this annotation

GitHub Actions / Clippy

unused macro definition: `impl_separate_re_im`
($func_name:ident, $precision:ty, $lanes:literal, $simd_vec:ty) => {
/// Utility function to separate interleaved format signals (i.e., Vector of Complex Number Structs)
/// into separate vectors for the corresponding real and imaginary components.
#[multiversion::multiversion(
targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
pub fn $func_name(
signal: &[Complex<$precision>],
) -> (Vec<$precision>, Vec<$precision>) {
let n = signal.len();
let mut reals = vec![0.0; n];
let mut imags = vec![0.0; n];

let complex_f64 = cast_slice(signal);
const CHUNK_SIZE: usize = $lanes * 2;

let mut i = 0;
smu160 marked this conversation as resolved.
Show resolved Hide resolved
for ((chunk, chunk_re), chunk_im) in complex_f64
.chunks_exact(CHUNK_SIZE)
.zip(reals.chunks_exact_mut($lanes))
.zip(imags.chunks_exact_mut($lanes))
{
let (first_half, second_half) = chunk.split_at($lanes);

let a = <$simd_vec>::from_slice(&first_half);
let b = <$simd_vec>::from_slice(&second_half);
let (re_deinterleaved, im_deinterleaved) = a.deinterleave(b);

chunk_re.copy_from_slice(&re_deinterleaved.to_array());
chunk_im.copy_from_slice(&im_deinterleaved.to_array());
i += CHUNK_SIZE;
}

let remainder = complex_f64.chunks_exact(CHUNK_SIZE).remainder();
if !remainder.is_empty() {
remainder
.chunks_exact(2)
.zip(reals[i..].iter_mut())
.zip(imags[i..].iter_mut())
.for_each(|((c, re), im)| {
*re = c[0];
*im = c[1];
});
}

(reals, imags)
}
};
}

#[cfg(feature = "complex-nums")]
impl_separate_re_im!(separate_re_im_f32, f32, 16, f32x16);

#[cfg(feature = "complex-nums")]
impl_separate_re_im!(separate_re_im_f64, f64, 8, f64x8);

/// Utility function to combine separate vectors of real and imaginary components
/// into a single vector of Complex Number Structs.
///
/// # Panics
///
/// Panics if `reals.len() != imags.len()`.
#[cfg(feature = "complex-nums")]
pub fn combine_re_im<T: Float>(reals: &[T], imags: &[T]) -> Vec<Complex<T>> {
assert_eq!(reals.len(), imags.len());

reals
.iter()
.zip(imags.iter())
.map(|(z_re, z_im)| Complex::new(*z_re, *z_im))
.collect()
}

#[cfg(test)]
mod tests {
use std::ops::Range;
Expand All @@ -182,6 +298,23 @@

use super::*;

#[cfg(feature = "complex-nums")]
#[test]
fn test_separate_and_combine_re_im() {
let complex_vec: Vec<_> = vec![
Complex::new(1.0, 2.0),
Complex::new(3.0, 4.0),
Complex::new(5.0, 6.0),
Complex::new(7.0, 8.0),
];

let (reals, imags) = separate_re_im_f64(&complex_vec);

let recombined_vec = combine_re_im(&reals, &imags);

assert_eq!(complex_vec, recombined_vec);
}

macro_rules! non_power_of_2_planner {
($test_name:ident, $planner:ty) => {
#[should_panic]
Expand Down Expand Up @@ -278,6 +411,46 @@
test_fft_correctness!(fft_correctness_32, f32, fft_32, 4, 9);
test_fft_correctness!(fft_correctness_64, f64, fft_64, 4, 17);

#[cfg(feature = "complex-nums")]
#[test]
fn fft_interleaved_correctness() {
let n = 10;
let big_n = 1 << n;
let mut actual_signal: Vec<_> = (1..=big_n).map(|i| Complex::new(i as f64, 0.0)).collect();
let mut expected_reals: Vec<_> = (1..=big_n).map(|i| i as f64).collect();
let mut expected_imags = vec![0.0; big_n];

fft_64_interleaved(&mut actual_signal, Direction::Forward);
fft_64(&mut expected_reals, &mut expected_imags, Direction::Forward);

actual_signal
.iter()
.zip(expected_reals)
.zip(expected_imags)
.for_each(|((z, z_re), z_im)| {
assert_float_closeness(z.re, z_re, 1e-10);
assert_float_closeness(z.im, z_im, 1e-10);
});

let n = 10;
let big_n = 1 << n;
let mut actual_signal: Vec<_> = (1..=big_n).map(|i| Complex::new(i as f32, 0.0)).collect();
let mut expected_reals: Vec<_> = (1..=big_n).map(|i| i as f32).collect();
let mut expected_imags = vec![0.0; big_n];

fft_32_interleaved(&mut actual_signal, Direction::Forward);
fft_32(&mut expected_reals, &mut expected_imags, Direction::Forward);

actual_signal
.iter()
.zip(expected_reals)
.zip(expected_imags)
.for_each(|((z, z_re), z_im)| {
assert_float_closeness(z.re, z_re, 1e-10);
assert_float_closeness(z.im, z_im, 1e-10);
});
}

#[test]
fn ifft_using_fft() {
let n = 4;
Expand Down
Loading