QuState · smu160 · Jul 17, 2024 · Apr 30, 2024 · Apr 30, 2024 · May 1, 2024
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -63,6 +63,7 @@ jobs:
         uses: actions-rs/cargo@v1
         with:
           command: test
+          args: --all-features
 
   coverage:
     runs-on: ubuntu-latest

diff --git a/Cargo.toml b/Cargo.toml
@@ -13,6 +13,12 @@ exclude = ["assets", "scripts", "benches"]
 [dependencies]
 num-traits = "0.2.18"
 multiversion = "0.7"
+num-complex = { version = "0.4.6", features = ["bytemuck"], optional = true }
+bytemuck = { version = "1.16.0", optional = true }
+
+[features]
+default = []
+complex-nums = ["dep:num-complex", "dep:bytemuck"]
 
 [dev-dependencies]
 utilities = { path = "utilities" }
@@ -27,3 +33,5 @@ panic = "abort"
 inherits = "release"
 debug = true
 
+[package.metadata.docs.rs]
+all-features = true
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,13 +1,25 @@
 #![doc = include_str!("../README.md")]
-#![warn(clippy::complexity)]
-#![warn(missing_docs)]
-#![warn(clippy::style)]
-#![warn(clippy::correctness)]
-#![warn(clippy::suspicious)]
-#![warn(clippy::perf)]
+#![warn(
+    missing_docs,
+    clippy::complexity,
+    clippy::perf,
+    clippy::style,
+    clippy::correctness,
+    clippy::suspicious
+)]
 #![forbid(unsafe_code)]
 #![feature(portable_simd, avx512_target_feature)]
 
+#[cfg(feature = "complex-nums")]
+use std::simd::{f32x16, f64x8};
+
+#[cfg(feature = "complex-nums")]
+use bytemuck::cast_slice;
+#[cfg(feature = "complex-nums")]
+use num_complex::Complex;
+#[cfg(feature = "complex-nums")]
+use num_traits::Float;
+
 use crate::cobra::cobra_apply;
 use crate::kernels::{
     fft_32_chunk_n_simd, fft_64_chunk_n_simd, fft_chunk_2, fft_chunk_4, fft_chunk_n,
@@ -83,6 +95,30 @@
 impl_fft_for!(fft_64, f64, Planner64, fft_64_with_opts_and_plan);
 impl_fft_for!(fft_32, f32, Planner32, fft_32_with_opts_and_plan);
 
+#[cfg(feature = "complex-nums")]
+macro_rules! impl_fft_interleaved_for {
+    ($func_name:ident, $precision:ty, $fft_func:ident, $deinterleaving_func: ident) => {
+        /// FFT Interleaved -- this is an alternative to [`fft_64`]/[`fft_32`] in the case where
+        /// the input data is a array of [`Complex`].
+        ///
+        /// The input should be provided in normal order, and then the modified input is
+        /// bit-reversed.
+        ///
+        /// ## References
+        /// <https://inst.eecs.berkeley.edu/~ee123/sp15/Notes/Lecture08_FFT_and_SpectAnalysis.key.pdf>
+        pub fn $func_name(signal: &mut [Complex<$precision>], direction: Direction) {
+            let (mut reals, mut imags) = $deinterleaving_func(signal);
+            $fft_func(&mut reals, &mut imags, direction);
+            signal.copy_from_slice(&combine_re_im(&reals, &imags))
+        }
+    };
+}
+
+#[cfg(feature = "complex-nums")]
+impl_fft_interleaved_for!(fft_32_interleaved, f32, fft_32, separate_re_im_f32);
+#[cfg(feature = "complex-nums")]
+impl_fft_interleaved_for!(fft_64_interleaved, f64, fft_64, separate_re_im_f64);
+
 macro_rules! impl_fft_with_opts_and_plan_for {
     ($func_name:ident, $precision:ty, $planner:ty, $simd_butterfly_kernel:ident, $lanes:literal) => {
         /// Same as [fft], but also accepts [`Options`] that control optimization strategies, as well as
@@ -172,6 +208,86 @@
     16
 );
 
+macro_rules! impl_separate_re_im {
+    ($func_name:ident, $precision:ty, $lanes:literal, $simd_vec:ty) => {
+        /// Utility function to separate interleaved format signals (i.e., Vector of Complex Number Structs)
+        /// into separate vectors for the corresponding real and imaginary components.
+        #[multiversion::multiversion(
+                                    targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
+                                            "x86_64+avx2+fma", // x86_64-v3
+                                            "x86_64+sse4.2", // x86_64-v2
+                                            "x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
+                                            "x86+avx2+fma",
+                                            "x86+sse4.2",
+                                            "x86+sse2",
+                                        ))]
+        pub fn $func_name(
+            signal: &[Complex<$precision>],
+        ) -> (Vec<$precision>, Vec<$precision>) {
+            let n = signal.len();
+            let mut reals = vec![0.0; n];
+            let mut imags = vec![0.0; n];
+
+            let complex_f64 = cast_slice(signal);
+            const CHUNK_SIZE: usize = $lanes * 2;
+
+            let mut i = 0;
+            for ((chunk, chunk_re), chunk_im) in complex_f64
+                .chunks_exact(CHUNK_SIZE)
+                .zip(reals.chunks_exact_mut($lanes))
+                .zip(imags.chunks_exact_mut($lanes))
+            {
+                let (first_half, second_half) = chunk.split_at($lanes);
+
+                let a = <$simd_vec>::from_slice(&first_half);
+                let b = <$simd_vec>::from_slice(&second_half);
+                let (re_deinterleaved, im_deinterleaved) = a.deinterleave(b);
+
+                chunk_re.copy_from_slice(&re_deinterleaved.to_array());
+                chunk_im.copy_from_slice(&im_deinterleaved.to_array());
+                i += CHUNK_SIZE;
+            }
+
+            let remainder = complex_f64.chunks_exact(CHUNK_SIZE).remainder();
+            if !remainder.is_empty() {
+            remainder
+                .chunks_exact(2)
+                .zip(reals[i..].iter_mut())
+                .zip(imags[i..].iter_mut())
+                .for_each(|((c, re), im)| {
+                    *re = c[0];
+                    *im = c[1];
+                });
+            }
+
+            (reals, imags)
+        }
+    };
+}
+
+#[cfg(feature = "complex-nums")]
+impl_separate_re_im!(separate_re_im_f32, f32, 16, f32x16);
+
+#[cfg(feature = "complex-nums")]
+impl_separate_re_im!(separate_re_im_f64, f64, 8, f64x8);
+
+/// Utility function to combine separate vectors of real and imaginary components
+/// into a single vector of Complex Number Structs.
+///
+/// # Panics
+///
+/// Panics if `reals.len() != imags.len()`.
+#[cfg(feature = "complex-nums")]
+pub fn combine_re_im<T: Float>(reals: &[T], imags: &[T]) -> Vec<Complex<T>> {
+    assert_eq!(reals.len(), imags.len());
+
+    reals
+        .iter()
+        .zip(imags.iter())
+        .map(|(z_re, z_im)| Complex::new(*z_re, *z_im))
+        .collect()
+}
+
 #[cfg(test)]
 mod tests {
     use std::ops::Range;
@@ -182,6 +298,23 @@
 
     use super::*;
 
+    #[cfg(feature = "complex-nums")]
+    #[test]
+    fn test_separate_and_combine_re_im() {
+        let complex_vec: Vec<_> = vec![
+            Complex::new(1.0, 2.0),
+            Complex::new(3.0, 4.0),
+            Complex::new(5.0, 6.0),
+            Complex::new(7.0, 8.0),
+        ];
+
+        let (reals, imags) = separate_re_im_f64(&complex_vec);
+
+        let recombined_vec = combine_re_im(&reals, &imags);
+
+        assert_eq!(complex_vec, recombined_vec);
+    }
+
     macro_rules! non_power_of_2_planner {
         ($test_name:ident, $planner:ty) => {
             #[should_panic]
@@ -278,6 +411,46 @@
     test_fft_correctness!(fft_correctness_32, f32, fft_32, 4, 9);
     test_fft_correctness!(fft_correctness_64, f64, fft_64, 4, 17);
 
+    #[cfg(feature = "complex-nums")]
+    #[test]
+    fn fft_interleaved_correctness() {
+        let n = 10;
+        let big_n = 1 << n;
+        let mut actual_signal: Vec<_> = (1..=big_n).map(|i| Complex::new(i as f64, 0.0)).collect();
+        let mut expected_reals: Vec<_> = (1..=big_n).map(|i| i as f64).collect();
+        let mut expected_imags = vec![0.0; big_n];
+
+        fft_64_interleaved(&mut actual_signal, Direction::Forward);
+        fft_64(&mut expected_reals, &mut expected_imags, Direction::Forward);
+
+        actual_signal
+            .iter()
+            .zip(expected_reals)
+            .zip(expected_imags)
+            .for_each(|((z, z_re), z_im)| {
+                assert_float_closeness(z.re, z_re, 1e-10);
+                assert_float_closeness(z.im, z_im, 1e-10);
+            });
+
+        let n = 10;
+        let big_n = 1 << n;
+        let mut actual_signal: Vec<_> = (1..=big_n).map(|i| Complex::new(i as f32, 0.0)).collect();
+        let mut expected_reals: Vec<_> = (1..=big_n).map(|i| i as f32).collect();
+        let mut expected_imags = vec![0.0; big_n];
+
+        fft_32_interleaved(&mut actual_signal, Direction::Forward);
+        fft_32(&mut expected_reals, &mut expected_imags, Direction::Forward);
+
+        actual_signal
+            .iter()
+            .zip(expected_reals)
+            .zip(expected_imags)
+            .for_each(|((z, z_re), z_im)| {
+                assert_float_closeness(z.re, z_re, 1e-10);
+                assert_float_closeness(z.im, z_im, 1e-10);
+            });
+    }
+
     #[test]
     fn ifft_using_fft() {
         let n = 4;