Merge branch 'main' into feature/r2c

QuState · Jul 17, 2024 · 675cc5e · 675cc5e
2 parents 0c55f93 + 08eedcb
commit 675cc5e
Show file tree

Hide file tree

Showing 16 changed files with 851 additions and 105 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -13,12 +13,19 @@ exclude = ["assets", "scripts", "benches"]
 [dependencies]
 num-traits = "0.2.18"
 multiversion = "0.7"
+num-complex = { version = "0.4.6", features = ["bytemuck"], optional = true }
+bytemuck = { version = "1.16.0", optional = true }
+
+[features]
+default = []
+complex-nums = ["dep:num-complex", "dep:bytemuck"]
 
 [dev-dependencies]
-utilities = { path = "utilities" }
-fftw = "0.8.0"
 criterion = "0.5.1"
+fftw = "0.8.0"
 realfft = "3.3.0"
+rand = "0.8.5"
+utilities = { path = "utilities" }
 
 [[bench]]
 name = "bench"
@@ -33,3 +40,5 @@ panic = "abort"
 inherits = "release"
 debug = true
 
+[package.metadata.docs.rs]
+all-features = true
diff --git a/benches/README.md b/benches/README.md
@@ -4,35 +4,26 @@
 
 ### Setup Environment
 
-1. Install [FFTW3](http://www.fftw.org/download.html)[^1]
+1. Clone the `PhastFT` git repository [^2].
 
-   It may be possible to install `fftw3` using a package manager.
-
-   ##### debian
-   ```bash
-   sudo apt install libfftw3-dev
-   ```
-
-2. Clone the `PhastFT` git repository [^2].
-
-3. Create virtual env
+2. Create virtual env
 
 ```bash
 cd ~/PhastFT/benches && python3 -m venv .env && source .env/bin/activate
 ```
 
-4. Install python dependencies[^1]
+3. Install python dependencies[^1]
 
 ```bash
 pip install -r requirements.txt
 cd ~/PhastFT/pyphastft
 pip install .
 ```
 
-5. Run the `FFTW3` vs. `RustFFT` vs. `PhastFT` benchmark for all inputs of size `2^n`, where `n \in [4, 30].`
+5. Run the `FFTW3-RB` vs. `RustFFT` vs. `PhastFT` benchmarks
 
 ```bash
-./benchmark.sh 4 29
+python run_benches.py
 ```
 
 6. Plot the results
@@ -125,13 +116,6 @@ On linux, open access to performance monitoring, and observability operations fo
 echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
 ```
 
-Add debug to `Cargo.toml` under `profile.release`:
-
-```bash
-[profile.release]
-debug = true
-```
-
 Finally, run:
 
 ```bash

diff --git a/benches/bench.rs b/benches/bench.rs
@@ -3,9 +3,22 @@ use realfft::num_complex::Complex;
 use realfft::RealFftPlanner;
 use utilities::gen_random_signal;
 
+use num_traits::Float;
+use phastft::{
+    fft_32_with_opts_and_plan, fft_64_with_opts_and_plan,
+    options::Options,
+    planner::{Direction, Planner32, Planner64},
+};
+use rand::{
+    distributions::{Distribution, Standard},
+    thread_rng, Rng,
+};
+use utilities::rustfft::num_complex::Complex;
+use utilities::rustfft::FftPlanner;
+
 use phastft::{fft::r2c_fft_f64, fft_64, planner::Direction};
 
-fn criterion_benchmark(c: &mut Criterion) {
+/// Runs the `r2c_versus_c2c` benchmark group over power-of-two input sizes
+/// from 2^10 to 2^20.
+fn benchmark_r2c_vs_c2c(c: &mut Criterion) {
     let sizes = vec![1 << 10, 1 << 12, 1 << 14, 1 << 16, 1 << 18, 1 << 20];
 
     let mut group = c.benchmark_group("r2c_versus_c2c");
@@ -56,9 +69,173 @@ fn criterion_benchmark(c: &mut Criterion) {
                     .expect("fft.process() failed!");
             });
         });
+    }
+    // Finish the group and close this function here. Without these two lines
+    // every item that follows ends up nested inside an unclosed function body.
+    group.finish();
+}
+
+
+/// Base-2 exponents of the benchmarked FFT lengths.
+///
+/// Each benchmark below computes its input length as `1 << n` for every `n`
+/// in this table, i.e. lengths from 2^6 through 2^24.
+const LENGTHS: &[usize] = &[
+    6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+];
+
+/// Draws `2 * n` random samples and splits them into `n` real parts and `n`
+/// imaginary parts, returned as `(reals, imags)`.
+///
+/// Samples are consumed in consecutive pairs: the first value of each pair
+/// becomes the real part and the second the imaginary part.
+fn generate_numbers<T: Float>(n: usize) -> (Vec<T>, Vec<T>)
+where
+    Standard: Distribution<T>,
+{
+    let samples: Vec<T> = thread_rng().sample_iter(Standard).take(2 * n).collect();
+
+    samples
+        .chunks_exact(2)
+        .map(|pair| (pair[0], pair[1]))
+        .unzip()
+}
+
+/// Builds a random signal of `n` complex values.
+///
+/// `2 * n` samples are drawn and consumed in consecutive pairs: the first
+/// value of each pair is stored in `re`, the second in `im`.
+fn generate_complex_numbers<T: Float + Default>(n: usize) -> Vec<Complex<T>>
+where
+    Standard: Distribution<T>,
+{
+    let samples: Vec<T> = thread_rng().sample_iter(Standard).take(2 * n).collect();
+
+    samples
+        .chunks_exact(2)
+        .map(|pair| Complex {
+            re: pair[0],
+            im: pair[1],
+        })
+        .collect()
+}
+
+/// Benchmarks the forward `f32` FFT of PhastFT against RustFFT.
+///
+/// For every exponent in `LENGTHS` the input length is `1 << exponent`. Both
+/// implementations are registered in the "Forward f32" group, with throughput
+/// reported in elements.
+fn benchmark_forward_f32(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Forward f32");
+
+    for &exponent in LENGTHS {
+        let len: usize = 1 << exponent;
+        group.throughput(Throughput::Elements(len as u64));
+
+        // PhastFT operates on separate real and imaginary buffers.
+        let phastft_opts = Options::guess_options(len);
+        let phastft_plan = Planner32::new(len, Direction::Forward);
+        let (mut reals, mut imags) = generate_numbers(len);
+        let phastft_id = BenchmarkId::new("PhastFT FFT Forward", len);
+
+        group.bench_with_input(phastft_id, &len, |bencher, &_len| {
+            bencher.iter(|| {
+                fft_32_with_opts_and_plan(
+                    black_box(&mut reals),
+                    black_box(&mut imags),
+                    black_box(&phastft_opts),
+                    black_box(&phastft_plan),
+                );
+            });
+        });
+
+        // RustFFT operates on a single buffer of complex values.
+        let mut rustfft_planner = FftPlanner::<f32>::new();
+        let rustfft_plan = rustfft_planner.plan_fft_forward(len);
+        let mut signal = generate_complex_numbers(len);
+        let rustfft_id = BenchmarkId::new("RustFFT FFT Forward", len);
+
+        group.bench_with_input(rustfft_id, &len, |bencher, &_len| {
+            bencher.iter(|| rustfft_plan.process(black_box(&mut signal)));
+        });
     }
     group.finish();
 }
 
-criterion_group!(benches, criterion_benchmark);
+/// Benchmarks PhastFT's `f32` FFT with a planner built for
+/// `Direction::Reverse`.
+///
+/// One stand-alone benchmark is registered per exponent in `LENGTHS`, using
+/// the default `Options` and a random input of length `1 << exponent`.
+fn benchmark_inverse_f32(c: &mut Criterion) {
+    let options = Options::default();
+
+    for &exponent in LENGTHS {
+        let len: usize = 1 << exponent;
+        let bench_name = format!("FFT Inverse f32 {} elements", len);
+        let planner = Planner32::new(len, Direction::Reverse);
+
+        c.bench_function(&bench_name, |bencher| {
+            // The input is generated once per benchmark, outside the timed loop.
+            let (mut reals, mut imags) = generate_numbers(len);
+            bencher.iter(|| {
+                fft_32_with_opts_and_plan(
+                    black_box(&mut reals),
+                    black_box(&mut imags),
+                    black_box(&options),
+                    black_box(&planner),
+                );
+            });
+        });
+    }
+}
+
+/// Benchmarks the forward `f64` FFT of PhastFT against RustFFT.
+///
+/// For every exponent in `LENGTHS` the input length is `1 << exponent`. Both
+/// implementations are registered in the "Forward f64" group, with throughput
+/// reported in elements.
+fn benchmark_forward_f64(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Forward f64");
+
+    for &exponent in LENGTHS {
+        let len: usize = 1 << exponent;
+
+        // PhastFT operates on separate real and imaginary buffers.
+        let phastft_opts = Options::guess_options(len);
+        let phastft_plan = Planner64::new(len, Direction::Forward);
+        let (mut reals, mut imags) = generate_numbers(len);
+        group.throughput(Throughput::Elements(len as u64));
+        let phastft_id = BenchmarkId::new("PhastFT FFT Forward", len);
+
+        group.bench_with_input(phastft_id, &len, |bencher, &_len| {
+            bencher.iter(|| {
+                fft_64_with_opts_and_plan(
+                    black_box(&mut reals),
+                    black_box(&mut imags),
+                    black_box(&phastft_opts),
+                    black_box(&phastft_plan),
+                );
+            });
+        });
+
+        // RustFFT operates on a single buffer of complex values.
+        let mut rustfft_planner = FftPlanner::<f64>::new();
+        let rustfft_plan = rustfft_planner.plan_fft_forward(len);
+        let mut signal = generate_complex_numbers(len);
+        let rustfft_id = BenchmarkId::new("RustFFT FFT Forward", len);
+
+        group.bench_with_input(rustfft_id, &len, |bencher, &_len| {
+            bencher.iter(|| rustfft_plan.process(black_box(&mut signal)));
+        });
+    }
+    group.finish();
+}
+
+/// Benchmarks PhastFT's `f64` FFT with a planner built for
+/// `Direction::Reverse`.
+///
+/// One stand-alone benchmark is registered per exponent in `LENGTHS`, using
+/// the default `Options` and a random input of length `1 << exponent`.
+fn benchmark_inverse_f64(c: &mut Criterion) {
+    let options = Options::default();
+
+    for &exponent in LENGTHS {
+        let len: usize = 1 << exponent;
+        let bench_name = format!("FFT Inverse f64 {} elements", len);
+        let planner = Planner64::new(len, Direction::Reverse);
+
+        c.bench_function(&bench_name, |bencher| {
+            // The input is generated once per benchmark, outside the timed loop.
+            let (mut reals, mut imags) = generate_numbers(len);
+            bencher.iter(|| {
+                fft_64_with_opts_and_plan(
+                    black_box(&mut reals),
+                    black_box(&mut imags),
+                    black_box(&options),
+                    black_box(&planner),
+                );
+            });
+        });
+    }
+}
+
+// Register every benchmark function with the `benches` group. The targets are
+// a comma-separated list, so each entry except the last needs a trailing comma.
+criterion_group!(
+    benches,
+    benchmark_forward_f32,
+    benchmark_inverse_f32,
+    benchmark_forward_f64,
+    benchmark_inverse_f64,
+    benchmark_r2c_vs_c2c
+);
 criterion_main!(benches);