Skip to content

Commit

Permalink
Merge pull request #33 from QuState/feature/reusable-planner
Browse files Browse the repository at this point in the history
Make planner reusable
  • Loading branch information
smu160 authored Jul 14, 2024
2 parents 2df2f00 + b3568d3 commit 20fafba
Show file tree
Hide file tree
Showing 11 changed files with 439 additions and 65 deletions.
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@ num-traits = "0.2.18"
multiversion = "0.7"

[dev-dependencies]
utilities = { path = "utilities" }
criterion = "0.5.1"
fftw = "0.8.0"
rand = "0.8.5"
utilities = { path = "utilities" }

[[bench]]
name = "bench"
harness = false

[profile.release]
codegen-units = 1
Expand Down
159 changes: 159 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use num_traits::Float;
use phastft::{
fft_32_with_opts_and_plan, fft_64_with_opts_and_plan,
options::Options,
planner::{Direction, Planner32, Planner64},
};
use rand::{
distributions::{Distribution, Standard},
thread_rng, Rng,
};
use utilities::rustfft::num_complex::Complex;
use utilities::rustfft::FftPlanner;

const LENGTHS: &[usize] = &[
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
];

fn generate_numbers<T: Float>(n: usize) -> (Vec<T>, Vec<T>)
where
Standard: Distribution<T>,
{
let mut rng = thread_rng();

let samples: Vec<T> = (&mut rng).sample_iter(Standard).take(2 * n).collect();

let mut reals = vec![T::zero(); n];
let mut imags = vec![T::zero(); n];

for ((z_re, z_im), rand_chunk) in reals
.iter_mut()
.zip(imags.iter_mut())
.zip(samples.chunks_exact(2))
{
*z_re = rand_chunk[0];
*z_im = rand_chunk[1];
}

(reals, imags)
}

fn generate_complex_numbers<T: Float + Default>(n: usize) -> Vec<Complex<T>>
where
Standard: Distribution<T>,
{
let mut rng = thread_rng();

let samples: Vec<T> = (&mut rng).sample_iter(Standard).take(2 * n).collect();

let mut signal = vec![Complex::default(); n];

for (z, rand_chunk) in signal.iter_mut().zip(samples.chunks_exact(2)) {
z.re = rand_chunk[0];
z.im = rand_chunk[1];
}

signal
}

fn benchmark_forward_f32(c: &mut Criterion) {
let mut group = c.benchmark_group("Forward f32");

for n in LENGTHS.iter() {
let len = 1 << n;
group.throughput(Throughput::Elements(len as u64));

let id = "PhastFT FFT Forward";
let options = Options::guess_options(len);
let planner = Planner32::new(len, Direction::Forward);
let (mut reals, mut imags) = generate_numbers(len);

group.bench_with_input(BenchmarkId::new(id, len), &len, |b, &len| {
b.iter(|| {
fft_32_with_opts_and_plan(
black_box(&mut reals),
black_box(&mut imags),
black_box(&options),
black_box(&planner),
);
});
});

let id = "RustFFT FFT Forward";
let mut planner = FftPlanner::<f32>::new();
let fft = planner.plan_fft_forward(len);
let mut signal = generate_complex_numbers(len);

group.bench_with_input(BenchmarkId::new(id, len), &len, |b, &len| {
b.iter(|| fft.process(black_box(&mut signal)));
});
}
group.finish();
}

fn benchmark_inverse_f32(c: &mut Criterion) {
let options = Options::default();

for n in LENGTHS.iter() {
let len = 1 << n;
let id = format!("FFT Inverse f32 {} elements", len);
let planner = Planner32::new(len, Direction::Reverse);

c.bench_function(&id, |b| {
let (mut reals, mut imags) = generate_numbers(len);
b.iter(|| {
black_box(fft_32_with_opts_and_plan(
&mut reals, &mut imags, &options, &planner,
));
});
});
}
}

fn benchmark_forward_f64(c: &mut Criterion) {
let options = Options::default();

for n in LENGTHS.iter() {
let len = 1 << n;
let id = format!("FFT Forward f64 {} elements", len);
let planner = Planner64::new(len, Direction::Forward);

c.bench_function(&id, |b| {
let (mut reals, mut imags) = generate_numbers(len);
b.iter(|| {
black_box(fft_64_with_opts_and_plan(
&mut reals, &mut imags, &options, &planner,
));
});
});
}
}

fn benchmark_inverse_f64(c: &mut Criterion) {
let options = Options::default();

for n in LENGTHS.iter() {
let len = 1 << n;
let id = format!("FFT Inverse f64 {} elements", len);
let planner = Planner64::new(len, Direction::Reverse);

c.bench_function(&id, |b| {
let (mut reals, mut imags) = generate_numbers(len);
b.iter(|| {
black_box(fft_64_with_opts_and_plan(
&mut reals, &mut imags, &options, &planner,
));
});
});
}
}

criterion_group!(
benches,
benchmark_forward_f32,
benchmark_inverse_f32,
benchmark_forward_f64,
benchmark_inverse_f64
);
criterion_main!(benches);
10 changes: 7 additions & 3 deletions examples/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@ use std::str::FromStr;

use utilities::gen_random_signal;

use phastft::fft_64;
use phastft::planner::Direction;
use phastft::fft_64_with_opts_and_plan;
use phastft::options::Options;
use phastft::planner::{Direction, Planner64};

fn benchmark_fft_64(n: usize) {
let big_n = 1 << n;
let mut reals = vec![0.0; big_n];
let mut imags = vec![0.0; big_n];
gen_random_signal(&mut reals, &mut imags);

let planner = Planner64::new(reals.len(), Direction::Forward);
let opts = Options::guess_options(reals.len());

let now = std::time::Instant::now();
fft_64(&mut reals, &mut imags, Direction::Forward);
fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
let elapsed = now.elapsed().as_micros();
println!("{elapsed}");
}
Expand Down
27 changes: 18 additions & 9 deletions examples/profile.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
use std::env;
use std::str::FromStr;

use phastft::fft_64;
use phastft::planner::Direction;

fn benchmark_fft(num_qubits: usize) {
let n = 1 << num_qubits;
let mut reals: Vec<f64> = (1..=n).map(|i| i as f64).collect();
let mut imags: Vec<f64> = (1..=n).map(|i| i as f64).collect();
fft_64(&mut reals, &mut imags, Direction::Forward);
use utilities::gen_random_signal;

use phastft::fft_64_with_opts_and_plan;
use phastft::options::Options;
use phastft::planner::{Direction, Planner64};

fn benchmark_fft_64(n: usize) {
let big_n = 1 << n;
let mut reals = vec![0.0; big_n];
let mut imags = vec![0.0; big_n];
gen_random_signal(&mut reals, &mut imags);

let planner = Planner64::new(reals.len(), Direction::Forward);
let opts = Options::guess_options(reals.len());

fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
}

fn main() {
let args: Vec<String> = env::args().collect();
assert_eq!(args.len(), 2, "Usage {} <n>", args[0]);

let n = usize::from_str(&args[1]).unwrap();
benchmark_fft(n);

benchmark_fft_64(n);
}
7 changes: 4 additions & 3 deletions examples/rustfft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use utilities::{
fn benchmark_rustfft(n: usize) {
let big_n = 1 << n;

let mut reals = vec![0.0; big_n];
let mut imags = vec![0.0; big_n];
let mut reals = vec![0.0f64; big_n];
let mut imags = vec![0.0f64; big_n];

gen_random_signal(&mut reals, &mut imags);
let mut signal = vec![Complex64::default(); big_n];
Expand All @@ -23,9 +23,10 @@ fn benchmark_rustfft(n: usize) {
z.im = im;
});

let now = std::time::Instant::now();
let mut planner = FftPlanner::new();
let fft = planner.plan_fft_forward(signal.len());

let now = std::time::Instant::now();
fft.process(&mut signal);
let elapsed = now.elapsed().as_micros();
println!("{elapsed}");
Expand Down
32 changes: 32 additions & 0 deletions src/kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ use num_traits::Float;

macro_rules! fft_butterfly_n_simd {
($func_name:ident, $precision:ty, $lanes:literal, $simd_vector:ty) => {
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub fn $func_name(
reals: &mut [$precision],
Expand Down Expand Up @@ -52,6 +60,14 @@ macro_rules! fft_butterfly_n_simd {
fft_butterfly_n_simd!(fft_64_chunk_n_simd, f64, 8, f64x8);
fft_butterfly_n_simd!(fft_32_chunk_n_simd, f32, 16, f32x16);

#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub(crate) fn fft_chunk_n<T: Float>(
reals: &mut [T],
Expand Down Expand Up @@ -93,6 +109,14 @@ pub(crate) fn fft_chunk_n<T: Float>(
}

/// `chunk_size == 4`, so hard code twiddle factors
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub(crate) fn fft_chunk_4<T: Float>(reals: &mut [T], imags: &mut [T]) {
let dist = 2;
Expand Down Expand Up @@ -128,6 +152,14 @@ pub(crate) fn fft_chunk_4<T: Float>(reals: &mut [T], imags: &mut [T]) {
}

/// `chunk_size == 2`, so skip phase
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub(crate) fn fft_chunk_2<T: Float>(reals: &mut [T], imags: &mut [T]) {
reals
Expand Down
Loading

0 comments on commit 20fafba

Please sign in to comment.