Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make planner reusable #33

Merged
merged 6 commits into from
Jul 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,14 @@ num-traits = "0.2.18"
multiversion = "0.7"

[dev-dependencies]
utilities = { path = "utilities" }
criterion = "0.5.1"
fftw = "0.8.0"
rand = "0.8.5"
utilities = { path = "utilities" }

[[bench]]
name = "bench"
harness = false

[profile.release]
codegen-units = 1
Expand Down
159 changes: 159 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use num_traits::Float;
use phastft::{
fft_32_with_opts_and_plan, fft_64_with_opts_and_plan,
options::Options,
planner::{Direction, Planner32, Planner64},
};
use rand::{
distributions::{Distribution, Standard},
thread_rng, Rng,
};
use utilities::rustfft::num_complex::Complex;
use utilities::rustfft::FftPlanner;

/// Base-2 exponents of the transform sizes to benchmark.
///
/// Every benchmark in this file turns an entry `n` into a signal length of
/// `1 << n` points, so this table covers 2^6 through 2^28 points.
const LENGTHS: &[usize] = &[
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
];

/// Generates `n` random complex samples in split (separate real/imaginary) form.
///
/// Returns `(reals, imags)`, two vectors of exactly `n` elements each, with
/// every element drawn from rand's `Standard` distribution using the
/// thread-local RNG.
///
/// Values are drawn in the order real part, imaginary part, per sample, and
/// pushed straight into their destination vectors. This avoids a temporary
/// `2 * n` sample buffer and a zero-fill pass over both outputs, which matters
/// at the largest benchmark sizes where each buffer is gigabytes in size.
fn generate_numbers<T: Float>(n: usize) -> (Vec<T>, Vec<T>)
where
    Standard: Distribution<T>,
{
    let mut rng = thread_rng();

    // Reserve the final size up front so neither vector reallocates.
    let mut reals: Vec<T> = Vec::with_capacity(n);
    let mut imags: Vec<T> = Vec::with_capacity(n);

    for _ in 0..n {
        let re: T = rng.sample(Standard);
        let im: T = rng.sample(Standard);
        reals.push(re);
        imags.push(im);
    }

    (reals, imags)
}

fn generate_complex_numbers<T: Float + Default>(n: usize) -> Vec<Complex<T>>
where
Standard: Distribution<T>,
{
let mut rng = thread_rng();

let samples: Vec<T> = (&mut rng).sample_iter(Standard).take(2 * n).collect();

let mut signal = vec![Complex::default(); n];

for (z, rand_chunk) in signal.iter_mut().zip(samples.chunks_exact(2)) {
z.re = rand_chunk[0];
z.im = rand_chunk[1];
}

signal
}

/// Benchmarks the forward single-precision FFT of PhastFT against RustFFT.
///
/// For every exponent in `LENGTHS` a signal of `1 << n` points is generated
/// and both implementations are measured inside the `"Forward f32"` group,
/// with throughput reported in elements so different sizes are comparable.
///
/// Planning (PhastFT `Planner32`, RustFFT `FftPlanner`) and input generation
/// happen outside the timed closure; only the transform itself is measured.
///
/// Note: the same buffers are transformed in place on every iteration, so
/// after the first iteration the input is the output of the previous one.
fn benchmark_forward_f32(c: &mut Criterion) {
    let mut group = c.benchmark_group("Forward f32");

    for &n in LENGTHS {
        let len: usize = 1 << n;
        group.throughput(Throughput::Elements(len as u64));

        // --- PhastFT: split real/imaginary buffers, reusable plan ---
        let options = Options::guess_options(len);
        let phastft_planner = Planner32::new(len, Direction::Forward);
        let (mut reals, mut imags) = generate_numbers(len);

        // The closure's input argument is deliberately ignored: everything it
        // needs was prepared above and is captured by reference.
        group.bench_with_input(BenchmarkId::new("PhastFT FFT Forward", len), &len, |b, _| {
            b.iter(|| {
                fft_32_with_opts_and_plan(
                    black_box(&mut reals),
                    black_box(&mut imags),
                    black_box(&options),
                    black_box(&phastft_planner),
                );
            });
        });

        // --- RustFFT: interleaved complex buffer ---
        let mut rustfft_planner = FftPlanner::<f32>::new();
        let fft = rustfft_planner.plan_fft_forward(len);
        let mut signal = generate_complex_numbers(len);

        group.bench_with_input(BenchmarkId::new("RustFFT FFT Forward", len), &len, |b, _| {
            b.iter(|| fft.process(black_box(&mut signal)));
        });
    }
    group.finish();
}

/// Benchmarks the inverse (reverse-direction) single-precision PhastFT FFT.
///
/// For every exponent in `LENGTHS` a signal of `1 << n` points is transformed
/// with a `Planner32` built once per size outside the timed closure.
///
/// Options are chosen per size with `Options::guess_options(len)`, matching
/// `benchmark_forward_f32`, instead of one `Options::default()` for all sizes.
/// The inputs, not the `()` result, are passed through `black_box`.
///
/// Benchmark ids are unchanged, so previously saved baselines still line up.
fn benchmark_inverse_f32(c: &mut Criterion) {
    for &n in LENGTHS {
        let len: usize = 1 << n;
        let id = format!("FFT Inverse f32 {} elements", len);
        let options = Options::guess_options(len);
        let planner = Planner32::new(len, Direction::Reverse);

        c.bench_function(&id, |b| {
            // Input generation stays outside `b.iter`, so it is not timed.
            let (mut reals, mut imags) = generate_numbers(len);
            b.iter(|| {
                fft_32_with_opts_and_plan(
                    black_box(&mut reals),
                    black_box(&mut imags),
                    black_box(&options),
                    black_box(&planner),
                );
            });
        });
    }
}

/// Benchmarks the forward double-precision PhastFT FFT.
///
/// For every exponent in `LENGTHS` a signal of `1 << n` points is transformed
/// with a `Planner64` built once per size outside the timed closure.
///
/// Options are chosen per size with `Options::guess_options(len)`, matching
/// `benchmark_forward_f32`, instead of one `Options::default()` for all sizes.
/// The inputs, not the `()` result, are passed through `black_box`.
///
/// Benchmark ids are unchanged, so previously saved baselines still line up.
fn benchmark_forward_f64(c: &mut Criterion) {
    for &n in LENGTHS {
        let len: usize = 1 << n;
        let id = format!("FFT Forward f64 {} elements", len);
        let options = Options::guess_options(len);
        let planner = Planner64::new(len, Direction::Forward);

        c.bench_function(&id, |b| {
            // Input generation stays outside `b.iter`, so it is not timed.
            let (mut reals, mut imags) = generate_numbers(len);
            b.iter(|| {
                fft_64_with_opts_and_plan(
                    black_box(&mut reals),
                    black_box(&mut imags),
                    black_box(&options),
                    black_box(&planner),
                );
            });
        });
    }
}

/// Benchmarks the inverse (reverse-direction) double-precision PhastFT FFT.
///
/// For every exponent in `LENGTHS` a signal of `1 << n` points is transformed
/// with a `Planner64` built once per size outside the timed closure.
///
/// Options are chosen per size with `Options::guess_options(len)`, matching
/// `benchmark_forward_f32`, instead of one `Options::default()` for all sizes.
/// The inputs, not the `()` result, are passed through `black_box`.
///
/// Benchmark ids are unchanged, so previously saved baselines still line up.
fn benchmark_inverse_f64(c: &mut Criterion) {
    for &n in LENGTHS {
        let len: usize = 1 << n;
        let id = format!("FFT Inverse f64 {} elements", len);
        let options = Options::guess_options(len);
        let planner = Planner64::new(len, Direction::Reverse);

        c.bench_function(&id, |b| {
            // Input generation stays outside `b.iter`, so it is not timed.
            let (mut reals, mut imags) = generate_numbers(len);
            b.iter(|| {
                fft_64_with_opts_and_plan(
                    black_box(&mut reals),
                    black_box(&mut imags),
                    black_box(&options),
                    black_box(&planner),
                );
            });
        });
    }
}

// Collect the four benchmark functions into a single Criterion group named
// `benches`; they are listed as forward/inverse for f32, then f64.
criterion_group!(
benches,
benchmark_forward_f32,
benchmark_inverse_f32,
benchmark_forward_f64,
benchmark_inverse_f64
);
// Let Criterion generate the `main` entry point for this bench target, which
// is declared with `harness = false` in Cargo.toml.
criterion_main!(benches);
10 changes: 7 additions & 3 deletions examples/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,21 @@ use std::str::FromStr;

use utilities::gen_random_signal;

use phastft::fft_64;
use phastft::planner::Direction;
use phastft::fft_64_with_opts_and_plan;
use phastft::options::Options;
use phastft::planner::{Direction, Planner64};

fn benchmark_fft_64(n: usize) {
let big_n = 1 << n;
let mut reals = vec![0.0; big_n];
let mut imags = vec![0.0; big_n];
gen_random_signal(&mut reals, &mut imags);

let planner = Planner64::new(reals.len(), Direction::Forward);
let opts = Options::guess_options(reals.len());

let now = std::time::Instant::now();
fft_64(&mut reals, &mut imags, Direction::Forward);
fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
let elapsed = now.elapsed().as_micros();
println!("{elapsed}");
}
Expand Down
27 changes: 18 additions & 9 deletions examples/profile.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
use std::env;
use std::str::FromStr;

use phastft::fft_64;
use phastft::planner::Direction;

fn benchmark_fft(num_qubits: usize) {
let n = 1 << num_qubits;
let mut reals: Vec<f64> = (1..=n).map(|i| i as f64).collect();
let mut imags: Vec<f64> = (1..=n).map(|i| i as f64).collect();
fft_64(&mut reals, &mut imags, Direction::Forward);
use utilities::gen_random_signal;

use phastft::fft_64_with_opts_and_plan;
use phastft::options::Options;
use phastft::planner::{Direction, Planner64};

fn benchmark_fft_64(n: usize) {
let big_n = 1 << n;
let mut reals = vec![0.0; big_n];
let mut imags = vec![0.0; big_n];
gen_random_signal(&mut reals, &mut imags);

let planner = Planner64::new(reals.len(), Direction::Forward);
let opts = Options::guess_options(reals.len());

fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner);
}

fn main() {
let args: Vec<String> = env::args().collect();
assert_eq!(args.len(), 2, "Usage {} <n>", args[0]);

let n = usize::from_str(&args[1]).unwrap();
benchmark_fft(n);

benchmark_fft_64(n);
}
7 changes: 4 additions & 3 deletions examples/rustfft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use utilities::{
fn benchmark_rustfft(n: usize) {
let big_n = 1 << n;

let mut reals = vec![0.0; big_n];
let mut imags = vec![0.0; big_n];
let mut reals = vec![0.0f64; big_n];
let mut imags = vec![0.0f64; big_n];

gen_random_signal(&mut reals, &mut imags);
let mut signal = vec![Complex64::default(); big_n];
Expand All @@ -23,9 +23,10 @@ fn benchmark_rustfft(n: usize) {
z.im = im;
});

let now = std::time::Instant::now();
let mut planner = FftPlanner::new();
let fft = planner.plan_fft_forward(signal.len());

let now = std::time::Instant::now();
fft.process(&mut signal);
let elapsed = now.elapsed().as_micros();
println!("{elapsed}");
Expand Down
32 changes: 32 additions & 0 deletions src/kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ use num_traits::Float;

macro_rules! fft_butterfly_n_simd {
($func_name:ident, $precision:ty, $lanes:literal, $simd_vector:ty) => {
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub fn $func_name(
reals: &mut [$precision],
Expand Down Expand Up @@ -52,6 +60,14 @@ macro_rules! fft_butterfly_n_simd {
fft_butterfly_n_simd!(fft_64_chunk_n_simd, f64, 8, f64x8);
fft_butterfly_n_simd!(fft_32_chunk_n_simd, f32, 16, f32x16);

#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub(crate) fn fft_chunk_n<T: Float>(
reals: &mut [T],
Expand Down Expand Up @@ -93,6 +109,14 @@ pub(crate) fn fft_chunk_n<T: Float>(
}

/// `chunk_size == 4`, so hard code twiddle factors
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub(crate) fn fft_chunk_4<T: Float>(reals: &mut [T], imags: &mut [T]) {
let dist = 2;
Expand Down Expand Up @@ -128,6 +152,14 @@ pub(crate) fn fft_chunk_4<T: Float>(reals: &mut [T], imags: &mut [T]) {
}

/// `chunk_size == 2`, so skip phase
#[multiversion::multiversion(targets("x86_64+avx512f+avx512bw+avx512cd+avx512dq+avx512vl", // x86_64-v4
"x86_64+avx2+fma", // x86_64-v3
"x86_64+sse4.2", // x86_64-v2
"x86+avx512f+avx512bw+avx512cd+avx512dq+avx512vl",
"x86+avx2+fma",
"x86+sse4.2",
"x86+sse2",
))]
#[inline]
pub(crate) fn fft_chunk_2<T: Float>(reals: &mut [T], imags: &mut [T]) {
reals
Expand Down
Loading
Loading