Skip to content

Commit

Permalink
Merge pull request #5 from QuState/options
Browse files Browse the repository at this point in the history
Options WIP
  • Loading branch information
smu160 authored Feb 2, 2024
2 parents 7a77829 + 6c6d16f commit fbcc252
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 4 deletions.
19 changes: 15 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

use crate::cobra::cobra_apply;
use crate::kernels::{fft_chunk_2, fft_chunk_4, fft_chunk_n, fft_chunk_n_simd, Float};
use crate::options::Options;
use crate::twiddles::{filter_twiddles, generate_twiddles, generate_twiddles_simd};

mod cobra;
mod kernels;
pub mod options;
mod twiddles;

/// FFT -- Decimation in Frequency
Expand All @@ -19,6 +21,15 @@ mod twiddles;
///
/// [1] https://inst.eecs.berkeley.edu/~ee123/sp15/Notes/Lecture08_FFT_and_SpectAnalysis.key.pdf
pub fn fft_dif(reals: &mut [Float], imags: &mut [Float]) {
    // Derive the default tuning from the length of the real part, then hand
    // everything over to the configurable entry point.
    let input_len = reals.len();
    let guessed = Options::guess_options(input_len);
    fft_dif_with_opts(reals, imags, &guessed);
}

/// Same as [fft_dif], but also accepts [`Options`] that control optimization strategies.
///
/// `fft_dif` automatically guesses the best strategy for a given input,
/// so you only need to call this if you are tuning performance for a specific hardware platform.
pub fn fft_dif_with_opts(reals: &mut [Float], imags: &mut [Float], opts: &Options) {
assert_eq!(reals.len(), imags.len());
let n: usize = reals.len().ilog2() as usize;

Expand Down Expand Up @@ -62,14 +73,14 @@ pub fn fft_dif(reals: &mut [Float], imags: &mut [Float]) {
}
}

if n < 22 {
cobra_apply(reals, n);
cobra_apply(imags, n);
} else {
if opts.multithreaded_bit_reversal {
std::thread::scope(|s| {
s.spawn(|| cobra_apply(reals, n));
s.spawn(|| cobra_apply(imags, n));
});
} else {
cobra_apply(reals, n);
cobra_apply(imags, n);
}
}

Expand Down
24 changes: 24 additions & 0 deletions src/options.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/// Options to tune to improve performance depending on the hardware and input size.
///
/// Calling FFT routines without specifying options will automatically select reasonable defaults
/// depending on the input size and other factors.
///
/// You only need to tune these options if you are trying to squeeze maximum performance
/// out of a known hardware platform that you can benchmark at varying input sizes.
#[non_exhaustive]
#[derive(Debug, Clone, Default)]
pub struct Options {
    /// Whether to run bit reversal step in 2 threads instead of one.
    /// This is beneficial only at large input sizes (i.e. gigabytes of data).
    /// The exact threshold where it starts being beneficial varies depending on the hardware.
    pub multithreaded_bit_reversal: bool,
}

impl Options {
    /// Smallest `floor(log2(input_size))` at which multithreaded bit reversal is enabled by default.
    const MULTITHREADED_BIT_REVERSAL_MIN_LOG2: u32 = 22;

    /// Guesses reasonable options for an input of `input_size` elements.
    ///
    /// Multithreaded bit reversal is enabled once `floor(log2(input_size))` reaches
    /// [`Self::MULTITHREADED_BIT_REVERSAL_MIN_LOG2`]; every other option keeps its default.
    ///
    /// An `input_size` of zero has no logarithm; it is treated as a small input
    /// (all defaults) instead of panicking.
    pub(crate) fn guess_options(input_size: usize) -> Options {
        let mut options = Options::default();
        // `checked_ilog2` yields `None` for zero, where plain `ilog2` would panic.
        options.multithreaded_bit_reversal = input_size
            .checked_ilog2()
            .map_or(false, |log2| log2 >= Self::MULTITHREADED_BIT_REVERSAL_MIN_LOG2);
        options
    }
}

0 comments on commit fbcc252

Please sign in to comment.