diff --git a/Cargo.toml b/Cargo.toml index b64fb50..326d114 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ complex-nums = ["dep:num-complex", "dep:bytemuck"] [dev-dependencies] criterion = "0.5.1" fftw = "0.8.0" +realfft = "3.3.0" rand = "0.8.5" utilities = { path = "utilities" } diff --git a/assets/lines.png b/assets/lines.png new file mode 100644 index 0000000..6767aa6 Binary files /dev/null and b/assets/lines.png differ diff --git a/benches/README.md b/benches/README.md index e276e06..058ba1f 100644 --- a/benches/README.md +++ b/benches/README.md @@ -78,6 +78,30 @@ The generated images will be saved in your working directory. | Active Toolchain | nightly-x86_64-unknown-linux-gnu (default) | | Rustc Version | rustc 1.79.0-nightly (7f2fc33da 2024-04-22) | +## Measuruing throughput with `criterion` + +0. Install gnuplot if you want `criterion` to use gnuplot as the plotting backend. + On macOS, just run: +```bash +brew install gnuplot +``` + +1. Run benchmarks +```bash +cargo bench +``` + +2. Open the report using a browser. The report will be located in the `target` directory. + For example, from the root of the `PhastFT` repository (on macOS) run: +```bash +open -a firefox target/criterion/r2c_versus_c2c/report/index.html +``` + +### Results + +![Alt text](../assets/lines.png) + + ## Profiling Navigate to the cloned repo: diff --git a/benches/bench.rs b/benches/bench.rs index 00d7833..9a49f74 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,6 +1,11 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use realfft::num_complex::Complex; +use realfft::RealFftPlanner; +use utilities::gen_random_signal; + use num_traits::Float; use phastft::{ + fft::r2c_fft_f64, fft_32_with_opts_and_plan, fft_64_with_opts_and_plan, options::Options, planner::{Direction, Planner32, Planner64}, @@ -9,9 +14,67 @@ use rand::{ distributions::{Distribution, Standard}, thread_rng, Rng, }; -use utilities::rustfft::num_complex::Complex; use utilities::rustfft::FftPlanner; +fn benchmark_r2c_vs_c2c(c: &mut Criterion) { + let sizes = vec![1 << 10, 1 << 12, 1 << 14, 1 << 16, 1 << 18, 1 << 20]; + + let mut group = c.benchmark_group("r2c_versus_c2c"); + for &size in &sizes { + group.throughput(Throughput::Elements(size as u64)); + + group.bench_with_input(BenchmarkId::new("r2c_fft", size), &size, |b, &size| { + let mut s_re = vec![0.0; size]; + let mut s_im = vec![0.0; size]; + gen_random_signal(&mut s_re, &mut s_im); + let mut output_re = vec![0.0; size]; + let mut output_im = vec![0.0; size]; + + b.iter(|| { + r2c_fft_f64( + black_box(&s_re), + black_box(&mut output_re), + black_box(&mut output_im), + ); + }); + }); + + group.bench_with_input(BenchmarkId::new("c2c_fft", size), &size, |b, &size| { + let mut s_re = vec![0.0; size]; + let mut s_im = vec![0.0; size]; + gen_random_signal(&mut s_re, &mut s_im); + s_im = vec![0.0; size]; + + let options = Options::guess_options(s_re.len()); + let planner = Planner64::new(s_re.len(), Direction::Reverse); + + b.iter(|| { + fft_64_with_opts_and_plan( + black_box(&mut s_re), + black_box(&mut s_im), + black_box(&options), + black_box(&planner), + ); + }); + }); + + group.bench_with_input(BenchmarkId::new("real_fft", size), &size, |b, &size| { + let mut s_re = vec![0.0; size]; + let mut s_im = vec![0.0; size]; + gen_random_signal(&mut s_re, &mut s_im); + let mut output = vec![Complex::default(); s_re.len() / 2 + 1]; + let mut planner = RealFftPlanner::::new(); + + b.iter(|| { + planner + .plan_fft_forward(s_re.len()) + .process(&mut s_re, &mut output) + .expect("fft.process() failed!"); + }); + }); + } +} + const LENGTHS: &[usize] = &[ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, ]; @@ -170,11 +233,13 @@ fn benchmark_inverse_f64(c: &mut Criterion) { } } -criterion_group!( - benches, - benchmark_forward_f32, - benchmark_inverse_f32, - benchmark_forward_f64, - benchmark_inverse_f64 -); +fn critertion_benchmark(c: &mut Criterion) { + benchmark_forward_f32(c); + benchmark_inverse_f32(c); + benchmark_forward_f64(c); + benchmark_inverse_f64(c); + benchmark_r2c_vs_c2c(c); +} + +criterion_group!(benches, critertion_benchmark,); criterion_main!(benches); diff --git a/examples/profile.rs b/examples/profile.rs index dd31597..910b20e 100644 --- a/examples/profile.rs +++ b/examples/profile.rs @@ -1,22 +1,31 @@ use std::env; use std::str::FromStr; -use utilities::gen_random_signal; - -use phastft::fft_64_with_opts_and_plan; -use phastft::options::Options; -use phastft::planner::{Direction, Planner64}; - -fn benchmark_fft_64(n: usize) { +use phastft::fft::r2c_fft_f64; +// use phastft::fft_64; +// use phastft::fft_64_with_opts_and_plan; +// use phastft::options::Options; +// use phastft::planner::{Direction, Planner64}; +// use utilities::gen_random_signal; + +// fn benchmark_fft_64(n: usize) { +// let big_n = 1 << n; +// let mut reals = vec![0.0; big_n]; +// let mut imags = vec![0.0; big_n]; +// gen_random_signal(&mut reals, &mut imags); +// +// let planner = Planner64::new(reals.len(), Direction::Forward); +// let opts = Options::guess_options(reals.len()); +// +// fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner); +// } + +fn benchmark_r2c_fft(n: usize) { let big_n = 1 << n; - let mut reals = vec![0.0; big_n]; - let mut imags = vec![0.0; big_n]; - gen_random_signal(&mut reals, &mut imags); - - let planner = Planner64::new(reals.len(), Direction::Forward); - let opts = Options::guess_options(reals.len()); - - fft_64_with_opts_and_plan(&mut reals, &mut imags, &opts, &planner); + let reals: Vec = (1..=big_n).map(|i| i as f64).collect(); + let mut output_re = vec![0.0; big_n]; + let mut output_im = vec![0.0; big_n]; + r2c_fft_f64(&reals, &mut output_re, &mut output_im); } fn main() { @@ -24,6 +33,6 @@ fn main() { assert_eq!(args.len(), 2, "Usage {} ", args[0]); let n = usize::from_str(&args[1]).unwrap(); - - benchmark_fft_64(n); + benchmark_r2c_fft(n); + // benchmark_fft_64(n); } diff --git a/pyphastft/audio_visual.py b/pyphastft/audio_visual.py new file mode 100644 index 0000000..2396ebf --- /dev/null +++ b/pyphastft/audio_visual.py @@ -0,0 +1,111 @@ +import sys +import numpy as np +import pyaudio +import pyqtgraph as pg +from pyphastft import rfft +from pyqtgraph.Qt import QtWidgets, QtCore + +class RealTimeAudioSpectrum(QtWidgets.QWidget): + def __init__(self, parent=None): + super(RealTimeAudioSpectrum, self).__init__(parent) + self.n_fft_bins = 1024 + self.n_display_bins = 64 + self.sample_rate = 44100 + self.smoothing_factor = 0.1 # Fine-tuned smoothing factor + self.ema_fft_data = np.zeros(self.n_display_bins) + self.init_ui() + self.init_audio_stream() + + def init_ui(self): + self.layout = QtWidgets.QVBoxLayout(self) + self.plot_widget = pg.PlotWidget() + self.layout.addWidget(self.plot_widget) + + self.plot_widget.setBackground("k") + self.plot_item = self.plot_widget.getPlotItem() + self.plot_item.setTitle( + "Real-Time Audio Spectrum Visualizer powered by PhastFT", + color="w", + size="16pt", + ) + + self.plot_item.getAxis("left").hide() + self.plot_item.getAxis("bottom").hide() + + self.plot_item.setXRange(0, self.sample_rate / 2, padding=0) + self.plot_item.setYRange(0, 1, padding=0) + + self.bar_width = (self.sample_rate / 2) / self.n_display_bins * 0.8 + self.freqs = np.linspace( + 0, self.sample_rate / 2, self.n_display_bins, endpoint=False + ) + + self.bar_graph = pg.BarGraphItem( + x=self.freqs, + height=np.zeros(self.n_display_bins), + width=self.bar_width, + brush=pg.mkBrush("m"), + ) + self.plot_item.addItem(self.bar_graph) + + self.timer = QtCore.QTimer() + self.timer.timeout.connect(self.update) + self.timer.start(50) + + def init_audio_stream(self): + self.p = pyaudio.PyAudio() + self.stream = self.p.open( + format=pyaudio.paFloat32, + channels=1, + rate=self.sample_rate, + input=True, + frames_per_buffer=self.n_fft_bins, + stream_callback=self.audio_callback, + ) + self.stream.start_stream() + + def audio_callback(self, in_data, frame_count, time_info, status): + audio_data = np.frombuffer(in_data, dtype=np.float32) + audio_data = np.ascontiguousarray(audio_data, dtype=np.float64) + reals, imags = rfft(audio_data, direction="f") + reals = np.ascontiguousarray(reals) + imags = np.ascontiguousarray(imags) + fft_magnitude = np.sqrt(reals**2 + imags**2)[: self.n_fft_bins // 2] + + new_fft_data = np.interp( + np.linspace(0, len(fft_magnitude), self.n_display_bins), + np.arange(len(fft_magnitude)), + fft_magnitude, + ) + + new_fft_data = np.log1p(new_fft_data) # Apply logarithmic scaling + + self.ema_fft_data = self.ema_fft_data * self.smoothing_factor + new_fft_data * ( + 1.0 - self.smoothing_factor + ) + + return in_data, pyaudio.paContinue + + def update(self): + # Normalize the FFT data to ensure it fits within the display range + max_value = np.max(self.ema_fft_data) + if max_value > 0: + normalized_fft_data = self.ema_fft_data / max_value + else: + normalized_fft_data = self.ema_fft_data + + self.bar_graph.setOpts(height=normalized_fft_data, width=self.bar_width) + + def closeEvent(self, event): + self.stream.stop_stream() + self.stream.close() + self.p.terminate() + event.accept() + +if __name__ == "__main__": + app = QtWidgets.QApplication(sys.argv) + window = RealTimeAudioSpectrum() + window.show() + sys.exit(app.exec_()) + + diff --git a/pyphastft/requirements.txt b/pyphastft/requirements.txt new file mode 100644 index 0000000..122615f --- /dev/null +++ b/pyphastft/requirements.txt @@ -0,0 +1,7 @@ +maturin==1.6.0 +pip==24.0 +PyAudio==0.2.14 +pylint==3.2.3 +pyphastft==0.2.1 +PyQt5==5.15.10 +pyqtgraph==0.13.7 diff --git a/pyphastft/src/lib.rs b/pyphastft/src/lib.rs index f69b64f..a2d805e 100644 --- a/pyphastft/src/lib.rs +++ b/pyphastft/src/lib.rs @@ -1,5 +1,5 @@ -use numpy::PyReadwriteArray1; -use phastft::{fft_64 as fft_64_rs, planner::Direction}; +use numpy::{PyReadonlyArray1, PyReadwriteArray1}; +use phastft::{fft::r2c_fft_f64, fft_64 as fft_64_rs, planner::Direction}; use pyo3::prelude::*; #[pyfunction] @@ -18,9 +18,27 @@ fn fft(mut reals: PyReadwriteArray1, mut imags: PyReadwriteArray1, dir ); } +#[pyfunction] +fn rfft(reals: PyReadonlyArray1, direction: char) -> (Vec, Vec) { + assert!(direction == 'f' || direction == 'r'); + let _dir = if direction == 'f' { + Direction::Forward + } else { + Direction::Reverse + }; + + let big_n = reals.as_slice().unwrap().len(); + + let mut output_re = vec![0.0; big_n]; + let mut output_im = vec![0.0; big_n]; + r2c_fft_f64(reals.as_slice().unwrap(), &mut output_re, &mut output_im); + (output_re, output_im) +} + /// A Python module implemented in Rust. #[pymodule] fn pyphastft(_py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(fft, m)?)?; + m.add_function(wrap_pyfunction!(rfft, m)?)?; Ok(()) } diff --git a/pyphastft/vis_qt.py b/pyphastft/vis_qt.py deleted file mode 100644 index 2a5c5e7..0000000 --- a/pyphastft/vis_qt.py +++ /dev/null @@ -1,115 +0,0 @@ -import sys - -import numpy as np -import pyaudio -import pyqtgraph as pg -from pyphastft import fft -from pyqtgraph.Qt import QtWidgets, QtCore - - -class RealTimeAudioSpectrum(QtWidgets.QWidget): - def __init__(self, parent=None): - super(RealTimeAudioSpectrum, self).__init__(parent) - self.n_fft_bins = 1024 # Increased FFT size for better frequency resolution - self.n_display_bins = 32 # Maintain the same number of bars in the display - self.sample_rate = 44100 - self.smoothing_factor = 0.1 # Smaller value for more smoothing - self.ema_fft_data = np.zeros( - self.n_display_bins - ) # Adjusted to the number of display bins - self.init_ui() - self.init_audio_stream() - - def init_ui(self): - self.layout = QtWidgets.QVBoxLayout(self) - self.plot_widget = pg.PlotWidget() - self.layout.addWidget(self.plot_widget) - - # Customize plot aesthetics - self.plot_widget.setBackground("k") - self.plot_item = self.plot_widget.getPlotItem() - self.plot_item.setTitle( - "Real-Time Audio Spectrum Visualizer powered by PhastFT", - color="w", - size="16pt", - ) - - # Hide axis labels - self.plot_item.getAxis("left").hide() - self.plot_item.getAxis("bottom").hide() - - # Set fixed ranges for the x and y axes to prevent them from jumping - self.plot_item.setXRange(0, self.sample_rate / 2, padding=0) - self.plot_item.setYRange(0, 1, padding=0) - - self.bar_width = ( - (self.sample_rate / 2) / self.n_display_bins * 0.90 - ) # Adjusted width for display bins - - # Calculate bar positions so they are centered with respect to their frequency values - self.freqs = np.linspace( - 0 + self.bar_width / 2, - self.sample_rate / 2 - self.bar_width / 2, - self.n_display_bins, - ) - - self.bar_graph = pg.BarGraphItem( - x=self.freqs, - height=np.zeros(self.n_display_bins), - width=self.bar_width, - brush=pg.mkBrush("m"), - ) - self.plot_item.addItem(self.bar_graph) - - self.timer = QtCore.QTimer() - self.timer.timeout.connect(self.update) - self.timer.start(50) # Update interval in milliseconds - - def init_audio_stream(self): - self.p = pyaudio.PyAudio() - self.stream = self.p.open( - format=pyaudio.paFloat32, - channels=1, - rate=self.sample_rate, - input=True, - frames_per_buffer=self.n_fft_bins, # This should match the FFT size - stream_callback=self.audio_callback, - ) - self.stream.start_stream() - - def audio_callback(self, in_data, frame_count, time_info, status): - audio_data = np.frombuffer(in_data, dtype=np.float32) - reals = np.zeros(self.n_fft_bins) - imags = np.zeros(self.n_fft_bins) - reals[: len(audio_data)] = audio_data # Fill the reals array with audio data - fft(reals, imags, direction="f") - fft_magnitude = np.sqrt(reals**2 + imags**2)[: self.n_fft_bins // 2] - - # Aggregate or interpolate FFT data to fit into display bins - new_fft_data = np.interp( - np.linspace(0, len(fft_magnitude), self.n_display_bins), - np.arange(len(fft_magnitude)), - fft_magnitude, - ) - - # Apply exponential moving average filter - self.ema_fft_data = self.ema_fft_data * self.smoothing_factor + new_fft_data * ( - 1.0 - self.smoothing_factor - ) - return in_data, pyaudio.paContinue - - def update(self): - self.bar_graph.setOpts(height=self.ema_fft_data, width=self.bar_width) - - def closeEvent(self, event): - self.stream.stop_stream() - self.stream.close() - self.p.terminate() - event.accept() - - -if __name__ == "__main__": - app = QtWidgets.QApplication(sys.argv) - window = RealTimeAudioSpectrum() - window.show() - sys.exit(app.exec_()) diff --git a/src/fft.rs b/src/fft.rs new file mode 100644 index 0000000..13606bd --- /dev/null +++ b/src/fft.rs @@ -0,0 +1,313 @@ +//! Implementation of Real valued FFT +use std::simd::prelude::f64x8; + +use crate::planner::{Planner32, Planner64}; +use crate::{ + fft_32_with_opts_and_plan, fft_64, fft_64_with_opts_and_plan, + twiddles::{generate_twiddles, Twiddles}, + utils::deinterleave, + Direction, Options, +}; + +macro_rules! impl_r2c_fft { + ($func_name:ident, $precision:ty, $planner:ident, $fft_w_opts_and_plan:ident) => { + /// Performs a Real-Valued Fast Fourier Transform (FFT) + /// + /// This function computes the FFT of a real-valued input signal and produces + /// complex-valued output. The implementation follows the principles of splitting + /// the input into even and odd components and then performing the FFT on these + /// components. + /// + /// # Arguments + /// + /// `input_re` - A slice containing the real-valued input signal. + /// + /// `output_re` - A mutable slice to store the real parts of the FFT output. + /// + /// `output_im` - A mutable slice to store the imaginary parts of the FFT output. + /// + /// # Panics + /// + /// Panics if `output_re.len() != output_im.len()` and `input_re.len()` == `output_re.len()` + /// + /// # Examples + /// + /// ``` + /// use phastft::fft::{r2c_fft_f32, r2c_fft_f64}; + /// + /// let big_n = 16; + /// let input: Vec = (1..=big_n).map(|x| x as f64).collect(); + /// let mut output_re = vec![0.0; big_n]; + /// let mut output_im = vec![0.0; big_n]; + /// r2c_fft_f64(&input, &mut output_re, &mut output_im); + /// + /// let input: Vec = (1..=big_n).map(|x| x as f32).collect(); + /// let mut output_re: Vec = vec![0.0; big_n]; + /// let mut output_im: Vec = vec![0.0; big_n]; + /// r2c_fft_f32(&input, &mut output_re, &mut output_im); + /// ``` + /// # References + /// + /// This implementation is based on the concepts discussed in + /// [Levente Kovács' post](https://kovleventer.com/blog/fft_real/). + pub fn $func_name( + input_re: &[$precision], + output_re: &mut [$precision], + output_im: &mut [$precision], + ) { + assert!(output_re.len() == output_im.len() && input_re.len() == output_re.len()); + let big_n = input_re.len(); + + // Split real signal of size `N` into odd (size `N/2`) and even (size `N/2`) via deinterleave + // The even part is now the "real" components and the odd part is now the "imaginary" components + let (mut z_even, mut z_odd): (Vec<_>, Vec<_>) = deinterleave(&input_re); + + let stride = big_n / 2; + let planner = <$planner>::new(big_n / 2, Direction::Forward); + + // We only need (N / 2) / 2 twiddle factors for the actual FFT call, so we filter + // filter_twiddles(&mut planner.twiddles_re, &mut planner.twiddles_im); + + let opts = Options::guess_options(z_even.len()); + $fft_w_opts_and_plan(&mut z_even, &mut z_odd, &opts, &planner); + + // Z = np.fft.fft(z) + let mut z_x_re = vec![0.0; big_n / 2]; + let mut z_x_im = vec![0.0; big_n / 2]; + let mut z_y_re = vec![0.0; big_n / 2]; + let mut z_y_im = vec![0.0; big_n / 2]; + + // Zminconj = np.roll(np.flip(Z), 1).conj() + // Zx = 0.5 * (Z + Zminconj) + // Zy = -0.5j * (Z - Zminconj) + z_even[1..] + .iter() + .zip(z_odd[1..].iter()) + .zip(z_even[1..].iter().rev()) + .zip(z_odd[1..].iter().rev()) + .zip(z_x_re[1..].iter_mut()) + .zip(z_x_im[1..].iter_mut()) + .zip(z_y_re[1..].iter_mut()) + .zip(z_y_im[1..].iter_mut()) + .for_each( + |(((((((z_e, z_o), z_e_mc), z_o_mc), zx_re), zx_im), zy_re), zy_im)| { + let a = *z_e; + let b = *z_o; + let c = *z_e_mc; + let d = -(*z_o_mc); + + let t = 0.5 * (a + c); + let u = 0.5 * (b + d); + let v = -0.5 * (a - c); + let w = 0.5 * (b - d); + + *zx_re = t; + *zx_im = u; + *zy_re = w; + *zy_im = v; + }, + ); + + let a = z_even[0]; + let b = z_odd[0]; + let c = z_even[0]; + let d = -z_odd[0]; + + let t = 0.5 * (a + c); + let u = 0.5 * (b + d); + let v = -0.5 * (a - c); + let w = 0.5 * (b - d); + + z_x_re[0] = t; + z_x_im[0] = u; + z_y_re[0] = w; + z_y_im[0] = v; + + // Zall = np.concatenate([Zx + W*Zy, Zx - W*Zy]) + let (output_re_first_half, output_re_second_half) = output_re.split_at_mut(big_n / 2); + let (output_im_first_half, output_im_second_half) = output_im.split_at_mut(big_n / 2); + let twiddles_iter = Twiddles::<$precision>::new(stride); + + z_x_re + .iter() + .zip(z_x_im.iter()) + .zip(z_y_re.iter()) + .zip(z_y_im.iter()) + .zip(output_re_first_half) + .zip(output_im_first_half) + .zip(output_re_second_half) + .zip(output_im_second_half) + .zip(twiddles_iter) + .for_each( + |( + ( + ((((((zx_re, zx_im), zy_re), zy_im), o_re_fh), o_im_fh), o_re_sh), + o_im_sh, + ), + (w_re, w_im), + )| { + let wz_re = w_re * zy_re - w_im * zy_im; + let wz_im = w_re * zy_im + w_im * zy_re; + + // Zx + W * Zy + *o_re_fh = zx_re + wz_re; + *o_im_fh = zx_im + wz_im; + + // Zx - W * Zy + *o_re_sh = zx_re - wz_re; + *o_im_sh = zx_im - wz_im; + }, + ); + } + }; +} + +impl_r2c_fft!(r2c_fft_f32, f32, Planner32, fft_32_with_opts_and_plan); +impl_r2c_fft!(r2c_fft_f64, f64, Planner64, fft_64_with_opts_and_plan); + +/// Performs a Real-Valued, Inverse, Fast Fourier Transform (FFT) +/// +/// # Panics +/// +/// Panics if `reals.len() != imags.len()` *and* `reals.len() != output.len()` +/// +pub fn r2c_ifft_f64(reals: &mut [f64], imags: &mut [f64], output: &mut [f64]) { + assert!(reals.len() == imags.len() && reals.len() == output.len()); + + let big_n = reals.len(); + + let mut z_x_re = vec![0.0; big_n / 2]; + let mut z_x_im = vec![0.0; big_n / 2]; + + let (reals_first_half, reals_second_half) = reals.split_at(big_n / 2); + let (imags_first_half, imags_second_half) = imags.split_at(big_n / 2); + + // Compute Zx + for i in 0..big_n / 2 { + let re = 0.5 * (reals_first_half[i] + reals_second_half[i]); + let im = 0.5 * (imags_first_half[i] + imags_second_half[i]); + + z_x_re[i] = re; + z_x_im[i] = im; + } + + let mut z_y_re = vec![0.0; big_n / 2]; + let mut z_y_im = vec![0.0; big_n / 2]; + let (twiddles_re, twiddles_im) = generate_twiddles::(big_n / 2, Direction::Reverse); + + // Compute W * Zy + for i in 0..big_n / 2 { + let a = reals_first_half[i]; + let b = imags_first_half[i]; + let c = reals_second_half[i]; + let d = imags_second_half[i]; + + let e = a - c; + let f = b - d; + let k = -0.5 * f; + let l = 0.5 * e; + + let m = twiddles_re[i]; + let n = twiddles_im[i]; + + z_y_re[i] = k * m - l * n; + z_y_im[i] = k * n + l * m; + } + + const CHUNK_SIZE: usize = 8; + + // Compute Zx + Zy + z_x_re + .chunks_exact_mut(CHUNK_SIZE) + .zip(z_x_im.chunks_exact_mut(CHUNK_SIZE)) + .zip(z_y_re.chunks_exact(CHUNK_SIZE)) + .zip(z_y_im.chunks_exact(CHUNK_SIZE)) + .for_each(|(((zxr, zxi), zyr), zyi)| { + let mut z_x_r = f64x8::from_slice(zxr); + let mut z_x_i = f64x8::from_slice(zxi); + let z_y_r = f64x8::from_slice(zyr); + let z_y_i = f64x8::from_slice(zyi); + + z_x_r += z_y_r; + z_x_i += z_y_i; + zxr.copy_from_slice(z_x_r.as_array()); + zxi.copy_from_slice(z_x_i.as_array()); + }); + + fft_64(&mut z_x_re, &mut z_x_im, Direction::Reverse); + + // Store reals in the even indices, and imaginaries in the odd indices + output + .chunks_exact_mut(2) + .zip(z_x_re) + .zip(z_x_im) + .for_each(|((out, z_r), z_i)| { + out[0] = z_r; + out[1] = z_i; + }); +} + +#[cfg(test)] +mod tests { + use utilities::assert_float_closeness; + + use crate::fft_32; + + use super::*; + + macro_rules! impl_r2c_vs_c2c_test { + ($func:ident, $precision:ty, $fft_precision:ident, $rfft_func:ident, $epsilon:literal) => { + #[test] + fn $func() { + for n in 4..=11 { + let big_n = 1 << n; + let input_re: Vec<$precision> = (1..=big_n).map(|i| i as $precision).collect(); // Length is 7, which is a prime number + let (mut output_re, mut output_im) = (vec![0.0; big_n], vec![0.0; big_n]); + + $rfft_func(&input_re, &mut output_re, &mut output_im); + + let mut input_re: Vec<$precision> = + (1..=big_n).map(|i| i as $precision).collect(); + let mut input_im = vec![0.0; input_re.len()]; // Assume the imaginary part is zero for this example + $fft_precision(&mut input_re, &mut input_im, Direction::Forward); + + output_re + .iter() + .zip(output_im.iter()) + .zip(input_re.iter()) + .zip(input_im.iter()) + .for_each(|(((a_re, a_im), e_re), e_im)| { + assert_float_closeness(*a_re, *e_re, $epsilon); + assert_float_closeness(*a_im, *e_im, $epsilon); + }); + } + } + }; + } + + impl_r2c_vs_c2c_test!(r2c_vs_c2c_f64, f64, fft_64, r2c_fft_f64, 1e-6); + impl_r2c_vs_c2c_test!(r2c_vs_c2c_f32, f32, fft_32, r2c_fft_f32, 3.5); + + #[test] + fn fw_inv_eq_identity() { + let n = 4; + let big_n = 1 << n; + let expected_signal: Vec = (1..=big_n).map(|s| s as f64).collect(); + + let mut output_re = vec![0.0; big_n]; + let mut output_im = vec![0.0; big_n]; + r2c_fft_f64(&expected_signal, &mut output_re, &mut output_im); + + let mut actual_signal = vec![0.0; big_n]; + r2c_ifft_f64(&mut output_re, &mut output_im, &mut actual_signal); + + for (z_a, z_e) in actual_signal.iter().zip(expected_signal.iter()) { + assert_float_closeness(*z_a, *z_e, 1e-10); + } + + println!( + "expected signal: {:?}\nactual signal: {:?}", + expected_signal, actual_signal + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index 09f64a1..5a5ee94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,6 +25,7 @@ use crate::planner::{Direction, Planner32, Planner64}; use crate::twiddles::filter_twiddles; pub mod cobra; +pub mod fft; mod kernels; pub mod options; pub mod planner; @@ -91,10 +92,10 @@ macro_rules! impl_fft_interleaved_for { }; } -#[doc(cfg(feature = "complex-nums"))] +// #[doc(cfg(feature = "complex-nums"))] #[cfg(feature = "complex-nums")] impl_fft_interleaved_for!(fft_32_interleaved, f32, fft_32, deinterleave_complex32); -#[doc(cfg(feature = "complex-nums"))] +// #[doc(cfg(feature = "complex-nums"))] #[cfg(feature = "complex-nums")] impl_fft_interleaved_for!(fft_64_interleaved, f64, fft_64, deinterleave_complex64); @@ -128,7 +129,7 @@ macro_rules! impl_fft_with_opts_and_plan_for { opts: &Options, planner: &$planner, ) { - assert!(reals.len() == imags.len() && reals.len().is_power_of_two()); + assert!(reals.len() == imags.len() && reals.len().is_power_of_two(), "reals.len() and imags.len() must be equal, and both should be a power of 2. Actual lengths - reals: {} imags: {}", reals.len(), imags.len()); let n: usize = reals.len().ilog2() as usize; // Use references to avoid unnecessary clones @@ -137,7 +138,7 @@ macro_rules! impl_fft_with_opts_and_plan_for { // We shouldn't be able to execute FFT if the # of twiddles isn't equal to the distance // between pairs - assert!(twiddles_re.len() == reals.len() / 2 && twiddles_im.len() == imags.len() / 2); + assert!(twiddles_re.len() == reals.len() / 2 && twiddles_im.len() == imags.len() / 2,"got: {} == {} and {} == {}", twiddles_re.len(), reals.len() / 2, twiddles_im.len(), imags.len() / 2); match planner.direction { Direction::Reverse => { diff --git a/utilities/src/lib.rs b/utilities/src/lib.rs index 0b330db..795d586 100644 --- a/utilities/src/lib.rs +++ b/utilities/src/lib.rs @@ -17,7 +17,7 @@ use rustfft::num_traits::Float; pub fn assert_float_closeness(actual: T, expected: T, epsilon: T) { if (actual - expected).abs() >= epsilon { panic!( - "Assertion failed: {actual} too far from expected value {expected} (with epsilon {epsilon})", + "Assertion failed: actual value {actual} too far from expected value {expected} (with epsilon {epsilon})", ); } }