From 8b436ef6ee028ca8b2b4274e468269caa4f21fcc Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Tue, 20 Jan 2026 21:53:06 +0000 Subject: [PATCH 1/5] Add initial experimental BRAVO impl --- src/algorithms/bravo.rs | 251 ++++++++++++++++++++++++++++++++++++++++ src/algorithms/mod.rs | 1 + 2 files changed, 252 insertions(+) create mode 100644 src/algorithms/bravo.rs diff --git a/src/algorithms/bravo.rs b/src/algorithms/bravo.rs new file mode 100644 index 0000000..78f7495 --- /dev/null +++ b/src/algorithms/bravo.rs @@ -0,0 +1,251 @@ +/// BRAVO: Bit-Reversal Algorithm using Vector permute Operations +/// +/// This implements the algorithm from "Optimal Bit-Reversal Using Vector Permutations" +/// by Lokhmotov and Mycroft (SPAA'07). +/// +/// The algorithm uses vector interleaving operations to perform bit-reversal permutation. +/// For N = 2^n elements with W-element vectors, the algorithm performs log₂(N) rounds +/// of in-place interleave operations on pairs of vectors. +/// +/// When nightly Rust with std::simd is available, this can use hardware SIMD instructions. +/// For now, we implement the interleave operation manually to demonstrate the algorithm. +/// +/// The initial implementation was heavily assisted by Claude Code + +const LANES: usize = 4; // Vector width W + +/// A simple vector type that mimics std::simd::Simd for f64 +#[derive(Clone, Copy)] +struct Vec4([f64; LANES]); + +impl Vec4 { + fn from_slice(s: &[f64]) -> Self { + Vec4([s[0], s[1], s[2], s[3]]) + } + + fn copy_to_slice(self, s: &mut [f64]) { + s[0] = self.0[0]; + s[1] = self.0[1]; + s[2] = self.0[2]; + s[3] = self.0[3]; + } + + /// Interleave two vectors, producing low and high halves. + /// For vectors [a0, a1, a2, a3] and [b0, b1, b2, b3]: + /// - low: [a0, b0, a1, b1] + /// - high: [a2, b2, a3, b3] + /// + /// This matches std::simd::Simd::interleave() behavior. 
+ fn interleave(self, other: Vec4) -> (Vec4, Vec4) { + let a = self.0; + let b = other.0; + let lo = Vec4([a[0], b[0], a[1], b[1]]); + let hi = Vec4([a[2], b[2], a[3], b[3]]); + (lo, hi) + } +} + +/// Performs in-place bit-reversal permutation using the BRAVO algorithm. +/// +/// # Arguments +/// * `data` - The slice to permute in-place. Length must be a power of 2 and >= LANES². +/// * `n` - The log₂ of the data length (i.e., data.len() == 2^n) +pub fn bit_rev_bravo(data: &mut [f64], n: usize) { + let big_n = 1usize << n; + assert_eq!(data.len(), big_n, "Data length must be 2^n"); + + // For very small arrays, fall back to scalar bit-reversal + if big_n < LANES * LANES { + scalar_bit_reversal(data, n); + return; + } + + let w = LANES; + let log_w = w.ilog2() as usize; // = 2 for W=4 + + // π = N / W² = number of equivalence classes + let num_classes = big_n / (w * w); + let class_bits = n - 2 * log_w; + + // Process each equivalence class. + // For in-place operation, we handle class pairs that swap with each other. + // We only process when class_idx <= class_idx_rev to avoid double processing. 
+ + for class_idx in 0..num_classes { + let class_idx_rev = if class_bits > 0 { + class_idx.reverse_bits() >> (usize::BITS - class_bits as u32) + } else { + 0 + }; + + // Only process if this is the "first" of a swapping pair (or self-mapping) + if class_idx > class_idx_rev { + continue; + } + + // Load vectors for class A + let mut vecs_a: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES]; + for j in 0..w { + let base_idx = (class_idx + j * num_classes) * w; + vecs_a[j] = Vec4::from_slice(&data[base_idx..base_idx + w]); + } + + // Perform interleave rounds for class A + for round in 0..log_w { + let mut new_vecs: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES]; + let stride = 1 << round; + + // W/2 pairs per round, stored as parallel arrays + let mut los: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2]; + let mut his: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2]; + + let mut pair_idx = 0; + let mut i = 0; + while i < w { + for offset in 0..stride { + let idx0 = i + offset; + let idx1 = i + offset + stride; + let (lo, hi) = vecs_a[idx0].interleave(vecs_a[idx1]); + los[pair_idx] = lo; + his[pair_idx] = hi; + pair_idx += 1; + } + i += stride * 2; + } + + for j in 0..(w / 2) { + let base = (j % stride) + (j / stride) * stride * 2; + new_vecs[base] = los[j]; + new_vecs[base + stride] = his[j]; + } + + vecs_a = new_vecs; + } + + if class_idx == class_idx_rev { + // Self-mapping class - just write back to same location + for j in 0..w { + let base_idx = (class_idx + j * num_classes) * w; + vecs_a[j].copy_to_slice(&mut data[base_idx..base_idx + w]); + } + } else { + // Swapping pair - load class B, process it, then swap both + let mut vecs_b: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES]; + for j in 0..w { + let base_idx = (class_idx_rev + j * num_classes) * w; + vecs_b[j] = Vec4::from_slice(&data[base_idx..base_idx + w]); + } + + // Perform interleave rounds for class B + for round in 0..log_w { + let mut new_vecs: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES]; + let stride 
= 1 << round; + + let mut los: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2]; + let mut his: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2]; + + let mut pair_idx = 0; + let mut i = 0; + while i < w { + for offset in 0..stride { + let idx0 = i + offset; + let idx1 = i + offset + stride; + let (lo, hi) = vecs_b[idx0].interleave(vecs_b[idx1]); + los[pair_idx] = lo; + his[pair_idx] = hi; + pair_idx += 1; + } + i += stride * 2; + } + + for j in 0..(w / 2) { + let base = (j % stride) + (j / stride) * stride * 2; + new_vecs[base] = los[j]; + new_vecs[base + stride] = his[j]; + } + + vecs_b = new_vecs; + } + + // Swap: write A's result to B's location and vice versa + for j in 0..w { + let base_idx_a = (class_idx + j * num_classes) * w; + let base_idx_b = (class_idx_rev + j * num_classes) * w; + vecs_a[j].copy_to_slice(&mut data[base_idx_b..base_idx_b + w]); + vecs_b[j].copy_to_slice(&mut data[base_idx_a..base_idx_a + w]); + } + } + } +} + +/// Scalar bit-reversal for small arrays +fn scalar_bit_reversal(data: &mut [f64], n: usize) { + let big_n = data.len(); + + for i in 0..big_n { + let j = reverse_bits_scalar(i, n as u32); + if i < j { + data.swap(i, j); + } + } +} + +/// Reverse the lower `bits` bits of `x` +fn reverse_bits_scalar(x: usize, bits: u32) -> usize { + if bits == 0 { + return 0; + } + x.reverse_bits() >> (usize::BITS - bits) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Top down bit reverse interleaving. This is a very simple and well known approach that we + /// only use for testing due to its lackluster performance. 
+    fn top_down_bit_reverse_permutation<T: Copy>(x: &[T]) -> Vec<T> {
+        if x.len() == 1 {
+            return x.to_vec();
+        }
+        let mut y = Vec::with_capacity(x.len());
+        let mut evens = Vec::with_capacity(x.len() >> 1);
+        let mut odds = Vec::with_capacity(x.len() >> 1);
+        let mut i = 1;
+        while i < x.len() {
+            evens.push(x[i - 1]);
+            odds.push(x[i]);
+            i += 2;
+        }
+        y.extend_from_slice(&top_down_bit_reverse_permutation(&evens));
+        y.extend_from_slice(&top_down_bit_reverse_permutation(&odds));
+        y
+    }
+
+    #[test]
+    fn test_bravo_bit_reversal() {
+        for n in 8..24 {
+            let big_n = 1 << n;
+            let mut actual_re: Vec<f64> = (0..big_n).map(f64::from).collect();
+            let mut actual_im: Vec<f64> = (0..big_n).map(f64::from).collect();
+            bit_rev_bravo(&mut actual_re, n);
+            bit_rev_bravo(&mut actual_im, n);
+            let input_re: Vec<f64> = (0..big_n).map(f64::from).collect();
+            let expected_re = top_down_bit_reverse_permutation(&input_re);
+            assert_eq!(actual_re, expected_re);
+            let input_im: Vec<f64> = (0..big_n).map(f64::from).collect();
+            let expected_im = top_down_bit_reverse_permutation(&input_im);
+            assert_eq!(actual_im, expected_im);
+        }
+    }
+
+    #[test]
+    fn test_small_cases() {
+        // Test n=4 (16 elements) - smallest case with W=4 vectors
+        let mut data: Vec<f64> = (0..16).map(f64::from).collect();
+        bit_rev_bravo(&mut data, 4);
+        let expected =
+            top_down_bit_reverse_permutation(&(0..16).map(f64::from).collect::<Vec<f64>>());
+        assert_eq!(data, expected);
+    }
+}
diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs
index 9a7120d..259b316 100644
--- a/src/algorithms/mod.rs
+++ b/src/algorithms/mod.rs
@@ -18,6 +18,7 @@
 //! - Use DIF if you don't want to or don't need to apply a bit reversal on the input.
 //! - Use DIT for slightly better performance.
+pub mod bravo; pub mod cobra; pub mod dif; pub mod dit; From 740225aa20772962e9a80e588f7638c20235e527 Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Tue, 20 Jan 2026 22:03:02 +0000 Subject: [PATCH 2/5] jankily bump vector size to confirm everything still works for larger sizes --- src/algorithms/bravo.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/algorithms/bravo.rs b/src/algorithms/bravo.rs index 78f7495..1650dcb 100644 --- a/src/algorithms/bravo.rs +++ b/src/algorithms/bravo.rs @@ -9,10 +9,10 @@ /// /// When nightly Rust with std::simd is available, this can use hardware SIMD instructions. /// For now, we implement the interleave operation manually to demonstrate the algorithm. -/// +/// /// The initial implementation was heavily assisted by Claude Code -const LANES: usize = 4; // Vector width W +const LANES: usize = 8; // Vector width W /// A simple vector type that mimics std::simd::Simd for f64 #[derive(Clone, Copy)] @@ -20,7 +20,7 @@ struct Vec4([f64; LANES]); impl Vec4 { fn from_slice(s: &[f64]) -> Self { - Vec4([s[0], s[1], s[2], s[3]]) + Vec4([s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]]) } fn copy_to_slice(self, s: &mut [f64]) { @@ -28,6 +28,10 @@ impl Vec4 { s[1] = self.0[1]; s[2] = self.0[2]; s[3] = self.0[3]; + s[4] = self.0[4]; + s[5] = self.0[5]; + s[6] = self.0[6]; + s[7] = self.0[7]; } /// Interleave two vectors, producing low and high halves. 
@@ -39,8 +43,8 @@ impl Vec4 {
     fn interleave(self, other: Vec4) -> (Vec4, Vec4) {
         let a = self.0;
         let b = other.0;
-        let lo = Vec4([a[0], b[0], a[1], b[1]]);
-        let hi = Vec4([a[2], b[2], a[3], b[3]]);
+        let lo = Vec4([a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]]);
+        let hi = Vec4([a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7]]);
         (lo, hi)
     }
 }

From d867542badcefcf20320da581d6ee8f44c106900 Mon Sep 17 00:00:00 2001
From: "Sergey \"Shnatsel\" Davidoff"
Date: Tue, 20 Jan 2026 22:07:47 +0000
Subject: [PATCH 3/5] Make BRAVO impl generic over T as opposed to f64-only

---
 src/algorithms/bravo.rs | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/algorithms/bravo.rs b/src/algorithms/bravo.rs
index 1650dcb..2cc4f77 100644
--- a/src/algorithms/bravo.rs
+++ b/src/algorithms/bravo.rs
@@ -15,15 +15,15 @@
 const LANES: usize = 8; // Vector width W
 
 /// A simple vector type that mimics std::simd::Simd for f64
-#[derive(Clone, Copy)]
-struct Vec4([f64; LANES]);
+#[derive(Clone, Copy, Default)]
+struct Vec4<T>([T; LANES]);
 
-impl Vec4 {
-    fn from_slice(s: &[f64]) -> Self {
+impl<T: Copy> Vec4<T> {
+    fn from_slice(s: &[T]) -> Self {
         Vec4([s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]])
     }
 
-    fn copy_to_slice(self, s: &mut [f64]) {
+    fn copy_to_slice(self, s: &mut [T]) {
         s[0] = self.0[0];
         s[1] = self.0[1];
         s[2] = self.0[2];
@@ -40,7 +40,7 @@ impl Vec4 {
     /// - high: [a2, b2, a3, b3]
     ///
     /// This matches std::simd::Simd::interleave() behavior.
-    fn interleave(self, other: Vec4) -> (Vec4, Vec4) {
+    fn interleave(self, other: Vec4<T>) -> (Vec4<T>, Vec4<T>) {
         let a = self.0;
         let b = other.0;
         let lo = Vec4([a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]]);
@@ -54,7 +54,7 @@
 /// # Arguments
 /// * `data` - The slice to permute in-place. Length must be a power of 2 and >= LANES².
 /// * `n` - The log₂ of the data length (i.e., data.len() == 2^n)
-pub fn bit_rev_bravo(data: &mut [f64], n: usize) {
+pub fn bit_rev_bravo<T: Default + Copy>(data: &mut [T], n: usize) {
     let big_n = 1usize << n;
     assert_eq!(data.len(), big_n, "Data length must be 2^n");
@@ -88,7 +88,7 @@
         }
 
         // Load vectors for class A
-        let mut vecs_a: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES];
+        let mut vecs_a: [Vec4<T>; LANES] = [Vec4([T::default(); LANES]); LANES];
         for j in 0..w {
             let base_idx = (class_idx + j * num_classes) * w;
             vecs_a[j] = Vec4::from_slice(&data[base_idx..base_idx + w]);
@@ -96,12 +96,12 @@
 
         // Perform interleave rounds for class A
         for round in 0..log_w {
-            let mut new_vecs: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES];
+            let mut new_vecs: [Vec4<T>; LANES] = [Vec4([T::default(); LANES]); LANES];
             let stride = 1 << round;
 
             // W/2 pairs per round, stored as parallel arrays
-            let mut los: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2];
-            let mut his: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2];
+            let mut los: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
+            let mut his: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
 
             let mut pair_idx = 0;
             let mut i = 0;
@@ -134,7 +134,7 @@
             }
         } else {
             // Swapping pair - load class B, process it, then swap both
-            let mut vecs_b: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES];
+            let mut vecs_b: [Vec4<T>; LANES] = [Vec4([T::default(); LANES]); LANES];
             for j in 0..w {
                 let base_idx = (class_idx_rev + j * num_classes) * w;
                 vecs_b[j] = Vec4::from_slice(&data[base_idx..base_idx + w]);
@@ -142,11 +142,11 @@
 
             // Perform interleave rounds for class B
             for round in 0..log_w {
-                let mut new_vecs: [Vec4; LANES] = [Vec4([0.0; LANES]); LANES];
+                let mut new_vecs: [Vec4<T>; LANES] = [Vec4([T::default(); LANES]); LANES];
                 let stride
-                let mut los: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2];
-                let mut his: [Vec4; LANES / 2] = [Vec4([0.0; LANES]); LANES / 2];
+                let mut los: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
+                let mut his: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
 
                 let mut pair_idx = 0;
                 let mut i = 0;
@@ -183,7 +183,7 @@
 }
 
 /// Scalar bit-reversal for small arrays
-fn scalar_bit_reversal(data: &mut [f64], n: usize) {
+fn scalar_bit_reversal<T>(data: &mut [T], n: usize) {
     let big_n = data.len();
 
     for i in 0..big_n {

From 8a6bbc5bf467f16c207f471ff0642f919e3e4060 Mon Sep 17 00:00:00 2001
From: "Sergey \"Shnatsel\" Davidoff"
Date: Tue, 20 Jan 2026 22:14:05 +0000
Subject: [PATCH 4/5] BRAVO: Port to portable_simd proper instead of stable polyfill

---
 Cargo.toml              |  1 +
 src/algorithms/bravo.rs | 65 ++++++++++++-----------------------------
 src/algorithms/mod.rs   |  1 +
 src/lib.rs              |  1 +
 4 files changed, 21 insertions(+), 47 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 99ae92f..ce457a7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ rayon = { version = "1.11.0", optional = true }
 default = []
 complex-nums = ["dep:num-complex", "dep:bytemuck"]
 parallel = ["dep:rayon"]
+nightly = []
 
 [dev-dependencies]
 criterion = "0.8.0"
diff --git a/src/algorithms/bravo.rs b/src/algorithms/bravo.rs
index 2cc4f77..83013e7 100644
--- a/src/algorithms/bravo.rs
+++ b/src/algorithms/bravo.rs
@@ -7,54 +7,25 @@
 /// For N = 2^n elements with W-element vectors, the algorithm performs log₂(N) rounds
 /// of in-place interleave operations on pairs of vectors.
 ///
-/// When nightly Rust with std::simd is available, this can use hardware SIMD instructions.
-/// For now, we implement the interleave operation manually to demonstrate the algorithm.
+/// Uses std::simd for hardware SIMD instructions on nightly Rust.
 ///
 /// The initial implementation was heavily assisted by Claude Code
+use std::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 
 const LANES: usize = 8; // Vector width W
 
-/// A simple vector type that mimics std::simd::Simd for f64
-#[derive(Clone, Copy, Default)]
-struct Vec4<T>([T; LANES]);
-
-impl<T: Copy> Vec4<T> {
-    fn from_slice(s: &[T]) -> Self {
-        Vec4([s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]])
-    }
-
-    fn copy_to_slice(self, s: &mut [T]) {
-        s[0] = self.0[0];
-        s[1] = self.0[1];
-        s[2] = self.0[2];
-        s[3] = self.0[3];
-        s[4] = self.0[4];
-        s[5] = self.0[5];
-        s[6] = self.0[6];
-        s[7] = self.0[7];
-    }
-
-    /// Interleave two vectors, producing low and high halves.
-    /// For vectors [a0, a1, a2, a3] and [b0, b1, b2, b3]:
-    /// - low: [a0, b0, a1, b1]
-    /// - high: [a2, b2, a3, b3]
-    ///
-    /// This matches std::simd::Simd::interleave() behavior.
-    fn interleave(self, other: Vec4<T>) -> (Vec4<T>, Vec4<T>) {
-        let a = self.0;
-        let b = other.0;
-        let lo = Vec4([a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]]);
-        let hi = Vec4([a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7]]);
-        (lo, hi)
-    }
-}
+type Vec8<T> = Simd<T, LANES>;
 
 /// Performs in-place bit-reversal permutation using the BRAVO algorithm.
 ///
 /// # Arguments
 /// * `data` - The slice to permute in-place. Length must be a power of 2 and >= LANES².
 /// * `n` - The log₂ of the data length (i.e., data.len() == 2^n)
-pub fn bit_rev_bravo<T: Default + Copy>(data: &mut [T], n: usize) {
+pub fn bit_rev_bravo<T>(data: &mut [T], n: usize)
+where
+    T: Default + Copy + Clone + SimdElement,
+    LaneCount<LANES>: SupportedLaneCount,
+{
     let big_n = 1usize << n;
     assert_eq!(data.len(), big_n, "Data length must be 2^n");
@@ -88,20 +59,20 @@
         }
 
         // Load vectors for class A
-        let mut vecs_a: [Vec4<T>; LANES] = [Vec4([T::default(); LANES]); LANES];
+        let mut vecs_a: [Vec8<T>; LANES] = [Simd::splat(T::default()); LANES];
         for j in 0..w {
             let base_idx = (class_idx + j * num_classes) * w;
-            vecs_a[j] = Vec4::from_slice(&data[base_idx..base_idx + w]);
+            vecs_a[j] = Simd::from_slice(&data[base_idx..base_idx + w]);
         }
 
         // Perform interleave rounds for class A
         for round in 0..log_w {
-            let mut new_vecs: [Vec4<T>; LANES] = [Vec4([T::default(); LANES]); LANES];
+            let mut new_vecs: [Vec8<T>; LANES] = [Simd::splat(T::default()); LANES];
             let stride = 1 << round;
 
             // W/2 pairs per round, stored as parallel arrays
-            let mut los: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
-            let mut his: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
+            let mut los: [Vec8<T>; LANES / 2] = [Simd::splat(T::default()); LANES / 2];
+            let mut his: [Vec8<T>; LANES / 2] = [Simd::splat(T::default()); LANES / 2];
 
             let mut pair_idx = 0;
             let mut i = 0;
@@ -134,7 +105,7 @@
             }
         } else {
             // Swapping pair - load class B, process it, then swap both
-            let mut vecs_b: [Vec4<T>; LANES] = [Vec4([T::default(); LANES]); LANES];
+            let mut vecs_b: [Vec8<T>; LANES] = [Simd::splat(T::default()); LANES];
             for j in 0..w {
                 let base_idx = (class_idx_rev + j * num_classes) * w;
-                vecs_b[j] = Vec4::from_slice(&data[base_idx..base_idx + w]);
+                vecs_b[j] = Simd::from_slice(&data[base_idx..base_idx + w]);
             }
 
             // Perform interleave rounds for class B
             for round in 0..log_w {
-                let mut new_vecs: [Vec4<T>; LANES] = [Vec4([T::default(); 
LANES]); LANES];
+                let mut new_vecs: [Vec8<T>; LANES] = [Simd::splat(T::default()); LANES];
                 let stride = 1 << round;
 
-                let mut los: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
-                let mut his: [Vec4<T>; LANES / 2] = [Vec4([T::default(); LANES]); LANES / 2];
+                let mut los: [Vec8<T>; LANES / 2] = [Simd::splat(T::default()); LANES / 2];
+                let mut his: [Vec8<T>; LANES / 2] = [Simd::splat(T::default()); LANES / 2];
 
                 let mut pair_idx = 0;
                 let mut i = 0;
diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs
index 259b316..1d9023d 100644
--- a/src/algorithms/mod.rs
+++ b/src/algorithms/mod.rs
@@ -18,6 +18,7 @@
 //! - Use DIF if you don't want to or don't need to apply a bit reversal on the input.
 //! - Use DIT for slightly better performance.
 
+#[cfg(feature = "nightly")]
 pub mod bravo;
 pub mod cobra;
 pub mod dif;
diff --git a/src/lib.rs b/src/lib.rs
index ccab469..7354096 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
+#![cfg_attr(feature = "nightly", feature(portable_simd))]
 #![doc = include_str!("../README.md")]
 #![warn(
     missing_docs,

From 7d8d8658ddabd24e22ea3e244591dac91f0fbdf2 Mon Sep 17 00:00:00 2001
From: "Sergey \"Shnatsel\" Davidoff"
Date: Tue, 20 Jan 2026 22:16:26 +0000
Subject: [PATCH 5/5] jankily switch from cobra to bravo to run benchmarks

---
 src/algorithms/dit.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/algorithms/dit.rs b/src/algorithms/dit.rs
index 0353354..e712719 100644
--- a/src/algorithms/dit.rs
+++ b/src/algorithms/dit.rs
@@ -14,7 +14,7 @@
 //! DIT starts with fine-grained memory access and progressively works with
 //! larger contiguous chunks.
 //!
-use crate::algorithms::cobra::cobra_apply; +use crate::algorithms::bravo::bit_rev_bravo; use crate::kernels::dit::{ fft_dit_32_chunk_n_simd, fft_dit_64_chunk_n_simd, fft_dit_chunk_16_simd_f32, fft_dit_chunk_16_simd_f64, fft_dit_chunk_2, fft_dit_chunk_32_simd_f32, @@ -250,8 +250,8 @@ pub fn fft_64_dit_with_planner_and_opts( // DIT requires bit-reversed input run_maybe_in_parallel( opts.multithreaded_bit_reversal, - || cobra_apply(reals, log_n), - || cobra_apply(imags, log_n), + || bit_rev_bravo(reals, log_n), + || bit_rev_bravo(imags, log_n), ); // Handle inverse FFT @@ -293,8 +293,8 @@ pub fn fft_32_dit_with_planner_and_opts( // DIT requires bit-reversed input run_maybe_in_parallel( opts.multithreaded_bit_reversal, - || cobra_apply(reals, log_n), - || cobra_apply(imags, log_n), + || bit_rev_bravo(reals, log_n), + || bit_rev_bravo(imags, log_n), ); // Handle inverse FFT