From aa5342cca31e6e7c1d7cbaf570d01fc5afb69964 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sun, 11 Jan 2026 20:10:13 -0500 Subject: [PATCH 01/16] add run-time loop variants of linear methods and eliminate recursive methods --- src/multilinear/mod.rs | 4 - src/multilinear/rectilinear.rs | 106 +++--- src/multilinear/rectilinear_recursive.rs | 446 ---------------------- src/multilinear/regular.rs | 112 +++--- src/multilinear/regular_recursive.rs | 463 ----------------------- src/nearest/regular.rs | 5 - 6 files changed, 118 insertions(+), 1018 deletions(-) delete mode 100644 src/multilinear/rectilinear_recursive.rs delete mode 100644 src/multilinear/regular_recursive.rs diff --git a/src/multilinear/mod.rs b/src/multilinear/mod.rs index 8aa3d78..d7327c1 100644 --- a/src/multilinear/mod.rs +++ b/src/multilinear/mod.rs @@ -2,11 +2,7 @@ //! See individual modules for more detailed documentation. pub mod rectilinear; -pub mod rectilinear_recursive; pub mod regular; -pub mod regular_recursive; pub use rectilinear::MultilinearRectilinear; -pub use rectilinear_recursive::MultilinearRectilinearRecursive; pub use regular::MultilinearRegular; -pub use regular_recursive::MultilinearRegularRecursive; diff --git a/src/multilinear/rectilinear.rs b/src/multilinear/rectilinear.rs index a9181b1..5bee9ba 100644 --- a/src/multilinear/rectilinear.rs +++ b/src/multilinear/rectilinear.rs @@ -27,7 +27,6 @@ //! //! References //! * https://en.wikipedia.org/wiki/Bilinear_interpolation#Weighted_mean -use super::MultilinearRectilinearRecursive; use crate::index_arr_fixed_dims; use crunchy::unroll; use num_traits::Float; @@ -72,8 +71,10 @@ pub fn interpn( .interp(obs.try_into().unwrap(), out), 6 => MultilinearRectilinear::<'_, T, 6>::new(grids.try_into().unwrap(), vals)? .interp(obs.try_into().unwrap(), out), - 7 => MultilinearRectilinearRecursive::<'_, T, 7>::new(grids, vals)?.interp(obs, out), - 8 => MultilinearRectilinearRecursive::<'_, T, 8>::new(grids, vals)?.interp(obs, out), + 7 => MultilinearRectilinear::<'_, T, 7>::new(grids.try_into().unwrap(), vals)? + .interp(obs.try_into().unwrap(), out), + 8 => MultilinearRectilinear::<'_, T, 8>::new(grids.try_into().unwrap(), vals)? + .interp(obs.try_into().unwrap(), out), _ => Err( "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", ), @@ -174,12 +175,6 @@ impl<'a, T: Float, const N: usize> MultilinearRectilinear<'a, T, N> { /// * If any step sizes have zero or negative magnitude pub fn new(grids: &'a [&'a [T]; N], vals: &'a [T]) -> Result { // Check dimensions - const { - assert!( - N > 0 && N < 7, - "Flattened method defined for 1-6 dimensions. For higher dimensions, use recursive method." - ); - } let mut dims = [1_usize; N]; (0..N).for_each(|i| dims[i] = grids[i].len()); let nvals: usize = dims[..N].iter().product(); @@ -210,11 +205,6 @@ impl<'a, T: Float, const N: usize> MultilinearRectilinear<'a, T, N> { pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { let n = out.len(); - // Make sure there are enough coordinate inputs for each dimension - if x.len() != N { - return Err("Dimension mismatch"); - } - // Make sure the size of inputs and output match let size_matches = x.iter().all(|&xx| xx.len() == out.len()); if !size_matches { @@ -274,52 +264,66 @@ impl<'a, T: Float, const N: usize> MultilinearRectilinear<'a, T, N> { const FP: usize = 2; // Footprint size let nverts = const { FP.pow(N as u32) }; // Total number of vertices - unroll! { - for i < 64 in 0..nverts { // const loop + macro_rules! unroll_vertices_body { + ($i:ident) => { // Index, interpolate, or pass on each level of the tree for j in 0..N { - - // Most of these iterations will get optimized out - if j == 0 { // const branch - // At leaves, index values - for k in 0..N { - // Bit pattern in an integer matches C-ordered array indexing - // so we can just use the vertex index to index into the array - // by selecting the appropriate bit from the index. - let offset: usize = (i & (1 << k)) >> k; - loc[k] = origin[k] + offset; - } - const STORE_IND: usize = i % FP; - store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals); + // Most of these iterations will get optimized out + if j == 0 { + // const branch + // At leaves, index values + for k in 0..N { + // Bit pattern in an integer matches C-ordered array indexing + // so we can just use the vertex index to index into the array + // by selecting the appropriate bit from the index. + let offset: usize = ($i & (1 << k)) >> k; + loc[k] = origin[k] + offset; } - else { // const branch - // For other nodes, interpolate on child values - - let q: usize = FP.pow(j as u32); - let level: bool = (i + 1).is_multiple_of(q); - let p: usize = ((i + 1) / q).saturating_sub(1) % FP; + let store_ind: usize = $i % FP; + store[0][store_ind] = index_arr_fixed_dims(loc, dimprod, self.vals); + } else { + // const branch + // For other nodes, interpolate on child values + let q: usize = FP.pow(j as u32); + let level: bool = ($i + 1).is_multiple_of(q); + + if level { + // const branch + let p: usize = (($i + 1) / q).saturating_sub(1) % FP; let ind: usize = j.saturating_sub(1); + let x0 = self.grids[ind][origin[ind]]; + let x1 = self.grids[ind][origin[ind] + 1]; + let step = x1 - x0; + let t = (x[ind] - x0) / step; - if level { // const branch - let x0 = self.grids[ind][origin[ind]]; - let x1 = self.grids[ind][origin[ind] + 1]; - let step = x1 - x0; - let t = (x[ind] - x0) / step; - - let y0 = store[ind][0]; - let dy = store[ind][1] - y0; + let y0 = store[ind][0]; + let dy = store[ind][1] - y0; - #[cfg(not(feature = "fma"))] - let interped = y0 + t * dy; - #[cfg(feature = "fma")] - let interped = t.mul_add(dy, y0); + #[cfg(not(feature = "fma"))] + let interped = y0 + t * dy; + #[cfg(feature = "fma")] + let interped = t.mul_add(dy, y0); - store[j][p] = interped; - } + store[j][p] = interped; } + } + } + }; + } + // Select flattened vs. looped implementation + if N <= 6 { + // For small numbers of dimensions, unroll at compile time + unroll! { + for i < 64 in 0..nverts { // const loop + unroll_vertices_body!(i) } } + } else { + // For larger numbers of dimensions, execute the loop + for i in 0..nverts { + unroll_vertices_body!(i) + } } // Interpolate the final value @@ -405,10 +409,10 @@ mod test { /// Each test evaluates at 3^ndims locations, largely extrapolated in corner regions, so it /// rapidly becomes prohibitively slow after about ndims=9. #[test] - fn test_interp_extrap_1d_to_6d() { + fn test_interp_extrap_1d_to_8d() { let mut rng = rng_fixed_seed(); - for ndims in 1..=6 { + for ndims in 1..=8 { println!("Testing in {ndims} dims"); // Interp grid let dims: Vec = vec![2; ndims]; diff --git a/src/multilinear/rectilinear_recursive.rs b/src/multilinear/rectilinear_recursive.rs deleted file mode 100644 index a2f7a8e..0000000 --- a/src/multilinear/rectilinear_recursive.rs +++ /dev/null @@ -1,446 +0,0 @@ -//! Multilinear interpolation/extrapolation on a rectilinear grid. -//! -//! ```rust -//! use interpn::multilinear::rectilinear; -//! -//! // Define a grid -//! let x = [1.0_f64, 1.2, 2.0]; -//! let y = [1.0_f64, 1.3, 1.5]; -//! -//! // Grid input for rectilinear method -//! let grids = &[&x[..], &y[..]]; -//! -//! // Values at grid points -//! let z = [2.0; 9]; -//! -//! // Points to interpolate/extrapolate -//! let xobs = [0.0_f64, 5.0]; -//! let yobs = [-1.0, 3.0]; -//! let obs = [&xobs[..], &yobs[..]]; -//! -//! // Storage for output -//! let mut out = [0.0; 2]; -//! -//! // Do interpolation, allocating for the output for convenience -//! rectilinear::interpn_alloc(grids, &z, &obs).unwrap(); -//! ``` -//! -//! References -//! * https://en.wikipedia.org/wiki/Bilinear_interpolation#Weighted_mean -use crate::index_arr; -use num_traits::Float; - -/// Evaluate multicubic interpolation on a regular grid in up to 8 dimensions. -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// -/// This is a convenience function; best performance will be achieved by using the exact right -/// number for the MAXDIMS parameter, as this will slightly reduce compute and storage overhead, -/// and the underlying method can be extended to more than this function's limit of 8 dimensions. -/// The limit of 8 dimensions was chosen for no more specific reason than to reduce unit test times. -/// -/// While this method initializes the interpolator struct on every call, the overhead of doing this -/// is minimal even when using it to evaluate one observation point at a time. -pub fn interpn( - grids: &[&[T]], - vals: &[T], - obs: &[&[T]], - out: &mut [T], -) -> Result<(), &'static str> { - // Expanding out and using the specialized version for each size - // gives a substantial speedup for lower dimensionalities - // (4-5x speedup for 1-dim compared to using MAXDIMS=8) - let ndims = grids.len(); - match ndims { - 1 => MultilinearRectilinearRecursive::<'_, T, 1>::new(grids, vals)?.interp(obs, out), - 2 => MultilinearRectilinearRecursive::<'_, T, 2>::new(grids, vals)?.interp(obs, out), - 3 => MultilinearRectilinearRecursive::<'_, T, 3>::new(grids, vals)?.interp(obs, out), - 4 => MultilinearRectilinearRecursive::<'_, T, 4>::new(grids, vals)?.interp(obs, out), - 5 => MultilinearRectilinearRecursive::<'_, T, 5>::new(grids, vals)?.interp(obs, out), - 6 => MultilinearRectilinearRecursive::<'_, T, 6>::new(grids, vals)?.interp(obs, out), - 7 => MultilinearRectilinearRecursive::<'_, T, 7>::new(grids, vals)?.interp(obs, out), - 8 => MultilinearRectilinearRecursive::<'_, T, 8>::new(grids, vals)?.interp(obs, out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - }?; - - Ok(()) -} - -/// Evaluate interpolant, allocating a new Vec for the output. -/// -/// For best results, use the `interpn` function with preallocated output; -/// allocation has a significant performance cost, and should be used sparingly. -#[cfg(feature = "std")] -pub fn interpn_alloc( - grids: &[&[T]], - vals: &[T], - obs: &[&[T]], -) -> Result, &'static str> { - let mut out = vec![T::zero(); obs[0].len()]; - interpn(grids, vals, obs, &mut out)?; - Ok(out) -} - -/// Check whether a list of observation points are inside the grid within some absolute tolerance. -/// Assumes the grid is valid for the rectilinear interpolator (monotonically increasing). -/// -/// Output slice entry `i` is set to `false` if no points on that dimension are out of bounds, -/// and set to `true` if there is a bounds violation on that axis. -/// -/// # Errors -/// * If the dimensionality of the grid does not match the dimensionality of the observation points -/// * If the output slice length does not match the dimensionality of the grid -pub fn check_bounds( - grids: &[&[T]], - obs: &[&[T]], - atol: T, - out: &mut [bool], -) -> Result<(), &'static str> { - let ndims = grids.len(); - if !(obs.len() == ndims && out.len() == ndims && (0..ndims).all(|i| !grids[i].is_empty())) { - return Err("Dimension mismatch"); - } - for i in 0..ndims { - let lo = grids[i][0]; - let hi = grids[i].last(); - match hi { - Some(&hi) => { - let bad = obs[i] - .iter() - .any(|&x| (x - lo) <= -atol || (x - hi) >= atol); - - out[i] = bad; - } - None => return Err("Dimension mismatch"), - } - } - Ok(()) -} - -/// An arbitrary-dimensional multilinear interpolator / extrapolator on a rectilinear grid. -/// -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// Assumes grids are monotonically _increasing_. Checking this is expensive, and is -/// left to the user. -/// -/// Operation Complexity -/// * O(2^ndims) for interpolation and extrapolation in all regions. -/// -/// Memory Complexity -/// * Peak stack usage is O(MAXDIMS), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of MAXDIMS, which provides a guarantee on peak -/// memory usage. -/// -/// Timing -/// * Timing determinism is very tight, but not guaranteed due to the use of a bisection search. -pub struct MultilinearRectilinearRecursive<'a, T: Float, const MAXDIMS: usize> { - /// x, y, ... coordinate grids, each entry of size dims[i] - grids: &'a [&'a [T]], - - /// Size of each dimension - dims: [usize; MAXDIMS], - - /// Values at each point, size prod(dims) - vals: &'a [T], -} - -impl<'a, T: Float, const MAXDIMS: usize> MultilinearRectilinearRecursive<'a, T, MAXDIMS> { - /// Build a new interpolator, using O(MAXDIMS) calculations and storage. - /// - /// This method does not handle degenerate dimensions; all grids must have at least 4 entries. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If any input dimensions do not match - /// * If any dimensions have size < 4 - /// * If any step sizes have zero or negative magnitude - pub fn new(grids: &'a [&'a [T]], vals: &'a [T]) -> Result { - // Check dimensions - let ndims = grids.len(); - let mut dims = [1_usize; MAXDIMS]; - (0..ndims).for_each(|i| dims[i] = grids[i].len()); - let nvals: usize = dims[..ndims].iter().product(); - if !(vals.len() == nvals && ndims > 0 && ndims <= MAXDIMS) { - return Err("Dimension mismatch"); - }; - // Check if any grids are degenerate - let degenerate = dims[..ndims].iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least 2 entries"); - }; - // Check that at least the first two entries in each grid are monotonic - let monotonic_maybe = grids.iter().all(|&g| g[1] > g[0]); - if !monotonic_maybe { - return Err("All grids must be monotonically increasing"); - }; - - Ok(Self { grids, dims, vals }) - } - - /// Interpolate on a contiguous list of observation points. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of point or data does not match the grid - pub fn interp(&self, x: &[&[T]], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - let ndims = self.grids.len(); - - // Make sure there are enough coordinate inputs for each dimension - if x.len() != ndims { - return Err("Dimension mismatch"); - } - - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } - - let tmp = &mut [T::zero(); MAXDIMS][..ndims]; - for i in 0..n { - (0..ndims).for_each(|j| tmp[j] = x[j][i]); - out[i] = self.interp_one(tmp)?; - } - - Ok(()) - } - - /// Interpolate the value at a point, - /// using fixed-size intermediate storage of O(ndims) and no allocation. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of either one exceeds the fixed maximum - /// * If the index along any dimension exceeds the maximum representable - /// integer value within the value type `T` - pub fn interp_one(&self, x: &[T]) -> Result { - // Check sizes - let ndims = self.grids.len(); - if !(x.len() == ndims && ndims <= MAXDIMS) { - return Err("Dimension mismatch"); - } - - // Initialize fixed-size intermediate storage. - // Maybe counterintuitively, initializing this storage here on every usage - // instead of once with the top level struct is a significant speedup - // and does not increase peak stack usage. - // - // Also notably, storing the index offsets as bool instead of usize - // reduces memory overhead, but has not effect on throughput rate. - let origin = &mut [0_usize; MAXDIMS][..ndims]; // Indices of lower corner of hypercube - let dimprod = &mut [1_usize; MAXDIMS][..ndims]; - - // Populate cumulative product of higher dimensions for indexing. - // - // Each entry is the cumulative product of the size of dimensions - // higher than this one, which is the stride between blocks - // relating to a given index along each dimension. - let mut acc = 1; - for i in 0..ndims { - dimprod[ndims - i - 1] = acc; - acc *= self.dims[ndims - i - 1]; - } - - // Populate lower corner and saturation flag for each dimension - for i in 0..ndims { - origin[i] = self.get_loc(x[i], i)?; - } - - // Recursive interpolation of one dependency tree at a time - // let loc = &origin; // Starting location in the tree is the origin - let dim = ndims; // Start from the end and recurse back to zero - let loc = &mut [0_usize; MAXDIMS][..ndims]; - loc.copy_from_slice(origin); - let interped = self.populate(dim, origin, loc, dimprod, x); - - Ok(interped) - } - - /// Get the lower-corner index along this dimension where `x` is found, - /// saturating to the bounds at the edges if necessary. - /// - /// At the high bound of a given dimension, saturates to the interior. - #[inline] - fn get_loc(&self, v: T, dim: usize) -> Result { - let grid = self.grids[dim]; - - // Bisection search to find location on the grid. - // - // The search will return `0` if the point is outside-low, - // and will return `self.dims[dim]` if outside-high. - // - // This process accounts for essentially the entire difference in - // performance between this method and the regular-grid method. - let iloc: isize = grid.partition_point(|x| *x < v) as isize - 1; - - let n = self.dims[dim] as isize; // Number of grid points on this dimension - let dimmax = n.saturating_sub(2).max(0); // maximum index for lower corner - let loc: usize = iloc.max(0).min(dimmax) as usize; // unsigned integer loc clipped to interior - - Ok(loc) - } - - /// Recursive evaluation of interpolant on each dimension - #[inline] - fn populate( - &self, - dim: usize, - origin: &[usize], - loc: &mut [usize], - dimprod: &[usize], - x: &[T], - ) -> T { - // Do the calc for this entry - match dim { - // If we have arrived at a leaf, index into data - 0 => index_arr(loc, dimprod, self.vals), - - // Otherwise, continue recursion - _ => { - // Keep track of where we are in the tree - // so that we can index into the value array properly - // when we reach the leaves - let next_dim = dim - 1; - - // Populate next dim's values - let mut vals = [T::zero(); 2]; - for i in 0..2 { - loc[next_dim] = origin[next_dim] + i; - vals[i] = self.populate(next_dim, origin, loc, dimprod, x); - } - loc[next_dim] = origin[next_dim]; // Reset for next usage - - // Interpolate on next dim's values to populate an entry in this dim - let grid_cell = &self.grids[next_dim][origin[next_dim]..origin[next_dim] + 2]; - let step = grid_cell[1] - grid_cell[0]; - let t = (x[next_dim] - grid_cell[0]) / step; - let y0 = vals[0]; - let dy = vals[1] - y0; - - #[cfg(not(feature = "fma"))] - let interped = y0 + t * dy; - #[cfg(feature = "fma")] - let interped = t.mul_add(dy, y0); - - interped - } - } - } -} - -#[cfg(test)] -mod test { - use super::{MultilinearRectilinearRecursive, interpn}; - use crate::testing::*; - use crate::utils::*; - - /// Test with one dimension that is minimum size and one that is not - #[test] - fn test_interp_extrap_2d_small() { - let (nx, ny) = (3, 2); - let x = linspace(-1.0, 1.0, nx); - let y = Vec::from([0.5, 0.6]); - let grids = [&x[..], &y[..]]; - let xy = meshgrid(Vec::from([&x, &y])); - - // z = x + y - let z: Vec = (0..nx * ny).map(|i| &xy[i][0] + &xy[i][1]).collect(); - - // Observation points all over in 2D space - let xobs = linspace(-10.0_f64, 10.0, 5); - let yobs = linspace(-10.0_f64, 10.0, 5); - let xyobs = meshgrid(Vec::from([&xobs, &yobs])); - let zobs: Vec = (0..xobs.len() * yobs.len()) - .map(|i| &xyobs[i][0] + &xyobs[i][1]) - .collect(); // Every `z` should match the degenerate `y` value - - let interpolator: MultilinearRectilinearRecursive<'_, _, 2> = - MultilinearRectilinearRecursive::new(&grids, &z[..]).unwrap(); - - // Check values at every incident vertex - xyobs.iter().zip(zobs.iter()).for_each(|(xyi, zi)| { - let zii = interpolator.interp_one(&[xyi[0], xyi[1]]).unwrap(); - assert!((*zi - zii).abs() < 1e-12) - }); - } - - /// Iterate from 1 to 8 dimensions, making a minimum-sized grid for each one - /// to traverse every combination of interpolating or extrapolating high or low on each dimension. - /// Each test evaluates at 3^ndims locations, largely extrapolated in corner regions, so it - /// rapidly becomes prohibitively slow after about ndims=9. - #[test] - fn test_interp_extrap_1d_to_8d() { - let mut rng = rng_fixed_seed(); - - for ndims in 1..=8 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![2; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| { - // Make a linear grid and add noise - let mut x = linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i]); - let dx = randn::(&mut rng, x.len()); - (0..x.len()).for_each(|i| x[i] += (dx[i] - 0.5) / 10.0); - (0..x.len() - 1).for_each(|i| assert!(x[i + 1] > x[i])); - x - }) - .collect(); - - let grids: Vec<&[f64]> = xs.iter().map(|x| &x[..]).collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = grid.iter().map(|x| x.iter().sum()).collect(); // sum is linear in every direction, good for testing - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-7.0 * (i as f64), 7.0 * ((i + 1) as f64), 3)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = gridobs.iter().map(|x| x.iter().sum()).collect(); // expected output at observation points - let mut out = vec![0.0; uobs.len()]; - - // Evaluate - interpn(&grids, &u, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-12)); - } - } - - /// Interpolate on a hat-shaped function to make sure that the grid cell indexing is aligned properly - #[test] - fn test_interp_hat_func() { - fn hat_func(x: f64) -> f64 { - if x <= 1.0 { x } else { 2.0 - x } - } - - let x = (0..3).map(|x| x as f64).collect::>(); - let grids = [&x[..]]; - let y = (0..3).map(|x| hat_func(x as f64)).collect::>(); - let obs = linspace(-2.0, 4.0, 100); - - let interpolator: MultilinearRectilinearRecursive = - MultilinearRectilinearRecursive::new(&grids, &y).unwrap(); - - (0..obs.len()).for_each(|i| { - assert_eq!( - hat_func(obs[i]), - interpolator.interp_one(&[obs[i]]).unwrap() - ); - }) - } -} diff --git a/src/multilinear/regular.rs b/src/multilinear/regular.rs index 12aeaf5..538d67d 100644 --- a/src/multilinear/regular.rs +++ b/src/multilinear/regular.rs @@ -29,7 +29,6 @@ //! //! References //! * https://en.wikipedia.org/wiki/Bilinear_interpolation#Repeated_linear_interpolation -use super::MultilinearRegularRecursive; use crate::index_arr_fixed_dims; use crunchy::unroll; use num_traits::{Float, NumCast}; @@ -104,10 +103,20 @@ pub fn interpn( vals, )? .interp(obs.try_into().unwrap(), out), - 7 => MultilinearRegularRecursive::<'_, T, 7>::new(dims, starts, steps, vals)? - .interp(obs, out), - 8 => MultilinearRegularRecursive::<'_, T, 8>::new(dims, starts, steps, vals)? - .interp(obs, out), + 7 => MultilinearRegular::<'_, T, 7>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), + vals, + )? + .interp(obs.try_into().unwrap(), out), + 8 => MultilinearRegular::<'_, T, 8>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), + vals, + )? + .interp(obs.try_into().unwrap(), out), _ => Err( "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", ), @@ -229,12 +238,6 @@ impl<'a, T: Float, const N: usize> MultilinearRegular<'a, T, N> { vals: &'a [T], ) -> Result { // Check dimensions - const { - assert!( - N > 0 && N < 7, - "Flattened method defined for 1-6 dimensions. For higher dimensions, use recursive method." - ); - } let nvals: usize = dims.iter().product(); if vals.len() != nvals { return Err("Dimension mismatch"); @@ -294,11 +297,6 @@ impl<'a, T: Float, const N: usize> MultilinearRegular<'a, T, N> { /// integer value within the value type `T` #[inline] pub fn interp_one(&self, x: [T; N]) -> Result { - // Check sizes - if x.len() != N { - return Err("Dimension mismatch"); - } - // Initialize fixed-size intermediate storage. // Maybe counterintuitively, initializing this storage here on every usage // instead of once with the top level struct is a significant speedup @@ -342,46 +340,62 @@ impl<'a, T: Float, const N: usize> MultilinearRegular<'a, T, N> { const FP: usize = 2; // Footprint size let nverts = const { FP.pow(N as u32) }; // Total number of vertices - unroll! { - for i < 64 in 0..nverts { // const loop + macro_rules! unroll_vertices_body { + ($i:ident) => { // Index, interpolate, or pass on each level of the tree for j in 0..N { - - // Most of these iterations will get optimized out - if j == 0 { // const branch - // At leaves, index values - for k in 0..N { - // Bit pattern in an integer matches C-ordered array indexing - // so we can just use the vertex index to index into the array - // by selecting the appropriate bit from the index. - let offset: usize = (i & (1 << k)) >> k; - loc[k] = origin[k] + offset; - } - const STORE_IND: usize = i % FP; - store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals); + // Most of these iterations will get optimized out + if j == 0 { + // const branch + // At leaves, index values + for k in 0..N { + // Bit pattern in an integer matches C-ordered array indexing + // so we can just use the vertex index to index into the array + // by selecting the appropriate bit from the index. + let offset: usize = ($i & (1 << k)) >> k; + loc[k] = origin[k] + offset; } - else { // const branch - // For other nodes, interpolate on child values - - let q: usize = FP.pow(j as u32); - let level: bool = (i + 1).is_multiple_of(q); - let p: usize = ((i + 1) / q).saturating_sub(1) % FP; + let store_ind: usize = $i % FP; + store[0][store_ind] = index_arr_fixed_dims(loc, dimprod, self.vals); + } else { + // const branch + // For other nodes, interpolate on child values + let q: usize = FP.pow(j as u32); + let level: bool = ($i + 1).is_multiple_of(q); + + if level { + // const branch + let p: usize = (($i + 1) / q).saturating_sub(1) % FP; let ind: usize = j.saturating_sub(1); - if level { // const branch - let y0 = store[ind][0]; - let dy = store[ind][1] - y0; - let t = dts[ind]; + let y0 = store[ind][0]; + let dy = store[ind][1] - y0; + let t = dts[ind]; - #[cfg(not(feature = "fma"))] - let interped = y0 + t * dy; - #[cfg(feature = "fma")] - let interped = t.mul_add(dy, y0); + #[cfg(not(feature = "fma"))] + let interped = y0 + t * dy; + #[cfg(feature = "fma")] + let interped = t.mul_add(dy, y0); - store[j][p] = interped; - } + store[j][p] = interped; } + } } + }; + } + + // Select flattened vs. looped implementation + if N <= 6 { + // For small numbers of dimensions, unroll at compile time + unroll! { + for i < 64 in 0..nverts { // const loop + unroll_vertices_body!(i) + } + } + } else { + // For larger numbers of dimensions, execute the loop + for i in 0..nverts { + unroll_vertices_body!(i) } } @@ -428,8 +442,8 @@ mod test { /// Each test evaluates at 3^N locations, largely extrapolated in corner regions, so it /// rapidly becomes prohibitively slow after about N=9. #[test] - fn test_interp_extrap_1d_to_6d() { - for n in 1..=6 { + fn test_interp_extrap_1d_to_8d() { + for n in 1..=8 { println!("Testing in {n} dims"); // Interp grid let dims: Vec = vec![2; n]; diff --git a/src/multilinear/regular_recursive.rs b/src/multilinear/regular_recursive.rs deleted file mode 100644 index 05890bf..0000000 --- a/src/multilinear/regular_recursive.rs +++ /dev/null @@ -1,463 +0,0 @@ -//! Multilinear interpolation/extrapolation on a regular grid. -//! -//! ```rust -//! use interpn::multilinear::regular; -//! -//! // Define a grid -//! let x = [1.0_f64, 2.0]; -//! let y = [1.0_f64, 1.5]; -//! -//! // Grid input for rectilinear method -//! let grids = &[&x[..], &y[..]]; -//! -//! // Grid input for regular grid method -//! let dims = [x.len(), y.len()]; -//! let starts = [x[0], y[0]]; -//! let steps = [x[1] - x[0], y[1] - y[0]]; -//! -//! // Values at grid points -//! let z = [2.0; 4]; -//! -//! // Observation points to interpolate/extrapolate -//! let xobs = [0.0_f64, 5.0]; -//! let yobs = [-1.0, 3.0]; -//! let obs = [&xobs[..], &yobs[..]]; -//! -//! // Do interpolation, allocating for the output for convenience -//! regular::interpn_alloc(&dims, &starts, &steps, &z, &obs).unwrap(); -//! ``` -//! -//! References -//! * https://en.wikipedia.org/wiki/Bilinear_interpolation#Repeated_linear_interpolation -use crate::index_arr; -use num_traits::{Float, NumCast}; - -/// Evaluate multilinear interpolation on a regular grid in up to 8 dimensions. -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// -/// This is a convenience function; best performance will be achieved by using the exact right -/// number for the MAXDIMS parameter, as this will slightly reduce compute and storage overhead, -/// and the underlying method can be extended to more than this function's limit of 8 dimensions. -/// The limit of 8 dimensions was chosen for no more specific reason than to reduce unit test times. -/// -/// While this method initializes the interpolator struct on every call, the overhead of doing this -/// is minimal even when using it to evaluate one observation point at a time. -pub fn interpn( - dims: &[usize], - starts: &[T], - steps: &[T], - vals: &[T], - obs: &[&[T]], - out: &mut [T], -) -> Result<(), &'static str> { - // Expanding out and using the specialized version for each size - // gives a substantial speedup for lower dimensionalities - // (4-5x speedup for 1-dim compared to using MAXDIMS=8) - let ndims = dims.len(); - match ndims { - 1 => MultilinearRegularRecursive::<'_, T, 1>::new(dims, starts, steps, vals)? - .interp(obs, out), - 2 => MultilinearRegularRecursive::<'_, T, 2>::new(dims, starts, steps, vals)? - .interp(obs, out), - 3 => MultilinearRegularRecursive::<'_, T, 3>::new(dims, starts, steps, vals)? - .interp(obs, out), - 4 => MultilinearRegularRecursive::<'_, T, 4>::new(dims, starts, steps, vals)? - .interp(obs, out), - 5 => MultilinearRegularRecursive::<'_, T, 5>::new(dims, starts, steps, vals)? - .interp(obs, out), - 6 => MultilinearRegularRecursive::<'_, T, 6>::new(dims, starts, steps, vals)? - .interp(obs, out), - 7 => MultilinearRegularRecursive::<'_, T, 7>::new(dims, starts, steps, vals)? - .interp(obs, out), - 8 => MultilinearRegularRecursive::<'_, T, 8>::new(dims, starts, steps, vals)? - .interp(obs, out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - }?; - - Ok(()) -} - -/// Evaluate interpolant, allocating a new Vec for the output. -/// -/// For best results, use the `interpn` function with preallocated output; -/// allocation has a significant performance cost, and should be used sparingly. -#[cfg(feature = "std")] -pub fn interpn_alloc( - dims: &[usize], - starts: &[T], - steps: &[T], - vals: &[T], - obs: &[&[T]], -) -> Result, &'static str> { - let mut out = vec![T::zero(); obs[0].len()]; - interpn(dims, starts, steps, vals, obs, &mut out)?; - Ok(out) -} - -/// Check whether a list of observation points are inside the grid within some absolute tolerance. -/// Assumes the grid is valid for the rectilinear interpolator (monotonically increasing). -/// -/// Output slice entry `i` is set to `false` if no points on that dimension are out of bounds, -/// and set to `true` if there is a bounds violation on that axis. -/// -/// # Errors -/// * If the dimensionality of the grid does not match the dimensionality of the observation points -/// * If the output slice length does not match the dimensionality of the grid -pub fn check_bounds( - dims: &[usize], - starts: &[T], - steps: &[T], - obs: &[&[T]], - atol: T, - out: &mut [bool], -) -> Result<(), &'static str> { - let ndims = dims.len(); - if !(obs.len() == ndims && out.len() == ndims) { - return Err("Dimension mismatch"); - } - - for i in 0..ndims { - let first = starts[i]; - let last_elem = ::from(dims[i] - 1); // Last element in grid - - match last_elem { - Some(last_elem) => { - let last = starts[i] + steps[i] * last_elem; - let lo = first.min(last); - let hi = first.max(last); - - let bad = obs[i] - .iter() - .any(|&x| (x - lo) <= -atol || (x - hi) >= atol); - out[i] = bad; - } - // Passing an unrepresentable number in isn't, strictly speaking, an error - // and since an unrepresentable number can't be on the grid, - // we can just flag it for the bounds check like normal - None => { - out[i] = true; - } - } - } - Ok(()) -} - -/// An arbitrary-dimensional multilinear interpolator / extrapolator on a regular grid. -/// -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// -/// Operation Complexity -/// * O(2^ndims) for interpolation and extrapolation in all regions. -/// -/// Memory Complexity -/// * Peak stack usage is O(MAXDIMS), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of MAXDIMS, which provides a guarantee on peak -/// memory usage. -/// -/// Timing -/// * Timing determinism is guaranteed to the extent that floating-point calculation timing is consistent. -/// That said, floating-point calculations can take a different number of clock-cycles depending on numerical values. -pub struct MultilinearRegularRecursive<'a, T: Float, const MAXDIMS: usize> { - /// Number of dimensions - ndims: usize, - - /// Size of each dimension - dims: [usize; MAXDIMS], - - /// Starting point of each dimension, size dims.len() - starts: [T; MAXDIMS], - - /// Step size for each dimension, size dims.len() - steps: [T; MAXDIMS], - - /// Values at each point, size prod(dims) - vals: &'a [T], -} - -impl<'a, T: Float, const MAXDIMS: usize> MultilinearRegularRecursive<'a, T, MAXDIMS> { - /// Build a new interpolator, using O(MAXDIMS) calculations and storage. - /// - /// This method does not handle degenerate dimensions; all grids must have at least 2 entries. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If any input dimensions do not match - /// * If any dimensions have size < 2 - /// * If any step sizes have zero or negative magnitude - pub fn new( - dims: &[usize], - starts: &[T], - steps: &[T], - vals: &'a [T], - ) -> Result { - // Check dimensions - let ndims = dims.len(); - let nvals: usize = dims.iter().product(); - if !(starts.len() == ndims && steps.len() == ndims && vals.len() == nvals && ndims > 0) { - return Err("Dimension mismatch"); - } - // Make sure all dimensions have at least four entries - let degenerate = dims[..ndims].iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least two entries"); - } - // Check if any dimensions have zero or negative step size - let steps_are_positive = steps.iter().all(|&x| x > T::zero()); - if !steps_are_positive { - return Err("All grids must be monotonically increasing"); - } - - // Copy grid info into struct. - // Keeping this data local to the struct eliminates an issue where, - // if the grid info is defined somewhere not local to where the struct - // is defined and used, the & references to it cost more than the entire - // rest of the interpolation operation. - let mut steps_local = [T::zero(); MAXDIMS]; - let mut starts_local = [T::zero(); MAXDIMS]; - let mut dims_local = [0_usize; MAXDIMS]; - steps_local[..ndims].copy_from_slice(&steps[..ndims]); - starts_local[..ndims].copy_from_slice(&starts[..ndims]); - dims_local[..ndims].copy_from_slice(&dims[..ndims]); - - Ok(Self { - ndims, - dims: dims_local, - starts: starts_local, - steps: steps_local, - vals, - }) - } - - /// Interpolate on a contiguous list of observation points. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of point or data does not match the grid - pub fn interp(&self, x: &[&[T]], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - let ndims = self.ndims; - // Make sure there are enough coordinate inputs for each dimension - if x.len() != ndims { - return Err("Dimension mismatch"); - } - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } - - let tmp = &mut [T::zero(); MAXDIMS][..ndims]; - for i in 0..n { - (0..ndims).for_each(|j| tmp[j] = x[j][i]); - out[i] = self.interp_one(tmp)?; - } - - Ok(()) - } - - /// Interpolate the value at a point, - /// using fixed-size intermediate storage of O(ndims) and no allocation. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of either one exceeds the fixed maximum - /// * If the index along any dimension exceeds the maximum representable - /// integer value within the value type `T` - pub fn interp_one(&self, x: &[T]) -> Result { - // Check sizes - let ndims = self.ndims; - if !(x.len() == ndims && ndims <= MAXDIMS) { - return Err("Dimension mismatch"); - } - - // Initialize fixed-size intermediate storage. - // Maybe counterintuitively, initializing this storage here on every usage - // instead of once with the top level struct is a significant speedup - // and does not increase peak stack usage. - // - // Also notably, storing the index offsets as bool instead of usize - // reduces memory overhead, but has not effect on throughput rate. - let origin = &mut [0_usize; MAXDIMS][..ndims]; // Indices of lower corner of hypercube - let dts = &mut [T::zero(); MAXDIMS][..ndims]; // Normalized coordinate storage - let dimprod = &mut [1_usize; MAXDIMS][..ndims]; - - // Populate cumulative product of higher dimensions for indexing. - // - // Each entry is the cumulative product of the size of dimensions - // higher than this one, which is the stride between blocks - // relating to a given index along each dimension. - let mut acc = 1; - for i in 0..ndims { - dimprod[ndims - i - 1] = acc; - acc *= self.dims[ndims - i - 1]; - } - - // Populate lower corner and saturation flag for each dimension - for i in 0..ndims { - origin[i] = self.get_loc(x[i], i)?; - } - - // Calculate normalized delta locations - for i in 0..ndims { - let index_zero_loc = self.starts[i] - + self.steps[i] - * ::from(origin[i]).ok_or("Unrepresentable coordinate value")?; - dts[i] = (x[i] - index_zero_loc) / self.steps[i]; - } - - // Recursive interpolation of one dependency tree at a time - let dim = ndims; // Start from the end and recurse back to zero - let loc = &mut [0_usize; MAXDIMS][..ndims]; - loc.copy_from_slice(origin); - let interped = self.populate(dim, origin, loc, dimprod, dts); - - Ok(interped) - } - - /// Get the two-lower index along this dimension where `x` is found, - /// saturating to the bounds at the edges if necessary. - /// - /// At the high bound of a given dimension, saturates to the fourth internal - /// point in order to capture a full 4-cube. - /// - /// Returned value like (lower_corner_index, saturation_flag). - #[inline] - fn get_loc(&self, v: T, dim: usize) -> Result { - let floc = ((v - self.starts[dim]) / self.steps[dim]).floor(); // float loc - // Signed integer loc, with the bottom of the cell aligned to place the normalized - // coordinate t=0 at cell index 1 - let iloc = ::from(floc).ok_or("Unrepresentable coordinate value")?; - - let n = self.dims[dim] as isize; // Number of grid points on this dimension - let dimmax = n.saturating_sub(2).max(0); // maximum index for lower corner - let loc: usize = iloc.max(0).min(dimmax) as usize; // unsigned integer loc clipped to interior - - Ok(loc) - } - - /// Recursive evaluation of interpolant on each dimension - #[inline] - fn populate( - &self, - dim: usize, - origin: &[usize], - loc: &mut [usize], - dimprod: &[usize], - dts: &[T], - ) -> T { - // Do the calc for this entry - match dim { - // If we have arrived at a leaf, index into data - 0 => index_arr(loc, dimprod, self.vals), - - // Otherwise, continue recursion - _ => { - // Keep track of where we are in the tree - // so that we can index into the value array properly - // when we reach the leaves - let next_dim = dim - 1; - - // Populate next dim's values - let mut vals = [T::zero(); 2]; - for i in 0..2 { - loc[next_dim] = origin[next_dim] + i; - vals[i] = self.populate(next_dim, origin, loc, dimprod, dts); - } - loc[next_dim] = origin[next_dim]; // Reset for next usage - - // Interpolate on next dim's values to populate an entry in this dim - let t = dts[next_dim]; - let y0 = vals[0]; - let dy = vals[1] - y0; - - #[cfg(not(feature = "fma"))] - let interped = y0 + t * dy; - #[cfg(feature = "fma")] - let interped = t.mul_add(dy, y0); - - interped - } - } - } -} - -#[cfg(test)] -mod test { - use super::{MultilinearRegularRecursive, interpn}; - use crate::utils::*; - - /// Iterate from 1 to 8 dimensions, making a minimum-sized grid for each one - /// to traverse every combination of interpolating or extrapolating high or low on each dimension. - /// Each test evaluates at 3^ndims locations, largely extrapolated in corner regions, so it - /// rapidly becomes prohibitively slow after about ndims=9. - #[test] - fn test_interp_extrap_1d_to_8d() { - for ndims in 1..=8 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![2; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i])) - .collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = grid.iter().map(|x| x.iter().sum()).collect(); // sum is linear in every direction, good for testing - let starts: Vec = xs.iter().map(|x| x[0]).collect(); - let steps: Vec = xs.iter().map(|x| x[1] - x[0]).collect(); - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-7.0 * (i as f64), 7.0 * ((i + 1) as f64), 3)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = gridobs.iter().map(|x| x.iter().sum()).collect(); // expected output at observation points - let mut out = vec![0.0; uobs.len()]; - - // Evaluate - interpn(&dims, &starts, &steps, &u, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - - (0..uobs.len()).for_each(|i| { - let outi = out[i]; - let uobsi = uobs[i]; - println!("{outi} {uobsi}"); - assert!((out[i] - uobs[i]).abs() < 1e-12) - }); - } - } - - /// Interpolate on a hat-shaped function to make sure that the grid cell indexing is aligned properly - #[test] - fn test_interp_hat_func() { - fn hat_func(x: f64) -> f64 { - if x <= 1.0 { x } else { 2.0 - x } - } - - let y = (0..3).map(|x| hat_func(x as f64)).collect::>(); - let obs = linspace(-2.0, 4.0, 100); - - let interpolator: MultilinearRegularRecursive = - MultilinearRegularRecursive::new(&[3], &[0.0], &[1.0], &y).unwrap(); - - (0..obs.len()).for_each(|i| { - assert_eq!( - hat_func(obs[i]), - interpolator.interp_one(&[obs[i]]).unwrap() - ); - }) - } -} diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index a572301..d0a8b61 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -231,11 +231,6 @@ impl<'a, T: Float, const N: usize> NearestRegular<'a, T, N> { /// integer value within the value type `T` #[inline] pub fn interp_one(&self, x: [T; N]) -> Result { - // Check sizes - if x.len() != N { - return Err("Dimension mismatch"); - } - // Initialize fixed-size intermediate storage. // Maybe counterintuitively, initializing this storage here on every usage // instead of once with the top level struct is a significant speedup From 13efee1fa53a4b2a1d630544f7acca054d9aba13 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sun, 11 Jan 2026 20:29:07 -0500 Subject: [PATCH 02/16] eliminate recursive methods for cubic interpolant --- src/lib.rs | 11 - src/multicubic/mod.rs | 4 - src/multicubic/rectilinear.rs | 107 ++-- src/multicubic/rectilinear_recursive.rs | 731 ---------------------- src/multicubic/regular.rs | 119 ++-- src/multicubic/regular_recursive.rs | 780 ------------------------ 6 files changed, 104 insertions(+), 1648 deletions(-) delete mode 100644 src/multicubic/rectilinear_recursive.rs delete mode 100644 src/multicubic/regular_recursive.rs diff --git a/src/lib.rs b/src/lib.rs index a407d68..db7ac64 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -522,17 +522,6 @@ fn resolve_grid_kind( Ok(kind) } -/// Index a single value from an array -#[inline] -pub(crate) fn index_arr(loc: &[usize], dimprod: &[usize], data: &[T]) -> T { - let mut i = 0; - for j in 0..dimprod.len() { - i += loc[j] * dimprod[j]; - } - - data[i] -} - /// Index a single value from an array with a known fixed number of dimensions #[inline] pub(crate) fn index_arr_fixed_dims( diff --git a/src/multicubic/mod.rs b/src/multicubic/mod.rs index bea8a62..e1bbba5 100644 --- a/src/multicubic/mod.rs +++ b/src/multicubic/mod.rs @@ -47,14 +47,10 @@ use num_traits::Float; pub mod rectilinear; -pub mod rectilinear_recursive; pub mod regular; -pub mod regular_recursive; pub use rectilinear::MulticubicRectilinear; -pub use rectilinear_recursive::MulticubicRectilinearRecursive; pub use regular::MulticubicRegular; -pub use regular_recursive::MulticubicRegularRecursive; #[derive(Clone, Copy, PartialEq)] pub(crate) enum Saturation { diff --git a/src/multicubic/rectilinear.rs b/src/multicubic/rectilinear.rs index 3ab6d2d..f625eb4 100644 --- a/src/multicubic/rectilinear.rs +++ b/src/multicubic/rectilinear.rs @@ -29,10 +29,7 @@ //! References //! * A. E. P. Veldman and K. Rinzema, “Playing with nonuniform grids”. //! https://pure.rug.nl/ws/portalfiles/portal/3332271/1992JEngMathVeldman.pdf -use super::{ - MulticubicRectilinearRecursive, Saturation, centered_difference_nonuniform, - normalized_hermite_spline, -}; +use super::{Saturation, centered_difference_nonuniform, normalized_hermite_spline}; use crate::index_arr_fixed_dims; use crunchy::unroll; use num_traits::Float; @@ -81,35 +78,36 @@ pub fn interpn( linearize_extrapolation, )? .interp(obs.try_into().unwrap(), out), - 4 => { - #[cfg(not(feature = "deep-unroll"))] - { - MulticubicRectilinearRecursive::<'_, T, 4>::new( - grids, - vals, - linearize_extrapolation, - )? - .interp(obs, out) - } - - #[cfg(feature = "deep-unroll")] - { - MulticubicRectilinear::<'_, T, 4>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out) - } - } - 5 => MulticubicRectilinearRecursive::<'_, T, 5>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 6 => MulticubicRectilinearRecursive::<'_, T, 6>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 7 => MulticubicRectilinearRecursive::<'_, T, 7>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 8 => MulticubicRectilinearRecursive::<'_, T, 8>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), + 4 => MulticubicRectilinear::<'_, T, 4>::new( + grids.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out), + 5 => MulticubicRectilinear::<'_, T, 5>::new( + grids.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out), + 6 => MulticubicRectilinear::<'_, T, 6>::new( + grids.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out), + 7 => MulticubicRectilinear::<'_, T, 7>::new( + grids.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out), + 8 => MulticubicRectilinear::<'_, T, 8>::new( + grids.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out), _ => Err( "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", ), @@ -209,18 +207,6 @@ impl<'a, T: Float, const N: usize> MulticubicRectilinear<'a, T, N> { linearize_extrapolation: bool, ) -> Result { // Check dimensions - const { - #[cfg(not(feature = "deep-unroll"))] - assert!( - N > 0 && N < 4, - "Flattened method defined for 1-3 dimensions by default (1-4 with `deep-unroll` feature). For higher dimensions, use recursive method." - ); - #[cfg(feature = "deep-unroll")] - assert!( - N > 0 && N < 5, - "Flattened method defined for 1-4 dimensions with `deep-unroll`. For higher dimensions, use recursive method." - ); - } let mut dims = [1_usize; N]; (0..N).for_each(|i| dims[i] = grids[i].len()); let nvals: usize = dims.iter().product(); @@ -327,8 +313,8 @@ impl<'a, T: Float, const N: usize> MulticubicRectilinear<'a, T, N> { let offset: usize = ($i & (3 << (2 * k))) >> (2 * k); loc[k] = origin[k] + offset; } - const STORE_IND: usize = $i % FP; - store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals); + let store_ind: usize = $i % FP; + store[0][store_ind] = index_arr_fixed_dims(loc, dimprod, self.vals); } else { // const branch // For other nodes, interpolate on child values @@ -356,14 +342,27 @@ impl<'a, T: Float, const N: usize> MulticubicRectilinear<'a, T, N> { } #[cfg(not(feature = "deep-unroll"))] - unroll! { - for i < 64 in 0..nverts { // const loop + if N <= 3 { + unroll! { + for i < 64 in 0..nverts { // const loop + unroll_vertices_body!(i); + } + } + } else { + for i in 0..nverts { unroll_vertices_body!(i); } } + #[cfg(feature = "deep-unroll")] - unroll! { - for i < 256 in 0..nverts { // const loop + if N <= 4 { + unroll! { + for i < 256 in 0..nverts { // const loop + unroll_vertices_body!(i); + } + } + } else { + for i in 0..nverts { unroll_vertices_body!(i); } } @@ -631,10 +630,10 @@ mod test { /// Under both interpolation and extrapolation, a hermite spline with natural boundary condition /// can reproduce an N-dimensional quadratic function exactly #[test] - fn test_interp_extrap_1d_to_4d_quadratic() { + fn test_interp_extrap_1d_to_6d_quadratic() { let mut rng = rng_fixed_seed(); - for ndims in 1..4 { + for ndims in 1..=6 { println!("Testing in {ndims} dims"); // Interp grid let dims: Vec = vec![4; ndims]; @@ -687,7 +686,7 @@ mod test { // Check that interpolated and extrapolated values match expectation, // using an absolute difference because some points are very close to or exactly at zero, // and do not do well under a check on relative difference. - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-10)); + (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 3e-10)); } } diff --git a/src/multicubic/rectilinear_recursive.rs b/src/multicubic/rectilinear_recursive.rs deleted file mode 100644 index 79ac128..0000000 --- a/src/multicubic/rectilinear_recursive.rs +++ /dev/null @@ -1,731 +0,0 @@ -//! An arbitrary-dimensional multicubic interpolator / extrapolator on a rectilinear grid. -//! -//! ```rust -//! use interpn::multicubic::rectilinear; -//! -//! // Define a grid -//! let x = [1.0_f64, 2.0, 3.0, 4.0]; -//! let y = [0.0_f64, 1.0, 2.0, 3.0]; -//! -//! // Grid input for rectilinear method -//! let grids = &[&x[..], &y[..]]; -//! -//! // Values at grid points -//! let z = [2.0; 16]; -//! -//! // Points to interpolate/extrapolate -//! let xobs = [0.0_f64, 5.0]; -//! let yobs = [-1.0, 3.0]; -//! let obs = [&xobs[..], &yobs[..]]; -//! -//! // Storage for output -//! let mut out = [0.0; 2]; -//! -//! // Do interpolation, allocating for the output for convenience -//! let linearize_extrapolation = false; -//! rectilinear::interpn_alloc(grids, &z, linearize_extrapolation, &obs).unwrap(); -//! ``` -//! -//! References -//! * A. E. P. Veldman and K. Rinzema, “Playing with nonuniform grids”. -//! https://pure.rug.nl/ws/portalfiles/portal/3332271/1992JEngMathVeldman.pdf -use super::{Saturation, centered_difference_nonuniform, normalized_hermite_spline}; -use crate::index_arr; -use num_traits::Float; - -/// Evaluate multicubic interpolation on a regular grid in up to 8 dimensions. -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// -/// This is a convenience function; best performance will be achieved by using the exact right -/// number for the MAXDIMS parameter, as this will slightly reduce compute and storage overhead, -/// and the underlying method can be extended to more than this function's limit of 8 dimensions. -/// The limit of 8 dimensions was chosen for no more specific reason than to reduce unit test times. -/// -/// While this method initializes the interpolator struct on every call, the overhead of doing this -/// is minimal even when using it to evaluate one observation point at a time. -pub fn interpn( - grids: &[&[T]], - vals: &[T], - linearize_extrapolation: bool, - obs: &[&[T]], - out: &mut [T], -) -> Result<(), &'static str> { - // Expanding out and using the specialized version for each size - // gives a substantial speedup for lower dimensionalities - // (4-5x speedup for 1-dim compared to using MAXDIMS=8) - let ndims = grids.len(); - match ndims { - 1 => MulticubicRectilinearRecursive::<'_, T, 1>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 2 => MulticubicRectilinearRecursive::<'_, T, 2>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 3 => MulticubicRectilinearRecursive::<'_, T, 3>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 4 => MulticubicRectilinearRecursive::<'_, T, 4>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 5 => MulticubicRectilinearRecursive::<'_, T, 5>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 6 => MulticubicRectilinearRecursive::<'_, T, 6>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 7 => MulticubicRectilinearRecursive::<'_, T, 7>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - 8 => MulticubicRectilinearRecursive::<'_, T, 8>::new(grids, vals, linearize_extrapolation)? - .interp(obs, out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - }?; - - Ok(()) -} - -/// Evaluate interpolant, allocating a new Vec for the output. -/// -/// For best results, use the `interpn` function with preallocated output; -/// allocation has a significant performance cost, and should be used sparingly. -#[cfg(feature = "std")] -pub fn interpn_alloc( - grids: &[&[T]], - vals: &[T], - linearize_extrapolation: bool, - obs: &[&[T]], -) -> Result, &'static str> { - let mut out = vec![T::zero(); obs[0].len()]; - interpn(grids, vals, linearize_extrapolation, obs, &mut out)?; - Ok(out) -} - -// We can use the same rectilinear-grid method again -pub use crate::multilinear::rectilinear::check_bounds; - -/// An arbitrary-dimensional multicubic interpolator / extrapolator on a regular grid. -/// -/// On interior points, a hermite spline is used, with the derivative at each -/// grid point matched to a second-order central difference. This allows the -/// interpolant to reproduce a quadratic function exactly, and to approximate -/// others with minimal overshoot and wobble. -/// -/// At the grid boundary, a natural spline boundary condition is applied, -/// meaning the third derivative of the interpolant is constrainted to zero -/// at the last grid point, with the result that the interpolant is quadratic -/// on the last interval before the boundary. -/// -/// With "linearize_extrapolation" set, extrapolation is linear on the extrapolated -/// dimensions, holding the same derivative as the natural boundary condition produces -/// at the last grid point. Otherwise, the last grid cell's spline function is continued, -/// producing a quadratic extrapolation. -/// -/// This effectively gives a gradual decrease in the order of the interpolant -/// as the observation point approaches then leaves the grid: -/// -/// out out -/// ---|---|---|---|---|---|--- Grid -/// 2 2 3 3 3 2 2 Order of interpolant between grid points -/// 1 1 Extrapolation with linearize_extrapolation -/// -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// -/// Operation Complexity -/// * O(4^ndims) for interpolation and extrapolation in all regions. -/// -/// Memory Complexity -/// * Peak stack usage is O(MAXDIMS), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of MAXDIMS, which provides a guarantee on peak -/// memory usage. -/// -/// Timing -/// * Timing determinism very tight, but is not exact due to the -/// differences in calculations (but not complexity) between -/// interpolation and extrapolation. -/// * An interpolation-only variant of this algorithm could achieve -/// near-deterministic timing, but would produce incorrect results -/// when evaluated at off-grid points. -pub struct MulticubicRectilinearRecursive<'a, T: Float, const MAXDIMS: usize> { - /// x, y, ... coordinate grids, each entry of size dims[i] - grids: &'a [&'a [T]], - - /// Size of each dimension - dims: [usize; MAXDIMS], - - /// Values at each point, size prod(dims) - vals: &'a [T], - - /// Whether to extrapolate linearly instead of continuing spline - linearize_extrapolation: bool, -} - -impl<'a, T: Float, const MAXDIMS: usize> MulticubicRectilinearRecursive<'a, T, MAXDIMS> { - /// Build a new interpolator, using O(MAXDIMS) calculations and storage. - /// - /// This method does not handle degenerate dimensions; all grids must have at least 4 entries. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If any input dimensions do not match - /// * If any dimensions have size < 4 - /// * If any step sizes have zero or negative magnitude - pub fn new( - grids: &'a [&'a [T]], - vals: &'a [T], - linearize_extrapolation: bool, - ) -> Result { - // Check dimensions - let ndims = grids.len(); - let mut dims = [1_usize; MAXDIMS]; - (0..ndims).for_each(|i| dims[i] = grids[i].len()); - let nvals: usize = dims[..ndims].iter().product(); - if !(vals.len() == nvals && ndims > 0 && ndims <= MAXDIMS) { - return Err("Dimension mismatch"); - }; - // Check if any grids are degenerate - let degenerate = dims[..ndims].iter().any(|&x| x < 4); - if degenerate { - return Err("All grids must have at least 4 entries"); - }; - // Check that at least the first two entries in each grid are monotonic - let monotonic_maybe = grids.iter().all(|&g| g[1] > g[0]); - if !monotonic_maybe { - return Err("All grids must be monotonically increasing"); - }; - - Ok(Self { - grids, - dims, - vals, - linearize_extrapolation, - }) - } - - /// Interpolate on a contiguous list of observation points. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of point or data does not match the grid - pub fn interp(&self, x: &[&[T]], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - let ndims = self.grids.len(); - - // Make sure there are enough coordinate inputs for each dimension - if x.len() != ndims { - return Err("Dimension mismatch"); - } - - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } - - let tmp = &mut [T::zero(); MAXDIMS][..ndims]; - for i in 0..n { - (0..ndims).for_each(|j| tmp[j] = x[j][i]); - out[i] = self.interp_one(tmp)?; - } - - Ok(()) - } - - /// Interpolate the value at a point, - /// using fixed-size intermediate storage of O(ndims) and no allocation. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of either one exceeds the fixed maximum - /// * If the index along any dimension exceeds the maximum representable - /// integer value within the value type `T` - pub fn interp_one(&self, x: &[T]) -> Result { - // Check sizes - let ndims = self.grids.len(); - if !(x.len() == ndims && ndims <= MAXDIMS) { - return Err("Dimension mismatch"); - } - - // Initialize fixed-size intermediate storage. - // Maybe counterintuitively, initializing this storage here on every usage - // instead of once with the top level struct is a significant speedup - // and does not increase peak stack usage. - // - // Also notably, storing the index offsets as bool instead of usize - // reduces memory overhead, but has not effect on throughput rate. - let origin = &mut [0_usize; MAXDIMS][..ndims]; // Indices of lower corner of hypercub - let sat = &mut [Saturation::None; MAXDIMS][..ndims]; // Saturation none/high/low flags for each dim - let dimprod = &mut [1_usize; MAXDIMS][..ndims]; - - // Populate cumulative product of higher dimensions for indexing. - // - // Each entry is the cumulative product of the size of dimensions - // higher than this one, which is the stride between blocks - // relating to a given index along each dimension. - let mut acc = 1; - for i in 0..ndims { - dimprod[ndims - i - 1] = acc; - acc *= self.dims[ndims - i - 1]; - } - - // Populate lower corner and saturation flag for each dimension - for i in 0..ndims { - (origin[i], sat[i]) = self.get_loc(x[i], i)?; - } - - // Recursive interpolation of one dependency tree at a time - // let loc = &origin; // Starting location in the tree is the origin - let dim = ndims; // Start from the end and recurse back to zero - let loc = &mut [0_usize; MAXDIMS][..ndims]; - loc.copy_from_slice(origin); - let interped = self.populate(dim, sat, origin, loc, dimprod, x); - - Ok(interped) - } - - /// Get the two-lower index along this dimension where `x` is found, - /// saturating to the bounds at the edges if necessary. - /// - /// At the high bound of a given dimension, saturates to the fourth internal - /// point in order to capture a full 4-cube. - /// - /// Returned value like (lower_corner_index, saturation_flag). - #[inline] - fn get_loc(&self, v: T, dim: usize) -> Result<(usize, Saturation), &'static str> { - let saturation: Saturation; // What part of the grid cell are we in? - let grid = self.grids[dim]; - - // Bisection search to find location on the grid. - // - // The search will return `0` if the point is outside-low, - // and will return `self.dims[dim]` if outside-high. - // - // This process accounts for essentially the entire difference in - // performance between this method and the regular-grid method. - let iloc: isize = grid.partition_point(|x| *x < v) as isize - 2; - - let n = self.dims[dim] as isize; // Number of grid points on this dimension - let dimmax = n.saturating_sub(4).max(0); // maximum index for lower corner - let loc: usize = iloc.max(0).min(dimmax) as usize; // unsigned integer loc clipped to interior - - // Observation point is outside the grid on the low side - if iloc == -2 { - saturation = Saturation::OutsideLow; - } - // Observation point is in the lower part of the cell - // but not outside the grid - else if iloc == -1 { - saturation = Saturation::InsideLow; - } - // Observation point is in the upper part of the cell - // but not outside the grid - else if iloc == n - 2 { - saturation = Saturation::OutsideHigh; - } - // Observation point is in the upper part of the cell - // but not outside the grid - else if iloc == n - 3 { - saturation = Saturation::InsideHigh; - } - // Observation point is on the interior - else { - saturation = Saturation::None; - } - - Ok((loc, saturation)) - } - - /// Recursive evaluation of interpolant on each dimension - #[inline] - fn populate( - &self, - dim: usize, - sat: &[Saturation], - origin: &[usize], - loc: &mut [usize], - dimprod: &[usize], - x: &[T], - ) -> T { - // Do the calc for this entry - match dim { - // If we have arrived at a leaf, index into data - 0 => index_arr(loc, dimprod, self.vals), - - // Otherwise, continue recursion - _ => { - // Keep track of where we are in the tree - // so that we can index into the value array properly - // when we reach the leaves - let next_dim = dim - 1; - - // Populate next dim's values - let mut vals = [T::zero(); 4]; - for i in 0..4 { - loc[next_dim] = origin[next_dim] + i; - vals[i] = self.populate(next_dim, sat, origin, loc, dimprod, x); - } - loc[next_dim] = origin[next_dim]; // Reset for next usage - - // Interpolate on next dim's values to populate an entry in this dim - let grid_cell = &self.grids[next_dim][origin[next_dim]..origin[next_dim] + 4]; - interp_inner::( - vals, - grid_cell, - x[next_dim], - sat[next_dim], - self.linearize_extrapolation, - ) - } - } - } -} - -/// Calculate slopes and offsets & select evaluation method -#[inline] -fn interp_inner( - vals: [T; 4], - grid_cell: &[T], - x: T, - sat: Saturation, - linearize_extrapolation: bool, -) -> T { - // Construct some constants using generic methods - let one = T::one(); - let two = one + one; - - // For cases on the interior, use two slopes (from centered difference) and two values - // as the BCs. - // - // For locations falling near and edge, take one centered - // difference for the inside derivative, - // then for the derivative at the edge, impose a natural - // spline constraint, meaning the third derivative q'''(t) = 0 - // at the last grid point, which produces a quadratic in the - // last cell, reducing wobble that would be cause by enforcing - // the use of a cubic function where there is not enough information - // to support it. - - match sat { - Saturation::None => { - // |-> t - // --|---|---|---|-- - // x - // - // This is the nominal case - let y0 = vals[1]; - let dy = vals[2] - vals[1]; - - let h01 = grid_cell[1] - grid_cell[0]; - let h12 = grid_cell[2] - grid_cell[1]; - let h23 = grid_cell[3] - grid_cell[2]; - let k0 = centered_difference_nonuniform(vals[0], vals[1], vals[2], h01 / h12, T::one()); - let k1 = centered_difference_nonuniform(vals[1], vals[2], vals[3], T::one(), h23 / h12); - - let t = (x - grid_cell[1]) / h12; - - normalized_hermite_spline(t, y0, dy, k0, k1) - } - Saturation::InsideLow => { - // t <-| - // --|---|---|---|-- - // x - // - // Flip direction to maintain symmetry - // with the InsideHigh case. - - let y0 = vals[1]; // Same starting point, opposite direction - let dy = vals[0] - vals[1]; - - let h01 = grid_cell[1] - grid_cell[0]; - let h12 = grid_cell[2] - grid_cell[1]; - let k0 = - -centered_difference_nonuniform(vals[0], vals[1], vals[2], T::one(), h12 / h01); - - #[cfg(not(feature = "fma"))] - let k1 = two * dy - k0; // Natural spline boundary condition - #[cfg(feature = "fma")] - let k1 = two.mul_add(dy, -k0); // Natural spline boundary condition - - let t = -(x - grid_cell[1]) / h01; - - normalized_hermite_spline(t, y0, dy, k0, k1) - } - Saturation::OutsideLow => { - // t <-| - // --|---|---|---|-- - // x - // - // Flip direction to maintain symmetry - // with the InsideHigh case. - - let y0 = vals[1]; - let y1 = vals[0]; - let dy = vals[0] - vals[1]; - - let h01 = grid_cell[1] - grid_cell[0]; - let h12 = grid_cell[2] - grid_cell[1]; - let k0 = - -centered_difference_nonuniform(vals[0], vals[1], vals[2], T::one(), h12 / h01); - let k1 = two * dy - k0; // Natural spline boundary condition - - let t = -(x - grid_cell[1]) / h01; - - // If we are linearizing the interpolant under extrapolation, - // hold the last slope outside the grid - if linearize_extrapolation { - #[cfg(not(feature = "fma"))] - { - y1 + k1 * (t - one) - } - #[cfg(feature = "fma")] - { - k1.mul_add(t - one, y1) - } - } else { - normalized_hermite_spline(t, y0, dy, k0, k1) - } - } - Saturation::InsideHigh => { - // |-> t - // --|---|---|---|-- - // x - let y0 = vals[2]; - let dy = vals[3] - vals[2]; - - let h12 = grid_cell[2] - grid_cell[1]; - let h23 = grid_cell[3] - grid_cell[2]; - let k0 = centered_difference_nonuniform(vals[1], vals[2], vals[3], h12 / h23, T::one()); - - #[cfg(not(feature = "fma"))] - let k1 = two * dy - k0; // Natural spline boundary condition - #[cfg(feature = "fma")] - let k1 = two.mul_add(dy, -k0); // Natural spline boundary condition - - let t = (x - grid_cell[2]) / h23; - - normalized_hermite_spline(t, y0, dy, k0, k1) - } - Saturation::OutsideHigh => { - // |-> t - // --|---|---|---|-- - // x - let y0 = vals[2]; - let y1 = vals[3]; - let dy = vals[3] - vals[2]; - - let h12 = grid_cell[2] - grid_cell[1]; - let h23 = grid_cell[3] - grid_cell[2]; - let k0 = centered_difference_nonuniform(vals[1], vals[2], vals[3], h12 / h23, T::one()); - let k1 = two * dy - k0; // Natural spline boundary condition - - let t = (x - grid_cell[2]) / h23; - - // If we are linearizing the interpolant under extrapolation, - // hold the last slope outside the grid - if linearize_extrapolation { - #[cfg(not(feature = "fma"))] - { - y1 + k1 * (t - one) - } - #[cfg(feature = "fma")] - { - k1.mul_add(t - one, y1) - } - } else { - normalized_hermite_spline(t, y0, dy, k0, k1) - } - } - } -} - -#[cfg(test)] -mod test { - use super::interpn; - use crate::testing::*; - use crate::utils::*; - - /// Iterate from 1 to 8 dimensions, making a minimum-sized grid for each one - /// to traverse every combination of interpolating or extrapolating high or low on each dimension. - /// Each test evaluates at 5^ndims locations, largely extrapolated in corner regions, so it - /// rapidly becomes prohibitively slow in higher dimensions. - #[test] - fn test_interp_extrap_1d_to_6d_linear() { - let mut rng = rng_fixed_seed(); - - for ndims in 1..=6 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![4; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| { - // Make a linear grid and add noise - let mut x = linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i]); - let dx = randn::(&mut rng, x.len()); - (0..x.len()).for_each(|i| x[i] += (dx[i] - 0.5) / 10.0); - (0..x.len() - 1).for_each(|i| assert!(x[i + 1] > x[i])); - x - }) - .collect(); - - let grids: Vec<&[f64]> = xs.iter().map(|x| &x[..]).collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = grid.iter().map(|x| x.iter().sum()).collect(); // sum is linear in every direction, good for testing - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-7.0 * (i as f64), 7.0 * ((i + 1) as f64), dims[i] + 2)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = gridobs.iter().map(|x| x.iter().sum()).collect(); // expected output at observation points - let mut out = vec![0.0; uobs.len()]; - - // Evaluate with linearized extrapolation - interpn(&grids, &u, true, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-10)); - - // Evaluate and check without linearized extrapolation - interpn(&grids, &u, false, &xobsslice, &mut out[..]).unwrap(); - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-10)); - } - } - - /// Under both interpolation and extrapolation, a hermite spline with natural boundary condition - /// can reproduce an N-dimensional quadratic function exactly - #[test] - fn test_interp_extrap_1d_to_6d_quadratic() { - let mut rng = rng_fixed_seed(); - - for ndims in 1..6 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![4; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| { - // Make a linear grid and add noise - let mut x = linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i]); - let dx = randn::(&mut rng, x.len()); - (0..x.len()).for_each(|i| x[i] += (dx[i] - 0.5) / 10.0); - (0..x.len() - 1).for_each(|i| assert!(x[i + 1] > x[i])); - x - }) - .collect(); - - let grids: Vec<&[f64]> = xs.iter().map(|x| &x[..]).collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = (0..grid.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += grid[i][j] * grid[i][j]; - } - v - }) - .collect(); // Quadratic in every directio - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-7.0 * (i as f64), 7.0 * ((i + 1) as f64), dims[i] + 2)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = (0..gridobs.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += gridobs[i][j] * gridobs[i][j]; - } - v - }) - .collect(); // Quadratic in every direction - let mut out = vec![0.0; uobs.len()]; - - // Evaluate - interpn(&grids, &u, false, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated and extrapolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-10)); - } - } - - /// Under interpolation, a hermite spline with natural boundary condition - /// can reproduce an N-dimensional sine function fairly closely, but not exactly. - /// More points are required to capture a sine function, so fewer dimensions are tested - /// to keep test run times low. - #[test] - fn test_interp_1d_to_3d_sine() { - let mut rng = rng_fixed_seed(); - - for ndims in 1..3 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![10; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| { - // Make a linear grid and add noise - let mut x = linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i]); - let dx = randn::(&mut rng, x.len()); - (0..x.len()).for_each(|i| x[i] += (dx[i] - 0.5) / 10.0); - (0..x.len() - 1).for_each(|i| assert!(x[i + 1] > x[i])); - x - }) - .collect(); - - let grids: Vec<&[f64]> = xs.iter().map(|x| &x[..]).collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = (0..grid.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += (grid[i][j] * 6.28 / 10.0).sin(); - } - v - }) - .collect(); // Quadratic in every direction - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i] + 1)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = (0..gridobs.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += (gridobs[i][j] * 6.28 / 10.0).sin(); - } - v - }) - .collect(); // Quadratic in every direction - let mut out = vec![0.0; uobs.len()]; - - // Evaluate - interpn(&grids, &u, false, &xobsslice, &mut out[..]).unwrap(); - - // Use a tolerance that increases with the number of dimensions, since - // we are effectively summing ndims times the error from each dimension - let tol = 2e-2 * f64::from(ndims as u32); - - (0..uobs.len()).for_each(|i| { - let err = out[i] - uobs[i]; - assert!(err.abs() < tol); - }); - } - } -} diff --git a/src/multicubic/regular.rs b/src/multicubic/regular.rs index 78d0c10..c976175 100644 --- a/src/multicubic/regular.rs +++ b/src/multicubic/regular.rs @@ -30,7 +30,7 @@ //! let linearize_extrapolation = false; //! regular::interpn_alloc(&dims, &starts, &steps, &z, linearize_extrapolation, &obs).unwrap(); //! ``` -use super::{MulticubicRegularRecursive, Saturation, normalized_hermite_spline}; +use super::{Saturation, normalized_hermite_spline}; use crate::index_arr_fixed_dims; use crunchy::unroll; use num_traits::{Float, NumCast}; @@ -40,7 +40,7 @@ use num_traits::{Float, NumCast}; /// /// For 1-4 dimensions with `deep-unroll` enabled (1-3 by default), a fast flattened method is used. /// For higher dimensions, where that flattening becomes impractical due to compile times and -/// instruction size, evaluation defers to a bounded recursion. +/// instruction size, evaluation defers to a run-time loop. /// /// This is a convenience function; best performance will be achieved by using the exact right /// number for the N parameter, as this will slightly reduce compute and storage overhead, @@ -87,64 +87,46 @@ pub fn interpn( linearize_extrapolation, )? .interp(obs.try_into().unwrap(), out), - 4 => { - // 4D cubic interpolation requires deep-unroll - #[cfg(not(feature = "deep-unroll"))] - { - MulticubicRegularRecursive::<'_, T, 4>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out) - } - - #[cfg(feature = "deep-unroll")] - { - MulticubicRegular::<'_, T, 4>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out) - } - } - 5 => MulticubicRegularRecursive::<'_, T, 5>::new( - dims, - starts, - steps, + 4 => MulticubicRegular::<'_, T, 4>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), vals, linearize_extrapolation, )? - .interp(obs, out), - 6 => MulticubicRegularRecursive::<'_, T, 6>::new( - dims, - starts, - steps, + .interp(obs.try_into().unwrap(), out), + 5 => MulticubicRegular::<'_, T, 5>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out), + 6 => MulticubicRegular::<'_, T, 6>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), vals, linearize_extrapolation, )? - .interp(obs, out), - 7 => MulticubicRegularRecursive::<'_, T, 7>::new( - dims, - starts, - steps, + .interp(obs.try_into().unwrap(), out), + 7 => MulticubicRegular::<'_, T, 7>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), vals, linearize_extrapolation, )? - .interp(obs, out), - 8 => MulticubicRegularRecursive::<'_, T, 8>::new( - dims, - starts, - steps, + .interp(obs.try_into().unwrap(), out), + 8 => MulticubicRegular::<'_, T, 8>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), vals, linearize_extrapolation, )? - .interp(obs, out), + .interp(obs.try_into().unwrap(), out), _ => Err( "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", ), @@ -260,18 +242,6 @@ impl<'a, T: Float, const N: usize> MulticubicRegular<'a, T, N> { linearize_extrapolation: bool, ) -> Result { // Check dimensions - const { - #[cfg(not(feature = "deep-unroll"))] - assert!( - N > 0 && N < 4, - "Flattened method defined for 1-3 dimensions by default (1-4 with `deep-unroll` feature). For higher dimensions, use recursive method." - ); - #[cfg(feature = "deep-unroll")] - assert!( - N > 0 && N < 5, - "Flattened method defined for 1-4 dimensions with `deep-unroll`. For higher dimensions, use recursive method." - ); - } let nvals: usize = dims.iter().product(); if !(starts.len() == N && steps.len() == N && vals.len() == nvals && N > 0) { return Err("Dimension mismatch"); @@ -400,8 +370,8 @@ impl<'a, T: Float, const N: usize> MulticubicRegular<'a, T, N> { let offset: usize = ($i & (3 << (2 * k))) >> (2 * k); loc[k] = origin[k] + offset; } - const STORE_IND: usize = $i % FP; - store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals); + let store_ind: usize = $i % FP; + store[0][store_ind] = index_arr_fixed_dims(loc, dimprod, self.vals); } else { // const branch // For other nodes, interpolate on child values @@ -427,14 +397,27 @@ impl<'a, T: Float, const N: usize> MulticubicRegular<'a, T, N> { } #[cfg(not(feature = "deep-unroll"))] - unroll! { - for i < 64 in 0..nverts { // const loop + if N <= 3 { + unroll! { + for i < 64 in 0..nverts { // const loop + unroll_vertices_body!(i); + } + } + } else { + for i in 0..nverts { unroll_vertices_body!(i); } } + #[cfg(feature = "deep-unroll")] - unroll! { - for i < 256 in 0..nverts { // const loop + if N <= 4 { + unroll! { + for i < 256 in 0..nverts { // const loop + unroll_vertices_body!(i); + } + } + } else { + for i in 0..nverts { unroll_vertices_body!(i); } } @@ -706,8 +689,8 @@ mod test { /// Under both interpolation and extrapolation, a hermite spline with natural boundary condition /// can reproduce an N-dimensional quadratic function exactly #[test] - fn test_interp_extrap_1d_to_4d_quadratic() { - for ndims in 1..=4 { + fn test_interp_extrap_1d_to_6d_quadratic() { + for ndims in 1..=6 { println!("Testing in {ndims} dims"); // Interp grid let dims: Vec = vec![4; ndims]; diff --git a/src/multicubic/regular_recursive.rs b/src/multicubic/regular_recursive.rs deleted file mode 100644 index 332ffdb..0000000 --- a/src/multicubic/regular_recursive.rs +++ /dev/null @@ -1,780 +0,0 @@ -//! An arbitrary-dimensional multicubic interpolator / extrapolator on a regular grid. -//! -//! ```rust -//! use interpn::multicubic::regular; -//! -//! // Define a grid -//! let x = [1.0_f64, 2.0, 3.0, 4.0]; -//! let y = [0.0_f64, 1.0, 2.0, 3.0]; -//! -//! // Grid input for rectilinear method -//! let grids = &[&x[..], &y[..]]; -//! -//! // Grid input for regular grid method -//! let dims = [x.len(), y.len()]; -//! let starts = [x[0], y[0]]; -//! let steps = [x[1] - x[0], y[1] - y[0]]; -//! -//! // Values at grid points -//! let z = [2.0; 16]; -//! -//! // Observation points to interpolate/extrapolate -//! let xobs = [0.0_f64, 5.0]; -//! let yobs = [-1.0, 3.0]; -//! let obs = [&xobs[..], &yobs[..]]; -//! -//! // Storage for output -//! let mut out = [0.0; 2]; -//! -//! // Do interpolation, allocating for the output for convenience -//! let linearize_extrapolation = false; -//! regular::interpn_alloc(&dims, &starts, &steps, &z, linearize_extrapolation, &obs).unwrap(); -//! ``` -use super::{Saturation, normalized_hermite_spline}; -use crate::index_arr; -use num_traits::{Float, NumCast}; - -/// Evaluate multicubic interpolation on a regular grid in up to 8 dimensions. -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// -/// This is a convenience function; best performance will be achieved by using the exact right -/// number for the MAXDIMS parameter, as this will slightly reduce compute and storage overhead, -/// and the underlying method can be extended to more than this function's limit of 8 dimensions. -/// The limit of 8 dimensions was chosen for no more specific reason than to reduce unit test times. -/// -/// While this method initializes the interpolator struct on every call, the overhead of doing this -/// is minimal even when using it to evaluate one observation point at a time. -pub fn interpn( - dims: &[usize], - starts: &[T], - steps: &[T], - vals: &[T], - linearize_extrapolation: bool, - obs: &[&[T]], - out: &mut [T], -) -> Result<(), &'static str> { - // Expanding out and using the specialized version for each size - // gives a substantial speedup for lower dimensionalities - // (4-5x speedup for 1-dim compared to using MAXDIMS=8) - let ndims = dims.len(); - match ndims { - 1 => MulticubicRegularRecursive::<'_, T, 1>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - 2 => MulticubicRegularRecursive::<'_, T, 2>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - 3 => MulticubicRegularRecursive::<'_, T, 3>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - 4 => MulticubicRegularRecursive::<'_, T, 4>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - 5 => MulticubicRegularRecursive::<'_, T, 5>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - 6 => MulticubicRegularRecursive::<'_, T, 6>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - 7 => MulticubicRegularRecursive::<'_, T, 7>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - 8 => MulticubicRegularRecursive::<'_, T, 8>::new( - dims, - starts, - steps, - vals, - linearize_extrapolation, - )? - .interp(obs, out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - }?; - - Ok(()) -} - -/// Evaluate interpolant, allocating a new Vec for the output. -/// -/// For best results, use the `interpn` function with preallocated output; -/// allocation has a significant performance cost, and should be used sparingly. -#[cfg(feature = "std")] -pub fn interpn_alloc( - dims: &[usize], - starts: &[T], - steps: &[T], - vals: &[T], - linearize_extrapolation: bool, - obs: &[&[T]], -) -> Result, &'static str> { - let mut out = vec![T::zero(); obs[0].len()]; - interpn( - dims, - starts, - steps, - vals, - linearize_extrapolation, - obs, - &mut out, - )?; - Ok(out) -} - -// We can use the same regular-grid method again -pub use crate::multilinear::regular::check_bounds; - -/// An arbitrary-dimensional multicubic interpolator / extrapolator on a regular grid. -/// -/// On interior points, a hermite spline is used, with the derivative at each -/// grid point matched to a second-order central difference. This allows the -/// interpolant to reproduce a quadratic function exactly, and to approximate -/// others with minimal overshoot and wobble. -/// -/// At the grid boundary, a natural spline boundary condition is applied, -/// meaning the third derivative of the interpolant is constrainted to zero -/// at the last grid point, with the result that the interpolant is quadratic -/// on the last interval before the boundary. -/// -/// With "linearize_extrapolation" set, extrapolation is linear on the extrapolated -/// dimensions, holding the same derivative as the natural boundary condition produces -/// at the last grid point. Otherwise, the last grid cell's spline function is continued, -/// producing a quadratic extrapolation. -/// -/// This effectively gives a gradual decrease in the order of the interpolant -/// as the observation point approaches then leaves the grid: -/// -/// out out -/// ---|---|---|---|---|---|--- Grid -/// 2 2 3 3 3 2 2 Order of interpolant between grid points -/// 1 1 Extrapolation with linearize_extrapolation -/// -/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). -/// -/// Operation Complexity -/// * O(4^ndims) for interpolation and extrapolation in all regions. -/// -/// Memory Complexity -/// * Peak stack usage is O(MAXDIMS), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of MAXDIMS, which provides a guarantee on peak -/// memory usage. -/// -/// Timing -/// * Timing determinism very tight, but is not exact due to the -/// differences in calculations (but not complexity) between -/// interpolation and extrapolation. -/// * An interpolation-only variant of this algorithm could achieve -/// near-deterministic timing, but would produce incorrect results -/// when evaluated at off-grid points. -pub struct MulticubicRegularRecursive<'a, T: Float, const MAXDIMS: usize> { - /// Number of dimensions - ndims: usize, - - /// Size of each dimension - dims: [usize; MAXDIMS], - - /// Starting point of each dimension, size dims.len() - starts: [T; MAXDIMS], - - /// Step size for each dimension, size dims.len() - steps: [T; MAXDIMS], - - /// Values at each point, size prod(dims) - vals: &'a [T], - - /// Whether to extrapolate linearly instead of continuing spline - linearize_extrapolation: bool, -} - -impl<'a, T: Float, const MAXDIMS: usize> MulticubicRegularRecursive<'a, T, MAXDIMS> { - /// Build a new interpolator, using O(MAXDIMS) calculations and storage. - /// - /// This method does not handle degenerate dimensions; all grids must have at least 4 entries. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If any input dimensions do not match - /// * If any dimensions have size < 4 - /// * If any step sizes have zero or negative magnitude - pub fn new( - dims: &[usize], - starts: &[T], - steps: &[T], - vals: &'a [T], - linearize_extrapolation: bool, - ) -> Result { - // Check dimensions - let ndims = dims.len(); - let nvals: usize = dims.iter().product(); - if !(starts.len() == ndims && steps.len() == ndims && vals.len() == nvals && ndims > 0) { - return Err("Dimension mismatch"); - } - - // Make sure all dimensions have at least four entries - let degenerate = dims[..ndims].iter().any(|&x| x < 4); - if degenerate { - return Err("All grids must have at least four entries"); - } - // Check if any dimensions have zero or negative step size - let steps_are_positive = steps.iter().all(|&x| x > T::zero()); - if !steps_are_positive { - return Err("All grids must be monotonically increasing"); - } - - // Copy grid info into struct. - // Keeping this data local to the struct eliminates an issue where, - // if the grid info is defined somewhere not local to where the struct - // is defined and used, the & references to it cost more than the entire - // rest of the interpolation operation. - let mut steps_local = [T::zero(); MAXDIMS]; - let mut starts_local = [T::zero(); MAXDIMS]; - let mut dims_local = [0_usize; MAXDIMS]; - steps_local[..ndims].copy_from_slice(&steps[..ndims]); - starts_local[..ndims].copy_from_slice(&starts[..ndims]); - dims_local[..ndims].copy_from_slice(&dims[..ndims]); - - Ok(Self { - ndims, - dims: dims_local, - starts: starts_local, - steps: steps_local, - vals, - linearize_extrapolation, - }) - } - - /// Interpolate on a contiguous list of observation points. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of point or data does not match the grid - pub fn interp(&self, x: &[&[T]], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - let ndims = self.ndims; - // Make sure there are enough coordinate inputs for each dimension - if x.len() != ndims { - return Err("Dimension mismatch"); - } - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } - - let tmp = &mut [T::zero(); MAXDIMS][..ndims]; - for i in 0..n { - (0..ndims).for_each(|j| tmp[j] = x[j][i]); - out[i] = self.interp_one(tmp)?; - } - - Ok(()) - } - - /// Interpolate the value at a point, - /// using fixed-size intermediate storage of O(ndims) and no allocation. - /// - /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). - /// - /// # Errors - /// * If the dimensionality of the point does not match the data - /// * If the dimensionality of either one exceeds the fixed maximum - /// * If the index along any dimension exceeds the maximum representable - /// integer value within the value type `T` - pub fn interp_one(&self, x: &[T]) -> Result { - // Check sizes - let ndims = self.ndims; - if !(x.len() == ndims && ndims <= MAXDIMS) { - return Err("Dimension mismatch"); - } - - // Initialize fixed-size intermediate storage. - // Maybe counterintuitively, initializing this storage here on every usage - // instead of once with the top level struct is a significant speedup - // and does not increase peak stack usage. - // - // Also notably, storing the index offsets as bool instead of usize - // reduces memory overhead, but has not effect on throughput rate. - let origin = &mut [0_usize; MAXDIMS][..ndims]; // Indices of lower corner of hypercube - let sat = &mut [Saturation::None; MAXDIMS][..ndims]; // Saturation none/high/low flags for each dim - let dts = &mut [T::zero(); MAXDIMS][..ndims]; // Normalized coordinate storage - let dimprod = &mut [1_usize; MAXDIMS][..ndims]; - - // Populate cumulative product of higher dimensions for indexing. - // - // Each entry is the cumulative product of the size of dimensions - // higher than this one, which is the stride between blocks - // relating to a given index along each dimension. - let mut acc = 1; - for i in 0..ndims { - dimprod[ndims - i - 1] = acc; - acc *= self.dims[ndims - i - 1]; - } - - // Populate lower corner and saturation flag for each dimension - for i in 0..ndims { - (origin[i], sat[i]) = self.get_loc(x[i], i)?; - } - - // Calculate normalized delta locations - // For the cubic method, the normalized coordinate `t` is always relative - // to cube index 1 (out of 0-3) - for i in 0..ndims { - let index_one_loc = self.starts[i] - + self.steps[i] - * ::from(origin[i] + 1) - .ok_or("Unrepresentable coordinate value")?; - dts[i] = (x[i] - index_one_loc) / self.steps[i]; - } - - // Recursive interpolation of one dependency tree at a time - // let loc = &origin; // Starting location in the tree is the origin - let dim = ndims; // Start from the end and recurse back to zero - let loc = &mut [0_usize; MAXDIMS][..ndims]; - loc.copy_from_slice(origin); - let interped = self.populate(dim, sat, origin, loc, dimprod, dts); - - Ok(interped) - } - - /// Get the two-lower index along this dimension where `x` is found, - /// saturating to the bounds at the edges if necessary. - /// - /// At the high bound of a given dimension, saturates to the fourth internal - /// point in order to capture a full 4-cube. - /// - /// Returned value like (lower_corner_index, saturation_flag). - #[inline] - fn get_loc(&self, v: T, dim: usize) -> Result<(usize, Saturation), &'static str> { - let saturation: Saturation; // What part of the grid cell are we in? - - let floc = ((v - self.starts[dim]) / self.steps[dim]).floor(); // float loc - // Signed integer loc, with the bottom of the cell aligned to place the normalized - // coordinate t=0 at cell index 1 - let iloc = ::from(floc).ok_or("Unrepresentable coordinate value")? - 1; - - let n = self.dims[dim] as isize; // Number of grid points on this dimension - let dimmax = n.saturating_sub(4).max(0); // maximum index for lower corner - let loc: usize = iloc.max(0).min(dimmax) as usize; // unsigned integer loc clipped to interior - - // Observation point is outside the grid on the low side - if iloc < -1 { - saturation = Saturation::OutsideLow; - } - // Observation point is in the lower part of the cell - // but not outside the grid - else if iloc == -1 { - saturation = Saturation::InsideLow; - } - // Observation point is in the upper part of the cell - // but not outside the grid - else if iloc > (n - 3) { - saturation = Saturation::OutsideHigh; - } - // Observation point is in the upper part of the cell - // but not outside the grid - else if iloc == (n - 3) { - saturation = Saturation::InsideHigh; - } - // Observation point is on the interior - else { - saturation = Saturation::None; - } - - Ok((loc, saturation)) - } - - /// Recursive evaluation of interpolant on each dimension - #[inline] - fn populate( - &self, - dim: usize, - sat: &[Saturation], - origin: &[usize], - loc: &mut [usize], - dimprod: &[usize], - dts: &[T], - ) -> T { - // Do the calc for this entry - match dim { - // If we have arrived at a leaf, index into data - 0 => index_arr(loc, dimprod, self.vals), - - // Otherwise, continue recursion - _ => { - // Keep track of where we are in the tree - // so that we can index into the value array properly - // when we reach the leaves - let next_dim = dim - 1; - - // Populate next dim's values - let mut vals = [T::zero(); 4]; - for i in 0..4 { - loc[next_dim] = origin[next_dim] + i; - vals[i] = self.populate(next_dim, sat, origin, loc, dimprod, dts); - } - loc[next_dim] = origin[next_dim]; // Reset for next usage - - // Interpolate on next dim's values to populate an entry in this dim - interp_inner::( - vals, - dts[next_dim], - sat[next_dim], - self.linearize_extrapolation, - ) - } - } - } -} - -/// Calculate slopes and offsets & select evaluation method -#[inline] -fn interp_inner(vals: [T; 4], t: T, sat: Saturation, linearize_extrapolation: bool) -> T { - // Construct some constants using generic methods - let one = T::one(); - let two = one + one; - - // For cases on the interior, use two slopes (from centered difference) and two values - // as the BCs. - // - // For locations falling near and edge, take one centered - // difference for the inside derivative, - // then for the derivative at the edge, impose a natural - // spline constraint, meaning the third derivative q'''(t) = 0 - // at the last grid point, which produces a quadratic in the - // last cell, reducing wobble that would be cause by enforcing - // the use of a cubic function where there is not enough information - // to support it. - - match sat { - Saturation::None => { - // |-> t - // --|---|---|---|-- - // x - // - // This is the nominal case - let y0 = vals[1]; - let dy = vals[2] - vals[1]; - - // Take slopes from centered difference - let k0 = (vals[2] - vals[0]) / two; - let k1 = (vals[3] - vals[1]) / two; - - normalized_hermite_spline(t, y0, dy, k0, k1) - } - Saturation::InsideLow => { - // t <-| - // --|---|---|---|-- - // x - // - // Flip direction to maintain symmetry - // with the InsideHigh case - let t = -t; // `t` always w.r.t. index 1 of cube - let y0 = vals[1]; // Same starting point, opposite direction - let dy = vals[0] - vals[1]; - - let k0 = -(vals[2] - vals[0]) / two; - - #[cfg(not(feature = "fma"))] - let k1 = two * dy - k0; // Natural spline boundary condition - #[cfg(feature = "fma")] - let k1 = two.mul_add(dy, -k0); // Natural spline boundary condition - - normalized_hermite_spline(t, y0, dy, k0, k1) - } - Saturation::OutsideLow => { - // t <-| - // --|---|---|---|-- - // x - // - // Flip direction to maintain symmetry - // with the InsideHigh case - let t = -t; // `t` always w.r.t. index 1 of cube - let y0 = vals[1]; // Same starting point, opposite direction - let y1 = vals[0]; - let dy = vals[0] - vals[1]; - - let k0 = -(vals[2] - vals[0]) / two; - let k1 = two * dy - k0; // Natural spline boundary condition - - // If we are linearizing the interpolant under extrapolation, - // hold the last slope outside the grid - if linearize_extrapolation { - #[cfg(not(feature = "fma"))] - { - y1 + k1 * (t - one) - } - #[cfg(feature = "fma")] - { - k1.mul_add(t - one, y1) - } - } else { - normalized_hermite_spline(t, y0, dy, k0, k1) - } - } - Saturation::InsideHigh => { - // |-> t - // --|---|---|---|-- - // x - // - // Shift cell up an index - // and offset `t`, which has value between 1 and 2 - // because it is calculated w.r.t. index 1 - let t = t - one; - let y0 = vals[2]; - let dy = vals[3] - vals[2]; - - let k0 = (vals[3] - vals[1]) / two; - - #[cfg(not(feature = "fma"))] - let k1 = two * dy - k0; // Natural spline boundary condition - #[cfg(feature = "fma")] - let k1 = two.mul_add(dy, -k0); // Natural spline boundary condition - - normalized_hermite_spline(t, y0, dy, k0, k1) - } - Saturation::OutsideHigh => { - // |-> t - // --|---|---|---|-- - // x - // - // Shift cell up an index - // and offset `t`, which has value between 1 and 2 - // because it is calculated w.r.t. index 1 - let t = t - one; - let y0 = vals[2]; - let y1 = vals[3]; - let dy = vals[3] - vals[2]; - - let k0 = (vals[3] - vals[1]) / two; - - #[cfg(not(feature = "fma"))] - let k1 = two * dy - k0; // Natural spline boundary condition - #[cfg(feature = "fma")] - let k1 = two.mul_add(dy, -k0); // Natural spline boundary condition - - // If we are linearizing the interpolant under extrapolation, - // hold the last slope outside the grid - if linearize_extrapolation { - #[cfg(not(feature = "fma"))] - { - y1 + k1 * (t - one) - } - #[cfg(feature = "fma")] - { - k1.mul_add(t - one, y1) - } - } else { - normalized_hermite_spline(t, y0, dy, k0, k1) - } - } - } -} - -#[cfg(test)] -mod test { - use super::interpn; - use crate::utils::*; - - /// Iterate from 1 to 6 dimensions, making a minimum-sized grid for each one - /// to traverse every combination of interpolating or extrapolating high or low on each dimension. - /// Each test evaluates at 5^ndims locations, largely extrapolated in corner regions, so it - /// rapidly becomes prohibitively slow above ndims=6. - #[test] - fn test_interp_extrap_1d_to_6d_linear() { - for ndims in 1..6 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![4; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i])) - .collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = grid.iter().map(|x| x.iter().sum()).collect(); // sum is linear in every direction, good for testing - let starts: Vec = xs.iter().map(|x| x[0]).collect(); - let steps: Vec = xs.iter().map(|x| x[1] - x[0]).collect(); - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-7.0 * (i as f64), 7.0 * ((i + 1) as f64), dims[i] + 2)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = gridobs.iter().map(|x| x.iter().sum()).collect(); // expected output at observation points - let mut out = vec![0.0; uobs.len()]; - - // Evaluate with spline extrapolation, which should collapse to linear - interpn(&dims, &starts, &steps, &u, false, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-12)); - - // Evaluate with linear extrapolation - interpn(&dims, &starts, &steps, &u, true, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-12)); - } - } - - /// Under both interpolation and extrapolation, a hermite spline with natural boundary condition - /// can reproduce an N-dimensional quadratic function exactly - #[test] - fn test_interp_extrap_1d_to_6d_quadratic() { - for ndims in 1..6 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![4; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i])) - .collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = (0..grid.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += grid[i][j] * grid[i][j]; - } - v - }) - .collect(); // Quadratic in every direction - let starts: Vec = xs.iter().map(|x| x[0]).collect(); - let steps: Vec = xs.iter().map(|x| x[1] - x[0]).collect(); - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-7.0 * (i as f64), 7.0 * ((i + 1) as f64), dims[i] + 2)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = (0..gridobs.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += gridobs[i][j] * gridobs[i][j]; - } - v - }) - .collect(); // Quadratic in every direction - let mut out = vec![0.0; uobs.len()]; - - // Evaluate - interpn(&dims, &starts, &steps, &u, false, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated and extrapolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - (0..uobs.len()).for_each(|i| assert!((out[i] - uobs[i]).abs() < 1e-10)); - } - } - - /// Under interpolation, a hermite spline with natural boundary condition - /// can reproduce an N-dimensional sine function fairly closely, but not exactly. - /// More points are required to capture a sine function, so fewer dimensions are tested - /// to keep test run times low. - #[test] - fn test_interp_1d_to_3d_sine() { - for ndims in 1..3 { - println!("Testing in {ndims} dims"); - // Interp grid - let dims: Vec = vec![10; ndims]; - let xs: Vec> = (0..ndims) - .map(|i| linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i])) - .collect(); - let grid = meshgrid((0..ndims).map(|i| &xs[i]).collect()); - let u: Vec = (0..grid.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += (grid[i][j] * 6.28 / 10.0).sin(); - } - v - }) - .collect(); // Quadratic in every direction - let starts: Vec = xs.iter().map(|x| x[0]).collect(); - let steps: Vec = xs.iter().map(|x| x[1] - x[0]).collect(); - - // Observation points - let xobs: Vec> = (0..ndims) - .map(|i| linspace(-5.0 * (i as f64), 5.0 * ((i + 1) as f64), dims[i] + 2)) - .collect(); - let gridobs = meshgrid((0..ndims).map(|i| &xobs[i]).collect()); - let gridobs_t: Vec> = (0..ndims) - .map(|i| gridobs.iter().map(|x| x[i]).collect()) - .collect(); // transpose - let xobsslice: Vec<&[f64]> = gridobs_t.iter().map(|x| &x[..]).collect(); - let uobs: Vec = (0..gridobs.len()) - .map(|i| { - let mut v = 0.0; - for j in 0..ndims { - v += (gridobs[i][j] * 6.28 / 10.0).sin(); - } - v - }) - .collect(); // Quadratic in every direction - let mut out = vec![0.0; uobs.len()]; - - // Evaluate - interpn(&dims, &starts, &steps, &u, false, &xobsslice, &mut out[..]).unwrap(); - - // Check that interpolated and extrapolated values match expectation, - // using an absolute difference because some points are very close to or exactly at zero, - // and do not do well under a check on relative difference. - let tol = 2e-2 * f64::from(ndims as u32); - - (0..uobs.len()).for_each(|i| { - let err = out[i] - uobs[i]; - // println!("{err}"); - assert!(err.abs() < tol); - }); - } - } -} From 6c7fb17434cffd4be1e50590e7e5b190d8a2ce5f Mon Sep 17 00:00:00 2001 From: James Logan Date: Mon, 12 Jan 2026 00:23:04 -0500 Subject: [PATCH 03/16] update perf plots --- docs/1d_quality_of_fit_Rectilinear.html | 2 +- docs/1d_quality_of_fit_Rectilinear.svg | 2 +- docs/1d_quality_of_fit_Regular.html | 2 +- docs/1d_quality_of_fit_Regular.svg | 2 +- docs/2d_quality_of_fit_Rectilinear.html | 2 +- docs/2d_quality_of_fit_Rectilinear.svg | 2 +- docs/2d_quality_of_fit_Regular.html | 2 +- docs/2d_quality_of_fit_Regular.svg | 2 +- docs/3d_throughput_vs_nobs_cubic.html | 2 +- docs/3d_throughput_vs_nobs_cubic.svg | 2 +- docs/3d_throughput_vs_nobs_linear.html | 2 +- docs/3d_throughput_vs_nobs_linear.svg | 2 +- docs/3d_throughput_vs_nobs_prealloc_cubic.html | 2 +- docs/3d_throughput_vs_nobs_prealloc_cubic.svg | 2 +- docs/3d_throughput_vs_nobs_prealloc_linear.html | 2 +- docs/3d_throughput_vs_nobs_prealloc_linear.svg | 2 +- docs/4d_throughput_vs_nobs_cubic.html | 2 +- docs/4d_throughput_vs_nobs_cubic.svg | 2 +- docs/4d_throughput_vs_nobs_linear.html | 2 +- docs/4d_throughput_vs_nobs_linear.svg | 2 +- docs/4d_throughput_vs_nobs_prealloc_cubic.html | 2 +- docs/4d_throughput_vs_nobs_prealloc_cubic.svg | 2 +- docs/4d_throughput_vs_nobs_prealloc_linear.html | 2 +- docs/4d_throughput_vs_nobs_prealloc_linear.svg | 2 +- docs/nearest_quality_of_fit.html | 2 +- docs/nearest_quality_of_fit.svg | 2 +- docs/speedup_vs_dims_1000_obs_cubic.html | 2 +- docs/speedup_vs_dims_1000_obs_cubic.svg | 2 +- docs/speedup_vs_dims_1000_obs_linear.html | 2 +- docs/speedup_vs_dims_1000_obs_linear.svg | 2 +- docs/speedup_vs_dims_1_obs_cubic.html | 2 +- docs/speedup_vs_dims_1_obs_cubic.svg | 2 +- docs/speedup_vs_dims_1_obs_linear.html | 2 +- docs/speedup_vs_dims_1_obs_linear.svg | 2 +- docs/speedup_vs_threads_10000000_obs.html | 2 +- docs/throughput_vs_dims_1000_obs.html | 2 +- docs/throughput_vs_dims_1000_obs.svg | 2 +- docs/throughput_vs_dims_1_obs.html | 2 +- docs/throughput_vs_dims_1_obs.svg | 2 +- 39 files changed, 39 insertions(+), 39 deletions(-) diff --git a/docs/1d_quality_of_fit_Rectilinear.html b/docs/1d_quality_of_fit_Rectilinear.html index b5392f5..c53f0e2 100644 --- a/docs/1d_quality_of_fit_Rectilinear.html +++ b/docs/1d_quality_of_fit_Rectilinear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/1d_quality_of_fit_Rectilinear.svg b/docs/1d_quality_of_fit_Rectilinear.svg index 25f0bf9..353ffaa 100644 --- a/docs/1d_quality_of_fit_Rectilinear.svg +++ b/docs/1d_quality_of_fit_Rectilinear.svg @@ -1 +1 @@ -05−1−0.500.51−1−0.500.51−202−200μ−100μ0−202−0.200.2−202−2−1.5−1−0.50DataInterpNScipyInterpN ErrorScipy ErrorComparison — InterpN vs. Scipy w/ Cubic InterpolantRectilinear Gridxxxf(x)ErrorQuadraticSineStepError, QuadraticError, SineError, Step \ No newline at end of file +05−1−0.500.51−1−0.500.51−202−200μ−100μ0−202−0.200.2−202−2−1.5−1−0.50DataInterpNScipyInterpN ErrorScipy ErrorComparison — InterpN vs. Scipy w/ Cubic InterpolantRectilinear Gridxxxf(x)ErrorQuadraticSineStepError, QuadraticError, SineError, Step \ No newline at end of file diff --git a/docs/1d_quality_of_fit_Regular.html b/docs/1d_quality_of_fit_Regular.html index 096796f..ecc5521 100644 --- a/docs/1d_quality_of_fit_Regular.html +++ b/docs/1d_quality_of_fit_Regular.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/1d_quality_of_fit_Regular.svg b/docs/1d_quality_of_fit_Regular.svg index 475657f..6b189de 100644 --- a/docs/1d_quality_of_fit_Regular.svg +++ b/docs/1d_quality_of_fit_Regular.svg @@ -1 +1 @@ -05−1−0.500.51012−202−10f−5f0−202−0.200.2−202012DataInterpNScipyInterpN ErrorScipy ErrorComparison — InterpN vs. Scipy w/ Cubic InterpolantRegular Gridxxxf(x)ErrorQuadraticSineStepError, QuadraticError, SineError, Step \ No newline at end of file +05−1−0.500.51012−202−10f−5f0−202−0.200.2−202012DataInterpNScipyInterpN ErrorScipy ErrorComparison — InterpN vs. Scipy w/ Cubic InterpolantRegular Gridxxxf(x)ErrorQuadraticSineStepError, QuadraticError, SineError, Step \ No newline at end of file diff --git a/docs/2d_quality_of_fit_Rectilinear.html b/docs/2d_quality_of_fit_Rectilinear.html index b7e5fe4..bda719b 100644 --- a/docs/2d_quality_of_fit_Rectilinear.html +++ b/docs/2d_quality_of_fit_Rectilinear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/2d_quality_of_fit_Rectilinear.svg b/docs/2d_quality_of_fit_Rectilinear.svg index 0b7ec17..3bd71d3 100644 --- a/docs/2d_quality_of_fit_Rectilinear.svg +++ b/docs/2d_quality_of_fit_Rectilinear.svg @@ -1 +1 @@ -Sampled data1020304050−0.00200.002Quadratic Test Function w/ Cubic InterpolantRectilinear GridTruthInterpNScipyError, InterpNError, Scipy \ No newline at end of file +Sampled data1020304050−0.00200.002Quadratic Test Function w/ Cubic InterpolantRectilinear GridTruthInterpNScipyError, InterpNError, Scipy \ No newline at end of file diff --git a/docs/2d_quality_of_fit_Regular.html b/docs/2d_quality_of_fit_Regular.html index 891a69a..2ece335 100644 --- a/docs/2d_quality_of_fit_Regular.html +++ b/docs/2d_quality_of_fit_Regular.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/2d_quality_of_fit_Regular.svg b/docs/2d_quality_of_fit_Regular.svg index b92b02b..f567745 100644 --- a/docs/2d_quality_of_fit_Regular.svg +++ b/docs/2d_quality_of_fit_Regular.svg @@ -1 +1 @@ -Sampled data1020304050−500f0500fQuadratic Test Function w/ Cubic InterpolantRegular GridTruthInterpNScipyError, InterpNError, Scipy \ No newline at end of file +Sampled data1020304050−500f0500fQuadratic Test Function w/ Cubic InterpolantRegular GridTruthInterpNScipyError, InterpNError, Scipy \ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_cubic.html b/docs/3d_throughput_vs_nobs_cubic.html index b2bb5b2..067584d 100644 --- a/docs/3d_throughput_vs_nobs_cubic.html +++ b/docs/3d_throughput_vs_nobs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_cubic.svg b/docs/3d_throughput_vs_nobs_cubic.svg index 25ba095..1719dea 100644 --- a/docs/3d_throughput_vs_nobs_cubic.svg +++ b/docs/3d_throughput_vs_nobs_cubic.svg @@ -1 +1 @@ -12510251002510002510k2468ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k2468ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_linear.html b/docs/3d_throughput_vs_nobs_linear.html index 388f324..87f0447 100644 --- a/docs/3d_throughput_vs_nobs_linear.html +++ b/docs/3d_throughput_vs_nobs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_linear.svg b/docs/3d_throughput_vs_nobs_linear.svg index bc4a521..5c5a34b 100644 --- a/docs/3d_throughput_vs_nobs_linear.svg +++ b/docs/3d_throughput_vs_nobs_linear.svg @@ -1 +1 @@ -12510251002510002510k051015202530ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k0510152025ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_cubic.html b/docs/3d_throughput_vs_nobs_prealloc_cubic.html index 29e0750..6b690c9 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_cubic.html +++ b/docs/3d_throughput_vs_nobs_prealloc_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_cubic.svg b/docs/3d_throughput_vs_nobs_prealloc_cubic.svg index 1c7fb58..55a0c6c 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_cubic.svg +++ b/docs/3d_throughput_vs_nobs_prealloc_cubic.svg @@ -1 +1 @@ -12510251002510002510k05101520ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k51015ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_linear.html b/docs/3d_throughput_vs_nobs_prealloc_linear.html index 005952c..70b0af3 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_linear.html +++ b/docs/3d_throughput_vs_nobs_prealloc_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_linear.svg b/docs/3d_throughput_vs_nobs_prealloc_linear.svg index 7f2e854..4a3b2d7 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_linear.svg +++ b/docs/3d_throughput_vs_nobs_prealloc_linear.svg @@ -1 +1 @@ -12510251002510002510k0204060ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k0102030405060ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_cubic.html b/docs/4d_throughput_vs_nobs_cubic.html index 5820a75..2951473 100644 --- a/docs/4d_throughput_vs_nobs_cubic.html +++ b/docs/4d_throughput_vs_nobs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_cubic.svg b/docs/4d_throughput_vs_nobs_cubic.svg index 6c0f69c..4583390 100644 --- a/docs/4d_throughput_vs_nobs_cubic.svg +++ b/docs/4d_throughput_vs_nobs_cubic.svg @@ -1 +1 @@ -12510251002510002510k2468ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k2468ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_linear.html b/docs/4d_throughput_vs_nobs_linear.html index 1215987..225eaa6 100644 --- a/docs/4d_throughput_vs_nobs_linear.html +++ b/docs/4d_throughput_vs_nobs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_linear.svg b/docs/4d_throughput_vs_nobs_linear.svg index d81b614..9d695fd 100644 --- a/docs/4d_throughput_vs_nobs_linear.svg +++ b/docs/4d_throughput_vs_nobs_linear.svg @@ -1 +1 @@ -12510251002510002510k010203040ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k010203040ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_cubic.html b/docs/4d_throughput_vs_nobs_prealloc_cubic.html index 3a7af65..1dc7176 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_cubic.html +++ b/docs/4d_throughput_vs_nobs_prealloc_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_cubic.svg b/docs/4d_throughput_vs_nobs_prealloc_cubic.svg index 2568752..c5e7976 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_cubic.svg +++ b/docs/4d_throughput_vs_nobs_prealloc_cubic.svg @@ -1 +1 @@ -12510251002510002510k51015ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k510ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_linear.html b/docs/4d_throughput_vs_nobs_prealloc_linear.html index cfec02b..b2fe08d 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_linear.html +++ b/docs/4d_throughput_vs_nobs_prealloc_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_linear.svg b/docs/4d_throughput_vs_nobs_prealloc_linear.svg index d171e91..a6c1e72 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_linear.svg +++ b/docs/4d_throughput_vs_nobs_prealloc_linear.svg @@ -1 +1 @@ -12510251002510002510k020406080ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k020406080ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/nearest_quality_of_fit.html b/docs/nearest_quality_of_fit.html index 96c98d1..fd2b84a 100644 --- a/docs/nearest_quality_of_fit.html +++ b/docs/nearest_quality_of_fit.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/nearest_quality_of_fit.svg b/docs/nearest_quality_of_fit.svg index eec8f87..e2676b3 100644 --- a/docs/nearest_quality_of_fit.svg +++ b/docs/nearest_quality_of_fit.svg @@ -1 +1 @@ -Grid samples−1012−101Nearest-Neighbor Quality of Fit — InterpN vs. Scipy griddata (nearest)TruthInterpNScipyError: InterpNError: ScipyScipy - InterpN \ No newline at end of file +Grid samples−1012−101Nearest-Neighbor Quality of Fit — InterpN vs. Scipy griddata (nearest)TruthInterpNScipyError: InterpNError: ScipyScipy - InterpN \ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_cubic.html b/docs/speedup_vs_dims_1000_obs_cubic.html index f1762c8..1f7e9f4 100644 --- a/docs/speedup_vs_dims_1000_obs_cubic.html +++ b/docs/speedup_vs_dims_1000_obs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_cubic.svg b/docs/speedup_vs_dims_1000_obs_cubic.svg index 9d5a269..d58085d 100644 --- a/docs/speedup_vs_dims_1000_obs_cubic.svg +++ b/docs/speedup_vs_dims_1000_obs_cubic.svg @@ -1 +1 @@ -12345624681012InterpN Speedup vs. ScipyCubic, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +123456024681012InterpN Speedup vs. ScipyCubic, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_linear.html b/docs/speedup_vs_dims_1000_obs_linear.html index 9a83b47..48ade75 100644 --- a/docs/speedup_vs_dims_1000_obs_linear.html +++ b/docs/speedup_vs_dims_1000_obs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_linear.svg b/docs/speedup_vs_dims_1000_obs_linear.svg index b806e7e..652c19f 100644 --- a/docs/speedup_vs_dims_1000_obs_linear.svg +++ b/docs/speedup_vs_dims_1000_obs_linear.svg @@ -1 +1 @@ -1234562468101214InterpN Speedup vs. ScipyLinear, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +1234562468101214InterpN Speedup vs. ScipyLinear, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_cubic.html b/docs/speedup_vs_dims_1_obs_cubic.html index 6976bf7..86019ad 100644 --- a/docs/speedup_vs_dims_1_obs_cubic.html +++ b/docs/speedup_vs_dims_1_obs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_cubic.svg b/docs/speedup_vs_dims_1_obs_cubic.svg index e6af1cd..26369f7 100644 --- a/docs/speedup_vs_dims_1_obs_cubic.svg +++ b/docs/speedup_vs_dims_1_obs_cubic.svg @@ -1 +1 @@ -12345651015InterpN Speedup vs. ScipyCubic, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +12345605101520InterpN Speedup vs. ScipyCubic, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_linear.html b/docs/speedup_vs_dims_1_obs_linear.html index f72ae9d..50a5f3c 100644 --- a/docs/speedup_vs_dims_1_obs_linear.html +++ b/docs/speedup_vs_dims_1_obs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_linear.svg b/docs/speedup_vs_dims_1_obs_linear.svg index f413929..27d705d 100644 --- a/docs/speedup_vs_dims_1_obs_linear.svg +++ b/docs/speedup_vs_dims_1_obs_linear.svg @@ -1 +1 @@ -123456050100150200250InterpN Speedup vs. ScipyLinear, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +123456050100150200250InterpN Speedup vs. ScipyLinear, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_threads_10000000_obs.html b/docs/speedup_vs_threads_10000000_obs.html index 1c55871..d7ba2bb 100644 --- a/docs/speedup_vs_threads_10000000_obs.html +++ b/docs/speedup_vs_threads_10000000_obs.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/throughput_vs_dims_1000_obs.html b/docs/throughput_vs_dims_1000_obs.html index b0c0854..165877c 100644 --- a/docs/throughput_vs_dims_1000_obs.html +++ b/docs/throughput_vs_dims_1000_obs.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/throughput_vs_dims_1000_obs.svg b/docs/throughput_vs_dims_1000_obs.svg index e7c43f1..e16fd26 100644 --- a/docs/throughput_vs_dims_1000_obs.svg +++ b/docs/throughput_vs_dims_1000_obs.svg @@ -1 +1 @@ -246250.01250.12512460.0010.010.11LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1000 Observation PointsNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file +246250.01250.1251246100μ0.0010.010.11LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1000 Observation PointsNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file diff --git a/docs/throughput_vs_dims_1_obs.html b/docs/throughput_vs_dims_1_obs.html index 4541cd9..4d286a9 100644 --- a/docs/throughput_vs_dims_1_obs.html +++ b/docs/throughput_vs_dims_1_obs.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/throughput_vs_dims_1_obs.svg b/docs/throughput_vs_dims_1_obs.svg index f970e92..2a2c8f6 100644 --- a/docs/throughput_vs_dims_1_obs.svg +++ b/docs/throughput_vs_dims_1_obs.svg @@ -1 +1 @@ -2460.001250.01250.125122460.01250.1251LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1 Observation PointNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file +2460.001250.01250.125122460.01250.1251LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1 Observation PointNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file From d1b521526f0ffe2edc3355fc0d93d57f106195e7 Mon Sep 17 00:00:00 2001 From: James Logan Date: Mon, 12 Jan 2026 19:42:24 -0500 Subject: [PATCH 04/16] better bounds checks --- src/multicubic/rectilinear.rs | 8 ++------ src/multicubic/regular.rs | 8 ++------ src/multilinear/rectilinear.rs | 8 ++------ src/multilinear/regular.rs | 7 ++----- src/nearest/rectilinear.rs | 13 ++----------- src/nearest/regular.rs | 7 ++----- 6 files changed, 12 insertions(+), 39 deletions(-) diff --git a/src/multicubic/rectilinear.rs b/src/multicubic/rectilinear.rs index f625eb4..371cc4b 100644 --- a/src/multicubic/rectilinear.rs +++ b/src/multicubic/rectilinear.rs @@ -240,13 +240,9 @@ impl<'a, T: Float, const N: usize> MulticubicRectilinear<'a, T, N> { /// * If the dimensionality of the point does not match the data /// * If the dimensionality of point or data does not match the grid pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } + let n = out.len(); + for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/multicubic/regular.rs b/src/multicubic/regular.rs index c976175..a88a29c 100644 --- a/src/multicubic/regular.rs +++ b/src/multicubic/regular.rs @@ -287,13 +287,9 @@ impl<'a, T: Float, const N: usize> MulticubicRegular<'a, T, N> { /// * If the dimensionality of the point does not match the data /// * If the dimensionality of point or data does not match the grid pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } + let n = out.len(); + for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/multilinear/rectilinear.rs b/src/multilinear/rectilinear.rs index 5bee9ba..53939e5 100644 --- a/src/multilinear/rectilinear.rs +++ b/src/multilinear/rectilinear.rs @@ -203,13 +203,9 @@ impl<'a, T: Float, const N: usize> MultilinearRectilinear<'a, T, N> { /// * If the dimensionality of the point does not match the data /// * If the dimensionality of point or data does not match the grid pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } + let n = out.len(); + for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/multilinear/regular.rs b/src/multilinear/regular.rs index 538d67d..a18e6c4 100644 --- a/src/multilinear/regular.rs +++ b/src/multilinear/regular.rs @@ -269,12 +269,9 @@ impl<'a, T: Float, const N: usize> MultilinearRegular<'a, T, N> { /// * If the dimensionality of the point does not match the data /// * If the dimensionality of point or data does not match the grid pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } + let n = out.len(); + for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/nearest/rectilinear.rs b/src/nearest/rectilinear.rs index d7c9c44..b72fa32 100644 --- a/src/nearest/rectilinear.rs +++ b/src/nearest/rectilinear.rs @@ -156,18 +156,9 @@ impl<'a, T: Float, const N: usize> NearestRectilinear<'a, T, N> { /// * If the dimensionality of the point does not match the data /// * If the dimensionality of point or data does not match the grid pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); - - // Make sure there are enough coordinate inputs for each dimension - if x.len() != N { - return Err("Dimension mismatch"); - } - // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } + let n = out.len(); + for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index d0a8b61..9c79b38 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -203,12 +203,9 @@ impl<'a, T: Float, const N: usize> NearestRegular<'a, T, N> { /// * If the dimensionality of the point does not match the data /// * If the dimensionality of point or data does not match the grid pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { - let n = out.len(); // Make sure the size of inputs and output match - let size_matches = x.iter().all(|&xx| xx.len() == out.len()); - if !size_matches { - return Err("Dimension mismatch"); - } + let n = out.len(); + for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } let mut tmp = [T::zero(); N]; for i in 0..n { From 3f9523864ee35855df5c385b080e26355d3240f5 Mon Sep 17 00:00:00 2001 From: James Logan Date: Mon, 12 Jan 2026 19:56:13 -0500 Subject: [PATCH 05/16] format --- src/multicubic/rectilinear.rs | 6 +++++- src/multicubic/regular.rs | 6 +++++- src/multilinear/rectilinear.rs | 6 +++++- src/multilinear/regular.rs | 6 +++++- src/nearest/rectilinear.rs | 6 +++++- src/nearest/regular.rs | 6 +++++- 6 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/multicubic/rectilinear.rs b/src/multicubic/rectilinear.rs index 371cc4b..6a69eb9 100644 --- a/src/multicubic/rectilinear.rs +++ b/src/multicubic/rectilinear.rs @@ -242,7 +242,11 @@ impl<'a, T: Float, const N: usize> MulticubicRectilinear<'a, T, N> { pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { // Make sure the size of inputs and output match let n = out.len(); - for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } + for i in 0..N { + if x[i].len() != n { + return Err("Dimension mismatch"); + } + } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/multicubic/regular.rs b/src/multicubic/regular.rs index a88a29c..1ce30e9 100644 --- a/src/multicubic/regular.rs +++ b/src/multicubic/regular.rs @@ -289,7 +289,11 @@ impl<'a, T: Float, const N: usize> MulticubicRegular<'a, T, N> { pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { // Make sure the size of inputs and output match let n = out.len(); - for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } + for i in 0..N { + if x[i].len() != n { + return Err("Dimension mismatch"); + } + } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/multilinear/rectilinear.rs b/src/multilinear/rectilinear.rs index 53939e5..cbf4fca 100644 --- a/src/multilinear/rectilinear.rs +++ b/src/multilinear/rectilinear.rs @@ -205,7 +205,11 @@ impl<'a, T: Float, const N: usize> MultilinearRectilinear<'a, T, N> { pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { // Make sure the size of inputs and output match let n = out.len(); - for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } + for i in 0..N { + if x[i].len() != n { + return Err("Dimension mismatch"); + } + } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/multilinear/regular.rs b/src/multilinear/regular.rs index a18e6c4..66ff4c4 100644 --- a/src/multilinear/regular.rs +++ b/src/multilinear/regular.rs @@ -271,7 +271,11 @@ impl<'a, T: Float, const N: usize> MultilinearRegular<'a, T, N> { pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { // Make sure the size of inputs and output match let n = out.len(); - for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } + for i in 0..N { + if x[i].len() != n { + return Err("Dimension mismatch"); + } + } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/nearest/rectilinear.rs b/src/nearest/rectilinear.rs index b72fa32..a88c49a 100644 --- a/src/nearest/rectilinear.rs +++ b/src/nearest/rectilinear.rs @@ -158,7 +158,11 @@ impl<'a, T: Float, const N: usize> NearestRectilinear<'a, T, N> { pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { // Make sure the size of inputs and output match let n = out.len(); - for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } + for i in 0..N { + if x[i].len() != n { + return Err("Dimension mismatch"); + } + } let mut tmp = [T::zero(); N]; for i in 0..n { diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index 9c79b38..c9e2846 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -205,7 +205,11 @@ impl<'a, T: Float, const N: usize> NearestRegular<'a, T, N> { pub fn interp(&self, x: &[&[T]; N], out: &mut [T]) -> Result<(), &'static str> { // Make sure the size of inputs and output match let n = out.len(); - for i in 0..N { if x[i].len() != n { return Err("Dimension mismatch") } } + for i in 0..N { + if x[i].len() != n { + return Err("Dimension mismatch"); + } + } let mut tmp = [T::zero(); N]; for i in 0..n { From 09d461e12d81008b29c4b0bc09e8a471c20d0716 Mon Sep 17 00:00:00 2001 From: James Logan Date: Tue, 13 Jan 2026 14:32:51 -0500 Subject: [PATCH 06/16] consolidate grid checks --- src/lib.rs | 43 ++++++++++++++++++++++++++++++++++ src/multilinear/rectilinear.rs | 17 +------------- src/multilinear/regular.rs | 15 +----------- src/nearest/rectilinear.rs | 17 +------------- src/nearest/regular.rs | 15 +----------- 5 files changed, 47 insertions(+), 60 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index db7ac64..9805932 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -158,6 +158,49 @@ const MAXDIMS_ERR: &str = "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; const MIN_CHUNK_SIZE: usize = 1024; +#[inline(always)] +pub(crate) fn validate_regular_grid( + dims: &[usize; N], + steps: &[T; N], + vals: &[T], +) -> Result<(), &'static str> { + let nvals: usize = dims.iter().product(); + if vals.len() != nvals { + return Err("Dimension mismatch"); + } + let degenerate = dims.iter().any(|&x| x < 2); + if degenerate { + return Err("All grids must have at least two entries"); + } + let steps_are_positive = steps.iter().all(|&x| x > T::zero()); + if !steps_are_positive { + return Err("All grids must be monotonically increasing"); + } + Ok(()) +} + +#[inline(always)] +pub(crate) fn validate_rectilinear_grid( + grids: &[&[T]; N], + vals: &[T], +) -> Result<[usize; N], &'static str> { + let mut dims = [1_usize; N]; + (0..N).for_each(|i| dims[i] = grids[i].len()); + let nvals: usize = dims.iter().product(); + if vals.len() != nvals { + return Err("Dimension mismatch"); + } + let degenerate = dims.iter().any(|&x| x < 2); + if degenerate { + return Err("All grids must have at least 2 entries"); + } + let monotonic_maybe = grids.iter().all(|&g| g[1] > g[0]); + if !monotonic_maybe { + return Err("All grids must be monotonically increasing"); + } + Ok(dims) +} + /// The number of physical cores present on the machine; /// initialized once, then never again, because each call involves some file I/O /// and allocations that can be slower than the function call that they support. diff --git a/src/multilinear/rectilinear.rs b/src/multilinear/rectilinear.rs index cbf4fca..bf38e21 100644 --- a/src/multilinear/rectilinear.rs +++ b/src/multilinear/rectilinear.rs @@ -175,22 +175,7 @@ impl<'a, T: Float, const N: usize> MultilinearRectilinear<'a, T, N> { /// * If any step sizes have zero or negative magnitude pub fn new(grids: &'a [&'a [T]; N], vals: &'a [T]) -> Result { // Check dimensions - let mut dims = [1_usize; N]; - (0..N).for_each(|i| dims[i] = grids[i].len()); - let nvals: usize = dims[..N].iter().product(); - if vals.len() != nvals { - return Err("Dimension mismatch"); - }; - // Check if any grids are degenerate - let degenerate = dims.iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least 2 entries"); - }; - // Check that at least the first two entries in each grid are monotonic - let monotonic_maybe = grids.iter().all(|&g| g[1] > g[0]); - if !monotonic_maybe { - return Err("All grids must be monotonically increasing"); - }; + let dims = crate::validate_rectilinear_grid(grids, vals)?; Ok(Self { grids, dims, vals }) } diff --git a/src/multilinear/regular.rs b/src/multilinear/regular.rs index 66ff4c4..c111330 100644 --- a/src/multilinear/regular.rs +++ b/src/multilinear/regular.rs @@ -238,20 +238,7 @@ impl<'a, T: Float, const N: usize> MultilinearRegular<'a, T, N> { vals: &'a [T], ) -> Result { // Check dimensions - let nvals: usize = dims.iter().product(); - if vals.len() != nvals { - return Err("Dimension mismatch"); - } - // Make sure all dimensions have at least four entries - let degenerate = dims[..N].iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least two entries"); - } - // Check if any dimensions have zero or negative step size - let steps_are_positive = steps.iter().all(|&x| x > T::zero()); - if !steps_are_positive { - return Err("All grids must be monotonically increasing"); - } + crate::validate_regular_grid(&dims, &steps, vals)?; Ok(Self { dims, diff --git a/src/nearest/rectilinear.rs b/src/nearest/rectilinear.rs index a88c49a..1bbd974 100644 --- a/src/nearest/rectilinear.rs +++ b/src/nearest/rectilinear.rs @@ -128,22 +128,7 @@ impl<'a, T: Float, const N: usize> NearestRectilinear<'a, T, N> { "Flattened method defined for 1-6 dimensions." ); } - let mut dims = [1_usize; N]; - (0..N).for_each(|i| dims[i] = grids[i].len()); - let nvals: usize = dims[..N].iter().product(); - if vals.len() != nvals { - return Err("Dimension mismatch"); - }; - // Check if any grids are degenerate - let degenerate = dims.iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least 2 entries"); - }; - // Check that at least the first two entries in each grid are monotonic - let monotonic_maybe = grids.iter().all(|&g| g[1] > g[0]); - if !monotonic_maybe { - return Err("All grids must be monotonically increasing"); - }; + let dims = crate::validate_rectilinear_grid(grids, vals)?; Ok(Self { grids, dims, vals }) } diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index c9e2846..1130e58 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -172,20 +172,7 @@ impl<'a, T: Float, const N: usize> NearestRegular<'a, T, N> { "Flattened method defined for 1-6 dimensions. For higher dimensions, use recursive method." ); } - let nvals: usize = dims.iter().product(); - if vals.len() != nvals { - return Err("Dimension mismatch"); - } - // Make sure all dimensions have at least four entries - let degenerate = dims[..N].iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least two entries"); - } - // Check if any dimensions have zero or negative step size - let steps_are_positive = steps.iter().all(|&x| x > T::zero()); - if !steps_are_positive { - return Err("All grids must be monotonically increasing"); - } + crate::validate_regular_grid(&dims, &steps, vals)?; Ok(Self { dims, From 85a7a7dee8dda7e454bbab27910a413acb4c6b1c Mon Sep 17 00:00:00 2001 From: James Logan Date: Tue, 13 Jan 2026 14:47:07 -0500 Subject: [PATCH 07/16] factor out dimension selection --- src/lib.rs | 16 ++++++++ src/multilinear/rectilinear.rs | 30 ++++---------- src/multilinear/regular.rs | 75 +++++++--------------------------- src/nearest/rectilinear.rs | 24 ++++------- src/nearest/regular.rs | 59 +++++++------------------- 5 files changed, 62 insertions(+), 142 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9805932..a323adf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,6 +90,22 @@ use num_traits::Float; +#[doc(hidden)] +#[macro_export] +macro_rules! dispatch_ndims { + ($ndims:expr, $err:expr, [$($n:literal),+ $(,)?], |$N:ident| $body:expr $(,)?) => {{ + match $ndims { + $( + $n => { + const $N: usize = $n; + $body + } + )+ + _ => Err($err), + } + }}; +} + pub mod multilinear; pub use multilinear::{MultilinearRectilinear, MultilinearRegular}; diff --git a/src/multilinear/rectilinear.rs b/src/multilinear/rectilinear.rs index bf38e21..817798e 100644 --- a/src/multilinear/rectilinear.rs +++ b/src/multilinear/rectilinear.rs @@ -58,27 +58,15 @@ pub fn interpn( if grids.len() != ndims || obs.len() != ndims { return Err("Dimension mismatch"); } - match ndims { - 1 => MultilinearRectilinear::<'_, T, 1>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 2 => MultilinearRectilinear::<'_, T, 2>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 3 => MultilinearRectilinear::<'_, T, 3>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 4 => MultilinearRectilinear::<'_, T, 4>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 5 => MultilinearRectilinear::<'_, T, 5>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 6 => MultilinearRectilinear::<'_, T, 6>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 7 => MultilinearRectilinear::<'_, T, 7>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 8 => MultilinearRectilinear::<'_, T, 8>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - }?; + crate::dispatch_ndims!( + ndims, + "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", + [1, 2, 3, 4, 5, 6, 7, 8], + |N| { + MultilinearRectilinear::<'_, T, N>::new(grids.try_into().unwrap(), vals)? + .interp(obs.try_into().unwrap(), out) + } + )?; Ok(()) } diff --git a/src/multilinear/regular.rs b/src/multilinear/regular.rs index c111330..8c36efb 100644 --- a/src/multilinear/regular.rs +++ b/src/multilinear/regular.rs @@ -60,67 +60,20 @@ pub fn interpn( return Err("Dimension mismatch"); } - match ndims { - 1 => MultilinearRegular::<'_, T, 1>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 2 => MultilinearRegular::<'_, T, 2>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 3 => MultilinearRegular::<'_, T, 3>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 4 => MultilinearRegular::<'_, T, 4>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 5 => MultilinearRegular::<'_, T, 5>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 6 => MultilinearRegular::<'_, T, 6>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 7 => MultilinearRegular::<'_, T, 7>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 8 => MultilinearRegular::<'_, T, 8>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - }?; + crate::dispatch_ndims!( + ndims, + "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", + [1, 2, 3, 4, 5, 6, 7, 8], + |N| { + MultilinearRegular::<'_, T, N>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), + vals, + )? + .interp(obs.try_into().unwrap(), out) + } + )?; Ok(()) } diff --git a/src/nearest/rectilinear.rs b/src/nearest/rectilinear.rs index 1bbd974..469c279 100644 --- a/src/nearest/rectilinear.rs +++ b/src/nearest/rectilinear.rs @@ -45,21 +45,15 @@ pub fn interpn( if grids.len() != ndims || obs.len() != ndims { return Err("Dimension mismatch"); } - match ndims { - 1 => NearestRectilinear::<'_, T, 1>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 2 => NearestRectilinear::<'_, T, 2>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 3 => NearestRectilinear::<'_, T, 3>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 4 => NearestRectilinear::<'_, T, 4>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 5 => NearestRectilinear::<'_, T, 5>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - 6 => NearestRectilinear::<'_, T, 6>::new(grids.try_into().unwrap(), vals)? - .interp(obs.try_into().unwrap(), out), - _ => Err("Dimension exceeds maximum (6)."), - }?; + crate::dispatch_ndims!( + ndims, + "Dimension exceeds maximum (6).", + [1, 2, 3, 4, 5, 6], + |N| { + NearestRectilinear::<'_, T, N>::new(grids.try_into().unwrap(), vals)? + .interp(obs.try_into().unwrap(), out) + } + )?; Ok(()) } diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index 1130e58..aa3e7b8 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -50,51 +50,20 @@ pub fn interpn( return Err("Dimension mismatch"); } - match ndims { - 1 => NearestRegular::<'_, T, 1>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 2 => NearestRegular::<'_, T, 2>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 3 => NearestRegular::<'_, T, 3>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 4 => NearestRegular::<'_, T, 4>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 5 => NearestRegular::<'_, T, 5>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - 6 => NearestRegular::<'_, T, 6>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - )? - .interp(obs.try_into().unwrap(), out), - _ => Err("Dimension exceeds maximum (6)."), - }?; + crate::dispatch_ndims!( + ndims, + "Dimension exceeds maximum (6).", + [1, 2, 3, 4, 5, 6], + |N| { + NearestRegular::<'_, T, N>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), + vals, + )? + .interp(obs.try_into().unwrap(), out) + } + )?; Ok(()) } From 2bab9345a306ccc55689e868b6a5493a38243030 Mon Sep 17 00:00:00 2001 From: James Logan Date: Tue, 13 Jan 2026 17:30:23 -0500 Subject: [PATCH 08/16] extend Nearest interpn methods to 8 dims by default --- src/nearest/rectilinear.rs | 14 +++++++------- src/nearest/regular.rs | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/nearest/rectilinear.rs b/src/nearest/rectilinear.rs index 469c279..8c0e19f 100644 --- a/src/nearest/rectilinear.rs +++ b/src/nearest/rectilinear.rs @@ -27,7 +27,7 @@ use crate::index_arr_fixed_dims; use num_traits::Float; -/// Evaluate multicubic interpolation on a regular grid in up to 6 dimensions. +/// Evaluate nearest-neighbor interpolation on a rectilinear grid in up to 8 dimensions. /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). /// /// This is a convenience function; best performance will be achieved by using the exact right @@ -47,8 +47,8 @@ pub fn interpn( } crate::dispatch_ndims!( ndims, - "Dimension exceeds maximum (6).", - [1, 2, 3, 4, 5, 6], + "Dimension exceeds maximum (8).", + [1, 2, 3, 4, 5, 6, 7, 8], |N| { NearestRectilinear::<'_, T, N>::new(grids.try_into().unwrap(), vals)? .interp(obs.try_into().unwrap(), out) @@ -118,8 +118,8 @@ impl<'a, T: Float, const N: usize> NearestRectilinear<'a, T, N> { // Check dimensions const { assert!( - N > 0 && N < 7, - "Flattened method defined for 1-6 dimensions." + N > 0 && N < 9, + "Flattened method defined for 1-8 dimensions." ); } let dims = crate::validate_rectilinear_grid(grids, vals)?; @@ -284,10 +284,10 @@ mod test { /// Each test evaluates at 3^ndims locations, largely extrapolated in corner regions, so it /// rapidly becomes prohibitively slow after about ndims=9. #[test] - fn test_interp_extrap_1d_to_6d() { + fn test_interp_extrap_1d_to_8d() { let mut rng = rng_fixed_seed(); - for ndims in 1..=6 { + for ndims in 1..=8 { println!("Testing in {ndims} dims"); // Interp grid let dims: Vec = vec![2; ndims]; diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index aa3e7b8..fc595ff 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -29,7 +29,7 @@ use crate::index_arr_fixed_dims; use num_traits::{Float, NumCast}; -/// Evaluate nearest-neighbor interpolation on a regular grid in up to 6 dimensions. +/// Evaluate nearest-neighbor interpolation on a regular grid in up to 8 dimensions. /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). /// /// This is a convenience function; best performance will be achieved by using the exact right @@ -52,8 +52,8 @@ pub fn interpn( crate::dispatch_ndims!( ndims, - "Dimension exceeds maximum (6).", - [1, 2, 3, 4, 5, 6], + "Dimension exceeds maximum (8).", + [1, 2, 3, 4, 5, 6, 7, 8], |N| { NearestRegular::<'_, T, N>::new( dims.try_into().unwrap(), @@ -137,8 +137,8 @@ impl<'a, T: Float, const N: usize> NearestRegular<'a, T, N> { // Check dimensions const { assert!( - N > 0 && N < 7, - "Flattened method defined for 1-6 dimensions. For higher dimensions, use recursive method." + N > 0 && N < 9, + "Flattened method defined for 1-8 dimensions. For higher dimensions, use recursive method." ); } crate::validate_regular_grid(&dims, &steps, vals)?; @@ -288,8 +288,8 @@ mod test { /// Each test evaluates at 3^N locations, largely extrapolated in corner regions, so it /// rapidly becomes prohibitively slow after about N=9. #[test] - fn test_interp_extrap_1d_to_6d() { - for n in 1..=6 { + fn test_interp_extrap_1d_to_8d() { + for n in 1..=8 { println!("Testing in {n} dims"); // Interp grid let dims: Vec = vec![2; n]; From 8019b0ecf275b0634039bd2735aa063d9348ecd5 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 19:58:42 -0500 Subject: [PATCH 09/16] update plots for latest perf with pgo --- docs/3d_throughput_vs_nobs_cubic.html | 2 +- docs/3d_throughput_vs_nobs_cubic.svg | 2 +- docs/3d_throughput_vs_nobs_linear.html | 2 +- docs/3d_throughput_vs_nobs_linear.svg | 2 +- docs/3d_throughput_vs_nobs_prealloc_cubic.html | 2 +- docs/3d_throughput_vs_nobs_prealloc_cubic.svg | 2 +- docs/3d_throughput_vs_nobs_prealloc_linear.html | 2 +- docs/3d_throughput_vs_nobs_prealloc_linear.svg | 2 +- docs/4d_throughput_vs_nobs_cubic.html | 2 +- docs/4d_throughput_vs_nobs_cubic.svg | 2 +- docs/4d_throughput_vs_nobs_linear.html | 2 +- docs/4d_throughput_vs_nobs_linear.svg | 2 +- docs/4d_throughput_vs_nobs_prealloc_cubic.html | 2 +- docs/4d_throughput_vs_nobs_prealloc_cubic.svg | 2 +- docs/4d_throughput_vs_nobs_prealloc_linear.html | 2 +- docs/4d_throughput_vs_nobs_prealloc_linear.svg | 2 +- docs/speedup_vs_dims_1000_obs_cubic.html | 2 +- docs/speedup_vs_dims_1000_obs_cubic.svg | 2 +- docs/speedup_vs_dims_1000_obs_linear.html | 2 +- docs/speedup_vs_dims_1000_obs_linear.svg | 2 +- docs/speedup_vs_dims_1_obs_cubic.html | 2 +- docs/speedup_vs_dims_1_obs_cubic.svg | 2 +- docs/speedup_vs_dims_1_obs_linear.html | 2 +- docs/speedup_vs_dims_1_obs_linear.svg | 2 +- docs/speedup_vs_threads_10000000_obs.html | 2 +- docs/throughput_vs_dims_1000_obs.html | 2 +- docs/throughput_vs_dims_1000_obs.svg | 2 +- docs/throughput_vs_dims_1_obs.html | 2 +- docs/throughput_vs_dims_1_obs.svg | 2 +- 29 files changed, 29 insertions(+), 29 deletions(-) diff --git a/docs/3d_throughput_vs_nobs_cubic.html b/docs/3d_throughput_vs_nobs_cubic.html index 067584d..3a637d5 100644 --- a/docs/3d_throughput_vs_nobs_cubic.html +++ b/docs/3d_throughput_vs_nobs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_cubic.svg b/docs/3d_throughput_vs_nobs_cubic.svg index 1719dea..6ba6f82 100644 --- a/docs/3d_throughput_vs_nobs_cubic.svg +++ b/docs/3d_throughput_vs_nobs_cubic.svg @@ -1 +1 @@ -12510251002510002510k2468ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k24681012ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_linear.html b/docs/3d_throughput_vs_nobs_linear.html index 87f0447..aeba478 100644 --- a/docs/3d_throughput_vs_nobs_linear.html +++ b/docs/3d_throughput_vs_nobs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_linear.svg b/docs/3d_throughput_vs_nobs_linear.svg index 5c5a34b..8d0e56f 100644 --- a/docs/3d_throughput_vs_nobs_linear.svg +++ b/docs/3d_throughput_vs_nobs_linear.svg @@ -1 +1 @@ -12510251002510002510k0510152025ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k010203040ScipyInterpNInterpolation on 20x20x20 GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_cubic.html b/docs/3d_throughput_vs_nobs_prealloc_cubic.html index 6b690c9..a16bdd8 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_cubic.html +++ b/docs/3d_throughput_vs_nobs_prealloc_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_cubic.svg b/docs/3d_throughput_vs_nobs_prealloc_cubic.svg index 55a0c6c..46af271 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_cubic.svg +++ b/docs/3d_throughput_vs_nobs_prealloc_cubic.svg @@ -1 +1 @@ -12510251002510002510k51015ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k0510152025ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_linear.html b/docs/3d_throughput_vs_nobs_prealloc_linear.html index 70b0af3..e7f58dd 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_linear.html +++ b/docs/3d_throughput_vs_nobs_prealloc_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/3d_throughput_vs_nobs_prealloc_linear.svg b/docs/3d_throughput_vs_nobs_prealloc_linear.svg index 4a3b2d7..edd786d 100644 --- a/docs/3d_throughput_vs_nobs_prealloc_linear.svg +++ b/docs/3d_throughput_vs_nobs_prealloc_linear.svg @@ -1 +1 @@ -12510251002510002510k0102030405060ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k020406080ScipyInterpNInterpolation on 20x20x20 GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_cubic.html b/docs/4d_throughput_vs_nobs_cubic.html index 2951473..2bd0a36 100644 --- a/docs/4d_throughput_vs_nobs_cubic.html +++ b/docs/4d_throughput_vs_nobs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_cubic.svg b/docs/4d_throughput_vs_nobs_cubic.svg index 4583390..ffd1df5 100644 --- a/docs/4d_throughput_vs_nobs_cubic.svg +++ b/docs/4d_throughput_vs_nobs_cubic.svg @@ -1 +1 @@ -12510251002510002510k2468ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k24681012ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_linear.html b/docs/4d_throughput_vs_nobs_linear.html index 225eaa6..d38ed06 100644 --- a/docs/4d_throughput_vs_nobs_linear.html +++ b/docs/4d_throughput_vs_nobs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_linear.svg b/docs/4d_throughput_vs_nobs_linear.svg index 9d695fd..a7e93d1 100644 --- a/docs/4d_throughput_vs_nobs_linear.svg +++ b/docs/4d_throughput_vs_nobs_linear.svg @@ -1 +1 @@ -12510251002510002510k010203040ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k0102030405060ScipyInterpNInterpolation on 20x...x20 4D GridWithout Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_cubic.html b/docs/4d_throughput_vs_nobs_prealloc_cubic.html index 1dc7176..1a5c536 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_cubic.html +++ b/docs/4d_throughput_vs_nobs_prealloc_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_cubic.svg b/docs/4d_throughput_vs_nobs_prealloc_cubic.svg index c5e7976..e0b0773 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_cubic.svg +++ b/docs/4d_throughput_vs_nobs_prealloc_cubic.svg @@ -1 +1 @@ -12510251002510002510k510ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k05101520ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, CubicNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_linear.html b/docs/4d_throughput_vs_nobs_prealloc_linear.html index b2fe08d..8652604 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_linear.html +++ b/docs/4d_throughput_vs_nobs_prealloc_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/4d_throughput_vs_nobs_prealloc_linear.svg b/docs/4d_throughput_vs_nobs_prealloc_linear.svg index a6c1e72..3e671c8 100644 --- a/docs/4d_throughput_vs_nobs_prealloc_linear.svg +++ b/docs/4d_throughput_vs_nobs_prealloc_linear.svg @@ -1 +1 @@ -12510251002510002510k020406080ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file +12510251002510002510k020406080100120ScipyInterpNInterpolation on 20x...x20 4D GridWith Preallocated Output, LinearNumber of Observation PointsNormalized Throughput \ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_cubic.html b/docs/speedup_vs_dims_1000_obs_cubic.html index 1f7e9f4..0254994 100644 --- a/docs/speedup_vs_dims_1000_obs_cubic.html +++ b/docs/speedup_vs_dims_1000_obs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_cubic.svg b/docs/speedup_vs_dims_1000_obs_cubic.svg index d58085d..6d4a7f3 100644 --- a/docs/speedup_vs_dims_1000_obs_cubic.svg +++ b/docs/speedup_vs_dims_1000_obs_cubic.svg @@ -1 +1 @@ -123456024681012InterpN Speedup vs. ScipyCubic, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +12345601234567InterpN Speedup vs. ScipyCubic, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_linear.html b/docs/speedup_vs_dims_1000_obs_linear.html index 48ade75..4ef968d 100644 --- a/docs/speedup_vs_dims_1000_obs_linear.html +++ b/docs/speedup_vs_dims_1000_obs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1000_obs_linear.svg b/docs/speedup_vs_dims_1000_obs_linear.svg index 652c19f..0b6b74d 100644 --- a/docs/speedup_vs_dims_1000_obs_linear.svg +++ b/docs/speedup_vs_dims_1000_obs_linear.svg @@ -1 +1 @@ -1234562468101214InterpN Speedup vs. ScipyLinear, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +1234562468101214InterpN Speedup vs. ScipyLinear, 1000 Observation PointsNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_cubic.html b/docs/speedup_vs_dims_1_obs_cubic.html index 86019ad..998f729 100644 --- a/docs/speedup_vs_dims_1_obs_cubic.html +++ b/docs/speedup_vs_dims_1_obs_cubic.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_cubic.svg b/docs/speedup_vs_dims_1_obs_cubic.svg index 26369f7..d45b472 100644 --- a/docs/speedup_vs_dims_1_obs_cubic.svg +++ b/docs/speedup_vs_dims_1_obs_cubic.svg @@ -1 +1 @@ -12345605101520InterpN Speedup vs. ScipyCubic, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +1234560510152025InterpN Speedup vs. ScipyCubic, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_linear.html b/docs/speedup_vs_dims_1_obs_linear.html index 50a5f3c..b044b4e 100644 --- a/docs/speedup_vs_dims_1_obs_linear.html +++ b/docs/speedup_vs_dims_1_obs_linear.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/speedup_vs_dims_1_obs_linear.svg b/docs/speedup_vs_dims_1_obs_linear.svg index 27d705d..05eb30b 100644 --- a/docs/speedup_vs_dims_1_obs_linear.svg +++ b/docs/speedup_vs_dims_1_obs_linear.svg @@ -1 +1 @@ -123456050100150200250InterpN Speedup vs. ScipyLinear, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +123456050100150200250300InterpN Speedup vs. ScipyLinear, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file diff --git a/docs/speedup_vs_threads_10000000_obs.html b/docs/speedup_vs_threads_10000000_obs.html index d7ba2bb..91340b0 100644 --- a/docs/speedup_vs_threads_10000000_obs.html +++ b/docs/speedup_vs_threads_10000000_obs.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/throughput_vs_dims_1000_obs.html b/docs/throughput_vs_dims_1000_obs.html index 165877c..41d773e 100644 --- a/docs/throughput_vs_dims_1000_obs.html +++ b/docs/throughput_vs_dims_1000_obs.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/throughput_vs_dims_1000_obs.svg b/docs/throughput_vs_dims_1000_obs.svg index e16fd26..f6c5a1e 100644 --- a/docs/throughput_vs_dims_1000_obs.svg +++ b/docs/throughput_vs_dims_1000_obs.svg @@ -1 +1 @@ -246250.01250.1251246100μ0.0010.010.11LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1000 Observation PointsNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file +246250.01250.1251246100μ0.0010.010.11LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1000 Observation PointsNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file diff --git a/docs/throughput_vs_dims_1_obs.html b/docs/throughput_vs_dims_1_obs.html index 4d286a9..8167e62 100644 --- a/docs/throughput_vs_dims_1_obs.html +++ b/docs/throughput_vs_dims_1_obs.html @@ -1,2 +1,2 @@
-
\ No newline at end of file +
\ No newline at end of file diff --git a/docs/throughput_vs_dims_1_obs.svg b/docs/throughput_vs_dims_1_obs.svg index 2a2c8f6..a2ad561 100644 --- a/docs/throughput_vs_dims_1_obs.svg +++ b/docs/throughput_vs_dims_1_obs.svg @@ -1 +1 @@ -2460.001250.01250.125122460.01250.1251LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1 Observation PointNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file +2460.001250.01250.12512460.01250.1251LinearScipy RegularGridInterpolator LinearInterpN MultilinearRegularInterpN MultilinearRectilinearInterpN NearestRegularInterpN NearestRectilinearNumpy InterpCubicScipy RegularGridInterpolator CubicInterpN MulticubicRegularInterpN MulticubicRectilinearScipy RectBivariateSpline CubicInterpolation on 4x...x4 N-Dimensional Grid1 Observation PointNumber of DimensionsNormalized ThroughputLinearCubic \ No newline at end of file From ee3a8ae9e9deaa8b7f23052e2412bae41124dc15 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 21:19:37 -0500 Subject: [PATCH 10/16] update version and changelog --- CHANGELOG.md | 16 ++++++++++++++++ Cargo.lock | 6 +++--- Cargo.toml | 4 ++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1be8242..ef3fc16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## 0.11.0 2026-01-15 + +### Changed + +* !Remove recursive implementations + * All dimensionalities now use const-generic bounded implementations + * For dimensions higher than const-unrolling limit, a run-time loop of the same form is used + * No notable change in performance for high dimensions + * No change to API for `::interpn` convenience functions + * All methods are now compatible with static analysis of memory usage via cargo-call-stack +* Refactor internals to reduce duplication +* Improve form of early bounds checks to be interpreted correctly by the compiler + * 30% speedup in some cases, no effect on most + * Bounds checks done via iterator do not always result in optimizing out panic branches +* Test cubic methods in higher dimensions + ## 0.10.0 2026-01-10 ### Added diff --git a/Cargo.lock b/Cargo.lock index c1d4854..d724307 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -238,7 +238,7 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "interpn" -version = "0.10.0" +version = "0.11.0" dependencies = [ "criterion", "crunchy", @@ -331,9 +331,9 @@ dependencies = [ [[package]] name = "ndarray" -version = "0.17.1" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7c9125e8f6f10c9da3aad044cc918cf8784fa34de857b1aa68038eb05a50a9" +checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" dependencies = [ "matrixmultiply", "num-complex", diff --git a/Cargo.toml b/Cargo.toml index 847926c..18d690e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "interpn" -version = "0.10.0" +version = "0.11.0" edition = "2024" rust-version = "1.87" # 2025-05-15 authors = ["James Logan "] @@ -35,7 +35,7 @@ itertools = { version = "0.14.0", optional = true } [dev-dependencies] rand = "0.9.2" criterion = "0.8.1" -ndarray = "0.17.1" +ndarray = "0.17.2" [features] default = ["std", "crunchy/limit_64", "par"] From 3f20d9590ca1294f15844dda73108ea75b52a8df Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 21:21:46 -0500 Subject: [PATCH 11/16] update front page plot --- docs/speedup_vs_dims_1_obs_linear_inverted.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/speedup_vs_dims_1_obs_linear_inverted.svg b/docs/speedup_vs_dims_1_obs_linear_inverted.svg index 274262c..e210133 100644 --- a/docs/speedup_vs_dims_1_obs_linear_inverted.svg +++ b/docs/speedup_vs_dims_1_obs_linear_inverted.svg @@ -1,6 +1,6 @@ - + -123456050100150200InterpN Speedup vs. ScipyLinear, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file +123456050100150200250300InterpN Speedup vs. ScipyLinear, 1 Observation PointNumber of DimensionsSpeedup vs. Scipy \ No newline at end of file From e8a09a0ae13ec3dbe58dad95cb89b34e896661d8 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 21:24:45 -0500 Subject: [PATCH 12/16] add thread speedup plot --- docs/speedup_vs_threads_10000000_obs.svg | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/speedup_vs_threads_10000000_obs.svg diff --git a/docs/speedup_vs_threads_10000000_obs.svg b/docs/speedup_vs_threads_10000000_obs.svg new file mode 100644 index 0000000..390387e --- /dev/null +++ b/docs/speedup_vs_threads_10000000_obs.svg @@ -0,0 +1 @@ +123456123456InterpN Thread Speedup (10000000 Observation Points)ThreadsSpeedup vs. 1 Thread \ No newline at end of file From e86845d728de52c6279261881776d5b71da44671 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 21:33:34 -0500 Subject: [PATCH 13/16] update readme and docs --- README.md | 22 ++++++---- docs/index.md | 41 +++++++++++-------- ...eedup_vs_threads_10000000_obs_inverted.svg | 6 +++ 3 files changed, 44 insertions(+), 25 deletions(-) create mode 100644 docs/speedup_vs_threads_10000000_obs_inverted.svg diff --git a/README.md b/README.md index cc74655..8f62d46 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,17 @@ [Rust Docs](https://docs.rs/interpn/latest/interpn/) N-dimensional interpolation/extrapolation methods, no-std and no-alloc compatible, -prioritizing correctness, performance, and compatiblity with memory-constrained environments. +prioritizing correctness, performance, and compatibility with memory-constrained environments. Available as a rust crate and python library. ## Features -| Feature →
↓ Interpolant Method | Regular
Grid | Rectilinear
Grid | Json
Serialization | -|-----------------------------------|-----------------|---------------------|-----------------------| -| Nearest-Neighbor | ✅ | ✅ | ✅ | -| Linear | ✅ | ✅ | ✅ | -| Cubic | ✅ | ✅ | ✅ | +| Feature →
↓ Interpolant Method | Regular
Grid | Rectilinear
Grid | Json
Serialization | Thread Parallelism | +|-----------------------------------|-----------------|---------------------|-----------------------| ------------------ | +| Nearest-Neighbor | ✅ | ✅ | ✅ | ✅ | +| Linear | ✅ | ✅ | ✅ | ✅ | +| Cubic | ✅ | ✅ | ✅ | ✅ | The methods provided here, while more limited in scope than scipy's, @@ -24,13 +24,18 @@ The methods provided here, while more limited in scope than scipy's, * use almost no RAM (and perform no heap allocations at all) * produce significantly improved floating-point error (by several orders of magnitude) * are json-serializable using Pydantic -* can also be used easily in web, embedded and gpu applications via the Rust library +* can also be used easily in web and embedded applications via the Rust library * are permissively licensed
+For larger jobs such as image processing, the non-allocating evaluation pattern results +in a nearly-linear thread speedup, limited only by thread spawning overhead and memory bandwidth. + + + See [here](https://interpnpy.readthedocs.io/en/latest/perf/) for more info about quality-of-fit, throughput, and memory usage. ## Installation @@ -48,6 +53,9 @@ after installing this extra compiler dependency: rustup component add llvm-tools-preview ``` +An LLVM install matching the version used by rustc is also required for doing PGO; +see the `./scripts/distr_pgo.sh` or CI workflows for the exact version. + ## Rust Examples ### Regular Grid diff --git a/docs/index.md b/docs/index.md index 3c4ef88..d3e1429 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,21 +1,22 @@ # InterpN +[Writeup](https://jlogan.dev/blog/#2025-11-10-interpn-fast-interpolation) | [Repo](https://github.com/jlogan03/interpn) | [Python Docs](https://interpnpy.readthedocs.io/en/latest/) | [Rust Docs](https://docs.rs/interpn/latest/interpn/) -This library provides serializable N-dimensional interpolators -backed by compute-heavy code written in Rust. +N-dimensional interpolation/extrapolation methods, no-std and no-alloc compatible, +prioritizing correctness, performance, and compatibility with memory-constrained environments. -These methods perform zero allocation when evaluated (except, optionally, for the output). -Because of this, they have minimal per-call overhead, and are particularly -effective when examining small numbers of observation points. See the [performance](/perf) page for detailed benchmarks. +Available as a rust crate and python library. ## Features -| Feature →
↓ Interpolant Method | Regular
Grid | Rectilinear
Grid | Json
Serialization | -|-----------------------------------|-----------------|---------------------|-----------------------| -| Linear | ✅ | ✅ | ✅ | -| Cubic | ✅ | ✅ | ✅ | + +| Feature →
↓ Interpolant Method | Regular
Grid | Rectilinear
Grid | Json
Serialization | Thread Parallelism | +|-----------------------------------|-----------------|---------------------|-----------------------| ------------------ | +| Nearest-Neighbor | ✅ | ✅ | ✅ | ✅ | +| Linear | ✅ | ✅ | ✅ | ✅ | +| Cubic | ✅ | ✅ | ✅ | ✅ | The methods provided here, while more limited in scope than scipy's, @@ -26,13 +27,14 @@ The methods provided here, while more limited in scope than scipy's, * can also be used easily in web and embedded applications via the Rust library * are permissively licensed ---8<-- -docs/speedup_vs_dims_1_obs_linear.html ---8<-- +
+ + + +For larger jobs such as image processing, the non-allocating evaluation pattern results +in a nearly-linear thread speedup, limited only by thread spawning overhead and memory bandwidth. ---8<-- -docs/speedup_vs_dims_1_obs_cubic.html ---8<-- + See [here](https://interpnpy.readthedocs.io/en/latest/perf/) for more info about quality-of-fit, throughput, and memory usage. @@ -44,14 +46,16 @@ pip install interpn ## Profile-Guided Optimization -To build the extension with profile-guided optimization, do `uv run python ./scripts/run_pgo.py` -after installing these extra compiler dependencies: +To build the extension with profile-guided optimization, do `sh ./scripts/distr_pgo.sh` +after installing this extra compiler dependency: ```bash -cargo install cargo-pgo rustup component add llvm-tools-preview ``` +An LLVM install matching the version used by rustc is also required for doing PGO; +see the `./scripts/distr_pgo.sh` or CI workflows for the exact version. + ## Rust Examples ### Regular Grid @@ -182,6 +186,7 @@ out2 = roundtrip_interpolator.eval(obs) assert np.all(out == out2) ``` + # License Licensed under either of diff --git a/docs/speedup_vs_threads_10000000_obs_inverted.svg b/docs/speedup_vs_threads_10000000_obs_inverted.svg new file mode 100644 index 0000000..c4dbc8f --- /dev/null +++ b/docs/speedup_vs_threads_10000000_obs_inverted.svg @@ -0,0 +1,6 @@ + + +123456123456InterpN Thread Speedup (10000000 Observation Points)ThreadsSpeedup vs. 1 Thread \ No newline at end of file From 8dbb1d0faca7cbff9f5eab7562cec039246dc13e Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 21:46:18 -0500 Subject: [PATCH 14/16] remove references to recursion from the docs --- src/lib.rs | 4 ++-- src/multicubic/rectilinear.rs | 7 ++----- src/multicubic/regular.rs | 5 +---- src/multilinear/rectilinear.rs | 9 +++------ src/multilinear/regular.rs | 9 +++------ src/nearest/rectilinear.rs | 5 +---- src/nearest/regular.rs | 5 +---- 7 files changed, 13 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a323adf..63fc7bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -234,8 +234,8 @@ static PHYSICAL_CORES: LazyLock = LazyLock::new(num_cpus::get_physical); /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). /// /// For lower dimensions, a fast flattened method is used. For higher dimensions, where that flattening -/// becomes impractical due to compile times and instruction size, evaluation defers to a bounded -/// recursion. +/// becomes impractical due to compile times and instruction size, evaluation defers to a run-time +/// loop. /// The linear method uses the flattening for 1-6 dimensions, while /// flattened cubic methods are available up to 3 dimensions by default and up to 4 dimensions /// with the `deep_unroll` feature enabled. diff --git a/src/multicubic/rectilinear.rs b/src/multicubic/rectilinear.rs index 6a69eb9..d96b90b 100644 --- a/src/multicubic/rectilinear.rs +++ b/src/multicubic/rectilinear.rs @@ -39,7 +39,7 @@ use num_traits::Float; /// /// For 1-4 dimensions with `deep-unroll` enabled (1-3 by default), a fast flattened method is used. /// For higher dimensions, where that flattening becomes impractical due to compile times and -/// instruction size, evaluation defers to a bounded recursion. +/// instruction size, evaluation defers to a run-time loop. /// /// This is a convenience function; best performance will be achieved by using the exact right /// number for the N parameter, as this will slightly reduce compute and storage overhead, @@ -164,10 +164,7 @@ pub use crate::multilinear::rectilinear::check_bounds; /// * O(4^ndims) for interpolation and extrapolation in all regions. /// /// Memory Complexity -/// * Peak stack usage is O(N), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of N, which provides a guarantee on peak -/// memory usage. +/// * Peak stack usage is O(4^ndims) for lower dimensions (unrolled), and O(N) otherwise. /// /// Timing /// * Timing determinism very tight, but is not exact due to the diff --git a/src/multicubic/regular.rs b/src/multicubic/regular.rs index 1ce30e9..872d60f 100644 --- a/src/multicubic/regular.rs +++ b/src/multicubic/regular.rs @@ -194,10 +194,7 @@ pub use crate::multilinear::regular::check_bounds; /// * O(4^ndims) for interpolation and extrapolation in all regions. /// /// Memory Complexity -/// * Peak stack usage is O(N), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of N, which provides a guarantee on peak -/// memory usage. +/// * Peak stack usage is O(4^ndims) for lower dimensions (unrolled), and O(N) otherwise. /// /// Timing /// * Timing determinism very tight, but is not exact due to the diff --git a/src/multilinear/rectilinear.rs b/src/multilinear/rectilinear.rs index 817798e..cfd748d 100644 --- a/src/multilinear/rectilinear.rs +++ b/src/multilinear/rectilinear.rs @@ -35,8 +35,8 @@ use num_traits::Float; /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). /// /// For 1-6 dimensions, a fast flattened method is used. For higher dimensions, where that flattening -/// becomes impractical due to compile times and instruction size, evaluation defers to a bounded -/// recursion. +/// becomes impractical due to compile times and instruction size, evaluation defers to a run-time +/// loop. /// /// This is a convenience function; best performance will be achieved by using the exact right /// number for the N parameter, as this will slightly reduce compute and storage overhead, @@ -132,10 +132,7 @@ pub fn check_bounds( /// * O(2^ndims) for interpolation and extrapolation in all regions. /// /// Memory Complexity -/// * Peak stack usage is O(N), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of N, which provides a guarantee on peak -/// memory usage. +/// * Peak stack usage is O(2^ndims) for lower dimensions (unrolled), and O(N) otherwise. /// /// Timing /// * Timing determinism is very tight, but not guaranteed due to the use of a bisection search. diff --git a/src/multilinear/regular.rs b/src/multilinear/regular.rs index 8c36efb..34f8923 100644 --- a/src/multilinear/regular.rs +++ b/src/multilinear/regular.rs @@ -37,8 +37,8 @@ use num_traits::{Float, NumCast}; /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). /// /// For 1-6 dimensions, a fast flattened method is used. For higher dimensions, where that flattening -/// becomes impractical due to compile times and instruction size, evaluation defers to a bounded -/// recursion. +/// becomes impractical due to compile times and instruction size, evaluation defers to a run-time +/// loop. /// /// This is a convenience function; best performance will be achieved by using the exact right /// number for the N parameter, as this will slightly reduce compute and storage overhead, @@ -151,10 +151,7 @@ pub fn check_bounds( /// * O(2^N) for interpolation and extrapolation in all regions. /// /// Memory Complexity -/// * Peak stack usage is O(N), which is minimally O(N). -/// * While evaluation is recursive, the recursion has constant -/// max depth of N, which provides a guarantee on peak -/// memory usage. +/// * Peak stack usage is O(2^ndims) for lower dimensions (unrolled), and O(N) otherwise. /// /// Timing /// * Timing determinism is guaranteed to the extent that floating-point calculation timing is consistent. diff --git a/src/nearest/rectilinear.rs b/src/nearest/rectilinear.rs index 8c0e19f..113537b 100644 --- a/src/nearest/rectilinear.rs +++ b/src/nearest/rectilinear.rs @@ -85,10 +85,7 @@ pub use crate::multilinear::rectilinear::check_bounds; /// * O(2^ndims) for interpolation and extrapolation in all regions. /// /// Memory Complexity -/// * Peak stack usage is O(N), which is minimally O(ndims). -/// * While evaluation is recursive, the recursion has constant -/// max depth of N, which provides a guarantee on peak -/// memory usage. +/// * Peak stack usage is O(N). /// /// Timing /// * Timing determinism is very tight, but not guaranteed due to the use of a bisection search. diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index fc595ff..2e28f48 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -95,10 +95,7 @@ pub use crate::multilinear::regular::check_bounds; /// * O(2^N) for interpolation and extrapolation in all regions. /// /// Memory Complexity -/// * Peak stack usage is O(N), which is minimally O(N). -/// * While evaluation is recursive, the recursion has constant -/// max depth of N, which provides a guarantee on peak -/// memory usage. +/// * Peak stack usage is O(N). /// /// Timing /// * Timing determinism is guaranteed to the extent that floating-point calculation timing is consistent. From c728567e7019752c8b91542f52d0ef6a67021fcb Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 21:52:51 -0500 Subject: [PATCH 15/16] cleanup --- src/lib.rs | 123 +++++++++++++++++---------------- src/multilinear/rectilinear.rs | 6 +- src/multilinear/regular.rs | 2 + src/nearest/rectilinear.rs | 3 + src/nearest/regular.rs | 2 + 5 files changed, 74 insertions(+), 62 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 63fc7bd..aca68b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,22 +90,6 @@ use num_traits::Float; -#[doc(hidden)] -#[macro_export] -macro_rules! dispatch_ndims { - ($ndims:expr, $err:expr, [$($n:literal),+ $(,)?], |$N:ident| $body:expr $(,)?) => {{ - match $ndims { - $( - $n => { - const $N: usize = $n; - $body - } - )+ - _ => Err($err), - } - }}; -} - pub mod multilinear; pub use multilinear::{MultilinearRectilinear, MultilinearRegular}; @@ -174,49 +158,6 @@ const MAXDIMS_ERR: &str = "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; const MIN_CHUNK_SIZE: usize = 1024; -#[inline(always)] -pub(crate) fn validate_regular_grid( - dims: &[usize; N], - steps: &[T; N], - vals: &[T], -) -> Result<(), &'static str> { - let nvals: usize = dims.iter().product(); - if vals.len() != nvals { - return Err("Dimension mismatch"); - } - let degenerate = dims.iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least two entries"); - } - let steps_are_positive = steps.iter().all(|&x| x > T::zero()); - if !steps_are_positive { - return Err("All grids must be monotonically increasing"); - } - Ok(()) -} - -#[inline(always)] -pub(crate) fn validate_rectilinear_grid( - grids: &[&[T]; N], - vals: &[T], -) -> Result<[usize; N], &'static str> { - let mut dims = [1_usize; N]; - (0..N).for_each(|i| dims[i] = grids[i].len()); - let nvals: usize = dims.iter().product(); - if vals.len() != nvals { - return Err("Dimension mismatch"); - } - let degenerate = dims.iter().any(|&x| x < 2); - if degenerate { - return Err("All grids must have at least 2 entries"); - } - let monotonic_maybe = grids.iter().all(|&g| g[1] > g[0]); - if !monotonic_maybe { - return Err("All grids must be monotonically increasing"); - } - Ok(dims) -} - /// The number of physical cores present on the machine; /// initialized once, then never again, because each call involves some file I/O /// and allocations that can be slower than the function call that they support. @@ -596,3 +537,67 @@ pub(crate) fn index_arr_fixed_dims( data[i] } + +/// Light-weight regular grid checks. Doing a more complete validation +/// would break asymptotic scaling for evaluation. +#[inline(always)] +pub(crate) fn validate_regular_grid( + dims: &[usize; N], + steps: &[T; N], + vals: &[T], +) -> Result<(), &'static str> { + let nvals: usize = dims.iter().product(); + if vals.len() != nvals { + return Err("Dimension mismatch"); + } + let degenerate = dims.iter().any(|&x| x < 2); + if degenerate { + return Err("All grids must have at least two entries"); + } + let steps_are_positive = steps.iter().all(|&x| x > T::zero()); + if !steps_are_positive { + return Err("All grids must be monotonically increasing"); + } + Ok(()) +} + +/// Light-weight rectilinear grid checks. Doing a more complete validation +/// would break asymptotic scaling for evaluation. +#[inline(always)] +pub(crate) fn validate_rectilinear_grid( + grids: &[&[T]; N], + vals: &[T], +) -> Result<[usize; N], &'static str> { + let mut dims = [1_usize; N]; + (0..N).for_each(|i| dims[i] = grids[i].len()); + let nvals: usize = dims.iter().product(); + if vals.len() != nvals { + return Err("Dimension mismatch"); + } + let degenerate = dims.iter().any(|&x| x < 2); + if degenerate { + return Err("All grids must have at least 2 entries"); + } + let monotonic_maybe = grids.iter().all(|&g| g[1] > g[0]); + if !monotonic_maybe { + return Err("All grids must be monotonically increasing"); + } + Ok(dims) +} + +/// Helper for dispatching to a generic method matching the input dimensionality. +#[doc(hidden)] +#[macro_export] +macro_rules! dispatch_ndims { + ($ndims:expr, $err:expr, [$($n:literal),+ $(,)?], |$N:ident| $body:expr $(,)?) => {{ + match $ndims { + $( + $n => { + const $N: usize = $n; + $body + } + )+ + _ => Err($err), + } + }}; +} diff --git a/src/multilinear/rectilinear.rs b/src/multilinear/rectilinear.rs index cfd748d..0641c96 100644 --- a/src/multilinear/rectilinear.rs +++ b/src/multilinear/rectilinear.rs @@ -51,13 +51,13 @@ pub fn interpn( obs: &[&[T]], out: &mut [T], ) -> Result<(), &'static str> { - // Expanding out and using the specialized version for each size - // gives a substantial speedup for lower dimensionalities - // (4-5x speedup for 1-dim compared to using N=8) + // Check dimensionality let ndims = grids.len(); if grids.len() != ndims || obs.len() != ndims { return Err("Dimension mismatch"); } + + // Dispatch to specialized implementation crate::dispatch_ndims!( ndims, "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", diff --git a/src/multilinear/regular.rs b/src/multilinear/regular.rs index 34f8923..3695ebb 100644 --- a/src/multilinear/regular.rs +++ b/src/multilinear/regular.rs @@ -55,11 +55,13 @@ pub fn interpn( obs: &[&[T]], out: &mut [T], ) -> Result<(), &'static str> { + // Check dimensionality let ndims = dims.len(); if starts.len() != ndims || steps.len() != ndims || obs.len() != ndims { return Err("Dimension mismatch"); } + // Dispatch to specialized implementation crate::dispatch_ndims!( ndims, "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", diff --git a/src/nearest/rectilinear.rs b/src/nearest/rectilinear.rs index 113537b..8b1822b 100644 --- a/src/nearest/rectilinear.rs +++ b/src/nearest/rectilinear.rs @@ -41,10 +41,13 @@ pub fn interpn( obs: &[&[T]], out: &mut [T], ) -> Result<(), &'static str> { + // Check dimensionality let ndims = grids.len(); if grids.len() != ndims || obs.len() != ndims { return Err("Dimension mismatch"); } + + // Dispatch to specialized implementation crate::dispatch_ndims!( ndims, "Dimension exceeds maximum (8).", diff --git a/src/nearest/regular.rs b/src/nearest/regular.rs index 2e28f48..557dad1 100644 --- a/src/nearest/regular.rs +++ b/src/nearest/regular.rs @@ -45,11 +45,13 @@ pub fn interpn( obs: &[&[T]], out: &mut [T], ) -> Result<(), &'static str> { + // Check dimensionality let ndims = dims.len(); if starts.len() != ndims || steps.len() != ndims || obs.len() != ndims { return Err("Dimension mismatch"); } + // Dispatch to specialized implementation crate::dispatch_ndims!( ndims, "Dimension exceeds maximum (8).", From 3c862923e056283e1e8ef3d039fd5d54ead4adba Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 15 Jan 2026 21:55:25 -0500 Subject: [PATCH 16/16] use dispatch_ndims for cubic methods --- src/multicubic/rectilinear.rs | 72 +++++++--------------------- src/multicubic/regular.rs | 90 +++++++---------------------------- 2 files changed, 34 insertions(+), 128 deletions(-) diff --git a/src/multicubic/rectilinear.rs b/src/multicubic/rectilinear.rs index d96b90b..cb963a6 100644 --- a/src/multicubic/rectilinear.rs +++ b/src/multicubic/rectilinear.rs @@ -55,63 +55,23 @@ pub fn interpn( obs: &[&[T]], out: &mut [T], ) -> Result<(), &'static str> { - // Expanding out and using the specialized version for each size - // gives a substantial speedup for lower dimensionalities - // (4-5x speedup for 1-dim compared to using N=8) + // Check dimensionality let ndims = grids.len(); - match ndims { - 1 => MulticubicRectilinear::<'_, T, 1>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 2 => MulticubicRectilinear::<'_, T, 2>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 3 => MulticubicRectilinear::<'_, T, 3>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 4 => MulticubicRectilinear::<'_, T, 4>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 5 => MulticubicRectilinear::<'_, T, 5>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 6 => MulticubicRectilinear::<'_, T, 6>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 7 => MulticubicRectilinear::<'_, T, 7>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 8 => MulticubicRectilinear::<'_, T, 8>::new( - grids.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - } + + // Dispatch to specialized implementation + crate::dispatch_ndims!( + ndims, + "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", + [1, 2, 3, 4, 5, 6, 7, 8], + |N| { + MulticubicRectilinear::<'_, T, N>::new( + grids.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out) + } + ) } /// Evaluate interpolant, allocating a new Vec for the output. diff --git a/src/multicubic/regular.rs b/src/multicubic/regular.rs index 872d60f..32a1d31 100644 --- a/src/multicubic/regular.rs +++ b/src/multicubic/regular.rs @@ -58,79 +58,25 @@ pub fn interpn( obs: &[&[T]], out: &mut [T], ) -> Result<(), &'static str> { - // Expanding out and using the specialized version for each size - // gives a substantial speedup for lower dimensionalities - // (4-5x speedup for 1-dim compared to using N=8) + // Check dimensionality let ndims = dims.len(); - match ndims { - 1 => MulticubicRegular::<'_, T, 1>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 2 => MulticubicRegular::<'_, T, 2>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 3 => MulticubicRegular::<'_, T, 3>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 4 => MulticubicRegular::<'_, T, 4>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 5 => MulticubicRegular::<'_, T, 5>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 6 => MulticubicRegular::<'_, T, 6>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 7 => MulticubicRegular::<'_, T, 7>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - 8 => MulticubicRegular::<'_, T, 8>::new( - dims.try_into().unwrap(), - starts.try_into().unwrap(), - steps.try_into().unwrap(), - vals, - linearize_extrapolation, - )? - .interp(obs.try_into().unwrap(), out), - _ => Err( - "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", - ), - } + + // Dispatch to specialized implementation + crate::dispatch_ndims!( + ndims, + "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions.", + [1, 2, 3, 4, 5, 6, 7, 8], + |N| { + MulticubicRegular::<'_, T, N>::new( + dims.try_into().unwrap(), + starts.try_into().unwrap(), + steps.try_into().unwrap(), + vals, + linearize_extrapolation, + )? + .interp(obs.try_into().unwrap(), out) + } + ) } /// Evaluate interpolant, allocating a new Vec for the output.