From 5da02a35be781585587d3aa69d68f00aa4d6ec6e Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 18:46:41 -0500 Subject: [PATCH 01/20] add top level interpn function --- Cargo.lock | 54 +++++++++++++++++++++--- Cargo.toml | 12 +++--- src/lib.rs | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 461a64f..25a0e27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + [[package]] name = "anes" version = "0.1.6" @@ -111,10 +120,11 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "criterion" -version = "0.7.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" +checksum = "4d883447757bb0ee46f233e9dc22eb84d93a9508c9b868687b274fc431d886bf" dependencies = [ + "alloca", "anes", "cast", "ciborium", @@ -123,6 +133,7 @@ dependencies = [ "itertools 0.13.0", "num-traits", "oorandom", + "page_size", "plotters", "rayon", "regex", @@ -134,9 +145,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.6.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" +checksum = "ed943f81ea2faa8dcecbbfa50164acf95d555afec96a27871663b300e387b2e4" dependencies = [ "cast", "itertools 0.13.0", @@ -221,7 +232,7 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "interpn" -version = "0.9.1" +version = "0.9.2" dependencies = [ "criterion", "crunchy", @@ -231,6 +242,7 @@ dependencies = [ "numpy", "pyo3", "rand", + "rayon", ] [[package]] @@ -381,6 +393,16 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "page_size" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "plotters" version = "0.3.7" @@ -824,6 +846,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" @@ -833,6 +871,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-link" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 5ddb9a4..125f39f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "interpn" -version = "0.9.1" +version = "0.9.2" edition = "2024" rust-version = "1.87" # 2025-05-15 authors = ["James Logan "] @@ -17,8 +17,9 @@ crate-type = ["cdylib", "rlib"] [dependencies] # Rust lib deps -num-traits = { version = "0.2.19", default-features = false, features = ["libm"] } -crunchy = { version = "0.2.4", default-features = false } +num-traits = { version = "^0.2.19", default-features = false, features = ["libm"] } +crunchy = { version = "^0.2.4", default-features = false } +rayon = { version = "^1.11.0", optional = true } # Python bindings pyo3 = { version = "0.27.2", features = ["extension-module", "abi3-py310", "generate-import-lib"], optional = true } @@ -29,15 +30,16 @@ itertools = { version = "0.14.0", optional = true } [dev-dependencies] rand = "0.9.2" -criterion = "0.7.0" +criterion = "0.8.1" ndarray = "0.17.1" [features] -default = ["std", "crunchy/limit_64"] +default = ["std", "crunchy/limit_64", "par"] python = ["numpy", "pyo3", "std"] std = ["itertools"] deep-unroll = ["crunchy/limit_256"] fma = [] +par = ["std", "rayon"] [profile.release] opt-level = 3 diff --git a/src/lib.rs b/src/lib.rs index 88d8cab..da481a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,12 +88,24 @@ // expanded code that is entirely in const. #![allow(clippy::absurd_extreme_comparisons)] +use num_traits::Float; + pub mod multilinear; pub use multilinear::{MultilinearRectilinear, MultilinearRegular}; pub mod multicubic; pub use multicubic::{MulticubicRectilinear, MulticubicRegular}; +pub mod linear { + pub use crate::multilinear::rectilinear; + pub use crate::multilinear::regular; +} + +pub mod cubic { + pub use crate::multicubic::rectilinear; + pub use crate::multicubic::regular; +} + pub mod nearest; pub use nearest::{NearestRectilinear, NearestRegular}; @@ -112,6 +124,113 @@ pub(crate) mod testing; #[cfg(feature = "python")] pub mod python; +pub enum GridInterpMethod { + Linear, + Cubic, +} + +pub enum GridKind { + Regular, + Rectilinear, +} + +const MAXDIMS: usize = 8; +const MAXDIMS_ERR: &str = + "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; + +pub fn interpn( + grids: &[&[T]], + vals: &[T], + obs: &[&[T]], + out: &mut [T], + method: GridInterpMethod, + assume_grid_kind: Option, + linearize_extrapolation: bool, +) -> Result<(), &'static str> { + let ndims = grids.len(); + if ndims > MAXDIMS { + return Err(MAXDIMS_ERR); + } + + let kind = match assume_grid_kind { + Some(GridKind::Regular) => GridKind::Regular, + Some(GridKind::Rectilinear) => GridKind::Rectilinear, + None => { + // Check whether grid is regular + let mut is_regular = true; + + for grid in grids.iter() { + if grid.len() < 2 { + return Err("All grids must have at least two entries"); + } + let step = grid[1] - grid[0]; + + if !grid.windows(2).all(|pair| pair[1] - pair[0] == step) { + is_regular = false; + break; + } + } + + if is_regular { + GridKind::Regular + } else { + GridKind::Rectilinear + } + } + }; + + // Extract regular grid params + let get_regular_grid = || { + let mut dims = [0_usize; MAXDIMS]; + let mut starts = [T::zero(); MAXDIMS]; + let mut steps = [T::zero(); MAXDIMS]; + + for (i, grid) in grids.iter().enumerate() { + if grid.len() < 2 { + return Err("All grids must have at least two entries"); + } + dims[i] = grid.len(); + starts[i] = grid[0]; + steps[i] = grid[1] - grid[0]; + } + + Ok((dims, starts, steps)) + }; + + // Select lower-level method + match (method, kind) { + (GridInterpMethod::Linear, GridKind::Regular) => { + let (dims, starts, steps) = get_regular_grid()?; + linear::regular::interpn( + &dims[..ndims], + &starts[..ndims], + &steps[..ndims], + vals, + obs, + out, + ) + } + (GridInterpMethod::Linear, GridKind::Rectilinear) => { + linear::rectilinear::interpn(grids, vals, obs, out) + } + (GridInterpMethod::Cubic, GridKind::Regular) => { + let (dims, starts, steps) = get_regular_grid()?; + cubic::regular::interpn( + &dims[..ndims], + &starts[..ndims], + &steps[..ndims], + vals, + linearize_extrapolation, + obs, + out, + ) + } + (GridInterpMethod::Cubic, GridKind::Rectilinear) => { + cubic::rectilinear::interpn(grids, vals, linearize_extrapolation, obs, out) + } + } +} + /// Index a single value from an array #[inline] pub(crate) fn index_arr(loc: &[usize], dimprod: &[usize], data: &[T]) -> T { From 1438e5770f6d09d632582d51e1a3574ce5cbef66 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 19:48:02 -0500 Subject: [PATCH 02/20] first cut at parallel implementation --- src/lib.rs | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index da481a0..aacc2ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -115,6 +115,15 @@ pub use one_dim::{ linear::Linear1D, linear::LinearHoldLast1D, }; +#[cfg(feature = "par")] +use rayon::{ + iter::{IndexedParallelIterator, ParallelIterator}, + slice::ParallelSliceMut, +}; + +#[cfg(feature = "par")] +use std::sync::Mutex; + #[cfg(feature = "std")] pub mod utils; @@ -124,13 +133,19 @@ pub(crate) mod testing; #[cfg(feature = "python")] pub mod python; +/// Interpolant function for multi-dimensional methods. +#[derive(Clone, Copy)] pub enum GridInterpMethod { Linear, Cubic, } +/// Grid spacing category for multi-dimensional methods. +#[derive(Clone, Copy)] pub enum GridKind { + /// Evenly-spaced points along each axis. Regular, + /// Un-evenly spaced points along each axis. Rectilinear, } @@ -138,7 +153,79 @@ const MAXDIMS: usize = 8; const MAXDIMS_ERR: &str = "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; -pub fn interpn( +#[cfg(feature = "par")] +pub fn interpn( + grids: &[&[T]], + vals: &[T], + obs: &[&[T]], + out: &mut [T], + method: GridInterpMethod, + assume_grid_kind: Option, + linearize_extrapolation: bool, + max_threads: Option, +) -> Result<(), &'static str> { + let ndims = grids.len(); + if ndims > MAXDIMS { + return Err(MAXDIMS_ERR); + } + + // Chunk for parallelism + let num_cores = rayon::current_num_threads() + .max(1) + .min(max_threads.unwrap_or(usize::MAX)); + let n = out.len(); + let chunk = 1024.max(n / num_cores); + + // Make a shared error indicator + let result: Mutex> = Mutex::new(None); + let write_err = |msg: &'static str| { + let mut guard = result.lock().unwrap(); + if guard.is_none() { + *guard = Some(msg); + } + }; + + // Run threaded + out.par_chunks_mut(chunk).enumerate().for_each(|(i, outc)| { + // Calculate the start and end of observation point chunks + let start = chunk * i; + let end = start + outc.len(); + + // Chunk observation points + let mut obs_slices: [&[T]; 8] = [&[]; 8]; + for (j, o) in obs.iter().enumerate() { + let s = &o.get(start..end); + match s { + Some(s) => obs_slices[j] = s, + None => write_err("Dimension mismatch"), + }; + } + + // Do interpolations + let res_inner = interpn_serial( + grids, + vals, + &obs_slices[..ndims], + outc, + method, + assume_grid_kind, + linearize_extrapolation, + ); + + match res_inner { + Ok(()) => {} + Err(msg) => write_err(msg), + } + }); + + // Handle errors from threads + match *result.lock().unwrap() { + Some(msg) => Err(msg), + None => Ok(()), + } +} + +pub fn interpn_serial( grids: &[&[T]], vals: &[T], obs: &[&[T]], @@ -251,11 +338,8 @@ pub(crate) fn index_arr_fixed_dims( ) -> T { let mut i = 0; - // unroll! { - // for j < 7 in 0..N { for j in 0..N { i += loc[j] * dimprod[j]; - // } } data[i] From 86a2c3b85e6761a5b1193538d3f13f3a14b24c8e Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 20:27:04 -0500 Subject: [PATCH 03/20] raw python bindings for new method --- src/python.rs | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/python.rs b/src/python.rs index 165d684..d27d86b 100644 --- a/src/python.rs +++ b/src/python.rs @@ -5,6 +5,7 @@ use pyo3::prelude::*; use crate::multicubic; use crate::multilinear; use crate::nearest; +use crate::{GridInterpMethod, GridKind}; /// Maximum number of dimensions for linear interpn convenience methods const MAXDIMS: usize = 8; @@ -35,6 +36,9 @@ fn interpn<'py>(_py: Python, m: &Bound<'py, PyModule>) -> PyResult<()> { // Multicubic with rectilinear grid m.add_function(wrap_pyfunction!(interpn_cubic_rectilinear_f64, m)?)?; m.add_function(wrap_pyfunction!(interpn_cubic_rectilinear_f32, m)?)?; + // Top-level interpn dispatch + m.add_function(wrap_pyfunction!(interpn_f64, m)?)?; + m.add_function(wrap_pyfunction!(interpn_f32, m)?)?; Ok(()) } @@ -290,3 +294,66 @@ macro_rules! interpn_cubic_rectilinear_impl { interpn_cubic_rectilinear_impl!(interpn_cubic_rectilinear_f64, f64); interpn_cubic_rectilinear_impl!(interpn_cubic_rectilinear_f32, f32); + +fn parse_grid_interp_method(method: &str) -> Result { + match method.to_ascii_lowercase().as_str() { + "linear" => Ok(GridInterpMethod::Linear), + "cubic" => Ok(GridInterpMethod::Cubic), + _ => Err(exceptions::PyValueError::new_err( + "`method` must be 'linear' or 'cubic'", + )), + } +} + +fn parse_grid_kind(grid_kind: Option<&str>) -> Result, PyErr> { + match grid_kind.map(|kind| kind.to_ascii_lowercase()) { + None => Ok(None), + Some(kind) => match kind.as_str() { + "regular" => Ok(Some(GridKind::Regular)), + "rectilinear" => Ok(Some(GridKind::Rectilinear)), + _ => Err(exceptions::PyValueError::new_err( + "`grid_kind` must be 'regular', 'rectilinear', or None", + )), + }, + } +} + +macro_rules! interpn_top_impl { + ($funcname:ident, $T:ty) => { + #[pyfunction] + #[pyo3(signature = (grids, vals, obs, out, method="linear", grid_kind=None, linearize_extrapolation=true, max_threads=None))] + fn $funcname( + grids: Vec>, + vals: PyReadonlyArray1<$T>, + obs: Vec>, + mut out: PyReadwriteArray1<$T>, + method: &str, + grid_kind: Option<&str>, + linearize_extrapolation: bool, + max_threads: Option, + ) -> PyResult<()> { + unpack_vec_of_arr!(grids, grids, $T); + unpack_vec_of_arr!(obs, obs, $T); + + let method = parse_grid_interp_method(method)?; + let grid_kind = parse_grid_kind(grid_kind)?; + + match crate::interpn( + grids, + vals.as_slice()?, + obs, + out.as_slice_mut()?, + method, + grid_kind, + linearize_extrapolation, + max_threads, + ) { + Ok(()) => Ok(()), + Err(msg) => Err(exceptions::PyAssertionError::new_err(msg)), + } + } + }; +} + +interpn_top_impl!(interpn_f64, f64); +interpn_top_impl!(interpn_f32, f32); From 1987ff154f9bd9917b69d36f68927377e9b84d89 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 21:11:07 -0500 Subject: [PATCH 04/20] add bounds checks --- src/lib.rs | 33 +++++++++++++++++++++++++++++++++ src/python.rs | 4 +++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index aacc2ee..e632773 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -162,6 +162,7 @@ pub fn interpn( method: GridInterpMethod, assume_grid_kind: Option, linearize_extrapolation: bool, + check_bounds_with_atol: Option, max_threads: Option, ) -> Result<(), &'static str> { let ndims = grids.len(); @@ -210,6 +211,7 @@ pub fn interpn( method, assume_grid_kind, linearize_extrapolation, + check_bounds_with_atol, ); match res_inner { @@ -233,6 +235,7 @@ pub fn interpn_serial( method: GridInterpMethod, assume_grid_kind: Option, linearize_extrapolation: bool, + check_bounds_with_atol: Option, ) -> Result<(), &'static str> { let ndims = grids.len(); if ndims > MAXDIMS { @@ -284,10 +287,37 @@ pub fn interpn_serial( Ok((dims, starts, steps)) }; + // Bounds checks for regular grid, if requested + let maybe_check_bounds_regular = |dims: &[usize], starts: &[T], steps: &[T], obs: &[&[T]]| { + if let Some(atol) = check_bounds_with_atol { + let mut bounds = [false; MAXDIMS]; + let out = &mut bounds[..ndims]; + multilinear::regular::check_bounds( + &dims[..ndims], + &starts[..ndims], + &steps[..ndims], + obs, + atol, + out, + )? + } + Ok(()) + }; + + let maybe_check_bounds_rectilinear = |grids, obs| { + if let Some(atol) = check_bounds_with_atol { + let mut bounds = [false; MAXDIMS]; + let out = &mut bounds[..ndims]; + multilinear::rectilinear::check_bounds(grids, obs, atol, out)? + } + Ok(()) + }; + // Select lower-level method match (method, kind) { (GridInterpMethod::Linear, GridKind::Regular) => { let (dims, starts, steps) = get_regular_grid()?; + maybe_check_bounds_regular(&dims, &starts, &steps, obs)?; linear::regular::interpn( &dims[..ndims], &starts[..ndims], @@ -298,10 +328,12 @@ pub fn interpn_serial( ) } (GridInterpMethod::Linear, GridKind::Rectilinear) => { + maybe_check_bounds_rectilinear(grids, obs)?; linear::rectilinear::interpn(grids, vals, obs, out) } (GridInterpMethod::Cubic, GridKind::Regular) => { let (dims, starts, steps) = get_regular_grid()?; + maybe_check_bounds_regular(&dims, &starts, &steps, obs)?; cubic::regular::interpn( &dims[..ndims], &starts[..ndims], @@ -313,6 +345,7 @@ pub fn interpn_serial( ) } (GridInterpMethod::Cubic, GridKind::Rectilinear) => { + maybe_check_bounds_rectilinear(grids, obs)?; cubic::rectilinear::interpn(grids, vals, linearize_extrapolation, obs, out) } } diff --git a/src/python.rs b/src/python.rs index d27d86b..dcd7beb 100644 --- a/src/python.rs +++ b/src/python.rs @@ -321,7 +321,7 @@ fn parse_grid_kind(grid_kind: Option<&str>) -> Result, PyErr> { macro_rules! interpn_top_impl { ($funcname:ident, $T:ty) => { #[pyfunction] - #[pyo3(signature = (grids, vals, obs, out, method="linear", grid_kind=None, linearize_extrapolation=true, max_threads=None))] + #[pyo3(signature = (grids, vals, obs, out, method="linear", grid_kind=None, linearize_extrapolation=true, check_bounds_with_atol=None, max_threads=None))] fn $funcname( grids: Vec>, vals: PyReadonlyArray1<$T>, @@ -330,6 +330,7 @@ macro_rules! interpn_top_impl { method: &str, grid_kind: Option<&str>, linearize_extrapolation: bool, + check_bounds_with_atol: Option<$T>, max_threads: Option, ) -> PyResult<()> { unpack_vec_of_arr!(grids, grids, $T); @@ -346,6 +347,7 @@ macro_rules! interpn_top_impl { method, grid_kind, linearize_extrapolation, + check_bounds_with_atol, max_threads, ) { Ok(()) => Ok(()), From adf07fbbfce5e7923798d29ce021caf9abcef191 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 21:31:37 -0500 Subject: [PATCH 05/20] add nearest methods --- src/lib.rs | 23 +++++++++++++++++++++++ src/python.rs | 3 ++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e632773..5dd259f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -136,8 +136,12 @@ pub mod python; /// Interpolant function for multi-dimensional methods. #[derive(Clone, Copy)] pub enum GridInterpMethod { + /// Multi-linear interpolation. Linear, + /// Cubic Hermite spline interpolation. Cubic, + /// Nearest-neighbor interpolation. + Nearest, } /// Grid spacing category for multi-dimensional methods. @@ -242,6 +246,8 @@ pub fn interpn_serial( return Err(MAXDIMS_ERR); } + // Resolve grid kind, checking the grid if + // the kind is not provided by the user. let kind = match assume_grid_kind { Some(GridKind::Regular) => GridKind::Regular, Some(GridKind::Rectilinear) => GridKind::Rectilinear, @@ -304,6 +310,7 @@ pub fn interpn_serial( Ok(()) }; + // Bounds checks for rectilinear grid, if requested let maybe_check_bounds_rectilinear = |grids, obs| { if let Some(atol) = check_bounds_with_atol { let mut bounds = [false; MAXDIMS]; @@ -348,6 +355,22 @@ pub fn interpn_serial( maybe_check_bounds_rectilinear(grids, obs)?; cubic::rectilinear::interpn(grids, vals, linearize_extrapolation, obs, out) } + (GridInterpMethod::Nearest, GridKind::Regular) => { + let (dims, starts, steps) = get_regular_grid()?; + maybe_check_bounds_regular(&dims, &starts, &steps, obs)?; + nearest::regular::interpn( + &dims[..ndims], + &starts[..ndims], + &steps[..ndims], + vals, + obs, + out, + ) + } + (GridInterpMethod::Nearest, GridKind::Rectilinear) => { + maybe_check_bounds_rectilinear(grids, obs)?; + nearest::rectilinear::interpn(grids, vals, obs, out) + } } } diff --git a/src/python.rs b/src/python.rs index dcd7beb..2a3d3c9 100644 --- a/src/python.rs +++ b/src/python.rs @@ -299,8 +299,9 @@ fn parse_grid_interp_method(method: &str) -> Result { match method.to_ascii_lowercase().as_str() { "linear" => Ok(GridInterpMethod::Linear), "cubic" => Ok(GridInterpMethod::Cubic), + "nearest" => Ok(GridInterpMethod::Nearest), _ => Err(exceptions::PyValueError::new_err( - "`method` must be 'linear' or 'cubic'", + "`method` must be 'linear', 'cubic', or 'nearest'", )), } } From d23c0a2a5c7f1ddacefd35ab0bb5df272803f8a5 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 21:50:24 -0500 Subject: [PATCH 06/20] use rust top level interpn function for python top level interpn function. update tests. --- src/interpn/__init__.py | 122 +++++++++------------------------------- src/interpn/raw.py | 4 ++ src/interpn/raw.pyi | 25 ++++++++ src/lib.rs | 10 +++- 4 files changed, 64 insertions(+), 97 deletions(-) diff --git a/src/interpn/__init__.py b/src/interpn/__init__.py index fd2db38..54c4b89 100644 --- a/src/interpn/__init__.py +++ b/src/interpn/__init__.py @@ -56,17 +56,16 @@ def interpn( method: Literal["linear", "cubic", "nearest"] = "linear", out: NDArray | None = None, linearize_extrapolation: bool = True, - assume_regular: bool = False, - check_bounds: bool = False, - bounds_atol: float = 1e-8, + grid_kind: Literal["regular", "rectilinear"] | None = None, + check_bounds_with_atol: float | None = None, + max_threads: int | None = None, ) -> NDArray: """ Evaluate an N-dimensional grid at the supplied observation points. - Performs some small allocations to prepare the inputs and - performs O(gridsize) checks to determine grid regularity - unless `assume_regular` is set. To avoid this overhead entirely, - use the persistent wrapper classes or raw bindings instead. + Performs some small allocations to prepare the inputs. + Use the persistent wrapper classes or raw bindings instead + to avoid this overhead entirely. Reallocates input arrays if and only if they are not contiguous yet. @@ -78,9 +77,11 @@ def interpn( out: Optional preallocated array that receives the result. linearize_extrapolation: Whether cubic extrapolation should fall back to linear behaviour outside the grid bounds. - assume_regular: Treat the grid as regular without checking spacing. - check_bounds: When True, raise if any observation lies outside the grid. - bounds_atol: Absolute tolerance for bounds checks, to avoid spurious errors + grid_kind: Optional ``"regular"`` or ``"rectilinear"`` to skip + grid-shape autodetection. + check_bounds_with_atol: When set, raise if any observation lies outside + the grid by more than this absolute tolerance. + max_threads: Optional upper bound for parallel execution threads. Returns: Interpolated values @@ -101,106 +102,37 @@ def interpn( "`interpn` defined only for float32 and float64 data" ) - # Check regularity - is_regular = assume_regular or _check_regular(grids) - - if is_regular: - dims = np.array([len(grid) for grid in grids], dtype=int) - starts = np.array([grid[0] for grid in grids], dtype=dtype) - steps = np.array([grid[1] - grid[0] for grid in grids], dtype=dtype) - else: - # Pyright doesn't understand match-case - dims = np.empty((0,), dtype=int) - starts = np.empty((0,), dtype=dtype) - steps = starts - - # Check bounds - if check_bounds: - outb = np.zeros_like(out.shape, dtype=bool) - match (dtype, is_regular): - case (np.float32, True): - raw.check_bounds_regular_f32( - dims, starts, steps, obs, atol=bounds_atol, out=outb - ) - case (np.float64, True): - raw.check_bounds_regular_f64( - dims, starts, steps, obs, atol=bounds_atol, out=outb - ) - case (np.float32, False): - raw.check_bounds_rectilinear_f32(grids, obs, atol=bounds_atol, out=outb) - case (np.float64, False): - raw.check_bounds_rectilinear_f64(grids, obs, atol=bounds_atol, out=outb) - - if any(outb): - raise ValueError("Observation points violate interpolator bounds") - # Do interpolation - match (dtype, is_regular, method): - case (np.float32, True, "linear"): - raw.interpn_linear_regular_f32(dims, starts, steps, vals, obs, out) - case (np.float64, True, "linear"): - raw.interpn_linear_regular_f64(dims, starts, steps, vals, obs, out) - case (np.float32, False, "linear"): - raw.interpn_linear_rectilinear_f32(grids, vals, obs, out) - case (np.float64, False, "linear"): - raw.interpn_linear_rectilinear_f64(grids, vals, obs, out) - case (np.float32, True, "nearest"): - raw.interpn_nearest_regular_f32(dims, starts, steps, vals, obs, out) - case (np.float64, True, "nearest"): - raw.interpn_nearest_regular_f64(dims, starts, steps, vals, obs, out) - case (np.float32, False, "nearest"): - raw.interpn_nearest_rectilinear_f32(grids, vals, obs, out) - case (np.float64, False, "nearest"): - raw.interpn_nearest_rectilinear_f64(grids, vals, obs, out) - case (np.float32, True, "cubic"): - raw.interpn_cubic_regular_f32( - dims, - starts, - steps, - vals, - linearize_extrapolation, - obs, - out, - ) - case (np.float64, True, "cubic"): - raw.interpn_cubic_regular_f64( - dims, - starts, - steps, - vals, - linearize_extrapolation, - obs, - out, - ) - case (np.float32, False, "cubic"): - raw.interpn_cubic_rectilinear_f32( + match dtype: + case np.float32: + raw.interpn_f32( grids, vals, - linearize_extrapolation, obs, out, + method=method, + grid_kind=grid_kind, + linearize_extrapolation=linearize_extrapolation, + check_bounds_with_atol=check_bounds_with_atol, + max_threads=max_threads, ) - case (np.float64, False, "cubic"): - raw.interpn_cubic_rectilinear_f64( + case np.float64: + raw.interpn_f64( grids, vals, - linearize_extrapolation, obs, out, + method=method, + grid_kind=grid_kind, + linearize_extrapolation=linearize_extrapolation, + check_bounds_with_atol=check_bounds_with_atol, + max_threads=max_threads, ) case _: raise ValueError( "Unsupported interpolation configuration:" - f" {dtype}, {is_regular}, {method}" + f" {dtype}, {method}" ) return out.reshape(outshape) - -def _check_regular(grids: Sequence[NDArray]) -> bool: - """Check if grids are all regularly spaced""" - is_regular = True - for grid in grids: - dgrid = np.diff(grid) - is_regular = is_regular and np.all(dgrid == dgrid[0]) - return bool(is_regular) diff --git a/src/interpn/raw.py b/src/interpn/raw.py index 1d72221..3b80dac 100644 --- a/src/interpn/raw.py +++ b/src/interpn/raw.py @@ -4,6 +4,8 @@ """ from .interpn import ( + interpn_f64, + interpn_f32, interpn_linear_regular_f64, interpn_linear_regular_f32, interpn_linear_rectilinear_f64, @@ -23,6 +25,8 @@ ) __all__ = [ + "interpn_f64", + "interpn_f32", "interpn_linear_regular_f64", "interpn_linear_regular_f32", "interpn_linear_rectilinear_f64", diff --git a/src/interpn/raw.pyi b/src/interpn/raw.pyi index af7f424..256ab30 100644 --- a/src/interpn/raw.pyi +++ b/src/interpn/raw.pyi @@ -11,6 +11,8 @@ BoolArray = NDArray[np.bool_] IntArray = NDArray[np.intp] __all__ = [ + "interpn_f64", + "interpn_f32", "interpn_linear_regular_f64", "interpn_linear_regular_f32", "interpn_linear_rectilinear_f64", @@ -29,6 +31,29 @@ __all__ = [ "check_bounds_rectilinear_f32", ] +def interpn_f64( + grids: Sequence[NDArrayF64], + vals: NDArrayF64, + obs: Sequence[NDArrayF64], + out: NDArrayF64, + method: str = "linear", + grid_kind: str | None = None, + linearize_extrapolation: bool = True, + check_bounds_with_atol: float | None = None, + max_threads: int | None = None, +) -> None: ... +def interpn_f32( + grids: Sequence[NDArrayF32], + vals: NDArrayF32, + obs: Sequence[NDArrayF32], + out: NDArrayF32, + method: str = "linear", + grid_kind: str | None = None, + linearize_extrapolation: bool = True, + check_bounds_with_atol: float | None = None, + max_threads: int | None = None, +) -> None: ... + def interpn_linear_regular_f64( dims: IntArray, starts: NDArrayF64, diff --git a/src/lib.rs b/src/lib.rs index 5dd259f..35994b8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -305,7 +305,10 @@ pub fn interpn_serial( obs, atol, out, - )? + )?; + if bounds.iter().any(|x| *x) { + return Err("At least one observation point is outside the grid.") + } } Ok(()) }; @@ -315,7 +318,10 @@ pub fn interpn_serial( if let Some(atol) = check_bounds_with_atol { let mut bounds = [false; MAXDIMS]; let out = &mut bounds[..ndims]; - multilinear::rectilinear::check_bounds(grids, obs, atol, out)? + multilinear::rectilinear::check_bounds(grids, obs, atol, out)?; + if bounds.iter().any(|x| *x) { + return Err("At least one observation point is outside the grid.") + } } Ok(()) }; From ce69fdff3270bc79ab24af5165c5bee8dc27e2e3 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 21:55:57 -0500 Subject: [PATCH 07/20] update clamping order for number of cores --- src/lib.rs | 4 ++-- test/test_interpn.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 35994b8..1282de2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -176,8 +176,8 @@ pub fn interpn( // Chunk for parallelism let num_cores = rayon::current_num_threads() - .max(1) - .min(max_threads.unwrap_or(usize::MAX)); + .min(max_threads.unwrap_or(usize::MAX)) + .max(1); let n = out.len(); let chunk = 1024.max(n / num_cores); diff --git a/test/test_interpn.py b/test/test_interpn.py index 989731b..cee796c 100644 --- a/test/test_interpn.py +++ b/test/test_interpn.py @@ -17,17 +17,17 @@ def test_interpn_check_bounds_regular(dtype): grids=[grid], vals=vals, method="linear", - check_bounds=True, + check_bounds_with_atol=1e-8, ) assert inside.shape == obs_inside[0].shape - with pytest.raises(ValueError): + with pytest.raises(AssertionError): interpn( obs=obs_outside, grids=[grid], vals=vals, method="linear", - check_bounds=True, + check_bounds_with_atol=1e-8, ) @@ -44,15 +44,15 @@ def test_interpn_check_bounds_rectilinear(dtype): grids=[grid], vals=vals, method="linear", - check_bounds=True, + check_bounds_with_atol=1e-8, ) assert inside.shape == obs_inside[0].shape - with pytest.raises(ValueError): + with pytest.raises(AssertionError): interpn( obs=obs_outside, grids=[grid], vals=vals, method="linear", - check_bounds=True, + check_bounds_with_atol=1e-8, ) From afbe98a00b8565ab131d44d88f1ece291461ed73 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 22:08:48 -0500 Subject: [PATCH 08/20] parametrize parallelism in tests --- test/test_interpn.py | 10 ++++++++-- test/test_multicubic_rectilinear.py | 5 ++++- test/test_multicubic_regular.py | 5 ++++- test/test_multilinear_rectilinear.py | 5 ++++- test/test_multilinear_regular.py | 5 ++++- test/test_nearest_rectilinear.py | 5 ++++- test/test_nearest_regular.py | 5 ++++- 7 files changed, 32 insertions(+), 8 deletions(-) diff --git a/test/test_interpn.py b/test/test_interpn.py index cee796c..0f185f8 100644 --- a/test/test_interpn.py +++ b/test/test_interpn.py @@ -5,7 +5,8 @@ @pytest.mark.parametrize("dtype", [np.float64, np.float32]) -def test_interpn_check_bounds_regular(dtype): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_interpn_check_bounds_regular(dtype, max_threads): grid = np.linspace(-1.0, 1.0, 5).astype(dtype) vals = np.linspace(0.0, 10.0, grid.size).astype(dtype) @@ -18,6 +19,7 @@ def test_interpn_check_bounds_regular(dtype): vals=vals, method="linear", check_bounds_with_atol=1e-8, + max_threads=max_threads, ) assert inside.shape == obs_inside[0].shape @@ -28,11 +30,13 @@ def test_interpn_check_bounds_regular(dtype): vals=vals, method="linear", check_bounds_with_atol=1e-8, + max_threads=max_threads, ) @pytest.mark.parametrize("dtype", [np.float64, np.float32]) -def test_interpn_check_bounds_rectilinear(dtype): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_interpn_check_bounds_rectilinear(dtype, max_threads): grid = np.array([-1.0, -0.25, 0.5, 2.0], dtype=dtype) vals = np.linspace(0.0, 10.0, grid.size).astype(dtype) @@ -45,6 +49,7 @@ def test_interpn_check_bounds_rectilinear(dtype): vals=vals, method="linear", check_bounds_with_atol=1e-8, + max_threads=max_threads, ) assert inside.shape == obs_inside[0].shape @@ -55,4 +60,5 @@ def test_interpn_check_bounds_rectilinear(dtype): vals=vals, method="linear", check_bounds_with_atol=1e-8, + max_threads=max_threads, ) diff --git a/test/test_multicubic_rectilinear.py b/test/test_multicubic_rectilinear.py index 65d5b7f..61313dc 100644 --- a/test/test_multicubic_rectilinear.py +++ b/test/test_multicubic_rectilinear.py @@ -1,8 +1,10 @@ import numpy as np +import pytest import interpn -def test_multilinear_rectilinear(): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_multilinear_rectilinear(max_threads): for dtype in [np.float64, np.float32]: x = np.linspace(0.0, 10.0, 5).astype(dtype) y = np.linspace(20.0, 30.0, 4).astype(dtype) @@ -48,6 +50,7 @@ def test_multilinear_rectilinear(): vals=zgrid.flatten(), method="cubic", linearize_extrapolation=False, + max_threads=max_threads, ) for i in range(out_helper.size): assert out_helper[i] == zf[i] diff --git a/test/test_multicubic_regular.py b/test/test_multicubic_regular.py index 54cfa4a..8989f42 100644 --- a/test/test_multicubic_regular.py +++ b/test/test_multicubic_regular.py @@ -1,8 +1,10 @@ import numpy as np +import pytest import interpn -def test_multicubic_regular(): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_multicubic_regular(max_threads): for dtype, tol in [(np.float64, 1e-12), (np.float32, 1e-6)]: x = np.linspace(0.0, 10.0, 7).astype(dtype) y = np.linspace(20.0, 30.0, 5).astype(dtype) @@ -55,6 +57,7 @@ def test_multicubic_regular(): vals=zgrid.flatten(), method="cubic", linearize_extrapolation=False, + max_threads=max_threads, ) for i in range(out_helper.size): assert approx(out_helper[i], zf[i], dtype(tol)) diff --git a/test/test_multilinear_rectilinear.py b/test/test_multilinear_rectilinear.py index e6b9528..aa1fbe0 100644 --- a/test/test_multilinear_rectilinear.py +++ b/test/test_multilinear_rectilinear.py @@ -1,8 +1,10 @@ import numpy as np +import pytest import interpn -def test_multilinear_rectilinear(): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_multilinear_rectilinear(max_threads): for dtype in [np.float64, np.float32]: x = np.linspace(0.0, 10.0, 5).astype(dtype) y = np.linspace(20.0, 30.0, 3).astype(dtype) @@ -46,6 +48,7 @@ def test_multilinear_rectilinear(): grids=grids, vals=zgrid.flatten(), method="linear", + max_threads=max_threads, ) for i in range(out_helper.size): diff --git a/test/test_multilinear_regular.py b/test/test_multilinear_regular.py index 4280869..b39d7df 100644 --- a/test/test_multilinear_regular.py +++ b/test/test_multilinear_regular.py @@ -1,8 +1,10 @@ import numpy as np +import pytest import interpn -def test_multilinear_regular(): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_multilinear_regular(max_threads): for dtype in [np.float64, np.float32]: x = np.linspace(0.0, 10.0, 5).astype(dtype) y = np.linspace(20.0, 30.0, 3).astype(dtype) @@ -53,6 +55,7 @@ def test_multilinear_regular(): grids=grids, vals=zgrid.flatten(), method="linear", + max_threads=max_threads, ) for i in range(out_helper.size): diff --git a/test/test_nearest_rectilinear.py b/test/test_nearest_rectilinear.py index f47182a..9ece6f4 100644 --- a/test/test_nearest_rectilinear.py +++ b/test/test_nearest_rectilinear.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import interpn @@ -11,7 +12,8 @@ def _nearest_rectilinear_index(value: float, grid: np.ndarray) -> int: return idx if dt <= 0.5 else idx + 1 -def test_nearest_rectilinear(): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_nearest_rectilinear(max_threads): for dtype in [np.float64, np.float32]: x = np.array([0.0, 1.0, 3.5, 4.0], dtype=dtype) y = np.array([-2.0, -0.5, 0.1], dtype=dtype) @@ -54,6 +56,7 @@ def test_nearest_rectilinear(): grids=grids, vals=zgrid.flatten(), method="nearest", + max_threads=max_threads, ) np.testing.assert_array_equal(out_helper, expected) diff --git a/test/test_nearest_regular.py b/test/test_nearest_regular.py index f1d64ca..9f22a8d 100644 --- a/test/test_nearest_regular.py +++ b/test/test_nearest_regular.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import interpn @@ -10,7 +11,8 @@ def _nearest_regular_index(value: float, start: float, step: float, size: int) - return min(loc if dt <= 0.5 else loc + 1, size - 1) -def test_nearest_regular(): +@pytest.mark.parametrize("max_threads", [None, 1], ids=["parallel", "serial"]) +def test_nearest_regular(max_threads): for dtype in [np.float64, np.float32]: x = np.linspace(0.0, 6.0, 4).astype(dtype) y = np.linspace(-3.0, 3.0, 3).astype(dtype) @@ -65,6 +67,7 @@ def test_nearest_regular(): grids=grids, vals=zgrid.flatten(), method="nearest", + max_threads=max_threads, ) np.testing.assert_array_equal(out_helper, expected) From f28b5b5dc6c93bc668087b67c67fe1cc96bd8c58 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 22:49:32 -0500 Subject: [PATCH 09/20] add parallel methods to pgo profile and benchmarks --- benches/bench_cpu.py | 177 ++++++++++++++++++++++++++++++++++++ scripts/profile_workload.py | 103 +++++++++++---------- src/interpn/__init__.py | 6 +- 3 files changed, 234 insertions(+), 52 deletions(-) diff --git a/benches/bench_cpu.py b/benches/bench_cpu.py index ab06eab..d2a858d 100644 --- a/benches/bench_cpu.py +++ b/benches/bench_cpu.py @@ -18,6 +18,7 @@ MulticubicRectilinear, NearestRegular, NearestRectilinear, + interpn as interpn_fn, ) # Toggle SciPy/NumPy baselines via environment for PGO workloads. @@ -50,6 +51,11 @@ def average_call_time( DASH_STYLES = ["solid", "dash", "dot", "dashdot", "longdash", "longdashdot"] +THREAD_SPEEDUP_COLORS = { + "linear": "#1f77b4", + "cubic": "#ff7f0e", + "nearest": "#2ca02c", +} def _normalized_line_style(index: int) -> str: @@ -466,6 +472,176 @@ def _plot_speedup_vs_dims( fig.show() +def _thread_counts() -> list[int]: + max_threads = os.cpu_count() or 1 + counts = [] + threads = 1 + while threads < max_threads: + counts.append(threads) + threads *= 2 + counts.append(max_threads) + return sorted(set(counts)) + + +def _plot_speedup_vs_threads( + *, + thread_counts: list[int], + speedups: dict[str, dict[str, list[float]]], + nobs: int, + output_path: Path, +) -> None: + fig = make_subplots( + rows=1, + cols=2, + subplot_titles=("Regular Grid", "Rectilinear Grid"), + horizontal_spacing=0.08, + ) + for col, grid_kind in enumerate(["regular", "rectilinear"], start=1): + for method in ["linear", "cubic", "nearest"]: + values = speedups[grid_kind].get(method) + if not values: + continue + fig.add_trace( + go.Scatter( + x=thread_counts, + y=values, + mode="lines+markers", + name=method.title(), + line=dict(color=THREAD_SPEEDUP_COLORS[method], width=2), + marker=dict(size=7), + showlegend=col == 1, + ), + row=1, + col=col, + ) + fig.add_hline( + y=1.0, + line=dict(color="black", dash="dot", width=1), + row=1, + col=col, + ) + fig.update_xaxes( + title_text="Threads", + row=1, + col=col, + showline=True, + linecolor="black", + linewidth=1, + mirror=True, + ticks="outside", + tickcolor="black", + showgrid=False, + zeroline=False, + ) + fig.update_yaxes( + title_text="Speedup vs. 1 Thread", + row=1, + col=col, + showline=True, + linecolor="black", + linewidth=1, + mirror=True, + ticks="outside", + tickcolor="black", + showgrid=False, + zeroline=False, + ) + + fig.update_layout( + title=dict( + text=f"InterpN Thread Speedup ({nobs} Observation Points)", + y=0.98, + yanchor="top", + ), + height=430, + margin=dict(t=70, l=60, r=40, b=80), + legend=dict( + orientation="h", + yanchor="top", + y=1.05, + x=1.0, + xanchor="right", + ), + plot_bgcolor="rgba(0,0,0,0)", + paper_bgcolor="rgba(0,0,0,0)", + font=dict(color="black"), + ) + fig.write_image(str(output_path)) + fig.write_html( + str(output_path.with_suffix(".html")), + include_plotlyjs="cdn", + full_html=False, + ) + fig.show() + + +def bench_thread_speedup_vs_threads(): + nobs = 10_000 + ndims = 3 + ngrid = 20 + rng = np.random.default_rng(17) + + thread_counts = _thread_counts() + speedups: dict[str, dict[str, list[float]]] = { + "regular": {}, + "rectilinear": {}, + } + + def make_grids(kind: str) -> list[NDArray]: + base = np.linspace(-1.0, 1.0, ngrid) + if kind == "regular": + return [base for _ in range(ndims)] + warped = base**3 + return [warped for _ in range(ndims)] + + for grid_kind in ["regular", "rectilinear"]: + grids = make_grids(grid_kind) + mesh = np.meshgrid(*grids, indexing="ij") + vals = (mesh[0] + 2.0 * mesh[1] - 0.5 * mesh[2]).astype(np.float64) + vals_flat = vals.ravel() + obs = [] + for grid in grids: + lo = grid[0] + hi = grid[-1] + span = hi - lo + obs.append( + rng.uniform(lo + 0.05 * span, hi - 0.05 * span, size=nobs).astype( + np.float64 + ) + ) + out = np.zeros_like(obs[0]) + + for method in ["linear", "cubic", "nearest"]: + timings = [] + for threads in thread_counts: + timed = average_call_time( + lambda points, threads=threads: interpn_fn( + obs=points, + grids=grids, + vals=vals_flat, + method=method, + out=out, + linearize_extrapolation=False, + grid_kind=grid_kind, + max_threads=threads, + ), + obs, + ) + timings.append(timed) + baseline = timings[0] + speedups[grid_kind][method] = [ + baseline / t if t else 0.0 for t in timings + ] + + output_path = Path(__file__).parent / "../docs/speedup_vs_threads_10000_obs.svg" + _plot_speedup_vs_threads( + thread_counts=thread_counts, + speedups=speedups, + nobs=nobs, + output_path=output_path, + ) + + def bench_4_dims_1_obs(): nbench = 30 # Bench iterations preallocate = False # Whether to preallocate output array for InterpN @@ -1076,6 +1252,7 @@ def bench_throughput_vs_dims(): def main(): + bench_thread_speedup_vs_threads() bench_throughput_vs_dims() bench_4_dims_1_obs() bench_4_dims_n_obs_unordered() diff --git a/scripts/profile_workload.py b/scripts/profile_workload.py index 87debc1..4b68e7e 100644 --- a/scripts/profile_workload.py +++ b/scripts/profile_workload.py @@ -5,14 +5,7 @@ import numpy as np -from interpn import ( - MulticubicRectilinear, - MulticubicRegular, - MultilinearRectilinear, - MultilinearRegular, - NearestRectilinear, - NearestRegular, -) +from interpn import interpn as interpn_fn _TARGET_COUNT = int(1e4) _OBSERVATION_COUNTS = (1, 3, 571, 2017) @@ -34,12 +27,35 @@ def _observation_points( return points -def _evaluate(interpolator, points: list[np.ndarray]) -> None: - # Without preallocated output - interpolator.eval(points) - # With preallocated output +def _evaluate( + *, + grids: list[np.ndarray], + vals: np.ndarray, + points: list[np.ndarray], + method: str, + grid_kind: str, + max_threads: int | None, +) -> None: + interpn_fn( + obs=points, + grids=grids, + vals=vals, + method=method, + grid_kind=grid_kind, + linearize_extrapolation=True, + max_threads=max_threads, + ) out = np.empty_like(points[0]) - interpolator.eval(points, out) + interpn_fn( + obs=points, + grids=grids, + vals=vals, + method=method, + grid_kind=grid_kind, + linearize_extrapolation=True, + out=out, + max_threads=max_threads, + ) def main() -> None: @@ -55,46 +71,37 @@ def main() -> None: ] mesh = np.meshgrid(*grids, indexing="ij") zgrid = rng.uniform(-1.0, 1.0, mesh[0].size).astype(dtype) - dims = [grid.size for grid in grids] - starts = np.array([grid[0] for grid in grids], dtype=dtype) - steps = np.array([grid[1] - grid[0] for grid in grids], dtype=dtype) - - linear_regular = MultilinearRegular.new(dims, starts, steps, zgrid) - linear_rect = MultilinearRectilinear.new(grids_rect, zgrid) - cubic_regular = MulticubicRegular.new( - dims, - starts, - steps, - zgrid, - linearize_extrapolation=True, + cases = ( + ("linear", "regular", grids), + ("linear", "rectilinear", grids_rect), + ("cubic", "regular", grids), + ("cubic", "rectilinear", grids_rect), + ("nearest", "regular", grids), + ("nearest", "rectilinear", grids_rect), ) - cubic_rect = MulticubicRectilinear.new( - grids_rect, - zgrid, - linearize_extrapolation=True, - ) - nearest_regular = NearestRegular.new(dims, starts, steps, zgrid) - nearest_rect = NearestRectilinear.new(grids_rect, zgrid) for nobs in _OBSERVATION_COUNTS: nreps = max(int(_TARGET_COUNT / nobs), 1) - for interpolator in ( - linear_regular, - linear_rect, - cubic_regular, - cubic_rect, - nearest_regular, - nearest_rect, - ): - for _ in range(nreps): - points = _observation_points(rng, ndims, nobs, dtype) - _evaluate(interpolator, points) - - print( - f"Completed {type(interpolator).__name__} " - f"dtype={np.dtype(dtype).name} ndims={ndims} nobs={nobs}" - ) + for max_threads in (None, 1): + for method, grid_kind, grids_in in cases: + for _ in range(nreps): + points = _observation_points(rng, ndims, nobs, dtype) + _evaluate( + grids=grids_in, + vals=zgrid, + points=points, + method=method, + grid_kind=grid_kind, + max_threads=max_threads, + ) + + mode = "parallel" if max_threads is None else "serial" + print( + f"Completed interpn method={method} grid={grid_kind} " + f"dtype={np.dtype(dtype).name} ndims={ndims} nobs={nobs} " + f"mode={mode}" + ) if __name__ == "__main__": diff --git a/src/interpn/__init__.py b/src/interpn/__init__.py index 54c4b89..087f0af 100644 --- a/src/interpn/__init__.py +++ b/src/interpn/__init__.py @@ -87,7 +87,7 @@ def interpn( Interpolated values """ # Allocate for the output if it is not supplied - out = out or np.zeros_like(obs[0]) + out = out if out is not None else np.zeros_like(obs[0]) outshape = out.shape out = out.ravel() # Flat view without reallocating @@ -130,9 +130,7 @@ def interpn( ) case _: raise ValueError( - "Unsupported interpolation configuration:" - f" {dtype}, {method}" + f"Unsupported interpolation configuration: {dtype}, {method}" ) return out.reshape(outshape) - From 08b6f1cd23e372fee1859c808e872453f7cfb8a0 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 8 Jan 2026 23:36:27 -0500 Subject: [PATCH 10/20] add serial workflow back in. name pgo profiles per process. --- benches/bench_cpu.py | 4 +- scripts/distr_pgo_profile.sh | 19 +++--- scripts/profile_workload.py | 8 ++- scripts/profile_workload_ser.py | 101 ++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 11 deletions(-) create mode 100644 scripts/profile_workload_ser.py diff --git a/benches/bench_cpu.py b/benches/bench_cpu.py index d2a858d..8c4a3ac 100644 --- a/benches/bench_cpu.py +++ b/benches/bench_cpu.py @@ -576,7 +576,7 @@ def _plot_speedup_vs_threads( def bench_thread_speedup_vs_threads(): - nobs = 10_000 + nobs = 1_000_000 ndims = 3 ngrid = 20 rng = np.random.default_rng(17) @@ -633,7 +633,7 @@ def make_grids(kind: str) -> list[NDArray]: baseline / t if t else 0.0 for t in timings ] - output_path = Path(__file__).parent / "../docs/speedup_vs_threads_10000_obs.svg" + output_path = Path(__file__).parent / "../docs/speedup_vs_threads_1000000_obs.svg" _plot_speedup_vs_threads( thread_counts=thread_counts, speedups=speedups, diff --git a/scripts/distr_pgo_profile.sh b/scripts/distr_pgo_profile.sh index 46323e3..68755fa 100644 --- a/scripts/distr_pgo_profile.sh +++ b/scripts/distr_pgo_profile.sh @@ -1,23 +1,26 @@ #!/bin/bash -# Get llvm-profdata from `apt install llvm-20` -# Must match rust's llvm version or it will crash +# Get llvm-profdata from `apt install llvm-20`. +# Must match rust's llvm version or it will crash. -# Build instrumented wheel +# Build instrumented wheel. cargo clean uv cache clean uv pip install maturin rm -rf dist/ UV_NO_BUILD_CACHE=1 uv run --no-sync maturin build --compatibility pypi --out dist --verbose -- "-Cprofile-generate=${PWD}/scripts/pgo-profiles/pgo.profraw" -# Install instrumented wheel +# Install instrumented wheel. uv pip install $(find dist/ -name '*.whl')[pydantic] --group test --group bench --reinstall -# Clear existing profiles +# Clear existing profiles. rm -rf ./scripts/pgo-profiles; mkdir ./scripts/pgo-profiles; mkdir ./scripts/pgo-profiles/pgo.profraw -# Run reference workload to generate profile -uv run --no-sync ./scripts/profile_workload.py +# Run reference workload to generate profile. +# Name each profile file per-binary (%m), per-process (%p) +# so that they don't overwrite each other's results. +# Unfortunately, per-thread (%t) is not always available. +LLVM_PROFILE_FILE="${PWD}/scripts/pgo-profiles/pgo.profraw/%m-%p.profraw" uv run --no-sync ./scripts/profile_workload.py -# Merge profiles +# Merge profiles. /usr/lib/llvm-21/bin/llvm-profdata merge -o scripts/pgo-profiles/pgo.profdata $(find scripts/pgo-profiles/pgo.profraw -name '*.profraw') diff --git a/scripts/profile_workload.py b/scripts/profile_workload.py index 4b68e7e..9dbeccb 100644 --- a/scripts/profile_workload.py +++ b/scripts/profile_workload.py @@ -3,12 +3,16 @@ from __future__ import annotations +import subprocess +import sys +from pathlib import Path + import numpy as np from interpn import interpn as interpn_fn _TARGET_COUNT = int(1e4) -_OBSERVATION_COUNTS = (1, 3, 571, 2017) +_OBSERVATION_COUNTS = (1, 3, 571, 2017, int(1e6)) _MAX_DIMS = 4 _GRID_SIZE = 30 @@ -106,3 +110,5 @@ def main() -> None: if __name__ == "__main__": main() + script = Path(__file__).with_name("profile_workload_ser.py") + subprocess.run([sys.executable, str(script)], check=True) diff --git a/scripts/profile_workload_ser.py b/scripts/profile_workload_ser.py new file mode 100644 index 0000000..87debc1 --- /dev/null +++ b/scripts/profile_workload_ser.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +"""Lightweight workload used to gather PGO profiles for interpn.""" + +from __future__ import annotations + +import numpy as np + +from interpn import ( + MulticubicRectilinear, + MulticubicRegular, + MultilinearRectilinear, + MultilinearRegular, + NearestRectilinear, + NearestRegular, +) + +_TARGET_COUNT = int(1e4) +_OBSERVATION_COUNTS = (1, 3, 571, 2017) +_MAX_DIMS = 4 +_GRID_SIZE = 30 + + +def _observation_points( + rng: np.random.Generator, ndims: int, nobs: int, dtype: np.dtype +) -> tuple[list[np.ndarray], list[np.ndarray]]: + """Generate observation points inside and outside the grid domain. + The fraction of points outside the domain here will set the relative weight of + extrapolation branches. + """ + m = max(int(float(nobs) ** (1.0 / ndims) + 2.0), 2) + axes = [rng.uniform(-1.05, 1.05, m).astype(dtype) for _ in range(ndims)] + mesh = np.meshgrid(*axes, indexing="ij") + points = [axis.flatten()[:nobs].copy() for axis in mesh] + return points + + +def _evaluate(interpolator, points: list[np.ndarray]) -> None: + # Without preallocated output + interpolator.eval(points) + # With preallocated output + out = np.empty_like(points[0]) + interpolator.eval(points, out) + + +def main() -> None: + rng = np.random.default_rng(2394587) + + for dtype in (np.float64, np.float32): + for ndims in range(1, _MAX_DIMS + 1): + ngrid = _GRID_SIZE if ndims < 5 else 6 + grids = [np.linspace(-1.0, 1.0, ngrid, dtype=dtype) for _ in range(ndims)] + grids_rect = [ + np.array(sorted(np.random.uniform(-1.0, 1.0, ngrid).astype(dtype))) + for _ in range(ndims) + ] + mesh = np.meshgrid(*grids, indexing="ij") + zgrid = rng.uniform(-1.0, 1.0, mesh[0].size).astype(dtype) + dims = [grid.size for grid in grids] + starts = np.array([grid[0] for grid in grids], dtype=dtype) + steps = np.array([grid[1] - grid[0] for grid in grids], dtype=dtype) + + linear_regular = MultilinearRegular.new(dims, starts, steps, zgrid) + linear_rect = MultilinearRectilinear.new(grids_rect, zgrid) + cubic_regular = MulticubicRegular.new( + dims, + starts, + steps, + zgrid, + linearize_extrapolation=True, + ) + cubic_rect = MulticubicRectilinear.new( + grids_rect, + zgrid, + linearize_extrapolation=True, + ) + nearest_regular = NearestRegular.new(dims, starts, steps, zgrid) + nearest_rect = NearestRectilinear.new(grids_rect, zgrid) + + for nobs in _OBSERVATION_COUNTS: + nreps = max(int(_TARGET_COUNT / nobs), 1) + + for interpolator in ( + linear_regular, + linear_rect, + cubic_regular, + cubic_rect, + nearest_regular, + nearest_rect, + ): + for _ in range(nreps): + points = _observation_points(rng, ndims, nobs, dtype) + _evaluate(interpolator, points) + + print( + f"Completed {type(interpolator).__name__} " + f"dtype={np.dtype(dtype).name} ndims={ndims} nobs={nobs}" + ) + + +if __name__ == "__main__": + main() From 294f774c1b6f703dffccd31545cb4a679d5a3cfb Mon Sep 17 00:00:00 2001 From: James Logan Date: Fri, 9 Jan 2026 18:07:34 -0500 Subject: [PATCH 11/20] use physical core count --- Cargo.lock | 17 ++++++ Cargo.toml | 6 +- benches/bench_cpu.py | 131 +++++++++++++++++++++++++------------------ src/lib.rs | 24 ++++++-- 4 files changed, 116 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25a0e27..5aeb1ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -224,6 +224,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "indoc" version = "2.0.5" @@ -239,6 +245,7 @@ dependencies = [ "itertools 0.14.0", "ndarray", "num-traits", + "num_cpus", "numpy", "pyo3", "rand", @@ -365,6 +372,16 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "numpy" version = "0.27.1" diff --git a/Cargo.toml b/Cargo.toml index 125f39f..6c19cf1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,10 @@ crate-type = ["cdylib", "rlib"] # Rust lib deps num-traits = { version = "^0.2.19", default-features = false, features = ["libm"] } crunchy = { version = "^0.2.4", default-features = false } + +# Parallelism rayon = { version = "^1.11.0", optional = true } +num_cpus = { version = "1.17.0", optional = true } # Python bindings pyo3 = { version = "0.27.2", features = ["extension-module", "abi3-py310", "generate-import-lib"], optional = true } @@ -28,6 +31,7 @@ numpy = { version = "0.27.1", optional = true } # Test-only utils itertools = { version = "0.14.0", optional = true } + [dev-dependencies] rand = "0.9.2" criterion = "0.8.1" @@ -39,7 +43,7 @@ python = ["numpy", "pyo3", "std"] std = ["itertools"] deep-unroll = ["crunchy/limit_256"] fma = [] -par = ["std", "rayon"] +par = ["std", "rayon", "num_cpus"] [profile.release] opt-level = 3 diff --git a/benches/bench_cpu.py b/benches/bench_cpu.py index 8c4a3ac..5356ff9 100644 --- a/benches/bench_cpu.py +++ b/benches/bench_cpu.py @@ -474,6 +474,7 @@ def _plot_speedup_vs_dims( def _thread_counts() -> list[int]: max_threads = os.cpu_count() or 1 + max_threads = max(int(max_threads / 2), 1) # Real threads, not hyperthreads counts = [] threads = 1 while threads < max_threads: @@ -490,62 +491,88 @@ def _plot_speedup_vs_threads( nobs: int, output_path: Path, ) -> None: - fig = make_subplots( - rows=1, - cols=2, - subplot_titles=("Regular Grid", "Rectilinear Grid"), - horizontal_spacing=0.08, - ) - for col, grid_kind in enumerate(["regular", "rectilinear"], start=1): + fig = make_subplots(rows=1, cols=1) + dash_styles = [ + _normalized_line_style(i) for i in range(len(["linear", "cubic", "nearest"]) * 2) + ] + all_values = [] + thread_arr = np.array(thread_counts) + series: list[tuple[str, np.ndarray]] = [] + for grid_kind in ["regular", "rectilinear"]: for method in ["linear", "cubic", "nearest"]: values = speedups[grid_kind].get(method) if not values: continue - fig.add_trace( - go.Scatter( - x=thread_counts, - y=values, - mode="lines+markers", - name=method.title(), - line=dict(color=THREAD_SPEEDUP_COLORS[method], width=2), - marker=dict(size=7), - showlegend=col == 1, - ), - row=1, - col=col, - ) - fig.add_hline( - y=1.0, - line=dict(color="black", dash="dot", width=1), - row=1, - col=col, - ) - fig.update_xaxes( - title_text="Threads", + values_arr = np.array(values, dtype=float) + all_values.append(values_arr) + series.append((f"{method.title()} {grid_kind}", values_arr)) + for _, values_arr in series: + ones = np.ones_like(values_arr) + fill_between( + fig, + x=thread_arr, + upper=np.maximum(values_arr, ones), + lower=np.minimum(values_arr, ones), row=1, - col=col, - showline=True, - linecolor="black", - linewidth=1, - mirror=True, - ticks="outside", - tickcolor="black", - showgrid=False, - zeroline=False, + col=1, + fillcolor="rgba(139, 196, 59, 0.25)", ) - fig.update_yaxes( - title_text="Speedup vs. 1 Thread", + for idx, (label, values_arr) in enumerate(series): + fig.add_trace( + go.Scatter( + x=thread_arr, + y=values_arr, + mode="lines+markers", + name=label, + line=dict( + color="black", + width=2, + dash=dash_styles[idx], + ), + marker=dict(size=7, color="black"), + showlegend=False, + ), row=1, - col=col, - showline=True, - linecolor="black", - linewidth=1, - mirror=True, - ticks="outside", - tickcolor="black", - showgrid=False, - zeroline=False, + col=1, ) + fig.add_hline( + y=1.0, + line=dict(color="black", dash="dot", width=1), + row=1, + col=1, + ) + y_min = 1.0 + if all_values: + y_min = min(1.0, min(values.min() for values in all_values)) + y_max = max(thread_counts) if thread_counts else 1 + fig.update_xaxes( + title_text="Threads", + row=1, + col=1, + range=[min(thread_counts), max(thread_counts)] if thread_counts else None, + showline=True, + linecolor="black", + linewidth=1, + mirror=True, + ticks="outside", + tickcolor="black", + showgrid=False, + zeroline=False, + ) + fig.update_yaxes( + title_text="Speedup vs. 1 Thread", + row=1, + col=1, + range=[y_min, y_max], + showline=True, + linecolor="black", + linewidth=1, + mirror=True, + ticks="outside", + tickcolor="black", + showgrid=False, + zeroline=False, + ) fig.update_layout( title=dict( @@ -555,13 +582,7 @@ def _plot_speedup_vs_threads( ), height=430, margin=dict(t=70, l=60, r=40, b=80), - legend=dict( - orientation="h", - yanchor="top", - y=1.05, - x=1.0, - xanchor="right", - ), + showlegend=False, plot_bgcolor="rgba(0,0,0,0)", paper_bgcolor="rgba(0,0,0,0)", font=dict(color="black"), diff --git a/src/lib.rs b/src/lib.rs index 1282de2..de38158 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -174,10 +174,22 @@ pub fn interpn( return Err(MAXDIMS_ERR); } - // Chunk for parallelism - let num_cores = rayon::current_num_threads() - .min(max_threads.unwrap_or(usize::MAX)) - .max(1); + // Chunk for parallelism. + // + // By default, use only physical cores, because on most machines as of + // 2026, only half the available cores represent real compute capability due to + // the widespread adoption of hyperthreading. + // + // We also use a minimum chunk size of 1024 as a heuristic, because below that limit, + // single-threaded performance is usually faster due to a combination of threading overhead + // and memory page sizing. + let num_cores_physical = num_cpus::get_physical(); // Real cores + let num_cores_pool = rayon::current_num_threads(); // Available cores from rayon thread pool + let num_cores_available = num_cores_physical.min(num_cores_pool).max(1); // Real max + let num_cores = match max_threads { + Some(num_cores_requested) => num_cores_requested.min(num_cores_available), + None => num_cores_available, + }; let n = out.len(); let chunk = 1024.max(n / num_cores); @@ -307,7 +319,7 @@ pub fn interpn_serial( out, )?; if bounds.iter().any(|x| *x) { - return Err("At least one observation point is outside the grid.") + return Err("At least one observation point is outside the grid."); } } Ok(()) @@ -320,7 +332,7 @@ pub fn interpn_serial( let out = &mut bounds[..ndims]; multilinear::rectilinear::check_bounds(grids, obs, atol, out)?; if bounds.iter().any(|x| *x) { - return Err("At least one observation point is outside the grid.") + return Err("At least one observation point is outside the grid."); } } Ok(()) From a1376e2d689e072a06815efe9dab52e6a430f7fe Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 12:42:10 -0500 Subject: [PATCH 12/20] docstring for rust top level function --- Cargo.toml | 4 +- scripts/profile_workload.py | 2 +- src/lib.rs | 124 ++++++++++++++++++++++++++---------- 3 files changed, 95 insertions(+), 35 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6c19cf1..847926c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "interpn" -version = "0.9.2" +version = "0.10.0" edition = "2024" rust-version = "1.87" # 2025-05-15 authors = ["James Logan "] @@ -22,7 +22,7 @@ crunchy = { version = "^0.2.4", default-features = false } # Parallelism rayon = { version = "^1.11.0", optional = true } -num_cpus = { version = "1.17.0", optional = true } +num_cpus = { version = "^1.17.0", optional = true } # Python bindings pyo3 = { version = "0.27.2", features = ["extension-module", "abi3-py310", "generate-import-lib"], optional = true } diff --git a/scripts/profile_workload.py b/scripts/profile_workload.py index 9dbeccb..267789a 100644 --- a/scripts/profile_workload.py +++ b/scripts/profile_workload.py @@ -12,7 +12,7 @@ from interpn import interpn as interpn_fn _TARGET_COUNT = int(1e4) -_OBSERVATION_COUNTS = (1, 3, 571, 2017, int(1e6)) +_OBSERVATION_COUNTS = (1, 3, 571, 2017, int(1e4)) _MAX_DIMS = 4 _GRID_SIZE = 30 diff --git a/src/lib.rs b/src/lib.rs index de38158..fec1f82 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -157,6 +157,51 @@ const MAXDIMS: usize = 8; const MAXDIMS_ERR: &str = "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; + +/// Evaluate multidimensional interpolation on a regular grid in up to 8 dimensions. +/// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). +/// +/// For lower dimensions, a fast flattened method is used. For higher dimensions, where that flattening +/// becomes impractical due to compile times and instruction size, evaluation defers to a bounded +/// recursion. +/// The linear method uses the flattening for 1-6 dimensions, while +/// flattened cubic methods are available up to 3 dimensions by default and up to 4 dimensions +/// with the `deep_unroll` feature enabled. +/// +/// This is a convenience function; best performance will be achieved by using the exact right +/// number for the N parameter, as this will slightly reduce compute and storage overhead, +/// and the underlying method can be extended to more than this function's limit of 8 dimensions. +/// The limit of 8 dimensions was chosen for no more specific reason than to reduce unit test times. +/// +/// While this method initializes the interpolator struct on every call, the overhead of doing this +/// is minimal even when using it to evaluate one observation point at a time. +/// +/// Like most grid search algorithms (including in the standard library), the uniqueness and +/// monotonicity of the grid is the responsibility of the user, because checking it is often much +/// more expensive than the algorithm that we will perform on it. Behavior with ill-posed grids +/// is undefined. +/// +/// #### Args: +/// +/// * `grids`: `N` slices of each axis' grid coordinates. Must be unique and monotonically increasing. +/// * `vals`: Flattened `N`-dimensional array of data values at each grid point in C-style order. +/// Must be the same length as the cartesian product of the grids, (n_x * n_y * ...). +/// * `obs`: `N` slices of Observation points where the interpolant should be evaluated. +/// Must be of equal length. +/// * `out`: Pre-allocated output buffer to place the resulting values. +/// Must be the same length as each of the `obs` slices. +/// * `method`: Choice of interpolant function. +/// * `assume_grid_kind`: Whether to assume the grid is regular (evenly-spaced), +/// rectilinear (un-evenly spaced), or make no assumption. +/// If an assumption is provided, this bypasses a check of each +/// grid, which can be a major speedup in some cases. +/// * `linearize_extrapolation`: Whether cubic methods should extrapolate linearly instead of the default +/// quadratic extrapolation. Linearization is recommended to prevent +/// the interpolant from diverging to extremes outside the grid. +/// * `check_bounds_with_atol`: If provided, return an error if any observation points are outside the grid +/// by an amount exceeding the provided tolerance. +/// * `max_threads`: If provided, limit number of threads used to at most this number. Otherwise, +/// use a heuristic to choose the number that will provide the best throughput. #[cfg(feature = "par")] pub fn interpn( grids: &[&[T]], @@ -174,15 +219,21 @@ pub fn interpn( return Err(MAXDIMS_ERR); } + // Resolve grid kind, checking the grid if the kind is not provided by the user. + // We do this once at the top level so that the work is not repeated by each thread. + let kind = resolve_grid_kind(assume_grid_kind, grids)?; + // Chunk for parallelism. // // By default, use only physical cores, because on most machines as of // 2026, only half the available cores represent real compute capability due to - // the widespread adoption of hyperthreading. + // the widespread adoption of hyperthreading. If a larger number is requested for + // max_threads, that value is clamped to the total available threads so that we don't + // queue chunks unnecessarily. // // We also use a minimum chunk size of 1024 as a heuristic, because below that limit, - // single-threaded performance is usually faster due to a combination of threading overhead - // and memory page sizing. + // single-threaded performance is usually faster due to a combination of thread spawning overhead, + // memory page sizing, and improved vectorization over larger inputs. let num_cores_physical = num_cpus::get_physical(); // Real cores let num_cores_pool = rayon::current_num_threads(); // Available cores from rayon thread pool let num_cores_available = num_cores_physical.min(num_cores_pool).max(1); // Real max @@ -225,7 +276,7 @@ pub fn interpn( &obs_slices[..ndims], outc, method, - assume_grid_kind, + Some(kind), linearize_extrapolation, check_bounds_with_atol, ); @@ -258,34 +309,8 @@ pub fn interpn_serial( return Err(MAXDIMS_ERR); } - // Resolve grid kind, checking the grid if - // the kind is not provided by the user. - let kind = match assume_grid_kind { - Some(GridKind::Regular) => GridKind::Regular, - Some(GridKind::Rectilinear) => GridKind::Rectilinear, - None => { - // Check whether grid is regular - let mut is_regular = true; - - for grid in grids.iter() { - if grid.len() < 2 { - return Err("All grids must have at least two entries"); - } - let step = grid[1] - grid[0]; - - if !grid.windows(2).all(|pair| pair[1] - pair[0] == step) { - is_regular = false; - break; - } - } - - if is_regular { - GridKind::Regular - } else { - GridKind::Rectilinear - } - } - }; + // Resolve grid kind, checking the grid if the kind is not provided by the user. + let kind = resolve_grid_kind(assume_grid_kind, grids)?; // Extract regular grid params let get_regular_grid = || { @@ -392,6 +417,41 @@ pub fn interpn_serial( } } +/// Figure out whether a grid is regular or rectilinear. +fn resolve_grid_kind( + assume_grid_kind: Option, + grids: &[&[T]], +) -> Result { + let kind = match assume_grid_kind { + Some(GridKind::Regular) => GridKind::Regular, + Some(GridKind::Rectilinear) => GridKind::Rectilinear, + None => { + // Check whether grid is regular + let mut is_regular = true; + + for grid in grids.iter() { + if grid.len() < 2 { + return Err("All grids must have at least two entries"); + } + let step = grid[1] - grid[0]; + + if !grid.windows(2).all(|pair| pair[1] - pair[0] == step) { + is_regular = false; + break; + } + } + + if is_regular { + GridKind::Regular + } else { + GridKind::Rectilinear + } + } + }; + + Ok(kind) +} + /// Index a single value from an array #[inline] pub(crate) fn index_arr(loc: &[usize], dimprod: &[usize], data: &[T]) -> T { From a2ee55b5aa33cbf1dcb6382ec6df3a4da122e0e3 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 13:02:20 -0500 Subject: [PATCH 13/20] add top level interpn_alloc --- src/lib.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fec1f82..fded30b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -157,7 +157,6 @@ const MAXDIMS: usize = 8; const MAXDIMS_ERR: &str = "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; - /// Evaluate multidimensional interpolation on a regular grid in up to 8 dimensions. /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). /// @@ -175,14 +174,14 @@ const MAXDIMS_ERR: &str = /// /// While this method initializes the interpolator struct on every call, the overhead of doing this /// is minimal even when using it to evaluate one observation point at a time. -/// +/// /// Like most grid search algorithms (including in the standard library), the uniqueness and /// monotonicity of the grid is the responsibility of the user, because checking it is often much /// more expensive than the algorithm that we will perform on it. Behavior with ill-posed grids /// is undefined. -/// +/// /// #### Args: -/// +/// /// * `grids`: `N` slices of each axis' grid coordinates. Must be unique and monotonically increasing. /// * `vals`: Flattened `N`-dimensional array of data values at each grid point in C-style order. /// Must be the same length as the cartesian product of the grids, (n_x * n_y * ...). @@ -265,7 +264,10 @@ pub fn interpn( let s = &o.get(start..end); match s { Some(s) => obs_slices[j] = s, - None => write_err("Dimension mismatch"), + None => { + write_err("Dimension mismatch"); + return; + } }; } @@ -294,6 +296,44 @@ pub fn interpn( } } +/// Allocating variant of [interpn]. +/// It is recommended to pre-allocate outputs and use the non-allocating variant +/// whenever possible. +#[cfg(feature = "par")] +pub fn interpn_alloc( + grids: &[&[T]], + vals: &[T], + obs: &[&[T]], + out: Option>, + method: GridInterpMethod, + assume_grid_kind: Option, + linearize_extrapolation: bool, + check_bounds_with_atol: Option, + max_threads: Option, +) -> Result, &'static str> { + // Empty input -> empty output + if obs.len() == 0 { + return Ok(Vec::with_capacity(0)); + } + + // If output storage was not provided, build it now + let mut out = out.unwrap_or_else(|| vec![T::zero(); obs[0].len()]); + + interpn( + grids, + vals, + obs, + &mut out, + method, + assume_grid_kind, + linearize_extrapolation, + check_bounds_with_atol, + max_threads, + )?; + + Ok(out) +} + pub fn interpn_serial( grids: &[&[T]], vals: &[T], From 2c5603ad7267103a829127a0dc32ad2ac6b5c655 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 13:31:01 -0500 Subject: [PATCH 14/20] use lazylock for physical cores --- src/lib.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fded30b..72ad5b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -122,7 +122,7 @@ use rayon::{ }; #[cfg(feature = "par")] -use std::sync::Mutex; +use std::sync::{LazyLock, Mutex}; #[cfg(feature = "std")] pub mod utils; @@ -157,6 +157,19 @@ const MAXDIMS: usize = 8; const MAXDIMS_ERR: &str = "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; +/// The number of physical cores present on the machine; +/// initialized once, then never again, because each call involves some file I/O +/// and allocations that can be slower than the function call that they support. +/// +/// On subsequent accesses, each access is an atomic load without any waiting paths. +/// +/// This lock can only be contended if multiple threads attempt access +/// before it is initialized; in that case, the waiting threads may park +/// until initialization is complete, which can cause ~20us delays +/// on first access only. +#[cfg(feature = "par")] +static PHYSICAL_CORES: LazyLock = LazyLock::new(num_cpus::get_physical); + /// Evaluate multidimensional interpolation on a regular grid in up to 8 dimensions. /// Assumes C-style ordering of vals (z(x0, y0), z(x0, y1), ..., z(x0, yn), z(x1, y0), ...). /// @@ -233,7 +246,7 @@ pub fn interpn( // We also use a minimum chunk size of 1024 as a heuristic, because below that limit, // single-threaded performance is usually faster due to a combination of thread spawning overhead, // memory page sizing, and improved vectorization over larger inputs. - let num_cores_physical = num_cpus::get_physical(); // Real cores + let num_cores_physical = *PHYSICAL_CORES; // Real cores, populated on first access let num_cores_pool = rayon::current_num_threads(); // Available cores from rayon thread pool let num_cores_available = num_cores_physical.min(num_cores_pool).max(1); // Real max let num_cores = match max_threads { From a240647758d37a2aaa8d5b24fb2748850775e89c Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 13:38:19 -0500 Subject: [PATCH 15/20] add serial path for top level interpn function if there are not enough points --- src/lib.rs | 103 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 59 insertions(+), 44 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 72ad5b6..b3ad133 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -160,9 +160,9 @@ const MAXDIMS_ERR: &str = /// The number of physical cores present on the machine; /// initialized once, then never again, because each call involves some file I/O /// and allocations that can be slower than the function call that they support. -/// +/// /// On subsequent accesses, each access is an atomic load without any waiting paths. -/// +/// /// This lock can only be contended if multiple threads attempt access /// before it is initialized; in that case, the waiting threads may park /// until initialization is complete, which can cause ~20us delays @@ -256,56 +256,71 @@ pub fn interpn( let n = out.len(); let chunk = 1024.max(n / num_cores); - // Make a shared error indicator - let result: Mutex> = Mutex::new(None); - let write_err = |msg: &'static str| { - let mut guard = result.lock().unwrap(); - if guard.is_none() { - *guard = Some(msg); - } - }; + // If there are enough points to justify it, run parallel + if chunk > 2 * n { + // Make a shared error indicator + let result: Mutex> = Mutex::new(None); + let write_err = |msg: &'static str| { + let mut guard = result.lock().unwrap(); + if guard.is_none() { + *guard = Some(msg); + } + }; + + // Run threaded + out.par_chunks_mut(chunk).enumerate().for_each(|(i, outc)| { + // Calculate the start and end of observation point chunks + let start = chunk * i; + let end = start + outc.len(); + + // Chunk observation points + let mut obs_slices: [&[T]; 8] = [&[]; 8]; + for (j, o) in obs.iter().enumerate() { + let s = &o.get(start..end); + match s { + Some(s) => obs_slices[j] = s, + None => { + write_err("Dimension mismatch"); + return; + } + }; + } - // Run threaded - out.par_chunks_mut(chunk).enumerate().for_each(|(i, outc)| { - // Calculate the start and end of observation point chunks - let start = chunk * i; - let end = start + outc.len(); - - // Chunk observation points - let mut obs_slices: [&[T]; 8] = [&[]; 8]; - for (j, o) in obs.iter().enumerate() { - let s = &o.get(start..end); - match s { - Some(s) => obs_slices[j] = s, - None => { - write_err("Dimension mismatch"); - return; - } - }; - } + // Do interpolations + let res_inner = interpn_serial( + grids, + vals, + &obs_slices[..ndims], + outc, + method, + Some(kind), + linearize_extrapolation, + check_bounds_with_atol, + ); + + match res_inner { + Ok(()) => {} + Err(msg) => write_err(msg), + } + }); - // Do interpolations - let res_inner = interpn_serial( + // Handle errors from threads + match *result.lock().unwrap() { + Some(msg) => Err(msg), + None => Ok(()), + } + } else { + // If there are not enough points to justify parallelism, run serial + interpn_serial( grids, vals, - &obs_slices[..ndims], - outc, + obs, + out, method, Some(kind), linearize_extrapolation, check_bounds_with_atol, - ); - - match res_inner { - Ok(()) => {} - Err(msg) => write_err(msg), - } - }); - - // Handle errors from threads - match *result.lock().unwrap() { - Some(msg) => Err(msg), - None => Ok(()), + ) } } From 0b1c48ae14623c546a669ece76230b3739389d36 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 14:25:38 -0500 Subject: [PATCH 16/20] better heuristic for when to use serial path --- src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b3ad133..46899bb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -156,6 +156,7 @@ pub enum GridKind { const MAXDIMS: usize = 8; const MAXDIMS_ERR: &str = "Dimension exceeds maximum (8). Use interpolator struct directly for higher dimensions."; +const MIN_CHUNK_SIZE: usize = 1024; /// The number of physical cores present on the machine; /// initialized once, then never again, because each call involves some file I/O @@ -254,10 +255,10 @@ pub fn interpn( None => num_cores_available, }; let n = out.len(); - let chunk = 1024.max(n / num_cores); + let chunk = MIN_CHUNK_SIZE.max(n / num_cores); // If there are enough points to justify it, run parallel - if chunk > 2 * n { + if 2 * MIN_CHUNK_SIZE <= n { // Make a shared error indicator let result: Mutex> = Mutex::new(None); let write_err = |msg: &'static str| { From 8a49db6bbc28d5ff66c6089b7f77112be43215b0 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 14:59:49 -0500 Subject: [PATCH 17/20] don't check cores if running serial. use 10 megapixel case for threading bench. --- benches/bench_cpu.py | 23 ++++++------- docs/speedup_vs_threads_10000000_obs.html | 2 ++ src/lib.rs | 42 +++++++++++------------ 3 files changed, 34 insertions(+), 33 deletions(-) create mode 100644 docs/speedup_vs_threads_10000000_obs.html diff --git a/benches/bench_cpu.py b/benches/bench_cpu.py index 5356ff9..b5fcdc3 100644 --- a/benches/bench_cpu.py +++ b/benches/bench_cpu.py @@ -493,7 +493,8 @@ def _plot_speedup_vs_threads( ) -> None: fig = make_subplots(rows=1, cols=1) dash_styles = [ - _normalized_line_style(i) for i in range(len(["linear", "cubic", "nearest"]) * 2) + _normalized_line_style(i) + for i in range(len(["linear", "cubic", "nearest"]) * 2) ] all_values = [] thread_arr = np.array(thread_counts) @@ -597,9 +598,9 @@ def _plot_speedup_vs_threads( def bench_thread_speedup_vs_threads(): - nobs = 1_000_000 - ndims = 3 - ngrid = 20 + nobs = 10_000_000 + ndims = 2 + ngrid = int(10e6**0.5) rng = np.random.default_rng(17) thread_counts = _thread_counts() @@ -618,7 +619,7 @@ def make_grids(kind: str) -> list[NDArray]: for grid_kind in ["regular", "rectilinear"]: grids = make_grids(grid_kind) mesh = np.meshgrid(*grids, indexing="ij") - vals = (mesh[0] + 2.0 * mesh[1] - 0.5 * mesh[2]).astype(np.float64) + vals = (mesh[0] + 2.0 * mesh[1]).astype(np.float64) vals_flat = vals.ravel() obs = [] for grid in grids: @@ -650,11 +651,9 @@ def make_grids(kind: str) -> list[NDArray]: ) timings.append(timed) baseline = timings[0] - speedups[grid_kind][method] = [ - baseline / t if t else 0.0 for t in timings - ] + speedups[grid_kind][method] = [baseline / t if t else 0.0 for t in timings] - output_path = Path(__file__).parent / "../docs/speedup_vs_threads_1000000_obs.svg" + output_path = Path(__file__).parent / f"../docs/speedup_vs_threads_{nobs}_obs.svg" _plot_speedup_vs_threads( thread_counts=thread_counts, speedups=speedups, @@ -847,7 +846,7 @@ def bench_4_dims_1_obs(): def bench_3_dims_n_obs_unordered(): - for preallocate in [False, True]: + for preallocate in [True, False]: ndims = 3 # Number of grid dimensions ngrid = 20 # Size of grid on each dimension @@ -987,7 +986,7 @@ def bench_3_dims_n_obs_unordered(): def bench_4_dims_n_obs_unordered(): - for preallocate in [False, True]: + for preallocate in [True, False]: ndims = 4 # Number of grid dimensions ngrid = 20 # Size of grid on each dimension @@ -1273,8 +1272,8 @@ def bench_throughput_vs_dims(): def main(): - bench_thread_speedup_vs_threads() bench_throughput_vs_dims() + bench_thread_speedup_vs_threads() bench_4_dims_1_obs() bench_4_dims_n_obs_unordered() bench_3_dims_n_obs_unordered() diff --git a/docs/speedup_vs_threads_10000000_obs.html b/docs/speedup_vs_threads_10000000_obs.html new file mode 100644 index 0000000..1c55871 --- /dev/null +++ b/docs/speedup_vs_threads_10000000_obs.html @@ -0,0 +1,2 @@ +
+
\ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 46899bb..c5dac83 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -231,34 +231,34 @@ pub fn interpn( if ndims > MAXDIMS { return Err(MAXDIMS_ERR); } + let n = out.len(); // Resolve grid kind, checking the grid if the kind is not provided by the user. // We do this once at the top level so that the work is not repeated by each thread. let kind = resolve_grid_kind(assume_grid_kind, grids)?; - // Chunk for parallelism. - // - // By default, use only physical cores, because on most machines as of - // 2026, only half the available cores represent real compute capability due to - // the widespread adoption of hyperthreading. If a larger number is requested for - // max_threads, that value is clamped to the total available threads so that we don't - // queue chunks unnecessarily. - // - // We also use a minimum chunk size of 1024 as a heuristic, because below that limit, - // single-threaded performance is usually faster due to a combination of thread spawning overhead, - // memory page sizing, and improved vectorization over larger inputs. - let num_cores_physical = *PHYSICAL_CORES; // Real cores, populated on first access - let num_cores_pool = rayon::current_num_threads(); // Available cores from rayon thread pool - let num_cores_available = num_cores_physical.min(num_cores_pool).max(1); // Real max - let num_cores = match max_threads { - Some(num_cores_requested) => num_cores_requested.min(num_cores_available), - None => num_cores_available, - }; - let n = out.len(); - let chunk = MIN_CHUNK_SIZE.max(n / num_cores); - // If there are enough points to justify it, run parallel if 2 * MIN_CHUNK_SIZE <= n { + // Chunk for parallelism. + // + // By default, use only physical cores, because on most machines as of + // 2026, only half the available cores represent real compute capability due to + // the widespread adoption of hyperthreading. If a larger number is requested for + // max_threads, that value is clamped to the total available threads so that we don't + // queue chunks unnecessarily. + // + // We also use a minimum chunk size of 1024 as a heuristic, because below that limit, + // single-threaded performance is usually faster due to a combination of thread spawning overhead, + // memory page sizing, and improved vectorization over larger inputs. + let num_cores_physical = *PHYSICAL_CORES; // Real cores, populated on first access + let num_cores_pool = rayon::current_num_threads(); // Available cores from rayon thread pool + let num_cores_available = num_cores_physical.min(num_cores_pool).max(1); // Real max + let num_cores = match max_threads { + Some(num_cores_requested) => num_cores_requested.min(num_cores_available), + None => num_cores_available, + }; + let chunk = MIN_CHUNK_SIZE.max(n / num_cores); + // Make a shared error indicator let result: Mutex> = Mutex::new(None); let write_err = |msg: &'static str| { From 58ff0a4e081d72491d026b5dd2bf01283a2efbb4 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 15:37:00 -0500 Subject: [PATCH 18/20] update changelog and docstrings --- CHANGELOG.md | 19 +++++++++++++++++++ Cargo.lock | 2 +- src/interpn/__init__.py | 13 ++++++++----- src/lib.rs | 1 + 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a62923..df771bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## 0.10.0 2026-01-10 + +### Added + +* Rust + * Add top-level `interpn`, `interpn_alloc` and `interpn_serial` methods along with supporting enums for selecting methods + * Add `par` feature, enabled by default, that enables parallelism with rayon in `interpn` function + * Add lazy static for getting number of physical cores to advise thread chunk sizes + +### Changed + +* Rust + * Update deps +* Python + * !Refactor `interpn` function + * Combine `check_bounds: bool` and `bounds_check_atol: float` to `check_bounds_with_atol: float | None` + * Add `max_threads: int | None` input to allow manually limiting parallelism + * Use rust top-level `interpn` function as backend for method selection, bounds checks, and parallelism + ## 0.9.1 2025-12-31 ### Changed diff --git a/Cargo.lock b/Cargo.lock index 5aeb1ee..c1d4854 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -238,7 +238,7 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "interpn" -version = "0.9.2" +version = "0.10.0" dependencies = [ "criterion", "crunchy", diff --git a/src/interpn/__init__.py b/src/interpn/__init__.py index 087f0af..64cc2ef 100644 --- a/src/interpn/__init__.py +++ b/src/interpn/__init__.py @@ -63,16 +63,19 @@ def interpn( """ Evaluate an N-dimensional grid at the supplied observation points. - Performs some small allocations to prepare the inputs. - Use the persistent wrapper classes or raw bindings instead - to avoid this overhead entirely. - Reallocates input arrays if and only if they are not contiguous yet. + Note: values must be defined in C-order, like made by + `numpy.meshgrid(*grids, indexing="ij")`. Values on meshgrids defined + in graphics-order without `indexing="ij"` will not have the desired effect. + + If a pre-allocated output array is provided, the returned array is a + reference to that array. + Args: obs: Observation coordinates, one array per dimension. grids: Grid axis coordinates, one array per dimension. - vals: Values defined on the full tensor-product grid. + vals: Values defined on the full cartesian-product grid. method: Interpolation kind, one of ``"linear"``, ``"cubic"``, or ``"nearest"``. out: Optional preallocated array that receives the result. linearize_extrapolation: Whether cubic extrapolation should fall back to diff --git a/src/lib.rs b/src/lib.rs index c5dac83..a407d68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -363,6 +363,7 @@ pub fn interpn_alloc( Ok(out) } +/// Single-threaded, non-allocating variant of [interpn] available without `par` feature. pub fn interpn_serial( grids: &[&[T]], vals: &[T], From dd46e69faf87266da4536f7525d4f1f03ef3a58c Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 15:40:05 -0500 Subject: [PATCH 19/20] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df771bf..1be8242 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ * Python * !Refactor `interpn` function * Combine `check_bounds: bool` and `bounds_check_atol: float` to `check_bounds_with_atol: float | None` + * Replace `assume_regular` input with optional `grid_kind` to allow assuming either regular or rectilinear, or making no assumption * Add `max_threads: int | None` input to allow manually limiting parallelism * Use rust top-level `interpn` function as backend for method selection, bounds checks, and parallelism From 42f7b4d74eb44558d956872c59af0cbd54038a03 Mon Sep 17 00:00:00 2001 From: James Logan Date: Sat, 10 Jan 2026 15:43:37 -0500 Subject: [PATCH 20/20] format --- src/interpn/raw.pyi | 1 - 1 file changed, 1 deletion(-) diff --git a/src/interpn/raw.pyi b/src/interpn/raw.pyi index 256ab30..e585c4c 100644 --- a/src/interpn/raw.pyi +++ b/src/interpn/raw.pyi @@ -53,7 +53,6 @@ def interpn_f32( check_bounds_with_atol: float | None = None, max_threads: int | None = None, ) -> None: ... - def interpn_linear_regular_f64( dims: IntArray, starts: NDArrayF64,