jlogan03 · jlogan03 · Dec 31, 2025 · Dec 30, 2025 · Dec 30, 2025 · Dec 30, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog
 
+## 0.9.1 2025-12-31
+
+### Changed
+
+* Rust
+  * Remove explicit const-unrolling for shallow loops (eliminates nested const-unrolling)
+    * Improves compile time
+    * Minimal effect on performance in Rust benchmarks
+    * About 20% improvement for single-sample throughput from Python
+* Python
+  * Lock python package version to rust version
+  * Improve handling of `__version__` when package is not present
+
 ## 0.9.0 2025-12-27
 
 ### Added

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "interpn"
-version = "0.9.0"
+version = "0.9.1"
 edition = "2024"
 authors = ["James Logan <jlogan03@gmail.com>"]
 license = "MIT OR Apache-2.0"

diff --git a/docs/1d_quality_of_fit_Rectilinear.html b/docs/1d_quality_of_fit_Rectilinear.html
diff --git a/docs/1d_quality_of_fit_Rectilinear.svg b/docs/1d_quality_of_fit_Rectilinear.svg
diff --git a/docs/1d_quality_of_fit_Regular.html b/docs/1d_quality_of_fit_Regular.html
diff --git a/docs/1d_quality_of_fit_Regular.svg b/docs/1d_quality_of_fit_Regular.svg
diff --git a/docs/2d_quality_of_fit_Rectilinear.html b/docs/2d_quality_of_fit_Rectilinear.html
diff --git a/docs/2d_quality_of_fit_Rectilinear.svg b/docs/2d_quality_of_fit_Rectilinear.svg
diff --git a/docs/2d_quality_of_fit_Regular.html b/docs/2d_quality_of_fit_Regular.html
diff --git a/docs/2d_quality_of_fit_Regular.svg b/docs/2d_quality_of_fit_Regular.svg
diff --git a/docs/3d_throughput_vs_nobs_cubic.html b/docs/3d_throughput_vs_nobs_cubic.html
diff --git a/docs/3d_throughput_vs_nobs_cubic.svg b/docs/3d_throughput_vs_nobs_cubic.svg
diff --git a/docs/3d_throughput_vs_nobs_linear.html b/docs/3d_throughput_vs_nobs_linear.html
diff --git a/docs/3d_throughput_vs_nobs_linear.svg b/docs/3d_throughput_vs_nobs_linear.svg
diff --git a/docs/3d_throughput_vs_nobs_prealloc_cubic.html b/docs/3d_throughput_vs_nobs_prealloc_cubic.html
diff --git a/docs/3d_throughput_vs_nobs_prealloc_cubic.svg b/docs/3d_throughput_vs_nobs_prealloc_cubic.svg
diff --git a/docs/3d_throughput_vs_nobs_prealloc_linear.html b/docs/3d_throughput_vs_nobs_prealloc_linear.html
diff --git a/docs/3d_throughput_vs_nobs_prealloc_linear.svg b/docs/3d_throughput_vs_nobs_prealloc_linear.svg
diff --git a/docs/4d_throughput_vs_nobs_cubic.html b/docs/4d_throughput_vs_nobs_cubic.html
diff --git a/docs/4d_throughput_vs_nobs_cubic.svg b/docs/4d_throughput_vs_nobs_cubic.svg
diff --git a/docs/4d_throughput_vs_nobs_linear.html b/docs/4d_throughput_vs_nobs_linear.html
diff --git a/docs/4d_throughput_vs_nobs_linear.svg b/docs/4d_throughput_vs_nobs_linear.svg
diff --git a/docs/4d_throughput_vs_nobs_prealloc_cubic.html b/docs/4d_throughput_vs_nobs_prealloc_cubic.html
diff --git a/docs/4d_throughput_vs_nobs_prealloc_cubic.svg b/docs/4d_throughput_vs_nobs_prealloc_cubic.svg
diff --git a/docs/4d_throughput_vs_nobs_prealloc_linear.html b/docs/4d_throughput_vs_nobs_prealloc_linear.html
diff --git a/docs/4d_throughput_vs_nobs_prealloc_linear.svg b/docs/4d_throughput_vs_nobs_prealloc_linear.svg
diff --git a/docs/nearest_quality_of_fit.html b/docs/nearest_quality_of_fit.html
diff --git a/docs/nearest_quality_of_fit.svg b/docs/nearest_quality_of_fit.svg
diff --git a/docs/speedup_vs_dims_1000_obs_cubic.html b/docs/speedup_vs_dims_1000_obs_cubic.html
diff --git a/docs/speedup_vs_dims_1000_obs_cubic.svg b/docs/speedup_vs_dims_1000_obs_cubic.svg
diff --git a/docs/speedup_vs_dims_1000_obs_linear.html b/docs/speedup_vs_dims_1000_obs_linear.html
diff --git a/docs/speedup_vs_dims_1000_obs_linear.svg b/docs/speedup_vs_dims_1000_obs_linear.svg
diff --git a/docs/speedup_vs_dims_1_obs_cubic.html b/docs/speedup_vs_dims_1_obs_cubic.html
diff --git a/docs/speedup_vs_dims_1_obs_cubic.svg b/docs/speedup_vs_dims_1_obs_cubic.svg
diff --git a/docs/speedup_vs_dims_1_obs_linear.html b/docs/speedup_vs_dims_1_obs_linear.html
diff --git a/docs/speedup_vs_dims_1_obs_linear.svg b/docs/speedup_vs_dims_1_obs_linear.svg
diff --git a/docs/throughput_vs_dims_1000_obs.html b/docs/throughput_vs_dims_1000_obs.html
diff --git a/docs/throughput_vs_dims_1000_obs.svg b/docs/throughput_vs_dims_1000_obs.svg
diff --git a/docs/throughput_vs_dims_1_obs.html b/docs/throughput_vs_dims_1_obs.html
diff --git a/docs/throughput_vs_dims_1_obs.svg b/docs/throughput_vs_dims_1_obs.svg
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "interpn"
-version = "0.9.0"
+dynamic = ["version"]
 description = "N-dimensional interpolation/extrapolation methods"
 authors = [{ name = "James Logan", email = "jlogan03@gmail.com" }]
 readme = "README.md"
@@ -74,12 +74,7 @@ module-name = "interpn.interpn"
 python-packages = ["interpn"]
 python-source = "src"
 profile = "release"
-
-#rustc-args = [
-#    "-Cprofile-use=./scripts/pgo-profiles/interpn.profdata",
-#    "-Cllvm-args=-pgo-warn-missing-function"
-#]
-
+version = { source = "cargo" }
 include = [
   { path = "all", format = ["sdist", "wheel"] },
 ]

diff --git a/src/interpn/__init__.py b/src/interpn/__init__.py
@@ -7,7 +7,7 @@
 
 from typing import Literal
 from collections.abc import Sequence
-from importlib.metadata import version
+from importlib.metadata import PackageNotFoundError, version
 from importlib.util import find_spec
 
 import numpy as np
@@ -26,7 +26,10 @@
     from .nearest_regular import NearestRegular
     from .nearest_rectilinear import NearestRectilinear
 
-__version__ = version("interpn")
+try:
+    __version__ = version("interpn")
+except PackageNotFoundError:
+    __version__ = "0.0.0"
 
 __all__ = [
     "__version__",

diff --git a/src/lib.rs b/src/lib.rs
@@ -88,8 +88,6 @@
 // expanded code that is entirely in const.
 #![allow(clippy::absurd_extreme_comparisons)]
 
-use crunchy::unroll;
-
 pub mod multilinear;
 pub use multilinear::{MultilinearRectilinear, MultilinearRegular};
 
@@ -134,10 +132,11 @@ pub(crate) fn index_arr_fixed_dims<T: Copy, const N: usize>(
 ) -> T {
     let mut i = 0;
 
-    unroll! {
-        for j < 7 in 0..N {
-            i += loc[j] * dimprod[j];
-        }
+    // unroll! {
+    //     for j < 7 in 0..N {
+    for j in 0..N {
+        i += loc[j] * dimprod[j];
+        // }
     }
 
     data[i]

diff --git a/src/multicubic/rectilinear.rs b/src/multicubic/rectilinear.rs
@@ -293,21 +293,19 @@ impl<'a, T: Float, const N: usize> MulticubicRectilinear<'a, T, N> {
         let mut store = [[T::zero(); FP]; N];
 
         let mut acc = 1;
-        unroll! {
-            for i < 5 in 0..N {
-                // Populate cumulative product of higher dimensions for indexing.
-                //
-                // Each entry is the cumulative product of the size of dimensions
-                // higher than this one, which is the stride between blocks
-                // relating to a given index along each dimension.
-                if const { i > 0 } {
-                    acc *= self.dims[N - i];
-                }
-                dimprod[N - i - 1] = acc;
-
-                // Populate lower corner and saturation flag for each dimension
-                (origin[i], sat[i]) = self.get_loc(x[i], i)?;
+        for i in 0..N {
+            // Populate cumulative product of higher dimensions for indexing.
+            //
+            // Each entry is the cumulative product of the size of dimensions
+            // higher than this one, which is the stride between blocks
+            // relating to a given index along each dimension.
+            if i > 0 {
+                acc *= self.dims[N - i];
             }
+            dimprod[N - i - 1] = acc;
+
+            // Populate lower corner and saturation flag for each dimension
+            (origin[i], sat[i]) = self.get_loc(x[i], i)?;
         }
 
         // Recursive interpolation of one dependency tree at a time
@@ -317,45 +315,40 @@ impl<'a, T: Float, const N: usize> MulticubicRectilinear<'a, T, N> {
         macro_rules! unroll_vertices_body {
             ($i:ident) => {
                 // Index, interpolate, or pass on each level of the tree
-                unroll! {
-                    for j < 5 in 0..N {  // const loop
-
-                        // Most of these iterations will get optimized out
-                        if const{j == 0} { // const branch
-                            // At leaves, index values
-
-                            unroll!{
-                                for k < 5 in 0..N {  // const loop
-                                    // Bit pattern in an integer matches C-ordered array indexing
-                                    // so we can just use the vertex index to index into the array
-                                    // by selecting the appropriate bit from the index.
-                                    const OFFSET: usize = const{($i & (3 << (2*k))) >> (2*k)};
-                                    loc[k] = origin[k] + OFFSET;
-                                }
-                            }
-                            const STORE_IND: usize = $i % FP;
-                            store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals);
+                for j in 0..N {
+                    // Most of these iterations will get optimized out
+                    if j == 0 {
+                        // const branch
+                        // At leaves, index values
+                        for k in 0..N {
+                            // Bit pattern in an integer matches C-ordered array indexing
+                            // so we can just use the vertex index to index into the array
+                            // by selecting the appropriate bit from the index.
+                            let offset: usize = ($i & (3 << (2 * k))) >> (2 * k);
+                            loc[k] = origin[k] + offset;
                         }
-                        else { // const branch
-                            // For other nodes, interpolate on child values
-
-                            const Q: usize = const{FP.pow(j as u32)};
-                            const LEVEL: bool = const {($i + 1).is_multiple_of(Q)};
-                            const P: usize = const{(($i + 1) / Q).saturating_sub(1) % FP};
-                            const IND: usize = const{j.saturating_sub(1)};
-
-                            if LEVEL { // const branch
-                                let grid_cell = &self.grids[IND][origin[IND]..origin[IND] + 4];
-                                let interped = interp_inner::<T>(
-                                    store[IND],
-                                    grid_cell.try_into().unwrap(),
-                                    x[IND],
-                                    sat[IND],
-                                    self.linearize_extrapolation
-                                );
-
-                                store[j][P] = interped;
-                            }
+                        const STORE_IND: usize = $i % FP;
+                        store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals);
+                    } else {
+                        // const branch
+                        // For other nodes, interpolate on child values
+                        let q: usize = FP.pow(j as u32);
+                        let level: bool = ($i + 1).is_multiple_of(q);
+                        let p: usize = (($i + 1) / q).saturating_sub(1) % FP;
+                        let ind: usize = j.saturating_sub(1);
+
+                        if level {
+                            // const branch
+                            let grid_cell = &self.grids[ind][origin[ind]..origin[ind] + 4];
+                            let interped = interp_inner::<T>(
+                                store[ind],
+                                grid_cell.try_into().unwrap(),
+                                x[ind],
+                                sat[ind],
+                                self.linearize_extrapolation,
+                            );
+
+                            store[j][p] = interped;
                         }
                     }
                 }

diff --git a/src/multicubic/regular.rs b/src/multicubic/regular.rs
@@ -357,30 +357,28 @@ impl<'a, T: Float, const N: usize> MulticubicRegular<'a, T, N> {
         let mut store = [[T::zero(); FP]; N];
 
         let mut acc = 1;
-        unroll! {
-            for i < 5 in 0..N {
-                // Populate cumulative product of higher dimensions for indexing.
-                //
-                // Each entry is the cumulative product of the size of dimensions
-                // higher than this one, which is the stride between blocks
-                // relating to a given index along each dimension.
-                if const { i > 0 } {
-                    acc *= self.dims[N - i];
-                }
-                dimprod[N - i - 1] = acc;
-
-                // Populate lower corner and saturation flag for each dimension
-                (origin[i], sat[i]) = self.get_loc(x[i], i)?;
-
-                // Calculate normalized delta locations
-                // For the cubic method, the normalized coordinate `t` is always relative
-                // to cube index 1 (out of 0-3)
-                let index_one_loc = self.starts[i]
-                    + self.steps[i]
-                        * <T as NumCast>::from(origin[i] + 1)
-                            .ok_or("Unrepresentable coordinate value")?;
-                dts[i] = (x[i] - index_one_loc) / self.steps[i];
+        for i in 0..N {
+            // Populate cumulative product of higher dimensions for indexing.
+            //
+            // Each entry is the cumulative product of the size of dimensions
+            // higher than this one, which is the stride between blocks
+            // relating to a given index along each dimension.
+            if i > 0 {
+                acc *= self.dims[N - i];
             }
+            dimprod[N - i - 1] = acc;
+
+            // Populate lower corner and saturation flag for each dimension
+            (origin[i], sat[i]) = self.get_loc(x[i], i)?;
+
+            // Calculate normalized delta locations
+            // For the cubic method, the normalized coordinate `t` is always relative
+            // to cube index 1 (out of 0-3)
+            let index_one_loc = self.starts[i]
+                + self.steps[i]
+                    * <T as NumCast>::from(origin[i] + 1)
+                        .ok_or("Unrepresentable coordinate value")?;
+            dts[i] = (x[i] - index_one_loc) / self.steps[i];
         }
 
         // Recursive interpolation of one dependency tree at a time
@@ -390,43 +388,38 @@ impl<'a, T: Float, const N: usize> MulticubicRegular<'a, T, N> {
         macro_rules! unroll_vertices_body {
             ($i:ident) => {
                 // Index, interpolate, or pass on each level of the tree
-                unroll! {
-                    for j < 5 in 0..N {  // const loop
-
-                        // Most of these iterations will get optimized out
-                        if const{j == 0} { // const branch
-                            // At leaves, index values
-
-                            unroll!{
-                                for k < 5 in 0..N {  // const loop
-                                    // Bit pattern in an integer matches C-ordered array indexing
-                                    // so we can just use the vertex index to index into the array
-                                    // by selecting the appropriate bit from the index.
-                                    const OFFSET: usize = const{($i & (3 << (2*k))) >> (2*k)};
-                                    loc[k] = origin[k] + OFFSET;
-                                }
-                            }
-                            const STORE_IND: usize = $i % FP;
-                            store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals);
+                for j in 0..N {
+                    // Most of these iterations will get optimized out
+                    if j == 0 {
+                        // const branch
+                        // At leaves, index values
+                        for k in 0..N {
+                            // Bit pattern in an integer matches C-ordered array indexing
+                            // so we can just use the vertex index to index into the array
+                            // by selecting the appropriate bit from the index.
+                            let offset: usize = ($i & (3 << (2 * k))) >> (2 * k);
+                            loc[k] = origin[k] + offset;
                         }
-                        else { // const branch
-                            // For other nodes, interpolate on child values
-
-                            const Q: usize = const{FP.pow(j as u32)};
-                            const LEVEL: bool = const {($i + 1).is_multiple_of(Q)};
-                            const P: usize = const{(($i + 1) / Q).saturating_sub(1) % FP};
-                            const IND: usize = const{j.saturating_sub(1)};
-
-                            if LEVEL { // const branch
-                                let interped = interp_inner::<T>(
-                                    &store[IND],
-                                    dts[IND],
-                                    sat[IND],
-                                    self.linearize_extrapolation
-                                );
-
-                                store[j][P] = interped;
-                            }
+                        const STORE_IND: usize = $i % FP;
+                        store[0][STORE_IND] = index_arr_fixed_dims(loc, dimprod, self.vals);
+                    } else {
+                        // const branch
+                        // For other nodes, interpolate on child values
+                        let q: usize = FP.pow(j as u32);
+                        let level: bool = ($i + 1).is_multiple_of(q);
+                        let p: usize = (($i + 1) / q).saturating_sub(1) % FP;
+                        let ind: usize = j.saturating_sub(1);
+
+                        if level {
+                            // const branch
+                            let interped = interp_inner::<T>(
+                                &store[ind],
+                                dts[ind],
+                                sat[ind],
+                                self.linearize_extrapolation,
+                            );
+
+                            store[j][p] = interped;
                         }
                     }
                 }