diff --git a/Cargo.lock b/Cargo.lock index 463aa71..f8ec756 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" name = "fpsample" version = "0.1.0" dependencies = [ + "kdtree", "numpy", "pyo3", ] @@ -34,6 +35,16 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" +[[package]] +name = "kdtree" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f0a0e9f770b65bac9aad00f97a67ab5c5319effed07f6da385da3c2115e47ba" +dependencies = [ + "num-traits", + "thiserror", +] + [[package]] name = "libc" version = "0.2.147" @@ -209,7 +220,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -220,7 +231,7 @@ checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -276,12 +287,43 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "target-lexicon" version = "0.12.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a" +[[package]] +name = "thiserror" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.32", +] + [[package]] name = "unicode-ident" version = "1.0.11" diff --git a/Cargo.toml b/Cargo.toml index 7e9a7af..0708197 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.19", features = ["extension-module"] } numpy = "0.19" +kdtree = "0.7" diff --git a/README.md b/README.md index 87ea3b8..47d0048 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,18 @@ [![license badge](https://img.shields.io/github/license/leonardodalinky/fpsample)](https://github.com/leonardodalinky/fpsample/blob/main/LICENSE) [![star badge](https://img.shields.io/github/stars/leonardodalinky/fpsample?style=social)](https://github.com/leonardodalinky/fpsample) -Python farthest point sampling library built on Rust pyo3 bindings. Compatible with numpy. +Python efficient farthest point sampling (FPS) library, 100x faster than pure Python implementation. + +`fpsample` is coupled with `numpy` and built upon Rust pyo3 bindings. + +*Library for GPU is under construction*. Any issues are welcome. ## Installation ### Install from PyPI +`numpy>=1.16.0` is required. Install `fpsample` using pip: + ```shell pip install -U fpsample ``` @@ -32,26 +38,37 @@ import numpy as np # Generate random point cloud pc = np.random.rand(4096, 3) # sample 1024 points -fps_samples = fpsample.fps_sampling(pc, 1024) +fps_samples_idx = fpsample.fps_sampling(pc, 1024) -fps_npdu_samples = fpsample.fps_npdu_sampling(pc, 1024) +fps_npdu_samples_idx = fpsample.fps_npdu_sampling(pc, 1024) # or specify the windows size -fps_npdu_samples = fpsample.fps_npdu_sampling(pc, 1024, k=64) +fps_npdu_samples_idx = fpsample.fps_npdu_sampling(pc, 1024, k=64) + +fps_npdu_kdtree_samples_idx = fpsample.fps_npdu_kdtree_sampling(pc, 1024) +# or specify the windows size +fps_npdu_kdtree_samples_idx = fpsample.fps_npdu_kdtree_sampling(pc, 1024, k=64) ``` -> NOTE: NPDU method only gives sub-optimal answers. And it assumes that the points are approximately sorted or have **dimensional locality**. Otherwise, the result may be **worse**. Check the paper for details. +**NOTE**: NPDU method only gives sub-optimal answers. And it assumes that the points are approximately sorted or have **dimensional locality**. Otherwise, the result may be **worse**. Check the paper for details. + +NPDU+KDTree method is more robust than NPDU method. It does not require the dimensional locality. But it is slightly slower than vanilla NPDU method. **It is recommended to use NPDU+KDTree method in general cases**. ## Performance Setup: - CPU: Intel(R) Core(TM) i9-10940X CPU @ 3.30GHz - RAM: 128 GiB -| Method | #samples | #points | Time | -|:--------:|:--------:|:-------:|:-----------------:| -| FPS | 1024 | 4096 | 18.6 ms ± 1.3 ms | -| FPS+NPDU | 1024 | 4096 | 5.97 ms ± 0.61 ms | -| FPS | 50,000 | 100,000 | 27.4 s ± 551 ms | -| FPS+NPDU | 50,000 | 100,000 | 5.36 s ± 152 ms | +| Method | #samples | #points | Time | +| :-------------: | :------: | :-----: | :----------------: | +| FPS | 1024 | 4096 | 18.6 ms ± 0.17 ms | +| FPS+NPDU | 1024 | 4096 | 3.68 ms ± 0.10 ms | +| FPS+NPDU+KDTree | 1024 | 4096 | 13.10 ms ± 0.16 ms | +| FPS | 4000 | 50,000 | 832 ms ± 9.01 ms | +| FPS+NPDU | 4000 | 50,000 | 143 ms ± 1.98 ms | +| FPS+NPDU+KDTree | 4000 | 50,000 | 294 ms ± 9.06 ms | +| FPS | 50,000 | 100,000 | 22.1 s ± 207 ms | +| FPS+NPDU | 50,000 | 100,000 | 3.35 s ± 66.8 ms | +| FPS+NPDU+KDTree | 50,000 | 100,000 | 4.08 s ± 60.4 ms | ## Reference The nearest-point-distance-updating (NPDU) heuristic strategy is proposed in the following paper: diff --git a/pyproject.toml b/pyproject.toml index b744851..f3462e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "fpsample" -version = "0.1.0" +version = "0.2.0" authors = [{ name = "Leonard Lin", email = "leonard.keilin@gmail.com" }] keyword = [ "sampling", @@ -12,7 +12,7 @@ keyword = [ "furthest point sampling", "point cloud", ] -description = "A efficient CPU implementation of farthest point sampling (FPS) for point clouds." +description = "An efficient CPU implementation of farthest point sampling (FPS) for point clouds." readme = "README.md" license = { file = "LICENSE" } requires-python = ">=3.7" @@ -24,8 +24,8 @@ classifiers = [ dependencies = ["numpy>=1.16.0"] [project.urls] -Homepage = "https://github.com/leonardodalinky/fpsample" Repository = "https://github.com/leonardodalinky/fpsample" +Tracker = "https://github.com/leonardodalinky/fpsample/issues" [tool.maturin] python-source = "python" diff --git a/python/fpsample/wrapper.py b/python/fpsample/wrapper.py index 9c3857d..69dc79a 100644 --- a/python/fpsample/wrapper.py +++ b/python/fpsample/wrapper.py @@ -1,8 +1,9 @@ +import warnings from typing import Optional import numpy as np -from .fpsample import _fps_npdu_sampling, _fps_sampling +from .fpsample import _fps_npdu_kdtree_sampling, _fps_npdu_sampling, _fps_sampling def fps_sampling(pc: np.ndarray, n_samples: int) -> np.ndarray: @@ -40,6 +41,32 @@ def fps_npdu_sampling(pc: np.ndarray, n_samples: int, k: Optional[int] = None) - assert n_pts >= n_samples, "n_pts should be >= n_samples" pc = pc.astype(np.float32) k = k or int(n_pts / n_samples * 16) + if k >= n_pts - 1: + warnings.warn(f"k is too large, set to {n_pts - 1}") + k = n_pts - 1 # Random pick a start start_idx = np.random.randint(low=0, high=n_pts) return _fps_npdu_sampling(pc, n_samples, k, start_idx) + + +def fps_npdu_kdtree_sampling(pc: np.ndarray, n_samples: int, k: Optional[int] = None) -> np.ndarray: + """ + Args: + pc (np.ndarray): The input point cloud of shape (n_pts, D). + n_samples (int): Number of samples. + k (int, default=None): Windows size of local heuristic search. If set to None, it will be set to `n_pts / n_samples * 16`. + Returns: + np.ndarray: The selected indices of shape (n_samples,). + """ + assert n_samples >= 1, "n_samples should be >= 1" + assert pc.ndim == 2 + n_pts, _ = pc.shape + assert n_pts >= n_samples, "n_pts should be >= n_samples" + pc = pc.astype(np.float32) + k = k or int(n_pts / n_samples * 16) + if k >= n_pts: + warnings.warn(f"k is too large, set to {n_pts}") + k = n_pts + # Random pick a start + start_idx = np.random.randint(low=0, high=n_pts) + return _fps_npdu_kdtree_sampling(pc, n_samples, k, start_idx) diff --git a/src/lib.rs b/src/lib.rs index bda674a..688c8af 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +use kdtree::distance::squared_euclidean; +use kdtree::KdTree; use numpy::{ ndarray::s, ndarray::{Array1, ArrayView2, Axis, Zip}, @@ -174,10 +176,92 @@ fn fps_npdu_sampling_py<'py>( Ok(ret) } +fn fps_npdu_kdtree_sampling( + points: ArrayView2, + n_samples: usize, + k: usize, + start_idx: usize, +) -> Array1 { + let [p, c] = points.shape() else { + panic!("points must be a 2D array") + }; + // contruct kd-tree + let std_points = points.as_standard_layout(); + let mut kdtree: KdTree = KdTree::new(*c); + let std_points_vec = std_points.outer_iter().collect::>(); + let std_points_slice_vec = std_points_vec + .iter() + .map(|x| x.as_slice().unwrap()) + .collect::>(); + for (i, point) in std_points_slice_vec.iter().enumerate() { + kdtree.add(*point, i).unwrap(); + } + + // previous round selected point index + let mut res_selected_idx: Option = None; + // distance from each point to the selected point set + let mut dist_pts_to_selected_min = Array1::::from_elem((*p,), f32::INFINITY); + // selected points index + let mut selected_pts_idx = Vec::::with_capacity(n_samples); + + while selected_pts_idx.len() < n_samples { + if let Some(prev_idx) = res_selected_idx { + // find nearest point + let nearest_idx = kdtree + .nearest(std_points_slice_vec[prev_idx], k, &squared_euclidean) + .unwrap(); + for (dist, idx) in nearest_idx { + // dist_pts_to_selected_min[idx] = f32::min(dist_pts_to_selected_min[idx], dist); + let d = dist_pts_to_selected_min.get_mut(*idx).unwrap(); + *d = f32::min(*d, dist); + } + + let max_idx = dist_pts_to_selected_min + .indexed_iter() + .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap()) + .unwrap() + .0; + selected_pts_idx.push(max_idx); + res_selected_idx = Some(max_idx); + } else { + // update distance at the first round + let dist = &points - &points.slice(s![start_idx, ..]); + let dist = dist.mapv(|x| x.powi(2)).sum_axis(Axis(1)); + // update min distance + Zip::from(&mut dist_pts_to_selected_min) + .and(&dist) + .for_each(|x, &y| { + *x = f32::min(*x, y); + }); + // first point + selected_pts_idx.push(start_idx); + res_selected_idx = Some(start_idx); + } + } + selected_pts_idx.into() +} + +#[pyfunction] +#[pyo3(name = "_fps_npdu_kdtree_sampling")] +fn fps_npdu_kdtree_sampling_py<'py>( + py: Python<'py>, + points: PyReadonlyArray2, + n_samples: usize, + k: usize, + start_idx: usize, +) -> PyResult<&'py PyArray1> { + check_py_input(&points, n_samples, start_idx)?; + let points = points.as_array(); + let idxs = py.allow_threads(|| fps_npdu_kdtree_sampling(points, n_samples, k, start_idx)); + let ret = idxs.to_pyarray(py); + Ok(ret) +} + #[pymodule] fn fpsample(_py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(fps_sampling_py, m)?)?; m.add_function(wrap_pyfunction!(fps_npdu_sampling_py, m)?)?; + m.add_function(wrap_pyfunction!(fps_npdu_kdtree_sampling_py, m)?)?; Ok(()) }