From 76b2f004fa0b4b6a08bdf471338d25a8c511028b Mon Sep 17 00:00:00 2001 From: koide3 <31344317+koide3@users.noreply.github.com> Date: Mon, 1 Jul 2024 16:25:28 +0900 Subject: [PATCH] improve knn performance of voxelmaps (#79) * improve knn performance of voxelmaps * add voxel search patterns * add (gicp|vgicp)_model_omp --- CMakeLists.txt | 2 + include/small_gicp/ann/flat_container.hpp | 54 +++++------ include/small_gicp/ann/gaussian_voxelmap.hpp | 5 + .../small_gicp/ann/incremental_voxelmap.hpp | 94 +++++++++++++------ include/small_gicp/ann/knn_result.hpp | 27 ++++-- ...dometry_benchmark_small_gicp_model_omp.cpp | 66 +++++++++++++ ...ometry_benchmark_small_vgicp_model_omp.cpp | 66 +++++++++++++ 7 files changed, 251 insertions(+), 63 deletions(-) create mode 100644 src/benchmark/odometry_benchmark_small_gicp_model_omp.cpp create mode 100644 src/benchmark/odometry_benchmark_small_vgicp_model_omp.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ccc42a0b..4cdcd860 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,7 +187,9 @@ if(BUILD_BENCHMARKS) src/benchmark/odometry_benchmark_small_gicp_tbb.cpp src/benchmark/odometry_benchmark_small_vgicp_tbb.cpp src/benchmark/odometry_benchmark_small_gicp_model_tbb.cpp + src/benchmark/odometry_benchmark_small_gicp_model_omp.cpp src/benchmark/odometry_benchmark_small_vgicp_model_tbb.cpp + src/benchmark/odometry_benchmark_small_vgicp_model_omp.cpp src/benchmark/odometry_benchmark_small_gicp_tbb_flow.cpp src/benchmark/odometry_benchmark.cpp ) diff --git a/include/small_gicp/ann/flat_container.hpp b/include/small_gicp/ann/flat_container.hpp index f16bde37..0d48d180 100644 --- a/include/small_gicp/ann/flat_container.hpp +++ b/include/small_gicp/ann/flat_container.hpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace small_gicp { @@ -67,21 +68,25 @@ struct FlatContainer { return 0; } - size_t min_index = -1; - double min_sq_dist = std::numeric_limits::max(); + KnnResult<1> result(k_index, k_sq_dist); + knn_search(pt, result); + return result.num_found(); + } - for (size_t i = 0; i < points.size(); i++) { - const double sq_dist = (points[i] - pt).squaredNorm(); - if (sq_dist < min_sq_dist) { - min_index = i; - min_sq_dist = sq_dist; - } + /// @brief Find k nearest neighbors. + /// @param pt Query point + /// @param k Number of neighbors + /// @param k_index Indices of nearest neighbors + /// @param k_sq_dist Squared distances to nearest neighbors + /// @return Number of found points + size_t knn_search(const Eigen::Vector4d& pt, int k, size_t* k_indices, double* k_sq_dists) const { + if (points.empty()) { + return 0; } - *k_index = min_index; - *k_sq_dist = min_sq_dist; - - return 1; + KnnResult<-1> result(k_indices, k_sq_dists, k); + knn_search(pt, result); + return result.num_found(); } /// @brief Find k nearest neighbors. @@ -90,28 +95,16 @@ struct FlatContainer { /// @param k_index Indices of nearest neighbors /// @param k_sq_dist Squared distances to nearest neighbors /// @return Number of found points - size_t knn_search(const Eigen::Vector4d& pt, int k, size_t* k_index, double* k_sq_dist) const { + template + void knn_search(const Eigen::Vector4d& pt, Result& result) const { if (points.empty()) { - return 0; + return; } - std::priority_queue> queue; for (size_t i = 0; i < points.size(); i++) { const double sq_dist = (points[i] - pt).squaredNorm(); - queue.push({i, sq_dist}); - if (queue.size() > k) { - queue.pop(); - } + result.push(i, sq_dist); } - - const size_t n = queue.size(); - while (!queue.empty()) { - k_index[queue.size() - 1] = queue.top().first; - k_sq_dist[queue.size() - 1] = queue.top().second; - queue.pop(); - } - - return n; } public: @@ -151,6 +144,11 @@ struct Traits> { static size_t knn_search(const FlatContainer& container, const Eigen::Vector4d& pt, size_t k, size_t* k_index, double* k_sq_dist) { return container.knn_search(pt, k, k_index, k_sq_dist); } + + template + static void knn_search(const FlatContainer& container, const Eigen::Vector4d& pt, Result& result) { + container.knn_search(pt, result); + } }; } // namespace traits diff --git a/include/small_gicp/ann/gaussian_voxelmap.hpp b/include/small_gicp/ann/gaussian_voxelmap.hpp index 7f7fc38d..51d1aad2 100644 --- a/include/small_gicp/ann/gaussian_voxelmap.hpp +++ b/include/small_gicp/ann/gaussian_voxelmap.hpp @@ -79,6 +79,11 @@ struct Traits { static size_t knn_search(const GaussianVoxel& voxel, const Eigen::Vector4d& pt, size_t k, size_t* k_index, double* k_sq_dist) { return nearest_neighbor_search(voxel, pt, k_index, k_sq_dist); } + + template + static void knn_search(const GaussianVoxel& voxel, const Eigen::Vector4d& pt, Result& result) { + result.push(0, (voxel.mean - pt).squaredNorm()); + } }; } // namespace traits diff --git a/include/small_gicp/ann/incremental_voxelmap.hpp b/include/small_gicp/ann/incremental_voxelmap.hpp index 73ecba14..f297ac2f 100644 --- a/include/small_gicp/ann/incremental_voxelmap.hpp +++ b/include/small_gicp/ann/incremental_voxelmap.hpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -40,7 +41,7 @@ struct IncrementalVoxelMap { /// @brief Constructor. /// @param leaf_size Voxel size - explicit IncrementalVoxelMap(double leaf_size) : inv_leaf_size(1.0 / leaf_size), lru_horizon(100), lru_clear_cycle(10), lru_counter(0) {} + explicit IncrementalVoxelMap(double leaf_size) : inv_leaf_size(1.0 / leaf_size), lru_horizon(100), lru_clear_cycle(10), lru_counter(0) { set_search_offsets(1); } /// @brief Number of points in the voxelmap. size_t size() const { return flat_voxels.size(); } @@ -94,22 +95,25 @@ struct IncrementalVoxelMap { /// @param sq_dist Squared distance to the nearest neighbor /// @return Number of found points (0 or 1) size_t nearest_neighbor_search(const Eigen::Vector4d& pt, size_t* index, double* sq_dist) const { - const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).template head<3>(); - const auto found = voxels.find(coord); - if (found == voxels.end()) { - return 0; - } + const Eigen::Vector3i center = fast_floor(pt * inv_leaf_size).template head<3>(); + size_t voxel_index = 0; + const auto index_transform = [&](size_t i) { return calc_index(voxel_index, i); }; + KnnResult<1, decltype(index_transform)> result(index, sq_dist, -1, index_transform); + + for (const auto& offset : search_offsets) { + const Eigen::Vector3i coord = center + offset; + const auto found = voxels.find(coord); + if (found == voxels.end()) { + continue; + } - const size_t voxel_index = found->second; - const auto& voxel = flat_voxels[voxel_index]->second; + voxel_index = found->second; + const auto& voxel = flat_voxels[voxel_index]->second; - size_t point_index; - if (traits::nearest_neighbor_search(voxel, pt, &point_index, sq_dist) == 0) { - return 0; + traits::Traits::knn_search(voxel, pt, result); } - *index = calc_index(voxel_index, point_index); - return 1; + return result.num_found(); } /// @brief Find k nearest neighbors @@ -119,24 +123,26 @@ struct IncrementalVoxelMap { /// @param k_sq_dists Squared distances to nearest neighbors /// @return Number of found points size_t knn_search(const Eigen::Vector4d& pt, size_t k, size_t* k_indices, double* k_sq_dists) const { - const Eigen::Vector3i coord = fast_floor(pt * inv_leaf_size).template head<3>(); - const auto found = voxels.find(coord); - if (found == voxels.end()) { - return 0; - } + const Eigen::Vector3i center = fast_floor(pt * inv_leaf_size).template head<3>(); + + size_t voxel_index = 0; + const auto index_transform = [&](size_t i) { return calc_index(voxel_index, i); }; + KnnResult<-1, decltype(index_transform)> result(k_indices, k_sq_dists, k, index_transform); - const size_t voxel_index = found->second; - const auto& voxel = flat_voxels[voxel_index]->second; + for (const auto& offset : search_offsets) { + const Eigen::Vector3i coord = center + offset; + const auto found = voxels.find(coord); + if (found == voxels.end()) { + continue; + } - std::vector point_indices(k); - std::vector sq_dists(k); - const size_t num_found = traits::knn_search(voxel, pt, k, point_indices.data(), sq_dists.data()); + voxel_index = found->second; + const auto& voxel = flat_voxels[voxel_index]->second; - for (size_t i = 0; i < num_found; i++) { - k_indices[i] = calc_index(voxel_index, point_indices[i]); - k_sq_dists[i] = sq_dists[i]; + traits::Traits::knn_search(voxel, pt, result); } - return num_found; + + return result.num_found(); } /// @brief Calculate the global point index from the voxel index and the point index. @@ -144,6 +150,38 @@ struct IncrementalVoxelMap { inline size_t voxel_id(const size_t i) const { return i >> point_id_bits; } ///< Extract the point ID from a global index. inline size_t point_id(const size_t i) const { return i & ((1ull << point_id_bits) - 1); } ///< Extract the voxel ID from a global index. + /// @brief Set the pattern of the search offsets. (Must be 1, 7, or 27) + void set_search_offsets(int num_offsets) { + switch (num_offsets) { + default: + std::cerr << "warning: unsupported search_offsets=" << num_offsets << " (supported values: 1, 7, 27)" << std::endl; + std::cerr << " : using default search_offsets=1" << std::endl; + [[fallthrough]]; + case 1: + search_offsets = {Eigen::Vector3i(0, 0, 0)}; + break; + case 7: + search_offsets = { + Eigen::Vector3i(0, 0, 0), + Eigen::Vector3i(1, 0, 0), + Eigen::Vector3i(0, 1, 0), + Eigen::Vector3i(0, 0, 1), + Eigen::Vector3i(-1, 0, 0), + Eigen::Vector3i(0, -1, 0), + Eigen::Vector3i(0, 0, -1)}; + break; + case 27: + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + search_offsets.emplace_back(i, j, k); + } + } + } + break; + } + } + public: static_assert(sizeof(size_t) == 8, "size_t must be 64-bit"); static constexpr int point_id_bits = 32; ///< Use the first 32 bits for point id @@ -154,6 +192,8 @@ struct IncrementalVoxelMap { size_t lru_clear_cycle; ///< LRU clear cycle. Voxel deletion is performed every lru_clear_cycle steps. size_t lru_counter; ///< LRU counter. Incremented every step. + std::vector search_offsets; ///< Voxel search offsets. + typename VoxelContents::Setting voxel_setting; ///< Voxel setting. std::vector>> flat_voxels; ///< Voxel contents. std::unordered_map voxels; ///< Voxel index map. diff --git a/include/small_gicp/ann/knn_result.hpp b/include/small_gicp/ann/knn_result.hpp index 53ee3756..edaa4197 100644 --- a/include/small_gicp/ann/knn_result.hpp +++ b/include/small_gicp/ann/knn_result.hpp @@ -22,9 +22,14 @@ struct KnnSetting { double epsilon = 0.0; ///< Early termination threshold }; +/// @brief Identity transform (alternative to std::identity in C++20). +struct identity_transform { + size_t operator()(size_t i) const { return i; } +}; + /// @brief K-nearest neighbor search result container. /// @tparam N Number of neighbors to search. If N == -1, the number of neighbors is dynamicaly determined. -template +template struct KnnResult { public: static constexpr size_t INVALID = std::numeric_limits::max(); @@ -33,7 +38,12 @@ struct KnnResult { /// @param indices Buffer to store indices (must be larger than k=max(N, num_neighbors)) /// @param distances Buffer to store distances (must be larger than k=max(N, num_neighbors)) /// @param num_neighbors Number of neighbors to search (must be -1 for static case N > 0) - explicit KnnResult(size_t* indices, double* distances, int num_neighbors = -1) : capacity(num_neighbors), num_found_neighbors(0), indices(indices), distances(distances) { + explicit KnnResult(size_t* indices, double* distances, int num_neighbors = -1, const IndexTransform& index_transform = identity_transform()) + : index_transform(index_transform), + capacity(num_neighbors), + num_found_neighbors(0), + indices(indices), + distances(distances) { if constexpr (N > 0) { if (num_neighbors >= 0) { std::cerr << "warning: Specifying dynamic num_neighbors=" << num_neighbors << " for a static KNN result container (N=" << N << ")" << std::endl; @@ -72,7 +82,7 @@ struct KnnResult { } if constexpr (N == 1) { - indices[0] = index; + indices[0] = index_transform(index); distances[0] = distance; } else { int insert_loc = std::min(num_found_neighbors, buffer_size() - 1); @@ -81,7 +91,7 @@ struct KnnResult { distances[insert_loc] = distances[insert_loc - 1]; } - indices[insert_loc] = index; + indices[insert_loc] = index_transform(index); distances[insert_loc] = distance; } @@ -89,10 +99,11 @@ struct KnnResult { } public: - const int capacity; ///< Maximum number of neighbors to search - int num_found_neighbors; ///< Number of found neighbors - size_t* indices; ///< Indices of neighbors - double* distances; ///< Distances to neighbors + const IndexTransform index_transform; ///< Point index transformation (e.g., local point index to global point/voxel index) + const int capacity; ///< Maximum number of neighbors to search + int num_found_neighbors; ///< Number of found neighbors + size_t* indices; ///< Indices of neighbors + double* distances; ///< Distances to neighbors }; } // namespace small_gicp diff --git a/src/benchmark/odometry_benchmark_small_gicp_model_omp.cpp b/src/benchmark/odometry_benchmark_small_gicp_model_omp.cpp new file mode 100644 index 00000000..3348ef61 --- /dev/null +++ b/src/benchmark/odometry_benchmark_small_gicp_model_omp.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include +#include +#include +#include +#include + +namespace small_gicp { + +class SmallGICPModelOnlineOdometryEstimationOMP : public OnlineOdometryEstimation { +public: + explicit SmallGICPModelOnlineOdometryEstimationOMP(const OdometryEstimationParams& params) : OnlineOdometryEstimation(params), T_world_lidar(Eigen::Isometry3d::Identity()) {} + + Eigen::Isometry3d estimate(const PointCloud::Ptr& points) override { + Stopwatch sw; + sw.start(); + + // Note that input points here is already downsampled + // Estimate per-point covariances + estimate_covariances_omp(*points, params.num_neighbors, params.num_threads); + + if (voxelmap == nullptr) { + // This is the very first frame + voxelmap = std::make_shared>(params.voxel_resolution); + voxelmap->insert(*points); + return T_world_lidar; + } + + // Registration using GICP + OMP-based parallel reduction + Registration registration; + registration.reduction.num_threads = params.num_threads; + auto result = registration.align(*voxelmap, *points, *voxelmap, T_world_lidar); + + // Update T_world_lidar with the estimated transformation + T_world_lidar = result.T_target_source; + + // Insert points to the target voxel map + voxelmap->insert(*points, T_world_lidar); + + sw.stop(); + reg_times.push(sw.msec()); + + if (params.visualize) { + update_model_points(T_world_lidar, traits::voxel_points(*voxelmap)); + } + + return T_world_lidar; + } + + void report() override { // + std::cout << "registration_time_stats=" << reg_times.str() << " [msec/scan] total_throughput=" << total_times.str() << " [msec/scan]" << std::endl; + } + +private: + Summarizer reg_times; + + IncrementalVoxelMap::Ptr voxelmap; // Target voxel map that is an accumulation of past point clouds + Eigen::Isometry3d T_world_lidar; // Current world-to-lidar transformation +}; + +static auto small_gicp_model_omp_registry = + register_odometry("small_gicp_model_omp", [](const OdometryEstimationParams& params) { return std::make_shared(params); }); + +} // namespace small_gicp diff --git a/src/benchmark/odometry_benchmark_small_vgicp_model_omp.cpp b/src/benchmark/odometry_benchmark_small_vgicp_model_omp.cpp new file mode 100644 index 00000000..5710899a --- /dev/null +++ b/src/benchmark/odometry_benchmark_small_vgicp_model_omp.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include +#include +#include +#include +#include + +namespace small_gicp { + +class SmallVGICPModelOnlineOdometryEstimationOMP : public OnlineOdometryEstimation { +public: + explicit SmallVGICPModelOnlineOdometryEstimationOMP(const OdometryEstimationParams& params) : OnlineOdometryEstimation(params), T_world_lidar(Eigen::Isometry3d::Identity()) {} + + Eigen::Isometry3d estimate(const PointCloud::Ptr& points) override { + Stopwatch sw; + sw.start(); + + // Note that input points here is already downsampled + // Estimate per-point covariances + estimate_covariances_omp(*points, params.num_neighbors, params.num_threads); + + if (voxelmap == nullptr) { + // This is the very first frame + voxelmap = std::make_shared(params.voxel_resolution); + voxelmap->insert(*points); + return T_world_lidar; + } + + // Registration using GICP + OMP-based parallel reduction + Registration registration; + registration.reduction.num_threads = params.num_threads; + auto result = registration.align(*voxelmap, *points, *voxelmap, T_world_lidar); + + // Update T_world_lidar with the estimated transformation + T_world_lidar = result.T_target_source; + + // Insert points to the target voxel map + voxelmap->insert(*points, T_world_lidar); + + sw.stop(); + reg_times.push(sw.msec()); + + if (params.visualize) { + update_model_points(T_world_lidar, traits::voxel_points(*voxelmap)); + } + + return T_world_lidar; + } + + void report() override { // + std::cout << "registration_time_stats=" << reg_times.str() << " [msec/scan] total_throughput=" << total_times.str() << " [msec/scan]" << std::endl; + } + +private: + Summarizer reg_times; + + GaussianVoxelMap::Ptr voxelmap; // Target voxel map that is an accumulation of past point clouds + Eigen::Isometry3d T_world_lidar; // Current world-to-lidar transformation +}; + +static auto small_gicp_model_omp_registry = + register_odometry("small_vgicp_model_omp", [](const OdometryEstimationParams& params) { return std::make_shared(params); }); + +} // namespace small_gicp