diff --git a/.bazelrc b/.bazelrc index ce4a80e..c3f61f9 100644 --- a/.bazelrc +++ b/.bazelrc @@ -28,6 +28,7 @@ build:ci --announce_rc #build:linux --copt="-march=skylake" #build:linux --copt="-march=haswell" #build:linux --copt="-march=native" +#build:linux --copt="-fno-inline" build:linux --copt="-fvisibility=hidden" build:linux --copt="-fno-omit-frame-pointer" # for friendlier stack traces build:linux --copt="-Wno-error" diff --git a/README.md b/README.md index 412e257..45ddc63 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,10 @@ More information about PH-Trees (including a Java implementation) is available [ * [Theory](#theory) +## License + + + ---------------------------------- ## API Usage @@ -674,3 +678,15 @@ The PH-Tree is discussed in the following publications and reports: - T. Zaeschke: "The PH-Tree Revisited", (2015) - T. Zaeschke, M.C. Norrie: "Efficient Z-Ordered Traversal of Hypercube Indexes" (BTW 2017). +## License + + + +The PH-tree is licensed under [Apache APL 2.0](LICENSE), except for code in +[include/phtree/aux](include/phtree/aux). + +The code in [include/phtree/aux](include/phtree/aux) is based on code by +Malte Skarupke (Copyright 2020) and is licensed under the +[Boost Software License 1.0](https://www.boost.org/LICENSE_1_0.txt). + + diff --git a/benchmark/BUILD b/benchmark/BUILD index 4b84294..1152c5d 100644 --- a/benchmark/BUILD +++ b/benchmark/BUILD @@ -248,6 +248,21 @@ cc_binary( ], ) +cc_binary( + name = "knn_mm_d_benchmark", + testonly = True, + srcs = [ + "knn_mm_d_benchmark.cc", + ], + linkstatic = True, + deps = [ + ":benchmark", + "//:phtree", + "@gbenchmark//:benchmark", + "@spdlog", + ], +) + cc_binary( name = "query_benchmark", testonly = True, diff --git a/benchmark/bpt_insert_benchmark.cc b/benchmark/bpt_insert_benchmark.cc index 9e25b7b..da4a3b1 100644 --- a/benchmark/bpt_insert_benchmark.cc +++ b/benchmark/bpt_insert_benchmark.cc @@ -1,5 +1,5 @@ /* -* Copyright 2022-2023 Tilmann Zäschke + * Copyright 2022-2023 Tilmann Zäschke * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ #include "benchmark_util.h" #include "logging.h" #include "phtree/common/b_plus_tree_hash_map.h" +#include "phtree/common/b_plus_tree_heap.h" #include "phtree/common/b_plus_tree_map.h" #include "phtree/common/b_plus_tree_multimap.h" #include @@ -30,6 +31,7 @@ const int GLOBAL_MAX = 10000; enum Scenario { MAP, MULTIMAP, + MULTIMAP2, HASH_MAP, STD_MAP, STD_MULTIMAP, @@ -118,7 +120,22 @@ void IndexBenchmark::SetupWorld(benchmark::State& state) { template void IndexBenchmark::Insert(benchmark::State& state, Index& tree) { switch (TYPE) { - default: { + case MAP: { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i][0], (payload_t)i); + } + break; + } + case MULTIMAP: + case MULTIMAP2: + case STD_MULTIMAP: { + for (size_t i = 0; i < num_entities_; ++i) { + tree.emplace(points_[i][0], (payload_t)i); + } + break; + } + case HASH_MAP: + case STD_MAP: { for (size_t i = 0; i < num_entities_; ++i) { tree.emplace(points_[i][0], (payload_t)i); } diff --git a/benchmark/hd_erase_d_benchmark.cc b/benchmark/hd_erase_d_benchmark.cc index f2650c1..4f03e24 100644 --- a/benchmark/hd_erase_d_benchmark.cc +++ b/benchmark/hd_erase_d_benchmark.cc @@ -34,7 +34,7 @@ using payload_t = std::uint32_t; template class IndexBenchmark { public: - IndexBenchmark(benchmark::State& state); + explicit IndexBenchmark(benchmark::State& state); void Benchmark(benchmark::State& state); private: diff --git a/benchmark/knn_d_benchmark.cc b/benchmark/knn_d_benchmark.cc index dcf5abf..5c3b9f0 100644 --- a/benchmark/knn_d_benchmark.cc +++ b/benchmark/knn_d_benchmark.cc @@ -140,6 +140,12 @@ BENCHMARK_CAPTURE(PhTree3D, KNN_CU_10_of_10K, TestGenerator::CUBE, 10000, 10) BENCHMARK_CAPTURE(PhTree3D, KNN_CU_10_of_1M, TestGenerator::CUBE, 1000000, 10) ->Unit(benchmark::kMillisecond); +BENCHMARK_CAPTURE(PhTree3D, KNN_CU_100_of_10K, TestGenerator::CUBE, 10000, 100) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CU_100_of_1M, TestGenerator::CUBE, 1000000, 100) + ->Unit(benchmark::kMillisecond); + // index type, scenario name, data_type, num_entities, query_result_size // PhTree 3D CLUSTER BENCHMARK_CAPTURE(PhTree3D, KNN_CL_1_of_10K, TestGenerator::CLUSTER, 10000, 1) @@ -154,4 +160,10 @@ BENCHMARK_CAPTURE(PhTree3D, KNN_CL_10_of_10K, TestGenerator::CLUSTER, 10000, 10) BENCHMARK_CAPTURE(PhTree3D, KNN_CL_10_of_1M, TestGenerator::CLUSTER, 1000000, 10) ->Unit(benchmark::kMillisecond); +BENCHMARK_CAPTURE(PhTree3D, KNN_CL_100_of_10K, TestGenerator::CLUSTER, 10000, 100) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_CL_100_of_1M, TestGenerator::CLUSTER, 1000000, 100) + ->Unit(benchmark::kMillisecond); + BENCHMARK_MAIN(); diff --git a/benchmark/knn_mm_d_benchmark.cc b/benchmark/knn_mm_d_benchmark.cc new file mode 100644 index 0000000..391fc59 --- /dev/null +++ b/benchmark/knn_mm_d_benchmark.cc @@ -0,0 +1,282 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
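Editor's note: the hunks above add 100-NN cases to knn_d_benchmark.cc and a new knn_mm_d_benchmark target, all registered through Google Benchmark's BENCHMARK_CAPTURE macro. For readers unfamiliar with that macro, here is a minimal standalone sketch of the registration pattern; KnnSketch is a hypothetical stand-in, not part of this patch.

```cpp
#include <benchmark/benchmark.h>

// Hypothetical stand-in for the PhTree3D/PhTreeMM benchmark templates:
// BENCHMARK_CAPTURE binds the trailing arguments to one named test case.
template <typename... Arguments>
void KnnSketch(benchmark::State& state, Arguments&&... /*args*/) {
    for (auto _ : state) {
        benchmark::DoNotOptimize(state.iterations());  // placeholder for one k-NN query
    }
}

// Registers "KnnSketch/KNN_CU_100_of_10K", reported in milliseconds,
// mirroring the KNN_CU_100_of_10K / KNN_CL_100_of_1M captures added above.
BENCHMARK_CAPTURE(KnnSketch, KNN_CU_100_of_10K, /*generator*/ 0, /*n*/ 10000, /*k*/ 100)
    ->Unit(benchmark::kMillisecond);

BENCHMARK_MAIN();
```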
+ */ +#include "benchmark_util.h" +#include "logging.h" +#include "phtree/phtree.h" +#include "phtree/phtree_multimap.h" +#include +#include + +using namespace improbable; +using namespace improbable::phtree; +using namespace improbable::phtree::phbenchmark; + +/* + * Benchmark for k-nearest-neighbour queries in multi-map implementations. + */ +namespace { + +const double GLOBAL_MAX = 10000; +const dimension_t DIM = 3; + +enum Scenario { + TREE_SET, + PHTREE_MM, + PHTREE_MM_STD, + PHTREE2, + TS_KD, + TS_QT, +}; + +using payload_t = int64_t; +using payload2_t = uint32_t; + +using TestPoint = PhPointD; +using QueryBox = PhBoxD; +using BucketType = std::set; + +template +using CONVERTER = ConverterIEEE; + +template +using TestMap = typename std::conditional_t< + SCENARIO == TREE_SET, + PhTreeD>, + typename std::conditional_t< + SCENARIO == PHTREE_MM, + PhTreeMultiMap, b_plus_tree_hash_set>, +// typename std::conditional_t< +// SCENARIO == PHTREE2, +// PhTreeMultiMap2D, + typename std::conditional_t< + SCENARIO == PHTREE_MM_STD, + PhTreeMultiMap, BucketType>, + void>>>; + +template +class IndexBenchmark { + public: + IndexBenchmark(benchmark::State& state, int knn_result_size_); + + void Benchmark(benchmark::State& state); + + private: + void SetupWorld(benchmark::State& state); + void QueryWorld(benchmark::State& state, TestPoint& center); + void CreateQuery(TestPoint& center); + + const TestGenerator data_type_; + const size_t num_entities_; + const size_t knn_result_size_; + + TestMap tree_; + std::default_random_engine random_engine_; + std::uniform_real_distribution<> cube_distribution_; + std::vector points_; +}; + +template +IndexBenchmark::IndexBenchmark(benchmark::State& state, int knn_result_size) +: data_type_{static_cast(state.range(1))} +, num_entities_(state.range(0)) +, knn_result_size_(knn_result_size) +, random_engine_{1} +, cube_distribution_{0, GLOBAL_MAX} +, points_(state.range(0)) { + logging::SetupDefaultLogging(); + SetupWorld(state); +} + +template +void IndexBenchmark::Benchmark(benchmark::State& state) { + for (auto _ : state) { + state.PauseTiming(); + TestPoint center; + CreateQuery(center); + state.ResumeTiming(); + + QueryWorld(state, center); + } +} + +template < + dimension_t DIM, + Scenario SCENARIO, + std::enable_if_t<(SCENARIO == Scenario::TREE_SET), int> = 0> +void InsertEntries(TestMap& tree, const std::vector& points) { + for (size_t i = 0; i < points.size(); ++i) { + BucketType& bucket = tree.emplace(points[i]).first; + bucket.emplace((payload_t)i); + } +} + +template = 0> +void InsertEntries(TestMap& tree, const std::vector& points) { + for (size_t i = 0; i < points.size(); ++i) { + tree.emplace(points[i], (payload_t)i); + } +} + +template +size_t QueryAll(TestMap& tree, const TestPoint& center, const size_t k) { + size_t n = 0; + for (auto q = tree.begin_knn_query(k, center, DistanceEuclidean()); q != tree.end(); ++q) { + ++n; + } + return n; +} + +struct CounterTreeWithMap { + void operator()(const TestPoint&, const BucketType& value) { + for (auto& x : value) { + (void)x; + n_ += 1; + } + } + size_t n_; +}; + +struct CounterMultiMap { + void operator()(const TestPoint&, const payload_t&) { + n_ += 1; + } + size_t n_; +}; + +template +void IndexBenchmark::SetupWorld(benchmark::State& state) { + logging::info("Setting up world with {} entities and {} dimensions.", num_entities_, DIM); + CreatePointData(points_, data_type_, num_entities_, 0, GLOBAL_MAX); + InsertEntries(tree_, points_); + + state.counters["query_rate"] = benchmark::Counter(0, 
benchmark::Counter::kIsRate); + state.counters["result_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate); + state.counters["avg_result_count"] = benchmark::Counter(0, benchmark::Counter::kAvgIterations); + logging::info("World setup complete."); +} + +template +void IndexBenchmark::QueryWorld(benchmark::State& state, TestPoint& center) { + size_t n = QueryAll(tree_, center, knn_result_size_); + + state.counters["query_rate"] += 1; + state.counters["result_rate"] += n; + state.counters["avg_result_count"] += n; +} + +template +void IndexBenchmark::CreateQuery(TestPoint& center) { + for (dimension_t d = 0; d < DIM; ++d) { + center[d] = cube_distribution_(random_engine_) * GLOBAL_MAX; + } +} + +} // namespace + +// template +// void TinspinKDTree(benchmark::State& state, Arguments&&... arguments) { +// IndexBenchmark benchmark{state, arguments...}; +// benchmark.Benchmark(state); +// } +// +// template +// void TinspinQuadtree(benchmark::State& state, Arguments&&... arguments) { +// IndexBenchmark benchmark{state, arguments...}; +// benchmark.Benchmark(state); +// } + +template +void PhTree3D(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +template +void PhTreeMM(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +//template +//void PhTreeMM2(benchmark::State& state, Arguments&&... arguments) { +// IndexBenchmark benchmark{state, arguments...}; +// benchmark.Benchmark(state); +//} + +template +void PhTreeMMStdSet(benchmark::State& state, Arguments&&... arguments) { + IndexBenchmark benchmark{state, arguments...}; + benchmark.Benchmark(state); +} + +// index type, scenario name, data_type, num_entities, query_result_size + +// PhTree multi-map 1.0 +BENCHMARK_CAPTURE(PhTreeMM, KNN_1, 1) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTreeMM, KNN_10, 10) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +//// Multimap 2.0 +//BENCHMARK_CAPTURE(PhTreeMM2, KNN_1, 1) +// ->RangeMultiplier(10) +// ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) +// ->Unit(benchmark::kMillisecond); +// +//BENCHMARK_CAPTURE(PhTreeMM2, KNN_10, 10) +// ->RangeMultiplier(10) +// ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) +// ->Unit(benchmark::kMillisecond); + +//// KD-tree +// BENCHMARK_CAPTURE(TinspinKDTree, KNN_1, 1) +// ->RangeMultiplier(10) +// ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) +// ->Unit(benchmark::kMillisecond); +// +// BENCHMARK_CAPTURE(TinspinKDTree, KNN_10, 10) +// ->RangeMultiplier(10) +// ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) +// ->Unit(benchmark::kMillisecond); +// +//// Quadtree +// BENCHMARK_CAPTURE(TinspinQuadtree, KNN_1, 1) +// ->RangeMultiplier(10) +// ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) +// ->Unit(benchmark::kMillisecond); +// +// BENCHMARK_CAPTURE(TinspinQuadtree, KNN_10, 10) +// ->RangeMultiplier(10) +// ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) +// ->Unit(benchmark::kMillisecond); + +// PhTree 3D with set +BENCHMARK_CAPTURE(PhTree3D, KNN_1, 1) + ->RangeMultiplier(10) + ->Ranges({{1000, 
1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_CAPTURE(PhTree3D, KNN_10, 10) + ->RangeMultiplier(10) + ->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/fuzzer/b_plus_heap_fuzzer.cc b/fuzzer/b_plus_heap_fuzzer.cc new file mode 100644 index 0000000..c92eb86 --- /dev/null +++ b/fuzzer/b_plus_heap_fuzzer.cc @@ -0,0 +1,138 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include + +#include "include/phtree/common/b_plus_tree_heap.h" + +static volatile int Sink; + +using Instruction = std::uint8_t; +using Key = std::uint8_t; +using Value = std::uint8_t; + +constexpr bool PRINT = !true; + +void print() {} + +using Id = std::pair; + +struct CompareId { + bool operator()(const Id& left, const Id& right) const { + return left.first > right.first; + }; +}; + + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { + assert(Data); + + if (PRINT) { + std::cout << "TEST(PhTreeBptHeapTest, FuzzTest1) {" << std::endl; + std::cout << " using Key = std::uint8_t;" << std::endl; + std::cout << " using Value = std::uint8_t;" << std::endl; + std::cout << " using Id = std::pair;" << std::endl; + std::cout << " b_plus_tree_heap> tree{};" << std::endl; + } + + auto scopeguard = []() { std::cout << "};" << std::endl; }; + + //phtree::bptree::b_plus_tree_heap tree; + phtree::bptree::b_plus_tree_heap> tree; + std::multimap> map; + + size_t pos = 0; + + while (pos + 4 < Size) { + Instruction inst = Data[pos++] % 5; + Key key = Data[pos++]; + Value value = Data[pos++]; + Id entry{key, value}; + switch (inst) { + case 0: { + if (PRINT) + std::cout << " tree.emplace(" << (int)key << ", " << (int)value << ");" + << std::endl; + tree.emplace(entry); + map.emplace(key, value); + break; + } + case 1: { + if (!tree.empty()) { + if (PRINT) + std::cout << " tree.pop();" << std::endl; + tree.pop(); + map.erase(--map.end()); + } + break; + } + case 2: { + if (!tree.empty()) { + if (PRINT) + std::cout << " tree.pop_max();" << std::endl; + tree.pop_max(); + map.erase(map.begin()); + } + break; + } + case 3: { + if (!tree.empty()) { + if (PRINT) + std::cout << " auto x = tree.top();" << std::endl; + auto& x = tree.top(); + auto& x2 = *(--map.end()); + assert(x.first == x2.first); + } + break; + } + case 4: { + if (!tree.empty()) { + if (PRINT) + std::cout << " auto x = tree.top_max();" << std::endl; + auto& x = tree.top_max(); + auto& x2 = *(map.begin()); + assert(x.first == x2.first); + } + break; + } + default: + std::cout << "Unexpected instruction: " << inst << std::endl; + } + } + + tree._check(); + +// for (auto& entry : map) { +// const Key& vRef = entry.first; +// Key vMap = tree.find(vRef)->first; +// assert(vMap == vRef); +// } +// for (auto& entry : tree) { +// Key v = entry.first; +// const Key& vRef = map.find(v)->first; +// Key vMap 
= tree.find(v)->first; +// assert(vMap == vRef); +// } + assert(tree.size() == map.size()); + + return 0; +} diff --git a/fuzzer/b_plus_vector_tree_fuzzer.cc b/fuzzer/b_plus_vector_tree_fuzzer.cc new file mode 100644 index 0000000..744f386 --- /dev/null +++ b/fuzzer/b_plus_vector_tree_fuzzer.cc @@ -0,0 +1,136 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include + +#include "include/phtree/common/bpt_vector_tree.h" + +static volatile int Sink; + +using Instruction = std::uint8_t; +using Key = std::uint8_t; +using Value = std::uint8_t; + +constexpr bool PRINT = !true; + +void print() {} + +using Id = std::pair; + +struct CompareId { + bool operator()(const Id& left, const Id& right) const { + return left.first > right.first; + }; +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) { + assert(Data); + + if (PRINT) { + std::cout << "TEST(PhTreeBptHeapTest, FuzzTest1) {" << std::endl; + std::cout << " using Key = std::uint8_t;" << std::endl; + std::cout << " using Value = std::uint8_t;" << std::endl; + std::cout << " using Id = std::pair;" << std::endl; + std::cout << " vector_tree tree{};" << std::endl; + std::cout << " std::vector ref{};" << std::endl; + } + + auto scopeguard = []() { std::cout << "};" << std::endl; }; + + phtree::bptree::detail::vector_tree tree; + std::vector ref; + + size_t pos = 0; + + while (pos + 4 < Size) { + Instruction inst = Data[pos++] % 3; + Key key = Data[pos++]; + Value value = Data[pos++]; + switch (inst) { + case 0: { + if (PRINT) { + std::cout << " tree.emplace_back(" << (int)key << ", " << (int)value << ");" + << std::endl; + std::cout << " ref.emplace_back(" << (int)key << ", " << (int)value << ");" + << std::endl; + } + tree.emplace_back(key, value); + ref.emplace_back(key, value); + break; + } + case 1: { + if (!tree.empty()) { + if (PRINT) { + std::cout << " tree.erase_back();" << std::endl; + std::cout << " ref.erase(ref.end() - 1);" << std::endl; + } + tree.erase_back(); + ref.erase(ref.end() - 1); + } + break; + } + case 2: { + if (!tree.empty()) { + size_t index = key % tree.size(); + if (PRINT) { + std::cout << " tree[" << (int)key << " % tree.size()] = std::make_pair(" + << (int)key << ", " << (int)value << ");" << std::endl; + std::cout << " ref[" << (int)key << " % ref.size()] = std::make_pair(" + << (int)key << ", " << (int)value << ");" << std::endl; + } + tree[index] = std::make_pair(key, value); + ref[index] = std::make_pair(key, value); + } + break; + } + default: + std::cout << "Unexpected instruction: " << inst << std::endl; + assert(false); + } + } + + // tree._check(); + //std::cout << " sizes: " << tree.size() << " == " << ref.size() << std::endl; + assert(tree.size() == ref.size()); + + for (size_t i = 0; i < tree.size(); ++i) { + if (!true) { + std::cout << " " << (int)tree[i].first << " == " << (int)ref[i].first << std::endl; + } + assert(tree[i].first == ref[i].first); + assert(tree[i].second 
== ref[i].second); + } + + // for (auto& entry : map) { + // const Key& vRef = entry.first; + // Key vMap = tree.find(vRef)->first; + // assert(vMap == vRef); + // } + // for (auto& entry : tree) { + // Key v = entry.first; + // const Key& vRef = map.find(v)->first; + // Key vMap = tree.find(v)->first; + // assert(vMap == vRef); + // } + + return 0; +} diff --git a/include/phtree/aux/BUILD b/include/phtree/aux/BUILD new file mode 100644 index 0000000..f54fa9b --- /dev/null +++ b/include/phtree/aux/BUILD @@ -0,0 +1,16 @@ +package(default_visibility = ["//visibility:private"]) + +cc_library( + name = "aux", + hdrs = [ + "min_max_helpers.h", + "min_max_tree_heap.h", + "min_max_vector_heap.h", + ], + visibility = [ + "//visibility:public", + ], + deps = [ + "//include/phtree/common", + ], +) diff --git a/include/phtree/aux/LICENSE-boost b/include/phtree/aux/LICENSE-boost new file mode 100644 index 0000000..36b7cd9 --- /dev/null +++ b/include/phtree/aux/LICENSE-boost @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/include/phtree/aux/min_max_helpers.h b/include/phtree/aux/min_max_helpers.h new file mode 100644 index 0000000..cbb3a83 --- /dev/null +++ b/include/phtree/aux/min_max_helpers.h @@ -0,0 +1,230 @@ +// Copyright Malte Skarupke 2020. +// Distributed under the Boost Software License, Version 1.0. +// (See http://www.boost.org/LICENSE_1_0.txt) + +#ifndef PHTREE_AUX_MIN_MAX_HELPERS_H +#define PHTREE_AUX_MIN_MAX_HELPERS_H + +namespace phtree::aux::minmax_heap_helpers { +// returns the index of the highest set bit. undefined if no bits are set. 
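Editor's note: the two fuzzers added above (b_plus_heap_fuzzer.cc and b_plus_vector_tree_fuzzer.cc) share one differential-testing shape: decode instructions from the fuzz input, apply them to the new container and to an STL reference in lockstep, and assert that the two stay in sync. A minimal sketch of that harness, assuming a libFuzzer build; the container under test here is just std::vector, for illustration.

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Differential fuzzing skeleton in the style of the fuzzers above:
// a few input bytes become one (instruction, key) pair that is applied
// to the structure under test and to a trusted reference in lockstep.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* Data, size_t Size) {
    std::vector<uint8_t> tested;     // stand-in for the container under test
    std::vector<uint8_t> reference;  // trusted reference implementation

    size_t pos = 0;
    while (pos + 2 < Size) {
        uint8_t inst = Data[pos++] % 2;
        uint8_t key = Data[pos++];
        switch (inst) {
        case 0:
            tested.push_back(key);
            reference.push_back(key);
            break;
        case 1:
            if (!tested.empty()) {
                tested.pop_back();
                reference.pop_back();
            }
            break;
        }
    }
    assert(tested.size() == reference.size());  // invariant checked after replay
    return 0;
}
```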
+// examples: +// highest_set_bit(1) = 0 +// highest_set_bit(4) = 2 +// highest_set_bit(55) = 5 +inline int highest_set_bit(uint64_t i) { +#ifdef _MSC_VER + unsigned long result; + _BitScanReverse64(&result, i); + return result; +#else + return 63 - __builtin_clzl(i); +#endif +} + +inline bool is_new_item_min(uint64_t length) { + return (highest_set_bit(length) & 1) == 0; +} + +inline bool is_min_item(uint64_t index) { + return is_new_item_min(index + 1); +} + +inline uint64_t grandparent_index(uint64_t index) { + return (index - 3) / 4; +} + +inline uint64_t parent_index(uint64_t index) { + return (index - 1) / 2; +} + +inline uint64_t first_child_index(uint64_t index) { + return (index * 2) + 1; +} +inline uint64_t last_grandchild_index(uint64_t index) { + return (index * 4) + 6; +} +template +uint64_t smallest_descendant( + Data& data, + uint64_t length, + uint64_t first_child, + uint64_t first_grandchild, + Compare&& compare) { + uint64_t second_child = first_child + 1; + if (first_grandchild >= length) + return first_child + + (second_child != length && compare(data[second_child], data[first_child])); + uint64_t second_grandchild = first_grandchild + 1; + if (second_grandchild == length) + return compare(data[first_grandchild], data[second_child]) ? first_grandchild + : second_child; + uint64_t min_grandchild = + first_grandchild + !!compare(data[second_grandchild], data[first_grandchild]); + uint64_t third_grandchild = second_grandchild + 1; + if (third_grandchild == length) + return compare(data[min_grandchild], data[second_child]) ? min_grandchild : second_child; + else + return compare(data[min_grandchild], data[third_grandchild]) ? min_grandchild + : third_grandchild; +} +template +uint64_t largest_descendant( + Data& data, + uint64_t length, + uint64_t first_child, + uint64_t first_grandchild, + Compare&& compare) { + uint64_t second_child = first_child + 1; + if (first_grandchild >= length) + return first_child + + (second_child != length && compare(data[first_child], data[second_child])); + uint64_t second_grandchild = first_grandchild + 1; + if (second_grandchild == length) + return compare(data[second_child], data[first_grandchild]) ? first_grandchild + : second_child; + uint64_t max_grandchild = + first_grandchild + !!compare(data[first_grandchild], data[second_grandchild]); + uint64_t third_grandchild = second_grandchild + 1; + if (third_grandchild == length) + return compare(data[second_child], data[max_grandchild]) ? max_grandchild : second_child; + else + return compare(data[max_grandchild], data[third_grandchild]) ? third_grandchild + : max_grandchild; +} + +template +void push_down_min( + Data& data, + typename Data::value_type value, + uint64_t index, + uint64_t length, + Compare&& compare) { + using std::swap; + for (;;) { + uint64_t last_grandchild = last_grandchild_index(index); + if (last_grandchild < length) { + // auto it = data.begin() + last_grandchild; // TODO? + auto it = last_grandchild; + uint64_t min_first_half = last_grandchild - 2 - !!compare(data[it - 3], data[it - 2]); + uint64_t min_second_half = last_grandchild - !!compare(data[it - 1], data[it]); + uint64_t smallest = compare(data[min_second_half], data[min_first_half]) + ? 
min_second_half + : min_first_half; + if (!compare(data[smallest], value)) + break; + data[index] = std::move(data[smallest]); + index = smallest; + uint64_t parent = parent_index(index); + if (compare(data[parent], value)) + swap(data[parent], value); + } else { + uint64_t first_child = first_child_index(index); + if (first_child >= length) + break; + uint64_t first_grandchild = last_grandchild - 3; + uint64_t smallest = + smallest_descendant(data, length, first_child, first_grandchild, compare); + if (!compare(data[smallest], value)) + break; + data[index] = std::move(data[smallest]); + index = smallest; + if (smallest < first_grandchild) + break; + uint64_t parent = parent_index(index); + if (compare(data[parent], value)) { + data[index] = std::move(data[parent]); + index = parent; + } + break; + } + } + data[index] = std::move(value); +} + +template +void push_down_min_one_child_only(It begin, uint64_t index, Compare&& compare) { + using std::swap; + uint64_t child = first_child_index(index); + if (compare(begin[child], begin[index])) + swap(begin[index], begin[child]); +} + +template +void push_down_min_one_level_only(It begin, uint64_t index, Compare&& compare) { + using std::swap; + uint64_t first_child = first_child_index(index); + uint64_t smaller_child = first_child + !!compare(begin[first_child + 1], begin[first_child]); + if (compare(begin[smaller_child], begin[index])) + swap(begin[index], begin[smaller_child]); +} + +template +void push_down_max( + Data& data, + typename Data::value_type value, + uint64_t index, + uint64_t length, + Compare&& compare) { + using std::swap; + for (;;) { + uint64_t last_grandchild = last_grandchild_index(index); + if (last_grandchild < length) { + // auto it = data.begin() + last_grandchild; // TODO? + auto it = last_grandchild; + uint64_t max_first_half = last_grandchild - 2 - !!compare(data[it - 2], data[it - 3]); + uint64_t max_second_half = last_grandchild - !!compare(data[it], data[it - 1]); + uint64_t largest = compare(data[max_first_half], data[max_second_half]) + ? 
max_second_half + : max_first_half; + if (!compare(value, data[largest])) + break; + data[index] = std::move(data[largest]); + index = largest; + uint64_t parent = parent_index(index); + if (compare(value, data[parent])) + swap(data[parent], value); + } else { + uint64_t first_child = first_child_index(index); + if (first_child >= length) + break; + uint64_t first_grandchild = last_grandchild - 3; + uint64_t largest = + largest_descendant(data, length, first_child, first_grandchild, compare); + if (!compare(value, data[largest])) + break; + data[index] = std::move(data[largest]); + index = largest; + if (largest < first_grandchild) + break; + uint64_t parent = parent_index(index); + if (compare(value, data[parent])) { + data[index] = std::move(data[parent]); + index = parent; + } + break; + } + } + data[index] = std::move(value); +} + +template +void push_down_max_one_child_only(It begin, uint64_t index, Compare&& compare) { + using std::swap; + uint64_t child = first_child_index(index); + if (compare(begin[index], begin[child])) + swap(begin[index], begin[child]); +} + +template +void push_down_max_one_level_only(It begin, uint64_t index, Compare&& compare) { + using std::swap; + uint64_t first_child = first_child_index(index); + uint64_t bigger_child = first_child + !!compare(begin[first_child], begin[first_child + 1]); + if (compare(begin[index], begin[bigger_child])) + swap(begin[index], begin[bigger_child]); +} + +} // namespace minmax_heap_helpers + + +#endif // PHTREE_AUX_MIN_MAX_HELPERS_H diff --git a/include/phtree/aux/min_max_tree_heap.h b/include/phtree/aux/min_max_tree_heap.h new file mode 100644 index 0000000..36f941f --- /dev/null +++ b/include/phtree/aux/min_max_tree_heap.h @@ -0,0 +1,296 @@ +// Copyright Malte Skarupke 2020. +// Copyright Tilmann Zäschke 2023. +// Distributed under the Boost Software License, Version 1.0. 
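Editor's note: min_max_helpers.h above relies on the standard min-max-heap layout: the array is a complete binary tree, even levels hold "min" nodes, odd levels hold "max" nodes, and is_min_item derives the level from the highest set bit of index + 1. A small standalone check of that parity rule; the level_of helper is for illustration only.

```cpp
#include <cassert>
#include <cstdint>

// Illustration of the level rule behind minmax_heap_helpers::is_min_item:
// in the array layout (root at index 0), even levels hold "min" nodes and
// odd levels hold "max" nodes; the level equals highest_set_bit(index + 1).
inline int level_of(uint64_t index) {
    int level = 0;
    while (index > 0) {          // one parent step per level up to the root
        index = (index - 1) / 2;
        ++level;
    }
    return level;
}

int main() {
    assert(level_of(0) == 0);                      // root: min level, holds the minimum
    assert(level_of(1) == 1 && level_of(2) == 1);  // max level: one of these is the maximum
    assert(level_of(3) == 2 && level_of(6) == 2);  // min level again
    assert(level_of(7) == 3);                      // max level
    // is_min_item(i) in min_max_helpers.h is exactly (level_of(i) % 2 == 0).
    return 0;
}
```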
+// (See http://www.boost.org/LICENSE_1_0.txt) + +#ifndef PHTREE_AUX_MIN_MAX_TREE_HEAP_H +#define PHTREE_AUX_MIN_MAX_TREE_HEAP_H + +#include "include/phtree/common/bpt_vector_tree.h" +#include "min_max_helpers.h" +#include +#include +#include + +namespace phtree::aux { + +namespace tree_heap { +template +bool is_minmax_heap(It begin, It end, Compare&& compare) { + uint64_t length = static_cast(end - begin); + auto test_index = [](uint64_t index, auto compare_index) { + uint64_t first_child = minmax_heap_helpers::first_child_index(index); + uint64_t second_child = first_child + 1; + uint64_t first_grandchild = minmax_heap_helpers::first_child_index(first_child); + uint64_t second_grandchild = first_grandchild + 1; + uint64_t third_grandchild = minmax_heap_helpers::first_child_index(second_child); + uint64_t fourth_grandchild = third_grandchild + 1; + return compare_index(first_child) && compare_index(second_child) && + compare_index(first_grandchild) && compare_index(second_grandchild) && + compare_index(third_grandchild) && compare_index(fourth_grandchild); + }; + for (uint64_t i = 0; i < length; ++i) { + if (minmax_heap_helpers::is_min_item(i)) { + auto compare_one = [&](uint64_t child) { + return child >= length || !compare(begin[child], begin[i]); + }; + if (!test_index(i, compare_one)) + return false; + } else { + auto compare_one = [&](uint64_t child) { + return child >= length || !compare(begin[i], begin[child]); + }; + if (!test_index(i, compare_one)) + return false; + } + } + return true; +} + +template +void push_minmax_heap(Data& data, Compare&& compare) { + uint64_t length = static_cast(data.size()); + uint64_t index = length - 1; + uint64_t parent = minmax_heap_helpers::parent_index(index); + auto value = std::move(data.back()); + if (minmax_heap_helpers::is_new_item_min(length)) { + if (index == 0) + static_cast(0); + else if (compare(data[parent], value)) { + data[index] = std::move(data[parent]); + index = parent; + goto push_up_max; + } else { + for (;;) { + { + uint64_t grandparent = minmax_heap_helpers::grandparent_index(index); + if (compare(value, data[grandparent])) { + data[index] = std::move(data[grandparent]); + index = grandparent; + } else + break; + } + push_up_min: + if (!index) + break; + } + } + } else if (compare(value, data[parent])) { + data[index] = std::move(data[parent]); + index = parent; + goto push_up_min; + } else { + push_up_max: + while (index > 2) { + uint64_t grandparent = minmax_heap_helpers::grandparent_index(index); + if (compare(data[grandparent], value)) { + data[index] = std::move(data[grandparent]); + index = grandparent; + } else + break; + } + } + data[index] = std::move(value); +} + +template +void pop_minmax_heap_min(Data& data, Compare&& compare) { + uint64_t length = static_cast(data.size()) - 1; + if (length == 0) + return; + minmax_heap_helpers::push_down_min( + data, std::exchange(data.back(), std::move(data.front())), 0, length, compare); +} + +template +void pop_minmax_heap_max(Data& data, Compare&& compare) { + uint64_t length = static_cast(data.size()) - 1; + if (length <= 1) + return; + + uint64_t index = 1 + !!compare(data[1], data[2]); + minmax_heap_helpers::push_down_max( + data, + std::exchange(data.back(), std::move(data[index])), + index, + length, + std::forward(compare)); +} + +template +void make_minmax_heap(It begin, It end, Compare&& compare) { + uint64_t length = end - begin; + uint64_t index = length / 2; + if (index == 0) + return; + // optimization: there can be only one item that has only one child + // 
handling that item up front simplifies the second loop a little, since + // we know that all other items have two children + if ((length & 1) == 0) { + --index; + if (minmax_heap_helpers::is_min_item(index)) + minmax_heap_helpers::push_down_min_one_child_only(begin, index, compare); + else + minmax_heap_helpers::push_down_max_one_child_only(begin, index, compare); + if (index == 0) + return; + } + // optimization: half of all the items will have no grandchildren. this + // simplifies the push_down function a lot, so we handle these items + // first. we could then do another optimization where we know that + // after the first half, the next quarter of items has grandchildren but + // no great-grandchildren, but the code is already big enough + if (length != 4) { + uint64_t lowest_index_with_no_grandchildren = length / 4; + for (;;) { + int highest_bit = minmax_heap_helpers::highest_set_bit(index); + uint64_t loop_until = std::max( + lowest_index_with_no_grandchildren, (static_cast(1) << highest_bit) - 1); + --index; + if (highest_bit & 1) { + for (;; --index) { + minmax_heap_helpers::push_down_max_one_level_only(begin, index, compare); + if (index == loop_until) + break; + } + } else { + for (;; --index) { + minmax_heap_helpers::push_down_min_one_level_only(begin, index, compare); + if (index == loop_until) + break; + } + if (index == 0) + return; + } + if (index == lowest_index_with_no_grandchildren) + break; + } + } + int highest_bit = minmax_heap_helpers::highest_set_bit(index); + uint64_t loop_until = (static_cast(1) << highest_bit) - 1; + switch (highest_bit & 1) { + for (;;) { + case 0: + for (;;) { + --index; + minmax_heap_helpers::push_down_min( + begin, std::move(begin[index]), index, length, compare); + if (index == loop_until) + break; + } + if (index == 0) + return; + loop_until /= 2; + [[fallthrough]]; + case 1: + for (;;) { + --index; + minmax_heap_helpers::push_down_max( + begin, std::move(begin[index]), index, length, compare); + if (index == loop_until) + break; + } + loop_until /= 2; + } + } +} +} // namespace tree_heap + +/** + * A min-max heap that uses a vector-tree as underlying data structure. + */ +template +class min_max_tree_heap { + // TODO + // - reserve() + + struct SwapComp { + // TODO template? + Compare comp; + constexpr bool operator()(T const& x, T const& y) const noexcept { + return !comp(x, y); + } + }; + + public: + explicit min_max_tree_heap(size_t reserve = 16) noexcept { + data_.reserve(reserve); + } + + const T& top() const noexcept { + assert(!data_.empty()); + return data_[0]; + } + + T& top_max() noexcept { + assert(!data_.empty()); + switch (data_.size()) { + case 1: + return data_[0]; + case 2: + return data_[1]; + default: { + uint64_t index = 1 + cmp(data_[1], data_[2]); + return data_[index]; + } + } + } + + // TODO do some output and show what it returns, why is it so much faster???? + // TODO Also, some kNN tests STILL fail + // TODO fix q++ vs ++q (update tests but also fix implementation! + // const T& top_max() const noexcept { +// assert(data_.size() >= 3); // TODO +// uint64_t index = 1 + !!cmp(data_[1], data_[2]); +// return data_[index]; +// } +// +// T& top_max() noexcept { +// assert(data_.size() >= 3); // TODO +// uint64_t index = 1 + !!cmp(data_[1], data_[2]); +// return data_[index]; +// } + + template + void emplace(Args&&... 
args) { + data_.emplace_back(std::forward(args)...); + tree_heap::push_minmax_heap(data_, cmp); + } + + void emplace(T&& x) { + data_.emplace_back(std::move(x)); + tree_heap::push_minmax_heap(data_, cmp); + } + + void emplace(const T& x) { + data_.emplace_back(x); + tree_heap::push_minmax_heap(data_, cmp); + } + + void pop() noexcept { + assert(!data_.empty()); + tree_heap::pop_minmax_heap_min(data_, cmp); + data_.erase_back(); + } + + void pop_max() noexcept { + assert(!data_.empty()); + tree_heap::pop_minmax_heap_max(data_, cmp); + data_.erase_back(); + } + + [[nodiscard]] bool empty() const noexcept { + return data_.empty(); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + private: + ::phtree::bptree::detail::vector_tree data_; // The heap array. + SwapComp cmp{}; // TODO create on the fly only? +}; +} // namespace phtree::aux + +#endif // PHTREE_AUX_MIN_MAX_TREE_HEAP_H diff --git a/include/phtree/aux/min_max_vector_heap.h b/include/phtree/aux/min_max_vector_heap.h new file mode 100644 index 0000000..9d9335f --- /dev/null +++ b/include/phtree/aux/min_max_vector_heap.h @@ -0,0 +1,608 @@ +// Copyright Malte Skarupke 2020. +// Copyright Tilmann Zäschke 2023. +// Distributed under the Boost Software License, Version 1.0. +// (See http://www.boost.org/LICENSE_1_0.txt) + +#ifndef PHTREE_AUX_MIN_MAX_VECTOR_HEAP_H +#define PHTREE_AUX_MIN_MAX_VECTOR_HEAP_H + +#include "min_max_helpers.h" +#include +#include +#include + +namespace phtree::aux { + +namespace vector_heap { + +template +bool is_minmax_heap(It begin, It end, Compare&& compare) { + uint64_t length = static_cast(end - begin); + auto test_index = [](uint64_t index, auto compare_index) { + uint64_t first_child = minmax_heap_helpers::first_child_index(index); + uint64_t second_child = first_child + 1; + uint64_t first_grandchild = minmax_heap_helpers::first_child_index(first_child); + uint64_t second_grandchild = first_grandchild + 1; + uint64_t third_grandchild = minmax_heap_helpers::first_child_index(second_child); + uint64_t fourth_grandchild = third_grandchild + 1; + return compare_index(first_child) && compare_index(second_child) && + compare_index(first_grandchild) && compare_index(second_grandchild) && + compare_index(third_grandchild) && compare_index(fourth_grandchild); + }; + for (uint64_t i = 0; i < length; ++i) { + if (minmax_heap_helpers::is_min_item(i)) { + auto compare_one = [&](uint64_t child) { + return child >= length || !compare(begin[child], begin[i]); + }; + if (!test_index(i, compare_one)) + return false; + } else { + auto compare_one = [&](uint64_t child) { + return child >= length || !compare(begin[i], begin[child]); + }; + if (!test_index(i, compare_one)) + return false; + } + } + return true; +} +template +bool is_minmax_heap(It begin, It end) { + return is_minmax_heap(begin, end, std::less<>{}); +} + +template +void push_minmax_heap(It begin, It end, Compare&& compare) { + uint64_t length = static_cast(end - begin); + uint64_t index = length - 1; + uint64_t parent = minmax_heap_helpers::parent_index(index); + typename std::iterator_traits::value_type value = std::move(end[-1]); + if (minmax_heap_helpers::is_new_item_min(length)) { + if (index == 0) + static_cast(0); + else if (compare(begin[parent], value)) { + begin[index] = std::move(begin[parent]); + index = parent; + goto push_up_max; + } else { + for (;;) { + { + uint64_t grandparent = minmax_heap_helpers::grandparent_index(index); + if (compare(value, begin[grandparent])) { + begin[index] = std::move(begin[grandparent]); 
+ index = grandparent; + } else + break; + } + push_up_min: + if (!index) + break; + } + } + } else if (compare(value, begin[parent])) { + begin[index] = std::move(begin[parent]); + index = parent; + goto push_up_min; + } else { + push_up_max: + while (index > 2) { + uint64_t grandparent = minmax_heap_helpers::grandparent_index(index); + if (compare(begin[grandparent], value)) { + begin[index] = std::move(begin[grandparent]); + index = grandparent; + } else + break; + } + } + begin[index] = std::move(value); +} +template +void push_minmax_heap(It begin, It end) { + push_minmax_heap(begin, end, std::less<>{}); +} + +template +void pop_minmax_heap_min(It begin, It end, Compare&& compare) { + uint64_t length = static_cast(end - begin) - 1; + if (length == 0) + return; + minmax_heap_helpers::push_down_min( + begin, std::exchange(end[-1], std::move(begin[0])), 0, length, compare); +} + +template +void pop_minmax_heap_min(It begin, It end) { + pop_minmax_heap_min(begin, end, std::less<>{}); +} + +template +void pop_minmax_heap_max(It begin, It end, Compare&& compare) { + uint64_t length = static_cast(end - begin) - 1; + if (length <= 1) + return; + + uint64_t index = 1 + !!compare(begin[1], begin[2]); + minmax_heap_helpers::push_down_max( + begin, + std::exchange(end[-1], std::move(begin[index])), + index, + length, + std::forward(compare)); +} +template +void pop_minmax_heap_max(It begin, It end) { + pop_minmax_heap_max(begin, end, std::less<>{}); +} + +template +void make_minmax_heap(It begin, It end, Compare&& compare) { + uint64_t length = end - begin; + uint64_t index = length / 2; + if (index == 0) + return; + // optimization: there can be only one item that has only one child + // handling that item up front simplifies the second loop a little, since + // we know that all other items have two children + if ((length & 1) == 0) { + --index; + if (minmax_heap_helpers::is_min_item(index)) + minmax_heap_helpers::push_down_min_one_child_only(begin, index, compare); + else + minmax_heap_helpers::push_down_max_one_child_only(begin, index, compare); + if (index == 0) + return; + } + // optimization: half of all the items will have no grandchildren. this + // simplifies the push_down function a lot, so we handle these items + // first. 
we could then do another optimization where we know that + // after the first half, the next quarter of items has grandchildren but + // no great-grandchildren, but the code is already big enough + if (length != 4) { + uint64_t lowest_index_with_no_grandchildren = length / 4; + for (;;) { + int highest_bit = minmax_heap_helpers::highest_set_bit(index); + uint64_t loop_until = std::max( + lowest_index_with_no_grandchildren, (static_cast(1) << highest_bit) - 1); + --index; + if (highest_bit & 1) { + for (;; --index) { + minmax_heap_helpers::push_down_max_one_level_only(begin, index, compare); + if (index == loop_until) + break; + } + } else { + for (;; --index) { + minmax_heap_helpers::push_down_min_one_level_only(begin, index, compare); + if (index == loop_until) + break; + } + if (index == 0) + return; + } + if (index == lowest_index_with_no_grandchildren) + break; + } + } + int highest_bit = minmax_heap_helpers::highest_set_bit(index); + uint64_t loop_until = (static_cast(1) << highest_bit) - 1; + switch (highest_bit & 1) { + for (;;) { + case 0: + for (;;) { + --index; + minmax_heap_helpers::push_down_min( + begin, std::move(begin[index]), index, length, compare); + if (index == loop_until) + break; + } + if (index == 0) + return; + loop_until /= 2; + [[fallthrough]]; + case 1: + for (;;) { + --index; + minmax_heap_helpers::push_down_max( + begin, std::move(begin[index]), index, length, compare); + if (index == loop_until) + break; + } + loop_until /= 2; + } + } +} +template +void make_minmax_heap(It begin, It end) { + return make_minmax_heap(begin, end, std::less<>{}); +} + +namespace dary_heap_helpers { +template +uint64_t first_child_index(uint64_t index) { + return index * D + 1; +} +template +uint64_t last_child_index(uint64_t index) { + return index * D + D; +} +template +uint64_t last_grandchild_index(uint64_t index) { + return index * (D * D) + (D * D + D); +} +template +uint64_t parent_index(uint64_t index) { + return (index - 1) / D; +} +template +uint64_t grandparent_index(uint64_t index) { + return (index - (D + 1)) / (D * D); +} +template +uint64_t index_with_no_grandchild(uint64_t length) { + return grandparent_index(length - 1) + 1; +} +template +inline It largest_child(It first_child_it, Compare&& compare) { + if constexpr (D == 1) + return first_child_it; + else if constexpr (D == 2) + return first_child_it + !!compare(first_child_it[0], first_child_it[1]); + else { + It first_half_largest = largest_child(first_child_it, compare); + It second_half_largest = largest_child(first_child_it + D / 2, compare); + return compare(*first_half_largest, *second_half_largest) ? second_half_largest + : first_half_largest; + } +} +template +It largest_child(It first_child_it, int num_children, Compare&& compare) { + if constexpr (D == 2) + return first_child_it; + else if constexpr (D == 3) { + if (num_children == 1) + return first_child_it; + else + return first_child_it + !!compare(first_child_it[0], first_child_it[1]); + } else if constexpr (D == 4) { + switch (num_children) { + case 1: + return first_child_it; + case 2: + return first_child_it + !!compare(first_child_it[0], first_child_it[1]); + default: + It largest = first_child_it + !!compare(first_child_it[0], first_child_it[1]); + return compare(*largest, first_child_it[2]) ? 
first_child_it + 2 : largest; + } + } else { + switch (num_children) { + case 1: + return first_child_it; + case 2: + return first_child_it + !!compare(first_child_it[0], first_child_it[1]); + case 3: { + It largest = first_child_it + !!compare(first_child_it[0], first_child_it[1]); + return compare(*largest, first_child_it[2]) ? first_child_it + 2 : largest; + } + case 4: { + It largest_first_half = + first_child_it + !!compare(first_child_it[0], first_child_it[1]); + It largest_second_half = + first_child_it + 2 + !!compare(first_child_it[2], first_child_it[3]); + return compare(*largest_first_half, *largest_second_half) ? largest_second_half + : largest_first_half; + } + default: + int half = num_children / 2; + It first_half_largest = largest_child(first_child_it, half, compare); + It second_half_largest = + largest_child(first_child_it + half, num_children - half, compare); + return compare(*first_half_largest, *second_half_largest) ? second_half_largest + : first_half_largest; + } + } +} +} // namespace dary_heap_helpers + +template +void make_dary_heap(It begin, It end, Compare&& compare) { + using std::swap; + uint64_t length = end - begin; + if (length <= 1) + return; + uint64_t index = (length - 2) / D; + // optimization: there can be only one item that has fewer than D children + // handling that item up front simplifies the second loop a little, since + // we know that all other items have two children + int num_children_end = (length - 1) % D; + if (num_children_end) { + It largest_child = dary_heap_helpers::largest_child( + begin + dary_heap_helpers::first_child_index(index), num_children_end, compare); + if (compare(begin[index], *largest_child)) + swap(begin[index], *largest_child); + if (index == 0) + return; + --index; + } + // optimization: half of all the items will have no grandchildren. this + // simplifies the push_down function a lot, so we handle these items + // first. 
we could then do another optimization where we know that + // after the first half, the next quarter of items has grandchildren but + // no great-grandchildren, but the code is already big enough + if (index > 0) { + uint64_t lowest_index_with_no_grandchildren = + dary_heap_helpers::index_with_no_grandchild(length); + for (;;) { + It largest_child = dary_heap_helpers::largest_child( + begin + dary_heap_helpers::first_child_index(index), compare); + if (compare(begin[index], *largest_child)) + swap(begin[index], *largest_child); + if (index-- == lowest_index_with_no_grandchildren) + break; + } + } + for (;; --index) { + typename std::iterator_traits::value_type value = std::move(begin[index]); + uint64_t move_down_index = index; + for (;;) { + uint64_t last_child_index = dary_heap_helpers::last_child_index(move_down_index); + uint64_t first_child_index = last_child_index - (D - 1); + It largest_child = begin; + if (last_child_index < length) + largest_child = + dary_heap_helpers::largest_child(begin + first_child_index, compare); + else if (first_child_index >= length) + break; + else + largest_child = dary_heap_helpers::largest_child( + begin + first_child_index, length - first_child_index, compare); + if (!compare(value, *largest_child)) + break; + begin[move_down_index] = std::move(*largest_child); + move_down_index = largest_child - begin; + } + begin[move_down_index] = std::move(value); + if (index == 0) + break; + } +} +template +void make_dary_heap(It begin, It end) { + make_dary_heap(begin, end, std::less<>{}); +} + +template +bool is_dary_heap(It begin, It end, Compare&& compare) { + uint64_t length = end - begin; + for (uint64_t i = 1; i < length; ++i) { + uint64_t parent = dary_heap_helpers::parent_index(i); + if (compare(begin[parent], begin[i])) + return false; + } + return true; +} +template +bool is_dary_heap(It begin, It end) { + return is_dary_heap(begin, end, std::less<>{}); +} + +template +void push_dary_heap(It begin, It end, Compare&& compare) { + typename std::iterator_traits::value_type value = std::move(end[-1]); + uint64_t index = (end - begin) - 1; + while (index > 0) { + uint64_t parent = dary_heap_helpers::parent_index(index); + if (!compare(begin[parent], value)) + break; + begin[index] = std::move(begin[parent]); + index = parent; + } + begin[index] = std::move(value); +} + +template +void push_dary_heap(It begin, It end) { + return push_dary_heap(begin, end, std::less<>{}); +} + +template +void pop_dary_heap(It begin, It end, Compare&& compare) { + uint64_t length = (end - begin) - 1; + typename std::iterator_traits::value_type value = std::move(end[-1]); + end[-1] = std::move(begin[0]); + uint64_t index = 0; + for (;;) { + uint64_t last_child = dary_heap_helpers::last_child_index(index); + uint64_t first_child = last_child - (D - 1); + if (last_child < length) { + It largest_child = dary_heap_helpers::largest_child(begin + first_child, compare); + if (!compare(value, *largest_child)) + break; + begin[index] = std::move(*largest_child); + index = largest_child - begin; + } else if (first_child < length) { + It largest_child = dary_heap_helpers::largest_child( + begin + first_child, length - first_child, compare); + if (compare(value, *largest_child)) { + begin[index] = std::move(*largest_child); + index = largest_child - begin; + } + break; + } else + break; + } + begin[index] = std::move(value); +} +template +void pop_dary_heap(It begin, It end) { + return pop_dary_heap(begin, end, std::less<>{}); +} +} // namespace vector_heap + +template +class 
min_max_vector_heap { + // TODO + // - reserve() + // - reverse min_max? + + struct SwapComp { + Compare comp; + constexpr bool operator()(T const& x, T const& y) const noexcept { + return !comp(x, y); + } + }; + + public: + explicit min_max_vector_heap(size_t reserve = 16) noexcept { + data_.reserve(reserve); + } + + const T& top() const noexcept { + assert(!data_.empty()); + return data_[0]; + } + + T& top_max() noexcept { + uint64_t index; + if (data_.size() >= 3) { + index = 1 + cmp(data_[1], data_[2]); + return data_[index]; + } else if (data_.size() == 2) { + return data_[1]; + } else if (data_.size() == 1) { + return data_[0]; + } + assert(!data_.empty()); +// switch (data_.size()) { +// case 1: +// return data_[0]; +// case 2: +// return data_[1]; +// default: { +// uint64_t index = 1 + !!cmp(data_[1], data_[2]); +// return data_[index]; +// } +// } + } + + const T& top_max() const noexcept { + return const_cast(this)->top_max(); + } + + template + void emplace(Args&&... args) { + reserve(); + data_.emplace_back(std::forward(args)...); + vector_heap::push_minmax_heap(data_.begin(), data_.end(), cmp); + } + + void emplace(T&& x) { + reserve(); + data_.emplace_back(std::move(x)); + vector_heap::push_minmax_heap(data_.begin(), data_.end(), cmp); + } + + void emplace(const T& x) { + reserve(); + data_.emplace_back(x); + vector_heap::push_minmax_heap(data_.begin(), data_.end(), cmp); + } + + void pop() noexcept { + assert(!data_.empty()); + vector_heap::pop_minmax_heap_min(data_.begin(), data_.end(), cmp); + data_.erase(data_.end() - 1); + } + + void pop_max() noexcept { + assert(!data_.empty()); + vector_heap::pop_minmax_heap_max(data_.begin(), data_.end(), cmp); + data_.erase(data_.end() - 1); + } + + [[nodiscard]] bool empty() const noexcept { + return data_.empty(); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + private: + void reserve() noexcept { + if (data_.capacity() == data_.size()) { + data_.reserve(data_.capacity() * 2); + } + } + std::vector data_; // The heap array. + SwapComp cmp{}; // TODO create on the fly only? +}; + +template +class min_vector_heap { + // TODO + // - reserve() + + struct SwapComp { + Compare comp; + constexpr bool operator()(T const& x, T const& y) const noexcept { + return !comp(x, y); + } + }; + + public: + explicit min_vector_heap(size_t reserve = 16) noexcept { + data_.reserve(reserve); + } + + const T& top() const noexcept { + assert(!data_.empty()); + return data_[0]; + } + + template + void emplace(Args&&... args) { + reserve(); + data_.emplace_back(std::forward(args)...); + vector_heap::push_dary_heap(data_.begin(), data_.end(), cmp); + } + + void emplace(T&& x) { + reserve(); + data_.emplace_back(std::move(x)); + vector_heap::push_dary_heap(data_.begin(), data_.end(), cmp); + } + + void emplace(const T& x) { + reserve(); + data_.emplace_back(x); + vector_heap::push_dary_heap(data_.begin(), data_.end(), cmp); + } + + void pop() noexcept { + assert(!data_.empty()); + vector_heap::pop_dary_heap(data_.begin(), data_.end(), cmp); + data_.erase(data_.end() - 1); + } + + [[nodiscard]] bool empty() const noexcept { + return data_.empty(); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + private: + void reserve() noexcept { + if (data_.capacity() == data_.size()) { + data_.reserve(data_.capacity() * 2); + } + } + std::vector data_; // The heap array. + // Compare cmp{}; + SwapComp cmp{}; // TODO create on the fly only? 
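Editor's note: min_max_tree_heap and min_max_vector_heap are double-ended priority queues: top()/pop() address one end and top_max()/pop_max() the other, which is what a bounded k-nearest-neighbour candidate list needs (read the best candidate cheaply, evict the worst when a closer one arrives). The eviction pattern itself can be sketched with std::priority_queue, which only exposes the worst-candidate end; Candidate, offer and k are illustrative names, not part of the patch.

```cpp
#include <cstddef>
#include <queue>
#include <utility>
#include <vector>

// Bounded k-best buffer: keep the k smallest distances seen so far.
// A max-heap on distance evicts the currently worst candidate in O(log k);
// a min-max heap (as above) would additionally expose the best candidate
// through its top()/top_max() style accessors.
using Candidate = std::pair<double, int>;  // (distance, payload id), illustrative

void offer(std::priority_queue<Candidate>& k_best, std::size_t k, Candidate c) {
    if (k_best.size() < k) {
        k_best.push(std::move(c));
    } else if (c.first < k_best.top().first) {  // closer than the worst kept entry
        k_best.pop();                           // evict the farthest candidate
        k_best.push(std::move(c));
    }
}
```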
+}; + +} // namespace phtree::aux + +#endif // PHTREE_AUX_MIN_MAX_VECTOR_HEAP_H diff --git a/include/phtree/common/BUILD b/include/phtree/common/BUILD index 95fe051..5b03ad4 100644 --- a/include/phtree/common/BUILD +++ b/include/phtree/common/BUILD @@ -5,11 +5,14 @@ cc_library( hdrs = [ "b_plus_tree_base.h", "b_plus_tree_hash_map.h", + "b_plus_tree_heap.h", "b_plus_tree_map.h", "b_plus_tree_multimap.h", "base_types.h", "bits.h", "bpt_fixed_vector.h", + "bpt_priority_queue.h", + "bpt_vector_tree.h", "common.h", "debug_helper.h", "flat_array_map.h", diff --git a/include/phtree/common/b_plus_tree_base.h b/include/phtree/common/b_plus_tree_base.h index 0ca01b4..cf7129b 100644 --- a/include/phtree/common/b_plus_tree_base.h +++ b/include/phtree/common/b_plus_tree_base.h @@ -77,6 +77,7 @@ template < typename NInnerT, typename NLeafT, bool IsLeaf, + typename Compare = std::less, typename CFG = bpt_config<16, 2, 2>> class bpt_node_data : public bpt_node_base { // TODO This could be improved but requires a code change to move > 1 entry when merging. @@ -106,7 +107,7 @@ class bpt_node_data : public bpt_node_base { [[nodiscard]] auto lower_bound(KeyT key) noexcept { // If this doesn´t compile, check #include !!! return std::lower_bound(data_.begin(), data_.end(), key, [](EntryT& left, const KeyT key) { - return left.first < key; + return Compare{}(left.first, key); }); } @@ -131,6 +132,12 @@ class bpt_node_data : public bpt_node_base { return check_merge(it_after_erased, max_key, root); } + auto pop_back(NodeT*& root) { + auto max_key = data_.back().first; + data_.pop_back(); + return check_merge(data_.end(), max_key, root); + } + auto check_merge(DataIteratorT iter_after_erased, KeyT max_key_old, NodeT*& root) { using ER = EraseResult; auto& parent_ = this->parent_; @@ -225,12 +232,12 @@ class bpt_node_data : public bpt_node_base { auto check_split_and_adjust_iterator(DataIteratorT it, KeyT key, NodeT*& root) { auto* dest = (ThisT*)this; bool is_split = this->check_split(root); - if (is_split && key > this->data_.back().first) { + if (is_split && Compare{}(this->data_.back().first, key)) { dest = this->next_node_; it = dest->lower_bound(key); } - if (dest->parent_ != nullptr && key > dest->data_.back().first) { + if (dest->parent_ != nullptr && Compare{}(dest->data_.back().first, key)) { dest->parent_->update_key(dest->data_.back().first, key, dest); } @@ -331,15 +338,26 @@ class bpt_node_data : public bpt_node_base { // This type is replaced with the proper type inside bpt_node_data using bpt_dummy = void; -template > -class bpt_node_inner -: public bpt_node_data, NLeafT, false, CFG> { - using NInnerT = bpt_node_inner; +template < + typename KeyT, + typename NLeafT, + typename Compare = std::less, + typename CFG = bpt_config<16, 2, 2>> +class bpt_node_inner : public bpt_node_data< + KeyT, + bpt_dummy, + bpt_node_inner, + NLeafT, + false, + Compare, + CFG> { + using NInnerT = bpt_node_inner; using NodePtrT = bpt_node_base*; public: explicit bpt_node_inner(NInnerT* parent, NInnerT* prev, NInnerT* next) noexcept - : bpt_node_data(false, parent, prev, next) {} + : bpt_node_data( + false, parent, prev, next) {} ~bpt_node_inner() noexcept { for (auto& e : this->data_) { @@ -376,9 +394,9 @@ class bpt_node_inner auto prev_key = this->data_[0].first; size_t n = 0; for (auto& e : this->data_) { - assert(n == 0 || e.first >= prev_key); + assert(n == 0 || !Compare{}(e.first, prev_key)); e.second->_check(count, this, prev_leaf, known_min, e.first); - assert(this->parent_ == nullptr || e.first <= known_max); + 
assert(this->parent_ == nullptr || !Compare{}(known_max, e.first)); prev_key = e.first; ++n; } @@ -409,17 +427,17 @@ class bpt_node_inner // splits are always "forward", i.e. creating a "next" node. How about rebalance()? auto* dest = this; - if (has_split && key1_old > this->data_.back().first) { + if (has_split && Compare{}(this->data_.back().first, key1_old)) { dest = this->next_node_; } // update child1 auto it = dest->lower_bound_node(key1_old, child1); - assert(key1_old >= key1_new && it != dest->data_.end()); + assert(!Compare{}(key1_old, key1_new) && it != dest->data_.end()); it->first = key1_new; if (dest == this && this->next_node_ != nullptr) { - assert(this->next_node_->data_.front().first >= key1_new); + assert(!Compare{}(this->next_node_->data_.front().first, key1_new)); } ++it; // key_1_old is the max_key of child2 @@ -445,7 +463,7 @@ template class bpt_iterator_base { using IterT = bpt_iterator_base; - template + template friend class bpt_node_data; friend F1; friend NLeafT; diff --git a/include/phtree/common/b_plus_tree_heap.h b/include/phtree/common/b_plus_tree_heap.h new file mode 100644 index 0000000..06374ea --- /dev/null +++ b/include/phtree/common/b_plus_tree_heap.h @@ -0,0 +1,227 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_B_PLUS_TREE_HEAP_H +#define PHTREE_COMMON_B_PLUS_TREE_HEAP_H + +#include "b_plus_tree_base.h" +#include "b_plus_tree_multimap.h" +#include "bits.h" +#include +#include +#include +#include +#include + +/* + * PLEASE do not include this file directly, it is included via common.h. + * + * This file contains the B+tree multimap implementation which is used in high-dimensional nodes in + * the PH-Tree. + */ +namespace phtree::bptree { + +/* + * TODO update doc! + * The b_plus_tree_multimap is a B+tree implementation that uses a hierarchy of horizontally + * connected nodes for fast traversal through all entries. + * + * Behavior + * ======== + * This is a multimap. It behaves just like std::multimap, minus some API functions. + * The set/map is ordered by their key. Entries with identical keys have no specific ordering + * but the order is stable with respect to insertion/removal of other entries. + * + * + * Rationale + * ========= + * This implementations is optimized for small entry count, however it should + * scale well with large entry counts. + * + * + * Internals + * ========= + * The individual nodes have at most M entries. + * The tree has O(log n) lookup and O(M log n) insertion/removal time complexity, + * space complexity is O(n). + * + * Tree structure: + * - Inner nodes: have other nodes as children; their key of an entry represents the highest + * key of any subnode in that entry + * - Leaf nodes: have values as children; their key represents the key of a key/value pair + * - Every node is either a leaf (l-node; contains values) or an inner node + * (n-node; contains nodes). + * - "Sibling" nodes refer to the nodes linked by prev_node_ or next_node_. 
Sibling nodes + * usually have the same parent but may also be children of their parent's siblings. + * + * - Guarantee: All leaf nodes are horizontally connected + * - Inner nodes may or may not be connected. Specifically: + * - New inner nodes will be assigned siblings from the same parent or the parent's sibling + * (if the new node is the first or last node in a parent) + * - There is no guarantee that inner nodes know about their potential sibling (=other inner + * nodes that own bordering values/child-nodes). + * - There is no guarantee that siblings are on the same depth of the tree. + * - The tree is not balanced + * + */ + +namespace detail { +template +struct DefaultGetKey1 { + double operator()(const Value& v) const { + return v.first; + } +}; +} // namespace detail + +// TODO clean this up -> double? +template < + typename Value, + typename Compare = std::less, + typename GetKey = detail::DefaultGetKey1> +class b_plus_tree_heap { + using Key = decltype(GetKey{}(Value{})); + struct SwapComp { + Compare comp; + // template + bool operator()(Key const& x, Key const& y) const { + return !comp(x, y); + } + }; + + public: + const Value& top() const { + assert(!data_.empty()); + return data_.back().second; + } + + const Value& top_max() const { + assert(!data_.empty()); + return data_.begin()->second; + } + + Value& top_max() { + assert(!data_.empty()); + return data_.begin()->second; + } + + template + void emplace(Args&&... args) { + Value v{std::forward(args)...}; + Key key = GetKey{}(v); + data_.emplace(key, std::move(v)); + // data_.emplace(std::forward(args)...); + } + + void pop() { + assert(!data_.empty()); + data_.pop_back(); + } + + void pop_max() { + assert(!data_.empty()); + data_.erase(data_.begin()); + } + + [[nodiscard]] bool empty() const noexcept { + return data_.empty(); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + // TODO Simple hack: just negate the key to a negative value? + void _check() const { + data_._check(); + } + + private: + b_plus_tree_multimap data_{}; // The heap array. +}; + +namespace detail { +template +struct DefaultGetKey2 { + double operator()(const Value& v) const { + return v.first; + } +}; +} // namespace detail + +// TODO clean this up -> double? +template < + typename Value, + typename Compare = std::less, + typename GetKey = detail::DefaultGetKey2> +class b_plus_tree_heap2 { + using Key = decltype(GetKey{}(Value{})); // TODO remove?!?!? + + public: + const Value& top() const { + return data_.rbegin()->second; + } + + const Value& top_max() const { + return data_.begin()->second; + } + + Value& top_max() { + return data_.begin()->second; + } + + template + void emplace(Args&&... args) { + Value v{std::forward(args)...}; + Key key = GetKey{}(v); + data_.emplace(key, std::move(v)); + } + + void pop() { + data_.erase(--data_.end()); + } + + void pop_max() { + data_.erase(data_.begin()); + } + + // pop_max() + emplace() + template + void replace_max(Key& key, Args&&... args) { + pop_max(); + emplace(key, std::forward(args)...); + } + + [[nodiscard]] bool empty() const noexcept { + return data_.empty(); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + // TODO Simple hack: just negate the key to a negative value? 
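+    // Intended use (as in the kNN iterators): Compare is an inverted, distance-based
+    // comparator ("left.first > right.first"), so the underlying multimap is ordered from
+    // the farthest to the nearest entry. top() then returns the nearest candidate,
+    // top_max() the farthest one, and pop_max() shrinks the candidate set from the far end.
+    //
+    // Minimal sketch (illustrative only; ByDistDesc is not part of this header):
+    //   struct ByDistDesc {
+    //       bool operator()(double a, double b) const { return a > b; }
+    //   };
+    //   b_plus_tree_heap2<std::pair<double, int>, ByDistDesc> heap;
+    //   heap.emplace(1.5, 7);         // GetKey extracts .first as the ordering key
+    //   heap.emplace(0.5, 3);
+    //   auto& nearest = heap.top();   // the (0.5, 3) entry under this ordering
+    //   heap.pop_max();               // drops the (1.5, 7) entry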
+ void _check() { + // data_._check(); + } + + private: + std::multimap data_; +}; + +} // namespace phtree::bptree + +#endif // PHTREE_COMMON_B_PLUS_TREE_HEAP_H diff --git a/include/phtree/common/b_plus_tree_map.h b/include/phtree/common/b_plus_tree_map.h index 61eca59..cd7c281 100644 --- a/include/phtree/common/b_plus_tree_map.h +++ b/include/phtree/common/b_plus_tree_map.h @@ -89,7 +89,7 @@ class b_plus_tree_map { class bpt_iterator; using IterT = bpt_iterator; using NLeafT = bpt_node_leaf; - using NInnerT = detail::bpt_node_inner; + using NInnerT = detail::bpt_node_inner, INNER_CFG>; using NodeT = detail::bpt_node_base; using TreeT = b_plus_tree_map; @@ -232,8 +232,8 @@ class b_plus_tree_map { } private: - using bpt_leaf_super = - ::phtree::bptree::detail::bpt_node_data; + using bpt_leaf_super = ::phtree::bptree::detail:: + bpt_node_data, LEAF_CFG>; class bpt_node_leaf : public bpt_leaf_super { public: explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept diff --git a/include/phtree/common/b_plus_tree_multimap.h b/include/phtree/common/b_plus_tree_multimap.h index a0d13f2..616ab67 100644 --- a/include/phtree/common/b_plus_tree_multimap.h +++ b/include/phtree/common/b_plus_tree_multimap.h @@ -73,18 +73,17 @@ namespace phtree::bptree { * - The tree is not balanced * */ -template +template > class b_plus_tree_multimap { - static_assert(std::is_integral() && "Key type must be integer"); - static_assert(std::is_unsigned() && "Key type must unsigned"); + static_assert(std::is_arithmetic() && "Key type must integral or floating point"); class bpt_node_leaf; class bpt_iterator; using IterT = bpt_iterator; using NLeafT = bpt_node_leaf; - using NInnerT = detail::bpt_node_inner; + using NInnerT = detail::bpt_node_inner; using NodeT = detail::bpt_node_base; - using TreeT = b_plus_tree_multimap; + using TreeT = b_plus_tree_multimap; public: explicit b_plus_tree_multimap() : root_{new NLeafT(nullptr, nullptr, nullptr)}, size_{0} {}; @@ -164,6 +163,42 @@ class b_plus_tree_multimap { return IterT(); } + [[nodiscard]] auto& front() noexcept { + NodeT* node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->data_[0].second; + } + return node->as_leaf()->data_[0]; + } + + [[nodiscard]] auto& front() const noexcept { + return const_cast(*this).back(); + } + + [[nodiscard]] auto& back() noexcept { + NodeT* node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->data_.back().second; + } + return node->as_leaf()->data_.back(); + } + + [[nodiscard]] auto& back() const noexcept { + return const_cast(*this).back(); + } + + void pop_back() noexcept { + assert(!empty()); + NodeT* node = root_; + while (!node->is_leaf()) { + node = node->as_inner()->data_.back().second; + } + // TODO use pop_back() in erase_entry...? + // node->as_leaf()->erase_entry(node->as_leaf()->data_.rbegin(), root_); + node->as_leaf()->pop_back(root_); + --size_; + } + template auto emplace(KeyT key, Args&&... args) { auto leaf = lower_bound_or_last_leaf(key, root_); @@ -185,7 +220,8 @@ class b_plus_tree_multimap { auto node = hint.node_->as_leaf(); // The following may drop a valid hint but is easy to check. 
- if (node->data_.begin()->first > key || (node->data_.end() - 1)->first < key) { + if (Compare{}(key, node->data_.begin()->first) || + Compare{}((node->data_.end() - 1)->first, key)) { return emplace(key, std::forward(args)...); } return node->try_emplace(key, root_, size_, std::forward(args)...); @@ -198,7 +234,13 @@ class b_plus_tree_multimap { size_t erase(const KeyT key) { auto begin = lower_bound(key); - auto end = key == std::numeric_limits::max() ? IterT() : lower_bound(key + 1); + static_assert(std::is_integral_v); + IterT end; + if constexpr (Compare{}(0, 1)) { + end = key == std::numeric_limits::max() ? IterT() : lower_bound(key + 1); + } else { + end = key == std::numeric_limits::min() ? IterT() : lower_bound(key - 1); + } if (begin == end) { return 0; } @@ -272,7 +314,7 @@ class b_plus_tree_multimap { } private: - using bpt_leaf_super = detail::bpt_node_data; + using bpt_leaf_super = detail::bpt_node_data; class bpt_node_leaf : public bpt_leaf_super { public: explicit bpt_node_leaf(NInnerT* parent, NLeafT* prev, NLeafT* next) noexcept @@ -308,8 +350,8 @@ class b_plus_tree_multimap { assert(prev_leaf == this->prev_node_); for (auto& e : this->data_) { - assert(count == 0 || e.first >= known_min); - assert(this->parent_ == nullptr || e.first <= known_max); + assert(count == 0 || !Compare{}(e.first, known_min)); + assert(this->parent_ == nullptr || !Compare{}(known_max, e.first)); ++count; known_min = e.first; } diff --git a/include/phtree/common/bpt_fixed_vector.h b/include/phtree/common/bpt_fixed_vector.h index 96b9a34..8cb6a12 100644 --- a/include/phtree/common/bpt_fixed_vector.h +++ b/include/phtree/common/bpt_fixed_vector.h @@ -246,6 +246,11 @@ class bpt_vector { return iterator{dst}; } + void pop_back() noexcept { + assert(size_ > 0); + std::destroy_at(&data(--size_)); + } + [[nodiscard]] size_t size() const noexcept { return size_; } @@ -301,7 +306,7 @@ class bpt_vector { } const_iterator to_iter_c(size_t index) const noexcept { - return iterator{&data(index)}; + return const_iterator{&data_c(index)}; } iterator to_iter(size_t index) noexcept { diff --git a/include/phtree/common/bpt_priority_queue.h b/include/phtree/common/bpt_priority_queue.h new file mode 100644 index 0000000..2b02e21 --- /dev/null +++ b/include/phtree/common/bpt_priority_queue.h @@ -0,0 +1,269 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_COMMON_BPT_PRIORITY_QUEUE_H +#define PHTREE_COMMON_BPT_PRIORITY_QUEUE_H + +#include +#include +#include +#include +#include +#include + +namespace phtree::bptree::detail { + +/** + * A priority queue based on a sorted vector. 
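+ *
+ * The elements are held in a std::vector that is kept sorted via std::lower_bound on
+ * every emplace(), so insertion costs O(n) while top() and pop() operate on the back of
+ * the vector in O(1); top_max() and pop_max() operate on the front (pop_max() is O(n)).
+ * top() returns the element that sorts last under Compare; with an inverted comparator
+ * such as the kNN distance comparator this is the nearest entry and top_max() the
+ * farthest one.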
+ */ +template > +class priority_queue { + public: + // Member types + using value_type = V; + // using allocator_type = Allocator + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using const_reference = const value_type&; + + public: + explicit priority_queue() noexcept : data_{}, comp_{} {} + + explicit priority_queue(size_t initial_size) noexcept : data_{}, comp_{} { + data_.reserve(initial_size); + } + + priority_queue(const priority_queue& rhs) noexcept : data_(rhs.data_), comp_{} {} + + priority_queue(priority_queue&& rhs) noexcept : data_{std::move(rhs.data_)}, comp_{} {} + + // TODO use default functions? + priority_queue& operator=(const priority_queue& rhs) noexcept { + data_ = rhs.data_; + comp_ = rhs.comp_; + return *this; + } + + priority_queue& operator=(priority_queue&& rhs) noexcept { + data_ = std::move(rhs.data_); + comp_ = std::move(rhs.comp_); + return *this; + } + + ~priority_queue() noexcept = default; + + const V& top() const { + assert(!data_.empty()); + return data_.back(); + } + + // TODO rename bottom() + const V& top_max() const { + assert(!data_.empty()); + return data_.front(); + } + + V& top_max() { + assert(!data_.empty()); + return data_.front(); + } + + void pop() { + assert(!data_.empty()); + data_.pop_back(); + } + + void pop_max() { + assert(!data_.empty()); + data_.erase(data_.begin()); + // data_.pop_front(); + } + + V& operator[](size_t index) noexcept { + assert(index < data_.size()); + return data_[index]; + } + + const V& operator[](size_t index) const noexcept { + assert(index < data_.size()); + return data_[index]; + } + + template + void emplace(Args&&... args) { + V v{std::forward(args)...}; + // TODO this is bad!!! We should ask for key/value separately.... and avoid "first" + auto pos = std::lower_bound(data_.begin(), data_.end(), v, comp_); + data_.emplace(pos, std::move(v)); + } + + void emplace_back(const V& v) { + // TODO this is bad!!! We should ask for key/value separately.... and avoid "first" + auto pos = std::lower_bound(data_.begin(), data_.end(), v, comp_); + data_.emplace(pos, std::move(v)); + } + + [[nodiscard]] bool empty() const noexcept { + return data_.empty(); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + void reserve(size_t size) noexcept { + data_.reserve(size); + } + + constexpr reference front() noexcept { + return data_.front(); + } + + constexpr const_reference front() const noexcept { + return data_.front(); + } + + constexpr reference back() noexcept { + return data_.back(); + } + + constexpr const_reference back() const noexcept { + return data_.back(); + } + + private: + std::vector data_; + Compare comp_; +}; + +/** + * A priority queue based on a sorted vector. + */ +template > +class priority_dequeue { + public: + // Member types + using value_type = V; + // using allocator_type = Allocator + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using const_reference = const value_type&; + + public: + explicit priority_dequeue(size_t initial_size = 16) noexcept : data_{}, comp_{} {} + + priority_dequeue(const priority_dequeue& rhs) noexcept : data_(rhs.data_), comp_{} {} + + priority_dequeue(priority_dequeue&& rhs) noexcept : data_{std::move(rhs.data_)}, comp_{} {} + + // TODO use default functions? + // TODO ommit comp_? 
-> check std::priority_queue + priority_dequeue& operator=(const priority_dequeue& rhs) noexcept { + data_ = rhs.data_; + comp_ = rhs.comp_; + return *this; + } + + priority_dequeue& operator=(priority_dequeue&& rhs) noexcept { + data_ = std::move(rhs.data_); + comp_ = std::move(rhs.comp_); + return *this; + } + + ~priority_dequeue() noexcept = default; + + const V& top() const { + assert(!data_.empty()); + return data_.back(); + } + + // TODO rename bottom() + const V& top_max() const { + assert(!data_.empty()); + return data_.front(); + } + + V& top_max() { + assert(!data_.empty()); + return data_.front(); + } + + void pop() { + assert(!data_.empty()); + data_.pop_back(); + } + + void pop_max() { + assert(!data_.empty()); + data_.pop_front(); + } + + V& operator[](size_t index) noexcept { + assert(index < data_.size()); + return data_[index]; + } + + const V& operator[](size_t index) const noexcept { + assert(index < data_.size()); + return data_[index]; + } + + template + void emplace(Args&&... args) { + V v{std::forward(args)...}; + // TODO this is bad!!! We should ask for key/value separately.... and avoid "first" + auto pos = std::lower_bound(data_.begin(), data_.end(), v, comp_); + data_.emplace(pos, std::move(v)); + } + + [[nodiscard]] bool empty() const noexcept { + return data_.empty(); + } + + [[nodiscard]] size_t size() const noexcept { + return data_.size(); + } + + void reserve(size_t size) noexcept { + data_.reserve(size); + } + + constexpr reference front() noexcept { + return data_.front(); + } + + constexpr const_reference front() const noexcept { + return data_.front(); + } + + constexpr reference back() noexcept { + return data_.back(); + } + + constexpr const_reference back() const noexcept { + return data_.back(); + } + + private: + std::deque data_; + Compare comp_; +}; + +} // namespace phtree::bptree::detail + +#endif // PHTREE_COMMON_BPT_PRIORITY_QUEUE_H diff --git a/include/phtree/common/bpt_vector_tree.h b/include/phtree/common/bpt_vector_tree.h new file mode 100644 index 0000000..046d41b --- /dev/null +++ b/include/phtree/common/bpt_vector_tree.h @@ -0,0 +1,322 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PHTREE_COMMON_BPT_VECTOR_TREE_H +#define PHTREE_COMMON_BPT_VECTOR_TREE_H + +#include +#include +#include +#include +#include + +namespace phtree::bptree::detail { + +/** + * + * @tparam V + * @tparam SIZE + */ +template +class bpt_vector_tree_iterator { + private: + using V2 = std::remove_cv_t; + using leaf_t = std::vector; + using parent_t = std::vector; + using leaf_iter_t = std::remove_cv_t().begin())>; + using parent_iter_t = std::remove_cv_t().begin())>; + + using normal_iterator = bpt_vector_tree_iterator; + + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = V; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + bpt_vector_tree_iterator() noexcept + : parent_{nullptr}, parent_iter_{nullptr}, leaf_iter_{nullptr} {} + explicit bpt_vector_tree_iterator( + parent_t* parent, parent_iter_t parent_iter, leaf_iter_t leaf_iter) noexcept + : parent_{parent}, parent_iter_{parent_iter}, leaf_iter_{leaf_iter} {} + + reference operator*() const noexcept { + return (*leaf_iter_); + } + + pointer operator->() const noexcept { + return &*leaf_iter_; + } + + constexpr bpt_vector_tree_iterator& operator++() noexcept { + ++leaf_iter_; + if (leaf_iter_ == parent_iter_->end()) { + if (parent_iter_ + 1 != parent_->end()) { + ++parent_iter_; + leaf_iter_ = parent_iter_->begin(); + } + } + return *this; + } + + // constexpr bpt_vector_tree_iterator operator++(int) noexcept { + // return bpt_vector_iterator(ptr_++); + // } + + constexpr bool operator<(const bpt_vector_tree_iterator& right) const noexcept { + return parent_iter_ < right.parent_iter_ || + (parent_iter_ == right.parent_iter_ && leaf_iter_ < right.leaf_iter_); + } + + friend bool operator==( + const bpt_vector_tree_iterator& left, + const bpt_vector_tree_iterator& right) noexcept { + return left.leaf_iter_ == right.leaf_iter_; + } + + friend bool operator!=( + const bpt_vector_tree_iterator& left, + const bpt_vector_tree_iterator& right) noexcept { + return left.leaf_iter_ != right.leaf_iter_; + } + + // Bidirectional iterator requirements + constexpr normal_iterator& operator--() noexcept { + if (leaf_iter_ == parent_iter_->begin()) { + --parent_iter_; + leaf_iter_ = parent_iter_->end(); + } + --leaf_iter_; + return *this; + } + + // constexpr normal_iterator operator--(int) noexcept { + // return normal_iterator(ptr_--); + // } + + // Random access iterator requirements + // constexpr reference operator[](difference_type n) const noexcept { + // return ptr_[n]; + // } + // + // constexpr normal_iterator& operator+=(difference_type n) noexcept { + // ptr_ += n; + // return *this; + // } + // + // constexpr normal_iterator operator+(difference_type n) const noexcept { + // return normal_iterator(ptr_ + n); + // } + // + // constexpr normal_iterator& operator-=(difference_type n) noexcept { + // ptr_ -= n; + // return *this; + // } + // + // constexpr normal_iterator operator-(difference_type n) const noexcept { + // return normal_iterator(ptr_ - n); + // } + // + // // Other // TODO??? 
+ // constexpr auto operator-(const normal_iterator& it) const noexcept { + // return ptr_ - it.ptr_; + // } + + // constexpr normal_iterator operator-(V* ptr) const noexcept { + // return normal_iterator(ptr_ - ptr); + // } + + // implicit conversion to const iterator + operator bpt_vector_tree_iterator() { + return bpt_vector_tree_iterator{parent_iter_, leaf_iter_}; + } + + private: + parent_t* parent_; + parent_iter_t parent_iter_; + leaf_iter_t leaf_iter_; +}; + +/** + * A vector_tree acts consists of a vector of vectors. + * The idea is that it has almost the same execution speed as vector for the following operations: + * - Access via [] operator + * - emplace_back + * - erase last entry (via erase_back) + * + * At the same time it scales much better when the vector grows because only the parent vector needs + * resizing. + */ +template +class vector_tree { + using node_t = std::vector; + + // TODO implement "Member types": https://en.cppreference.com/w/cpp/container/vector + public: + // Member types + using value_type = V; + // using allocator_type = Allocator + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using const_reference = const value_type&; + using pointer = value_type*; // Allocator::pointer (until C++11) + // std::allocator_traits::pointer (since C++11) + using const_pointer = const value_type*; // Allocator::const_pointer (until C++11) + // std::allocator_traits::const_pointer (since C++11) + // TODO LegacyContiguousIterator ?!!?!?!? + using iterator = + detail::bpt_vector_tree_iterator; // LegacyRandomAccessIterator + using const_iterator = detail::bpt_vector_tree_iterator; + // using iterator = value_type*; // LegacyRandomAccessIterator + // using const_iterator = const value_type*; + + public: + vector_tree() noexcept : data_{}, size_{0} {} + + vector_tree(const vector_tree& rhs) noexcept : data_(rhs.data_), size_{rhs.size_} {} + + vector_tree(vector_tree&& rhs) noexcept : data_{std::move(rhs.data_)}, size_{rhs.size_} {} + + vector_tree& operator=(const vector_tree& rhs) noexcept { + data_ = rhs.data_; + size_ = rhs.size_; + return *this; + } + + vector_tree& operator=(vector_tree&& rhs) noexcept { + data_ = std::move(rhs.data_); + size_ = rhs.size_; + return *this; + } + + ~vector_tree() noexcept = default; + + V& operator[](size_t index) noexcept { + assert(index < size_); + return node(index)[index % SIZE]; + } + + const V& operator[](size_t index) const noexcept { + assert(index < size_); + return node(index)[index % SIZE]; + } + + template + void emplace_back(Args&&... args) { + ensure_capacity(++size_); + back_node().emplace_back(std::forward(args)...); + } + + void emplace_back(V&& x) { + ensure_capacity(++size_); + back_node().emplace_back(std::move(x)); + } + + void emplace_back(const V& x) { + ensure_capacity(++size_); + back_node().emplace_back(x); + } + + void erase_back() { + assert(!empty()); + --size_; + back_node().erase(back_node().end() - 1); + if (back_node().empty() && data_.size() > 1) { + data_.erase(data_.end() - 1); + } + } + + [[nodiscard]] bool empty() const noexcept { + return size_ == 0; + } + + [[nodiscard]] size_t size() const noexcept { + return size_; + } + + /** + * Reserves capacity for min(size, SIZE) entries. 
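+ * Only the first child vector is reserved; each node holds at most SIZE entries and
+ * additional nodes are allocated lazily by emplace_back() via ensure_capacity().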
+ * @param size + */ + void reserve(size_t size) noexcept { + if (data_.empty()) { + data_.emplace_back(); + } + data_[0].reserve(std::min(size, SIZE)); + } + + constexpr reference front() noexcept { + return data_.front().front(); + } + + constexpr const_reference front() const noexcept { + return data_.front().front(); + } + + constexpr reference back() noexcept { + return data_.back().back(); + } + + constexpr const_reference back() const noexcept { + return data_.back().back(); + } + + // constexpr iterator begin() noexcept { + // return iterator(&data_, data_.begin(), data_.front().begin()); + // } + // + // constexpr const_iterator begin() const noexcept { + // return const_iterator(&data_, data_.begin(), data_.front().begin()); + // } + // + // constexpr iterator end() noexcept { + // // TODO end of empty iterator??? + // return iterator(&data_, data_.end() - 1, data_.back().end()); + // } + // + // constexpr const_iterator end() const noexcept { + // // TODO end of empty iterator??? + // return const_iterator(&data_, data_.end() - 1, data_.back().end()); + // } + + private: + node_t& node(size_t index) noexcept { + return data_[index / SIZE]; + } + + const node_t& node(size_t index) const noexcept { + return data_[index / SIZE]; + } + + node_t& back_node() noexcept { + return data_.back(); + } + + void ensure_capacity(size_t index) noexcept { + if (index > data_.size() * SIZE) { + data_.emplace_back(); + } + } + + std::vector data_; + size_t size_; +}; + +} // namespace phtree::bptree::detail + +#endif // PHTREE_COMMON_BPT_VECTOR_TREE_H diff --git a/include/phtree/v16/BUILD b/include/phtree/v16/BUILD index d851227..6da6121 100644 --- a/include/phtree/v16/BUILD +++ b/include/phtree/v16/BUILD @@ -13,6 +13,8 @@ cc_library( "iterator_full.h", "iterator_hc.h", "iterator_knn_hs.h", + "iterator_knn_hs1.h", + "iterator_knn_hs2.h", "iterator_lower_bound.h", "iterator_with_parent.h", "node.h", @@ -22,6 +24,7 @@ cc_library( "//visibility:public", ], deps = [ + "//include/phtree/aux", "//include/phtree/common", ], ) diff --git a/include/phtree/v16/iterator_knn_hs.h b/include/phtree/v16/iterator_knn_hs.h index 2791089..9b3dc81 100644 --- a/include/phtree/v16/iterator_knn_hs.h +++ b/include/phtree/v16/iterator_knn_hs.h @@ -19,6 +19,7 @@ #include "iterator_base.h" #include "phtree/common/common.h" +#include #include namespace improbable::phtree::v16 { @@ -76,8 +77,35 @@ class IteratorKnnHS : public IteratorWithFilter { // Initialize queue, use d=0 because every imaginable point lies inside the root Node queue_.emplace(0, &root); FindNextElement(); + ++N_CREATE; + if (N_CREATE % 10000 == 0) { + std::cout << "KNN1: " << MAX_DEPTH << " N_Q=" << N_CREATE + << " N_PR=" << N_PROCESSED / N_CREATE << " N_PR_R=" << N_PR_RESULT / N_CREATE + << " N_PR_N=" << N_PR_NODES / N_CREATE << " N_Q_R=" << N_Q_RESULT / N_CREATE + << " N_Q_N=" << N_Q_NODES / N_CREATE << " N_Q_N=" << N_Q_NODES_0 / N_CREATE + << " avg_D=" << (TOTAL_DEPTH / N_CREATE) << std::endl; + MAX_DEPTH = 0; + N_CREATE = 0; + N_PROCESSED = 0; + N_PR_RESULT = 0; + N_PR_NODES = 0; + N_Q_RESULT = 0; + N_Q_NODES = 0; + N_Q_NODES_0 = 0; + TOTAL_DEPTH = 0; + } } + inline static size_t MAX_DEPTH{0}; + inline static long N_CREATE{0}; + inline static long N_PROCESSED{0}; + inline static long N_PR_RESULT{0}; + inline static long N_PR_NODES{0}; + inline static long N_Q_RESULT{0}; + inline static long N_Q_NODES{0}; + inline static long N_Q_NODES_0{0}; + inline static double TOTAL_DEPTH{0}; + [[nodiscard]] double distance() const { return current_distance_; } @@ 
-98,7 +126,9 @@ class IteratorKnnHS : public IteratorWithFilter { while (num_found_results_ < num_requested_results_ && !queue_.empty()) { auto& candidate = queue_.top(); auto* o = candidate.second; + ++N_PROCESSED; if (!o->IsNode()) { + ++N_PR_RESULT; // data entry ++num_found_results_; this->SetCurrentResult(o); @@ -108,6 +138,7 @@ class IteratorKnnHS : public IteratorWithFilter { queue_.pop(); return; } else { + ++N_PR_NODES; // inner node auto& node = o->GetNode(); queue_.pop(); @@ -115,16 +146,21 @@ class IteratorKnnHS : public IteratorWithFilter { auto& e2 = entry.second; if (this->ApplyFilter(e2)) { if (e2.IsNode()) { + ++N_Q_NODES; double d = DistanceToNode(e2.GetKey(), e2.GetNodePostfixLen() + 1); + N_Q_NODES_0 += d <= 0.0001; queue_.emplace(d, &e2); } else { + ++N_Q_RESULT; double d = distance_(center_post_, this->post(e2.GetKey())); queue_.emplace(d, &e2); } } } + MAX_DEPTH = std::max(MAX_DEPTH, queue_.size()); } } + TOTAL_DEPTH += queue_.size(); this->SetFinished(); current_distance_ = std::numeric_limits::max(); } diff --git a/include/phtree/v16/iterator_knn_hs1.h b/include/phtree/v16/iterator_knn_hs1.h new file mode 100644 index 0000000..9e44695 --- /dev/null +++ b/include/phtree/v16/iterator_knn_hs1.h @@ -0,0 +1,247 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_QUERY_KNN_HS1_H +#define PHTREE_V16_QUERY_KNN_HS1_H + +#include "iterator_base.h" +#include "phtree/common/bpt_priority_queue.h" +#include "phtree/common/common.h" +#include +#include + +namespace improbable::phtree::v16 { + +/* + * kNN query implementation that uses preprocessors and distance functions. + * + * Implementation after Hjaltason and Samet (with some deviations: no MinDist or MaxDist used). + * G. R. Hjaltason and H. Samet., "Distance browsing in spatial databases.", ACM TODS + * 24(2):265--318. 
1999 + */ + +namespace { +template +using EntryDist1 = std::pair*>; + +template +struct CompareEntryDist1 { + bool operator()(const ENTRY& left, const ENTRY& right) const { + return left.first > right.first; + }; +}; +} // namespace + +template +class IteratorKnnHS1 : public IteratorWithFilter { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyExternal = typename CONVERT::KeyExternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using EntryT = typename IteratorWithFilter::EntryT; + using EntryDistT = EntryDist1; + + // static_assert(std::is_trivially_copyable::value); + // static_assert(std::is_trivially_move_assignable::value); + // static_assert(std::is_trivially_move_constructible::value); + + public: + template + explicit IteratorKnnHS1( + const EntryT& root, + size_t min_results, + const KeyInternal& center, + const CONVERT* converter, + DIST&& dist, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) + , center_{center} + , center_post_{converter->post(center)} + , current_distance_{std::numeric_limits::max()} + , num_found_results_(0) + , num_requested_results_(min_results) + , distance_(std::forward(dist)) { + if (min_results <= 0 || root.GetNode().GetEntryCount() == 0) { + this->SetFinished(); + return; + } + + // Initialize queue, use d=0 because every imaginable point lies inside the root Node + assert(root.IsNode()); + queue_n_.emplace(EntryDistT{0, &const_cast(root)}); // TODO remove const_casts etc + + FindNextElement(); + ++N_CREATE; + if (N_CREATE % 10000 == 0) { + std::cout << "KNN1: " << MAX_DEPTH << " N_Q=" << N_CREATE + << " N_PR=" << N_PROCESSED / N_CREATE << " N_PR_R=" << N_PR_RESULT / N_CREATE + << " N_PR_N=" << N_PR_NODES / N_CREATE << " N_Q_R=" << N_Q_RESULT / N_CREATE + << " N_Q_N=" << N_Q_NODES / N_CREATE << " N_Q_N=" << N_Q_NODES_0 / N_CREATE + << " avg_D=" << (TOTAL_DEPTH / N_CREATE) << std::endl; + MAX_DEPTH = 0; + N_CREATE = 0; + N_PROCESSED = 0; + N_PR_RESULT = 0; + N_PR_NODES = 0; + N_Q_RESULT = 0; + N_Q_NODES = 0; + N_Q_NODES_0 = 0; + TOTAL_DEPTH = 0; + } + } + + inline static size_t MAX_DEPTH{0}; + inline static long N_CREATE{0}; + inline static long N_PROCESSED{0}; + inline static long N_PR_RESULT{0}; + inline static long N_PR_NODES{0}; + inline static long N_Q_RESULT{0}; + inline static long N_Q_NODES{0}; + inline static long N_Q_NODES_0{0}; + inline static double TOTAL_DEPTH{0}; + + [[nodiscard]] double distance() const { + return current_distance_; + } + + IteratorKnnHS1& operator++() noexcept { + FindNextElement(); + return *this; + } + + [[deprecated]] // This iterator is MUCH slower! + IteratorKnnHS1 + operator++(int) noexcept { + IteratorKnnHS1 iterator(*this); + ++(*this); + return iterator; + } + + private: + void FindNextElement() { + while (num_found_results_ < num_requested_results_ && + !(queue_n_.empty() && queue_v_.empty())) { + bool use_v = !queue_v_.empty(); + if (use_v && !queue_n_.empty()) { + use_v = queue_v_.top() < queue_n_.top(); // TODO "<=" ??? + } + ++N_PROCESSED; + if (use_v) { + ++N_PR_RESULT; + // data entry + auto& cand_v = queue_v_.top(); + ++num_found_results_; + this->SetCurrentResult(cand_v.second); + current_distance_ = cand_v.first; + // We need to pop() AFTER we processed the value, otherwise the reference is + // overwritten. 
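+                // (top() hands out a reference into the queue's backing vector, so
+                // SetCurrentResult() and current_distance_ above must read it first.)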
+ queue_v_.pop(); + return; + } else { + ++N_PR_NODES; + // inner node + auto top = queue_n_.top(); + auto& node = top.second->GetNode(); + auto d_node = top.first; + queue_n_.pop(); + + if (queue_v_.size() >= num_requested_results_ && d_node > max_node_dist_) { + // ignore this node + continue; + } + + for (auto& entry : node.Entries()) { + // auto& e2 = const_cast(entry.second); + const auto& e2 = entry.second; + if (this->ApplyFilter(e2)) { + if (e2.IsNode()) { + ++N_Q_NODES; + double d = DistanceToNode(e2.GetKey(), e2.GetNodePostfixLen() + 1); + N_Q_NODES_0 += d <= 0.0001; + if (d <= max_node_dist_) { + queue_n_.emplace(d, &e2); + } + } else { + ++N_Q_RESULT; + double d = distance_(center_post_, this->post(e2.GetKey())); + if (d < max_node_dist_) { + queue_v_.emplace(d, &e2); + MAX_DEPTH = std::max(MAX_DEPTH, queue_n_.size()); + if (queue_v_.size() >= + num_requested_results_ - num_found_results_) { + if (queue_v_.size() > + num_requested_results_ - num_found_results_) { + queue_v_.pop_max(); + } + double d_max = queue_v_.top_max().first; +// auto pos_max = queue_v_.size() - num_requested_results_ + +// num_found_results_; +// double d_max = queue_v_[pos_max].first; + max_node_dist_ = std::min(max_node_dist_, d_max); + } + } + } + } + } + MAX_DEPTH = std::max(MAX_DEPTH, queue_n_.size()); + } + } + TOTAL_DEPTH += queue_n_.size(); + this->SetFinished(); + current_distance_ = std::numeric_limits::max(); + } + + double DistanceToNode(const KeyInternal& prefix, std::uint32_t bits_to_ignore) { + assert(bits_to_ignore < detail::MAX_BIT_WIDTH); + SCALAR mask_min = detail::MAX_MASK << bits_to_ignore; + SCALAR mask_max = ~mask_min; + KeyInternal buf; + // The following calculates the point inside the node that is closest to center_. + for (dimension_t i = 0; i < DIM; ++i) { + // if center_[i] is outside the node, return distance to the closest edge, + // otherwise return center_[i] itself (assume possible distance=0) + SCALAR min = prefix[i] & mask_min; + SCALAR max = prefix[i] | mask_max; + buf[i] = min > center_[i] ? min : (max < center_[i] ? max : center_[i]); + } + return distance_(center_post_, this->post(buf)); + } + + private: + const KeyInternal center_; + // center after post processing == the external representation + const KeyExternal center_post_; + double current_distance_; + size_t num_found_results_; + size_t num_requested_results_; + std::priority_queue, CompareEntryDist1> + queue_n_; + // std::priority_queue, + // CompareEntryDistByDistance1> + // queue_v_; + // ::phtree::bptree::detail::priority_queue> + // queue_n_; + ::phtree::bptree::detail::priority_queue> queue_v_{ + num_requested_results_}; + DISTANCE distance_; + double max_node_dist_ = std::numeric_limits::max(); +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_QUERY_KNN_HS1_H diff --git a/include/phtree/v16/iterator_knn_hs2.h b/include/phtree/v16/iterator_knn_hs2.h new file mode 100644 index 0000000..a26d779 --- /dev/null +++ b/include/phtree/v16/iterator_knn_hs2.h @@ -0,0 +1,247 @@ +/* + * Copyright 2020 Improbable Worlds Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PHTREE_V16_QUERY_KNN_HS2_H +#define PHTREE_V16_QUERY_KNN_HS2_H + +#include "iterator_base.h" +#include "phtree/aux/min_max_tree_heap.h" +#include "phtree/aux/min_max_vector_heap.h" +#include "phtree/common/b_plus_tree_heap.h" +#include "phtree/common/common.h" +#include + +namespace improbable::phtree::v16 { + +/* + * kNN query implementation that uses preprocessors and distance functions. + * + * Implementation after Hjaltason and Samet (with some deviations: no MinDist or MaxDist used). + * G. R. Hjaltason and H. Samet., "Distance browsing in spatial databases.", ACM TODS + * 24(2):265--318. 1999 + */ + +namespace { +template +using EntryDist2 = std::pair*>; // TODO const pointer?!?! + +template +struct CompareEntryDistByDistance2 { + bool operator()(const ENTRY& left, const ENTRY& right) const { + return left.first > right.first; + }; +}; +} // namespace + +template +class IteratorKnnHS2 : public IteratorWithFilter { + static constexpr dimension_t DIM = CONVERT::DimInternal; + using KeyExternal = typename CONVERT::KeyExternal; + using KeyInternal = typename CONVERT::KeyInternal; + using SCALAR = typename CONVERT::ScalarInternal; + using EntryT = typename IteratorWithFilter::EntryT; + using EntryDistT = EntryDist2; + + public: + template + explicit IteratorKnnHS2( + const EntryT& root, + size_t min_results, + const KeyInternal& center, + const CONVERT* converter, + DIST&& dist, + F&& filter) + : IteratorWithFilter(converter, std::forward(filter)) + , center_{center} + , center_post_{converter->post(center)} + , current_distance_{std::numeric_limits::max()} + , num_found_results_(0) + , num_requested_results_(min_results) + , distance_(std::forward(dist)) { + if (min_results <= 0 || root.GetNode().GetEntryCount() == 0) { + this->SetFinished(); + return; + } + + // Initialize queue, use d=0 because every imaginable point lies inside the root Node + assert(root.IsNode()); + queue_n_.emplace(EntryDistT{0, &const_cast(root)}); // TODO remove const_casts etc + + FindNextElement(); + } + + + [[nodiscard]] double distance() const { + return current_distance_; + } + + IteratorKnnHS2& operator++() noexcept { + FindNextElement(); + return *this; + } + + [[deprecated]] // This iterator is MUCH slower! + IteratorKnnHS2 operator++(int) noexcept { + IteratorKnnHS2 iterator(*this); + ++(*this); + return iterator; + } + + private: + void FindNextElement() { + while (num_found_results_ < num_requested_results_ && + !(queue_n_.empty() && queue_v_.empty())) { + bool use_v = !queue_v_.empty(); + if (use_v && !queue_n_.empty()) { + use_v = queue_n_.top() >= queue_v_.top(); + } + if (use_v) { + // data entry + auto& cand_v = queue_v_.top(); + ++num_found_results_; + this->SetCurrentResult(cand_v.second); + current_distance_ = cand_v.first; + // We need to pop() AFTER we processed the value, otherwise the reference is + // overwritten. + queue_v_.pop(); + return; + } else { + // inner node + auto& node = queue_n_.top().second->GetNode(); + auto d_node = queue_n_.top().first; // TODO merge with previous + queue_n_.pop(); + + if (queue_v_.size() >= num_requested_results_ && d_node >max_node_dist_) { + // ignore this node + continue; + } + // TODO + // - Improve bpt pop()/top() and top_max()/pop_max() + // - THIS works only if FILTER=true: + // Get a_max_dist from first k nodes (plus found entries) and use as max_node_dist_ + // Repeat this! 
+ // - Consider rebuilding queue_n once queue_v is full. + // - Heuristic: Consider rebuild queue_n when + // -- new max_node_dist is a lot smaller than the previous one ( + // wont work for high dim.... + // -- size() > 2*k -> this depends on DIM! + // We could just do it, and depending on how much gets removed we wait + // longer/shorter for next rebuild. + + // TODO + // - queue_v should be decreased in size as num_found_results_ increases! + + // TODO test/handle/assert case when tree.size() < n + + // TODO TODO TODO TODO TODO + // TODO Minmax tree.top-max() probably fails for k=2 and k=3!! + // TODO test copy/move of minmax and of kNN iterator! + + // CC=clang bazel test //test:phtree_test --config=ubsan + + + for (auto& entry : node.Entries()) { + //auto& e2 = const_cast(entry.second); + const auto& e2 = entry.second; + if (this->ApplyFilter(e2)) { + if (e2.IsNode()) { + double d = DistanceToNode(e2.GetKey(), e2.GetNodePostfixLen() + 1); + if (d <= max_node_dist_) { + queue_n_.emplace(d, &e2); + } + } else { + double d = distance_(center_post_, this->post(e2.GetKey())); +// if (queue_v_.size() < num_requested_results_) { +// queue_v_.emplace(d, &e2); +// } else if (d < queue_v_.top_max().first) { +// queue_v_.pop_max(); +// queue_v_.emplace(d, &e2); +// } +// if (queue_v_.size() >= num_requested_results_) { +// // TODO adjust with 10th value in queue i.o. last value? +// // -> in case we allow more than 10... +// max_node_dist_ = std::min(max_node_dist_, queue_v_.top_max().first); +// } + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + // TODO num_found_results_ breaks because of pop_max() being wrong for n < 3!!!! + if (d < max_node_dist_) { + queue_v_.emplace(d, &e2); + if (queue_v_.size() > num_requested_results_ - num_found_results_) { + queue_v_.pop_max(); + } + if (queue_v_.size() >= num_requested_results_ - num_found_results_) { + // TODO adjust with 10th value in queue i.o. last value? + // -> in case we allow more than 10... + max_node_dist_ = std::min(max_node_dist_, queue_v_.top_max().first); + } + } + } + } + } + } + } + this->SetFinished(); + current_distance_ = std::numeric_limits::max(); + } + + double DistanceToNode(const KeyInternal& prefix, std::uint32_t bits_to_ignore) { + assert(bits_to_ignore < detail::MAX_BIT_WIDTH); + SCALAR mask_min = detail::MAX_MASK << bits_to_ignore; + SCALAR mask_max = ~mask_min; + KeyInternal buf; + // The following calculates the point inside the node that is closest to center_. + for (dimension_t i = 0; i < DIM; ++i) { + // if center_[i] is outside the node, return distance to the closest edge, + // otherwise return center_[i] itself (assume possible distance=0) + SCALAR min = prefix[i] & mask_min; + SCALAR max = prefix[i] | mask_max; + buf[i] = min > center_[i] ? min : (max < center_[i] ? 
max : center_[i]); + } + return distance_(center_post_, this->post(buf)); + } + + private: + const KeyInternal center_; + // center after post processing == the external representation + const KeyExternal center_post_; + double current_distance_; +// std:: +// priority_queue, CompareEntryDistByDistance2> +// queue_n_; +// std:: +// priority_queue, CompareEntryDistByDistance2> +// queue_v_; +// ::phtree::aux::min_max_tree_heap> queue_n_; +// ::phtree::aux::min_max_tree_heap> queue_v_; + ::phtree::aux::min_max_vector_heap> queue_n_; + ::phtree::aux::min_max_vector_heap> queue_v_; +// ::phtree::bptree::b_plus_tree_heap> queue_n_; +// ::phtree::bptree::b_plus_tree_heap> queue_v_; + size_t num_found_results_; + size_t num_requested_results_; + DISTANCE distance_; + double max_node_dist_ = std::numeric_limits::max(); +}; + +} // namespace improbable::phtree::v16 + +#endif // PHTREE_V16_QUERY_KNN_HS2_H diff --git a/include/phtree/v16/phtree_v16.h b/include/phtree/v16/phtree_v16.h index a63e597..8c7583d 100644 --- a/include/phtree/v16/phtree_v16.h +++ b/include/phtree/v16/phtree_v16.h @@ -24,6 +24,8 @@ #include "iterator_full.h" #include "iterator_hc.h" #include "iterator_knn_hs.h" +#include "iterator_knn_hs1.h" +#include "iterator_knn_hs2.h" #include "iterator_lower_bound.h" #include "iterator_with_parent.h" #include "node.h" @@ -734,7 +736,7 @@ class PhTreeV16 { const KeyT& center, DISTANCE&& distance_function = DISTANCE(), FILTER&& filter = FILTER()) const { - return IteratorKnnHS( + return IteratorKnnHS1( root_, min_results, center, diff --git a/test/common/BUILD b/test/common/BUILD index 0f9434f..4b7dc58 100644 --- a/test/common/BUILD +++ b/test/common/BUILD @@ -52,6 +52,19 @@ cc_test( ], ) +cc_test( + name = "b_priority_queue_test", + timeout = "long", + srcs = [ + "b_priority_queue_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + cc_test( name = "b_vector_test", timeout = "long", @@ -65,6 +78,19 @@ cc_test( ], ) +cc_test( + name = "b_vector_tree_test", + timeout = "long", + srcs = [ + "b_vector_tree_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + cc_test( name = "b_plus_tree_hash_map_test", timeout = "long", @@ -104,6 +130,19 @@ cc_test( ], ) +cc_test( + name = "b_plus_tree_heap_test", + timeout = "long", + srcs = [ + "b_plus_tree_heap_test.cc", + ], + linkstatic = True, + deps = [ + "//:phtree", + "@gtest//:gtest_main", + ], +) + cc_test( name = "flat_sparse_map_test", timeout = "long", diff --git a/test/common/CMakeLists.txt b/test/common/CMakeLists.txt index 9fd11f9..911d0e1 100644 --- a/test/common/CMakeLists.txt +++ b/test/common/CMakeLists.txt @@ -1,6 +1,7 @@ include(scripts.cmake) package_add_test(b_plus_tree_hash_map_test b_plus_tree_hash_map_test.cc) +package_add_test(b_plus_tree_heap_test b_plus_tree_heap_test.cc) package_add_test(b_plus_tree_map_test b_plus_tree_map_test.cc) package_add_test(b_plus_tree_multimap_test b_plus_tree_multimap_test.cc) package_add_test(base_types_test base_types_test.cc) diff --git a/test/common/b_plus_tree_heap_test.cc b/test/common/b_plus_tree_heap_test.cc new file mode 100644 index 0000000..ab53003 --- /dev/null +++ b/test/common/b_plus_tree_heap_test.cc @@ -0,0 +1,354 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "phtree/common/b_plus_tree_heap.h" +#include +#include +#include +#include + +using namespace phtree::bptree; + +static int default_construct_count_ = 0; +static int construct_count_ = 0; +static int copy_construct_count_ = 0; +static int move_construct_count_ = 0; +static int copy_assign_count_ = 0; +static int move_assign_count_ = 0; +static int destruct_count_ = 0; + +[[maybe_unused]] static void reset_id_counters() { + default_construct_count_ = 0; + construct_count_ = 0; + copy_construct_count_ = 0; + move_construct_count_ = 0; + copy_assign_count_ = 0; + move_assign_count_ = 0; + destruct_count_ = 0; +} + +[[maybe_unused]] static void print_id_counters() { + std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_ + << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_ + << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_ + << " d=" << destruct_count_ << std::endl; +} + +template +void populate( + const size_t N, + b_plus_tree_heap& test_map, + std::multimap& reference_map, + std::vector>& reverse_map, + std::default_random_engine& random_engine) { + std::uniform_int_distribution<> cube_distribution(0, (int)N / 2); + for (size_t j = 0; j < N; j++) { + Key key = cube_distribution(random_engine); + Value value = j; + bool hasVal = test_map.find(key) != test_map.end(); + bool hasValRef = reference_map.find(key) != reference_map.end(); + assert(hasVal == hasValRef); + reference_map.emplace(key, value); + test_map.try_emplace(key, value); + reverse_map.emplace_back(value, key); + } +} + +struct Id { + Id() : first{-1}, _i{0} { + ++default_construct_count_; + } + + explicit Id(double dist, const size_t i) : first{dist}, _i{static_cast(i)} { + ++construct_count_; + } + + explicit Id(double dist, const int i) : first{dist}, _i{i} { + ++construct_count_; + } + + Id(const Id& other) { + ++copy_construct_count_; + first = other.first; + _i = other._i; + } + + Id(Id&& other) noexcept { + ++move_construct_count_; + first = other.first; + _i = other._i; + } + + Id& operator=(const Id& other) noexcept { + ++copy_assign_count_; + first = other.first; + _i = other._i; + return *this; + } + Id& operator=(Id&& other) noexcept { + ++move_assign_count_; + first = other.first; + _i = other._i; + return *this; + } + + bool operator==(const Id& rhs) const { + return _i == rhs._i && first == rhs.first; + } + + ~Id() { + ++destruct_count_; + } + + double first; + int _i; +}; + +struct IdComparator { + bool operator()(const Id& left, const Id& right) const { + return left.first > right.first; + } +}; + +template +struct SwapComp { + Compare comp; + template + bool operator()(const Key& x, const Key& y) const { + return !comp(x, y); + } +}; + +void SmokeTest() { + const size_t N = 1000; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + std::uniform_real_distribution dist_distribution(0, 100); + + using ValueT = Id; + for (int i = 0; i < 100; i++) { + b_plus_tree_heap> test_map; + std::priority_queue, IdComparator> ref_heap{}; + + // populate + for (size_t j = 0; j < N; 
j++) { + double dist = ((i + 1) * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + // test_map._check(); + ref_heap.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap.size()); + ASSERT_EQ(test_map.size(), j + 1u); + + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + } + + // update + for (size_t j = 0; j < N; j++) { + // pop + double d1 = test_map.top().first; + test_map.pop(); + ref_heap.pop(); + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + ASSERT_LE(d1, test_map.top().first); + + // push + double dist = (i * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + ref_heap.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap.size()); + ASSERT_EQ(test_map.size(), N); + + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + } + + // drain + double prev_dist = 0; + for (size_t j = 0; j < N; j++) { + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + ASSERT_LE(prev_dist, test_map.top().first); + double dist = test_map.top().first; + test_map.pop(); + ref_heap.pop(); + prev_dist = dist; + } + + ASSERT_EQ(0u, test_map.size()); + ASSERT_TRUE(test_map.empty()); + } +} + +TEST(PhTreeBptHeapTest, SmokeTest) { + SmokeTest(); +} + +void SmokeTestTop(bool reverse) { + const size_t N = 1000; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + std::uniform_real_distribution dist_distribution(0, 100); + + using ValueT = Id; + for (int i = 0; i < 100; i++) { + b_plus_tree_heap> test_map; + std::priority_queue, SwapComp> ref_heap_inverse{}; + + // populate + for (size_t j = 0; j < N; j++) { + double dist = ((i + 1) * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + // test_map._check(); + ref_heap_inverse.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap_inverse.size()); + ASSERT_EQ(test_map.size(), j + 1u); + + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + } + + // update + for (size_t j = 0; j < N; j++) { + // pop + double d1 = test_map.top().first; + test_map.pop_max(); + ref_heap_inverse.pop(); + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + ASSERT_LE(d1, test_map.top().first); + + // push + double dist = (i * (j % 2)) == 0 ? 
0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + ref_heap_inverse.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap_inverse.size()); + ASSERT_EQ(test_map.size(), N); + + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + } + + // drain + double prev_dist = 0; + for (size_t j = 0; j < N; j++) { + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + ASSERT_LE(prev_dist, test_map.top().first); + double dist = test_map.top().first; + test_map.pop_max(); + ref_heap_inverse.pop(); + prev_dist = dist; + } + + ASSERT_EQ(0u, test_map.size()); + ASSERT_TRUE(test_map.empty()); + } +} + +TEST(PhTreeBptHeapTest, SmokeTestTop) { + SmokeTestTop(false); +} + +TEST(PhTreeBptHeapTest, SmokeTestTopReverse) { + SmokeTestTop(true); // TODO +} + +TEST(PhTreeBptHeapTest, DestructionTest) { + const size_t N = 1000; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + std::uniform_real_distribution dist_distribution(0, 100); + + using ValueT = Id; + for (int i = 0; i < 10; i++) { + b_plus_tree_heap> test_map; + + // populate + for (size_t j = 0; j < N; j++) { + double dist = ((i + 1) * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + } + // Remove some (or not) + for (size_t j = 0; j < i * N / 100; j++) { + double dist = ((i + 1) * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.pop(); + test_map.pop_max(); + } + // Automatic destruction happens here. + } +} + +template +void test_tree(TREE& tree) { + // test various operations + tree.emplace(Id(43, 2)); + Id id3{44, 3}; + tree.emplace(id3); + ASSERT_EQ(tree.size(), 3u); + + tree.pop(); + ASSERT_EQ(2u, tree.size()); + tree.pop(); + tree.pop(); + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeBptMulitmapTest, TestCopyConstruct) { + using TestTree = b_plus_tree_heap; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{tree1}; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptMulitmapTest, TestCopyAssign) { + using TestTree = b_plus_tree_heap; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{}; + tree = tree1; + test_tree(tree); + // The old tree should still work! 
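+    // (i.e. copy-assignment must leave the source heap fully usable, not moved-from)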
+ test_tree(tree1); +} + +TEST(PhTreeBptMulitmapTest, TestMoveConstruct) { + using TestTree = b_plus_tree_heap; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{std::move(tree1)}; + test_tree(tree); +} + +TEST(PhTreeBptMulitmapTest, TestMoveAssign) { + using TestTree = b_plus_tree_heap; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{}; + tree = std::move(tree1); + test_tree(tree); +} diff --git a/test/common/b_plus_tree_multimap_test.cc b/test/common/b_plus_tree_multimap_test.cc index 8436299..1a3f8ea 100644 --- a/test/common/b_plus_tree_multimap_test.cc +++ b/test/common/b_plus_tree_multimap_test.cc @@ -389,7 +389,7 @@ TEST(PhTreeBptMulitmapTest, SmokeTestUpdateByIterator) { std::multimap reference_map{}; std::vector> reverse_map{}; populate(N, test_map, reference_map, reverse_map, random_engine); - for (int i = 0; i < 100; i++) { + for (int i = 0; i < 10; i++) { std::shuffle(reverse_map.begin(), reverse_map.end(), random_engine); for (auto& reverse_pair : reverse_map) { auto key = reverse_pair.second; @@ -447,6 +447,66 @@ TEST(PhTreeBptMulitmapTest, SmokeTestUpdateByIterator) { } } +template +void SmokeTestOrdering(bool inverse_ordering) { + const size_t N = 300; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + + using Key = size_t; + using Value = Id; + for (int i = 0; i < 10; i++) { + b_plus_tree_multimap test_map; + for (size_t j = 0; j < N; j++) { + size_t key = cube_distribution(random_engine); + + auto lb = test_map.lower_bound(key); + if (lb != test_map.end()) { + if (inverse_ordering) { + ASSERT_LE(lb->first, key); + } else { + ASSERT_GE(lb->first, key); + } + } + + Value val{j}; + CheckMapResult(test_map.emplace(key, val), test_map.end(), key, val); + ASSERT_EQ(test_map.find(key)->first, key); + ASSERT_EQ(test_map.lower_bound(key)->first, key); + test_map._check(); + } + ASSERT_EQ(test_map.size(), N); + + for (size_t j = 0; j < N / 2; ++j) { + size_t key = cube_distribution(random_engine); + test_map.erase(key); + } + ASSERT_LE(test_map.size(), N); + ASSERT_GE(test_map.size(), N / 4); + + Key prev_key = inverse_ordering ? N : 0; + size_t n = 0; + for (auto& entry : test_map) { + if (inverse_ordering) { + ASSERT_GE(prev_key, entry.first); + } else { + ASSERT_LE(prev_key, entry.first); + } + prev_key = entry.first; + ++n; + } + ASSERT_EQ(test_map.size(), n); + } +} + +TEST(PhTreeBptMulitmapTest, TestOrdering) { + SmokeTestOrdering>(false); +} + +TEST(PhTreeBptMulitmapTest, TestOrderingReverse) { + SmokeTestOrdering>(true); +} + template void test_tree(TREE& tree) { using Key = size_t; diff --git a/test/common/b_priority_queue_test.cc b/test/common/b_priority_queue_test.cc new file mode 100644 index 0000000..dd28d1a --- /dev/null +++ b/test/common/b_priority_queue_test.cc @@ -0,0 +1,354 @@ +/* + * Copyright 2023 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "phtree/common/bpt_priority_queue.h"
+#include 
+#include 
+#include 
+#include 
+
+using namespace phtree::bptree;
+
+static int default_construct_count_ = 0;
+static int construct_count_ = 0;
+static int copy_construct_count_ = 0;
+static int move_construct_count_ = 0;
+static int copy_assign_count_ = 0;
+static int move_assign_count_ = 0;
+static int destruct_count_ = 0;
+
+[[maybe_unused]] static void reset_id_counters() {
+    default_construct_count_ = 0;
+    construct_count_ = 0;
+    copy_construct_count_ = 0;
+    move_construct_count_ = 0;
+    copy_assign_count_ = 0;
+    move_assign_count_ = 0;
+    destruct_count_ = 0;
+}
+
+[[maybe_unused]] static void print_id_counters() {
+    std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_
+              << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_
+              << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_
+              << " d=" << destruct_count_ << std::endl;
+}
+
+template <typename Key, typename Value>
+void populate(
+    const size_t N,
+    detail::priority_queue& test_map,
+    std::multimap<Key, Value>& reference_map,
+    std::vector<std::pair<Value, Key>>& reverse_map,
+    std::default_random_engine& random_engine) {
+    std::uniform_int_distribution<> cube_distribution(0, (int)N / 2);
+    for (size_t j = 0; j < N; j++) {
+        Key key = cube_distribution(random_engine);
+        Value value = j;
+        bool hasVal = test_map.find(key) != test_map.end();
+        bool hasValRef = reference_map.find(key) != reference_map.end();
+        assert(hasVal == hasValRef);
+        reference_map.emplace(key, value);
+        test_map.try_emplace(key, value);
+        reverse_map.emplace_back(value, key);
+    }
+}
+
+struct Id {
+    Id() : first{-1}, _i{0} {
+        ++default_construct_count_;
+    }
+
+    explicit Id(double dist, const size_t i) : first{dist}, _i{static_cast<int>(i)} {
+        ++construct_count_;
+    }
+
+    explicit Id(double dist, const int i) : first{dist}, _i{i} {
+        ++construct_count_;
+    }
+
+    Id(const Id& other) {
+        ++copy_construct_count_;
+        first = other.first;
+        _i = other._i;
+    }
+
+    Id(Id&& other) noexcept {
+        ++move_construct_count_;
+        first = other.first;
+        _i = other._i;
+    }
+
+    Id& operator=(const Id& other) noexcept {
+        ++copy_assign_count_;
+        first = other.first;
+        _i = other._i;
+        return *this;
+    }
+    Id& operator=(Id&& other) noexcept {
+        ++move_assign_count_;
+        first = other.first;
+        _i = other._i;
+        return *this;
+    }
+
+    bool operator==(const Id& rhs) const {
+        return _i == rhs._i && first == rhs.first;
+    }
+
+    ~Id() {
+        ++destruct_count_;
+    }
+
+    double first;
+    int _i;
+};
+
+struct IdComparator {
+    bool operator()(const Id& left, const Id& right) const {
+        return left.first > right.first;
+    }
+};
+
+template <typename Compare>
+struct SwapComp {
+    Compare comp;
+    template <typename Key>
+    bool operator()(const Key& x, const Key& y) const {
+        return !comp(x, y);
+    }
+};
+
+void SmokeTest() {
+    const size_t N = 1000;
+    std::default_random_engine random_engine{0};
+    std::uniform_int_distribution<> cube_distribution(0, N / 2);
+    std::uniform_real_distribution<double> dist_distribution(0, 100);
+
+    using ValueT = Id;
+    for (int i = 0; i < 100; i++) {
+        detail::priority_queue test_map;
+        std::priority_queue<ValueT, std::vector<ValueT>, IdComparator> ref_heap{};
+
+        // populate
+        for (size_t j = 0; j < N; j++) {
+            double dist = ((i + 1) * (j % 2)) == 0 ?
0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + // test_map._check(); + ref_heap.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap.size()); + ASSERT_EQ(test_map.size(), j + 1u); + + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + } + + // update + for (size_t j = 0; j < N; j++) { + // pop + double d1 = test_map.top().first; + test_map.pop(); + ref_heap.pop(); + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + ASSERT_LE(d1, test_map.top().first); + + // push + double dist = (i * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + ref_heap.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap.size()); + ASSERT_EQ(test_map.size(), N); + + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + } + + // drain + double prev_dist = 0; + for (size_t j = 0; j < N; j++) { + ASSERT_EQ(test_map.top().first, ref_heap.top().first); + ASSERT_LE(prev_dist, test_map.top().first); + double dist = test_map.top().first; + test_map.pop(); + ref_heap.pop(); + prev_dist = dist; + } + + ASSERT_EQ(0u, test_map.size()); + ASSERT_TRUE(test_map.empty()); + } +} + +TEST(PhTreeBptHeapTest, SmokeTest) { + SmokeTest(); +} + +void SmokeTestTop(bool reverse) { + const size_t N = 1000; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + std::uniform_real_distribution dist_distribution(0, 100); + + using ValueT = Id; + for (int i = 0; i < 100; i++) { + detail::priority_queue test_map; + std::priority_queue, SwapComp> ref_heap_inverse{}; + + // populate + for (size_t j = 0; j < N; j++) { + double dist = ((i + 1) * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + // test_map._check(); + ref_heap_inverse.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap_inverse.size()); + ASSERT_EQ(test_map.size(), j + 1u); + + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + } + + // update + for (size_t j = 0; j < N; j++) { + // pop + double d1 = test_map.top().first; + test_map.pop_max(); + ref_heap_inverse.pop(); + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + ASSERT_LE(d1, test_map.top().first); + + // push + double dist = (i * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + ref_heap_inverse.emplace(val); + + ASSERT_EQ(test_map.size(), ref_heap_inverse.size()); + ASSERT_EQ(test_map.size(), N); + + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + } + + // drain + double prev_dist = 0; + for (size_t j = 0; j < N; j++) { + ASSERT_EQ(test_map.top_max().first, ref_heap_inverse.top().first); + ASSERT_LE(prev_dist, test_map.top().first); + double dist = test_map.top().first; + test_map.pop_max(); + ref_heap_inverse.pop(); + prev_dist = dist; + } + + ASSERT_EQ(0u, test_map.size()); + ASSERT_TRUE(test_map.empty()); + } +} + +TEST(PhTreeBptHeapTest, SmokeTestTop) { + SmokeTestTop(false); +} + +TEST(PhTreeBptHeapTest, SmokeTestTopReverse) { + SmokeTestTop(true); // TODO +} + +TEST(PhTreeBptHeapTest, DestructionTest) { + const size_t N = 1000; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N / 2); + std::uniform_real_distribution dist_distribution(0, 100); + + using ValueT = Id; + for (int i = 0; i < 10; i++) { + detail::priority_queue test_map; + + // populate + for (size_t j = 0; j < N; j++) { + double dist = ((i + 1) * (j % 2)) == 0 ? 
0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.emplace(val); + } + // Remove some (or not) + for (size_t j = 0; j < i * N / 100; j++) { + double dist = ((i + 1) * (j % 2)) == 0 ? 0 : dist_distribution(random_engine); + ValueT val{dist, j}; + test_map.pop(); + test_map.pop_max(); + } + // Automatic destruction happens here. + } +} + +template +void test_tree(TREE& tree) { + // test various operations + tree.emplace(Id(43, 2)); + Id id3{44, 3}; + tree.emplace(id3); + ASSERT_EQ(tree.size(), 3u); + + tree.pop(); + ASSERT_EQ(2u, tree.size()); + tree.pop(); + tree.pop(); + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeBptMulitmapTest, TestCopyConstruct) { + using TestTree = detail::priority_queue; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{tree1}; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptMulitmapTest, TestCopyAssign) { + using TestTree = detail::priority_queue; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{}; + tree = tree1; + test_tree(tree); + // The old tree should still work! + test_tree(tree1); +} + +TEST(PhTreeBptMulitmapTest, TestMoveConstruct) { + using TestTree = detail::priority_queue; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{std::move(tree1)}; + test_tree(tree); +} + +TEST(PhTreeBptMulitmapTest, TestMoveAssign) { + using TestTree = detail::priority_queue; + TestTree tree1; + tree1.emplace(Id(42, 1)); + + TestTree tree{}; + tree = std::move(tree1); + test_tree(tree); +} diff --git a/test/common/b_vector_tree_test.cc b/test/common/b_vector_tree_test.cc new file mode 100644 index 0000000..12df228 --- /dev/null +++ b/test/common/b_vector_tree_test.cc @@ -0,0 +1,341 @@ +/* + * Copyright 2022 Tilmann Zäschke + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "phtree/common/bpt_vector_tree.h"
+#include 
+#include 
+#include 
+
+using namespace phtree::bptree::detail;
+
+static int default_construct_count_ = 0;
+static int construct_count_ = 0;
+static int copy_construct_count_ = 0;
+static int move_construct_count_ = 0;
+static int copy_assign_count_ = 0;
+static int move_assign_count_ = 0;
+static int destruct_count_ = 0;
+
+[[maybe_unused]] static void reset_id_counters() {
+    default_construct_count_ = 0;
+    construct_count_ = 0;
+    copy_construct_count_ = 0;
+    move_construct_count_ = 0;
+    copy_assign_count_ = 0;
+    move_assign_count_ = 0;
+    destruct_count_ = 0;
+}
+
+[[maybe_unused]] static void print_id_counters() {
+    std::cout << "dc=" << default_construct_count_ << " c=" << construct_count_
+              << " cc=" << copy_construct_count_ << " mc=" << move_construct_count_
+              << " ca=" << copy_assign_count_ << " ma=" << move_assign_count_
+              << " d=" << destruct_count_ << std::endl;
+}
+
+struct Id {
+    Id() : i_{0} {
+        ++default_construct_count_;
+    }
+
+    explicit Id(const size_t i) : i_{static_cast<int>(i)} {
+        ++construct_count_;
+    }
+
+    explicit Id(const int i) : i_{i} {
+        ++construct_count_;
+    }
+
+    Id(const Id& other) {
+        ++copy_construct_count_;
+        i_ = other.i_;
+    }
+
+    Id(Id&& other) noexcept {
+        ++move_construct_count_;
+        i_ = other.i_;
+    }
+
+    Id& operator=(const Id& other) noexcept {
+        ++copy_assign_count_;
+        i_ = other.i_;
+        return *this;
+    }
+    Id& operator=(Id&& other) noexcept {
+        ++move_assign_count_;
+        i_ = other.i_;
+        return *this;
+    }
+
+    bool operator==(const Id& rhs) const {
+        return i_ == rhs.i_;
+    }
+
+    ~Id() {
+        ++destruct_count_;
+    }
+
+    int i_;
+};
+
+struct IdTriviallyCopyable {
+    IdTriviallyCopyable() : i_{0} {}
+
+    explicit IdTriviallyCopyable(const size_t i) : i_{static_cast<int>(i)} {}
+
+    explicit IdTriviallyCopyable(const int i) : i_{i} {}
+
+    bool operator==(const IdTriviallyCopyable& rhs) const {
+        return i_ == rhs.i_;
+    }
+
+    int i_;
+};
+
+// TEST(PhTreeBptVectorTreeTest, SmokeTest0) {
+//     const size_t N = 100;
+//     std::default_random_engine random_engine{0};
+//     std::uniform_int_distribution<> cube_distribution(0, N - 1);
+//
+//     using ValueT = Id;
+//     for (int i = 0; i < 100; i++) {
+//         vector_tree test_map{};
+//
+//         // populate
+//         for (size_t j = 0; j < N; j++) {
+//             test_map.emplace_back(j);
+//             // test_map._check();
+//         }
+//         ASSERT_EQ(test_map.size(), N);
+//
+//         for (size_t j = 0; j < N; j++) {
+//             ASSERT_LE(test_map[j].i_, (int)N);
+//         }
+//
+//         // update
+//         for (size_t j = 0; j < N; j++) {
+//             int pos = cube_distribution(random_engine);
+//             test_map.erase(test_map.begin() + pos);
+//
+//             // add
+//             int pos2 = cube_distribution(random_engine);
+//             test_map.emplace(test_map.begin() + pos2, j);
+//         }
+//         ASSERT_EQ(test_map.size(), N);
+//
+//         // update ranges
+//         for (size_t j = 0; j < N; j++) {
+//             int R = 5;
+//             size_t pos = cube_distribution(random_engine) % (N - R);
+//             test_map.erase(test_map.begin() + pos, test_map.begin() + pos + R);
+//
+//             // add
+//             int pos2 = std::max(0, (int)(cube_distribution(random_engine) % N - R));
+//             vector_tree tm2{};
+//             // std::vector ref2{};
+//             for (int k = 0; k < R; ++k) {
+//                 tm2.emplace_back(j + k);
+//             }
+//             test_map.insert(
+//                 test_map.begin() + pos2,
+//                 std::move_iterator(tm2.begin()),
+//                 std::move_iterator(tm2.end()));
+//         }
+//         ASSERT_EQ(test_map.size(), N);
+//
+//         size_t n = 0;
+//         for (auto it = test_map.begin(); it != test_map.end(); ++it) {
+//             ++n;
+//         }
+//         ASSERT_EQ(N, n);
+//         static_assert(std::is_same_v);
+//
+//         // drain 50%
+//         while
(test_map.size() > N / 2) { +// size_t pos = cube_distribution(random_engine) % (N / 4); +// test_map.erase(test_map.begin() + pos, test_map.begin() + pos + 3); +// } +// +// // drain 100% +// while (!test_map.empty()) { +// size_t pos = cube_distribution(random_engine) % test_map.size(); +// test_map.erase(test_map.begin() + pos); +// } +// +// ASSERT_EQ(0u, test_map.size()); +// ASSERT_TRUE(test_map.empty()); +// } +// +// ASSERT_GE(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); +// ASSERT_LE(construct_count_ + copy_construct_count_, destruct_count_); +// } + +template +void SmokeTest() { + const size_t N = 100; + std::default_random_engine random_engine{0}; + std::uniform_int_distribution<> cube_distribution(0, N - 1); + + using ValueT = Id; + for (int i = 0; i < 100; i++) { + vector_tree test_map{}; + std::vector reference_map{}; + + // populate + for (size_t j = 0; j < N; j++) { + test_map.emplace_back(j); + // test_map._check(); + reference_map.emplace_back(j); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + + for (size_t j = 0; j < N; j++) { + ASSERT_EQ(test_map[j], reference_map[j]); + } + + // update + for (size_t j = 0; j < N; j++) { + int pos = cube_distribution(random_engine); + ASSERT_EQ(test_map[pos], reference_map[pos]); + test_map[pos].i_ += 17; + reference_map[pos].i_ += 17; + ASSERT_EQ(test_map[pos], reference_map[pos]); + ASSERT_EQ(test_map.front(), reference_map.front()); + ASSERT_EQ(test_map.back(), reference_map.back()); + } + ASSERT_EQ(test_map.size(), reference_map.size()); + + // TODO? + // size_t n = 0; + // for (auto it = test_map.begin(); it != test_map.end(); ++it) { + // ++n; + // } + // ASSERT_EQ(N, n); + // static_assert(std::is_same_v); + + // drain 100% + for (size_t j = 0; j < N; j++) { + size_t pos = cube_distribution(random_engine) % test_map.size(); + ASSERT_EQ(test_map[pos], reference_map[pos]); + test_map.erase_back(); + reference_map.erase(reference_map.end() - 1); + } + + // fill and drain again + for (size_t j = 0; j < N; j++) { + test_map.emplace_back(j); + reference_map.emplace_back(j); + } + for (size_t j = 0; j < N; j++) { + ASSERT_EQ(test_map[j], reference_map[j]); + } + for (size_t j = 0; j < N; j++) { + test_map.erase_back(); + reference_map.erase(reference_map.end() - 1); + } + + ASSERT_EQ(0u, test_map.size()); + ASSERT_TRUE(test_map.empty()); + } +} + +TEST(PhTreeBptVectorTreeTest, SmokeTest) { + static_assert(!std::is_trivially_copyable_v); + reset_id_counters(); + SmokeTest(); + ASSERT_GE(construct_count_ + copy_construct_count_ + move_construct_count_, destruct_count_); + ASSERT_LE(construct_count_ + copy_construct_count_, destruct_count_); +} + +TEST(PhTreeBptVectorTreeTest, SmokeTest_TriviallyCopyable) { + static_assert(std::is_trivially_copyable_v); + SmokeTest(); +} + +template +void test_tree(TREE& tree) { + // test various operations + tree.emplace_back(Id(2)); + Id id3{3}; + tree.emplace_back(id3); + ASSERT_EQ(tree.size(), 3u); + + tree.erase_back(); + ASSERT_EQ(2u, tree.size()); + tree.erase_back(); + tree.erase_back(); + ASSERT_EQ(0u, tree.size()); + ASSERT_TRUE(tree.empty()); +} + +TEST(PhTreeBptVectorTreeTest, TestCopyConstruct) { + using TestTree = vector_tree; + TestTree tree1; + tree1.emplace_back(Id(1)); + + TestTree tree{tree1}; + test_tree(tree); + // The old tree should still work! 
+    test_tree(tree1);
+}
+
+TEST(PhTreeBptVectorTreeTest, TestCopyAssign) {
+    using TestTree = vector_tree;
+    TestTree tree1;
+    tree1.emplace_back(Id(1));
+
+    TestTree tree{};
+    tree = tree1;
+    test_tree(tree);
+    // The old tree should still work!
+    test_tree(tree1);
+}
+
+TEST(PhTreeBptVectorTreeTest, TestMoveConstruct) {
+    using TestTree = vector_tree;
+    TestTree tree1;
+    tree1.emplace_back(Id(1));
+
+    TestTree tree{std::move(tree1)};
+    test_tree(tree);
+}
+
+TEST(PhTreeBptVectorTreeTest, TestMoveAssign) {
+    using TestTree = vector_tree;
+    TestTree tree1;
+    tree1.emplace_back(Id(1));
+
+    TestTree tree{};
+    tree = std::move(tree1);
+    test_tree(tree);
+}
+
+TEST(PhTreeBptHeapTest, FuzzTest1) {
+    using Id = std::pair;
+    vector_tree tree{};
+    std::vector ref{};
+    tree.emplace_back(42, 11);
+    ref.emplace_back(42, 11);
+    tree.erase_back();
+    ref.erase(ref.end() - 1);
+
+    for (size_t i = 0; i < tree.size(); ++i) {
+        ASSERT_EQ(tree[i].first, ref[i].first);
+        ASSERT_EQ(tree[i].second, ref[i].second);
+    }
+    ASSERT_EQ(tree.size(), ref.size());
+}
\ No newline at end of file
diff --git a/test/phtree_test.cc b/test/phtree_test.cc
index 7b57f16..5f2efa3 100644
--- a/test/phtree_test.cc
+++ b/test/phtree_test.cc
@@ -1262,7 +1262,7 @@ TEST(PhTreeTest, TestKnnQueryFilterAndDistanceL1) {
         // entries with the same distance but with different ordering than sorted_data.
         ASSERT_GE(q.distance(), prevDist);
         prevDist = q.distance();
-        q++;
+        ++q;
         n++;
     }
     ASSERT_EQ(Nq, n);
@@ -1288,9 +1288,9 @@ TEST(PhTreeTest, TestKnnQueryIterator) {
         ASSERT_NE(q1, tree.end());
         ASSERT_NE(q2, tree.end());
         ASSERT_EQ(q1, q2);
-        q1++;
+        ++q1;
         ASSERT_NE(q1, q2);
-        q2++;
+        ++q2;
         n++;
     }
     ASSERT_EQ(Nq, n);
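
Note on the structures exercised above: the tests drive b_plus_tree_heap and detail::priority_queue through a double-ended priority-queue interface (emplace, top, pop, top_max, pop_max, size, empty). The sketch below is not part of the patch; it illustrates one way such a structure could back a bounded k-nearest-neighbor candidate list of the kind measured by the new knn_mm_d_benchmark. The heap type is left as a template parameter because the exact template arguments of b_plus_tree_heap are not visible in this diff, and the instantiation shown in the trailing comment is an assumption.

// Illustrative sketch only; names and the heap instantiation are assumptions.
#include <cstddef>
#include <limits>

namespace example {

struct Candidate {
    double first;     // distance to the query point, mirrors Id::first in the tests
    std::size_t id;   // payload identifier
};

struct CandidateComparator {
    // Same ordering convention as IdComparator in the tests: compare by distance.
    bool operator()(const Candidate& left, const Candidate& right) const {
        return left.first > right.first;
    }
};

// Keeps the k best (smallest-distance) candidates seen so far.
// HEAP is any type with the emplace/top_max/pop_max/size interface shown in the tests.
template <typename HEAP>
class KnnCandidates {
  public:
    explicit KnnCandidates(std::size_t k) : k_{k} {}

    void offer(const Candidate& c) {
        if (heap_.size() < k_) {
            heap_.emplace(c);                       // still collecting the first k entries
        } else if (c.first < heap_.top_max().first) {
            heap_.pop_max();                        // drop the current worst candidate
            heap_.emplace(c);
        }
    }

    // Current worst accepted distance; usable as a pruning bound during traversal.
    // Left non-const because const overloads of top_max()/size() are not confirmed here.
    double prune_distance() {
        return heap_.size() < k_ ? std::numeric_limits<double>::infinity()
                                 : heap_.top_max().first;
    }

  private:
    HEAP heap_;
    std::size_t k_;
};

// Possible instantiation (template arguments are an assumption, not confirmed by this patch):
// using Heap = phtree::bptree::b_plus_tree_heap<Candidate, CandidateComparator>;
// example::KnnCandidates<Heap> knn{10};

}  // namespace example

Keeping the worst candidate reachable via top_max() lets a search shrink its pruning radius as better candidates arrive, without re-sorting the candidate set.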