Skip to content

Commit

Permalink
Chebyshev distance (#139)
Browse files Browse the repository at this point in the history
  • Loading branch information
tzaeschke authored Mar 20, 2023
1 parent 731a21e commit 915ec44
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 66 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Added `lower_bounds(key)` to API. [#126](https://github.com/tzaeschke/phtree-cpp/issues/126)
- Added `bpt_fixed_vector`, a fixed size flat vector for future use. It can be dropped in for an std::vector.
[#124](https://github.com/tzaeschke/phtree-cpp/pull/124)
- Added Chebyshev distance metric `DistanceChebyshev`. [#139](https://github.com/tzaeschke/phtree-cpp/pull/139)

### Changed
- Changed `bpt_vector` to use `std::destroy` instead of the default destructor. [#132](https://github.com/tzaeschke/phtree-cpp/pull/132)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ struct FilterMultiMapByValueId {

##### Distance function

Nearest neighbor queries can also use custom distance metrics, such as L1 distance. Note that this returns a special
iterator that provides a function to get the distance of the current entry:
Nearest neighbor queries can also use other distance metrics, such as L1 or Chebyshev distance. Note that the query
returns a special iterator that provides a function to get the distance of the current entry:

```C++
#include "phtree/phtree.h"
Expand Down
49 changes: 44 additions & 5 deletions include/phtree/distance.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace improbable::phtree {

template <dimension_t DIM>
struct DistanceEuclidean {
double operator()(const PhPoint<DIM>& v1, const PhPoint<DIM>& v2) const {
double operator()(const PhPoint<DIM>& v1, const PhPoint<DIM>& v2) const noexcept {
double sum2 = 0;
for (dimension_t i = 0; i < DIM; ++i) {
assert(
Expand All @@ -51,7 +51,7 @@ struct DistanceEuclidean {
return sqrt(sum2);
};

double operator()(const PhPointD<DIM>& p1, const PhPointD<DIM>& p2) const {
double operator()(const PhPointD<DIM>& p1, const PhPointD<DIM>& p2) const noexcept {
double sum2 = 0;
for (dimension_t i = 0; i < DIM; ++i) {
double d2 = p1[i] - p2[i];
Expand All @@ -60,7 +60,7 @@ struct DistanceEuclidean {
return sqrt(sum2);
};

double operator()(const PhPointF<DIM>& v1, const PhPointF<DIM>& v2) const {
double operator()(const PhPointF<DIM>& v1, const PhPointF<DIM>& v2) const noexcept {
double sum2 = 0;
for (dimension_t i = 0; i < DIM; i++) {
double d2 = double(v1[i] - v2[i]);
Expand All @@ -72,7 +72,7 @@ struct DistanceEuclidean {

template <dimension_t DIM>
struct DistanceL1 {
double operator()(const PhPoint<DIM>& v1, const PhPoint<DIM>& v2) const {
double operator()(const PhPoint<DIM>& v1, const PhPoint<DIM>& v2) const noexcept {
double sum = 0;
for (dimension_t i = 0; i < DIM; ++i) {
assert(
Expand All @@ -84,13 +84,52 @@ struct DistanceL1 {
return sum;
};

double operator()(const PhPointD<DIM>& v1, const PhPointD<DIM>& v2) const {
double operator()(const PhPointD<DIM>& v1, const PhPointD<DIM>& v2) const noexcept {
double sum = 0;
for (dimension_t i = 0; i < DIM; ++i) {
sum += std::abs(v1[i] - v2[i]);
}
return sum;
};

float operator()(const PhPointF<DIM>& v1, const PhPointF<DIM>& v2) const noexcept {
float sum = 0;
for (dimension_t i = 0; i < DIM; ++i) {
sum += std::abs(v1[i] - v2[i]);
}
return sum;
};
};

// Chebyshev (L-infinity) distance: the largest absolute per-dimension difference
// between two points. One overload is provided per supported point type.
template <dimension_t DIM>
struct DistanceChebyshev {
    // Integer coordinates.
    // Returns the largest per-dimension gap, converted to double.
    double operator()(const PhPoint<DIM>& v1, const PhPoint<DIM>& v2) const noexcept {
        double max_dist = 0;
        for (dimension_t i = 0; i < DIM; ++i) {
            // A plain signed `v1[i] - v2[i]` can overflow (undefined behaviour) when the
            // operands have opposite signs. Subtracting smaller from larger in unsigned
            // arithmetic always yields the exact absolute difference instead.
            // NOTE(review): assumes coordinates are integers no wider than 64 bits.
            const auto hi = v1[i] < v2[i] ? v2[i] : v1[i];
            const auto lo = v1[i] < v2[i] ? v1[i] : v2[i];
            const unsigned long long gap =
                static_cast<unsigned long long>(hi) - static_cast<unsigned long long>(lo);
            max_dist = std::max(max_dist, static_cast<double>(gap));
        }
        return max_dist;
    }

    // Double-precision coordinates.
    // Returns the largest per-dimension gap.
    double operator()(const PhPointD<DIM>& v1, const PhPointD<DIM>& v2) const noexcept {
        double max_dist = 0;
        for (dimension_t i = 0; i < DIM; ++i) {
            max_dist = std::max(max_dist, std::abs(v1[i] - v2[i]));
        }
        return max_dist;
    }

    // Single-precision coordinates.
    // The computation and the result stay in float.
    float operator()(const PhPointF<DIM>& v1, const PhPointF<DIM>& v2) const noexcept {
        float max_dist = 0;
        for (dimension_t i = 0; i < DIM; ++i) {
            max_dist = std::max(max_dist, std::abs(v1[i] - v2[i]));
        }
        return max_dist;
    }
};

} // namespace improbable::phtree
Expand Down
28 changes: 28 additions & 0 deletions test/distance_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

using namespace improbable::phtree;

// NOTE: These are very superficial tests. Proper testing is done in the respective PhTree tests.

TEST(PhTreeDistanceTest, DoubleEuclidean) {
auto distance = DistanceEuclidean<2>();
ASSERT_DOUBLE_EQ(5, distance(PhPointD<2>{-1, -1}, PhPointD<2>{2, 3}));
Expand All @@ -30,6 +32,26 @@ TEST(PhTreeDistanceTest, DoubleL1) {
ASSERT_DOUBLE_EQ(7, distance(PhPointD<2>{-1, -1}, PhPointD<2>{2, 3}));
}

TEST(PhTreeDistanceTest, DoubleChebyshev) {
    // Per-axis gaps are 3 and 4; the Chebyshev metric reports the larger one.
    const PhPointD<2> from{-1, -1};
    const PhPointD<2> to{2, 3};
    const DistanceChebyshev<2> metric{};
    ASSERT_DOUBLE_EQ(4, metric(from, to));
}

TEST(PhTreeDistanceTest, FloatEuclidean) {
    // Per-axis gaps are 3 and 4, i.e. a 3-4-5 right triangle.
    const PhPointF<2> from{-1, -1};
    const PhPointF<2> to{2, 3};
    const DistanceEuclidean<2> metric{};
    ASSERT_DOUBLE_EQ(5, metric(from, to));
}

TEST(PhTreeDistanceTest, FloatL1) {
    // Per-axis gaps are 3 and 4; the L1 metric adds them up.
    const PhPointF<2> from{-1, -1};
    const PhPointF<2> to{2, 3};
    const DistanceL1<2> metric{};
    ASSERT_DOUBLE_EQ(7, metric(from, to));
}

TEST(PhTreeDistanceTest, FloatChebyshev) {
    // Per-axis gaps are 3 and 4; the Chebyshev metric reports the larger one.
    const PhPointF<2> from{-1, -1};
    const PhPointF<2> to{2, 3};
    const DistanceChebyshev<2> metric{};
    ASSERT_DOUBLE_EQ(4, metric(from, to));
}

TEST(PhTreeDistanceTest, LongEuclidean) {
auto distance = DistanceEuclidean<2>();
ASSERT_DOUBLE_EQ(5, distance(PhPoint<2>{-1, -1}, PhPoint<2>{2, 3}));
Expand All @@ -39,3 +61,9 @@ TEST(PhTreeDistanceTest, LongL1) {
auto distance = DistanceL1<2>();
ASSERT_DOUBLE_EQ(7, distance(PhPoint<2>{-1, -1}, PhPoint<2>{2, 3}));
}

TEST(PhTreeDistanceTest, LongChebyshev) {
    // Per-axis gaps are 3 and 4; the Chebyshev metric reports the larger one.
    const PhPoint<2> from{-1, -1};
    const PhPoint<2> to{2, 3};
    const DistanceChebyshev<2> metric{};
    ASSERT_DOUBLE_EQ(4, metric(from, to));
}

55 changes: 41 additions & 14 deletions test/phtree_d_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,23 @@ double distance(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
}

template <dimension_t DIM>
double distanceL1(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
double distance_L1(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
double sum = 0;
for (dimension_t i = 0; i < DIM; i++) {
sum += std::abs(p1[i] - p2[i]);
}
return sum;
}

template <dimension_t DIM>
double distance_chebyshev(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
double sum = 0;
for (dimension_t i = 0; i < DIM; i++) {
sum = std::max(sum, std::abs(p1[i] - p2[i]));
}
return sum;
}

template <dimension_t DIM>
void generateCube(std::vector<TestPoint<DIM>>& points, size_t N) {
DoubleRng rng(-1000, 1000);
Expand Down Expand Up @@ -1051,7 +1060,8 @@ TEST(PhTreeDTest, TestWindowQueryFilter) {
ASSERT_GE(50, num_e);
}

TEST(PhTreeDTest, TestKnnQuery) {
template <typename DIST_TEST, typename DIST_REF>
void test_knn_query(DIST_TEST dist_fn, DIST_REF dist_fn_reference) {
// deliberately allowing outside of main points range
DoubleRng rng(-1500, 1500);
const dimension_t dim = 3;
Expand All @@ -1068,18 +1078,18 @@ TEST(PhTreeDTest, TestKnnQuery) {
// sort points manually
std::vector<PointDistance> sorted_data;
for (size_t i = 0; i < points.size(); i++) {
double dist = distance(center, points[i]);
double dist = dist_fn_reference(center, points[i]);
sorted_data.emplace_back(dist, i);
}
std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance);

size_t n = 0;
double prevDist = -1;
auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>());
auto q = tree.begin_knn_query(Nq, center, dist_fn);
while (q != tree.end()) {
// just read the entry
auto& e = *q;
ASSERT_EQ(sorted_data[n]._distance, q.distance());
ASSERT_DOUBLE_EQ(sorted_data[n]._distance, q.distance());
ASSERT_EQ(sorted_data[n]._id, e._i);
ASSERT_EQ(points[sorted_data[n]._id], q.first());
ASSERT_EQ(sorted_data[n]._id, q.second()._i);
Expand All @@ -1092,18 +1102,35 @@ TEST(PhTreeDTest, TestKnnQuery) {
}
}

TEST(PhTreeDTest, TestKnnQuery_Euclidean) {
    // Run the shared kNN check with the library's Euclidean metric and the
    // test-local distance() helper as reference.
    constexpr dimension_t DIM = 3;
    const auto reference = [](const TestPoint<DIM>& a, const TestPoint<DIM>& b) {
        return distance(a, b);
    };
    test_knn_query(DistanceEuclidean<DIM>(), reference);
}

TEST(PhTreeDTest, TestKnnQuery_L1) {
    // Run the shared kNN check with the library's L1 metric and the
    // test-local distance_L1() helper as reference.
    constexpr dimension_t DIM = 3;
    const auto reference = [](const TestPoint<DIM>& a, const TestPoint<DIM>& b) {
        return distance_L1(a, b);
    };
    test_knn_query(DistanceL1<DIM>(), reference);
}

TEST(PhTreeDTest, TestKnnQuery_Chebyshev) {
    // Run the shared kNN check with the library's Chebyshev metric and the
    // test-local distance_chebyshev() helper as reference.
    constexpr dimension_t DIM = 3;
    const auto reference = [](const TestPoint<DIM>& a, const TestPoint<DIM>& b) {
        return distance_chebyshev(a, b);
    };
    test_knn_query(DistanceChebyshev<DIM>(), reference);
}

template <dimension_t DIM>
struct PhDistanceLongL1 {
struct MyDistance {
double operator()(const TestPoint<DIM>& v1, const TestPoint<DIM>& v2) const {
double sum = 0;
for (dimension_t i = 0; i < DIM; i++) {
sum += std::abs(v1[i] - v2[i]);
}
return sum;
return distance_L1(v1, v2);
};
};

TEST(PhTreeDTest, TestKnnQueryFilterAndDistanceL1) {
TEST(PhTreeDTest, TestKnnQueryFilterAndCustomDistance) {
// deliberately allowing outside of main points range
DoubleRng rng(-1500, 1500);
const dimension_t dim = 3;
Expand All @@ -1120,14 +1147,14 @@ TEST(PhTreeDTest, TestKnnQueryFilterAndDistanceL1) {
// sort points manually by L1; skip every 2nd point
std::vector<PointDistance> sorted_data;
for (size_t i = 0; i < points.size(); i += 2) {
double dist = distanceL1(center, points[i]);
double dist = MyDistance<dim>{}(center, points[i]);
sorted_data.emplace_back(dist, i);
}
std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance);

size_t n = 0;
double prevDist = -1;
auto q = tree.begin_knn_query(Nq, center, PhDistanceLongL1<dim>(), FilterEvenId<dim, Id>());
auto q = tree.begin_knn_query(Nq, center, MyDistance<dim>(), FilterEvenId<dim, Id>());
while (q != tree.end()) {
// just read the entry
auto& e = *q;
Expand Down
55 changes: 41 additions & 14 deletions test/phtree_f_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,23 @@ double distance(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
}

template <dimension_t DIM>
double distanceL1(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
double distance_L1(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
double sum = 0;
for (dimension_t i = 0; i < DIM; i++) {
sum += std::abs(p1[i] - p2[i]);
}
return sum;
}

template <dimension_t DIM>
double distance_chebyshev(const TestPoint<DIM>& p1, const TestPoint<DIM>& p2) {
float sum = 0;
for (dimension_t i = 0; i < DIM; i++) {
sum = std::max(sum, std::abs(p1[i] - p2[i]));
}
return sum;
}

template <dimension_t DIM>
void generateCube(std::vector<TestPoint<DIM>>& points, size_t N) {
FloatRng rng(-1000, 1000);
Expand Down Expand Up @@ -782,7 +791,8 @@ TEST(PhTreeFTest, TestWindowQueryFilter) {
ASSERT_GE(50, num_e);
}

TEST(PhTreeFTest, TestKnnQuery) {
template <typename DIST_TEST, typename DIST_REF>
void test_knn_query(DIST_TEST dist_fn, DIST_REF dist_fn_reference) {
// deliberately allowing outside of main points range
FloatRng rng(-1500, 1500);
const dimension_t dim = 3;
Expand All @@ -799,18 +809,18 @@ TEST(PhTreeFTest, TestKnnQuery) {
// sort points manually
std::vector<PointDistance> sorted_data;
for (size_t i = 0; i < points.size(); i++) {
double dist = distance(center, points[i]);
double dist = dist_fn_reference(center, points[i]);
sorted_data.emplace_back(dist, i);
}
std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance);

size_t n = 0;
double prevDist = -1;
auto q = tree.begin_knn_query(Nq, center, DistanceEuclidean<3>());
auto q = tree.begin_knn_query(Nq, center, dist_fn);
while (q != tree.end()) {
// just read the entry
auto& e = *q;
ASSERT_EQ(sorted_data[n]._distance, q.distance());
ASSERT_FLOAT_EQ(sorted_data[n]._distance, q.distance());
ASSERT_EQ(sorted_data[n]._id, e._i);
ASSERT_EQ(points[sorted_data[n]._id], q.first());
ASSERT_EQ(sorted_data[n]._id, q.second()._i);
Expand All @@ -823,18 +833,35 @@ TEST(PhTreeFTest, TestKnnQuery) {
}
}

TEST(PhTreeFTest, TestKnnQuery_Euclidean) {
    // Run the shared kNN check with the library's Euclidean metric and the
    // test-local distance() helper as reference.
    constexpr dimension_t DIM = 3;
    const auto reference = [](const TestPoint<DIM>& a, const TestPoint<DIM>& b) {
        return distance(a, b);
    };
    test_knn_query(DistanceEuclidean<DIM>(), reference);
}

TEST(PhTreeFTest, TestKnnQuery_L1) {
    // Run the shared kNN check with the library's L1 metric and the
    // test-local distance_L1() helper as reference.
    constexpr dimension_t DIM = 3;
    const auto reference = [](const TestPoint<DIM>& a, const TestPoint<DIM>& b) {
        return distance_L1(a, b);
    };
    test_knn_query(DistanceL1<DIM>(), reference);
}

TEST(PhTreeFTest, TestKnnQuery_Chebyshev) {
    // Run the shared kNN check with the library's Chebyshev metric and the
    // test-local distance_chebyshev() helper as reference.
    constexpr dimension_t DIM = 3;
    const auto reference = [](const TestPoint<DIM>& a, const TestPoint<DIM>& b) {
        return distance_chebyshev(a, b);
    };
    test_knn_query(DistanceChebyshev<DIM>(), reference);
}

template <dimension_t DIM>
struct PhDistanceLongL1 {
struct MyDistance {
double operator()(const TestPoint<DIM>& v1, const TestPoint<DIM>& v2) const {
double sum = 0;
for (dimension_t i = 0; i < DIM; i++) {
sum += std::abs(v1[i] - v2[i]);
}
return sum;
return distance_L1(v1, v2);
};
};

TEST(PhTreeFTest, TestKnnQueryFilterAndDistanceL1) {
TEST(PhTreeFTest, TestKnnQueryFilterAndCustomDistance) {
// deliberately allowing outside of main points range
FloatRng rng(-1500, 1500);
const dimension_t dim = 3;
Expand All @@ -851,14 +878,14 @@ TEST(PhTreeFTest, TestKnnQueryFilterAndDistanceL1) {
// sort points manually by L1; skip every 2nd point
std::vector<PointDistance> sorted_data;
for (size_t i = 0; i < points.size(); i += 2) {
double dist = distanceL1(center, points[i]);
double dist = MyDistance<dim>{}(center, points[i]);
sorted_data.emplace_back(dist, i);
}
std::sort(sorted_data.begin(), sorted_data.end(), comparePointDistance);

size_t n = 0;
double prevDist = -1;
auto q = tree.begin_knn_query(Nq, center, PhDistanceLongL1<dim>(), FilterEvenId<dim, Id>());
auto q = tree.begin_knn_query(Nq, center, MyDistance<dim>(), FilterEvenId<dim, Id>());
while (q != tree.end()) {
// just read the entry
auto& e = *q;
Expand Down
Loading

0 comments on commit 915ec44

Please sign in to comment.