Skip to content

Commit 607c430

Browse files
authored
Switch to uintptr_t values to pass pointers to/from C++. (#35) (#1)
This is motivated by a similar change in mattress. It eliminates the need to rely on pybind11 in downstream packages; as long as they can take a uintptr_t, they can use knncolle. We add a knncolle_py.h header with type definitions for correct casting of the uintptr_t; the header can be obtained via include(). A side effect of this change is that we need to implement destructors for the pointed-to objects for each uintptr_t. This involves writing __del__ methods for GenericIndex instances as well as creating a new Builder class. While we're here, we move more logic into GenericIndex to reduce repetition.
1 parent 3a633e9 commit 607c430

26 files changed

+292
-148
lines changed

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
recursive-include src/knncolle/include *

lib/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ pybind11_add_module(knncolle_py
2020
)
2121

2222
target_include_directories(knncolle_py PRIVATE "${ASSORTHEAD_INCLUDE_DIR}")
23+
target_include_directories(knncolle_py PRIVATE "../src/knncolle/include")
2324

2425
set_property(TARGET knncolle_py PROPERTY CXX_STANDARD 17)
2526

lib/src/annoy.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,43 @@
1-
#include "def.h"
1+
#include "knncolle_py.h"
22
#include "pybind11/pybind11.h"
33

4+
#include <memory>
5+
#include <stdexcept>
6+
47
// Turn off manual vectorization always, to avoid small inconsistencies in
58
// distance calculations across otherwise-compliant machines.
69
#define NO_MANUAL_VECTORIZATION 1
710

811
#include "knncolle_annoy/knncolle_annoy.hpp"
912

10-
BuilderPointer create_annoy_builder(int num_trees, double search_mult, std::string distance) {
13+
uintptr_t create_annoy_builder(int num_trees, double search_mult, std::string distance) {
1114
knncolle_annoy::AnnoyOptions opt;
1215
opt.num_trees = num_trees;
1316
opt.search_mult = search_mult;
17+
auto tmp = std::make_unique<knncolle_py::WrappedBuilder>();
1418

1519
if (distance == "Manhattan") {
16-
return BuilderPointer(new knncolle_annoy::AnnoyBuilder<Annoy::Manhattan, SimpleMatrix, double>(opt));
20+
tmp->ptr.reset(new knncolle_annoy::AnnoyBuilder<Annoy::Manhattan, knncolle_py::SimpleMatrix, knncolle_py::Distance>(opt));
1721

1822
} else if (distance == "Euclidean") {
19-
return BuilderPointer(new knncolle_annoy::AnnoyBuilder<Annoy::Euclidean, SimpleMatrix, double>(opt));
23+
tmp->ptr.reset(new knncolle_annoy::AnnoyBuilder<Annoy::Euclidean, knncolle_py::SimpleMatrix, knncolle_py::Distance>(opt));
2024

2125
} else if (distance == "Cosine") {
22-
return BuilderPointer(
23-
new knncolle::L2NormalizedBuilder<SimpleMatrix, double>(
26+
tmp->ptr.reset(
27+
new knncolle::L2NormalizedBuilder<knncolle_py::SimpleMatrix, knncolle_py::Distance>(
2428
new knncolle_annoy::AnnoyBuilder<
2529
Annoy::Euclidean,
26-
knncolle::L2NormalizedMatrix<SimpleMatrix>,
30+
knncolle::L2NormalizedMatrix<knncolle_py::SimpleMatrix>,
2731
double
2832
>(opt)
2933
)
3034
);
3135

3236
} else {
3337
throw std::runtime_error("unknown distance type '" + distance + "'");
34-
return BuilderPointer();
3538
}
39+
40+
return reinterpret_cast<uintptr_t>(static_cast<void*>(tmp.release()));
3641
}
3742

3843
void init_annoy(pybind11::module& m) {

lib/src/def.h

Lines changed: 0 additions & 19 deletions
This file was deleted.

lib/src/exhaustive.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,34 @@
1-
#include "def.h"
1+
#include "knncolle_py.h"
22
#include "pybind11/pybind11.h"
33

4-
BuilderPointer create_exhaustive_builder(std::string distance) {
4+
#include <memory>
5+
#include <stdexcept>
6+
7+
uintptr_t create_exhaustive_builder(std::string distance) {
8+
auto tmp = std::make_unique<knncolle_py::WrappedBuilder>();
9+
510
if (distance == "Manhattan") {
6-
return BuilderPointer(new knncolle::BruteforceBuilder<knncolle::ManhattanDistance, SimpleMatrix, double>);
11+
tmp->ptr.reset(new knncolle::BruteforceBuilder<knncolle::ManhattanDistance, knncolle_py::SimpleMatrix, knncolle_py::Distance>);
712

813
} else if (distance == "Euclidean") {
9-
return BuilderPointer(new knncolle::BruteforceBuilder<knncolle::EuclideanDistance, SimpleMatrix, double>);
14+
tmp->ptr.reset(new knncolle::BruteforceBuilder<knncolle::EuclideanDistance, knncolle_py::SimpleMatrix, knncolle_py::Distance>);
1015

1116
} else if (distance == "Cosine") {
12-
return BuilderPointer(
17+
tmp->ptr.reset(
1318
new knncolle::L2NormalizedBuilder(
1419
new knncolle::BruteforceBuilder<
1520
knncolle::EuclideanDistance,
16-
knncolle::L2NormalizedMatrix<SimpleMatrix>,
21+
knncolle::L2NormalizedMatrix<knncolle_py::SimpleMatrix>,
1722
double
1823
>
1924
)
2025
);
2126

2227
} else {
2328
throw std::runtime_error("unknown distance type '" + distance + "'");
24-
return BuilderPointer();
2529
}
30+
31+
return reinterpret_cast<uintptr_t>(static_cast<void*>(tmp.release()));
2632
}
2733

2834
void init_exhaustive(pybind11::module& m) {

lib/src/generics.cpp

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,44 @@
1-
#include "def.h"
1+
#include "knncolle_py.h"
2+
23
#include "pybind11/pybind11.h"
34
#include "pybind11/numpy.h"
45
#include "pybind11/stl.h"
56

67
#include <algorithm>
78
#include <cstdint>
89
#include <optional>
10+
#include <memory>
11+
#include <stdexcept>
12+
#include <vector>
913

1014
typedef pybind11::array_t<double, pybind11::array::f_style | pybind11::array::forcecast> DataMatrix;
1115

12-
PrebuiltPointer generic_build(const BuilderPointer& builder, const DataMatrix& data) {
16+
void free_builder(uintptr_t builder_ptr) {
17+
delete knncolle_py::cast_builder(builder_ptr);
18+
}
19+
20+
uintptr_t generic_build(uintptr_t builder_ptr, const DataMatrix& data) {
1321
auto buffer = data.request();
1422
uint32_t NR = buffer.shape[0], NC = buffer.shape[1];
15-
return PrebuiltPointer(builder->build_raw(SimpleMatrix(NR, NC, static_cast<const double*>(buffer.ptr))));
23+
24+
auto builder = knncolle_py::cast_builder(builder_ptr);
25+
auto tmp = std::make_unique<knncolle_py::WrappedPrebuilt>();
26+
tmp->ptr.reset(builder->ptr->build_raw(knncolle_py::SimpleMatrix(NR, NC, static_cast<const double*>(buffer.ptr))));
27+
28+
return reinterpret_cast<uintptr_t>(static_cast<void*>(tmp.release()));
1629
}
1730

18-
uint32_t generic_num_obs(const PrebuiltPointer& prebuilt) {
31+
void free_prebuilt(uintptr_t prebuilt_ptr) {
32+
delete knncolle_py::cast_prebuilt(prebuilt_ptr);
33+
}
34+
35+
uint32_t generic_num_obs(uintptr_t prebuilt_ptr) {
36+
const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr;
1937
return prebuilt->num_observations();
2038
}
2139

22-
uint32_t generic_num_dims(const PrebuiltPointer& prebuilt) {
40+
uint32_t generic_num_dims(uintptr_t prebuilt_ptr) {
41+
const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr;
2342
return prebuilt->num_dimensions();
2443
}
2544

@@ -54,7 +73,7 @@ typedef pybind11::array_t<uint32_t, pybind11::array::f_style | pybind11::array::
5473
typedef pybind11::array_t<uint32_t, pybind11::array::f_style | pybind11::array::forcecast> ChosenVector;
5574

5675
pybind11::object generic_find_knn(
57-
const PrebuiltPointer& prebuilt,
76+
uintptr_t prebuilt_ptr,
5877
const NeighborVector& num_neighbors,
5978
bool force_variable_neighbors,
6079
std::optional<ChosenVector> chosen,
@@ -63,6 +82,7 @@ pybind11::object generic_find_knn(
6382
bool report_index,
6483
bool report_distance)
6584
{
85+
const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr;
6686
uint32_t nobs = prebuilt->num_observations();
6787

6888
// Checking if we have to handle subsets.
@@ -206,7 +226,7 @@ pybind11::object generic_find_knn(
206226
}
207227

208228
pybind11::object generic_query_knn(
209-
const PrebuiltPointer& prebuilt,
229+
uintptr_t prebuilt_ptr,
210230
const DataMatrix& query,
211231
const NeighborVector& num_neighbors,
212232
bool force_variable_neighbors,
@@ -215,18 +235,19 @@ pybind11::object generic_query_knn(
215235
bool report_index,
216236
bool report_distance)
217237
{
218-
int nobs = prebuilt->num_observations();
219-
size_t ndim = prebuilt->num_dimensions();
238+
const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr;
239+
uint32_t nobs = prebuilt->num_observations();
240+
uint32_t ndim = prebuilt->num_dimensions();
220241

221242
auto buf_info = query.request();
222243
uint32_t nquery = buf_info.shape[1];
223244
const double* query_ptr = static_cast<const double*>(buf_info.ptr);
224-
if (static_cast<size_t>(buf_info.shape[0]) != ndim) {
245+
if (static_cast<uint32_t>(buf_info.shape[0]) != ndim) {
225246
throw std::runtime_error("mismatch in dimensionality between index and 'query'");
226247
}
227248

228249
// Checking that 'k' is valid.
229-
auto sanitize_k = [&](int k) -> int {
250+
auto sanitize_k = [&](uint32_t k) -> int {
230251
if (k <= nobs) {
231252
return k;
232253
}
@@ -354,13 +375,14 @@ pybind11::object generic_query_knn(
354375
typedef pybind11::array_t<double, pybind11::array::f_style | pybind11::array::forcecast> ThresholdVector;
355376

356377
pybind11::object generic_find_all(
357-
const PrebuiltPointer& prebuilt,
378+
uintptr_t prebuilt_ptr,
358379
std::optional<ChosenVector> chosen,
359380
const ThresholdVector& thresholds,
360381
int num_threads,
361382
bool report_index,
362383
bool report_distance)
363384
{
385+
const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr;
364386
uint32_t nobs = prebuilt->num_observations();
365387

366388
uint32_t num_output = nobs;
@@ -438,13 +460,14 @@ pybind11::object generic_find_all(
438460
}
439461

440462
pybind11::object generic_query_all(
441-
const PrebuiltPointer& prebuilt,
463+
uintptr_t prebuilt_ptr,
442464
const DataMatrix& query,
443465
const ThresholdVector& thresholds,
444466
int num_threads,
445467
bool report_index,
446468
bool report_distance)
447469
{
470+
const auto& prebuilt = knncolle_py::cast_prebuilt(prebuilt_ptr)->ptr;
448471
size_t ndim = prebuilt->num_dimensions();
449472

450473
auto buf_info = query.request();
@@ -522,7 +545,9 @@ pybind11::object generic_query_all(
522545
*********************************/
523546

524547
void init_generics(pybind11::module& m) {
548+
m.def("free_builder", &free_builder);
525549
m.def("generic_build", &generic_build);
550+
m.def("free_prebuilt", &free_prebuilt);
526551
m.def("generic_num_obs", &generic_num_obs);
527552
m.def("generic_num_dims", &generic_num_dims);
528553
m.def("generic_find_knn", &generic_find_knn);

lib/src/hnsw.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "def.h"
1+
#include "knncolle_py.h"
22
#include "pybind11/pybind11.h"
33

44
// Turn off manual vectorization always, to avoid small inconsistencies in
@@ -7,35 +7,37 @@
77

88
#include "knncolle_hnsw/knncolle_hnsw.hpp"
99

10-
BuilderPointer create_hnsw_builder(int nlinks, int ef_construct, int ef_search, std::string distance) {
10+
uintptr_t create_hnsw_builder(int nlinks, int ef_construct, int ef_search, std::string distance) {
1111
knncolle_hnsw::HnswOptions<uint32_t, float> opt;
1212
opt.num_links = nlinks;
1313
opt.ef_construction = ef_construct;
1414
opt.ef_search = ef_search;
15+
auto tmp = std::make_unique<knncolle_py::WrappedBuilder>();
1516

1617
if (distance == "Manhattan") {
1718
opt.distance_options.create = [&](int dim) -> hnswlib::SpaceInterface<float>* {
1819
return new knncolle_hnsw::ManhattanDistance<float>(dim);
1920
};
20-
return BuilderPointer(new knncolle_hnsw::HnswBuilder<SimpleMatrix, double>(opt));
21+
tmp->ptr.reset(new knncolle_hnsw::HnswBuilder<knncolle_py::SimpleMatrix, knncolle_py::Distance>(opt));
2122

2223
} else if (distance == "Euclidean") {
23-
return BuilderPointer(new knncolle_hnsw::HnswBuilder<SimpleMatrix, double>(opt));
24+
tmp->ptr.reset(new knncolle_hnsw::HnswBuilder<knncolle_py::SimpleMatrix, knncolle_py::Distance>(opt));
2425

2526
} else if (distance == "Cosine") {
26-
return BuilderPointer(
27-
new knncolle::L2NormalizedBuilder<SimpleMatrix, double>(
27+
tmp->ptr.reset(
28+
new knncolle::L2NormalizedBuilder<knncolle_py::SimpleMatrix, knncolle_py::Distance>(
2829
new knncolle_hnsw::HnswBuilder<
29-
knncolle::L2NormalizedMatrix<SimpleMatrix>,
30+
knncolle::L2NormalizedMatrix<knncolle_py::SimpleMatrix>,
3031
double
3132
>(opt)
3233
)
3334
);
3435

3536
} else {
3637
throw std::runtime_error("unknown distance type '" + distance + "'");
37-
return BuilderPointer();
3838
}
39+
40+
return reinterpret_cast<uintptr_t>(static_cast<void*>(tmp.release()));
3941
}
4042

4143
void init_hnsw(pybind11::module& m) {

lib/src/init.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#include "def.h"
21
#include "pybind11/pybind11.h"
32
#include "pybind11/numpy.h"
43
#include "pybind11/stl.h"
@@ -17,7 +16,4 @@ PYBIND11_MODULE(lib_knncolle, m) {
1716
init_hnsw(m);
1817
init_kmknn(m);
1918
init_vptree(m);
20-
21-
pybind11::class_<Builder, BuilderPointer>(m, "Builder");
22-
pybind11::class_<Prebuilt, PrebuiltPointer>(m, "Prebuilt");
2319
}

lib/src/kmknn.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,34 @@
1-
#include "def.h"
1+
#include "knncolle_py.h"
22
#include "pybind11/pybind11.h"
33

4-
BuilderPointer create_kmknn_builder(std::string distance) {
4+
#include <memory>
5+
#include <stdexcept>
6+
7+
uintptr_t create_kmknn_builder(std::string distance) {
8+
auto tmp = std::make_unique<knncolle_py::WrappedBuilder>();
9+
510
if (distance == "Manhattan") {
6-
return BuilderPointer(new knncolle::KmknnBuilder<knncolle::ManhattanDistance, SimpleMatrix, double>);
11+
tmp->ptr.reset(new knncolle::KmknnBuilder<knncolle::ManhattanDistance, knncolle_py::SimpleMatrix, knncolle_py::Distance>);
712

813
} else if (distance == "Euclidean") {
9-
return BuilderPointer(new knncolle::KmknnBuilder<knncolle::EuclideanDistance, SimpleMatrix, double>);
14+
tmp->ptr.reset(new knncolle::KmknnBuilder<knncolle::EuclideanDistance, knncolle_py::SimpleMatrix, knncolle_py::Distance>);
1015

1116
} else if (distance == "Cosine") {
12-
return BuilderPointer(
13-
new knncolle::L2NormalizedBuilder<SimpleMatrix, double>(
17+
tmp->ptr.reset(
18+
new knncolle::L2NormalizedBuilder<knncolle_py::SimpleMatrix, knncolle_py::Distance>(
1419
new knncolle::KmknnBuilder<
1520
knncolle::EuclideanDistance,
16-
knncolle::L2NormalizedMatrix<SimpleMatrix>,
21+
knncolle::L2NormalizedMatrix<knncolle_py::SimpleMatrix>,
1722
double
1823
>
1924
)
2025
);
2126

2227
} else {
2328
throw std::runtime_error("unknown distance type '" + distance + "'");
24-
return BuilderPointer();
2529
}
30+
31+
return reinterpret_cast<uintptr_t>(static_cast<void*>(tmp.release()));
2632
}
2733

2834
void init_kmknn(pybind11::module& m) {

0 commit comments

Comments
 (0)