From c4268e087585933d92c5cc34155e238744543b1a Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 30 Sep 2024 14:38:32 -0500 Subject: [PATCH] docs: pybind11 demo project should have NumPy own the data (#3261) * step 0: remove all existing code and return None * step 1: make sure we can iterate over names_nbytes * step 2: make sure we can create a NumPy array through pybind11 * step 3: make sure we can see the raw data in the array * step 4: make sure we can fill the dict and the std::map * step 5: filling the cpp_container fills the py_container * done: we are now returning the array built by ak.from_buffers --- header-only/examples/pybind11/demo.cpp | 54 ++++++++++++-------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/header-only/examples/pybind11/demo.cpp b/header-only/examples/pybind11/demo.cpp index 94c216d0af..138e4379e8 100644 --- a/header-only/examples/pybind11/demo.cpp +++ b/header-only/examples/pybind11/demo.cpp @@ -34,42 +34,36 @@ using MyBuilder = RecordBuilder< */ template py::object snapshot_builder(const T &builder) { + // We need NumPy (to allocate arrays) and Awkward Array (ak.from_buffers). + // pybind11 will raise a ModuleNotFoundError if they aren't installed. + auto np = py::module::import("numpy"); + auto ak = py::module::import("awkward"); + + auto dtype_u1 = np.attr("dtype")("u1"); + // How much memory to allocate? - std::map names_nbytes = {}; + std::map names_nbytes; builder.buffer_nbytes(names_nbytes); - // Allocate memory - std::map buffers = {}; - for (auto it: names_nbytes) { - uint8_t *ptr = new uint8_t[it.second]; - buffers[it.first] = (void *) ptr; - } + // Ask NumPy to allocate memory and get pointers to the raw buffers. 
+ py::dict py_container; + std::map cpp_container; + for (auto name_nbytes : names_nbytes) { + py::object array = np.attr("empty")(name_nbytes.second, dtype_u1); - // Write non-contiguous contents to memory - builder.to_buffers(buffers); - auto from_buffers = py::module::import("awkward").attr("from_buffers"); - - // Build Python dictionary containing arrays - // dtypes not important here as long as they match the underlying buffer - // as Awkward Array calls `frombuffer` to convert to the correct type - py::dict container; - for (auto it: buffers) { - - py::capsule free_when_done(it.second, [](void *data) { - uint8_t *dataPtr = reinterpret_cast(data); - delete[] dataPtr; - }); - - uint8_t *data = reinterpret_cast(it.second); - container[py::str(it.first)] = py::array_t( - {names_nbytes[it.first]}, - {sizeof(uint8_t)}, - data, - free_when_done - ); + size_t pointer = py::cast(array.attr("ctypes").attr("data")); + void* raw_data = (void*)pointer; + + py::str py_name(name_nbytes.first); + py_container[py_name] = array; + cpp_container[name_nbytes.first] = raw_data; } - return from_buffers(builder.form(), builder.length(), container); + // Write non-contiguous contents to memory. + builder.to_buffers(cpp_container); + + // Build Python dictionary containing arrays. + return ak.attr("from_buffers")(builder.form(), builder.length(), py_container); }