Skip to content

Commit

Permalink
Move and swap functions for ArrowArray and ArrowSchema
Browse files Browse the repository at this point in the history
  • Loading branch information
JohanMabille committed Jan 31, 2025
1 parent b0794ca commit 3bbd038
Show file tree
Hide file tree
Showing 8 changed files with 348 additions and 136 deletions.
34 changes: 34 additions & 0 deletions include/sparrow/arrow_interface/arrow_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,26 @@ namespace sparrow
return array;
}

/**
 * Creates an ArrowArray that holds no data.
 *
 * The array is built through make_arrow_array with zero for every numeric
 * argument, a value-initialized buffer container, no children and no
 * dictionary.
 *
 * @return The newly created ArrowArray.
 */
inline ArrowArray make_empty_arrow_array()
{
    using empty_buffers_type = arrow_array_private_data::BufferType;
    return make_arrow_array(0, 0, 0, empty_buffers_type{}, 0u, nullptr, nullptr);
}

SPARROW_API void release_arrow_array(ArrowArray* array);

/**
 * Returns a view over the bitmap buffer of the array, i.e. the first
 * entry of `array.buffers`.
 *
 * The view spans `(array.length + array.offset + 7) / 8` bytes. If the
 * first buffer pointer is null, an empty view (null pointer, size 0) is
 * returned instead.
 *
 * @param array The ArrowArray whose first buffer is viewed.
 * @return A non-owning view over the bitmap bytes.
 */
SPARROW_API sparrow::buffer_view<uint8_t>
get_bitmap_buffer(const ArrowArray& array);

SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>
get_arrow_array_buffers(const ArrowArray& array, const ArrowSchema& schema);

/**
 * Swaps the contents of the two ArrowArray objects.
 *
 * Every field is exchanged between the two structures, including the
 * `release` callback and the `private_data` pointer. Neither array is
 * released by this call.
 *
 * @param lhs First array; holds the former content of `rhs` afterwards.
 * @param rhs Second array; holds the former content of `lhs` afterwards.
 */
SPARROW_API void
swap(ArrowArray& lhs, ArrowArray& rhs);

/**
* Fill the target ArrowArray with a deep copy of the data from the source ArrowArray.
*/
Expand All @@ -154,6 +169,25 @@ namespace sparrow
return target;
}

/**
 * Transfers the content of `source` into a new ArrowArray returned by
 * value, and resets `source` to an empty ArrowArray.
 *
 * The transfer is done by swapping `source` with a freshly created empty
 * array, so `source` remains a valid (empty) ArrowArray afterwards.
 *
 * @param source The array to take the content from.
 * @return An ArrowArray holding the former content of `source`.
 */
inline ArrowArray move_array(ArrowArray&& source)
{
    ArrowArray moved = make_empty_arrow_array();
    swap(source, moved);
    return moved;
}

/**
 * Lvalue overload of move_array: transfers the content of `source` into
 * a new ArrowArray returned by value, and resets `source` to an empty
 * ArrowArray.
 *
 * Simply forwards `source` as an rvalue to the rvalue-reference overload.
 *
 * @param source The array to take the content from.
 * @return An ArrowArray holding the former content of `source`.
 */
inline ArrowArray move_array(ArrowArray& source)
{
    return move_array(static_cast<ArrowArray&&>(source));
}
};

#if defined(__cpp_lib_format)
Expand Down
33 changes: 32 additions & 1 deletion include/sparrow/arrow_interface/arrow_schema.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,17 @@ namespace sparrow
return schema;
};

/**
 * Creates an ArrowSchema that describes no data.
 *
 * The schema is built through make_arrow_schema with the format string
 * "n", an empty name, empty metadata, no flags, no children and no
 * dictionary.
 * NOTE(review): "n" is presumably the Arrow C data interface format
 * string for the null type; confirm against the specification.
 *
 * @return The newly created ArrowSchema.
 */
inline ArrowSchema make_empty_arrow_schema()
{
return make_arrow_schema(std::string_view("n"), "", "", std::nullopt, 0, nullptr, nullptr);
}

/**
 * Swaps the contents of the two ArrowSchema objects.
 *
 * Every field is exchanged between the two structures, including the
 * `release` callback and the `private_data` pointer. Neither schema is
 * released by this call.
 *
 * @param lhs First schema; holds the former content of `rhs` afterwards.
 * @param rhs Second schema; holds the former content of `lhs` afterwards.
 */
SPARROW_API void
swap(ArrowSchema& lhs, ArrowSchema& rhs);

/**
* Fills the target `ArrowSchema` with a deep copy of the data from the source `ArrowSchema`.
*/
Expand All @@ -166,6 +177,26 @@ namespace sparrow
copy_schema(source, target);
return target;
}

/**
 * Transfers the content of `source` into a new ArrowSchema returned by
 * value, and resets `source` to an empty ArrowSchema.
 *
 * The transfer is done by swapping `source` with a freshly created empty
 * schema, so `source` remains a valid (empty) ArrowSchema afterwards.
 *
 * @param source The schema to take the content from.
 * @return An ArrowSchema holding the former content of `source`.
 */
inline ArrowSchema move_schema(ArrowSchema&& source)
{
    ArrowSchema moved = make_empty_arrow_schema();
    swap(source, moved);
    return moved;
}

/**
 * Lvalue overload of move_schema: transfers the content of `source` into
 * a new ArrowSchema returned by value, and resets `source` to an empty
 * ArrowSchema.
 *
 * Simply forwards `source` as an rvalue to the rvalue-reference overload.
 *
 * @param source The schema to take the content from.
 * @return An ArrowSchema holding the former content of `source`.
 */
inline ArrowSchema move_schema(ArrowSchema& source)
{
    return move_schema(static_cast<ArrowSchema&&>(source));
}
}

#if defined(__cpp_lib_format)
Expand All @@ -192,7 +223,7 @@ struct std::formatter<ArrowSchema>

return std::format_to(
ctx.out(),
"ArrowArray - ptr address: {}\n- format: {}\n- name: {}\n- metadata: {}\n- flags: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n- release: {}\n- private_data: {}\n",
"ArrowSchema - ptr address: {}\n- format: {}\n- name: {}\n- metadata: {}\n- flags: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n- release: {}\n- private_data: {}\n",
static_cast<const void*>(&obj),
format,
name,
Expand Down
32 changes: 26 additions & 6 deletions src/arrow_interface/arrow_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,25 @@ namespace sparrow
return const_cast<T*>(static_cast<const T*>(ptr));
}

/**
 * Returns a view over the bitmap buffer of the array, i.e. the first
 * entry of `array.buffers`.
 *
 * The view spans `(array.length + array.offset + 7) / 8` bytes, one bit
 * per element up to `offset + length`.
 *
 * An empty view (null pointer, size 0) is returned when the array has no
 * buffer at all (`n_buffers <= 0` or a null `buffers` table) or when the
 * first buffer pointer is null.
 *
 * @param array The ArrowArray whose first buffer is viewed.
 * @return A non-owning view over the bitmap bytes, possibly empty.
 */
sparrow::buffer_view<uint8_t>
get_bitmap_buffer(const ArrowArray& array)
{
    using buffer_view_type = sparrow::buffer_view<uint8_t>;

    // Without any buffer there is no slot 0 to read; indexing `buffers`
    // would be out of bounds (e.g. for an array created with no buffers).
    if (array.n_buffers <= 0 || array.buffers == nullptr)
    {
        return buffer_view_type(nullptr, 0);
    }

    auto typed_buffer_ptr = static_const_ptr_cast<uint8_t>(array.buffers[0]);
    if (typed_buffer_ptr == nullptr)
    {
        return buffer_view_type(nullptr, 0);
    }

    // One bit per element, rounded up to a whole number of bytes.
    const auto size = static_cast<size_t>(array.length + array.offset);
    const auto buffer_size = static_cast<size_t>(size + 7) / 8;
    return buffer_view_type(typed_buffer_ptr, buffer_size);
}

std::vector<sparrow::buffer_view<uint8_t>>
get_arrow_array_buffers(const ArrowArray& array, const ArrowSchema& schema)
{
using buffer_view_type = sparrow::buffer_view<uint8_t>;
const auto size = static_cast<size_t>(array.length + array.offset);
auto make_valid_buffer = [&]()
{
const auto buffer_size = static_cast<size_t>(size + 7) / 8;
auto typed_buffer_ptr = static_const_ptr_cast<uint8_t>(array.buffers[0]);
return typed_buffer_ptr != nullptr ? buffer_view_type(typed_buffer_ptr, buffer_size)
: buffer_view_type(nullptr, 0);
return get_bitmap_buffer(array);
};
auto make_buffer = [&](auto index, auto size)
{
Expand Down Expand Up @@ -158,6 +166,20 @@ namespace sparrow
return {};
}

/**
 * Exchanges the content of two ArrowArray structures field by field.
 *
 * The `release` callback and `private_data` pointer are exchanged like
 * any other field; nothing is released or copied.
 */
void swap(ArrowArray& lhs, ArrowArray& rhs)
{
    // Scalar description of the array.
    std::swap(lhs.offset, rhs.offset);
    std::swap(lhs.length, rhs.length);
    std::swap(lhs.null_count, rhs.null_count);

    // Buffers, with their count.
    std::swap(lhs.n_buffers, rhs.n_buffers);
    std::swap(lhs.buffers, rhs.buffers);

    // Nested arrays: children, with their count, and dictionary.
    std::swap(lhs.n_children, rhs.n_children);
    std::swap(lhs.children, rhs.children);
    std::swap(lhs.dictionary, rhs.dictionary);

    // Release callback and the private data pointer that goes with it.
    std::swap(lhs.private_data, rhs.private_data);
    std::swap(lhs.release, rhs.release);
}

void copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema, ArrowArray& target)
{
SPARROW_ASSERT_TRUE(&source_array != &target);
Expand Down Expand Up @@ -206,6 +228,4 @@ namespace sparrow
target.buffers = private_data->buffers_ptrs<void>();
target.release = release_arrow_array;
}


}
13 changes: 13 additions & 0 deletions src/arrow_interface/arrow_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,19 @@ namespace sparrow
SPARROW_ASSERT_TRUE(schema->release == std::addressof(empty_release_arrow_schema));
}

/**
 * Exchanges the content of two ArrowSchema structures field by field.
 *
 * The `release` callback and `private_data` pointer are exchanged like
 * any other field; nothing is released or copied.
 */
void swap(ArrowSchema& lhs, ArrowSchema& rhs)
{
    // Description of the schema itself.
    std::swap(lhs.name, rhs.name);
    std::swap(lhs.format, rhs.format);
    std::swap(lhs.metadata, rhs.metadata);
    std::swap(lhs.flags, rhs.flags);

    // Nested schemas: children, with their count, and dictionary.
    std::swap(lhs.n_children, rhs.n_children);
    std::swap(lhs.children, rhs.children);
    std::swap(lhs.dictionary, rhs.dictionary);

    // Release callback and the private data pointer that goes with it.
    std::swap(lhs.private_data, rhs.private_data);
    std::swap(lhs.release, rhs.release);
}

void copy_schema(const ArrowSchema& source, ArrowSchema& target)
{
SPARROW_ASSERT_TRUE(&source != &target);
Expand Down
113 changes: 76 additions & 37 deletions test/arrow_array_schema_creation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,50 +22,89 @@

#include "external_array_data_creation.hpp"

inline std::pair<ArrowArray, ArrowSchema> make_external_arrow_schema_and_array()
namespace test
{
std::pair<ArrowArray, ArrowSchema> pair;
constexpr size_t size = 10;
constexpr size_t offset = 1;
sparrow::test::fill_external_schema_and_array<uint32_t>(pair.second, pair.first, size, offset, {2, 3});
return pair;
}
using buffer_type = sparrow::buffer<uint8_t>;
using buffer_list = std::vector<buffer_type>;

namespace detail
{
inline void fill_sparrow_array_schema(ArrowArray& array, ArrowSchema& schema)
namespace detail
{
static constexpr std::size_t number_children = 4;

/**
 * Builds the buffer list used for test arrays without children: a
 * two-byte buffer {0xF3, 0xFF} followed by a ten-byte buffer holding the
 * values 0 to 9.
 */
inline buffer_list get_test_buffer_list0()
{
    return buffer_list{
        buffer_type({0xF3, 0xFF}),
        buffer_type({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
    };
}

/**
 * Builds the buffer list used for test arrays with children: a one-byte
 * buffer {0xF3} followed by a five-byte buffer holding the values 9 down
 * to 5.
 */
inline buffer_list get_test_buffer_list1()
{
    return buffer_list{
        buffer_type({0xF3}),
        buffer_type({9, 8, 7, 6, 5})
    };
}
}

/**
 * Creates an ArrowArray for tests.
 *
 * Without children, the array is filled with length 10, null count 2,
 * offset 0 and the buffers from detail::get_test_buffer_list0().
 *
 * With children, the array is filled with length 5, null count 2,
 * offset 0 and the buffers from detail::get_test_buffer_list1(); it also
 * receives detail::number_children heap-allocated child arrays and one
 * heap-allocated dictionary, each of them a childless test array.
 * NOTE(review): the raw allocations are handed to
 * sparrow::fill_arrow_array; presumably the release callback of the
 * resulting array frees them. Confirm against fill_arrow_array.
 *
 * @param with_children Whether children and a dictionary are attached.
 * @return The filled ArrowArray.
 */
inline ArrowArray make_arrow_array(bool with_children)
{
    ArrowArray result;

    // Leaf case: plain array, nothing nested.
    if (!with_children)
    {
        sparrow::fill_arrow_array(result, 10, 2, 0, detail::get_test_buffer_list0(), 0, nullptr, nullptr);
        return result;
    }

    auto child_count = detail::number_children;
    auto child_ptrs = new ArrowArray*[child_count];
    for (std::size_t idx = 0; idx < child_count; ++idx)
    {
        child_ptrs[idx] = new ArrowArray(make_arrow_array(false));
    }
    auto dictionary = new ArrowArray(make_arrow_array(false));

    sparrow::fill_arrow_array(
        result,
        5,
        2,
        0,
        detail::get_test_buffer_list1(),
        child_count,
        child_ptrs,
        dictionary
    );
    return result;
}

inline ArrowSchema make_arrow_schema(bool with_children)
{
using namespace std::literals;
sparrow::fill_arrow_schema(
schema,
sparrow::data_type_to_format(sparrow::data_type::UINT8),
"test"sv,
"test metadata"sv,
std::nullopt,
0,
nullptr,
nullptr
);
std::vector<sparrow::buffer<std::uint8_t>> buffers_dummy = {
sparrow::buffer<std::uint8_t>({0xF3, 0xFF}),
sparrow::buffer<std::uint8_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
};
sparrow::fill_arrow_array(array, 10, 2, 0, buffers_dummy, 0, nullptr, nullptr);
ArrowSchema res;
if (with_children)
{
ArrowSchema** children = new ArrowSchema*[detail::number_children];
auto nb_children = static_cast<int64_t>(detail::number_children);
for (size_t i = 0; i < detail::number_children; ++i)
{
children[i] = new ArrowSchema(make_arrow_schema(false));
}
auto dict = new ArrowSchema(make_arrow_schema(false));
sparrow::fill_arrow_schema(res, "c"sv, "with_children"sv, "meta1"sv, std::nullopt, nb_children, children, dict);
}
else
{
sparrow::fill_arrow_schema(res, "c"sv, "no_children"sv, "meta0"sv, std::nullopt, 0, nullptr, nullptr);
}
return res;
}
}

inline sparrow::arrow_array_and_schema_pointers make_sparrow_arrow_schema_and_array_pointers()
{
ArrowArray* array = new ArrowArray{};
ArrowSchema* schema = new ArrowSchema{};
detail::fill_sparrow_array_schema(*array, *schema);
return {array, schema};
/**
 * Creates a matching test ArrowArray / ArrowSchema pair.
 *
 * The array is created first, then the schema; both receive the same
 * `with_children` flag.
 *
 * @param with_children Whether children and a dictionary are attached
 *                      to both structures.
 * @return The array and schema bundled together.
 */
inline sparrow::arrow_array_and_schema make_arrow_schema_and_array(bool with_children)
{
    ArrowArray array = make_arrow_array(with_children);
    ArrowSchema schema = make_arrow_schema(with_children);
    return {std::move(array), std::move(schema)};
}
}

inline sparrow::arrow_array_and_schema make_sparrow_arrow_schema_and_array()
inline std::pair<ArrowArray, ArrowSchema> make_external_arrow_schema_and_array()
{
ArrowArray array;
ArrowSchema schema;
detail::fill_sparrow_array_schema(array, schema);
return {std::move(array), std::move(schema)};
std::pair<ArrowArray, ArrowSchema> pair;
constexpr size_t size = 10;
constexpr size_t offset = 1;
sparrow::test::fill_external_schema_and_array<uint32_t>(pair.second, pair.first, size, offset, {2, 3});
return pair;
}

Loading

0 comments on commit 3bbd038

Please sign in to comment.