Skip to content

Commit

Permalink
apacheGH-41684: [C++][Python] Add optional null_bitmap to MapArray::F…
Browse files Browse the repository at this point in the history
…romArrays (apache#41757)

### Rationale for this change

When constructing a `MapArray` with `FromArrays` one can not supply a `null_bitmap`.

### What changes are included in this PR?

Optional `null_bitmap` argument is added to `MapArray::FromArrays`.

### Are these changes tested?

TODO (have them locally, need to clean them up and commit.

### Are there any user-facing changes?

No.
* GitHub Issue: apache#41684

Authored-by: AlenkaF <frim.alenka@gmail.com>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
  • Loading branch information
AlenkaF committed May 31, 2024
1 parent 31fe24d commit 255dbf9
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 22 deletions.
17 changes: 17 additions & 0 deletions cpp/src/arrow/array/array_list_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1368,6 +1368,23 @@ TEST_F(TestMapArray, FromArrays) {
ASSERT_EQ(keys_with_null->length(), tmp_items->length());
ASSERT_RAISES(Invalid,
MapArray::FromArrays(offsets1, keys_with_null, tmp_items, pool_));

// With null_bitmap
ASSERT_OK_AND_ASSIGN(auto map7, MapArray::FromArrays(offsets1, keys, items, pool_,
offsets3->data()->buffers[0]));
ASSERT_OK(map7->Validate());
MapArray expected7(map_type, length, offsets1->data()->buffers[1], keys, items,
offsets3->data()->buffers[0], 1);
AssertArraysEqual(expected7, *map7);

// Null bitmap and offset with null
ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets3, keys, items, pool_,
offsets3->data()->buffers[0]));

// Null bitmap and offset with offset
ASSERT_RAISES(NotImplemented,
MapArray::FromArrays(offsets3->Slice(2), keys, items, pool_,
offsets3->data()->buffers[0]));
}

TEST_F(TestMapArray, FromArraysEquality) {
Expand Down
45 changes: 31 additions & 14 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr<DataType>& type, int64_t length,
Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool) {
MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
using offset_type = typename MapType::offset_type;
using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;

Expand All @@ -827,6 +827,15 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(
return Status::Invalid("Map key and item arrays must be equal length");
}

if (null_bitmap != nullptr && offsets->null_count() > 0) {
return Status::Invalid(
"Ambiguous to specify both validity map and offsets with nulls");
}

if (null_bitmap != nullptr && offsets->offset() != 0) {
return Status::NotImplemented("Null bitmap with offsets slice not supported.");
}

if (offsets->null_count() > 0) {
ARROW_ASSIGN_OR_RAISE(auto buffers,
CleanListOffsets<MapType>(NULLPTR, *offsets, pool));
Expand All @@ -836,24 +845,32 @@ Result<std::shared_ptr<Array>> MapArray::FromArraysInternal(

using OffsetArrayType = typename TypeTraits<OffsetArrowType>::ArrayType;
const auto& typed_offsets = checked_cast<const OffsetArrayType&>(*offsets);
auto buffers = BufferVector({nullptr, typed_offsets.values()});

BufferVector buffers;
int64_t null_count;
if (null_bitmap != nullptr) {
buffers = BufferVector({std::move(null_bitmap), typed_offsets.values()});
null_count = null_bitmap->size();
} else {
buffers = BufferVector({null_bitmap, typed_offsets.values()});
null_count = 0;
}
return std::make_shared<MapArray>(type, offsets->length() - 1, std::move(buffers), keys,
items, /*null_count=*/0, offsets->offset());
items, /*null_count=*/null_count, offsets->offset());
}

Result<std::shared_ptr<Array>> MapArray::FromArrays(const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys,
const std::shared_ptr<Array>& items,
MemoryPool* pool) {
Result<std::shared_ptr<Array>> MapArray::FromArrays(
const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
const std::shared_ptr<Array>& items, MemoryPool* pool,
const std::shared_ptr<Buffer>& null_bitmap) {
return FromArraysInternal(std::make_shared<MapType>(keys->type(), items->type()),
offsets, keys, items, pool);
offsets, keys, items, pool, null_bitmap);
}

Result<std::shared_ptr<Array>> MapArray::FromArrays(std::shared_ptr<DataType> type,
const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys,
const std::shared_ptr<Array>& items,
MemoryPool* pool) {
Result<std::shared_ptr<Array>> MapArray::FromArrays(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap) {
if (type->id() != Type::MAP) {
return Status::TypeError("Expected map type, got ", type->ToString());
}
Expand All @@ -864,7 +881,7 @@ Result<std::shared_ptr<Array>> MapArray::FromArrays(std::shared_ptr<DataType> ty
if (!map_type.item_type()->Equals(items->type())) {
return Status::TypeError("Mismatching map items type");
}
return FromArraysInternal(std::move(type), offsets, keys, items, pool);
return FromArraysInternal(std::move(type), offsets, keys, items, pool, null_bitmap);
}

Status MapArray::ValidateChildData(
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -532,15 +532,18 @@ class ARROW_EXPORT MapArray : public ListArray {
/// \param[in] keys Array containing key values
/// \param[in] items Array containing item values
/// \param[in] pool MemoryPool in case new offsets array needs to be
/// \param[in] null_bitmap Optional validity bitmap
/// allocated because of null values
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& offsets, const std::shared_ptr<Array>& keys,
const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool());
const std::shared_ptr<Array>& items, MemoryPool* pool = default_memory_pool(),
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);

static Result<std::shared_ptr<Array>> FromArrays(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool = default_memory_pool());
MemoryPool* pool = default_memory_pool(),
const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);

const MapType* map_type() const { return map_type_; }

Expand All @@ -560,7 +563,7 @@ class ARROW_EXPORT MapArray : public ListArray {
static Result<std::shared_ptr<Array>> FromArraysInternal(
std::shared_ptr<DataType> type, const std::shared_ptr<Array>& offsets,
const std::shared_ptr<Array>& keys, const std::shared_ptr<Array>& items,
MemoryPool* pool);
MemoryPool* pool, const std::shared_ptr<Buffer>& null_bitmap = NULLPTR);

private:
const MapType* map_type_;
Expand Down
11 changes: 8 additions & 3 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -3060,7 +3060,7 @@ cdef class MapArray(ListArray):
"""

@staticmethod
def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None):
def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None, mask=None):
"""
Construct MapArray from arrays of int32 offsets and key, item arrays.
Expand All @@ -3072,6 +3072,8 @@ cdef class MapArray(ListArray):
type : DataType, optional
If not specified, a default MapArray with the keys' and items' type is used.
pool : MemoryPool
mask : Array (boolean type), optional
Indicate which values are null (True) or not null (False).
Returns
-------
Expand Down Expand Up @@ -3153,24 +3155,27 @@ cdef class MapArray(ListArray):
cdef:
Array _offsets, _keys, _items
shared_ptr[CArray] out
shared_ptr[CBuffer] c_mask
cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool)

_offsets = asarray(offsets, type='int32')
_keys = asarray(keys)
_items = asarray(items)

c_mask = c_mask_inverted_from_obj(mask, pool)

if type is not None:
with nogil:
out = GetResultValue(
CMapArray.FromArraysAndType(
type.sp_type, _offsets.sp_array,
_keys.sp_array, _items.sp_array, cpool))
_keys.sp_array, _items.sp_array, cpool, c_mask))
else:
with nogil:
out = GetResultValue(
CMapArray.FromArrays(_offsets.sp_array,
_keys.sp_array,
_items.sp_array, cpool))
_items.sp_array, cpool, c_mask))
cdef Array result = pyarrow_wrap_array(out)
result.validate()
return result
Expand Down
8 changes: 6 additions & 2 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -823,15 +823,19 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
const shared_ptr[CArray]& offsets,
const shared_ptr[CArray]& keys,
const shared_ptr[CArray]& items,
CMemoryPool* pool)
CMemoryPool* pool,
const shared_ptr[CBuffer] null_bitmap,
)

@staticmethod
CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
shared_ptr[CDataType],
const shared_ptr[CArray]& offsets,
const shared_ptr[CArray]& keys,
const shared_ptr[CArray]& items,
CMemoryPool* pool)
CMemoryPool* pool,
const shared_ptr[CBuffer] null_bitmap,
)

shared_ptr[CArray] keys()
shared_ptr[CArray] items()
Expand Down
34 changes: 34 additions & 0 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,40 @@ def test_map_from_arrays():
pa.int64()
))

# pass in null bitmap with type
result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, pa.map_(
keys.type,
items.type),
mask=pa.array([False, True, False], type=pa.bool_())
)
assert result.equals(expected)

# pass in null bitmap without the type
result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items,
mask=pa.array([False, True, False],
type=pa.bool_())
)
assert result.equals(expected)

# error if null bitmap and offsets with nulls passed
msg1 = 'Ambiguous to specify both validity map and offsets with nulls'
with pytest.raises(pa.ArrowInvalid, match=msg1):
pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
keys.type,
items.type),
mask=pa.array([False, True, False], type=pa.bool_())
)

# error if null bitmap passed to sliced offset
msg2 = 'Null bitmap with offsets slice not supported.'
offsets = pa.array(offsets, pa.int32())
with pytest.raises(pa.ArrowNotImplementedError, match=msg2):
pa.MapArray.from_arrays(offsets.slice(2), keys, items, pa.map_(
keys.type,
items.type),
mask=pa.array([False, True, False], type=pa.bool_())
)

# check invalid usage
offsets = [0, 1, 3, 5]
keys = np.arange(5)
Expand Down

0 comments on commit 255dbf9

Please sign in to comment.