Skip to content

Commit

Permalink
Use Array's ArrayData constructor to assign statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Jul 12, 2024
1 parent 2d5ab34 commit c905bfc
Show file tree
Hide file tree
Showing 16 changed files with 232 additions and 167 deletions.
41 changes: 27 additions & 14 deletions cpp/src/arrow/array/array_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,27 +233,31 @@ class ARROW_EXPORT Array {
/// \return DeviceAllocationType
DeviceAllocationType device_type() const { return data_->device_type(); }

/// \brief Set the statistics to this Array
///
/// \param[in] statistics the statistics of this Array
void SetStatistics(std::shared_ptr<ArrayStatistics> statistics) {
statistics_ = std::move(statistics);
}

/// \brief Return the statistics of this Array
///
/// \return std::shared_ptr<ArrayStatistics>
std::shared_ptr<ArrayStatistics> GetStatistics() const { return statistics_; }
/// \return const std::shared_ptr<ArrayStatistics>&
const std::shared_ptr<ArrayStatistics>& statistics() const { return statistics_; }

protected:
Array() = default;
explicit Array(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
SetData(data);
if (statistics) {
SetStatistics(statistics);
}
}
ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);

std::shared_ptr<ArrayData> data_;
const uint8_t* null_bitmap_data_ = NULLPTR;

/// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
virtual void ValidateData(const std::shared_ptr<ArrayData>& data) {}

/// Protected method for constructors
virtual void SetData(const std::shared_ptr<ArrayData>& data) {
ValidateData(data);
if (data->buffers.size() > 0) {
null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
} else {
Expand All @@ -265,6 +269,11 @@ class ARROW_EXPORT Array {
// The statistics for this Array.
std::shared_ptr<ArrayStatistics> statistics_;

/// Protected method for constructors
void SetStatistics(const std::shared_ptr<ArrayStatistics>& statistics) {
statistics_ = statistics;
}

private:
ARROW_DISALLOW_COPY_AND_ASSIGN(Array);

Expand Down Expand Up @@ -296,12 +305,14 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray {
protected:
PrimitiveArray() : raw_values_(NULLPTR) {}

void SetData(const std::shared_ptr<ArrayData>& data) {
void SetData(const std::shared_ptr<ArrayData>& data) override {
this->Array::SetData(data);
raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
}

explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: FlatArray(data, statistics) {}

const uint8_t* raw_values_;
};
Expand All @@ -311,11 +322,13 @@ class ARROW_EXPORT NullArray : public FlatArray {
public:
using TypeClass = NullType;

explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
explicit NullArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: FlatArray(data, statistics) {}
explicit NullArray(int64_t length);

private:
void SetData(const std::shared_ptr<ArrayData>& data) {
void SetData(const std::shared_ptr<ArrayData>& data) override {
null_bitmap_data_ = NULLPTR;
data->null_count = data->length;
data_ = data;
Expand Down
22 changes: 6 additions & 16 deletions cpp/src/arrow/array/array_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ namespace arrow {

using internal::checked_cast;

BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) {
void BinaryArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK(is_binary_like(data->type->id()));
SetData(data);
}

BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
Expand All @@ -45,9 +44,8 @@ BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_of
null_count, offset));
}

LargeBinaryArray::LargeBinaryArray(const std::shared_ptr<ArrayData>& data) {
void LargeBinaryArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK(is_large_binary_like(data->type->id()));
SetData(data);
}

LargeBinaryArray::LargeBinaryArray(int64_t length,
Expand All @@ -59,9 +57,8 @@ LargeBinaryArray::LargeBinaryArray(int64_t length,
null_count, offset));
}

StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
void StringArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::STRING);
SetData(data);
}

StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
Expand All @@ -74,9 +71,8 @@ StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_of

Status StringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }

LargeStringArray::LargeStringArray(const std::shared_ptr<ArrayData>& data) {
void LargeStringArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::LARGE_STRING);
SetData(data);
}

LargeStringArray::LargeStringArray(int64_t length,
Expand All @@ -90,9 +86,8 @@ LargeStringArray::LargeStringArray(int64_t length,

Status LargeStringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }

BinaryViewArray::BinaryViewArray(std::shared_ptr<ArrayData> data) {
void BinaryViewArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::BINARY_VIEW);
SetData(std::move(data));
}

BinaryViewArray::BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
Expand All @@ -110,17 +105,12 @@ std::string_view BinaryViewArray::GetView(int64_t i) const {
return util::FromBinaryView(raw_values_[i], data_buffers);
}

StringViewArray::StringViewArray(std::shared_ptr<ArrayData> data) {
void StringViewArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::STRING_VIEW);
SetData(std::move(data));
}

Status StringViewArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }

FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data) {
SetData(data);
}

FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<DataType>& type,
int64_t length,
const std::shared_ptr<Buffer>& data,
Expand Down
52 changes: 42 additions & 10 deletions cpp/src/arrow/array/array_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,12 @@ class BaseBinaryArray : public FlatArray {
protected:
// For subclasses
BaseBinaryArray() = default;
explicit BaseBinaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: FlatArray(data, statistics) {}

// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
void SetData(const std::shared_ptr<ArrayData>& data) override {
this->Array::SetData(data);
raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
Expand All @@ -155,7 +158,9 @@ class BaseBinaryArray : public FlatArray {
/// Concrete Array class for variable-size binary data
class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
public:
explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
explicit BinaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: BaseBinaryArray(data, statistics) {}

BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -165,14 +170,18 @@ class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
protected:
// For subclasses such as StringArray
BinaryArray() : BaseBinaryArray() {}

void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

/// Concrete Array class for variable-size string (utf-8) data
class ARROW_EXPORT StringArray : public BinaryArray {
public:
using TypeClass = StringType;

explicit StringArray(const std::shared_ptr<ArrayData>& data);
explicit StringArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: BinaryArray(data, statistics) {}

StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -183,12 +192,17 @@ class ARROW_EXPORT StringArray : public BinaryArray {
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;

protected:
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

/// Concrete Array class for large variable-size binary data
class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
public:
explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: BaseBinaryArray(data, statistics) {}

LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -198,14 +212,17 @@ class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
protected:
// For subclasses such as LargeStringArray
LargeBinaryArray() : BaseBinaryArray() {}
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

/// Concrete Array class for large variable-size string (utf-8) data
class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
public:
using TypeClass = LargeStringType;

explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
explicit LargeStringArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: LargeBinaryArray(data, statistics) {}

LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -216,6 +233,9 @@ class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;

protected:
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

// ----------------------------------------------------------------------
Expand All @@ -229,7 +249,9 @@ class ARROW_EXPORT BinaryViewArray : public FlatArray {
using IteratorType = stl::ArrayIterator<BinaryViewArray>;
using c_type = BinaryViewType::c_type;

explicit BinaryViewArray(std::shared_ptr<ArrayData> data);
explicit BinaryViewArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: FlatArray(data, statistics) {}

BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
std::shared_ptr<Buffer> views, BufferVector data_buffers,
Expand All @@ -253,8 +275,10 @@ class ARROW_EXPORT BinaryViewArray : public FlatArray {
protected:
using FlatArray::FlatArray;

void SetData(std::shared_ptr<ArrayData> data) {
FlatArray::SetData(std::move(data));
void ValidateData(const std::shared_ptr<ArrayData>& data) override;

void SetData(const std::shared_ptr<ArrayData>& data) override {
FlatArray::SetData(data);
raw_values_ = data_->GetValuesSafe<c_type>(1);
}

Expand All @@ -267,14 +291,19 @@ class ARROW_EXPORT StringViewArray : public BinaryViewArray {
public:
using TypeClass = StringViewType;

explicit StringViewArray(std::shared_ptr<ArrayData> data);
explicit StringViewArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: BinaryViewArray(data, statistics) {}

using BinaryViewArray::BinaryViewArray;

/// \brief Validate that this array contains only valid UTF8 entries
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;

protected:
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

// ----------------------------------------------------------------------
Expand All @@ -286,7 +315,10 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
using TypeClass = FixedSizeBinaryType;
using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;

explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
explicit FixedSizeBinaryArray(
const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR)
: PrimitiveArray(data, statistics) {}

FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,14 @@ int64_t DictionaryArray::GetValueIndex(int64_t i) const {
}
}

DictionaryArray::DictionaryArray(const std::shared_ptr<ArrayData>& data)
: dict_type_(checked_cast<const DictionaryType*>(data->type.get())) {
DictionaryArray::DictionaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics)
: Array(data, statistics),
dict_type_(checked_cast<const DictionaryType*>(data->type.get())) {}

void DictionaryArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::DICTIONARY);
ARROW_CHECK_NE(data->dictionary, nullptr);
SetData(data);
}

void DictionaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/array/array_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ class ARROW_EXPORT DictionaryArray : public Array {
public:
using TypeClass = DictionaryType;

explicit DictionaryArray(const std::shared_ptr<ArrayData>& data);
explicit DictionaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR);

DictionaryArray(const std::shared_ptr<DataType>& type,
const std::shared_ptr<Array>& indices,
Expand Down Expand Up @@ -114,7 +115,8 @@ class ARROW_EXPORT DictionaryArray : public Array {
const DictionaryType* dict_type() const { return dict_type_; }

private:
void SetData(const std::shared_ptr<ArrayData>& data);
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
void SetData(const std::shared_ptr<ArrayData>& data) override;
const DictionaryType* dict_type_;
std::shared_ptr<Array> indices_;

Expand Down
Loading

0 comments on commit c905bfc

Please sign in to comment.