Skip to content

Commit

Permalink
Add is_min_exact/is_max_exact
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Jul 11, 2024
1 parent c2ba4ed commit b99948d
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
13 changes: 10 additions & 3 deletions cpp/src/arrow/array/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,23 @@ struct ARROW_EXPORT ArrayStatistics {
/// \brief The number of distinct values, may not be set
std::optional<int64_t> distinct_count = std::nullopt;

/// \brief The minimum value buffer, may not be set
std::optional<ElementBufferType> min_buffer = std::nullopt;

/// \brief Whether the minimum value is exact or not, may not be set
std::optional<bool> is_min_exact = std::nullopt;

/// \brief The maximum value buffer, may not be set
std::optional<ElementBufferType> max_buffer = std::nullopt;

/// \brief Whether the maximum value is exact or not, may not be set
std::optional<bool> is_max_exact = std::nullopt;

/// \brief Check two Statistics for equality
///
/// Every field takes part in the comparison, including the exactness flags
/// is_min_exact and is_max_exact. Members declared as std::optional compare
/// equal when both are unset or when both hold equal values.
///
/// \param other the statistics to compare against
/// \return true if all fields compare equal, false otherwise
bool Equals(const ArrayStatistics& other) const {
  // A single return expression: a stray `;` after the buffer comparison would
  // end the statement early and silently drop the exactness flags.
  return null_count == other.null_count && distinct_count == other.distinct_count &&
         min_buffer == other.min_buffer && is_min_exact == other.is_min_exact &&
         max_buffer == other.max_buffer && is_max_exact == other.is_max_exact;
}
};

Expand Down
8 changes: 8 additions & 0 deletions cpp/src/parquet/arrow/arrow_statistics_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,12 @@ TEST(TestStatisticsRead, Boolean) {
ASSERT_EQ(false, statistics->distinct_count.has_value());
// Minimum: value must be present and flagged as exact.
ASSERT_EQ(true, statistics->min().has_value());
ASSERT_EQ(true, statistics->min().value());
ASSERT_EQ(true, statistics->is_min_exact.has_value());
ASSERT_EQ(true, statistics->is_min_exact.value());
// Maximum: value must be present and flagged as exact. This checks
// is_max_exact, which is a separate field from is_min_exact.
ASSERT_EQ(true, statistics->max().has_value());
ASSERT_EQ(true, statistics->max().value());
ASSERT_EQ(true, statistics->is_max_exact.has_value());
ASSERT_EQ(true, statistics->is_max_exact.value());
}

TEST(TestStatisticsRead, Int8) {
Expand All @@ -206,8 +210,12 @@ TEST(TestStatisticsRead, Int8) {
ASSERT_EQ(false, statistics->distinct_count.has_value());
// Minimum: value must be present and flagged as exact.
ASSERT_EQ(true, statistics->min().has_value());
ASSERT_EQ(-1, statistics->min().value());
ASSERT_EQ(true, statistics->is_min_exact.has_value());
ASSERT_EQ(true, statistics->is_min_exact.value());
// Maximum: value must be present and flagged as exact. This checks
// is_max_exact, which is a separate field from is_min_exact.
ASSERT_EQ(true, statistics->max().has_value());
ASSERT_EQ(1, statistics->max().value());
ASSERT_EQ(true, statistics->is_max_exact.has_value());
ASSERT_EQ(true, statistics->is_max_exact.value());
}

} // namespace parquet::arrow
4 changes: 4 additions & 0 deletions cpp/src/parquet/arrow/reader_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,9 @@ Status TransferInt(RecordReader* reader,
auto typed_statistics =
static_cast<::parquet::TypedStatistics<ParquetType>*>(statistics);
array_statistics->min_buffer = static_cast<ArrowCType>(typed_statistics->min());
array_statistics->is_min_exact = true;
array_statistics->max_buffer = static_cast<ArrowCType>(typed_statistics->max());
array_statistics->is_max_exact = true;
}
}
auto array = std::make_shared<ArrayType<ArrowType>>(std::move(array_data));
Expand Down Expand Up @@ -412,7 +414,9 @@ Status TransferBool(RecordReader* reader,
if (statistics->HasMinMax()) {
auto bool_statistics = static_cast<::parquet::BoolStatistics*>(statistics);
array_statistics->min_buffer = bool_statistics->min();
array_statistics->is_min_exact = true;
array_statistics->max_buffer = bool_statistics->max();
array_statistics->is_max_exact = true;
}
}
auto array = std::make_shared<BooleanArray>(std::move(array_data));
Expand Down

0 comments on commit b99948d

Please sign in to comment.