From 5bede24a5a476adfac3d92bb660f5b76f543a863 Mon Sep 17 00:00:00 2001 From: APKAI5AM767AKRTSIELQ Date: Fri, 26 Aug 2022 13:29:55 -0400 Subject: [PATCH] #610: Replace the SampleArray util method with a call to arrow::compute::Take, passing the corresponding indices to it. Notes: 1) Commented out the code that was made obsolete by the resolution of this issue. 2) See the comments in the sample_binary_array function related to the result status. --- cpp/src/cylon/util/arrow_utils.cpp | 65 +++++++++++++++++------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/cpp/src/cylon/util/arrow_utils.cpp b/cpp/src/cylon/util/arrow_utils.cpp index cd1883a9c..0c963cd97 100644 --- a/cpp/src/cylon/util/arrow_utils.cpp +++ b/cpp/src/cylon/util/arrow_utils.cpp @@ -166,7 +166,7 @@ arrow::Status Duplicate(const std::shared_ptr &table, arrow::Memor return arrow::Status::OK(); } -template +/*template static inline arrow::Status sample_fixed_size_array(const std::shared_ptr &ch_array, uint64_t num_samples, std::shared_ptr &out, @@ -215,7 +215,7 @@ static inline arrow::Status sample_fixed_size_array(const std::shared_ptr static inline arrow::Status sample_binary_array(const std::shared_ptr &ch_array, @@ -275,32 +275,41 @@ arrow::Status SampleArray(const std::shared_ptr &arr, uint64_t num_samples, std::shared_ptr &out, arrow::MemoryPool *pool) { - switch (arr->type()->id()) { - case arrow::Type::BOOL:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::UINT8:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::INT8:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::UINT16:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::INT16:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::UINT32:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::INT32:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::UINT64:return 
sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::INT64:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::FLOAT:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::DOUBLE:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::DATE32:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::DATE64:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::TIMESTAMP:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::TIME32:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::TIME64:return sample_fixed_size_array(arr, num_samples, out, pool); - case arrow::Type::STRING: return sample_binary_array(arr, num_samples, out, pool); - case arrow::Type::BINARY:return sample_binary_array(arr, num_samples, out, pool); - case arrow::Type::FIXED_SIZE_BINARY: - return sample_fixed_size_array(arr, - num_samples, - out, - pool); - default:return arrow::Status(arrow::StatusCode::Invalid, "unsupported type"); - } + + auto result = arrow::compute::Take(out, arr); + if (result.ok()) { + return result.status(); + } else { + //could just return result.status() but would this break clients? 
+ return arrow::Status(arrow::StatusCode::Invalid, "unsupported type"); + } + + /*switch (arr->type()->id()) { + case arrow::Type::BOOL:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::UINT8:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::INT8:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::UINT16:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::INT16:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::UINT32:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::INT32:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::UINT64:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::INT64:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::FLOAT:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::DOUBLE:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::DATE32:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::DATE64:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::TIMESTAMP:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::TIME32:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::TIME64:return sample_fixed_size_array(arr, num_samples, out, pool); + case arrow::Type::STRING: return sample_binary_array(arr, num_samples, out, pool); + case arrow::Type::BINARY:return sample_binary_array(arr, num_samples, out, pool); + case arrow::Type::FIXED_SIZE_BINARY: + return sample_fixed_size_array(arr, + num_samples, + out, + pool); + default:return arrow::Status(arrow::StatusCode::Invalid, "unsupported type"); + }*/ } arrow::Status SampleArray(const std::shared_ptr &arr,