man-group · willdealtry · Dec 16, 2024 · Jan 14, 2025 · Mar 4, 2025 · Mar 5, 2025
@@ -383,6 +383,11 @@ set(arcticdb_srcs
         util/type_traits.hpp
         util/variant.hpp
         util/gil_safe_py_none.hpp
+        util/min_max_integer.hpp
+        util/mean.hpp
+        util/min_max_float.hpp
+        util/sum.hpp
+        util/vector_common.hpp
         version/de_dup_map.hpp
         version/op_log.hpp
         version/schema_checks.hpp
@@ -760,8 +765,8 @@ if (SSL_LINK)
     find_package(OpenSSL REQUIRED)
     list(APPEND arcticdb_core_libraries OpenSSL::SSL)
     if (NOT WIN32)
-        list(APPEND arcticdb_core_libraries ${KERBEROS_LIBRARY})
-        list(APPEND arcticdb_core_includes  ${KERBEROS_INCLUDE_DIR})
+        #list(APPEND arcticdb_core_libraries ${KERBEROS_LIBRARY})
+        #list(APPEND arcticdb_core_includes  ${KERBEROS_INCLUDE_DIR})
     endif()
 endif ()
 target_link_libraries(arcticdb_core_object PUBLIC ${arcticdb_core_libraries})
@@ -979,6 +984,10 @@ if(${TEST})
             util/test/test_storage_lock.cpp
             util/test/test_string_pool.cpp
             util/test/test_string_utils.cpp
+            util/test/test_min_max_float.cpp
+            util/test/test_min_max_integer.cpp
+            util/test/test_sum.cpp
+            util/test/test_mean.cpp
             util/test/test_tracing_allocator.cpp
             version/test/test_append.cpp
             version/test/test_key_block.cpp
@@ -991,8 +1000,7 @@ if(${TEST})
             version/test/test_version_map_batch.cpp
             version/test/test_version_store.cpp
             version/test/version_map_model.hpp
-            python/python_handlers.cpp
-)
+            python/python_handlers.cpp)
 
     set(EXECUTABLE_PERMS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) # 755
 
@@ -1097,7 +1105,7 @@ if(${TEST})
             util/test/rapidcheck_string_pool.cpp
             util/test/rapidcheck_main.cpp
             util/test/rapidcheck_lru_cache.cpp
-            version/test/rapidcheck_version_map.cpp)
+            version/test/rapidcheck_version_map.cpp util/test/test_min_max_integer.cpp)
 
     add_executable(arcticdb_rapidcheck_tests ${rapidcheck_srcs})
     install(TARGETS arcticdb_rapidcheck_tests RUNTIME

@@ -15,7 +15,6 @@
 namespace arcticdb {
 
 struct MemBlock {
-    static const size_t Align = 128;
     static const size_t MinSize = 64;
     using magic_t = arcticdb::util::MagicNum<'M', 'e', 'm', 'b'>;
     magic_t magic_;
@@ -140,17 +139,21 @@ struct MemBlock {
     bool owns_external_data_ = false;
 
     static const size_t HeaderDataSize =
-            sizeof(magic_) +   // 8 bytes
-            sizeof(bytes_) +   // 8 bytes
-            sizeof(capacity_) +   // 8 bytes
+        sizeof(magic_) +
+            sizeof(bytes_) +
+            sizeof(capacity_) +
             sizeof(external_data_) +
             sizeof(offset_) +
-            sizeof(timestamp_) + 
+            sizeof(timestamp_) +
             sizeof(owns_external_data_);
 
-    uint8_t pad[Align - HeaderDataSize];
-    static const size_t HeaderSize = HeaderDataSize + sizeof(pad);
-    static_assert(HeaderSize == Align);
-    uint8_t data_[MinSize];
+    static const size_t DataAlignment = 64;
+    static const size_t PadSize = (DataAlignment - (HeaderDataSize % DataAlignment)) % DataAlignment;
+
+    uint8_t pad[PadSize];
+    static const size_t HeaderSize = HeaderDataSize + PadSize;
+    static_assert(HeaderSize % DataAlignment == 0, "Header size must be aligned to 64 bytes");
+
+    alignas(DataAlignment) uint8_t data_[MinSize];
 };
 }
@@ -68,7 +68,7 @@ std::vector<ChunkedBufferImpl<BlockSize>> split(const ChunkedBufferImpl<BlockSiz
 }
 
 template std::vector<ChunkedBufferImpl<64>> split(const ChunkedBufferImpl<64>& input, size_t nbytes);
-template std::vector<ChunkedBufferImpl<3968>> split(const ChunkedBufferImpl<3968>& input, size_t nbytes);
+template std::vector<ChunkedBufferImpl<4032ul>> split(const ChunkedBufferImpl<4032ul>& input, size_t nbytes);
 
 // Inclusive of start_byte, exclusive of end_byte
 template <size_t BlockSize>
@@ -112,6 +112,6 @@ ChunkedBufferImpl<BlockSize> truncate(const ChunkedBufferImpl<BlockSize>& input,
 }
 
 template ChunkedBufferImpl<64> truncate(const ChunkedBufferImpl<64>& input, size_t start_byte, size_t end_byte);
-template ChunkedBufferImpl<3968> truncate(const ChunkedBufferImpl<3968>& input, size_t start_byte, size_t end_byte);
+template ChunkedBufferImpl<4032ul> truncate(const ChunkedBufferImpl<4032ul>& input, size_t start_byte, size_t end_byte);
 
 } //namespace arcticdb
@@ -40,7 +40,6 @@ class ChunkedBufferImpl {
 
     using BlockType = MemBlock;
 
-    static_assert(sizeof(BlockType) == BlockType::Align + BlockType::MinSize);
     static_assert(DefaultBlockSize >= BlockType::MinSize);
 
   public:

@@ -6,7 +6,6 @@
  */
 
 #include <gtest/gtest.h>
-#include <cstdint>
 #include <limits>
 
 #include <arcticdb/column_store/memory_segment.hpp>
@@ -410,7 +409,7 @@ TEST(ColumnStats, MultipleBlocks) {
     EXPECT_TRUE(stats.has_max());
     EXPECT_TRUE(stats.has_unique());
 
-    EXPECT_EQ(single_col.buffer().num_blocks(), 2017);
+    EXPECT_EQ(single_col.buffer().num_blocks(), 1985);
 
     EXPECT_EQ(stats.get_min<uint64_t>(), 0);
     EXPECT_EQ(stats.get_max<int32_t>(), 999'999);

@@ -19,8 +19,7 @@ class ColumnDataRandomAccessorTest : public testing::Test {
         input_data.resize(n);
         std::iota(input_data.begin(), input_data.end(), 42);
     }
-    // 3968 bytes == 496 int64s per block, so 3 blocks here
-    size_t n{1000};
+    size_t n{1100};
     std::vector<int64_t> input_data;
     TypeDescriptor type_descriptor{static_cast<TypeDescriptor>(TDT{})};
 };

@@ -138,7 +138,6 @@ folly::Future<S3Result<std::monostate>> S3ClientTestWrapper::delete_object(
 
 // Using a fixed page size since it's only being used for simple tests.
 // If we ever need to configure it we should move it to the s3 proto config instead.
-constexpr auto page_size = 10;
 S3Result<ListObjectsOutput> S3ClientTestWrapper::list_objects(
         const std::string& name_prefix,
         const std::string& bucket_name,

@@ -0,0 +1,123 @@
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <cstddef>
+
+#include <arcticdb/util/vector_common.hpp>
+#include <arcticdb/util/preconditions.hpp>
+
+namespace arcticdb {
+
+#if HAS_VECTOR_EXTENSIONS
+
+template<typename Vec, typename U>
+inline void fill_vector(Vec &vec, U value) {
+    U* p = reinterpret_cast<U*>(&vec);
+    constexpr size_t count = sizeof(Vec) / sizeof(U);
+    for (size_t i = 0; i < count; i++) {
+        p[i] = value;
+    }
+}
+
+template<typename T>
+class MeanFinder {
+    static_assert(is_supported_int<T>::value || is_supported_float<T>::value, "Unsupported type");
+public:
+    static double find(const T* __restrict data, size_t n) {
+        util::check(n > 0, "Empty array provided");
+        using VectorType = vector_type<T>;
+        using AccumVectorType = vector_type<double>;
+        constexpr size_t elements_per_vector = sizeof(VectorType) / sizeof(T);
+        constexpr size_t doubles_per_vector = sizeof(AccumVectorType) / sizeof(double);
+        constexpr size_t vectors_per_acc = elements_per_vector / doubles_per_vector;
+        AccumVectorType vsum;
+        double* vsum_ptr = reinterpret_cast<double*>(&vsum);
+        fill_vector<AccumVectorType, double>(vsum, 0.0);
+        size_t valid_count = 0;
+        const VectorType* vdata = reinterpret_cast<const VectorType*>(data);
+        const size_t vector_len = n / elements_per_vector;
+        for (size_t i = 0; i < vector_len; i++) {
+            VectorType v = vdata[i];
+            const T* v_arr = reinterpret_cast<const T*>(&v);
+            if constexpr (std::is_floating_point_v<T>) {
+                bool has_nan = false;
+                for (size_t j = 0; j < elements_per_vector; j++) {
+                    if (std::isnan(v_arr[j])) { has_nan = true; break; }
+                }
+                if (!has_nan) {
+                    for (size_t chunk = 0; chunk < vectors_per_acc; chunk++) {
+                        size_t base = chunk * doubles_per_vector;
+                        for (size_t j = 0; j < doubles_per_vector; j++) {
+                            if constexpr (std::is_same_v<T, double>)
+                                vsum_ptr[j] += v_arr[base + j];
+                            else
+                                vsum_ptr[j] += static_cast<double>(v_arr[base + j]);
+                        }
+                    }
+                    valid_count += elements_per_vector;
+                } else {
+                    for (size_t chunk = 0; chunk < vectors_per_acc; chunk++) {
+                        size_t base = chunk * doubles_per_vector;
+                        for (size_t j = 0; j < doubles_per_vector; j++) {
+                            size_t idx = base + j;
+                            if (!std::isnan(v_arr[idx])) {
+                                if constexpr (std::is_same_v<T, double>)
+                                    vsum_ptr[j] += v_arr[idx];
+                                else
+                                    vsum_ptr[j] += static_cast<double>(v_arr[idx]);
+                                valid_count++;
+                            }
+                        }
+                    }
+                }
+            } else {
+                for (size_t chunk = 0; chunk < vectors_per_acc; chunk++) {
+                    size_t base = chunk * doubles_per_vector;
+                    for (size_t j = 0; j < doubles_per_vector; j++) {
+                        vsum_ptr[j] += static_cast<double>(v_arr[base + j]);
+                    }
+                }
+                valid_count += elements_per_vector;
+            }
+        }
+        double total = 0.0;
+        for (size_t j = 0; j < doubles_per_vector; j++) {
+            total += vsum_ptr[j];
+        }
+        const T* remain = data + (vector_len * elements_per_vector);
+        size_t rem = n % elements_per_vector;
+        for (size_t i = 0; i < rem; i++) {
+            if constexpr (std::is_floating_point_v<T>) {
+                if (!std::isnan(remain[i])) {
+                    if constexpr (std::is_same_v<T, double>)
+                        total += remain[i];
+                    else
+                        total += static_cast<double>(remain[i]);
+                    valid_count++;
+                }
+            } else {
+                total += static_cast<double>(remain[i]);
+                valid_count++;
+            }
+        }
+        return valid_count > 0 ? total / static_cast<double>(valid_count) : 0.0;
+    }
+};
+
+template<typename T>
+double find_mean(const T* __restrict data, size_t n) {
+    return MeanFinder<T>::find(data, n);
+}
+
+#else
+
+template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>
+double find_mean(const T* data, std::size_t size) {
+    util::check(size > 0, "Cannot compute mean of an empty array.");
+    double sum = std::accumulate(data, data + size, 0.0);
+    return sum / size;
+}
+
+#endif
+
+} // namespace arcticdb
@@ -0,0 +1,93 @@
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <cstddef>
+#include <algorithm>
+
+#include <arcticdb/util/vector_common.hpp>
+
+namespace arcticdb {
+
+#if HAS_VECTOR_EXTENSIONS
+
+template<typename T, typename Comparator>
+class FloatExtremumFinder {
+    static_assert(is_supported_float<T>::value, "Type must be float or double");
+    static_assert(std::is_floating_point_v<T>, "Type must be floating point");
+public:
+    static T find(const T* data, size_t n) {
+        if (n == 0)
+            return Comparator::identity();
+        using vec_t = vector_type<T>;
+        constexpr size_t lane_count = sizeof(vec_t) / sizeof(T);
+        vec_t vext;
+        for (size_t i = 0; i < lane_count; i++)
+            reinterpret_cast<T*>(&vext)[i] = Comparator::identity();
+
+        const vec_t* vdata = reinterpret_cast<const vec_t*>(data);
+        size_t vlen = n / lane_count;
+        for (size_t i = 0; i < vlen; i++) {
+            vec_t v = vdata[i];
+            if constexpr (Comparator::is_min)
+                vext = (v < vext) ? v : vext;
+            else
+                vext = (v > vext) ? v : vext;
+        }
+        T result = Comparator::identity();
+        const T* lanes = reinterpret_cast<const T*>(&vext);
+        for (size_t i = 0; i < lane_count; i++) {
+            if (lanes[i] == lanes[i])
+                result = Comparator::compare(lanes[i], result);
+        }
+        const T* remain = data + (vlen * lane_count);
+        size_t remain_count = n % lane_count;
+        for (size_t i = 0; i < remain_count; i++) {
+            if (remain[i] == remain[i])
+                result = Comparator::compare(remain[i], result);
+        }
+        return result;
+    }
+};
+
+template<typename T>
+struct FloatMinComparator {
+    static constexpr bool is_min = true;
+    static T identity() { return std::numeric_limits<T>::infinity(); }
+    static T compare(T a, T b) { return std::min(a, b); }
+};
+
+template<typename T>
+struct FloatMaxComparator {
+    static constexpr bool is_min = false;
+    static T identity() { return -std::numeric_limits<T>::infinity(); }
+    static T compare(T a, T b) { return std::max(a, b); }
+};
+
+template<typename T>
+T find_float_min(const T* data, size_t n) {
+    return FloatExtremumFinder<T, FloatMinComparator<T>>::find(data, n);
+}
+
+template<typename T>
+T find_float_max(const T* data, size_t n) {
+    return FloatExtremumFinder<T, FloatMaxComparator<T>>::find(data, n);
+}
+
+#else
+
+template<typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type
+find_float_min(const T *data, size_t n) {
+    util::check(size != 0, "Got zero size in find_float_min");
+    return *std::min_element(data, data + n);
+}
+
+template<typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type
+find_float_max(const T *data, size_t n) {
+    util::check(size != 0, "Got zero size in find_float_max");
+    return *std::max_element(data, data + n);
+}
+
+#endif
+} // namespace arcticdb
-Original file line number
+Diff line change
@@ Expand Up / @@ -40,7 +40,6 @@ class ChunkedBufferImpl { @@
         using BlockType = MemBlock;
-        static_assert(sizeof(BlockType) == BlockType::Align + BlockType::MinSize);
         static_assert(DefaultBlockSize >= BlockType::MinSize);
       public:
@@ Expand Down @@