Skip to content

Commit 5e46644

Browse files
committed
Vectorized min and max
1 parent 8d2e2f5 commit 5e46644

14 files changed

+1463
-17
lines changed

cpp/arcticdb/CMakeLists.txt

+11-4
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,11 @@ set(arcticdb_srcs
383383
util/type_traits.hpp
384384
util/variant.hpp
385385
util/gil_safe_py_none.hpp
386+
util/min_max_integer.hpp
387+
util/mean.hpp
388+
util/min_max_float.hpp
389+
util/sum.hpp
390+
util/vector_common.hpp
386391
version/de_dup_map.hpp
387392
version/op_log.hpp
388393
version/schema_checks.hpp
@@ -760,8 +765,8 @@ if (SSL_LINK)
760765
find_package(OpenSSL REQUIRED)
761766
list(APPEND arcticdb_core_libraries OpenSSL::SSL)
762767
if (NOT WIN32)
763-
list(APPEND arcticdb_core_libraries ${KERBEROS_LIBRARY})
764-
list(APPEND arcticdb_core_includes ${KERBEROS_INCLUDE_DIR})
768+
#list(APPEND arcticdb_core_libraries ${KERBEROS_LIBRARY})
769+
#list(APPEND arcticdb_core_includes ${KERBEROS_INCLUDE_DIR})
765770
endif()
766771
endif ()
767772
target_link_libraries(arcticdb_core_object PUBLIC ${arcticdb_core_libraries})
@@ -956,7 +961,6 @@ if(${TEST})
956961
stream/test/stream_test_common.cpp
957962
stream/test/test_aggregator.cpp
958963
stream/test/test_incompletes.cpp
959-
stream/test/test_append_map.cpp
960964
stream/test/test_protobuf_mappings.cpp
961965
stream/test/test_row_builder.cpp
962966
stream/test/test_segment_aggregator.cpp
@@ -980,6 +984,9 @@ if(${TEST})
980984
util/test/test_storage_lock.cpp
981985
util/test/test_string_pool.cpp
982986
util/test/test_string_utils.cpp
987+
util/test/test_min_max_float.cpp
988+
util/test/test_sum.cpp
989+
util/test/test_mean.cpp
983990
util/test/test_tracing_allocator.cpp
984991
version/test/test_append.cpp
985992
version/test/test_key_block.cpp
@@ -1097,7 +1104,7 @@ if(${TEST})
10971104
util/test/rapidcheck_string_pool.cpp
10981105
util/test/rapidcheck_main.cpp
10991106
util/test/rapidcheck_lru_cache.cpp
1100-
version/test/rapidcheck_version_map.cpp)
1107+
version/test/rapidcheck_version_map.cpp util/test/test_min_max_integer.cpp)
11011108

11021109
add_executable(arcticdb_rapidcheck_tests ${rapidcheck_srcs})
11031110
install(TARGETS arcticdb_rapidcheck_tests RUNTIME

cpp/arcticdb/column_store/block.hpp

+12-9
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
namespace arcticdb {
1616

1717
struct MemBlock {
18-
static const size_t Align = 128;
1918
static const size_t MinSize = 64;
2019
using magic_t = arcticdb::util::MagicNum<'M', 'e', 'm', 'b'>;
2120
magic_t magic_;
@@ -140,17 +139,21 @@ struct MemBlock {
140139
bool owns_external_data_ = false;
141140

142141
static const size_t HeaderDataSize =
143-
sizeof(magic_) + // 8 bytes
144-
sizeof(bytes_) + // 8 bytes
145-
sizeof(capacity_) + // 8 bytes
142+
sizeof(magic_) +
143+
sizeof(bytes_) +
144+
sizeof(capacity_) +
146145
sizeof(external_data_) +
147146
sizeof(offset_) +
148-
sizeof(timestamp_) +
147+
sizeof(timestamp_) +
149148
sizeof(owns_external_data_);
150149

151-
uint8_t pad[Align - HeaderDataSize];
152-
static const size_t HeaderSize = HeaderDataSize + sizeof(pad);
153-
static_assert(HeaderSize == Align);
154-
uint8_t data_[MinSize];
150+
static const size_t DataAlignment = 64;
151+
static const size_t PadSize = (DataAlignment - (HeaderDataSize % DataAlignment)) % DataAlignment;
152+
153+
uint8_t pad[PadSize];
154+
static const size_t HeaderSize = HeaderDataSize + PadSize;
155+
static_assert(HeaderSize % DataAlignment == 0, "Header size must be aligned to 64 bytes");
156+
157+
alignas(DataAlignment) uint8_t data_[MinSize];
155158
};
156159
}

cpp/arcticdb/column_store/chunked_buffer.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ std::vector<ChunkedBufferImpl<BlockSize>> split(const ChunkedBufferImpl<BlockSiz
6868
}
6969

7070
template std::vector<ChunkedBufferImpl<64>> split(const ChunkedBufferImpl<64>& input, size_t nbytes);
71-
template std::vector<ChunkedBufferImpl<3968>> split(const ChunkedBufferImpl<3968>& input, size_t nbytes);
71+
template std::vector<ChunkedBufferImpl<4032ul>> split(const ChunkedBufferImpl<4032ul>& input, size_t nbytes);
7272

7373
// Inclusive of start_byte, exclusive of end_byte
7474
template <size_t BlockSize>
@@ -112,6 +112,6 @@ ChunkedBufferImpl<BlockSize> truncate(const ChunkedBufferImpl<BlockSize>& input,
112112
}
113113

114114
template ChunkedBufferImpl<64> truncate(const ChunkedBufferImpl<64>& input, size_t start_byte, size_t end_byte);
115-
template ChunkedBufferImpl<3968> truncate(const ChunkedBufferImpl<3968>& input, size_t start_byte, size_t end_byte);
115+
template ChunkedBufferImpl<4032ul> truncate(const ChunkedBufferImpl<4032ul>& input, size_t start_byte, size_t end_byte);
116116

117117
} //namespace arcticdb

cpp/arcticdb/column_store/chunked_buffer.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ class ChunkedBufferImpl {
4040

4141
using BlockType = MemBlock;
4242

43-
static_assert(sizeof(BlockType) == BlockType::Align + BlockType::MinSize);
4443
static_assert(DefaultBlockSize >= BlockType::MinSize);
4544

4645
public:

cpp/arcticdb/storage/s3/s3_client_wrapper.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@ folly::Future<S3Result<std::monostate>> S3ClientTestWrapper::delete_object(
138138

139139
// Using a fixed page size since it's only being used for simple tests.
140140
// If we ever need to configure it we should move it to the s3 proto config instead.
141-
constexpr auto page_size = 10;
142141
S3Result<ListObjectsOutput> S3ClientTestWrapper::list_objects(
143142
const std::string& name_prefix,
144143
const std::string& bucket_name,

cpp/arcticdb/util/mean.hpp

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#include <cstdint>
2+
#include <limits>
3+
#include <type_traits>
4+
#include <cstddef>
5+
6+
#include <arcticdb/util/vector_common.hpp>
7+
8+
namespace arcticdb {
9+
10+
template<typename T>
11+
class MeanFinder {
12+
static_assert(is_supported_int<T>::value || is_supported_float<T>::value, "Unsupported type");
13+
14+
public:
15+
static double find(const T* data, size_t n) {
16+
using VectorType = vector_type<T>;
17+
using AccumVectorType = vector_type<double>;
18+
19+
AccumVectorType vsum = {0.0};
20+
const size_t elements_per_vector = sizeof(VectorType) / sizeof(T);
21+
const size_t doubles_per_vector = sizeof(AccumVectorType) / sizeof(double);
22+
const size_t vectors_per_acc = elements_per_vector / doubles_per_vector;
23+
24+
size_t valid_count = 0;
25+
26+
const auto* vdata = reinterpret_cast<const VectorType*>(data);
27+
const size_t vector_len = n / elements_per_vector;
28+
29+
for(size_t i = 0; i < vector_len; i++) {
30+
VectorType v = vdata[i];
31+
32+
if constexpr(std::is_floating_point_v<T>) {
33+
VectorType mask = v == v;
34+
v = v & mask;
35+
36+
const T* mask_arr = reinterpret_cast<const T*>(&mask);
37+
for(size_t j = 0; j < elements_per_vector; j++) {
38+
if(mask_arr[j] != 0) valid_count++;
39+
}
40+
} else {
41+
valid_count += elements_per_vector;
42+
}
43+
44+
const T* v_arr = reinterpret_cast<const T*>(&v);
45+
for(size_t chunk = 0; chunk < vectors_per_acc; chunk++) {
46+
for(size_t j = 0; j < doubles_per_vector; j++) {
47+
size_t idx = chunk * doubles_per_vector + j;
48+
reinterpret_cast<double*>(&vsum)[j] += static_cast<double>(v_arr[idx]);
49+
}
50+
}
51+
}
52+
53+
double total = 0.0;
54+
const auto* sum_arr = reinterpret_cast<const double*>(&vsum);
55+
for(size_t i = 0; i < doubles_per_vector; i++) {
56+
total += sum_arr[i];
57+
}
58+
59+
const T* remain = data + (vector_len * elements_per_vector);
60+
for(size_t i = 0; i < n % elements_per_vector; i++) {
61+
if constexpr(std::is_floating_point_v<T>) {
62+
if (remain[i] == remain[i]) { // Not NaN
63+
total += static_cast<double>(remain[i]);
64+
valid_count++;
65+
}
66+
} else {
67+
total += static_cast<double>(remain[i]);
68+
valid_count++;
69+
}
70+
}
71+
72+
return valid_count > 0 ? total / static_cast<double>(valid_count) : 0.0;
73+
}
74+
};
75+
76+
template<typename T>
77+
double find_mean(const T *data, size_t n) {
78+
return MeanFinder<T>::find(data, n);
79+
}
80+
81+
} // namespace arcticdb

cpp/arcticdb/util/min_max_float.hpp

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#include <cstdint>
2+
#include <limits>
3+
#include <type_traits>
4+
#include <cstddef>
5+
#include <algorithm>
6+
7+
#include <arcticdb/util/vector_common.hpp>
8+
9+
namespace arcticdb {
10+
11+
// Alias template for a 64-byte SIMD vector with element type T, declared via the
// GCC/Clang `vector_size` attribute (the vector holds 64 / sizeof(T) lanes).
// NOTE(review): mean.hpp uses `vector_type` while including only
// vector_common.hpp - confirm this alias does not redeclare one already
// provided by that header.
template<typename T>
12+
using vector_type __attribute__((vector_size(64))) = T;
13+
14+
template<typename T>
15+
class FloatMinFinder {
16+
static_assert(is_supported_float<T>::value, "Type must be float or double");
17+
static_assert(std::is_floating_point_v<T>, "Type must be floating point");
18+
19+
public:
20+
static T find(const T* data, size_t n) {
21+
using vec_t = vector_type<T>;
22+
23+
// Initialize min vector with infinity
24+
vec_t vmin;
25+
for(size_t i = 0; i < sizeof(vec_t)/sizeof(T); i++) {
26+
reinterpret_cast<T*>(&vmin)[i] = std::numeric_limits<T>::infinity();
27+
}
28+
29+
// Process full vectors
30+
const vec_t* vdata = reinterpret_cast<const vec_t*>(data);
31+
const size_t elements_per_vector = sizeof(vec_t) / sizeof(T);
32+
const size_t vlen = n / elements_per_vector;
33+
34+
// Main SIMD loop
35+
for(size_t i = 0; i < vlen; i++) {
36+
vec_t v = vdata[i];
37+
vmin = (v < vmin) ? v : vmin;
38+
}
39+
40+
// Reduce vector to scalar
41+
T min_val = std::numeric_limits<T>::infinity();
42+
const T* min_arr = reinterpret_cast<const T*>(&vmin);
43+
for(size_t i = 0; i < elements_per_vector; i++) {
44+
if (min_arr[i] == min_arr[i]) { // Not NaN
45+
min_val = std::min(min_val, min_arr[i]);
46+
}
47+
}
48+
49+
// Handle remainder
50+
const T* remain = data + (vlen * elements_per_vector);
51+
for(size_t i = 0; i < n % elements_per_vector; i++) {
52+
if (remain[i] == remain[i]) { // Not NaN
53+
min_val = std::min(min_val, remain[i]);
54+
}
55+
}
56+
57+
return min_val;
58+
}
59+
};
60+
61+
template<typename T>
62+
class FloatMaxFinder {
63+
static_assert(is_supported_float<T>::value, "Type must be float or double");
64+
static_assert(std::is_floating_point_v<T>, "Type must be floating point");
65+
66+
public:
67+
static T find(const T* data, size_t n) {
68+
using vec_t = vector_type<T>;
69+
70+
// Initialize max vector with negative infinity
71+
vec_t vmax;
72+
for(size_t i = 0; i < sizeof(vec_t)/sizeof(T); i++) {
73+
reinterpret_cast<T*>(&vmax)[i] = -std::numeric_limits<T>::infinity();
74+
}
75+
76+
// Process full vectors
77+
const vec_t* vdata = reinterpret_cast<const vec_t*>(data);
78+
const size_t elements_per_vector = sizeof(vec_t) / sizeof(T);
79+
const size_t vlen = n / elements_per_vector;
80+
81+
// Main SIMD loop
82+
for(size_t i = 0; i < vlen; i++) {
83+
vec_t v = vdata[i];
84+
vmax = (v > vmax) ? v : vmax;
85+
}
86+
87+
// Reduce vector to scalar
88+
T max_val = -std::numeric_limits<T>::infinity();
89+
const T* max_arr = reinterpret_cast<const T*>(&vmax);
90+
for(size_t i = 0; i < elements_per_vector; i++) {
91+
if (max_arr[i] == max_arr[i]) { // Not NaN
92+
max_val = std::max(max_val, max_arr[i]);
93+
}
94+
}
95+
96+
// Handle remainder
97+
const T* remain = data + (vlen * elements_per_vector);
98+
for(size_t i = 0; i < n % elements_per_vector; i++) {
99+
if (remain[i] == remain[i]) { // Not NaN
100+
max_val = std::max(max_val, remain[i]);
101+
}
102+
}
103+
104+
return max_val;
105+
}
106+
};
107+
108+
template<typename T>
109+
T find_float_min(const T *data, size_t n) {
110+
return FloatMinFinder<T>::find(data, n);
111+
}
112+
113+
template<typename T>
114+
T find_float_max(const T *data, size_t n) {
115+
return FloatMaxFinder<T>::find(data, n);
116+
}
117+
118+
} // namespace arcticdb

0 commit comments

Comments
 (0)