-
Notifications
You must be signed in to change notification settings - Fork 119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Vector comparisons #2119
base: master
Are you sure you want to change the base?
Vector comparisons #2119
Changes from all commits
aefdffb
16f22a1
06046cc
8ee1854
ecf54d5
a21f35b
75ed893
239e9e4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -383,6 +383,11 @@ set(arcticdb_srcs | |
util/type_traits.hpp | ||
util/variant.hpp | ||
util/gil_safe_py_none.hpp | ||
util/min_max_integer.hpp | ||
util/mean.hpp | ||
util/min_max_float.hpp | ||
util/sum.hpp | ||
util/vector_common.hpp | ||
version/de_dup_map.hpp | ||
version/op_log.hpp | ||
version/schema_checks.hpp | ||
|
@@ -760,8 +765,8 @@ if (SSL_LINK) | |
find_package(OpenSSL REQUIRED) | ||
list(APPEND arcticdb_core_libraries OpenSSL::SSL) | ||
if (NOT WIN32) | ||
list(APPEND arcticdb_core_libraries ${KERBEROS_LIBRARY}) | ||
list(APPEND arcticdb_core_includes ${KERBEROS_INCLUDE_DIR}) | ||
#list(APPEND arcticdb_core_libraries ${KERBEROS_LIBRARY}) | ||
#list(APPEND arcticdb_core_includes ${KERBEROS_INCLUDE_DIR}) | ||
endif() | ||
endif () | ||
target_link_libraries(arcticdb_core_object PUBLIC ${arcticdb_core_libraries}) | ||
|
@@ -979,6 +984,10 @@ if(${TEST}) | |
util/test/test_storage_lock.cpp | ||
util/test/test_string_pool.cpp | ||
util/test/test_string_utils.cpp | ||
util/test/test_min_max_float.cpp | ||
util/test/test_min_max_integer.cpp | ||
util/test/test_sum.cpp | ||
util/test/test_mean.cpp | ||
util/test/test_tracing_allocator.cpp | ||
version/test/test_append.cpp | ||
version/test/test_key_block.cpp | ||
|
@@ -991,8 +1000,7 @@ if(${TEST}) | |
version/test/test_version_map_batch.cpp | ||
version/test/test_version_store.cpp | ||
version/test/version_map_model.hpp | ||
python/python_handlers.cpp | ||
) | ||
python/python_handlers.cpp) | ||
|
||
set(EXECUTABLE_PERMS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) # 755 | ||
|
||
|
@@ -1097,7 +1105,7 @@ if(${TEST}) | |
util/test/rapidcheck_string_pool.cpp | ||
util/test/rapidcheck_main.cpp | ||
util/test/rapidcheck_lru_cache.cpp | ||
version/test/rapidcheck_version_map.cpp) | ||
version/test/rapidcheck_version_map.cpp util/test/test_min_max_integer.cpp) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not needed here, although this code is a good candidate for rapidcheck testing |
||
|
||
add_executable(arcticdb_rapidcheck_tests ${rapidcheck_srcs}) | ||
install(TARGETS arcticdb_rapidcheck_tests RUNTIME | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,6 @@ | |
namespace arcticdb { | ||
|
||
struct MemBlock { | ||
static const size_t Align = 128; | ||
static const size_t MinSize = 64; | ||
using magic_t = arcticdb::util::MagicNum<'M', 'e', 'm', 'b'>; | ||
magic_t magic_; | ||
|
@@ -140,17 +139,21 @@ struct MemBlock { | |
bool owns_external_data_ = false; | ||
|
||
static const size_t HeaderDataSize = | ||
sizeof(magic_) + // 8 bytes | ||
sizeof(bytes_) + // 8 bytes | ||
sizeof(capacity_) + // 8 bytes | ||
sizeof(magic_) + | ||
sizeof(bytes_) + | ||
sizeof(capacity_) + | ||
sizeof(external_data_) + | ||
sizeof(offset_) + | ||
sizeof(timestamp_) + | ||
sizeof(timestamp_) + | ||
sizeof(owns_external_data_); | ||
|
||
uint8_t pad[Align - HeaderDataSize]; | ||
static const size_t HeaderSize = HeaderDataSize + sizeof(pad); | ||
static_assert(HeaderSize == Align); | ||
uint8_t data_[MinSize]; | ||
static const size_t DataAlignment = 64; | ||
static const size_t PadSize = (DataAlignment - (HeaderDataSize % DataAlignment)) % DataAlignment; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mod operator is a no-op, |
||
|
||
uint8_t pad[PadSize]; | ||
static const size_t HeaderSize = HeaderDataSize + PadSize; | ||
static_assert(HeaderSize % DataAlignment == 0, "Header size must be aligned to 64 bytes"); | ||
|
||
alignas(DataAlignment) uint8_t data_[MinSize]; | ||
}; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,7 +68,7 @@ std::vector<ChunkedBufferImpl<BlockSize>> split(const ChunkedBufferImpl<BlockSiz | |
} | ||
|
||
template std::vector<ChunkedBufferImpl<64>> split(const ChunkedBufferImpl<64>& input, size_t nbytes); | ||
template std::vector<ChunkedBufferImpl<3968>> split(const ChunkedBufferImpl<3968>& input, size_t nbytes); | ||
template std::vector<ChunkedBufferImpl<4032ul>> split(const ChunkedBufferImpl<4032ul>& input, size_t nbytes); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was this just too small in error before? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there's any data that has been written into storage in 3968 byte blocks, will they still be readable without further modifications? |
||
|
||
// Inclusive of start_byte, exclusive of end_byte | ||
template <size_t BlockSize> | ||
|
@@ -112,6 +112,6 @@ ChunkedBufferImpl<BlockSize> truncate(const ChunkedBufferImpl<BlockSize>& input, | |
} | ||
|
||
template ChunkedBufferImpl<64> truncate(const ChunkedBufferImpl<64>& input, size_t start_byte, size_t end_byte); | ||
template ChunkedBufferImpl<3968> truncate(const ChunkedBufferImpl<3968>& input, size_t start_byte, size_t end_byte); | ||
template ChunkedBufferImpl<4032ul> truncate(const ChunkedBufferImpl<4032ul>& input, size_t start_byte, size_t end_byte); | ||
|
||
} //namespace arcticdb |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,8 +19,7 @@ class ColumnDataRandomAccessorTest : public testing::Test { | |
input_data.resize(n); | ||
std::iota(input_data.begin(), input_data.end(), 42); | ||
} | ||
// 3968 bytes == 496 int64s per block, so 3 blocks here | ||
size_t n{1000}; | ||
size_t n{1100}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comment is useful, it's now 504 int64s per block |
||
std::vector<int64_t> input_data; | ||
TypeDescriptor type_descriptor{static_cast<TypeDescriptor>(TDT{})}; | ||
}; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#include <cstdint> | ||
#include <limits> | ||
#include <type_traits> | ||
#include <cstddef> | ||
|
||
#include <arcticdb/util/vector_common.hpp> | ||
#include <arcticdb/util/preconditions.hpp> | ||
|
||
namespace arcticdb { | ||
|
||
#if HAS_VECTOR_EXTENSIONS | ||
|
||
template<typename Vec, typename U> | ||
inline void fill_vector(Vec &vec, U value) { | ||
U* p = reinterpret_cast<U*>(&vec); | ||
constexpr size_t count = sizeof(Vec) / sizeof(U); | ||
for (size_t i = 0; i < count; i++) { | ||
p[i] = value; | ||
} | ||
} | ||
|
||
template<typename T> | ||
class MeanFinder { | ||
static_assert(is_supported_int<T>::value || is_supported_float<T>::value, "Unsupported type"); | ||
public: | ||
static double find(const T* __restrict data, size_t n) { | ||
util::check(n > 0, "Empty array provided"); | ||
using VectorType = vector_type<T>; | ||
using AccumVectorType = vector_type<double>; | ||
constexpr size_t elements_per_vector = sizeof(VectorType) / sizeof(T); | ||
constexpr size_t doubles_per_vector = sizeof(AccumVectorType) / sizeof(double); | ||
constexpr size_t vectors_per_acc = elements_per_vector / doubles_per_vector; | ||
AccumVectorType vsum; | ||
double* vsum_ptr = reinterpret_cast<double*>(&vsum); | ||
fill_vector<AccumVectorType, double>(vsum, 0.0); | ||
size_t valid_count = 0; | ||
const VectorType* vdata = reinterpret_cast<const VectorType*>(data); | ||
const size_t vector_len = n / elements_per_vector; | ||
for (size_t i = 0; i < vector_len; i++) { | ||
VectorType v = vdata[i]; | ||
const T* v_arr = reinterpret_cast<const T*>(&v); | ||
if constexpr (std::is_floating_point_v<T>) { | ||
bool has_nan = false; | ||
for (size_t j = 0; j < elements_per_vector; j++) { | ||
if (std::isnan(v_arr[j])) { has_nan = true; break; } | ||
} | ||
if (!has_nan) { | ||
for (size_t chunk = 0; chunk < vectors_per_acc; chunk++) { | ||
size_t base = chunk * doubles_per_vector; | ||
for (size_t j = 0; j < doubles_per_vector; j++) { | ||
if constexpr (std::is_same_v<T, double>) | ||
vsum_ptr[j] += v_arr[base + j]; | ||
else | ||
vsum_ptr[j] += static_cast<double>(v_arr[base + j]); | ||
} | ||
} | ||
valid_count += elements_per_vector; | ||
} else { | ||
for (size_t chunk = 0; chunk < vectors_per_acc; chunk++) { | ||
size_t base = chunk * doubles_per_vector; | ||
for (size_t j = 0; j < doubles_per_vector; j++) { | ||
size_t idx = base + j; | ||
if (!std::isnan(v_arr[idx])) { | ||
if constexpr (std::is_same_v<T, double>) | ||
vsum_ptr[j] += v_arr[idx]; | ||
else | ||
vsum_ptr[j] += static_cast<double>(v_arr[idx]); | ||
valid_count++; | ||
} | ||
} | ||
} | ||
} | ||
} else { | ||
for (size_t chunk = 0; chunk < vectors_per_acc; chunk++) { | ||
size_t base = chunk * doubles_per_vector; | ||
for (size_t j = 0; j < doubles_per_vector; j++) { | ||
vsum_ptr[j] += static_cast<double>(v_arr[base + j]); | ||
} | ||
} | ||
valid_count += elements_per_vector; | ||
} | ||
} | ||
double total = 0.0; | ||
for (size_t j = 0; j < doubles_per_vector; j++) { | ||
total += vsum_ptr[j]; | ||
} | ||
const T* remain = data + (vector_len * elements_per_vector); | ||
size_t rem = n % elements_per_vector; | ||
for (size_t i = 0; i < rem; i++) { | ||
if constexpr (std::is_floating_point_v<T>) { | ||
if (!std::isnan(remain[i])) { | ||
if constexpr (std::is_same_v<T, double>) | ||
total += remain[i]; | ||
else | ||
total += static_cast<double>(remain[i]); | ||
valid_count++; | ||
} | ||
} else { | ||
total += static_cast<double>(remain[i]); | ||
valid_count++; | ||
} | ||
} | ||
return valid_count > 0 ? total / static_cast<double>(valid_count) : 0.0; | ||
} | ||
}; | ||
|
||
template<typename T> | ||
double find_mean(const T* __restrict data, size_t n) { | ||
return MeanFinder<T>::find(data, n); | ||
} | ||
|
||
#else | ||
|
||
template <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type> | ||
double find_mean(const T* data, std::size_t size) { | ||
util::check(size > 0, "Cannot compute mean of an empty array."); | ||
double sum = std::accumulate(data, data + size, 0.0); | ||
return sum / size; | ||
} | ||
|
||
#endif | ||
|
||
} // namespace arcticdb |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#include <cstdint> | ||
#include <limits> | ||
#include <type_traits> | ||
#include <cstddef> | ||
#include <algorithm> | ||
|
||
#include <arcticdb/util/vector_common.hpp> | ||
|
||
namespace arcticdb { | ||
|
||
#if HAS_VECTOR_EXTENSIONS | ||
|
||
template<typename T, typename Comparator> | ||
class FloatExtremumFinder { | ||
static_assert(is_supported_float<T>::value, "Type must be float or double"); | ||
static_assert(std::is_floating_point_v<T>, "Type must be floating point"); | ||
public: | ||
static T find(const T* data, size_t n) { | ||
if (n == 0) | ||
return Comparator::identity(); | ||
using vec_t = vector_type<T>; | ||
constexpr size_t lane_count = sizeof(vec_t) / sizeof(T); | ||
vec_t vext; | ||
for (size_t i = 0; i < lane_count; i++) | ||
reinterpret_cast<T*>(&vext)[i] = Comparator::identity(); | ||
|
||
const vec_t* vdata = reinterpret_cast<const vec_t*>(data); | ||
size_t vlen = n / lane_count; | ||
for (size_t i = 0; i < vlen; i++) { | ||
vec_t v = vdata[i]; | ||
if constexpr (Comparator::is_min) | ||
vext = (v < vext) ? v : vext; | ||
else | ||
vext = (v > vext) ? v : vext; | ||
} | ||
T result = Comparator::identity(); | ||
const T* lanes = reinterpret_cast<const T*>(&vext); | ||
for (size_t i = 0; i < lane_count; i++) { | ||
if (lanes[i] == lanes[i]) | ||
result = Comparator::compare(lanes[i], result); | ||
} | ||
const T* remain = data + (vlen * lane_count); | ||
size_t remain_count = n % lane_count; | ||
for (size_t i = 0; i < remain_count; i++) { | ||
if (remain[i] == remain[i]) | ||
result = Comparator::compare(remain[i], result); | ||
} | ||
return result; | ||
} | ||
}; | ||
|
||
template<typename T> | ||
struct FloatMinComparator { | ||
static constexpr bool is_min = true; | ||
static T identity() { return std::numeric_limits<T>::infinity(); } | ||
static T compare(T a, T b) { return std::min(a, b); } | ||
}; | ||
|
||
template<typename T> | ||
struct FloatMaxComparator { | ||
static constexpr bool is_min = false; | ||
static T identity() { return -std::numeric_limits<T>::infinity(); } | ||
static T compare(T a, T b) { return std::max(a, b); } | ||
}; | ||
|
||
template<typename T> | ||
T find_float_min(const T* data, size_t n) { | ||
return FloatExtremumFinder<T, FloatMinComparator<T>>::find(data, n); | ||
} | ||
|
||
template<typename T> | ||
T find_float_max(const T* data, size_t n) { | ||
return FloatExtremumFinder<T, FloatMaxComparator<T>>::find(data, n); | ||
} | ||
|
||
#else | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comments below the |
||
|
||
template<typename T> | ||
typename std::enable_if<std::is_floating_point<T>::value, T>::type | ||
find_float_min(const T *data, size_t n) { | ||
util::check(size != 0, "Got zero size in find_float_min"); | ||
Check failure on line 81 in cpp/arcticdb/util/min_max_float.hpp
|
||
return *std::min_element(data, data + n); | ||
} | ||
|
||
template<typename T> | ||
typename std::enable_if<std::is_floating_point<T>::value, T>::type | ||
find_float_max(const T *data, size_t n) { | ||
util::check(size != 0, "Got zero size in find_float_max"); | ||
Check failure on line 88 in cpp/arcticdb/util/min_max_float.hpp
|
||
return *std::max_element(data, data + n); | ||
} | ||
|
||
#endif | ||
} // namespace arcticdb |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uncomment