4
4
#include < cstddef>
5
5
#include < algorithm>
6
6
7
- namespace arcticdb {
7
+ # include < arcticdb/util/vector_common.hpp >
8
8
9
- template <typename T>
10
- struct is_supported_float : std::false_type {};
9
+ namespace arcticdb {
11
10
12
11
// Alias template for a compiler vector-extension type with element type T and
// a total size of 64 bytes (the vector_size attribute argument is in bytes),
// so one vector holds 64 / sizeof(T) elements.
template <typename T>
13
12
using vector_type __attribute__ ((vector_size(64 ))) = T;
14
13
15
- template <> struct is_supported_float <float > : std::true_type {};
16
- template <> struct is_supported_float <double > : std::true_type {};
17
-
18
14
template <typename T>
19
15
class FloatMinFinder {
20
16
static_assert (is_supported_float<T>::value, " Type must be float or double" );
21
17
static_assert (std::is_floating_point_v<T>, " Type must be floating point" );
22
18
23
19
public:
24
- static T find (const T * data, size_t n) {
25
- using vec_t __attribute__ (( vector_size ( 64 ))) = T ;
20
+ static T find (const T* data, size_t n) {
21
+ using vec_t = vector_type<T> ;
26
22
23
+ // Initialize min vector with infinity
27
24
vec_t vmin;
28
- for (size_t i = 0 ; i < sizeof (vec_t ) / sizeof (T); i++) {
29
- reinterpret_cast <T *>(&vmin)[i] = std::numeric_limits<T>::infinity ();
25
+ for (size_t i = 0 ; i < sizeof (vec_t )/ sizeof (T); i++) {
26
+ reinterpret_cast <T*>(&vmin)[i] = std::numeric_limits<T>::infinity ();
30
27
}
31
28
32
- const vec_t *vdata = reinterpret_cast <const vec_t *>(data);
29
+ // Process full vectors
30
+ const vec_t * vdata = reinterpret_cast <const vec_t *>(data);
33
31
const size_t elements_per_vector = sizeof (vec_t ) / sizeof (T);
34
32
const size_t vlen = n / elements_per_vector;
35
33
36
- for (size_t i = 0 ; i < vlen; i++) {
34
+ // Main SIMD loop
35
+ for (size_t i = 0 ; i < vlen; i++) {
37
36
vec_t v = vdata[i];
38
- vec_t mask = v == v; // !NaN
39
- vec_t valid = v & mask;
40
- vec_t replaced = vmin & ~mask;
41
- v = valid | replaced;
42
37
vmin = (v < vmin) ? v : vmin;
43
38
}
44
39
40
+ // Reduce vector to scalar
45
41
T min_val = std::numeric_limits<T>::infinity ();
46
- const T * min_arr = reinterpret_cast <const T *>(&vmin);
47
- for (size_t i = 0 ; i < elements_per_vector; i++) {
42
+ const T* min_arr = reinterpret_cast <const T*>(&vmin);
43
+ for (size_t i = 0 ; i < elements_per_vector; i++) {
48
44
if (min_arr[i] == min_arr[i]) { // Not NaN
49
45
min_val = std::min (min_val, min_arr[i]);
50
46
}
51
47
}
52
48
53
- const T *remain = data + (vlen * elements_per_vector);
54
- for (size_t i = 0 ; i < n % elements_per_vector; i++) {
49
+ // Handle remainder
50
+ const T* remain = data + (vlen * elements_per_vector);
51
+ for (size_t i = 0 ; i < n % elements_per_vector; i++) {
55
52
if (remain[i] == remain[i]) { // Not NaN
56
53
min_val = std::min (min_val, remain[i]);
57
54
}
@@ -67,41 +64,38 @@ class FloatMaxFinder {
67
64
static_assert (std::is_floating_point_v<T>, " Type must be floating point" );
68
65
69
66
public:
70
- static T find (const T * data, size_t n) {
67
+ static T find (const T* data, size_t n) {
71
68
using vec_t = vector_type<T>;
72
69
73
70
// Initialize max vector with negative infinity
74
71
vec_t vmax;
75
- for (size_t i = 0 ; i < sizeof (vec_t ) / sizeof (T); i++) {
76
- reinterpret_cast <T *>(&vmax)[i] = -std::numeric_limits<T>::infinity ();
72
+ for (size_t i = 0 ; i < sizeof (vec_t )/ sizeof (T); i++) {
73
+ reinterpret_cast <T*>(&vmax)[i] = -std::numeric_limits<T>::infinity ();
77
74
}
78
75
79
- const vec_t *vdata = reinterpret_cast <const vec_t *>(data);
76
+ // Process full vectors
77
+ const vec_t * vdata = reinterpret_cast <const vec_t *>(data);
80
78
const size_t elements_per_vector = sizeof (vec_t ) / sizeof (T);
81
79
const size_t vlen = n / elements_per_vector;
82
80
83
81
// Main SIMD loop
84
- for (size_t i = 0 ; i < vlen; i++) {
82
+ for (size_t i = 0 ; i < vlen; i++) {
85
83
vec_t v = vdata[i];
86
- // Create mask for non-NaN values
87
- vec_t mask = v == v; // false for NaN
88
- vec_t valid = v & mask;
89
- vec_t replaced = vmax & ~mask;
90
- v = valid | replaced;
91
- // Vector max
92
84
vmax = (v > vmax) ? v : vmax;
93
85
}
94
86
87
+ // Reduce vector to scalar
95
88
T max_val = -std::numeric_limits<T>::infinity ();
96
- const T * max_arr = reinterpret_cast <const T *>(&vmax);
97
- for (size_t i = 0 ; i < elements_per_vector; i++) {
89
+ const T* max_arr = reinterpret_cast <const T*>(&vmax);
90
+ for (size_t i = 0 ; i < elements_per_vector; i++) {
98
91
if (max_arr[i] == max_arr[i]) { // Not NaN
99
92
max_val = std::max (max_val, max_arr[i]);
100
93
}
101
94
}
102
95
103
- const T *remain = data + (vlen * elements_per_vector);
104
- for (size_t i = 0 ; i < n % elements_per_vector; i++) {
96
+ // Handle remainder
97
+ const T* remain = data + (vlen * elements_per_vector);
98
+ for (size_t i = 0 ; i < n % elements_per_vector; i++) {
105
99
if (remain[i] == remain[i]) { // Not NaN
106
100
max_val = std::max (max_val, remain[i]);
107
101
}
0 commit comments