Skip to content

Commit 169f4f3

Browse files
authored
Factors SWARWithSublanes to its header (#77)
* Factors SWARWithSublanes to its header * Change not intended for Windows * Omission * Omission #2 * Misspelling * Another misspelling --------- Co-authored-by: Eddie <eddie see email elsewhere>
1 parent 54e6334 commit 169f4f3

File tree

7 files changed

+181
-159
lines changed

7 files changed

+181
-159
lines changed

inc/zoo/map/RobinHoodUtil.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#pragma once
22

3-
#include "zoo/swar/SWAR.h"
3+
#include "zoo/swar/SWARWithSubLanes.h"
44

55
#include <array>
66
#include <cstddef>

inc/zoo/swar/SWAR.h

Lines changed: 0 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -160,109 +160,6 @@ struct SWAR {
160160
T m_v;
161161
};
162162

163-
// SWAR is a useful abstraction for performing computations in lanes overlaid
164-
// over any given integral type.
165-
// Doing additions, subtractions, and compares via SWAR techniques requires an
166-
// extra bit per lane be available past the lane size, _or_ knowledge that both
167-
// of your MSBs are set 0 (leaving space for the operation). Similarly, doing
168-
// multiplications via SWAR techniques require double bits per lane (unless you
169-
// can bind your inputs at half lane size).
170-
// This leads to a useful technique (which we use in the robin hood table)
171-
// where we interleave two related small bit count integers inside of a lane of
172-
// swar. More generally, this is useful because it sometimes allows fast
173-
// operations on side "a" of some lane if side "b" is blitted out, and vice
174-
// versa. In the spirit of separation of concerns, we provide a cut-lane-SWAR
175-
// abstraction here.
176-
177-
template<int NBitsLeast_, int NBitsMost_, typename T = uint64_t>
178-
struct SWARWithSubLanes: SWAR<NBitsLeast_ + NBitsMost_ , T> {
179-
static constexpr inline auto NBitsLeast = NBitsLeast_;
180-
static constexpr inline auto NBitsMost = NBitsMost_;
181-
182-
using Base = SWAR<NBitsMost + NBitsLeast, T>;
183-
static constexpr inline auto Available = sizeof(T);
184-
static constexpr inline auto LaneBits = NBitsLeast + NBitsMost;
185-
186-
using Base::Base;
187-
constexpr SWARWithSubLanes(Base b) noexcept: Base(b) {}
188-
constexpr SWARWithSubLanes(T most, T least) noexcept:
189-
Base((most << NBitsLeast) | least)
190-
{}
191-
192-
// M is most significant bits slice, L is least significant bits slice.
193-
// 0x....M2L2M1L1 or MN|LN||...||M2|L2||M1|L1
194-
using SL = SWARWithSubLanes<NBitsLeast, NBitsMost, T>;
195-
196-
static constexpr inline auto LeastOnes =
197-
Base(meta::BitmaskMaker<T, Base{1}.value(), LaneBits>::value);
198-
static constexpr inline auto MostOnes =
199-
Base(LeastOnes.value() << NBitsLeast);
200-
static constexpr inline auto LeastMask = MostOnes - LeastOnes;
201-
static constexpr inline auto MostMask = ~LeastMask;
202-
203-
constexpr auto least() const noexcept {
204-
return SL{LeastMask & *this};
205-
}
206-
207-
// Isolate the least significant bits of the lane at the specified position.
208-
constexpr auto least(int pos) const noexcept {
209-
constexpr auto Filter = SL((T(1) << NBitsLeast) - 1);
210-
return Filter.shiftLanesLeft(pos) & *this;
211-
}
212-
213-
// Returns only the least significant bits at specified position, 'decoded' to their integer value.
214-
constexpr auto leastFlat(int pos) const noexcept {
215-
return least().at(pos);
216-
}
217-
218-
constexpr auto most() const noexcept {
219-
return SL{MostMask & *this};
220-
}
221-
222-
// The most significant bits of the lane at the specified position.
223-
constexpr auto most(int pos) const noexcept {
224-
constexpr auto Filter =
225-
SL(((T(1) << SL::NBitsMost) - 1) << SL::NBitsLeast);
226-
return Filter.shiftLanesLeft(pos) & *this;
227-
}
228-
229-
// The most significant bits of the lane at the specified position,
230-
// 'decoded' to their integer value.
231-
constexpr auto mostFlat(int pos) const noexcept {
232-
return most().at(pos) >> SL::NBitsLeast;
233-
}
234-
235-
// Blits most sig bits into least significant bits. Experimental.
236-
constexpr auto flattenMostToLeast(int pos) const noexcept {
237-
return SL(this->m_v >> NBitsLeast) & LeastMask;
238-
}
239-
240-
// Blits least sig bits into most significant bits. Experimental.
241-
constexpr auto promoteLeastToMost(int pos) const noexcept {
242-
return SL(this->m_v << NBitsMost) & MostMask;
243-
}
244-
245-
// Sets the lsb sublane at |pos| with least significant NBitsLeast of |in|
246-
constexpr auto least(T in, int pos) const noexcept {
247-
constexpr auto filter = (T(1) << LaneBits) - 1;
248-
const auto keep = ~(filter << (LaneBits * pos)) | MostMask.value();
249-
const auto rdyToInsert = this->m_v & keep;
250-
const auto rval = rdyToInsert | ((in & LeastMask.value()) << (LaneBits * pos));
251-
return SL(rval);
252-
}
253-
254-
// Sets the msb sublane at |pos| with least significant NBitsMost of |in|
255-
constexpr auto most(T in, int pos) const noexcept {
256-
constexpr auto filter = (T(1) << LaneBits) - 1;
257-
const auto keep = ~(filter << (LaneBits * pos)) | LeastMask.value();
258-
const auto rdyToInsert = this->m_v & keep;
259-
const auto insVal = (((in<<NBitsLeast) & MostMask.value()) << (LaneBits * pos));
260-
const auto rval = rdyToInsert | insVal;
261-
return SL(rval);
262-
}
263-
};
264-
265-
266163
/// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation.
267164
template<int NBits, typename T = uint64_t>
268165
constexpr auto horizontalEquality(SWAR<NBits, T> left, SWAR<NBits, T> right) {

inc/zoo/swar/SWARWithSubLanes.h

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#ifndef ZOO_SWAR_SWARWITHSUBLANES_H
2+
#define ZOO_SWAR_SWARWITHSUBLANES_H
3+
4+
#include "zoo/swar/SWAR.h"
5+
6+
namespace zoo { namespace swar {
7+
8+
/// \brief Allows SWAR Lanes to be treated both as a whole or something with
9+
/// internal structure.
10+
11+
/// Example: Robin Hood "Haystack" metadata composed of hoisted hash bits and
12+
/// PSL (probe sequence lengths), that are used together or separately.
13+
/// SWAR is a useful abstraction for performing computations in lanes overlaid
14+
/// over any given integral type.
15+
/// To prevent the normal integer operations in a lane from disrupting the operation
16+
/// in the adjoining lanes, some precautions must be maintained. For example
17+
/// upon an addition of lanes, we either need that the domain of our values
18+
/// does not use the most significant bit (guaranteeing normal addition of
19+
/// lanes won't cross to the upper lane) or that this possibility is explicitly
20+
/// taken into account (see "full addition"). This applies to all operations,
21+
/// including comparisons.
22+
/// Similarly, doing multiplications via SWAR techniques requires double bits per
23+
/// lane (unless you can guarantee the values of the input lanes are half lane
24+
/// size).
25+
/// This leads to a useful technique (which we use in the Robin Hood table)
26+
/// where we interleave two related small bit count integers inside of a lane of
27+
/// swar. More generally, this is useful because it sometimes allows fast
28+
/// operations on side "a" of some lane if side "b" is blitted out, and vice
29+
/// versa. In the spirit of separation of concerns, we provide a cut-lane-SWAR
30+
/// abstraction here.
31+
template<int NBitsLeast_, int NBitsMost_, typename T = uint64_t>
32+
struct SWARWithSubLanes: SWAR<NBitsLeast_ + NBitsMost_ , T> {
33+
static constexpr inline auto NBitsLeast = NBitsLeast_;
34+
static constexpr inline auto NBitsMost = NBitsMost_;
35+
36+
using Base = SWAR<NBitsMost + NBitsLeast, T>;
37+
static constexpr inline auto Available = sizeof(T);
38+
static constexpr inline auto LaneBits = NBitsLeast + NBitsMost;
39+
40+
using Base::Base;
41+
constexpr SWARWithSubLanes(Base b) noexcept: Base(b) {}
42+
constexpr SWARWithSubLanes(T most, T least) noexcept:
43+
Base((most << NBitsLeast) | least)
44+
{}
45+
46+
// M is most significant bits slice, L is least significant bits slice.
47+
// 0x....M2L2M1L1 or MN|LN||...||M2|L2||M1|L1
48+
using SL = SWARWithSubLanes<NBitsLeast, NBitsMost, T>;
49+
50+
static constexpr inline auto LeastOnes =
51+
Base(meta::BitmaskMaker<T, Base{1}.value(), LaneBits>::value);
52+
static constexpr inline auto MostOnes =
53+
Base(LeastOnes.value() << NBitsLeast);
54+
static constexpr inline auto LeastMask = MostOnes - LeastOnes;
55+
static constexpr inline auto MostMask = ~LeastMask;
56+
57+
constexpr auto least() const noexcept {
58+
return SL{LeastMask & *this};
59+
}
60+
61+
// Isolate the least significant bits of the lane at the specified position.
62+
constexpr auto least(int pos) const noexcept {
63+
constexpr auto Filter = SL((T(1) << NBitsLeast) - 1);
64+
return Filter.shiftLanesLeft(pos) & *this;
65+
}
66+
67+
// Returns only the least significant bits at specified position, 'decoded' to their integer value.
68+
constexpr auto leastFlat(int pos) const noexcept {
69+
return least().at(pos);
70+
}
71+
72+
constexpr auto most() const noexcept {
73+
return SL{MostMask & *this};
74+
}
75+
76+
// The most significant bits of the lane at the specified position.
77+
constexpr auto most(int pos) const noexcept {
78+
constexpr auto Filter =
79+
SL(((T(1) << SL::NBitsMost) - 1) << SL::NBitsLeast);
80+
return Filter.shiftLanesLeft(pos) & *this;
81+
}
82+
83+
// The most significant bits of the lane at the specified position,
84+
// 'decoded' to their integer value.
85+
constexpr auto mostFlat(int pos) const noexcept {
86+
return most().at(pos) >> SL::NBitsLeast;
87+
}
88+
89+
// Blits most sig bits into least significant bits. Experimental.
90+
constexpr auto flattenMostToLeast(int pos) const noexcept {
91+
return SL(this->m_v >> NBitsLeast) & LeastMask;
92+
}
93+
94+
// Blits least sig bits into most significant bits. Experimental.
95+
constexpr auto promoteLeastToMost(int pos) const noexcept {
96+
return SL(this->m_v << NBitsMost) & MostMask;
97+
}
98+
99+
// Sets the lsb sublane at |pos| with least significant NBitsLeast of |in|
100+
constexpr auto least(T in, int pos) const noexcept {
101+
constexpr auto filter = (T(1) << LaneBits) - 1;
102+
const auto keep = ~(filter << (LaneBits * pos)) | MostMask.value();
103+
const auto rdyToInsert = this->m_v & keep;
104+
const auto rval = rdyToInsert | ((in & LeastMask.value()) << (LaneBits * pos));
105+
return SL(rval);
106+
}
107+
108+
// Sets the msb sublane at |pos| with least significant NBitsMost of |in|
109+
constexpr auto most(T in, int pos) const noexcept {
110+
constexpr auto filter = (T(1) << LaneBits) - 1;
111+
const auto keep = ~(filter << (LaneBits * pos)) | LeastMask.value();
112+
const auto rdyToInsert = this->m_v & keep;
113+
const auto insVal = (((in<<NBitsLeast) & MostMask.value()) << (LaneBits * pos));
114+
const auto rval = rdyToInsert | insVal;
115+
return SL(rval);
116+
}
117+
};
118+
119+
}}
120+
121+
#endif

inc/zoo/swar/associative_iteration.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ std::ostream &operator<<(std::ostream &out, zoo::swar::SWAR<NB, B> s) {
3535

3636
#else
3737

38-
#define ZOO_TRACEABLE_EXPRESSION(...) __VA_ARGS__
38+
#define ZOO_TRACEABLE_EXPRESSION(...) (void)(__VA_ARGS__)
3939

4040
#endif
4141

@@ -130,7 +130,7 @@ Desired result:
130130
1111 0110 0100 0100 0100 1000 1001 1010 forParallelSuffix
131131
132132
10 1101 1101
133-
/*
133+
134134
Complete example (32 bits)
135135
Selection mask:
136136
0001 0011 0111 0111 0110 1110 1100 1010

test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ else()
111111
)
112112
set(
113113
SWAR_SOURCES
114-
swar/BasicOperations.cpp
114+
swar/BasicOperations.cpp swar/sublanes.cpp
115115
)
116116
set(
117117
MAP_SOURCES

test/swar/BasicOperations.cpp

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -275,58 +275,6 @@ GE_MSB_TEST(0x7777'7777,
275275
0x0123'4567,
276276
0x8888'8888)
277277

278-
// 3 bits on msb side, 5 bits on lsb side.
279-
using Lanes = SWARWithSubLanes<5, 3, u32>;
280-
using S8u32 = SWAR<8, u32>;
281-
static constexpr inline u32 all0 = 0;
282-
static constexpr inline u32 allF = broadcast<8>(S8u32(0x0000'00FFul)).value();
283-
284-
static_assert(allF == Lanes(allF).value());
285-
static_assert(0xFFFF'FFFF == Lanes(allF).value());
286-
287-
static_assert(0xFFFF'FFE0 == Lanes(allF).least(0,0).value());
288-
static_assert(0xFFFF'FFE1 == Lanes(allF).least(1,0).value());
289-
static_assert(0xFFFF'E0FF == Lanes(allF).least(0,1).value());
290-
static_assert(0xFFFF'E1FF == Lanes(allF).least(1,1).value());
291-
292-
static_assert(0xFFE0'FFFF == Lanes(allF).least(0,2).value());
293-
static_assert(0xFFE1'FFFF == Lanes(allF).least(1,2).value());
294-
static_assert(0xE0FF'FFFF == Lanes(allF).least(0,3).value());
295-
static_assert(0xE1FF'FFFF == Lanes(allF).least(1,3).value());
296-
297-
static_assert(0xFFFF'FF1F == Lanes(allF).most(0,0).value());
298-
static_assert(0xFFFF'FF3F == Lanes(allF).most(1,0).value());
299-
static_assert(0xFFFF'1FFF == Lanes(allF).most(0,1).value());
300-
static_assert(0xFFFF'3FFF == Lanes(allF).most(1,1).value());
301-
302-
static_assert(0xFF1F'FFFF == Lanes(allF).most(0,2).value());
303-
static_assert(0xFF3F'FFFF == Lanes(allF).most(1,2).value());
304-
static_assert(0x1FFF'FFFF == Lanes(allF).most(0,3).value());
305-
static_assert(0x3FFF'FFFF == Lanes(allF).most(1,3).value());
306-
307-
static_assert(0x0000'001f == Lanes(all0).least(31, 0).most(0, 0).value());
308-
static_assert(0x0000'1f00 == Lanes(all0).least(31, 1).most(0, 1).value());
309-
static_assert(0x001f'0000 == Lanes(all0).least(31, 2).most(0, 2).value());
310-
static_assert(0x1f00'0000 == Lanes(all0).least(31, 3).most(0, 3).value());
311-
312-
static_assert(0x0000'00e0 == Lanes(all0).least(0, 0).most(31, 0).value());
313-
static_assert(0x0000'e000 == Lanes(all0).least(0, 1).most(31, 1).value());
314-
static_assert(0x00e0'0000 == Lanes(all0).least(0, 2).most(31, 2).value());
315-
static_assert(0xe000'0000 == Lanes(all0).least(0, 3).most(31, 3).value());
316-
317-
static_assert(0x1F1F'1F1F == Lanes(allF).least().value());
318-
static_assert(0xE0E0'E0E0 == Lanes(allF).most().value());
319-
320-
static_assert(0x0000'001F == Lanes(allF).least(0).value());
321-
static_assert(0x0000'1F00 == Lanes(allF).least(1).value());
322-
static_assert(0x001F'0000 == Lanes(allF).least(2).value());
323-
static_assert(0x1F00'0000 == Lanes(allF).least(3).value());
324-
325-
static_assert(0x0000'00E0 == Lanes(allF).most(0).value());
326-
static_assert(0x0000'E000 == Lanes(allF).most(1).value());
327-
static_assert(0x00E0'0000 == Lanes(allF).most(2).value());
328-
static_assert(0xE000'0000 == Lanes(allF).most(3).value());
329-
330278
static_assert(0x123 == SWAR<4, uint32_t>(0x173).blitElement(1, 2).value());
331279
static_assert(0 == isolateLSB(u32(0)));
332280

test/swar/sublanes.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#include "zoo/swar/SWARWithSubLanes.h"
2+
3+
using namespace zoo;
4+
using namespace zoo::swar;
5+
6+
// 3 bits on msb side, 5 bits on lsb side.
7+
using Lanes = SWARWithSubLanes<5, 3, u32>;
8+
using S8u32 = SWAR<8, u32>;
9+
static constexpr inline u32 all0 = 0;
10+
static constexpr inline u32 allF = broadcast<8>(S8u32(0x0000'00FFul)).value();
11+
12+
static_assert(allF == Lanes(allF).value());
13+
static_assert(0xFFFF'FFFF == Lanes(allF).value());
14+
15+
static_assert(0xFFFF'FFE0 == Lanes(allF).least(0,0).value());
16+
static_assert(0xFFFF'FFE1 == Lanes(allF).least(1,0).value());
17+
static_assert(0xFFFF'E0FF == Lanes(allF).least(0,1).value());
18+
static_assert(0xFFFF'E1FF == Lanes(allF).least(1,1).value());
19+
20+
static_assert(0xFFE0'FFFF == Lanes(allF).least(0,2).value());
21+
static_assert(0xFFE1'FFFF == Lanes(allF).least(1,2).value());
22+
static_assert(0xE0FF'FFFF == Lanes(allF).least(0,3).value());
23+
static_assert(0xE1FF'FFFF == Lanes(allF).least(1,3).value());
24+
25+
static_assert(0xFFFF'FF1F == Lanes(allF).most(0,0).value());
26+
static_assert(0xFFFF'FF3F == Lanes(allF).most(1,0).value());
27+
static_assert(0xFFFF'1FFF == Lanes(allF).most(0,1).value());
28+
static_assert(0xFFFF'3FFF == Lanes(allF).most(1,1).value());
29+
30+
static_assert(0xFF1F'FFFF == Lanes(allF).most(0,2).value());
31+
static_assert(0xFF3F'FFFF == Lanes(allF).most(1,2).value());
32+
static_assert(0x1FFF'FFFF == Lanes(allF).most(0,3).value());
33+
static_assert(0x3FFF'FFFF == Lanes(allF).most(1,3).value());
34+
35+
static_assert(0x0000'001f == Lanes(all0).least(31, 0).most(0, 0).value());
36+
static_assert(0x0000'1f00 == Lanes(all0).least(31, 1).most(0, 1).value());
37+
static_assert(0x001f'0000 == Lanes(all0).least(31, 2).most(0, 2).value());
38+
static_assert(0x1f00'0000 == Lanes(all0).least(31, 3).most(0, 3).value());
39+
40+
static_assert(0x0000'00e0 == Lanes(all0).least(0, 0).most(31, 0).value());
41+
static_assert(0x0000'e000 == Lanes(all0).least(0, 1).most(31, 1).value());
42+
static_assert(0x00e0'0000 == Lanes(all0).least(0, 2).most(31, 2).value());
43+
static_assert(0xe000'0000 == Lanes(all0).least(0, 3).most(31, 3).value());
44+
45+
static_assert(0x1F1F'1F1F == Lanes(allF).least().value());
46+
static_assert(0xE0E0'E0E0 == Lanes(allF).most().value());
47+
48+
static_assert(0x0000'001F == Lanes(allF).least(0).value());
49+
static_assert(0x0000'1F00 == Lanes(allF).least(1).value());
50+
static_assert(0x001F'0000 == Lanes(allF).least(2).value());
51+
static_assert(0x1F00'0000 == Lanes(allF).least(3).value());
52+
53+
static_assert(0x0000'00E0 == Lanes(allF).most(0).value());
54+
static_assert(0x0000'E000 == Lanes(allF).most(1).value());
55+
static_assert(0x00E0'0000 == Lanes(allF).most(2).value());
56+
static_assert(0xE000'0000 == Lanes(allF).most(3).value());

0 commit comments

Comments
 (0)