Skip to content

Commit 009f4dc

Browse files
committed
Provide portable clz implementation
This commit implements an efficient clz (count leading zeros) routine using compiler intrinsics on MSVC and Clang/GCC, and falls back to a generic implementation when neither compiler is detected.
1 parent e1892ce commit 009f4dc

File tree

2 files changed

+43
-3
lines changed

2 files changed

+43
-3
lines changed

include/twin_private.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,4 +499,41 @@ void _twin_button_init(twin_button_t *button,
499499
twin_style_t font_style,
500500
twin_dispatch_proc_t dispatch);
501501

502+
/* utility */
503+
504+
#ifdef _MSC_VER
505+
#include <intrin.h>
506+
/**
 * Count the leading zero bits of a 32-bit value (MSVC variant).
 *
 * @param v value to inspect
 * @return number of zero bits above the most significant set bit (0..31),
 *         or 32 when @p v is zero.
 */
static inline int twin_clz(uint32_t v)
{
    /* _BitScanReverse() is declared to store the bit index through an
     * 'unsigned long *', which need not be the same type as uint32_t, so
     * use the exact type the intrinsic expects.
     */
    unsigned long msb_index = 0;

    /* Search from MSB to LSB for the first set bit.
     * The intrinsic returns zero if no set bit is found (v == 0).
     */
    if (_BitScanReverse(&msb_index, v))
        return 31 - (int) msb_index;
    return 32; /* v == 0: all 32 bits are leading zeros */
}
516+
#elif defined(__GNUC__) || defined(__clang__)
517+
/**
 * Count the leading zero bits of a 32-bit value (GCC/Clang variant).
 *
 * @param v value to inspect
 * @return number of zero bits above the most significant set bit (0..31),
 *         or 32 when @p v is zero.
 */
static inline int twin_clz(uint32_t v)
{
    /* The result of __builtin_clz() is undefined for a zero argument, so
     * handle that case explicitly. Returning 32 matches what the MSVC
     * variant of this helper reports for zero.
     */
    if (v == 0)
        return 32;
    return __builtin_clz(v);
}
521+
#else /* generic implementation */
522+
/**
 * Count the leading zero bits of a 32-bit value (portable fallback).
 *
 * @param v value to inspect
 * @return number of zero bits above the most significant set bit (0..31),
 *         or 32 when @p v is zero.
 */
static inline int twin_clz(uint32_t v)
{
    /* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn */
    static const uint8_t mul_debruijn[32] = {
        0, 9,  1,  10, 13, 21, 2,  29, 11, 14, 16, 18, 22, 25, 3, 30,
        8, 12, 20, 28, 15, 17, 24, 7,  19, 27, 23, 6,  26, 5,  4, 31};

    /* Zero has no set bit; report all 32 bits as leading zeros instead of
     * letting the lookup below alias it with v == 1.
     */
    if (v == 0)
        return 32;

    /* Smear the highest set bit into every lower bit position. */
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;

    /* The De Bruijn lookup yields floor(log2(v)), i.e. the index of the
     * highest set bit. Subtract it from 31 to obtain the leading-zero count.
     */
    return 31 - mul_debruijn[(uint32_t) (v * 0x07C4ACDDU) >> 27];
}
537+
#endif
538+
502539
#endif /* _TWIN_PRIVATE_H_ */

src/fixed.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#define uint32_lo(i) ((i) & 0xffff)
1010
#define uint32_hi(i) ((i) >> 16)
1111
#define uint32_carry16 ((1) << 16)
12+
1213
/* Check interval
1314
* For any variable interval checking:
1415
* if (x > minx - epsilon && x < minx + epsilon) ...
@@ -33,17 +34,20 @@ twin_fixed_t twin_fixed_sqrt(twin_fixed_t a)
3334
/* Shift 'a' left to gain more digits of precision for the square root */
3435
offset &= ~1;
3536
a <<= offset;
37+
3638
/* Calculate the number of digits to shift back afterwards */
3739
offset >>= 1;
3840
offset -= (16 >> 1);
41+
3942
/* Use digit-by-digit calculation to compute square root */
4043
twin_fixed_t z = 0;
41-
for (twin_fixed_t m = 1UL << ((31 - __builtin_clz(a)) & ~1UL); m; m >>= 2) {
44+
for (twin_fixed_t m = 1UL << ((31 - twin_clz(a)) & ~1UL); m; m >>= 2) {
4245
int b = z + m;
4346
z >>= 1;
4447
if (a >= b)
4548
a -= b, z += m;
4649
}
50+
4751
/* Shift back the expanded digits */
4852
return (offset >= 0) ? z >> offset : z << (-offset);
4953
}
@@ -66,8 +70,7 @@ twin_sfixed_t _twin_sfixed_sqrt(twin_sfixed_t as)
6670
offset -= (4 >> 1);
6771

6872
twin_sfixed_t z = 0;
69-
for (twin_sfixed_t m = 1UL << ((31 - __builtin_clz(as)) & ~1UL); m;
70-
m >>= 2) {
73+
for (twin_sfixed_t m = 1UL << ((31 - twin_clz(as)) & ~1UL); m; m >>= 2) {
7174
int16_t b = z + m;
7275
z >>= 1;
7376
if (as >= b)

0 commit comments

Comments
 (0)