Provide portable clz implementation

jserv · jserv · commit 009f4dc66ea5 · 2024-07-29T12:35:46.000+08:00
This commit implements an efficient clz (count leading zeros) routine
using the compiler intrinsics of MSVC and Clang/GCC, and falls back to a
generic implementation when neither compiler is detected.
diff --git a/include/twin_private.h b/include/twin_private.h
@@ -499,4 +499,41 @@ void _twin_button_init(twin_button_t *button,
                        twin_style_t font_style,
                        twin_dispatch_proc_t dispatch);
 
+/* utility */
+
+#ifdef _MSC_VER
+#include <intrin.h>
+/* Count the leading zero bits of a 32-bit value using the MSVC intrinsic.
+ *
+ * Returns the number of zero bits above the most significant set bit of
+ * 'v', or 32 when 'v' is zero.
+ */
+static inline int twin_clz(uint32_t v)
+{
+    /* _BitScanReverse() stores the bit index through an 'unsigned long *',
+     * so the output variable must have exactly that type.
+     */
+    unsigned long msb_index = 0;
+
+    /* Search from MSB to LSB for the first set bit.
+     * The intrinsic returns zero if no set bit is found.
+     */
+    if (_BitScanReverse(&msb_index, v))
+        return 31 - (int) msb_index;
+    return 32; /* v == 0: every bit is a leading zero */
+}
+#elif defined(__GNUC__) || defined(__clang__)
+/* Count the leading zero bits of a 32-bit value using the GCC/Clang builtin.
+ *
+ * Returns the number of zero bits above the most significant set bit of
+ * 'v', or 32 when 'v' is zero.
+ */
+static inline int twin_clz(uint32_t v)
+{
+    /* The result of __builtin_clz(0) is undefined, so handle zero explicitly
+     * and report 32, the same value the MSVC variant returns for zero.
+     */
+    if (!v)
+        return 32;
+    return __builtin_clz(v);
+}
+#else /* generic implementation */
+/* Count the leading zero bits of a 32-bit value without compiler support.
+ *
+ * Returns the number of zero bits above the most significant set bit of
+ * 'v', or 32 when 'v' is zero.
+ */
+static inline int twin_clz(uint32_t v)
+{
+    /* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
+     *
+     * Indexed by the top five bits of the de Bruijn product, this table
+     * yields floor(log2(v)), i.e. the index of the most significant set bit.
+     */
+    static const uint8_t mul_debruijn[] = {
+        0, 9,  1,  10, 13, 21, 2,  29, 11, 14, 16, 18, 22, 25, 3, 30,
+        8, 12, 20, 28, 15, 17, 24, 7,  19, 27, 23, 6,  26, 5,  4, 31};
+
+    /* The lookup cannot tell 0 apart from 1 (both select entry 0), so zero
+     * is handled up front.
+     */
+    if (!v)
+        return 32;
+
+    /* Smear the most significant set bit into every lower bit position. */
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+
+    /* The table gives the MSB index; the leading zeros are the bits above
+     * it, hence the subtraction from 31.
+     */
+    return 31 - mul_debruijn[(uint32_t) (v * 0x07C4ACDDU) >> 27];
+}
+#endif
+
 #endif /* _TWIN_PRIVATE_H_ */
diff --git a/src/fixed.c b/src/fixed.c
@@ -9,6 +9,7 @@
 #define uint32_lo(i) ((i) & 0xffff)
 #define uint32_hi(i) ((i) >> 16)
 #define uint32_carry16 ((1) << 16)
+
 /* Check interval
  * For any variable interval checking:
  *     if (x > minx - epsilon && x < minx + epsilon) ...
@@ -33,17 +34,20 @@ twin_fixed_t twin_fixed_sqrt(twin_fixed_t a)
     /* Shift left 'a' to expand more digit for sqrt precision */
     offset &= ~1;
     a <<= offset;
+
     /* Calculate the digits need to shift back */
     offset >>= 1;
     offset -= (16 >> 1);
+
     /* Use digit-by-digit calculation to compute square root */
     twin_fixed_t z = 0;
-    for (twin_fixed_t m = 1UL << ((31 - __builtin_clz(a)) & ~1UL); m; m >>= 2) {
+    for (twin_fixed_t m = 1UL << ((31 - twin_clz(a)) & ~1UL); m; m >>= 2) {
         int b = z + m;
         z >>= 1;
         if (a >= b)
             a -= b, z += m;
     }
+
     /* Shift back the expanded digits */
     return (offset >= 0) ? z >> offset : z << (-offset);
 }
@@ -66,8 +70,7 @@ twin_sfixed_t _twin_sfixed_sqrt(twin_sfixed_t as)
     offset -= (4 >> 1);
 
     twin_sfixed_t z = 0;
-    for (twin_sfixed_t m = 1UL << ((31 - __builtin_clz(as)) & ~1UL); m;
-         m >>= 2) {
+    for (twin_sfixed_t m = 1UL << ((31 - twin_clz(as)) & ~1UL); m; m >>= 2) {
         int16_t b = z + m;
         z >>= 1;
         if (as >= b)