commit fa3e0e79b98599bedd42d5a26c7fa23ea06802e3
parent 2c4bc891602f683a730f1d2f23965c05a1b31dd2
Author: Evgeny Grin (Karlson2k) <k2k@drgrin.dev>
Date: Tue, 9 Sep 2025 01:37:26 +0200
mhd_bithelpers.h: Added new helpers: mhd_LEADING_ZEROS32{,NZ}(), mhd_BIT_WIDTH32{,NZ}() and mhd_LEADING_ONES32()
Diffstat:
| M | configure.ac | | | 108 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
| M | src/mhd2/mhd_bithelpers.h | | | 268 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
2 files changed, 370 insertions(+), 6 deletions(-)
diff --git a/configure.ac b/configure.ac
@@ -608,7 +608,7 @@ AC_CHECK_HEADERS_ONCE([stdio.h string.h stdint.h stdarg.h errno.h limits.h fcntl
# Check for basic optional headers
AC_CHECK_HEADERS([stddef.h stdlib.h inttypes.h sys/types.h sys/stat.h unistd.h \
- sys/uio.h crtdefs.h malloc.h io.h], [], [], [AC_INCLUDES_DEFAULT])
+ sys/uio.h crtdefs.h malloc.h io.h stdbit.h], [], [], [AC_INCLUDES_DEFAULT])
# Check for clock-specific optional headers
AC_CHECK_HEADERS([sys/time.h time.h], [], [], [AC_INCLUDES_DEFAULT])
@@ -3183,7 +3183,103 @@ AC_CACHE_CHECK([[whether __builtin_bswap64() is available]],
])
AS_IF([[test "x$mhd_cv_func___builtin_bswap64_avail" = "xyes"]],
[AC_DEFINE([[MHD_HAVE___BUILTIN_BSWAP64]], [[1]], [Define to 1 if you have __builtin_bswap64() builtin function])])
-
+
+AC_CACHE_CHECK([[whether __builtin_clz() is available]],
+ [[mhd_cv_func___builtin_clz_avail]], [dnl
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+int main(void)
+{
+ unsigned int val = 2u; /* non-zero input */
+ return __builtin_clz(val) > 8 ? 0 : 1; /* link-only probe: the program is never run */
+}
+ ]]
+ )
+ ],
+ [[mhd_cv_func___builtin_clz_avail="yes"]],
+ [[mhd_cv_func___builtin_clz_avail="no"]]
+ )
+ ]
+)
+AS_VAR_IF([mhd_cv_func___builtin_clz_avail],["yes"],
+ [AC_DEFINE([[MHD_HAVE___BUILTIN_CLZ]], [[1]], [Define to 1 if __builtin_clz() builtin function is supported by compiler])]
+)
+AC_CACHE_CHECK([[whether __builtin_clzl() is available]],
+ [[mhd_cv_func___builtin_clzl_avail]], [dnl
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+int main(void)
+{
+ unsigned long val = 2u; /* non-zero input */
+ return __builtin_clzl(val) > 8 ? 0 : 1; /* link-only probe: the program is never run */
+}
+ ]]
+ )
+ ],
+ [[mhd_cv_func___builtin_clzl_avail="yes"]],
+ [[mhd_cv_func___builtin_clzl_avail="no"]]
+ )
+ ]
+)
+AS_VAR_IF([mhd_cv_func___builtin_clzl_avail],["yes"],
+ [AC_DEFINE([[MHD_HAVE___BUILTIN_CLZL]], [[1]], [Define to 1 if __builtin_clzl() builtin function is supported by compiler])]
+)
+AC_CACHE_CHECK([[whether __builtin_clzll() is available]],
+ [[mhd_cv_func___builtin_clzll_avail]], [dnl
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+int main(void)
+{
+ unsigned long long val = 2u; /* non-zero input */
+ return __builtin_clzll(val) > 8 ? 0 : 1; /* link-only probe; must call the long long variant */
+}
+ ]]
+ )
+ ],
+ [[mhd_cv_func___builtin_clzll_avail="yes"]],
+ [[mhd_cv_func___builtin_clzll_avail="no"]]
+ )
+ ]
+)
+AS_VAR_IF([mhd_cv_func___builtin_clzll_avail],["yes"],
+ [AC_DEFINE([[MHD_HAVE___BUILTIN_CLZLL]], [[1]], [Define to 1 if __builtin_clzll() builtin function is supported by compiler])]
+)
+AC_CACHE_CHECK([[whether __builtin_clzg(v) is available]],
+ [[mhd_cv_func___builtin_clzg1_avail]], [dnl
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+int main(void)
+{
+ unsigned int val = 2u; /* non-zero input */
+ return __builtin_clzg(val) > 8 ? 0 : 1; /* one-argument form; link-only probe, never run */
+}
+ ]]
+ )
+ ],
+ [[mhd_cv_func___builtin_clzg1_avail="yes"]],
+ [[mhd_cv_func___builtin_clzg1_avail="no"]]
+ )
+ ]
+)
+AS_VAR_IF([mhd_cv_func___builtin_clzg1_avail],["yes"],
+ [AC_DEFINE([[MHD_HAVE___BUILTIN_CLZG1]], [[1]], [Define to 1 if __builtin_clzg(v) builtin function is supported by compiler])]
+)
+AC_CACHE_CHECK([[whether __builtin_clzg(v,z) is available]],
+ [[mhd_cv_func___builtin_clzg2_avail]], [dnl
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+int main(void)
+{
+ unsigned int val = 0u; /* zero input: the two-argument form accepts it */
+ return __builtin_clzg(val, 0); /* two-argument form; link-only probe, never run */
+}
+ ]]
+ )
+ ],
+ [[mhd_cv_func___builtin_clzg2_avail="yes"]],
+ [[mhd_cv_func___builtin_clzg2_avail="no"]]
+ )
+ ]
+)
+AS_VAR_IF([mhd_cv_func___builtin_clzg2_avail],["yes"],
+ [AC_DEFINE([[MHD_HAVE___BUILTIN_CLZG2]], [[1]], [Define to 1 if __builtin_clzg(v,z) builtin function is supported by compiler])]
+)
+
AC_CACHE_CHECK([whether $CC supports __builtin_expect_with_probability()],[mhd_cv_cc___builtin_expect_with_probability_avail],
[
AC_LINK_IFELSE(
@@ -3610,13 +3706,15 @@ AC_DEFINE_UNQUOTED([SIZEOF_STRUCT_TIMEVAL_TV_SEC], [$mhd_cv_size_timeval_tv_sec]
[The size of `tv_sec' member of `struct timeval', as computed by sizeof])
AC_CHECK_SIZEOF([uint_least8_t], [], [[#include <stdint.h>]])
AC_CHECK_SIZEOF([uint_least16_t], [], [[#include <stdint.h>]])
+AC_CHECK_SIZEOF([uint_least32_t], [], [[#include <stdint.h>]])
AC_CHECK_SIZEOF([uint_fast32_t], [], [[#include <stdint.h>]])
AC_CHECK_SIZEOF([uint_least64_t], [], [[#include <stdint.h>]])
AC_CHECK_SIZEOF([int_fast64_t], [], [[#include <stdint.h>]])
AC_CHECK_SIZEOF([uint_fast64_t], [], [[#include <stdint.h>]])
-AC_CHECK_SIZEOF([int], [], [[#include <stdint.h>]])
-AC_CHECK_SIZEOF([unsigned int], [], [[#include <stdint.h>]])
-AC_CHECK_SIZEOF([unsigned long long], [], [[#include <stdint.h>]])
+AC_CHECK_SIZEOF([int])
+AC_CHECK_SIZEOF([unsigned int])
+AC_CHECK_SIZEOF([unsigned long])
+AC_CHECK_SIZEOF([unsigned long long])
AC_CHECK_SIZEOF([size_t], [],
[[
#ifdef HAVE_STDLIB_H
diff --git a/src/mhd2/mhd_bithelpers.h b/src/mhd2/mhd_bithelpers.h
@@ -38,7 +38,7 @@
/**
* @file src/mhd2/mhd_bithelpers.h
- * @brief macros for bits manipulations
+ * @brief Bit manipulation helpers
* @author Karlson2k (Evgeny Grin)
*/
@@ -63,6 +63,12 @@
#endif /* _MSC_FULL_VER */
#include "mhd_byteorder.h"
+#ifdef CHAR_BIT /* helpers in this file compute bit counts as sizeof() * 8 */
+# if CHAR_BIT != 8
+#error CHAR_BIT different from 8 is not supported
+# endif /* CHAR_BIT != 8 */
+#endif /* CHAR_BIT */
+
#ifndef __has_builtin
# define mhd_HAS_BUILTIN(x) (0)
#else
@@ -614,6 +620,266 @@ mhd_ROTR64 (uint64_t value64, unsigned int bits)
#endif /* ! __builtin_rotateright64 */
+
+/**
+ * @def mhd_LEADING_ZEROS32NZ
+ * Count leading (most-significant) zero bits in a non-zero 32-bit value.
+ * The uint_least8_t result is undefined for a zero or wider-than-32-bit value.
+ */
+#if defined(MHD_HAVE___BUILTIN_CLZ) && 4 == SIZEOF_UNSIGNED_INT
+# define mhd_LEADING_ZEROS32NZ(val32) \
+ ((uint_least8_t) __builtin_clz ((unsigned int) (val32)))
+#elif defined(MHD_HAVE___BUILTIN_CLZL) && 4 == SIZEOF_UNSIGNED_LONG
+# define mhd_LEADING_ZEROS32NZ(val32) \
+ ((uint_least8_t) __builtin_clzl ((unsigned long) (val32)))
+#elif defined(MHD_HAVE___BUILTIN_CLZG1) && 4 <= SIZEOF_UINT_LEAST32_T
+# if 4 == SIZEOF_UINT_LEAST32_T /* four bytes: no width correction needed */
+# define mhd_LEADING_ZEROS32NZ(val32) \
+ ((uint_least8_t) __builtin_clzg ((uint_least32_t) (val32)))
+# else /* 4 < SIZEOF_UINT_LEAST32_T */
+# define mhd_LEADING_ZEROS32NZ(val32) \
+ ((uint_least8_t) (__builtin_clzg ((uint_least32_t) (val32)) \
+ - ((sizeof(uint_least32_t) - 4u) * 8u))) /* minus the bits above bit 31 */
+# endif /* 4 < SIZEOF_UINT_LEAST32_T */
+#endif /* MHD_HAVE___BUILTIN_CLZG1 && 4 <= SIZEOF_UINT_LEAST32_T */
+
+
+/**
+ * @def mhd_BIT_WIDTH32NZ
+ * Return the smallest number of bits needed to represent a non-zero value.
+ * The uint_least8_t result is undefined for a zero or wider-than-32-bit value.
+ */
+#if defined(mhd_HAS_VC_INTRINSICS) && 4 == SIZEOF_UNSIGNED_LONG && \
+ (defined(_M_X64) || defined(_M_IX86) \
+ || defined(_M_ARM) || defined(_M_ARM64) \
+ || defined(__i386__) || defined(__x86_64__) \
+ || defined(__arm__) || defined(__aarch64__))
+# ifndef __clang__
+# pragma intrinsic(_BitScanReverse)
+# endif /* ! __clang__ */
+mhd_static_inline uint_least8_t
+mhd_bh_func_bit_width32nz(uint_least32_t val32)
+{
+ unsigned long idx; /* receives the zero-based index of the highest set bit */
+ (void) _BitScanReverse(&idx, (unsigned long) val32); /* status ignored: input must be non-zero */
+ return (uint_least8_t) (idx + 1u); /* bit index -> number of bits */
+}
+# define mhd_BIT_WIDTH32NZ(val32) mhd_bh_func_bit_width32nz ((val32))
+#endif /* mhd_HAS_VC_INTRINSICS && 4 == SIZEOF_UNSIGNED_LONG && (x86 || ARM) */
+
+
+/**
+ * @def mhd_LEADING_ZEROS32
+ * Count leading (most-significant) zero bits in a 32-bit value.
+ * If the argument is zero then 32 is returned.
+ * The result is undefined if the argument does not fit in 32 bits.
+ */
+/* The fallback (second) argument of __builtin_clzg() must have type 'int' */
+#if defined(MHD_HAVE___BUILTIN_CLZG2) && 4 <= SIZEOF_UINT_LEAST32_T
+# if 4 == SIZEOF_UINT_LEAST32_T
+# define mhd_LEADING_ZEROS32(val32) \
+ ((uint_least8_t) __builtin_clzg ((uint_least32_t) (val32), 32))
+# else /* 4 < SIZEOF_UINT_LEAST32_T */
+# define mhd_LEADING_ZEROS32(val32) \
+ ((uint_least8_t) (__builtin_clzg ((uint_least32_t) (val32), \
+ (int) (sizeof(uint_least32_t) * 8u)) \
+ - ((sizeof(uint_least32_t) - 4u) * 8u)))
+# endif /* 4 < SIZEOF_UINT_LEAST32_T */
+#elif defined(mhd_HAS_VC_INTRINSICS) && 4 == SIZEOF_UNSIGNED_LONG && \
+ (defined(_M_ARM) || defined(_M_ARM64) \
+ || defined(__arm__) || defined(__aarch64__)) && \
+ ( (! defined(__clang__)) \
+ || (((__clang_major__ + 0) >= 18) && defined(__aarch64__)) )
+/* Support for _CountLeadingZeros() was added only in clang 18 */
+# ifndef __clang__
+# pragma intrinsic(_CountLeadingZeros)
+# endif /* ! __clang__ */
+# define mhd_LEADING_ZEROS32(val32) \
+ ((uint_least8_t) _CountLeadingZeros ((unsigned long) (val32)))
+#elif mhd_HAS_BUILTIN (__builtin_stdc_leading_zeros)
+# if 4 == SIZEOF_UINT_LEAST32_T
+# define mhd_LEADING_ZEROS32(val32) \
+ ((uint_least8_t) \
+ __builtin_stdc_leading_zeros ((uint_least32_t) (val32)))
+# else /* 4 < SIZEOF_UINT_LEAST32_T */
+# define mhd_LEADING_ZEROS32(val32) \
+ ((uint_least8_t) \
+ (__builtin_stdc_leading_zeros ((uint_least32_t) (val32)) \
+ - ((sizeof(uint_least32_t) - 4u) * 8u)))
+# endif /* 4 < SIZEOF_UINT_LEAST32_T */
+#endif /* __builtin_stdc_leading_zeros */
+
+
+/**
+ * @def mhd_BIT_WIDTH32
+ * Return the smallest number of bits needed to represent the value.
+ * If the argument is zero then zero is returned (result type: uint_least8_t).
+ * The result is undefined if the argument does not fit in 32 bits.
+ */
+#if defined(mhd_HAS_VC_INTRINSICS) && 4 == SIZEOF_UNSIGNED_LONG && \
+ (defined(_M_X64) || defined(_M_IX86) \
+ || defined(_M_ARM) || defined(_M_ARM64) \
+ || defined(__i386__) || defined(__x86_64__) \
+ || defined(__arm__) || defined(__aarch64__))
+# ifndef __clang__
+# pragma intrinsic(_BitScanReverse)
+# endif /* ! __clang__ */
+mhd_static_inline uint_least8_t
+mhd_bh_func_bit_width32(uint_least32_t val32)
+{
+ unsigned long idx; /* receives the zero-based index of the highest set bit */
+ if (0 == _BitScanReverse(&idx, (unsigned long) val32))
+ return 0u; /* no set bit found: the value is zero */
+ return (uint_least8_t) (idx + 1u); /* bit index -> number of bits */
+}
+# define mhd_BIT_WIDTH32(val32) mhd_bh_func_bit_width32 ((val32))
+#elif mhd_HAS_BUILTIN (__builtin_stdc_bit_width)
+# define mhd_BIT_WIDTH32(val32) \
+ ((uint_least8_t) __builtin_stdc_bit_width ((uint_least32_t) (val32)))
+#endif /* __builtin_stdc_bit_width */
+
+
+/* ** Derive every missing macro from its already available counterpart ** */
+/* Leading zeros of a non-zero value:
+ 32 minus its bit width */
+#if ! defined(mhd_LEADING_ZEROS32NZ) && defined(mhd_BIT_WIDTH32NZ)
+# define mhd_LEADING_ZEROS32NZ(val32) \
+ ((uint_least8_t) (32u - mhd_BIT_WIDTH32NZ ((val32))))
+#endif /* ! mhd_LEADING_ZEROS32NZ && mhd_BIT_WIDTH32NZ */
+
+/* Bit width of a non-zero value:
+ 32 minus its leading zeros */
+#if ! defined(mhd_BIT_WIDTH32NZ) && defined(mhd_LEADING_ZEROS32NZ)
+# define mhd_BIT_WIDTH32NZ(val32) \
+ ((uint_least8_t) (32u - mhd_LEADING_ZEROS32NZ ((val32))))
+#endif /* ! mhd_BIT_WIDTH32NZ && mhd_LEADING_ZEROS32NZ */
+
+/* Leading zeros of any value (zero included):
+ 32 minus its bit width */
+#if ! defined(mhd_LEADING_ZEROS32) && defined(mhd_BIT_WIDTH32)
+# define mhd_LEADING_ZEROS32(val32) \
+ ((uint_least8_t) (32u - mhd_BIT_WIDTH32 ((val32))))
+#endif /* ! mhd_LEADING_ZEROS32 && mhd_BIT_WIDTH32 */
+
+/* Bit width of any value (zero included):
+ 32 minus its leading zeros */
+#if ! defined(mhd_BIT_WIDTH32) && defined(mhd_LEADING_ZEROS32)
+# define mhd_BIT_WIDTH32(val32) \
+ ((uint_least8_t) (32u - mhd_LEADING_ZEROS32 ((val32))))
+#endif /* ! mhd_BIT_WIDTH32 && mhd_LEADING_ZEROS32 */
+
+#if ! defined(mhd_LEADING_ZEROS32NZ) && defined(mhd_LEADING_ZEROS32)
+/* Only zero-safe versions exist: they are valid for non-zero input too */
+# define mhd_LEADING_ZEROS32NZ(val32) mhd_LEADING_ZEROS32 ((val32))
+# define mhd_BIT_WIDTH32NZ(val32) mhd_BIT_WIDTH32 ((val32))
+#elif defined(mhd_LEADING_ZEROS32NZ) && ! defined(mhd_LEADING_ZEROS32)
+/* Only non-zero versions exist: wrap them with an explicit zero check */
+
+/* Leading zeros of any 32-bit value; 32 is returned for zero */
+mhd_static_inline uint_least8_t
+mhd_bh_func_leading_zeros32(uint_least32_t val32)
+{
+ if (0u != val32)
+ return mhd_LEADING_ZEROS32NZ(val32);
+ return 32u;
+}
+
+/* Bit width of any 32-bit value; zero is returned for zero */
+mhd_static_inline uint_least8_t
+mhd_bh_func_bit_width32(uint_least32_t val32)
+{
+ if (0u != val32)
+ return mhd_BIT_WIDTH32NZ(val32);
+ return 0u;
+}
+
+# define mhd_LEADING_ZEROS32(val32) mhd_bh_func_leading_zeros32 ((val32))
+# define mhd_BIT_WIDTH32(val32) mhd_bh_func_bit_width32 ((val32))
+#endif /* mhd_LEADING_ZEROS32NZ && ! mhd_LEADING_ZEROS32 */
+
+#if defined(mhd_BIT_WIDTH32) || defined(mhd_LEADING_ZEROS32) \
+ || defined(mhd_BIT_WIDTH32NZ) || defined(mhd_LEADING_ZEROS32NZ)
+/* One compiler-optimised base function is enough to derive all four macros,
+ so a macro that is still missing here indicates a logic error above. */
+# ifndef mhd_LEADING_ZEROS32NZ
+#error mhd_LEADING_ZEROS32NZ() must be defined
+# endif /* ! mhd_LEADING_ZEROS32NZ */
+# ifndef mhd_BIT_WIDTH32NZ
+#error mhd_BIT_WIDTH32NZ() must be defined
+# endif /* ! mhd_BIT_WIDTH32NZ */
+# ifndef mhd_LEADING_ZEROS32
+#error mhd_LEADING_ZEROS32() must be defined
+# endif /* ! mhd_LEADING_ZEROS32 */
+# ifndef mhd_BIT_WIDTH32
+#error mhd_BIT_WIDTH32() must be defined
+# endif /* ! mhd_BIT_WIDTH32 */
+#else
+/* Nothing compiler-optimised was detected: use the portable implementation. */
+
+
+/**
+ * Portable fallback: the number of bits needed to represent @a val32
+ * @param val32 the value to examine, must fit in 32 bits
+ * @return the bit width of the value, zero if @a val32 is zero
+ */
+mhd_static_inline uint_least8_t
+mhd_bh_func_bit_width32(uint_least32_t val32)
+{
+ uint_fast32_t rest = (uint_fast32_t) (val32 & 0xFFFFFFFFu);
+ uint_fast8_t width = 0u;
+ uint_fast8_t step;
+ mhd_assert (val32 == rest);
+
+ /* Halve the search window five times; every step turns a comparison into
+ a shift amount, so no branches and no lookup tables are needed. */
+ step = (uint_fast8_t) ((0 != (rest >> 16u)) << 4u);
+ rest >>= step;
+ width = (uint_fast8_t) (width + step);
+ step = (uint_fast8_t) ((0 != (rest >> 8u)) << 3u);
+ rest >>= step;
+ width = (uint_fast8_t) (width + step);
+ step = (uint_fast8_t) ((0 != (rest >> 4u)) << 2u);
+ rest >>= step;
+ width = (uint_fast8_t) (width + step);
+ step = (uint_fast8_t) ((0 != (rest >> 2u)) << 1u);
+ rest >>= step;
+ width = (uint_fast8_t) (width + step);
+ step = (uint_fast8_t) (0 != (rest >> 1u));
+ rest >>= step;
+ width = (uint_fast8_t) (width + step);
+ return (uint_least8_t) (width + rest); /* 'rest' is zero or one here */
+}
+
+# define mhd_LEADING_ZEROS32NZ(val32) \
+ ((uint_least8_t) \
+ (32u - mhd_bh_func_bit_width32 ((uint_least32_t) (val32))))
+# define mhd_BIT_WIDTH32NZ(val32) mhd_bh_func_bit_width32 ((val32)) /* same as zero-safe */
+# define mhd_LEADING_ZEROS32(val32) \
+ ((uint_least8_t) \
+ (32u - mhd_bh_func_bit_width32 ((uint_least32_t) (val32))))
+# define mhd_BIT_WIDTH32(val32) mhd_bh_func_bit_width32 ((val32))
+#endif /* fallback implementation */
+
+/**
+ * @def mhd_LEADING_ONES32
+ * Count leading (most-significant) one bits in a 32-bit value; the result is
+ * 32 when all 32 bits are set. The argument is always treated as a 32-bit
+ * value; any higher-order bits (if present) are ignored.
+ * @note Unlike other related macros, this one explicitly trims (or extends)
+ * the argument to 32 bits. Do not pass signed or narrower types as the
+ * argument: they may produce unexpected results.
+ */
+#if mhd_HAS_BUILTIN (__builtin_stdc_leading_ones) && \
+ 4 == SIZEOF_UINT_LEAST32_T
+# define mhd_LEADING_ONES32(val32) \
+ ((uint_least8_t) __builtin_stdc_leading_ones ((uint_least32_t) (val32)))
+#else /* ! __builtin_stdc_leading_ones || 4 != SIZEOF_UINT_LEAST32_T */
+# define mhd_LEADING_ONES32(val32) \
+ mhd_LEADING_ZEROS32 ((uint_least32_t) \
+ (0xFFFFFFFFu & ~((uint_least32_t) (val32)))) /* ones of v == zeros of ~v */
+#endif /* ! __builtin_stdc_leading_ones || 4 != SIZEOF_UINT_LEAST32_T */
+
+
mhd_DATA_TRUNCATION_RUNTIME_CHECK_RESTORE
#if defined(_MSC_FULL_VER)