diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /util/generic/bitops.h | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'util/generic/bitops.h')
-rw-r--r-- | util/generic/bitops.h | 459 |
1 files changed, 459 insertions, 0 deletions
diff --git a/util/generic/bitops.h b/util/generic/bitops.h new file mode 100644 index 0000000000..2db15fc59b --- /dev/null +++ b/util/generic/bitops.h @@ -0,0 +1,459 @@ +#pragma once + +#include "ylimits.h" +#include "typelist.h" + +#include <util/system/compiler.h> +#include <util/system/yassert.h> + +#ifdef _MSC_VER + #include <intrin.h> +#endif + +namespace NBitOps { + namespace NPrivate { + template <unsigned N, typename T> + struct TClp2Helper { + static Y_FORCE_INLINE T Calc(T t) noexcept { + const T prev = TClp2Helper<N / 2, T>::Calc(t); + + return prev | (prev >> N); + } + }; + + template <typename T> + struct TClp2Helper<0u, T> { + static Y_FORCE_INLINE T Calc(T t) noexcept { + return t - 1; + } + }; + + extern const ui64 WORD_MASK[]; + extern const ui64 INVERSE_WORD_MASK[]; + + // see http://www-graphics.stanford.edu/~seander/bithacks.html#ReverseParallel + + Y_FORCE_INLINE ui64 SwapOddEvenBits(ui64 v) { + return ((v >> 1ULL) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1ULL); + } + + Y_FORCE_INLINE ui64 SwapBitPairs(ui64 v) { + return ((v >> 2ULL) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2ULL); + } + + Y_FORCE_INLINE ui64 SwapNibbles(ui64 v) { + return ((v >> 4ULL) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4ULL); + } + + Y_FORCE_INLINE ui64 SwapOddEvenBytes(ui64 v) { + return ((v >> 8ULL) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8ULL); + } + + Y_FORCE_INLINE ui64 SwapBytePairs(ui64 v) { + return ((v >> 16ULL) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16ULL); + } + + Y_FORCE_INLINE ui64 SwapByteQuads(ui64 v) { + return (v >> 32ULL) | (v << 32ULL); + } + +#if defined(__GNUC__) + inline unsigned GetValueBitCountImpl(unsigned int value) noexcept { + Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + return std::numeric_limits<unsigned int>::digits - __builtin_clz(value); + } + + inline unsigned GetValueBitCountImpl(unsigned long value) noexcept { + Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + return std::numeric_limits<unsigned long>::digits - __builtin_clzl(value); + } + + inline unsigned GetValueBitCountImpl(unsigned long long value) noexcept { + Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + return std::numeric_limits<unsigned long long>::digits - __builtin_clzll(value); + } +#else + /// Stupid realization for non-GCC. Can use BSR from x86 instructions set. + template <typename T> + inline unsigned GetValueBitCountImpl(T value) noexcept { + Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + unsigned result = 1; // result == 0 - impossible value, see Y_ASSERT(). + value >>= 1; + while (value) { + value >>= 1; + ++result; + } + + return result; + } +#endif + +#if defined(__GNUC__) + inline unsigned CountTrailingZeroBitsImpl(unsigned int value) noexcept { + Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero. + return __builtin_ctz(value); + } + + inline unsigned CountTrailingZeroBitsImpl(unsigned long value) noexcept { + Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero. + return __builtin_ctzl(value); + } + + inline unsigned CountTrailingZeroBitsImpl(unsigned long long value) noexcept { + Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero. + return __builtin_ctzll(value); + } +#else + /// Stupid realization for non-GCC. Can use BSF from x86 instructions set. + template <typename T> + inline unsigned CountTrailingZeroBitsImpl(T value) noexcept { + Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero. + unsigned result = 0; + while (!(value & 1)) { + value >>= 1; + ++result; + } + + return result; + } +#endif + + template <typename T> + Y_FORCE_INLINE T RotateBitsLeftImpl(T value, const ui8 shift) noexcept { + constexpr ui8 bits = sizeof(T) * 8; + constexpr ui8 mask = bits - 1; + Y_ASSERT(shift <= mask); + + // do trick with mask to avoid undefined behaviour + return (value << shift) | (value >> ((-shift) & mask)); + } + + template <typename T> + Y_FORCE_INLINE T RotateBitsRightImpl(T value, const ui8 shift) noexcept { + constexpr ui8 bits = sizeof(T) * 8; + constexpr ui8 mask = bits - 1; + Y_ASSERT(shift <= mask); + + // do trick with mask to avoid undefined behaviour + return (value >> shift) | (value << ((-shift) & mask)); + } + +#if defined(_x86_) && defined(__GNUC__) + Y_FORCE_INLINE ui8 RotateBitsRightImpl(ui8 value, ui8 shift) noexcept { + __asm__("rorb %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + + Y_FORCE_INLINE ui16 RotateBitsRightImpl(ui16 value, ui8 shift) noexcept { + __asm__("rorw %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + + Y_FORCE_INLINE ui32 RotateBitsRightImpl(ui32 value, ui8 shift) noexcept { + __asm__("rorl %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + + Y_FORCE_INLINE ui8 RotateBitsLeftImpl(ui8 value, ui8 shift) noexcept { + __asm__("rolb %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + + Y_FORCE_INLINE ui16 RotateBitsLeftImpl(ui16 value, ui8 shift) noexcept { + __asm__("rolw %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + + Y_FORCE_INLINE ui32 RotateBitsLeftImpl(ui32 value, ui8 shift) noexcept { + __asm__("roll %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + + #if defined(_x86_64_) + Y_FORCE_INLINE ui64 RotateBitsRightImpl(ui64 value, ui8 shift) noexcept { + __asm__("rorq %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + + Y_FORCE_INLINE ui64 RotateBitsLeftImpl(ui64 value, ui8 shift) noexcept { + __asm__("rolq %%cl, %0" + : "=r"(value) + : "0"(value), "c"(shift)); + return value; + } + #endif +#endif + } +} + +/** + * Computes the next power of 2 higher or equal to the integer parameter `t`. + * If `t` is a power of 2 will return `t`. + * Result is undefined for `t == 0`. + */ +template <typename T> +static inline T FastClp2(T t) noexcept { + Y_ASSERT(t > 0); + using TCvt = typename ::TUnsignedInts::template TSelectBy<TSizeOfPredicate<sizeof(T)>::template TResult>::type; + return 1 + ::NBitOps::NPrivate::TClp2Helper<sizeof(TCvt) * 4, T>::Calc(static_cast<TCvt>(t)); +} + +/** + * Check if integer is a power of 2. + */ +template <typename T> +Y_CONST_FUNCTION constexpr bool IsPowerOf2(T v) noexcept { + return v > 0 && (v & (v - 1)) == 0; +} + +/** + * Returns the number of leading 0-bits in `value`, starting at the most significant bit position. + */ +template <typename T> +static inline unsigned GetValueBitCount(T value) noexcept { + Y_ASSERT(value > 0); + using TCvt = typename ::TUnsignedInts::template TSelectBy<TSizeOfPredicate<sizeof(T)>::template TResult>::type; + return ::NBitOps::NPrivate::GetValueBitCountImpl(static_cast<TCvt>(value)); +} + +/** + * Returns the number of trailing 0-bits in `value`, starting at the least significant bit position + */ +template <typename T> +static inline unsigned CountTrailingZeroBits(T value) noexcept { + Y_ASSERT(value > 0); + using TCvt = typename ::TUnsignedInts::template TSelectBy<TSizeOfPredicate<sizeof(T)>::template TResult>::type; + return ::NBitOps::NPrivate::CountTrailingZeroBitsImpl(static_cast<TCvt>(value)); +} + +/* + * Returns 64-bit mask with `bits` lower bits set. + */ +Y_FORCE_INLINE ui64 MaskLowerBits(ui64 bits) { + return ::NBitOps::NPrivate::WORD_MASK[bits]; +} + +/* + * Return 64-bit mask with `bits` set starting from `skipbits`. + */ +Y_FORCE_INLINE ui64 MaskLowerBits(ui64 bits, ui64 skipbits) { + return MaskLowerBits(bits) << skipbits; +} + +/* + * Return 64-bit mask with all bits set except for `bits` lower bits. + */ +Y_FORCE_INLINE ui64 InverseMaskLowerBits(ui64 bits) { + return ::NBitOps::NPrivate::INVERSE_WORD_MASK[bits]; +} + +/* + * Return 64-bit mask with all bits set except for `bits` bitst starting from `skipbits`. + */ +Y_FORCE_INLINE ui64 InverseMaskLowerBits(ui64 bits, ui64 skipbits) { + return ~MaskLowerBits(bits, skipbits); +} + +/* + * Returns 0-based position of the most significant bit that is set. 0 for 0. + */ +Y_FORCE_INLINE ui64 MostSignificantBit(ui64 v) { +#ifdef __GNUC__ + ui64 res = v ? (63 - __builtin_clzll(v)) : 0; +#elif defined(_MSC_VER) && defined(_64_) + unsigned long res = 0; + if (v) + _BitScanReverse64(&res, v); +#else + ui64 res = 0; + if (v) + while (v >>= 1) + ++res; +#endif + return res; +} + +/** + * Returns 0-based position of the least significant bit that is set. 0 for 0. + */ +Y_FORCE_INLINE ui64 LeastSignificantBit(ui64 v) { +#ifdef __GNUC__ + ui64 res = v ? __builtin_ffsll(v) - 1 : 0; +#elif defined(_MSC_VER) && defined(_64_) + unsigned long res = 0; + if (v) + _BitScanForward64(&res, v); +#else + ui64 res = 0; + if (v) { + while (!(v & 1)) { + ++res; + v >>= 1; + } + } +#endif + return res; +} + +/* + * Returns 0 - based position of the most significant bit (compile time) + * 0 for 0. + */ +constexpr ui64 MostSignificantBitCT(ui64 x) { + return x > 1 ? 1 + MostSignificantBitCT(x >> 1) : 0; +} + +/* + * Return rounded up binary logarithm of `x`. + */ +Y_FORCE_INLINE ui8 CeilLog2(ui64 x) { + return static_cast<ui8>(MostSignificantBit(x - 1)) + 1; +} + +Y_FORCE_INLINE ui8 ReverseBytes(ui8 t) { + return t; +} + +Y_FORCE_INLINE ui16 ReverseBytes(ui16 t) { + return static_cast<ui16>(::NBitOps::NPrivate::SwapOddEvenBytes(t)); +} + +Y_FORCE_INLINE ui32 ReverseBytes(ui32 t) { + return static_cast<ui32>(::NBitOps::NPrivate::SwapBytePairs( + ::NBitOps::NPrivate::SwapOddEvenBytes(t))); +} + +Y_FORCE_INLINE ui64 ReverseBytes(ui64 t) { + return ::NBitOps::NPrivate::SwapByteQuads((::NBitOps::NPrivate::SwapOddEvenBytes(t))); +} + +Y_FORCE_INLINE ui8 ReverseBits(ui8 t) { + return static_cast<ui8>(::NBitOps::NPrivate::SwapNibbles( + ::NBitOps::NPrivate::SwapBitPairs( + ::NBitOps::NPrivate::SwapOddEvenBits(t)))); +} + +Y_FORCE_INLINE ui16 ReverseBits(ui16 t) { + return static_cast<ui16>(::NBitOps::NPrivate::SwapOddEvenBytes( + ::NBitOps::NPrivate::SwapNibbles( + ::NBitOps::NPrivate::SwapBitPairs( + ::NBitOps::NPrivate::SwapOddEvenBits(t))))); +} + +Y_FORCE_INLINE ui32 ReverseBits(ui32 t) { + return static_cast<ui32>(::NBitOps::NPrivate::SwapBytePairs( + ::NBitOps::NPrivate::SwapOddEvenBytes( + ::NBitOps::NPrivate::SwapNibbles( + ::NBitOps::NPrivate::SwapBitPairs( + ::NBitOps::NPrivate::SwapOddEvenBits(t)))))); +} + +Y_FORCE_INLINE ui64 ReverseBits(ui64 t) { + return ::NBitOps::NPrivate::SwapByteQuads( + ::NBitOps::NPrivate::SwapBytePairs( + ::NBitOps::NPrivate::SwapOddEvenBytes( + ::NBitOps::NPrivate::SwapNibbles( + ::NBitOps::NPrivate::SwapBitPairs( + ::NBitOps::NPrivate::SwapOddEvenBits(t)))))); +} + +/* + * Reverse first `bits` bits + * 1000111000111000 , bits = 6 => 1000111000000111 + */ +template <typename T> +Y_FORCE_INLINE T ReverseBits(T v, ui64 bits) { + return bits ? (T(v & ::InverseMaskLowerBits(bits)) | T(ReverseBits(T(v & ::MaskLowerBits(bits)))) >> ((ui64{sizeof(T)} << ui64{3}) - bits)) : v; +} + +/* + * Referse first `bits` bits starting from `skipbits` bits + * 1000111000111000 , bits = 4, skipbits = 2 => 1000111000011100 + */ +template <typename T> +Y_FORCE_INLINE T ReverseBits(T v, ui64 bits, ui64 skipbits) { + return (T(ReverseBits((v >> skipbits), bits)) << skipbits) | T(v & MaskLowerBits(skipbits)); +} + +/* Rotate bits left. Also known as left circular shift. + */ +template <typename T> +Y_FORCE_INLINE T RotateBitsLeft(T value, const ui8 shift) noexcept { + static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type"); + return ::NBitOps::NPrivate::RotateBitsLeftImpl((TFixedWidthUnsignedInt<T>)value, shift); +} + +/* Rotate bits right. Also known as right circular shift. + */ +template <typename T> +Y_FORCE_INLINE T RotateBitsRight(T value, const ui8 shift) noexcept { + static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type"); + return ::NBitOps::NPrivate::RotateBitsRightImpl((TFixedWidthUnsignedInt<T>)value, shift); +} + +/* Rotate bits left. Also known as left circular shift. + */ +template <typename T> +constexpr T RotateBitsLeftCT(T value, const ui8 shift) noexcept { + static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type"); + + // do trick with mask to avoid undefined behaviour + return (value << shift) | (value >> ((-shift) & (sizeof(T) * 8 - 1))); +} + +/* Rotate bits right. Also known as right circular shift. + */ +template <typename T> +constexpr T RotateBitsRightCT(T value, const ui8 shift) noexcept { + static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type"); + + // do trick with mask to avoid undefined behaviour + return (value >> shift) | (value << ((-shift) & (sizeof(T) * 8 - 1))); +} + +/* Remain `size` bits to current `offset` of `value` + size, offset are less than number of bits in size_type + */ +template <size_t Offset, size_t Size, class T> +Y_FORCE_INLINE T SelectBits(T value) { + static_assert(Size < sizeof(T) * 8, "violated: Size < sizeof(T) * 8"); + static_assert(Offset < sizeof(T) * 8, "violated: Offset < sizeof(T) * 8"); + T id = 1; + return (value >> Offset) & ((id << Size) - id); +} + +/* Set `size` bits of `bits` to current offset of `value`. Requires that bits <= (1 << size) - 1 + size, offset are less than number of bits in size_type + */ +template <size_t Offset, size_t Size, class T> +void SetBits(T& value, T bits) { + static_assert(Size < sizeof(T) * 8, "violated: Size < sizeof(T) * 8"); + static_assert(Offset < sizeof(T) * 8, "violated: Offset < sizeof(T) * 8"); + T id = 1; + T maxValue = ((id << Size) - id); + Y_ASSERT(bits <= maxValue); + value &= ~(maxValue << Offset); + value |= bits << Offset; +} + +inline constexpr ui64 NthBit64(int bit) { + return ui64(1) << bit; +} + +inline constexpr ui64 Mask64(int bits) { + return NthBit64(bits) - 1; +} |