diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/pop_count | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/pop_count')
-rw-r--r-- | library/cpp/pop_count/benchmark/main.cpp | 52 | ||||
-rw-r--r-- | library/cpp/pop_count/benchmark/ya.make | 14 | ||||
-rw-r--r-- | library/cpp/pop_count/popcount.cpp | 30 | ||||
-rw-r--r-- | library/cpp/pop_count/popcount.h | 105 | ||||
-rw-r--r-- | library/cpp/pop_count/popcount_ut.cpp | 70 | ||||
-rw-r--r-- | library/cpp/pop_count/ut/ya.make | 9 | ||||
-rw-r--r-- | library/cpp/pop_count/ya.make | 9 |
7 files changed, 289 insertions, 0 deletions
diff --git a/library/cpp/pop_count/benchmark/main.cpp b/library/cpp/pop_count/benchmark/main.cpp new file mode 100644 index 0000000000..41ea3c91cc --- /dev/null +++ b/library/cpp/pop_count/benchmark/main.cpp @@ -0,0 +1,52 @@ +#include <util/stream/output.h> +#include <util/datetime/cputimer.h> +#include <util/system/type_name.h> + +#include <library/cpp/pop_count/popcount.h> +#include <library/cpp/testing/benchmark/bench.h> + +template <class F, class I> +inline void DoRun(F&& f, I&& i) { + const ui64 n = i.Iterations(); + + for (ui64 j = 0; j < n; ++j) { + Y_DO_NOT_OPTIMIZE_AWAY(f(j * (ui64)123456 + (ui64)1)); + } +} + +Y_CPU_BENCHMARK(PopCount_8, iface) { + DoRun([](ui8 x) { + return PopCount<ui8>(x); + }, + iface); +} + +Y_CPU_BENCHMARK(PopCount_16, iface) { + DoRun([](ui16 x) { + return PopCount<ui16>(x); + }, + iface); +} + +Y_CPU_BENCHMARK(PopCount_32, iface) { + DoRun([](ui32 x) { + return PopCount<ui32>(x); + }, + iface); +} + +Y_CPU_BENCHMARK(PopCount_64, iface) { + DoRun([](ui64 x) { + return PopCount<ui64>(x); + }, + iface); +} + +#if !defined(_MSC_VER) +Y_CPU_BENCHMARK(BUILTIN_64, iface) { + DoRun([](ui64 x) { + return __builtin_popcountll(x); + }, + iface); +} +#endif diff --git a/library/cpp/pop_count/benchmark/ya.make b/library/cpp/pop_count/benchmark/ya.make new file mode 100644 index 0000000000..7fb54a519a --- /dev/null +++ b/library/cpp/pop_count/benchmark/ya.make @@ -0,0 +1,14 @@ +OWNER(g:util) + +Y_BENCHMARK() + +PEERDIR( + util/draft + library/cpp/pop_count +) + +SRCS( + main.cpp +) + +END() diff --git a/library/cpp/pop_count/popcount.cpp b/library/cpp/pop_count/popcount.cpp new file mode 100644 index 0000000000..49276424be --- /dev/null +++ b/library/cpp/pop_count/popcount.cpp @@ -0,0 +1,30 @@ +#include "popcount.h" + +#include <util/system/defaults.h> +#include <util/system/yassert.h> + +#include <string.h> + +static const ui8 PopCountLUT8Impl[1 << 8] = { +#define B2(n) n, n + 1, n + 1, n + 2 +#define B4(n) B2(n), B2(n + 1), B2(n + 1), B2(n + 2) +#define B6(n) B4(n), B4(n + 1), B4(n + 1), B4(n + 2) + B6(0), B6(1), B6(1), B6(2)}; + +ui8 const* PopCountLUT8 = PopCountLUT8Impl; + +#if !defined(_MSC_VER) +//ICE here for msvc + +static const ui8 PopCountLUT16Impl[1 << 16] = { +#define B2(n) n, n + 1, n + 1, n + 2 +#define B4(n) B2(n), B2(n + 1), B2(n + 1), B2(n + 2) +#define B6(n) B4(n), B4(n + 1), B4(n + 1), B4(n + 2) +#define B8(n) B6(n), B6(n + 1), B6(n + 1), B6(n + 2) +#define B10(n) B8(n), B8(n + 1), B8(n + 1), B8(n + 2) +#define B12(n) B10(n), B10(n + 1), B10(n + 1), B10(n + 2) +#define B14(n) B12(n), B12(n + 1), B12(n + 1), B12(n + 2) + B14(0), B14(1), B14(1), B14(2)}; + +ui8 const* PopCountLUT16 = PopCountLUT16Impl; +#endif diff --git a/library/cpp/pop_count/popcount.h b/library/cpp/pop_count/popcount.h new file mode 100644 index 0000000000..3d67737ed2 --- /dev/null +++ b/library/cpp/pop_count/popcount.h @@ -0,0 +1,105 @@ +#pragma once + +#include <util/generic/typelist.h> +#include <util/system/cpu_id.h> +#include <util/system/defaults.h> +#include <util/system/hi_lo.h> +#include <util/system/platform.h> + +#if defined(_MSC_VER) +#include <intrin.h> +#endif + +static inline ui32 PopCountImpl(ui8 n) { +#if defined(_ppc64_) + ui32 r; + __asm__("popcntb %0, %1" + : "=r"(r) + : "r"(n) + :); + return r; +#else + extern ui8 const* PopCountLUT8; + return PopCountLUT8[n]; +#endif +} + +static inline ui32 PopCountImpl(ui16 n) { +#if defined(_MSC_VER) + return __popcnt16(n); +#else + extern ui8 const* PopCountLUT16; + return PopCountLUT16[n]; +#endif +} + +static inline ui32 PopCountImpl(ui32 n) { +#if defined(_MSC_VER) + return __popcnt(n); +#else +#if defined(_x86_64_) + if (NX86::CachedHavePOPCNT()) { + ui32 r; + + __asm__("popcnt %1, %0;" + : "=r"(r) + : "r"(n) + :); + + return r; + } +#else +#if defined(_ppc64_) + ui32 r; + + __asm__("popcntw %0, %1" + : "=r"(r) + : "r"(n) + :); + + return r; +#endif +#endif + + return PopCountImpl((ui16)Lo16(n)) + PopCountImpl((ui16)Hi16(n)); +#endif +} + +static inline ui32 PopCountImpl(ui64 n) { +#if defined(_MSC_VER) && !defined(_i386_) + return __popcnt64(n); +#else +#if defined(_x86_64_) + if (NX86::CachedHavePOPCNT()) { + ui64 r; + + __asm__("popcnt %1, %0;" + : "=r"(r) + : "r"(n) + :); + + return r; + } +#else +#if defined(_ppc64_) + ui32 r; + + __asm__("popcntd %0, %1" + : "=r"(r) + : "r"(n) + :); + + return r; +#endif +#endif + + return PopCountImpl((ui32)Lo32(n)) + PopCountImpl((ui32)Hi32(n)); +#endif +} + +template <class T> +static inline ui32 PopCount(T n) { + using TCvt = TFixedWidthUnsignedInt<T>; + + return PopCountImpl((TCvt)n); +} diff --git a/library/cpp/pop_count/popcount_ut.cpp b/library/cpp/pop_count/popcount_ut.cpp new file mode 100644 index 0000000000..5cd6605411 --- /dev/null +++ b/library/cpp/pop_count/popcount_ut.cpp @@ -0,0 +1,70 @@ +#include "popcount.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/random/random.h> + +Y_UNIT_TEST_SUITE(TestPopCount) { + template <class T> + static inline ui32 SlowPopCount(T t) { + ui32 ret = 0; + + while (t) { + if (t & T(1)) { + ++ret; + } + + t = t >> 1; + } + + return ret; + } + + template <class T> + static inline void Test() { + for (size_t i = 0; i < 10000; ++i) { + const T rndv = RandomNumber<T>(); + + UNIT_ASSERT_VALUES_EQUAL(SlowPopCount(rndv), PopCount(rndv)); + } + } + + Y_UNIT_TEST(Test8) { + Test<ui8>(); + } + + Y_UNIT_TEST(Test16) { + Test<ui16>(); + } + + Y_UNIT_TEST(Test32) { + Test<ui32>(); + } + + Y_UNIT_TEST(Test64) { + Test<ui64>(); + } + + Y_UNIT_TEST(TestPopCount) { + UNIT_ASSERT_VALUES_EQUAL(PopCount(0), 0); + UNIT_ASSERT_VALUES_EQUAL(PopCount(1), 1); + UNIT_ASSERT_VALUES_EQUAL(PopCount(1 << 10), 1); + UNIT_ASSERT_VALUES_EQUAL(PopCount((1 << 10) + 1), 2); + UNIT_ASSERT_VALUES_EQUAL(PopCount(0xFFFF), 16); + UNIT_ASSERT_VALUES_EQUAL(PopCount(0xFFFFFFFF), 32); + UNIT_ASSERT_VALUES_EQUAL(PopCount(0x55555555), 16); + + UNIT_ASSERT_VALUES_EQUAL(0, PopCount(0ULL)); + UNIT_ASSERT_VALUES_EQUAL(1, PopCount(1ULL)); + UNIT_ASSERT_VALUES_EQUAL(16, PopCount(0xAAAAAAAAULL)); + UNIT_ASSERT_VALUES_EQUAL(32, PopCount(0xFFFFFFFFULL)); + UNIT_ASSERT_VALUES_EQUAL(32, PopCount(0xAAAAAAAAAAAAAAAAULL)); + UNIT_ASSERT_VALUES_EQUAL(64, PopCount(0xFFFFFFFFFFFFFFFFULL)); + + ui64 v = 0; + + for (int i = 0; i < 64; v |= 1ULL << i, ++i) { + UNIT_ASSERT_VALUES_EQUAL(i, PopCount(v)); + } + } +} diff --git a/library/cpp/pop_count/ut/ya.make b/library/cpp/pop_count/ut/ya.make new file mode 100644 index 0000000000..f0e6c014e5 --- /dev/null +++ b/library/cpp/pop_count/ut/ya.make @@ -0,0 +1,9 @@ +UNITTEST_FOR(library/cpp/pop_count) + +OWNER(g:util) + +SRCS( + popcount_ut.cpp +) + +END() diff --git a/library/cpp/pop_count/ya.make b/library/cpp/pop_count/ya.make new file mode 100644 index 0000000000..0dec238979 --- /dev/null +++ b/library/cpp/pop_count/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(g:util) + +SRCS( + popcount.cpp +) + +END() |