blob: c398f3fae70625c7ebc89501bf0435c40cb46a96 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
#pragma once
#include <util/generic/typelist.h>
#include <util/system/cpu_id.h>
#include <util/system/defaults.h>
#include <util/system/hi_lo.h>
#include <util/system/platform.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
// Population count for an 8-bit value.
//
// ppc64 builds use the `popcntb` instruction; every other build reads the
// answer from the table PopCountLUT8, indexed by the value itself. The table
// is defined elsewhere and is expected to hold the bit count of each index.
static inline ui32 PopCountImpl(ui8 n) {
#if defined(_ppc64_)
    // popcntb yields one count per byte of the source register. An 8-bit "r"
    // operand leaves the remaining register bits unspecified, so widen the
    // value first: bytes 1..3 of the source are then zero and the 32-bit
    // result holds only the count for the low byte.
    const ui32 widened = n;
    ui32 r;
    __asm__("popcntb %0, %1"
            : "=r"(r)
            : "r"(widened)
            :);
    return r;
#else
    extern ui8 const* PopCountLUT8;
    return PopCountLUT8[n];
#endif
}
// Population count for a 16-bit value.
//
// MSVC builds use the __popcnt16 intrinsic; all other builds index the table
// PopCountLUT16 (defined elsewhere) with the value itself.
static inline ui32 PopCountImpl(ui16 n) {
#if !defined(_MSC_VER)
    extern ui8 const* PopCountLUT16;
    const ui8 bits = PopCountLUT16[n];
    return bits;
#else
    return __popcnt16(n);
#endif
}
// Population count for a 32-bit value.
//
// Strategy by platform:
//   - MSVC:    the __popcnt intrinsic;
//   - x86_64:  the `popcnt` instruction when NX86::CachedHavePOPCNT() reports
//              it, otherwise the sum of the counts of the two 16-bit halves;
//   - ppc64:   the `popcntw` instruction;
//   - other:   the sum of the counts of the two 16-bit halves.
static inline ui32 PopCountImpl(ui32 n) {
#if defined(_MSC_VER)
    return __popcnt(n);
#elif defined(_x86_64_)
    // Without the instruction, fall back to the 16-bit overload per half.
    if (!NX86::CachedHavePOPCNT()) {
        return PopCountImpl((ui16)Lo16(n)) + PopCountImpl((ui16)Hi16(n));
    }

    ui32 bits;
    __asm__("popcnt %1, %0;"
            : "=r"(bits)
            : "r"(n)
            :);
    return bits;
#elif defined(_ppc64_)
    ui32 bits;
    __asm__("popcntw %0, %1"
            : "=r"(bits)
            : "r"(n)
            :);
    return bits;
#else
    return PopCountImpl((ui16)Lo16(n)) + PopCountImpl((ui16)Hi16(n));
#endif
}
// Population count for a 64-bit value.
//
// Strategy by platform:
//   - MSVC (except _i386_ builds): the __popcnt64 intrinsic;
//   - x86_64:  the `popcnt` instruction when NX86::CachedHavePOPCNT() reports
//              it, otherwise the sum of the counts of the two 32-bit halves;
//   - ppc64:   the `popcntd` instruction;
//   - other:   the sum of the counts of the two 32-bit halves.
static inline ui32 PopCountImpl(ui64 n) {
#if defined(_MSC_VER) && !defined(_i386_)
    return __popcnt64(n);
#elif defined(_x86_64_)
    // Without the instruction, fall back to the 32-bit overload per half.
    if (!NX86::CachedHavePOPCNT()) {
        return PopCountImpl((ui32)Lo32(n)) + PopCountImpl((ui32)Hi32(n));
    }

    // The operand is 64 bits wide, so the destination has to be as well;
    // the value is narrowed to ui32 on return.
    ui64 bits;
    __asm__("popcnt %1, %0;"
            : "=r"(bits)
            : "r"(n)
            :);
    return static_cast<ui32>(bits);
#elif defined(_ppc64_)
    ui32 bits;
    __asm__("popcntd %0, %1"
            : "=r"(bits)
            : "r"(n)
            :);
    return bits;
#else
    return PopCountImpl((ui32)Lo32(n)) + PopCountImpl((ui32)Hi32(n));
#endif
}
// Generic entry point: converts `n` to TFixedWidthUnsignedInt<T> and forwards
// it to the PopCountImpl overload selected for that type.
template <class T>
static inline ui32 PopCount(T n) {
    using TUnsigned = TFixedWidthUnsignedInt<T>;
    const TUnsigned converted = (TUnsigned)n;
    return PopCountImpl(converted);
}
|