1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
#pragma once
#include "defs.h"
namespace NYql {
namespace NUdf {
inline ui8* CompressAsSparseBitmap(const ui8* src, size_t srcOffset, const ui8* sparseBitmap, ui8* dst, size_t count) {
while (count--) {
ui8 inputBit = (src[srcOffset >> 3] >> (srcOffset & 7)) & 1;
*dst = inputBit;
++srcOffset;
dst += *sparseBitmap++;
}
return dst;
}
inline ui8* DecompressToSparseBitmap(ui8* dstSparse, const ui8* src, size_t srcOffset, size_t count) {
if (srcOffset != 0) {
size_t offsetBytes = srcOffset >> 3;
size_t offsetTail = srcOffset & 7;
src += offsetBytes;
if (offsetTail != 0) {
for (ui8 i = offsetTail; count > 0 && i < 8; i++, count--) {
*dstSparse++ = (*src >> i) & 1u;
}
src++;
}
}
while (count >= 8) {
ui8 slot = *src++;
*dstSparse++ = (slot >> 0) & 1u;
*dstSparse++ = (slot >> 1) & 1u;
*dstSparse++ = (slot >> 2) & 1u;
*dstSparse++ = (slot >> 3) & 1u;
*dstSparse++ = (slot >> 4) & 1u;
*dstSparse++ = (slot >> 5) & 1u;
*dstSparse++ = (slot >> 6) & 1u;
*dstSparse++ = (slot >> 7) & 1u;
count -= 8;
}
for (ui8 i = 0; i < count; i++) {
*dstSparse++ = (*src >> i) & 1u;
}
return dstSparse;
}
template<bool Negate>
inline void CompressSparseImpl(ui8* dst, const ui8* srcSparse, size_t len) {
while (len >= 8) {
ui8 result = 0;
result |= (*srcSparse++ & 1u) << 0;
result |= (*srcSparse++ & 1u) << 1;
result |= (*srcSparse++ & 1u) << 2;
result |= (*srcSparse++ & 1u) << 3;
result |= (*srcSparse++ & 1u) << 4;
result |= (*srcSparse++ & 1u) << 5;
result |= (*srcSparse++ & 1u) << 6;
result |= (*srcSparse++ & 1u) << 7;
if constexpr (Negate) {
*dst++ = ~result;
} else {
*dst++ = result;
}
len -= 8;
}
if (len) {
ui8 result = 0;
for (ui8 i = 0; i < len; ++i) {
result |= (*srcSparse++ & 1u) << i;
}
if constexpr (Negate) {
*dst++ = ~result;
} else {
*dst++ = result;
}
}
}
inline void CompressSparseBitmap(ui8* dst, const ui8* srcSparse, size_t len) {
return CompressSparseImpl<false>(dst, srcSparse, len);
}
inline void CompressSparseBitmapNegate(ui8* dst, const ui8* srcSparse, size_t len) {
return CompressSparseImpl<true>(dst, srcSparse, len);
}
template<typename T>
inline T* CompressArray(const T* src, const ui8* sparseBitmap, T* dst, size_t count) {
while (count--) {
*dst = *src++;
dst += *sparseBitmap++;
}
return dst;
}
inline void CopyDenseBitmap(ui8* dst, const ui8* src, size_t srcOffset, size_t len) {
if ((srcOffset & 7) != 0) {
size_t offsetBytes = srcOffset >> 3;
src += offsetBytes;
ui8 offsetTail = srcOffset & 7;
ui8 offsetHead = 8 - offsetTail;
ui8 remainder = *src++ >> offsetTail;
size_t dstOffset = offsetHead;
for (; dstOffset < len; dstOffset += 8) {
*dst++ = remainder | (*src << offsetHead);
remainder = *src >> offsetTail;
src++;
}
// dst is guaranteed to have extra length even if it's not needed
*dst++ = remainder;
} else {
src += srcOffset >> 3;
// Round up to 8
len = (len + 7u) & ~size_t(7u);
memcpy(dst, src, len >> 3);
}
}
}
}
|