aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/public/udf/arrow/bit_util.h
blob: 4dbe785aa75e839fdbbbc9305f5a6855581de2f9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#pragma once

#include "defs.h"

namespace NYql {
namespace NUdf {

inline ui8* CompressAsSparseBitmap(const ui8* src, size_t srcOffset, const ui8* sparseBitmap, ui8* dst, size_t count) {
    while (count--) {
        ui8 inputBit = (src[srcOffset >> 3] >> (srcOffset & 7)) & 1;
        *dst = inputBit;
        ++srcOffset;
        dst += *sparseBitmap++;
    }
    return dst;
}

inline ui8* DecompressToSparseBitmap(ui8* dstSparse, const ui8* src, size_t srcOffset, size_t count) {
    if (srcOffset != 0) {
        size_t offsetBytes = srcOffset >> 3;
        size_t offsetTail = srcOffset & 7;
        src += offsetBytes;
        if (offsetTail != 0) {
            for (ui8 i = offsetTail; count > 0 && i < 8; i++, count--) {
                *dstSparse++ = (*src >> i) & 1u;
            }
            src++;
        }
    }
    while (count >= 8) {
        ui8 slot = *src++;
        *dstSparse++ = (slot >> 0) & 1u;
        *dstSparse++ = (slot >> 1) & 1u;
        *dstSparse++ = (slot >> 2) & 1u;
        *dstSparse++ = (slot >> 3) & 1u;
        *dstSparse++ = (slot >> 4) & 1u;
        *dstSparse++ = (slot >> 5) & 1u;
        *dstSparse++ = (slot >> 6) & 1u;
        *dstSparse++ = (slot >> 7) & 1u;
        count -= 8;
    }
    for (ui8 i = 0; i < count; i++) {
        *dstSparse++ = (*src >> i) & 1u;
    }
    return dstSparse;
}

template<bool Negate>
inline void CompressSparseImpl(ui8* dst, const ui8* srcSparse, size_t len) {
    while (len >= 8) {
        ui8 result = 0;
        result |= (*srcSparse++ & 1u) << 0;
        result |= (*srcSparse++ & 1u) << 1;
        result |= (*srcSparse++ & 1u) << 2;
        result |= (*srcSparse++ & 1u) << 3;
        result |= (*srcSparse++ & 1u) << 4;
        result |= (*srcSparse++ & 1u) << 5;
        result |= (*srcSparse++ & 1u) << 6;
        result |= (*srcSparse++ & 1u) << 7;
        if constexpr (Negate) {
            *dst++ = ~result;
        } else {
            *dst++ = result;
        }
        len -= 8;
    }
    if (len) {
        ui8 result = 0;
        for (ui8 i = 0; i < len; ++i) {
            result |= (*srcSparse++ & 1u) << i;
        }
        if constexpr (Negate) {
            *dst++ = ~result;
        } else {
            *dst++ = result;
        }
    }
}

inline void CompressSparseBitmap(ui8* dst, const ui8* srcSparse, size_t len) {
    return CompressSparseImpl<false>(dst, srcSparse, len);
}

inline void CompressSparseBitmapNegate(ui8* dst, const ui8* srcSparse, size_t len) {
    return CompressSparseImpl<true>(dst, srcSparse, len);
}

template<typename T>
inline T* CompressArray(const T* src, const ui8* sparseBitmap, T* dst, size_t count) {
    while (count--) {
        *dst = *src++;
        dst += *sparseBitmap++;
    }
    return dst;
}

inline void CopyDenseBitmap(ui8* dst, const ui8* src, size_t srcOffset, size_t len) {
    if ((srcOffset & 7) != 0) {
        size_t offsetBytes = srcOffset >> 3;
        src += offsetBytes;

        ui8 offsetTail = srcOffset & 7;
        ui8 offsetHead = 8 - offsetTail;

        ui8 remainder = *src++ >> offsetTail;
        size_t dstOffset = offsetHead;
        for (; dstOffset < len; dstOffset += 8) {
            *dst++ = remainder | (*src << offsetHead);
            remainder = *src >> offsetTail;
            src++;
        }
        // dst is guaranteed to have extra length even if it's not needed
        *dst++ = remainder;
    } else {
        src += srcOffset >> 3;
        // Round up to 8
        len = (len + 7u) & ~size_t(7u);
        memcpy(dst, src, len >> 3);
    }
}

}
}