contrib/clickhouse/src/Formats/MarkInCompressedFile.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

#pragma once

#include <tuple>

#include <IO/WriteHelpers.h>
#include <base/types.h>
#include <Common/PODArray.h>


namespace DB
{

/** Mark is the position in the compressed file. The compressed file consists of adjacent compressed blocks.
  * Mark is a tuple - the offset in the file to the start of the compressed block, the offset in the decompressed block to the start of the data.
  */
struct MarkInCompressedFile
{
    size_t offset_in_compressed_file;
    size_t offset_in_decompressed_block;

    bool operator==(const MarkInCompressedFile & rhs) const
    {
        return std::tie(offset_in_compressed_file, offset_in_decompressed_block)
            == std::tie(rhs.offset_in_compressed_file, rhs.offset_in_decompressed_block);
    }
    bool operator!=(const MarkInCompressedFile & rhs) const { return !(*this == rhs); }

    auto asTuple() const { return std::make_tuple(offset_in_compressed_file, offset_in_decompressed_block); }

    String toString() const
    {
        return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")";
    }

    String toStringWithRows(size_t rows_num) const
    {
        return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ","
            + DB::toString(rows_num) + ")";
    }
};

/**
 * In-memory representation of an array of marks.
 *
 * Uses an ad-hoc compression scheme that decreases memory usage while allowing
 * random access in O(1) time.
 * This is independent from the marks *file* format, which may be uncompressed
 * or use a different compression method.
 *
 * Typical memory usage:
 *  * ~3 bytes/mark for integer columns
 *  * ~5 bytes/mark for string columns
 *  * ~0.3 bytes/mark for trivial marks in auxiliary dict files of LowCardinality columns
 */
class MarksInCompressedFile
{
public:
    using PlainArray = PODArray<MarkInCompressedFile>;

    MarksInCompressedFile(const PlainArray & marks);

    MarkInCompressedFile get(size_t idx) const;

    size_t approximateMemoryUsage() const;

private:
    /** Throughout this class:
     *   * "x" stands for offset_in_compressed_file,
     *   * "y" stands for offset_in_decompressed_block.
     */

    /** We need to store a sequence of marks, each consisting of two 64-bit integers:
     * offset_in_compressed_file and offset_in_decompressed_block. We'll call them x and y for
     * convenience, since compression doesn't care what they mean. The compression exploits the
     * following regularities:
     *  * y is usually zero.
     *  * x usually increases steadily.
     *  * Differences between x values in nearby marks usually fit in much fewer than 64 bits.
     *
     * We split the sequence of marks into blocks, each containing MARKS_PER_BLOCK marks.
     * (Not to be confused with data blocks.)
     * For each mark, we store the difference [value] - [min value in the block], for each of the
     * two values in the mark. Each block specifies the number of bits to use for these differences
     * for all marks in this block.
     * The smaller the blocks the fewer bits are required, but the bigger the relative overhead of
     * block headers.
     *
     * Packed marks and block headers all live in one contiguous array.
     */

    struct BlockInfo
    {
        // Min offset_in_compressed_file and offset_in_decompressed_block, correspondingly.
        size_t min_x = UINT64_MAX;
        size_t min_y = UINT64_MAX;

        // Place in `packed` where this block start.
        size_t bit_offset_in_packed_array;

        // How many bits each mark takes. These numbers are bit-packed in the `packed` array.
        // Can be zero. (Especially for y, which is typically all zeroes.)
        UInt8 bits_for_x;
        UInt8 bits_for_y;
        // The `y` values should be <<'ed by this amount.
        // Useful for integer columns when marks granularity is a power of 2; in this case all
        // offset_in_decompressed_block values are divisible by 2^15 or so.
        UInt8 trailing_zero_bits_in_y = 63;
    };

    static constexpr size_t MARKS_PER_BLOCK = 256;

    size_t num_marks;
    PODArray<BlockInfo> blocks;
    PODArray<UInt64> packed;

    // Mark idx -> {block info, bit offset in `packed`}.
    std::tuple<const BlockInfo *, size_t> lookUpMark(size_t idx) const;
};

}