#pragma once

#include <Storages/MergeTree/MergeTreeDataWriter.h>

namespace DB
{

struct SyncInsertBlockInfo
{
    SyncInsertBlockInfo(
        Poco::Logger * /*log_*/,
        std::string && block_id_,
        BlockWithPartition && /*block_*/,
        std::optional<BlockWithPartition> && /*unmerged_block_with_partition_*/)
        : block_id(std::move(block_id_))
    {
    }

    explicit SyncInsertBlockInfo(std::string block_id_)
        : block_id(std::move(block_id_))
    {}

    std::string block_id;
};

struct AsyncInsertBlockInfo
{
    Poco::Logger * log;
    std::vector<std::string> block_id;
    BlockWithPartition block_with_partition;
    /// Some merging algorithms can modify the block, losing the information about the async insert offsets.
    /// When preprocessing or filtering data for async insert deduplication, we want to use the initial, unmerged block.
    std::optional<BlockWithPartition> unmerged_block_with_partition;
    std::unordered_map<String, std::vector<size_t>> block_id_to_offset_idx;

    AsyncInsertBlockInfo(
        Poco::Logger * log_,
        std::vector<std::string> && block_id_,
        BlockWithPartition && block_,
        std::optional<BlockWithPartition> && unmerged_block_with_partition_);

    void initBlockIDMap();
    /// Checks whether the block contains duplicate inserts.
    /// If so, keep only one insert for each set of duplicates.
    bool filterSelfDuplicate();

    /// Remove the conflicting parts of the block so they can be rewritten.
    void filterBlockDuplicate(const std::vector<String> & block_paths, bool self_dedup);
    /// Convert block id vector to string. Output at most 50 ids.
    static std::vector<String> getHashesForBlocks(BlockWithPartition & block, String partition_id);
};

}
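
/// Illustrative only: a minimal sketch (not part of the original header) of how
/// AsyncInsertBlockInfo might be used when deduplicating async inserts. The variables
/// block_ids, block_with_partition, unmerged_block and already_written_block_paths are
/// hypothetical placeholders supplied by the caller.
///
///     AsyncInsertBlockInfo info(
///         &Poco::Logger::get("AsyncInsertExample"),
///         std::move(block_ids),                  /// one block id per async insert chunk
///         std::move(block_with_partition),       /// (possibly merged) block to be written
///         std::move(unmerged_block));            /// original block, kept for offset information
///
///     /// First drop chunks that are duplicated inside the block itself.
///     bool had_self_duplicates = info.filterSelfDuplicate();
///
///     /// Then drop chunks whose block ids conflict with blocks already committed
///     /// (already_written_block_paths would typically come from the deduplication log).
///     info.filterBlockDuplicate(already_written_block_paths, /*self_dedup=*/false);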