#pragma once

#include <Storages/MergeTree/MergeTreeDataWriter.h>

namespace DB
{

struct SyncInsertBlockInfo
{
    SyncInsertBlockInfo(
        Poco::Logger * /*log_*/,
        std::string && block_id_,
        BlockWithPartition && /*block_*/,
        std::optional<BlockWithPartition> && /*unmerged_block_with_partition_*/)
        : block_id(std::move(block_id_))
    {
    }

    explicit SyncInsertBlockInfo(std::string block_id_)
        : block_id(std::move(block_id_))
    {}

    std::string block_id;
};

struct AsyncInsertBlockInfo
{
    Poco::Logger * log;
    std::vector<std::string> block_id;
    BlockWithPartition block_with_partition;
    /// Some merging algorithms can modify the block, losing the information about the async insert offsets.
    /// When preprocessing or filtering data for async insert deduplication, we want to use the initial, unmerged block.
    std::optional<BlockWithPartition> unmerged_block_with_partition;
    std::unordered_map<String, std::vector<size_t>> block_id_to_offset_idx;

    AsyncInsertBlockInfo(
        Poco::Logger * log_,
        std::vector<std::string> && block_id_,
        BlockWithPartition && block_,
        std::optional<BlockWithPartition> && unmerged_block_with_partition_);

    void initBlockIDMap();
    /// Checks whether the block contains duplicate inserts.
    /// If so, keep only one insert for each set of duplicates.
    bool filterSelfDuplicate();

    /// Remove the conflicting parts of the block so they can be rewritten.
    void filterBlockDuplicate(const std::vector<String> & block_paths, bool self_dedup);
    /// Convert block id vector to string. Output at most 50 ids.
    static std::vector<String> getHashesForBlocks(BlockWithPartition & block, String partition_id);
};

}
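
/// Illustrative only: a minimal sketch (not part of the original header) of how
/// AsyncInsertBlockInfo might be used when deduplicating async inserts. The variables
/// block_ids, block_with_partition, unmerged_block and already_written_block_paths are
/// hypothetical placeholders supplied by the caller.
///
///     AsyncInsertBlockInfo info(
///         &Poco::Logger::get("AsyncInsertExample"),
///         std::move(block_ids),                  /// one block id per async insert chunk
///         std::move(block_with_partition),       /// (possibly merged) block to be written
///         std::move(unmerged_block));            /// original block, kept for offset information
///
///     /// First drop chunks that are duplicated inside the block itself.
///     bool had_self_duplicates = info.filterSelfDuplicate();
///
///     /// Then drop chunks whose block ids conflict with blocks already committed
///     /// (already_written_block_paths would typically come from the deduplication log).
///     info.filterBlockDuplicate(already_written_block_paths, /*self_dedup=*/false);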