#pragma once
#include <Core/Types.h>
#include <base/StringRef.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Disks/IDisk.h>
#include <map>
#include <list>
#include <mutex>
#include <string>
#include <unordered_map>
namespace DB
{
/// Description of the deduplication log
struct MergeTreeDeduplicationLogNameDescription
{
/// Path to log
std::string path;
/// How many entries we have in log
size_t entries_count;
};
/// Simple string-keyed hash table with a fixed maximum size, based on STL containers.
/// Preserves insertion order using a linked list and removes elements
/// in FIFO order on overflow.
template <typename V>
class LimitedOrderedHashMap
{
private:
struct ListNode
{
std::string key;
V value;
};
using Queue = std::list<ListNode>;
using IndexMap = std::unordered_map<StringRef, typename Queue::iterator, StringRefHash>;
Queue queue;
IndexMap map;
size_t max_size;
public:
using iterator = typename Queue::iterator;
using const_iterator = typename Queue::const_iterator;
using reverse_iterator = typename Queue::reverse_iterator;
using const_reverse_iterator = typename Queue::const_reverse_iterator;
explicit LimitedOrderedHashMap(size_t max_size_)
: max_size(max_size_)
{}
bool contains(const std::string & key) const
{
return map.find(key) != map.end();
}
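/// Return the value stored for the key. Throws std::out_of_range if the key is absent.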
V get(const std::string & key) const
{
return map.at(key)->value;
}
size_t size() const
{
return queue.size();
}
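/// Change the capacity; evicts the oldest entries in FIFO order until the map fits the new limit.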
void setMaxSize(size_t max_size_)
{
max_size = max_size_;
while (size() > max_size)
{
map.erase(queue.front().key);
queue.pop_front();
}
}
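/// Remove the key. Returns false if the key was not present.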
bool erase(const std::string & key)
{
auto it = map.find(key);
if (it == map.end())
return false;
auto queue_itr = it->second;
map.erase(it);
queue.erase(queue_itr);
return true;
}
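/// Insert a key-value pair. Returns false and leaves the stored value unchanged if the key
/// already exists. Evicts the oldest entry when the map is full.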
bool insert(const std::string & key, const V & value)
{
auto it = map.find(key);
if (it != map.end())
return false;
if (size() == max_size)
{
map.erase(queue.front().key);
queue.pop_front();
}
ListNode elem{key, value};
auto itr = queue.insert(queue.end(), elem);
map.emplace(itr->key, itr);
return true;
}
void clear()
{
map.clear();
queue.clear();
}
iterator begin() { return queue.begin(); }
const_iterator begin() const { return queue.cbegin(); }
iterator end() { return queue.end(); }
const_iterator end() const { return queue.cend(); }
reverse_iterator rbegin() { return queue.rbegin(); }
const_reverse_iterator rbegin() const { return queue.crbegin(); }
reverse_iterator rend() { return queue.rend(); }
const_reverse_iterator rend() const { return queue.crend(); }
};
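/// A usage sketch for the container above (illustrative only, not part of the interface):
/// it keeps insertion order and evicts the oldest key once max_size is reached.
///
///     LimitedOrderedHashMap<int> cache(2);
///     cache.insert("a", 1);   // true
///     cache.insert("b", 2);   // true
///     cache.insert("a", 42);  // false: key exists, stored value stays 1
///     cache.insert("c", 3);   // true: "a" (the oldest entry) is evicted
///     cache.contains("a");    // false
///     cache.get("c");         // 3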
/// Fixed-size log for deduplication in non-replicated MergeTree.
/// Stores records on disk for zero-level parts in human-readable format:
/// operation part_name partition_id_check_sum
/// 1 88_18_18_0 88_10619499460461868496_9553701830997749308
/// 2 77_14_14_0 77_15147918179036854170_6725063583757244937
/// 2 77_15_15_0 77_14977227047908934259_8047656067364802772
/// 1 77_20_20_0 77_15147918179036854170_6725063583757244937
/// Also stores them in memory in a hash table of limited size.
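/// (In the example records above, the first column is the operation code; judging by the
/// implementation, 1 appears to mark adding a part and 2 dropping it.)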
class MergeTreeDeduplicationLog
{
public:
MergeTreeDeduplicationLog(
const std::string & logs_dir_,
size_t deduplication_window_,
const MergeTreeDataFormatVersion & format_version_,
DiskPtr disk_);
/// Add a part to the in-memory hash table and to disk.
/// Returns true and the part info if the insertion was successful.
/// Otherwise, in case of a duplicate, returns false and the info of the previous part with the same hash (useful for logging).
std::pair<MergeTreePartInfo, bool> addPart(const std::string & block_id, const MergeTreePartInfo & part);
/// Remove all covered parts from the in-memory table and add DROP records to the disk
void dropPart(const MergeTreePartInfo & drop_part_info);
/// Load history from disk. Ignores broken logs.
void load();
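/// Change the deduplication window at runtime; the in-memory table is shrunk accordingly.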
void setDeduplicationWindowSize(size_t deduplication_window_);
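/// Finalize the current log file and stop accepting new records.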
void shutdown();
~MergeTreeDeduplicationLog();
private:
const std::string logs_dir;
/// Size of deduplication window
size_t deduplication_window;
/// How often we create new logs. Not very important;
/// by default it equals deduplication_window * 2.
size_t rotate_interval;
const MergeTreeDataFormatVersion format_version;
/// Current log number. Monotonically increasing.
size_t current_log_number = 0;
/// All existing logs in order of their numbers
std::map<size_t, MergeTreeDeduplicationLogNameDescription> existing_logs;
/// In-memory hash table
LimitedOrderedHashMap<MergeTreePartInfo> deduplication_map;
/// Writer to the current log file
std::unique_ptr<WriteBufferFromFileBase> current_writer;
/// Single mutex guarding all state, because there can be many concurrent inserts
std::mutex state_mutex;
/// Disk where log is stored
DiskPtr disk;
bool stopped{false};
/// Start a new log
void rotate();
/// Remove old logs whose records are no longer needed for the current deduplication_window
void dropOutdatedLogs();
/// Execute both previous methods if needed
void rotateAndDropIfNeeded();
/// Load a single log from disk. Throws an exception in case of corruption.
size_t loadSingleLog(const std::string & path);
};
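/// A usage sketch of the class above (the path and variables below are placeholders, not part of the API):
///
///     MergeTreeDeduplicationLog dedup_log("/path/to/dedup_logs", /* window */ 1000, format_version, disk);
///     dedup_log.load();                                            // replay existing logs from disk
///     auto [prev_part, inserted] = dedup_log.addPart(block_id, part_info);
///     if (!inserted)
///         /* duplicate: prev_part describes the part previously written with the same block_id */;
///     dedup_log.shutdown();                                        // finalize before destruction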
}