1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
#pragma once
#include <Core/BlockInfo.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/NamesAndTypes.h>
#include <initializer_list>
#include <list>
#include <set>
#include <vector>
#include <sparsehash/dense_hash_map>
namespace DB
{
/** A set of columns covering one bunch of rows held in memory; this is the unit of data processing.
  * Besides the columns themselves it keeps their metadata: the data type and the name of every column
  * (names are either the original ones from a table or generated during temporary calculations).
  * Columns can be inserted or removed at an arbitrary position, and their order can be changed.
  */
class Block
{
private:
    using Container = ColumnsWithTypeAndName;
    using IndexByName = std::unordered_map<String, size_t>;

    /// The columns, in block order.
    Container data;
    /// Name -> position lookup. NOTE(review): presumably kept in sync with `data` by the
    /// mutating methods; their definitions are not in this header.
    IndexByName index_by_name;

public:
    BlockInfo info;

    Block() = default;
    Block(std::initializer_list<ColumnWithTypeAndName> il);
    Block(const ColumnsWithTypeAndName & data_); /// NOLINT
    Block(ColumnsWithTypeAndName && data_); /// NOLINT

    /// Inserts the column at the given position.
    void insert(size_t position, ColumnWithTypeAndName elem);
    /// Appends the column at the end.
    void insert(ColumnWithTypeAndName elem);
    /// Appends the column at the end unless a column with that name is already present.
    void insertUnique(ColumnWithTypeAndName elem);
    /// Removes the column at the given position.
    void erase(size_t position);
    /// Removes the columns at the given positions.
    void erase(const std::set<size_t> & positions);
    /// Removes the column with the given name.
    void erase(const String & name);

    /// References are invalidated after calling functions above.

    /// Direct indexing into the column container.
    ColumnWithTypeAndName & getByPosition(size_t position)
    {
        return data[position];
    }
    const ColumnWithTypeAndName & getByPosition(size_t position) const
    {
        return data[position];
    }

    /// NOTE(review): presumably the range-checked counterpart of getByPosition — confirm in the definition.
    ColumnWithTypeAndName & safeGetByPosition(size_t position);
    const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;

    /// Mutable lookup by name: forwards to the const overload and removes constness from its result.
    ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false)
    {
        const Block & self = *this;
        return const_cast<ColumnWithTypeAndName *>(self.findByName(name, case_insensitive));
    }
    const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;

    /// Mutable lookup by name: forwards to the const overload and removes constness from its result.
    ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
    {
        const Block & self = *this;
        return const_cast<ColumnWithTypeAndName &>(self.getByName(name, case_insensitive));
    }
    const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;

    /// Iteration over the columns in block order.
    Container::iterator begin() { return data.begin(); }
    Container::iterator end() { return data.end(); }
    Container::const_iterator begin() const { return data.begin(); }
    Container::const_iterator end() const { return data.end(); }
    Container::const_iterator cbegin() const { return data.cbegin(); }
    Container::const_iterator cend() const { return data.cend(); }

    bool has(const std::string & name, bool case_insensitive = false) const;
    size_t getPositionByName(const std::string & name) const;

    const ColumnsWithTypeAndName & getColumnsWithTypeAndName() const;
    NamesAndTypesList getNamesAndTypesList() const;
    NamesAndTypes getNamesAndTypes() const;
    Names getNames() const;
    DataTypes getDataTypes() const;
    Names getDataTypeNames() const;

    /// Hash table that maps `column name -> position in the block`.
    using NameMap = ::google::dense_hash_map<StringRef, size_t, StringRefHash>;
    NameMap getNamesToIndexesMap() const;

    Serializations getSerializations() const;

    /// Number of rows, taken from the first column in the block that is not nullptr. Returns 0 if there are no columns.
    size_t rows() const;

    /// Number of columns in the block.
    size_t columns() const
    {
        return data.size();
    }

    /// Checks that every column in the block is not nullptr and that all columns have the same number of elements.
    void checkNumberOfRows(bool allow_null_columns = false) const;

    /// Approximate number of bytes in memory - for profiling and limits.
    size_t bytes() const;

    /// Approximate number of allocated bytes in memory - for profiling and limits.
    size_t allocatedBytes() const;

    /// A block converts to true exactly when it has at least one column.
    explicit operator bool() const
    {
        return columns() != 0;
    }
    bool operator!() const { return columns() == 0; } /// NOLINT

    /** Column names separated by commas. */
    std::string dumpNames() const;

    /** Names, types and lengths of the columns. Intended for debugging. */
    std::string dumpStructure() const;

    /** Column names and positions as stored in the index. */
    std::string dumpIndex() const;

    /** The same block, but empty. */
    Block cloneEmpty() const;

    Columns getColumns() const;
    void setColumns(const Columns & columns);
    void setColumn(size_t position, ColumnWithTypeAndName column);
    Block cloneWithColumns(const Columns & columns) const;
    Block cloneWithoutColumns() const;
    Block cloneWithCutColumns(size_t start, size_t length) const;

    /** Empty columns with the same types as in the block. */
    MutableColumns cloneEmptyColumns() const;

    /** Hands the block's columns out for mutation. Columns in the block will be nullptr. */
    MutableColumns mutateColumns();

    /** Replaces the columns in the block. */
    void setColumns(MutableColumns && columns);
    Block cloneWithColumns(MutableColumns && columns) const;

    /** A block whose columns have been rearranged in the order of their names. */
    Block sortColumns() const;

    void clear();
    void swap(Block & other) noexcept;

    /** Updates the SipHash of the Block using the update method of the columns.
      * The resulting hash can be used to differentiate blocks
      * that have the same structure but different data.
      */
    void updateHash(SipHash & hash) const;

private:
    void eraseImpl(size_t position);
    void initializeIndexByName();
    void reserve(size_t count);

    /// Friend access is needed to allow function execution over the data.
    /// It is safe because functions do not change column names, so the index is unaffected.
    /// It is temporary.
    friend class ExpressionActions;
    friend class ActionsDAG;
};
/// Shared-ownership pointer to a single Block.
using BlockPtr = std::shared_ptr<Block>;
/// Vector of blocks.
using Blocks = std::vector<Block>;
/// Linked list of blocks.
using BlocksList = std::list<Block>;
/// Shared-ownership pointer to a vector of blocks.
using BlocksPtr = std::shared_ptr<Blocks>;
/// Shared-ownership pointer to a vector of BlocksPtr.
using BlocksPtrs = std::shared_ptr<std::vector<BlocksPtr>>;
/// A block that derived structures extend with extra data.
struct ExtraBlock
{
    Block block;

    /// True when `block` holds no columns.
    bool empty() const
    {
        return block.columns() == 0;
    }
};

/// Shared-ownership pointer to an ExtraBlock.
using ExtraBlockPtr = std::shared_ptr<ExtraBlock>;
/// Compares the number of columns, data types, column types, column names, and values of constant columns.
bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);
/// Throws an exception when the blocks are different.
/// NOTE(review): `context_description` is presumably included in the error text — confirm in the definition.
void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, std::string_view context_description);
/// The actual header is compatible with the desired one if the blocks have equal structure except for constants.
/// It is allowed for a column to be constant in the actual header while not being constant in the desired one.
/// If both columns are constant, it is checked that they have the same value.
bool isCompatibleHeader(const Block & actual, const Block & desired);
/// NOTE(review): presumably the throwing counterpart of isCompatibleHeader — confirm in the definition.
void assertCompatibleHeader(const Block & actual, const Block & desired, std::string_view context_description);
/// Calculates the difference in structure of the blocks and writes a description into the output strings.
/// NOTE: It doesn't compare values of constant columns.
void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff);
/// NOTE(review): judging by the name, converts sparse columns of `block` to full ones in place — confirm in the definition.
void convertToFullIfSparse(Block & block);
/// Converts constant columns to full columns ("materializes" them).
Block materializeBlock(const Block & block);
/// NOTE(review): presumably applies the same materialization to `block` itself instead of returning a copy — confirm.
void materializeBlockInplace(Block & block);
/// NOTE(review): presumably combines the given blocks into a single block — confirm in the definition.
Block concatenateBlocks(const std::vector<Block> & blocks);
}
|