1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
|
#include <Parsers/ASTIdentifier_fwd.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ParserWatchQuery.h>
#include <Parsers/ParserInsertQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/InsertQuerySettingsPushDownVisitor.h>
#include <Common/typeid_cast.h>
#include "Parsers/IAST_fwd.h"
namespace DB
{
/// Error codes referenced by this file. They are only declared here (`extern`);
/// the definitions are provided elsewhere.
namespace ErrorCodes
{
/// Code passed to the exceptions thrown from ParserInsertQuery::parseImpl.
extern const int SYNTAX_ERROR;
}
/// Parses an INSERT query starting at `pos` and, on success, stores the resulting
/// ASTInsertQuery in `node`.
///
/// Shape of the query accepted by the code below, in this order:
///   INSERT INTO [TABLE] { FUNCTION table_function [PARTITION BY expr] | [database.]table }
///   [(column list)]
///   [FROM INFILE 'file' [COMPRESSION 'name']]
///   [SETTINGS ...]
///   { VALUES | FORMAT name | SELECT ... / WITH ... [FORMAT name] | WATCH ... }
///   [SETTINGS ...]   -- only if allow_settings_after_format_in_insert is set
/// VALUES and WATCH are not accepted after FROM INFILE; after FROM INFILE the last
/// mandatory section may also be absent.
///
/// Returns false when the input can't be parsed as an INSERT query.
/// Throws an Exception with ErrorCodes::SYNTAX_ERROR when SETTINGS appear both before
/// and after FORMAT, or when the format name is immediately followed by ';'.
///
/// NOTE(review): `end` and `allow_settings_after_format_in_insert` are not declared in this
/// function; presumably they are members of ParserInsertQuery (`end` looks like the end of
/// the query text buffer) — confirm against the class declaration.
bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
/// Create parsers
ParserKeyword s_insert_into("INSERT INTO");
ParserKeyword s_from_infile("FROM INFILE");
ParserKeyword s_compression("COMPRESSION");
ParserKeyword s_table("TABLE");
ParserKeyword s_function("FUNCTION");
ParserToken s_dot(TokenType::Dot);
ParserKeyword s_values("VALUES");
ParserKeyword s_format("FORMAT");
ParserKeyword s_settings("SETTINGS");
ParserKeyword s_select("SELECT");
ParserKeyword s_watch("WATCH");
ParserKeyword s_partition_by("PARTITION BY");
ParserKeyword s_with("WITH");
ParserToken s_lparen(TokenType::OpeningRoundBracket);
ParserToken s_rparen(TokenType::ClosingRoundBracket);
ParserToken s_semicolon(TokenType::Semicolon);
ParserIdentifier name_p(true);
/// Column list: ParserInsertElement items separated by commas.
ParserList columns_p(std::make_unique<ParserInsertElement>(), std::make_unique<ParserToken>(TokenType::Comma), false);
ParserFunction table_function_p{false};
ParserStringLiteral infile_name_p;
ParserExpressionWithOptionalAlias exp_elem_p(false);
/// Create ASTPtr variables (the results of parsing will be put in them).
/// They will be used to initialize ASTInsertQuery's fields.
ASTPtr database;
ASTPtr table;
ASTPtr infile;
ASTPtr columns;
ASTPtr format;
ASTPtr select;
ASTPtr watch;
ASTPtr table_function;
ASTPtr settings_ast;
ASTPtr partition_by_expr;
ASTPtr compression;
/// Pointer to the start of the inline insertion data inside the query text (nullptr if there is none).
const char * data = nullptr;
/// Check for the keywords `INSERT INTO`. If they aren't found, the query can't be parsed as an insert query.
if (!s_insert_into.ignore(pos, expected))
return false;
/// Try to find 'TABLE'. The keyword is optional, so the result is deliberately ignored.
s_table.ignore(pos, expected);
/// Search for 'FUNCTION'. If this keyword is in the query, read fields for insertion into 'TABLE FUNCTION'.
/// The word TABLE is optional for table functions (for example, the s3 table function).
/// Otherwise fill the 'TABLE' fields.
if (s_function.ignore(pos, expected))
{
/// Read the table function
if (!table_function_p.parse(pos, table_function, expected))
return false;
/// Support insertion of values with PARTITION BY.
if (s_partition_by.ignore(pos, expected))
{
if (!exp_elem_p.parse(pos, partition_by_expr, expected))
return false;
}
}
else
{
/// Read one word. It can be a table or a database name.
if (!name_p.parse(pos, table, expected))
return false;
/// If there is a dot, the previous name was the database name,
/// so read the table name after the dot.
if (s_dot.ignore(pos, expected))
{
database = table;
if (!name_p.parse(pos, table, expected))
return false;
}
}
/// Is there a list of columns? If '(' is present, both the list and the closing ')' are required.
if (s_lparen.ignore(pos, expected))
{
if (!columns_p.parse(pos, columns, expected))
return false;
if (!s_rparen.ignore(pos, expected))
return false;
}
/// Check if a file is the source of data.
if (s_from_infile.ignore(pos, expected))
{
/// Read the file name to process it later
if (!infile_name_p.parse(pos, infile, expected))
return false;
/// Check for the 'COMPRESSION' parameter (optional)
if (s_compression.ignore(pos, expected))
{
/// Read the compression name. Create a parser for this purpose.
ParserStringLiteral compression_p;
if (!compression_p.parse(pos, compression, expected))
return false;
}
}
/// Read SETTINGS if they are defined
if (s_settings.ignore(pos, expected))
{
/// Settings are written like in a SET query, so parse them with ParserSetQuery
ParserSetQuery parser_settings(true);
if (!parser_settings.parse(pos, settings_ast, expected))
return false;
}
String format_str;
/// Remember the position so that the SELECT/WITH/WATCH branches can rewind to it
/// and let the nested parser consume the keyword itself.
Pos before_values = pos;
/// VALUES or FORMAT or SELECT or WITH or WATCH.
/// After FROM INFILE we expect FORMAT, SELECT, WITH or nothing.
if (!infile && s_values.ignore(pos, expected))
{
/// If VALUES is present in the query, everything after it (except settings) is treated as data,
/// starting at the token that follows VALUES. If VALUES is followed by a semicolon,
/// the data pointer stays null.
if (!s_semicolon.checkWithoutMoving(pos, expected))
data = pos->begin;
format_str = "Values";
}
else if (s_format.ignore(pos, expected))
{
/// If FORMAT is present, read the format name
if (!name_p.parse(pos, format, expected))
return false;
tryGetIdentifierNameInto(format, format_str);
}
else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
{
/// If SELECT (or WITH) is present, return to the position before it and parse
/// the rest of the query as a SELECT query.
pos = before_values;
ParserSelectWithUnionQuery select_p;
/// NOTE(review): the result of parse() is not checked here; if it fails, `select`
/// presumably stays empty and parsing carries on — confirm this is intended.
select_p.parse(pos, select, expected);
/// FORMAT section is expected if we have input() in SELECT part.
/// FORMAT itself is optional, but once it is present the format name is required.
if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected))
return false;
tryGetIdentifierNameInto(format, format_str);
}
else if (!infile && s_watch.ignore(pos, expected))
{
/// If WATCH is present, return to the position before WATCH and parse
/// the rest of the query as a WATCH query.
pos = before_values;
ParserWatchQuery watch_p;
/// NOTE(review): as with SELECT above, the result of parse() is not checked.
watch_p.parse(pos, watch, expected);
}
else if (!infile)
{
/// If all previous conditions were false and it's not FROM INFILE, the query is incorrect
return false;
}
/// Read SETTINGS after FORMAT.
///
/// Note, that part of SETTINGS can be interpreted as values,
/// hence it is done only under option.
///
/// Refs: https://github.com/ClickHouse/ClickHouse/issues/35100
if (allow_settings_after_format_in_insert && s_settings.ignore(pos, expected))
{
/// SETTINGS were already parsed before FORMAT/VALUES; a second section is rejected.
if (settings_ast)
throw Exception(ErrorCodes::SYNTAX_ERROR,
"You have SETTINGS before and after FORMAT, this is not allowed. "
"Consider switching to SETTINGS before FORMAT and disable allow_settings_after_format_in_insert.");
/// Settings are written like in a SET query, so parse them with ParserSetQuery
ParserSetQuery parser_settings(true);
if (!parser_settings.parse(pos, settings_ast, expected))
return false;
/// In case of INSERT INTO ... VALUES SETTINGS ... (...), (...), ...
/// we should move the data pointer after all settings.
/// (At this point `data` can only be non-null if it was set by the VALUES branch above.)
if (data != nullptr)
data = pos->begin;
}
if (select)
{
/// Copy SETTINGS from the INSERT ... SELECT ... SETTINGS
InsertQuerySettingsPushDownVisitor::Data visitor_data{settings_ast};
InsertQuerySettingsPushDownVisitor(visitor_data).visit(select);
}
/// If a format is specified (and the data does not come from a file), the data follows it.
if (format && !infile)
{
/// Step back to the last consumed token: the data starts right after its end.
Pos last_token = pos;
--last_token;
data = last_token->end;
/// A ';' immediately after the last token is rejected with an explanatory error.
if (data < end && *data == ';')
throw Exception(ErrorCodes::SYNTAX_ERROR, "You have excessive ';' symbol before data for INSERT.\n"
"Example:\n\n"
"INSERT INTO t (x, y) FORMAT TabSeparated\n"
";\tHello\n"
"2\tWorld\n"
"\n"
"Note that there is no ';' just after format name, "
"you need to put at least one whitespace symbol before the data.");
/// Skip spaces, tabs and form feeds.
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
++data;
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
/// At most one '\r' and one '\n' are skipped.
if (data < end && *data == '\r')
++data;
if (data < end && *data == '\n')
++data;
}
/// Create the query and fill its fields.
/// Each non-empty sub-AST is also appended to query->children.
auto query = std::make_shared<ASTInsertQuery>();
node = query;
if (infile)
{
query->infile = infile;
query->compression = compression;
query->children.push_back(infile);
if (compression)
query->children.push_back(compression);
}
if (table_function)
{
query->table_function = table_function;
query->partition_by = partition_by_expr;
query->children.push_back(table_function);
if (partition_by_expr)
query->children.push_back(partition_by_expr);
}
else
{
query->database = database;
query->table = table;
if (database)
query->children.push_back(database);
if (table)
query->children.push_back(table);
}
query->columns = columns;
query->format = std::move(format_str);
query->select = select;
query->watch = watch;
query->settings_ast = settings_ast;
/// A data pointer equal to `end` means there is no inline data; it is stored as nullptr.
query->data = data != end ? data : nullptr;
query->end = end;
if (columns)
query->children.push_back(columns);
if (select)
query->children.push_back(select);
if (watch)
query->children.push_back(watch);
if (settings_ast)
query->children.push_back(settings_ast);
return true;
}
/// Parses one element of the column list of an INSERT query.
///
/// The alternatives are tried in a fixed order and the first one that succeeds wins:
/// columns matcher, qualified asterisk, plain asterisk, compound identifier.
/// Returns false if none of them matches.
bool ParserInsertElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    if (ParserColumnsMatcher().parse(pos, node, expected))
        return true;

    if (ParserQualifiedAsterisk().parse(pos, node, expected))
        return true;

    if (ParserAsterisk().parse(pos, node, expected))
        return true;

    /// Last alternative: its result is the result of the whole element.
    return ParserCompoundIdentifier().parse(pos, node, expected);
}
}
|