1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
|
#include <Common/typeid_cast.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTSubquery.h>
#include <Common/quoteString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS;
}
namespace
{
constexpr auto dummy_subquery_name_prefix = "_subquery";
String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias)
{
WriteBufferFromOwnString message;
message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":\n";
formatAST(*ast, message, false, true);
message << "\nand\n";
formatAST(*prev_ast, message, false, true);
message << '\n';
return message.str();
}
}
bool QueryAliasesWithSubqueries::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
/// Don't descent into table functions and subqueries and special case for ArrayJoin.
return !(node->as<ASTTableExpression>() || node->as<ASTSelectWithUnionQuery>() || node->as<ASTArrayJoin>());
}
bool QueryAliasesNoSubqueries::needChildVisit(const ASTPtr & node, const ASTPtr & child)
{
if (node->as<ASTSubquery>())
return false;
return QueryAliasesWithSubqueries::needChildVisit(node, child);
}
template <typename T>
void QueryAliasesMatcher<T>::visit(const ASTPtr & ast, Data & data)
{
if (auto * s = ast->as<ASTSubquery>())
visit(*s, ast, data);
else if (auto * q = ast->as<ASTSelectQuery>())
visit(*q, ast, data);
else if (auto * aj = ast->as<ASTArrayJoin>())
visit(*aj, ast, data);
else
visitOther(ast, data);
}
template <typename T>
void QueryAliasesMatcher<T>::visit(const ASTSelectQuery & select, const ASTPtr &, Data &)
{
ASTPtr with = select.with();
if (!with)
return;
for (auto & child : with->children)
if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(child.get()))
ast_with_alias->prefer_alias_to_column_name = true;
}
/// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them
/// (skip the expression list itself and its children).
template <typename T>
void QueryAliasesMatcher<T>::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data)
{
visitOther(ast, data);
std::vector<ASTPtr> grand_children;
for (auto & child1 : ast->children)
for (auto & child2 : child1->children)
for (auto & child3 : child2->children)
grand_children.push_back(child3);
/// create own visitor to run bottom to top
for (auto & child : grand_children)
Visitor(data).visit(child);
}
/// set unique aliases for all subqueries. this is needed, because:
/// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect
/// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names
template <typename T>
void QueryAliasesMatcher<T>::visit(const ASTSubquery & const_subquery, const ASTPtr & ast, Data & data)
{
auto & aliases = data;
ASTSubquery & subquery = const_cast<ASTSubquery &>(const_subquery);
static std::atomic_uint64_t subquery_index = 0;
if (subquery.alias.empty())
{
String alias;
do
{
alias = dummy_subquery_name_prefix + std::to_string(++subquery_index);
}
while (aliases.contains(alias));
subquery.setAlias(alias);
aliases[alias] = ast;
}
else
visitOther(ast, aliases);
subquery.prefer_alias_to_column_name = true;
}
template <typename T>
void QueryAliasesMatcher<T>::visitOther(const ASTPtr & ast, Data & data)
{
auto & aliases = data;
String alias = ast->tryGetAlias();
if (!alias.empty())
{
if (aliases.contains(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash())
throw Exception::createDeprecated(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS);
aliases[alias] = ast;
}
/** QueryAliasesVisitor is executed before ExecuteScalarSubqueriesVisitor.
For example we have subquery in our query (SELECT sum(number) FROM numbers(10)).
After running QueryAliasesVisitor it will be (SELECT sum(number) FROM numbers(10)) as _subquery_1
and prefer_alias_to_column_name for this subquery will be true.
After running ExecuteScalarSubqueriesVisitor it will be converted to (45 as _subquery_1)
and prefer_alias_to_column_name for ast literal will be true.
But if we send such query on remote host with Distributed engine for example we cannot send prefer_alias_to_column_name
information for our ast node with query string. And this alias will be dropped because prefer_alias_to_column_name for ASTWIthAlias
by default is false.
It is imporant that subquery can be converted to literal during ExecuteScalarSubqueriesVisitor.
And code below check if we previously set for subquery alias as _subquery, and if it is true
then set prefer_alias_to_column_name = true for node that was optimized during ExecuteScalarSubqueriesVisitor.
*/
if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(ast.get()))
{
if (startsWith(alias, dummy_subquery_name_prefix))
ast_with_alias->prefer_alias_to_column_name = true;
}
}
/// Explicit template instantiations
template class QueryAliasesMatcher<QueryAliasesWithSubqueries>;
template class QueryAliasesMatcher<QueryAliasesNoSubqueries>;
}
|