// Source path: root/contrib/clickhouse/src/Interpreters/QueryNormalizer.cpp
// Source blob: f47635a3c3f3d68d2204a43a193834bf419ac4ec
#include <Poco/String.h>
#include <Core/Names.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/Context.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTQueryParameter.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <IO/WriteHelpers.h>

namespace DB
{

/// Error codes thrown by the normalizer in this file. They are only declared here (extern).
namespace ErrorCodes
{
    /// Thrown by CheckASTDepth when the nesting level exceeds settings.max_ast_depth.
    extern const int TOO_DEEP_AST;
    /// Thrown when an alias refers to itself, directly or through other aliases.
    extern const int CYCLIC_ALIASES;
    /// Thrown when a query parameter node is met outside of a parameterized view definition.
    extern const int UNKNOWN_QUERY_PARAMETER;
    /// Thrown when a function other than `view` carries a query argument.
    extern const int BAD_ARGUMENTS;
}


/// Scope guard that tracks the nesting level of QueryNormalizer::visit calls.
///
/// The constructor throws TOO_DEEP_AST if the current level already exceeds
/// settings.max_ast_depth, otherwise it increments data.level. The destructor decrements it.
/// If the constructor throws, the destructor does not run, so the counter stays balanced.
class CheckASTDepth
{
public:
    explicit CheckASTDepth(QueryNormalizer::Data & data_)
        : data(data_)
    {
        if (data.level > data.settings.max_ast_depth)
            throw Exception(ErrorCodes::TOO_DEEP_AST, "Normalized AST is too deep. Maximum: {}", data.settings.max_ast_depth);
        ++data.level;
    }

    /// A copy would decrement data.level a second time, so the guard is not copyable.
    CheckASTDepth(const CheckASTDepth &) = delete;
    CheckASTDepth & operator=(const CheckASTDepth &) = delete;

    ~CheckASTDepth()
    {
        --data.level;
    }

private:
    QueryNormalizer::Data & data;
};


/// Scope guard that remembers the value of an alias string at construction
/// and puts that value back when the scope is left (normally or by exception).
class RestoreAliasOnExitScope
{
public:
    explicit RestoreAliasOnExitScope(String & alias_)
        : alias(alias_)
        , saved(alias_)
    {}

    /// A copy would restore the alias a second time, so the guard is not copyable.
    RestoreAliasOnExitScope(const RestoreAliasOnExitScope &) = delete;
    RestoreAliasOnExitScope & operator=(const RestoreAliasOnExitScope &) = delete;

    ~RestoreAliasOnExitScope()
    {
        /// Swap instead of copy-assign: it neither allocates nor throws, which matters
        /// because a destructor may run during stack unwinding. The value left in `saved`
        /// is discarded together with the guard.
        alias.swap(saved);
    }

private:
    String & alias;
    String saved;
};


/// Replaces an identifier that names a known alias with a clone of the aliased expression.
///
/// `node` is the identifier being visited and `ast` is the tree slot that holds it; on
/// substitution `ast` is re-pointed to the clone. Nothing is changed when the identifier is
/// not a column name, when a source column of the same name is preferred, when the name is
/// not a known alias, or when it refers to the alias that is currently being expanded.
/// Throws CYCLIC_ALIASES for self references (unless allowed) and for alias loops.
void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
{
    /// Cycles are tracked through data.current_asts, but in case of a bug in that tricky
    /// logic the stack size check prevents a stack overflow.
    checkStackSize();

    if (!IdentifierSemantic::getColumnName(node))
        return;

    /// With prefer_column_name_to_alias a source column wins over an alias of the same name.
    if (data.settings.prefer_column_name_to_alias
        && data.source_columns_set.find(node.name()) != data.source_columns_set.end())
        return;

    const auto & identifier_name = node.name();
    const String & enclosing_alias = data.current_alias;

    auto alias_it = data.aliases.find(identifier_name);

    /// The identifier names the alias we are currently inside of,
    /// as in constructs like "SELECT column + 1 AS column".
    const bool refers_to_enclosing_alias = (enclosing_alias == identifier_name);

    if (refers_to_enclosing_alias && !data.allow_self_aliases)
        throw Exception(ErrorCodes::CYCLIC_ALIASES, "Self referencing of {} to {}. Cyclic alias",
                        backQuote(enclosing_alias), backQuote(identifier_name));

    /// Only an alias that is not the parent alias gets substituted.
    if (alias_it == data.aliases.end() || refers_to_enclosing_alias)
        return;

    if (!IdentifierSemantic::canBeAlias(node))
        return;

    /// The identifier refers to the alias `identifier_name`, and the identifier itself
    /// may in turn carry an alias of its own (`node_alias`).
    const auto & alias_node = alias_it->second;

    String target_alias_or_name = alias_node->getAliasOrColumnName();
    std::optional<String> target_column_name = IdentifierSemantic::getColumnName(alias_node);
    String node_alias = ast->tryGetAlias();

    if (data.current_asts.contains(alias_node.get()) /// Loop through several aliases
        || (identifier_name == target_alias_or_name && target_column_name
            && node_alias == *target_column_name)) /// The alias target points straight back at us
        throw Exception(ErrorCodes::CYCLIC_ALIASES, "Cyclic aliases");

    /// Puts a clone of the alias target into `ast`. The clone is named `node_alias` when
    /// `keep_node_alias` is set, otherwise it keeps the alias-or-column name `ast` had before.
    auto substitute_alias_target = [&](bool keep_node_alias)
    {
        /// Check the size of the alias before cloning a too large alias AST.
        alias_node->checkSize(data.settings.max_expanded_ast_elements);

        const String replacement_alias = keep_node_alias ? node_alias : ast->getAliasOrColumnName();
        ast = alias_node->clone();
        ast->setAlias(replacement_alias);

        /// A clone of an already finished AST is finished as well.
        if (data.finished_asts.contains(alias_node))
            data.finished_asts[ast] = ast;

        /// If node_alias is itself a registered alias, re-point it to the new node
        /// so that later lookups do not have to repeat the substitution.
        if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end())
            existing_alias->second = ast;
    };

    if (!node_alias.empty() && node_alias != target_alias_or_name)
    {
        /// In a construct like "a AS b", where a is an alias, alias b must be set on the
        /// result of substituting alias a - unless that would recurse forever.
        auto opt_name = IdentifierSemantic::getColumnName(alias_node);
        const bool is_cycle = opt_name && *opt_name == identifier_name;

        if (!is_cycle)
            substitute_alias_target(true);
    }
    else
    {
        substitute_alias_target(false);
    }
}


/// Normalizes the ON section of a JOIN. Elements without a table_join,
/// and joins without an ON expression, are left untouched.
void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data)
{
    if (!node.table_join)
        return;

    auto & table_join = node.table_join->as<ASTTableJoin &>();
    if (!table_join.on_expression)
        return;

    visit(table_join.on_expression, data);
}

/// Tells whether the normalizer should descend into `child`.
/// Select queries and table expressions are skipped, and so are interpolate elements:
/// they are not subject for normalization and will be processed in filling transform.
static bool needVisitChild(const ASTPtr & child)
{
    if (child->as<ASTSelectQuery>())
        return false;

    if (child->as<ASTTableExpression>())
        return false;

    return !child->as<ASTInterpolateElement>();
}

/// Special visitChildren() for ASTSelectQuery: visits every child accepted by needVisitChild()
/// and then the PREWHERE, WHERE and HAVING expressions through their own references.
void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data)
{
    for (auto & child : select.children)
    {
        if (!needVisitChild(child))
            continue;

        visit(child, data);
    }

    /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced
    /// not only in children, but also in where_expression and having_expression.
    if (select.prewhere())
    {
        visit(select.refPrewhere(), data);
    }

    if (select.where())
    {
        visit(select.refWhere(), data);
    }

    if (select.having())
    {
        visit(select.refHaving(), data);
    }
}

/// Visits the children of `node`.
/// Don't go into subqueries.
/// Don't go into select query. It processes children itself.
/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters
///  on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]).
/// Throws BAD_ARGUMENTS when a function other than `view` has a query argument.
void QueryNormalizer::visitChildren(IAST * node, Data & data)
{
    auto * func_node = node->as<ASTFunction>();

    /// Not a function: visit all acceptable children, except for select queries.
    if (!func_node)
    {
        if (node->as<ASTSelectQuery>())
            return;

        for (auto & child : node->children)
        {
            if (needVisitChild(child))
                visit(child, data);
        }
        return;
    }

    if (func_node->tryGetQueryArgument())
    {
        if (func_node->name != "view")
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Query argument can only be used in the `view` TableFunction");

        /// Don't go into query argument.
        return;
    }

    const bool is_lambda = func_node->name == "lambda";

    /// For lambda functions we need to avoid replacing lambda parameters with external aliases, for example,
    /// Select 1 as x, arrayMap(x -> x + 2, [1, 2, 3])
    /// shouldn't be replaced with Select 1 as x, arrayMap(x -> **(1 as x)** + 2, [1, 2, 3])
    /// So the aliases named like lambda parameters are taken out of data.aliases for the duration of the visit.
    Aliases shadowed_aliases;
    if (is_lambda)
    {
        Names lambda_params = RequiredSourceColumnsMatcher::extractNamesFromLambda(*func_node);
        for (const auto & param : lambda_params)
        {
            if (auto found = data.aliases.find(param); found != data.aliases.end())
                shadowed_aliases.insert(data.aliases.extract(found));
        }
    }

    if (func_node->arguments)
    {
        auto & arguments = func_node->arguments->children;

        /// We skip the first argument of a lambda. We also assume that the lambda function can not have parameters.
        for (size_t pos = is_lambda ? 1 : 0; pos < arguments.size(); ++pos)
        {
            auto & argument = arguments[pos];

            if (needVisitChild(argument))
                visit(argument, data);
        }
    }

    if (func_node->window_definition)
        visitChildren(func_node->window_definition.get(), data);

    /// Put the aliases hidden by lambda parameters back.
    for (auto & shadowed : shadowed_aliases)
        data.aliases.insert(shadowed);
}

/// Entry point of the normalization of one subtree.
///
/// Dispatches on the node type, then either re-visits the node (when it was replaced) or
/// descends into its children. The result is remembered in data.finished_asts, so an
/// already processed node is substituted by its stored replacement right away.
/// At the outermost call (data.level == 1) the size of the resulting AST is checked.
/// Throws UNKNOWN_QUERY_PARAMETER for a query parameter node unless
/// data.is_create_parameterized_view is set.
void QueryNormalizer::visit(ASTPtr & ast, Data & data)
{
    CheckASTDepth depth_guard(data);
    RestoreAliasOnExitScope alias_guard(data.current_alias);

    if (auto done = data.finished_asts.find(ast); done != data.finished_asts.end())
    {
        ast = done->second;
        return;
    }

    /// Keep the original node: `ast` may be re-pointed to a replacement below.
    ASTPtr initial_ast = ast;
    data.current_asts.insert(initial_ast.get());

    if (String own_alias = ast->tryGetAlias(); !own_alias.empty())
        data.current_alias = own_alias;

    if (auto * identifier = ast->as<ASTIdentifier>())
    {
        visit(*identifier, ast, data);
    }
    else if (auto * tables_element = ast->as<ASTTablesInSelectQueryElement>())
    {
        visit(*tables_element, ast, data);
    }
    else if (auto * select_query = ast->as<ASTSelectQuery>())
    {
        visit(*select_query, ast, data);
    }
    else if (auto * query_parameter = ast->as<ASTQueryParameter>())
    {
        if (!data.is_create_parameterized_view)
            throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Query parameter {} was not set", backQuote(query_parameter->name));
    }
    else if (auto * function_node = ast->as<ASTFunction>())
    {
        if (function_node->parameters)
            visit(function_node->parameters, data);
    }

    /// If we replace the root of the subtree, we will be called again for the new root,
    /// in case the alias is replaced by an alias.
    const bool root_replaced = ast.get() != initial_ast.get();
    if (root_replaced)
        visit(ast, data);
    else
        visitChildren(ast.get(), data);

    data.current_asts.erase(initial_ast.get());
    data.current_asts.erase(ast.get());

    if (data.ignore_alias && !ast->tryGetAlias().empty())
        ast->setAlias("");

    data.finished_asts[initial_ast] = ast;

    /// @note can not place it in CheckASTDepth dtor cause of exception.
    if (data.level != 1)
        return;

    try
    {
        ast->checkSize(data.settings.max_expanded_ast_elements);
    }
    catch (Exception & e)
    {
        e.addMessage("(after expansion of aliases)");
        throw;
    }
}

}