aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Interpreters/ExpressionAnalyzer.h
blob: 941194e69ff05753baa6df3cb5a607ce8cf274b4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
#pragma once

#include <Core/ColumnNumbers.h>
#include <Columns/FilterDescription.h>
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/AggregateDescription.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/WindowDescription.h>
#include <Interpreters/JoinUtils.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/SelectQueryInfo.h>

namespace DB
{

class Block;
struct Settings;

struct ExpressionActionsChain;
class ExpressionActions;
using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
using ManyExpressionActions = std::vector<ExpressionActionsPtr>;

struct ASTTableJoin;
class IJoin;
using JoinPtr = std::shared_ptr<IJoin>;

class ASTFunction;
class ASTExpressionList;
class ASTSelectQuery;
struct ASTTablesInSelectQueryElement;

struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;

class ArrayJoinAction;
using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;

class ActionsDAG;
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;

/// Create columns in block or return false if not possible
bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column = false);

/// ExpressionAnalyzer sources, intermediates and results. It splits data and logic, allows to test them separately.
/// ExpressionAnalyzer derives from this struct and exposes it read-only via getAnalyzedData().
struct ExpressionAnalyzerData
{
    /// Only declared here; the definition lives outside this header.
    /// NOTE(review): presumably so that std::unique_ptr<QueryPlan> is destroyed where QueryPlan is a complete type - confirm.
    ~ExpressionAnalyzerData();

    /// Handed out to callers by ExpressionAnalyzer::getPreparedSets().
    PreparedSetsPtr prepared_sets;

    /// NOTE(review): looks like the plan returned by SelectQueryExpressionAnalyzer::getJoinedPlan() - confirm in the implementation.
    std::unique_ptr<QueryPlan> joined_plan;

    /// Columns after ARRAY JOIN. If there is no ARRAY JOIN, it's source_columns.
    NamesAndTypesList columns_after_array_join;
    /// Columns after ARRAY JOIN and JOIN. If there is no JOIN, it's columns_after_array_join.
    NamesAndTypesList columns_after_join;
    /// Columns after ARRAY JOIN, JOIN, and/or aggregation.
    NamesAndTypesList aggregated_columns;
    /// Columns after window functions.
    NamesAndTypesList columns_after_window;
    /// Keys of ORDER BY
    NameSet order_by_keys;

    /// Read by SelectQueryExpressionAnalyzer::hasAggregation().
    bool has_aggregation = false;
    /// Aggregation keys; exposed by SelectQueryExpressionAnalyzer::aggregationKeys().
    NamesAndTypesList aggregation_keys;
    /// Per-group key lists; SelectQueryExpressionAnalyzer::useGroupingSetKey() is true when this is non-empty.
    NamesAndTypesLists aggregation_keys_list;
    /// Passed on, together with aggregation_keys and group_by_kind, by ExpressionAnalyzer::getAggregationKeysInfo().
    ColumnNumbersList aggregation_keys_indexes_list;
    /// Read by SelectQueryExpressionAnalyzer::hasConstAggregationKeys().
    bool has_const_aggregation_keys = false;
    /// Exposed by SelectQueryExpressionAnalyzer::aggregates().
    AggregateDescriptions aggregate_descriptions;

    /// Exposed by ExpressionAnalyzer::windowDescriptions().
    WindowDescriptions window_descriptions;
    /// NOTE(review): presumably the result columns produced by window functions - confirm in the implementation.
    NamesAndTypesList window_columns;

    /// Read by SelectQueryExpressionAnalyzer::hasGlobalSubqueries().
    bool has_global_subqueries = false;

    /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries.
    TemporaryTablesMapping external_tables;

    /// Defaults to GroupByKind::NONE; forwarded by ExpressionAnalyzer::getAggregationKeysInfo().
    GroupByKind group_by_kind = GroupByKind::NONE;
};


/** Transforms an expression from a syntax tree into a sequence of actions to execute it.
  *
  * The analysis state (ExpressionAnalyzerData) is inherited as a protected base, so it is
  * only reachable from outside through the accessors declared below.
  *
  * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer.
  */
class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::noncopyable, protected WithContext
{
private:
    /// Extracts the settings that are actually used, to highlight them (and to avoid copying the others).
    struct ExtractedSettings
    {
        const bool use_index_for_in_with_subqueries;
        const SizeLimits size_limits_for_set;
        const SizeLimits size_limits_for_set_used_with_index;
        const UInt64 distributed_group_by_no_merge;

        explicit ExtractedSettings(const Settings & settings_);
    };

public:
    /// Ctor for non-select queries. Generally its usage is:
    /// auto actions = ExpressionAnalyzer(query, syntax, context).getActions();
    /// Delegates to the protected constructor with subquery depth 0, no GLOBAL handling,
    /// no EXPLAIN mode and no pre-existing prepared sets.
    ExpressionAnalyzer(const ASTPtr & query_, const TreeRewriterResultPtr & syntax_analyzer_result_, ContextPtr context_)
        : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, 0, false, false, {})
    {
    }

    ~ExpressionAnalyzer();

    void appendExpression(ExpressionActionsChain & chain, const ASTPtr & expr, bool only_types);

    /// If `ast` is not a SELECT query, just gets all the actions to evaluate the expression.
    /// If add_aliases, only the calculated values in the desired order and add aliases.
    ///     If also project_result, then only aliases remain in the output block.
    /// Otherwise, only temporary columns will be deleted from the block.
    ActionsDAGPtr getActionsDAG(bool add_aliases, bool project_result = true);
    ExpressionActionsPtr getActions(bool add_aliases, bool project_result = true, CompileExpressions compile_expressions = CompileExpressions::no);

    /// Get actions to evaluate a constant expression. The function adds constants and applies functions that depend only on constants.
    /// Does not execute subqueries.
    ActionsDAGPtr getConstActionsDAG(const ColumnsWithTypeAndName & constant_inputs = {});
    ExpressionActionsPtr getConstActions(const ColumnsWithTypeAndName & constant_inputs = {});

    /** Sets that require a subquery to be created.
      * Only the sets needed to perform actions returned from already executed `append*` or `getActions`.
      * That is, you need to call getPreparedSets after all calls of `append*` or `getActions`
      *  and create all the returned sets before performing the actions.
      *
      * Returns a copy of the shared pointer; the analyzer itself is not modified, hence const.
      */
    PreparedSetsPtr getPreparedSets() const { return prepared_sets; }

    /// Get intermediates for tests
    const ExpressionAnalyzerData & getAnalyzedData() const { return *this; }

    /// A list of windows for window functions.
    const WindowDescriptions & windowDescriptions() const { return window_descriptions; }

    /// NOTE(review): judging by the signature, fills `desc` from the window definition in `ast`,
    /// consulting `existing_descriptions` - confirm against the implementation.
    void makeWindowDescriptionFromAST(const Context & context, const WindowDescriptions & existing_descriptions, WindowDescription & desc, const IAST * ast);
    void makeWindowDescriptions(ActionsDAGPtr actions);

    /** Checks if subquery is not a plain StorageSet.
      * Because while making set we will read data from StorageSet which is not allowed.
      * Returns valid SetPtr from StorageSet if the latter is used after IN or nullptr otherwise.
      */
    SetPtr isPlainStorageSetInSubquery(const ASTPtr & subquery_or_table_name);

protected:
    /// Full constructor; the public constructor and SelectQueryExpressionAnalyzer delegate to it.
    ExpressionAnalyzer(
        const ASTPtr & query_,
        const TreeRewriterResultPtr & syntax_analyzer_result_,
        ContextPtr context_,
        size_t subquery_depth_,
        bool do_global_,
        bool is_explain_,
        PreparedSetsPtr prepared_sets_,
        bool is_create_parameterized_view_ = false);

    ASTPtr query;
    const ExtractedSettings settings;
    size_t subquery_depth;

    TreeRewriterResultPtr syntax;
    bool is_create_parameterized_view;

    /// Thin accessors over the syntax analysis result (`syntax`).
    const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
    const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; }
    const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
    const ASTs & aggregates() const { return syntax->aggregates; }
    /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
    void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain);

    ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;

    void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);

    /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
      * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the
      * prepared sets would not be applicable for MergeTree index optimization.
      */
    void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false);

    void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);

    void getRootActionsForWindowFunctions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions);

    /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
      * Create a set of columns aggregated_columns resulting after the aggregation, if any,
      *  or after all the actions that are normally performed before aggregation.
      * Set has_aggregation = true if there is GROUP BY or at least one aggregate function.
      */
    void analyzeAggregation(ActionsDAGPtr & temp_actions);
    void makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions);

    const ASTSelectQuery * getSelectQuery() const;

    bool isRemoteStorage() const;

    NamesAndTypesList getColumnsAfterArrayJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns);
    NamesAndTypesList analyzeJoin(ActionsDAGPtr & actions, const NamesAndTypesList & src_columns);

    /// Bundles aggregation_keys, aggregation_keys_indexes_list and group_by_kind into one value.
    AggregationKeysInfo getAggregationKeysInfo() const noexcept
    {
        return { aggregation_keys, aggregation_keys_indexes_list, group_by_kind };
    }
};

class SelectQueryExpressionAnalyzer;

/// Result of SelectQueryExpressionAnalyzer: expressions for InterpreterSelectQuery
/// Holds one ActionsDAG per query stage, plus the flags and column names that accompany them.
struct ExpressionAnalysisResult
{
    /// Returns a string representation of this result; the format is defined in the implementation.
    std::string dump() const;

    /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
    bool first_stage = false;
    /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
    bool second_stage = false;

    bool need_aggregate = false;
    bool has_order_by   = false;
    bool has_window = false;

    /// NOTE(review): presumably the name of the column holding the WHERE condition; remove_where_filter
    /// then says whether that column is dropped after filtering - confirm in the implementation.
    String where_column_name;
    bool remove_where_filter = false;
    bool optimize_read_in_order = false;
    bool optimize_aggregation_in_order = false;
    bool join_has_delayed_stream = false;

    bool use_grouping_set_key = false;

    /// Actions for the individual stages. A null pointer means the stage is absent;
    /// the has*() predicates below test exactly that for some of them.
    ActionsDAGPtr before_array_join;
    ArrayJoinActionPtr array_join;
    ActionsDAGPtr before_join;
    ActionsDAGPtr converting_join_columns;
    JoinPtr join;
    ActionsDAGPtr before_where;
    ActionsDAGPtr before_aggregation;
    ActionsDAGPtr before_having;
    String having_column_name;
    bool remove_having_filter = false;
    ActionsDAGPtr before_window;
    ActionsDAGPtr before_order_by;
    ActionsDAGPtr before_limit_by;
    ActionsDAGPtr final_projection;

    /// Columns from the SELECT list, before renaming them to aliases. Used to
    /// perform SELECT DISTINCT.
    Names selected_columns;

    /// Columns to read from storage if any.
    Names required_columns;

    /// Columns will be removed after prewhere actions execution.
    NameSet columns_to_remove_after_prewhere;

    PrewhereInfoPtr prewhere_info;
    FilterDAGInfoPtr filter_info;
    ConstantFilterDescription prewhere_constant_filter_description;
    ConstantFilterDescription where_constant_filter_description;
    /// Actions by every element of ORDER BY
    ManyExpressionActions order_by_elements_actions;
    /// NOTE(review): by analogy with the ORDER BY member above, presumably actions by every element of GROUP BY - confirm.
    ManyExpressionActions group_by_elements_actions;

    /// Leaves every member at its in-class default.
    ExpressionAnalysisResult() = default;

    /// Builds the result from `query_analyzer`. This struct is declared a friend of
    /// SelectQueryExpressionAnalyzer, which gives it access to that class's private members.
    /// The parameter names mirror the members above; how each is used is defined in the implementation.
    ExpressionAnalysisResult(
        SelectQueryExpressionAnalyzer & query_analyzer,
        const StorageMetadataPtr & metadata_snapshot,
        bool first_stage,
        bool second_stage,
        bool only_types,
        const FilterDAGInfoPtr & filter_info,
        const FilterDAGInfoPtr & additional_filter, /// for setting additional_filters
        const Block & source_header);

    /// Filter for row-level security.
    bool hasFilter() const { return filter_info.get(); }

    /// Each predicate below is true when the corresponding pointer member is non-null.
    bool hasJoin() const { return join.get(); }
    bool hasPrewhere() const { return prewhere_info.get(); }
    bool hasWhere() const { return before_where.get(); }
    bool hasHaving() const { return before_having.get(); }
    bool hasLimitBy() const { return before_limit_by.get(); }

    void removeExtraColumns() const;
    void checkActions() const;
    /// The step numbers are taken by non-const reference, so the implementation may update them.
    void finalize(
        const ExpressionActionsChain & chain,
        ssize_t & prewhere_step_num,
        ssize_t & where_step_num,
        ssize_t & having_step_num,
        const ASTSelectQuery & query);
};

/// SelectQuery specific ExpressionAnalyzer part.
/// Adds the per-clause `append*` steps; ExpressionAnalysisResult is a friend and may use the private ones.
class SelectQueryExpressionAnalyzer : public ExpressionAnalyzer
{
public:
    friend struct ExpressionAnalysisResult;

    /// Forwards subquery_depth, is_explain and is_create_parameterized_view from `options_`
    /// to the ExpressionAnalyzer base and keeps copies of the metadata snapshot,
    /// the required result columns and the options.
    SelectQueryExpressionAnalyzer(
        const ASTPtr & query_,
        const TreeRewriterResultPtr & syntax_analyzer_result_,
        ContextPtr context_,
        const StorageMetadataPtr & metadata_snapshot_,
        const Names & required_result_columns_ = {},
        bool do_global_ = false,
        const SelectQueryOptions & options_ = {},
        PreparedSetsPtr prepared_sets_ = nullptr)
        : ExpressionAnalyzer(
            query_,
            syntax_analyzer_result_,
            context_,
            options_.subquery_depth,
            do_global_,
            options_.is_explain,
            prepared_sets_,
            options_.is_create_parameterized_view)
        , metadata_snapshot(metadata_snapshot_)
        , required_result_columns(required_result_columns_)
        , query_options(options_)
    {
    }

    /// Does the expression have aggregate functions or a GROUP BY or HAVING section.
    bool hasAggregation() const { return has_aggregation; }
    bool hasWindow() const { return !syntax->window_function_asts.empty(); }
    /// Const like the neighbouring predicates: it only reads a flag.
    bool hasGlobalSubqueries() const { return has_global_subqueries; }
    bool hasTableJoin() const { return syntax->ast_join; }

    /// When there is only one group in GROUPING SETS
    /// it is a special case that is equal to GROUP BY, i.e.:
    ///
    ///     GROUPING SETS ((a, b)) -> GROUP BY a, b
    ///
    /// But it is rewritten by GroupingSetsRewriterVisitor to GROUP BY,
    /// so instead of aggregation_keys_list.size() > 1,
    /// !aggregation_keys_list.empty() can be used.
    bool useGroupingSetKey() const { return !aggregation_keys_list.empty(); }

    const NamesAndTypesList & aggregationKeys() const { return aggregation_keys; }
    bool hasConstAggregationKeys() const { return has_const_aggregation_keys; }
    const NamesAndTypesLists & aggregationKeysList() const { return aggregation_keys_list; }
    /// Hides ExpressionAnalyzer::aggregates() (which returns the ASTs) and returns the descriptions instead.
    const AggregateDescriptions & aggregates() const { return aggregate_descriptions; }

    /// Returns the plan by unique_ptr, i.e. ownership goes to the caller.
    std::unique_ptr<QueryPlan> getJoinedPlan();

    /// Tables that will need to be sent to remote servers for distributed query processing.
    const TemporaryTablesMapping & getExternalTables() const { return external_tables; }

    ActionsDAGPtr simpleSelectActions();

    /// These appends are public only for tests
    void appendSelect(ExpressionActionsChain & chain, bool only_types);
    /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
    ActionsDAGPtr appendProjectResult(ExpressionActionsChain & chain) const;

private:
    StorageMetadataPtr metadata_snapshot;
    /// If non-empty, ignore all expressions not from this list.
    Names required_result_columns;
    SelectQueryOptions query_options;

    JoinPtr makeJoin(
        const ASTTablesInSelectQueryElement & join_element,
        const ColumnsWithTypeAndName & left_columns,
        ActionsDAGPtr & left_convert_actions);

    const ASTSelectQuery * getAggregatingQuery() const;

    /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query.
      *
      * Example usage (schematic; argument lists follow the declarations below):
      *   ExpressionActionsChain chain;
      *   analyzer.appendWhere(chain, only_types);
      *   chain.addStep();
      *   analyzer.appendSelect(chain, only_types);
      *   analyzer.appendOrderBy(chain, only_types, optimize_read_in_order, order_by_elements_actions);
      *   chain.finalize();
      *
      * If only_types = true set, does not execute subqueries in the relevant parts of the query. The actions got this way
      *  shouldn't be executed, they are only needed to get a list of columns with their types.
      */

    /// Before aggregation:
    ArrayJoinActionPtr appendArrayJoin(ExpressionActionsChain & chain, ActionsDAGPtr & before_array_join, bool only_types);
    bool appendJoinLeftKeys(ExpressionActionsChain & chain, bool only_types);
    JoinPtr appendJoin(ExpressionActionsChain & chain, ActionsDAGPtr & converting_join_columns);

    /// remove_filter is set in ExpressionActionsChain::finalize();
    /// NOTE(review): an earlier comment here described an `additional_required_columns` argument
    /// (columns kept for e.g. sampling or the FINAL modifier); appendPrewhere no longer declares
    /// such a parameter, so that description looks stale - confirm against the implementation.
    ActionsDAGPtr appendPrewhere(ExpressionActionsChain & chain, bool only_types);
    bool appendWhere(ExpressionActionsChain & chain, bool only_types);
    bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &);
    void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
    void appendWindowFunctionsArguments(ExpressionActionsChain & chain, bool only_types);

    void appendExpressionsAfterWindowFunctions(ExpressionActionsChain & chain, bool only_types);
    void appendSelectSkipWindowExpressions(ExpressionActionsChain::Step & step, ASTPtr const & node);

    void appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool only_types);

    /// After aggregation:
    bool appendHaving(ExpressionActionsChain & chain, bool only_types);
    ///  appendSelect
    ActionsDAGPtr appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, ManyExpressionActions &);
    bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);
    ///  appendProjectResult
};

}