aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Interpreters/TreeRewriter.h
blob: 89cc13da294b8201c559dff7634ba410fb017b00 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#pragma once

#include <Core/Block.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/Aliases.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/SelectQueryOptions.h>
#include <Storages/IStorage_fwd.h>

namespace DB
{

/// Forward declarations and type aliases used by the declarations in this header.
/// NOTE(review): ASTFunction is not referenced by any declaration visible in this header,
/// and SelectQueryOptions is already provided by <Interpreters/SelectQueryOptions.h> included above.
class ASTFunction;
struct ASTTablesInSelectQueryElement;
class TableJoin;
struct Settings;
struct SelectQueryOptions;
/// Map from a String key to a Block. Used for TreeRewriterResult::scalars and
/// TreeRewriterResult::local_scalars (results of scalar subqueries).
using Scalars = std::map<String, Block>;
struct StorageInMemoryMetadata;
/// Shared pointer to immutable storage metadata.
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
struct StorageSnapshot;
/// Shared pointer to a (mutable) storage snapshot; see TreeRewriterResult::storage_snapshot.
using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;

/// Information collected about a query while it is analyzed and rewritten by TreeRewriter.
/// TreeRewriter::analyze() and TreeRewriter::analyzeSelect() return it as a shared pointer to const
/// (TreeRewriterResultPtr).
struct TreeRewriterResult
{
    /// Storage and storage snapshot passed to the constructor; both default to empty there.
    /// NOTE(review): presumably the table the query reads from - confirm against the constructor definition.
    ConstStoragePtr storage;
    StorageSnapshotPtr storage_snapshot;
    /// NOTE(review): presumably filled from the `table_join` argument of TreeRewriter::analyzeSelect() - confirm.
    std::shared_ptr<TableJoin> analyzed_join;
    /// Null by default. NOTE(review): presumably points at the JOIN element inside the query AST - confirm.
    const ASTTablesInSelectQueryElement * ast_join = nullptr;

    /// Columns available to the query (see the `source_columns_` constructor argument).
    NamesAndTypesList source_columns;
    NameSet source_columns_set; /// Set of names of source_columns.
    /// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns.
    NamesAndTypesList required_source_columns;
    /// Same as above but also records alias columns which are expanded. This is for the RBAC access check.
    Names required_source_columns_before_expanding_alias_columns;

    /// Set of alias columns that are expanded to their alias expressions. We still need the original columns to check access permission.
    NameSet expanded_aliases;

    /// Aliases found in the query (the Aliases type comes from <Interpreters/Aliases.h>).
    Aliases aliases;

    /// AST nodes collected from the query.
    /// NOTE(review): judging by the names - aggregate function calls, window function calls,
    /// and expressions that contain a window function, respectively; confirm against the implementation.
    ASTs aggregates;
    ASTs window_function_asts;
    ASTs expressions_with_window_function;

    /// Maps a column produced by ARRAY JOIN to the source column it is obtained from.
    /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v".
    NameToNameMap array_join_result_to_source;

    /// For the ARRAY JOIN section, mapping from the alias to the full column name.
    /// For example, for `ARRAY JOIN [1,2] AS b` "b" -> "array(1,2)" will enter here.
    /// Note: not used further.
    NameToNameMap array_join_alias_to_name;

    /// The backward mapping for array_join_alias_to_name.
    /// Note: not used further.
    NameToNameMap array_join_name_to_alias;

    /// Predicate optimizer overrides the subqueries.
    bool rewrite_subqueries = false;

    /// Whether the query contains explicit columns like "SELECT column1 + column2 FROM table1".
    /// Queries like "SELECT count() FROM table1", "SELECT 1" don't contain explicit columns.
    bool has_explicit_columns = false;

    /// Whether it's possible to use the trivial count optimization,
    /// i.e. use a fast call of IStorage::totalRows() (or IStorage::totalRowsByPartitionPredicate())
    /// instead of actually retrieving columns and counting rows.
    bool optimize_trivial_count = false;

    /// Cached result of the isRemote() call for storage, because the call may be too heavy.
    bool is_remote_storage = false;

    /// Rewrite _shard_num to shardNum()
    bool has_virtual_shard_num = false;

    /// Results of scalar subqueries.
    /// NOTE(review): the difference between `scalars` and `local_scalars` is not visible in this header - confirm.
    Scalars scalars;
    Scalars local_scalars;

    /// Constructs the result from the list of source columns.
    /// `storage_` and `storage_snapshot_` are optional (empty by default); `add_special` defaults to true.
    /// NOTE(review): `add_special` is presumably forwarded to collectSourceColumns() - confirm in the definition.
    explicit TreeRewriterResult(
        const NamesAndTypesList & source_columns_,
        ConstStoragePtr storage_ = {},
        const StorageSnapshotPtr & storage_snapshot_ = {},
        bool add_special = true);

    /// Defined out of line. NOTE(review): presumably fills source_columns / source_columns_set - confirm.
    void collectSourceColumns(bool add_special);
    /// Defined out of line. NOTE(review): presumably fills required_source_columns for `query` - confirm.
    void collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint);
    /// Names of required_source_columns, returned by value.
    Names requiredSourceColumns() const { return required_source_columns.getNames(); }
    /// Reference to required_source_columns_before_expanding_alias_columns (valid while this object lives).
    const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; }
    /// Defined out of line. NOTE(review): presumably the set of source names from array_join_result_to_source - confirm.
    NameSet getArrayJoinSourceNameSet() const;
    /// Reference to `scalars` (does not include `local_scalars`).
    const Scalars & getScalars() const { return scalars; }
};

using TreeRewriterResultPtr = std::shared_ptr<const TreeRewriterResult>;

/// Tree Rewriter in terms of CMU slides @sa https://15721.courses.cs.cmu.edu/spring2020/slides/19-optimizer1.pdf
///
/// Optimizes the AST and collects information for further expression analysis in ExpressionAnalyzer.
/// Result AST has the following invariants:
///  * all aliases are substituted
///  * qualified names are translated
///  * scalar subqueries are executed and replaced with constants
///  * unneeded columns are removed from SELECT clause
///  * duplicated columns are removed from ORDER BY, LIMIT BY, USING(...).
/// Entry point for AST analysis and rewriting. Holds a context via the WithContext base,
/// which is inherited privately (no access specifier is given on a `class`).
class TreeRewriter : WithContext
{
public:
    /// Stores `context_` in the WithContext base.
    explicit TreeRewriter(ContextPtr context_) : WithContext(context_) {}

    /// Analyze and rewrite a non-SELECT query.
    /// `query` is taken by non-const reference, so the AST may be modified in place.
    /// `source_columns_`, `storage` and `storage_snapshot` have the same types as the arguments of
    /// the TreeRewriterResult constructor; `storage` and `storage_snapshot` are empty by default.
    /// Flag defaults: allow_aggregations = false, allow_self_aliases = true,
    /// execute_scalar_subqueries = true, is_create_parameterized_view = false.
    /// Returns a shared pointer to an immutable TreeRewriterResult.
    TreeRewriterResultPtr analyze(
        ASTPtr & query,
        const NamesAndTypesList & source_columns_,
        ConstStoragePtr storage = {},
        const StorageSnapshotPtr & storage_snapshot = {},
        bool allow_aggregations = false,
        bool allow_self_aliases = true,
        bool execute_scalar_subqueries = true,
        bool is_create_parameterized_view = false) const;

    /// Analyze and rewrite a SELECT query.
    /// `query` is taken by non-const reference, so the AST may be modified in place.
    /// `result` is an rvalue reference: the caller has to pass a temporary or std::move() an existing object.
    /// All remaining arguments are optional and default to empty values.
    /// Returns a shared pointer to an immutable TreeRewriterResult.
    TreeRewriterResultPtr analyzeSelect(
        ASTPtr & query,
        TreeRewriterResult && result,
        const SelectQueryOptions & select_options = {},
        const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns = {},
        const Names & required_result_columns = {},
        std::shared_ptr<TableJoin> table_join = {}) const;

private:
    /// Static helper defined out of line; takes the context explicitly instead of using the one held by the object.
    /// `query` and `aliases` are non-const references and may be modified.
    /// NOTE(review): presumably performs the alias substitution listed in the class invariants - confirm in the definition.
    static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false);
};

}