// Source path: root/contrib/clickhouse/src/Planner/PlannerJoins.h
// Blob: c61bce932e0facbb16d0bb4a5720533a57fbe89a
#pragma once

#include <Core/Joins.h>
#include <Core/ColumnsWithTypeAndName.h>

#include <Interpreters/ActionsDAG.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/IJoin.h>

#include <Analyzer/IQueryTreeNode.h>

namespace DB
{

/** A join clause represents a single clause of the JOIN ON section.
  * A join clause consists of JOIN keys and conditions.
  *
  * JOIN can contain multiple clauses in JOIN ON section.
  * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id OR t1.value = t2.value;
  * t1.id = t2.id is first clause.
  * t1.value = t2.value is second clause.
  *
  * A clause in the JOIN ON section can also contain conditions.
  * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON t1.id = t2.id AND t1.id > 0 AND t2.id > 0;
  * t1.id = t2.id AND t1.id > 0 AND t2.id > 0 is first clause.
  * t1.id = t2.id is JOIN keys section.
  * t1.id > 0 is left table condition.
  * t2.id > 0 is right table condition.
  *
  * Additionally, not only conditions but also JOIN keys can be represented as expressions.
  * Example: SELECT * FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 ON toString(t1.id) = toString(t2.id).
  * toString(t1.id) = toString(t2.id) is the JOIN keys section, where toString(t1.id) is the left key and toString(t2.id) is the right key.
  *
  * During query planning the JOIN ON section is represented using the join clause structure. It is important to split
  * keys and conditions, and to detect for each action from which stream it can be performed.
  *
  * We have 2 streams: the left stream and the right stream.
  * We split the JOIN ON section expression actions into two parts: left join expression actions and right join expression actions.
  * Left join expression actions must be used to calculate the necessary actions for the left stream.
  * Right join expression actions must be used to calculate the necessary actions for the right stream.
  */
/// Forward declaration: this header only refers to the planner context through a shared pointer.
class PlannerContext;
using PlannerContextPtr = std::shared_ptr<PlannerContext>;

/// Single ASOF condition of a JOIN clause: which key pair it applies to and with which inequality.
struct ASOFCondition
{
    /// Index of the key pair inside the clause key node lists (JoinClause::addASOFKey stores the index of the pair it has just appended)
    size_t key_index;
    /// Inequality of the ASOF condition
    ASOFJoinInequality asof_inequality;
};

/// Representation of one clause of the JOIN ON section: key pairs, ASOF conditions and per-side filter conditions
class JoinClause
{
public:
    /// Append a pair of key nodes. Left and right key lists always grow together.
    void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node)
    {
        left_key_nodes.push_back(left_key_node);
        right_key_nodes.push_back(right_key_node);
    }

    /// Append a pair of key nodes and register an ASOF condition that refers to this pair
    void addASOFKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, ASOFJoinInequality asof_inequality)
    {
        /// Position that the new key pair will occupy after it is appended
        const size_t key_index = left_key_nodes.size();
        addKey(left_key_node, right_key_node);
        asof_conditions.push_back(ASOFCondition{key_index, asof_inequality});
    }

    /// Append filter condition node to the list of the specified table side
    void addCondition(JoinTableSide table_side, const ActionsDAG::Node * condition_node)
    {
        if (table_side == JoinTableSide::Left)
            left_filter_condition_nodes.push_back(condition_node);
        else
            right_filter_condition_nodes.push_back(condition_node);
    }

    /// Left key nodes, read-only access
    const ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes() const
    {
        return left_key_nodes;
    }

    /// Left key nodes, mutable access
    ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes()
    {
        return left_key_nodes;
    }

    /// Right key nodes, read-only access
    const ActionsDAG::NodeRawConstPtrs & getRightKeyNodes() const
    {
        return right_key_nodes;
    }

    /// Right key nodes, mutable access
    ActionsDAG::NodeRawConstPtrs & getRightKeyNodes()
    {
        return right_key_nodes;
    }

    /// Returns true if at least one ASOF condition was added to the clause, false otherwise
    bool hasASOF() const
    {
        return !asof_conditions.empty();
    }

    /// ASOF conditions, in the order they were added
    const std::vector<ASOFCondition> & getASOFConditions() const
    {
        return asof_conditions;
    }

    /// Left table filter condition nodes, read-only access
    const ActionsDAG::NodeRawConstPtrs & getLeftFilterConditionNodes() const
    {
        return left_filter_condition_nodes;
    }

    /// Left table filter condition nodes, mutable access
    ActionsDAG::NodeRawConstPtrs & getLeftFilterConditionNodes()
    {
        return left_filter_condition_nodes;
    }

    /// Right table filter condition nodes, read-only access
    const ActionsDAG::NodeRawConstPtrs & getRightFilterConditionNodes() const
    {
        return right_filter_condition_nodes;
    }

    /// Right table filter condition nodes, mutable access
    ActionsDAG::NodeRawConstPtrs & getRightFilterConditionNodes()
    {
        return right_filter_condition_nodes;
    }

    /// Write textual dump of the clause into buffer
    void dump(WriteBuffer & buffer) const;

    /// Return textual dump of the clause as a string
    String dump() const;

private:
    /// Key nodes. Element i of the left list is paired with element i of the right list.
    ActionsDAG::NodeRawConstPtrs left_key_nodes;
    ActionsDAG::NodeRawConstPtrs right_key_nodes;

    /// ASOF conditions. Each one refers to a key pair by index.
    std::vector<ASOFCondition> asof_conditions;

    /// Filter condition nodes, stored separately for each table side
    ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes;
    ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes;
};

/// List of JOIN ON section clauses
using JoinClauses = std::vector<JoinClause>;

/// Join clauses together with the expression actions of the JOIN ON section (return type of buildJoinClausesAndActions)
struct JoinClausesAndActions
{
    /// Join clauses. Actions dag nodes point into join_expression_actions.
    JoinClauses join_clauses;
    /// Whole JOIN ON section expressions
    ActionsDAGPtr join_expression_actions;
    /// Left join expressions actions
    ActionsDAGPtr left_join_expressions_actions;
    /// Right join expressions actions
    ActionsDAGPtr right_join_expressions_actions;
};

/** Calculate join clauses and actions for the JOIN ON section.
  *
  * left_table_expression_columns - columns from the left join stream.
  * right_table_expression_columns - columns from the right join stream.
  * join_node - join query tree node.
  * planner_context - planner context.
  *
  * Returns the join clauses and the whole, left and right join expression actions (see JoinClausesAndActions).
  */
JoinClausesAndActions buildJoinClausesAndActions(
    const ColumnsWithTypeAndName & left_table_expression_columns,
    const ColumnsWithTypeAndName & right_table_expression_columns,
    const QueryTreeNodePtr & join_node,
    const PlannerContextPtr & planner_context);

/** Try to extract a boolean constant from the JOIN expression.
  * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1;
  * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1 != 1;
  *
  * join_node - join query tree node.
  *
  * NOTE(review): presumably returns std::nullopt when the JOIN expression is not a constant - confirm against the implementation.
  */
std::optional<bool> tryExtractConstantFromJoinNode(const QueryTreeNodePtr & join_node);

/** Choose JOIN algorithm for table join, using the right table expression, the left and right
  * table expression headers and the planner context.
  * Table join structure can be modified while choosing the JOIN algorithm for special JOIN algorithms.
  * For example JOIN with Dictionary engine, or JOIN with JOIN engine.
  *
  * table_join - table join structure, passed by non-const reference.
  * right_table_expression - right table expression query tree node.
  * left_table_expression_header - header of the left table expression.
  * right_table_expression_header - header of the right table expression.
  * planner_context - planner context.
  */
std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> & table_join,
    const QueryTreeNodePtr & right_table_expression,
    const Block & left_table_expression_header,
    const Block & right_table_expression_header,
    const PlannerContextPtr & planner_context);

}