1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <unordered_set>
namespace DB
{
struct Settings;
class ASTFunction;
class ASTSelectQuery;
/** This class provides functions for optimizing boolean expressions within queries.
*
* For simplicity, we call a homogeneous OR-chain any expression having the following structure:
* expr = x1 OR ... OR expr = xN
* where `expr` is an arbitrary expression and x1, ..., xN are literals of the same type
*/
class LogicalExpressionsOptimizer final
{
struct ExtractedSettings
{
const UInt64 optimize_min_equality_disjunction_chain_length;
explicit ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_)
: optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_)
{}
};
public:
/// Constructor. Accepts the root of the query DAG.
LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length);
/** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN
* on the expressions `expr` IN (x1, ..., xN).
*/
void perform();
LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete;
LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete;
private:
/** The OR function with the expression.
*/
struct OrWithExpression
{
OrWithExpression(const ASTFunction * or_function_, const IAST::Hash & expression_, const std::string & alias_);
bool operator<(const OrWithExpression & rhs) const;
const ASTFunction * or_function;
const IAST::Hash expression;
const std::string alias;
};
struct Equalities
{
std::vector<ASTFunction *> functions;
bool is_processed = false;
};
using DisjunctiveEqualityChainsMap = std::map<OrWithExpression, Equalities>;
using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type;
/** Collect information about all the equations in the OR chains (not necessarily homogeneous).
* This information is grouped by the expression that is on the left side of the equation.
*/
void collectDisjunctiveEqualityChains();
/** Check that the set of equalities expr = x1, ..., expr = xN fulfills the following two requirements:
* 1. It's not too small
* 2. x1, ... xN have the same type
*/
bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const;
/// Check if is LowCardinality OR chain
bool isLowCardinalityEqualityChain(const std::vector<ASTFunction *> & functions) const;
/// Insert the IN expression into the OR chain.
static void addInExpression(const DisjunctiveEqualityChain & chain);
/// Delete the equalities that were replaced by the IN expressions.
void cleanupOrExpressions();
/// Delete OR expressions that have only one operand.
void fixBrokenOrExpressions();
/// Restore the original column order after optimization.
void reorderColumns();
using ParentNodes = std::vector<IAST *>;
using FunctionParentMap = std::unordered_map<const IAST *, ParentNodes>;
using ColumnToPosition = std::unordered_map<const IAST *, size_t>;
ASTSelectQuery * select_query;
const TablesWithColumns & tables_with_columns;
const ExtractedSettings settings;
/// Information about the OR-chains inside the query.
DisjunctiveEqualityChainsMap disjunctive_equality_chains_map;
/// Number of processed OR-chains.
size_t processed_count = 0;
/// Parents of OR functions.
FunctionParentMap or_parent_map;
/// The position of each column.
ColumnToPosition column_to_position;
/// Set of nodes, that was visited.
std::unordered_set<void *> visited_nodes;
};
}
|