aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Interpreters/LogicalExpressionsOptimizer.h
blob: a8a0d1863944bbc5068cf5c17d1173e412956100 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#pragma once

#include <Parsers/IAST.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>

#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <unordered_set>


namespace DB
{

struct Settings;
class ASTFunction;
class ASTSelectQuery;


/** This class provides functions for optimizing boolean expressions within queries.
  *
  * For simplicity, we call a homogeneous OR-chain any expression having the following structure:
  * expr = x1 OR ... OR expr = xN
  * where `expr` is an arbitrary expression and x1, ..., xN are literals of the same type
  */
class LogicalExpressionsOptimizer final
{
    struct ExtractedSettings
    {
        const UInt64 optimize_min_equality_disjunction_chain_length;

        explicit ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_)
        :   optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_)
        {}
    };

public:
    /// Constructor. Accepts the root of the query DAG.
    LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length);

    /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN
      * on the expressions `expr` IN (x1, ..., xN).
      */
    void perform();

    LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete;
    LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete;

private:
    /** The OR function with the expression.
    */
    struct OrWithExpression
    {
        OrWithExpression(const ASTFunction * or_function_, const IAST::Hash & expression_, const std::string & alias_);
        bool operator<(const OrWithExpression & rhs) const;

        const ASTFunction * or_function;
        const IAST::Hash expression;
        const std::string alias;
    };

    struct Equalities
    {
        std::vector<ASTFunction *> functions;
        bool is_processed = false;
    };

    using DisjunctiveEqualityChainsMap = std::map<OrWithExpression, Equalities>;
    using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type;

    /** Collect information about all the equations in the OR chains (not necessarily homogeneous).
      * This information is grouped by the expression that is on the left side of the equation.
      */
    void collectDisjunctiveEqualityChains();

    /** Check that the set of equalities expr = x1, ..., expr = xN fulfills the following two requirements:
      * 1. It's not too small
      * 2. x1, ... xN have the same type
      */
    bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const;

    /// Check if is LowCardinality OR chain
    bool isLowCardinalityEqualityChain(const std::vector<ASTFunction *> & functions) const;

    /// Insert the IN expression into the OR chain.
    static void addInExpression(const DisjunctiveEqualityChain & chain);

    /// Delete the equalities that were replaced by the IN expressions.
    void cleanupOrExpressions();

    /// Delete OR expressions that have only one operand.
    void fixBrokenOrExpressions();

    /// Restore the original column order after optimization.
    void reorderColumns();

    using ParentNodes = std::vector<IAST *>;
    using FunctionParentMap = std::unordered_map<const IAST *, ParentNodes>;
    using ColumnToPosition = std::unordered_map<const IAST *, size_t>;

    ASTSelectQuery * select_query;
    const TablesWithColumns & tables_with_columns;
    const ExtractedSettings settings;
    /// Information about the OR-chains inside the query.
    DisjunctiveEqualityChainsMap disjunctive_equality_chains_map;
    /// Number of processed OR-chains.
    size_t processed_count = 0;
    /// Parents of OR functions.
    FunctionParentMap or_parent_map;
    /// The position of each column.
    ColumnToPosition column_to_position;
    /// Set of nodes, that was visited.
    std::unordered_set<void *> visited_nodes;
};

}