aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp
blob: 4227ffc6873df842cb83d1929a04871ad1810f34 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#include <Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.h>

#include <Storages/MergeTree/MergeTreeIndexHypothesis.h>
#include <Interpreters/TreeCNFConverter.h>
#include <Interpreters/ComparisonGraph.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSelectQuery.h>

namespace DB
{

/// Error codes referenced by this translation unit.
/// They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

/// Builds the merged condition for a SELECT query.
///
/// The filter expression is taken from the WHERE and PREWHERE parts of the query
/// (joined with `and` when both are present), converted to CNF and stored in
/// `expression_cnf`. The table constraints are then registered via addConstraints().
///
/// query        - query info; `query.query` is accessed as an ASTSelectQuery.
/// constraints  - constraints description forwarded to addConstraints().
/// granularity_ - granularity passed to the IMergeTreeIndexMergedCondition base.
MergeTreeIndexhypothesisMergedCondition::MergeTreeIndexhypothesisMergedCondition(
    const SelectQueryInfo & query, const ConstraintsDescription & constraints, size_t granularity_)
    : IMergeTreeIndexMergedCondition(granularity_)
{
    const auto & select_query = query.query->as<ASTSelectQuery &>();
    const auto where_ast = select_query.where();
    const auto prewhere_ast = select_query.prewhere();

    /// The conditions are cloned rather than referenced directly.
    if (where_ast)
    {
        if (prewhere_ast)
            expression_ast = makeASTFunction("and", where_ast->clone(), prewhere_ast->clone());
        else
            expression_ast = where_ast->clone();
    }
    else if (prewhere_ast)
    {
        expression_ast = prewhere_ast->clone();
    }

    /// Without any filter the CNF is built from an empty AND-group.
    expression_cnf = std::make_unique<CNFQuery>(
        expression_ast ? TreeCNFConverter::toCNF(expression_ast) : CNFQuery::AndGroup{});

    addConstraints(constraints);
}

void MergeTreeIndexhypothesisMergedCondition::addIndex(const MergeTreeIndexPtr & index)
{
    if (!index->isMergeable() || index->getGranularity() != granularity)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Index {} can not be merged", index->index.type);

    const auto hypothesis_index = std::dynamic_pointer_cast<const MergeTreeIndexHypothesis>(index);
    if (!hypothesis_index)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Only hypothesis index is supported here");

    static const NameSet relations = { "equals", "notEquals", "less", "lessOrEquals", "greaterOrEquals", "greater"};

    // TODO: move to index hypothesis
    std::vector<ASTPtr> compare_hypotheses_data;
    std::vector<CNFQuery::OrGroup> hypotheses_data;
    const auto cnf = TreeCNFConverter::toCNF(hypothesis_index->index.expression_list_ast->children.front()).pullNotOutFunctions();

    for (const auto & group : cnf.getStatements())
    {
        if (group.size() == 1)
        {
            hypotheses_data.push_back(group);
            CNFQuery::AtomicFormula atomic_formula = *group.begin();
            CNFQuery::AtomicFormula atom{atomic_formula.negative, atomic_formula.ast->clone()};
            pushNotIn(atom);
            assert(!atom.negative);

            const auto * func = atom.ast->as<ASTFunction>();
            if (func && relations.contains(func->name))
                compare_hypotheses_data.push_back(atom.ast);
        }
    }

    index_to_compare_atomic_hypotheses.push_back(compare_hypotheses_data);
    index_to_atomic_hypotheses.push_back(hypotheses_data);
}

void MergeTreeIndexhypothesisMergedCondition::addConstraints(const ConstraintsDescription & constraints_description)
{
    auto atomic_constraints_data = constraints_description.getAtomicConstraintData();
    for (const auto & atomic_formula : atomic_constraints_data)
    {
        CNFQuery::AtomicFormula atom{atomic_formula.negative, atomic_formula.ast->clone()};
        pushNotIn(atom);
        atomic_constraints.push_back(atom.ast);
    }
}

/// Decides whether the merged hypothesis indexes are useless for the query condition.
///
/// A "weak" comparison graph is built from all atomic constraints and all comparison
/// hypotheses of every added index, assuming all of them hold and replacing strict
/// relations by non-strict ones (< -> <=, > -> >=).
///
/// Returns false if at least one atom of the condition's CNF is a two-argument function
/// whose arguments are both known to the graph and connected by a path from the first
/// to the second. Returns true otherwise.
bool MergeTreeIndexhypothesisMergedCondition::alwaysUnknownOrTrue() const
{
    ASTs active_atomic_formulas(atomic_constraints);
    for (const auto & hypothesis : index_to_compare_atomic_hypotheses)
    {
        active_atomic_formulas.insert(
            std::end(active_atomic_formulas),
            std::begin(hypothesis),
            std::end(hypothesis));
    }

    /// transform active formulas
    for (auto & formula : active_atomic_formulas)
    {
        formula = formula->clone(); /// do all operations with copy
        auto * func = formula->as<ASTFunction>();
        if (!func)
            continue;

        if (func->name == "less")
            func->name = "lessOrEquals";
        else if (func->name == "greater")
            func->name = "greaterOrEquals";
    }

    const auto weak_graph = std::make_unique<ComparisonGraph<ASTPtr>>(active_atomic_formulas);

    bool useless = true;
    expression_cnf->iterateGroups(
        [&](const CNFQuery::OrGroup & or_group)
        {
            /// The answer is already known: do not clone and probe the remaining groups.
            if (!useless)
                return;

            for (const auto & atomic_formula : or_group)
            {
                CNFQuery::AtomicFormula atom{atomic_formula.negative, atomic_formula.ast->clone()};
                pushNotIn(atom);

                /// Only two-argument functions can be looked up in the graph.
                const auto * func = atom.ast->as<ASTFunction>();
                if (!func || !func->arguments || func->arguments->children.size() != 2)
                    continue;

                const auto left = weak_graph->getComponentId(func->arguments->children[0]);
                const auto right = weak_graph->getComponentId(func->arguments->children[1]);
                if (left && right && weak_graph->hasPath(left.value(), right.value()))
                {
                    useless = false;
                    return;
                }
            }
        });
    return useless;
}

/// Checks whether the query condition may hold on a granule, given which hypotheses
/// are met on it.
///
/// granules - index granules; each must be a MergeTreeIndexGranuleHypothesis. Their `met`
///            flags form the key used for both caches and for building the graph
///            (buildGraph() matches flags to indexes by position).
///
/// Returns false only when some OR-group of the condition's CNF is proven impossible:
/// every atom in it is a two-argument function that the comparison graph rules out.
/// An atom the graph cannot reason about (not a function, or not exactly two arguments)
/// is treated as possibly true, so its group never causes the granule to be skipped.
///
/// Answers are cached per flag vector in `answer_cache`; both caches are accessed
/// under `cache_mutex`.
///
/// Throws LOGICAL_ERROR if a granule is not a hypothesis index granule.
bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTreeIndexGranules & granules) const
{
    std::vector<bool> values;
    for (const auto & index_granule : granules)
    {
        const auto granule = std::dynamic_pointer_cast<const MergeTreeIndexGranuleHypothesis>(index_granule);
        if (!granule)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Only hypothesis index is supported here.");
        values.push_back(granule->met);
    }

    const ComparisonGraph<ASTPtr> * graph = nullptr;

    {
        std::lock_guard lock(cache_mutex);
        if (const auto it = answer_cache.find(values); it != std::end(answer_cache))
            return it->second;

        graph = getGraph(values);
    }

    /// The evaluation below runs without the lock; only the cache accesses are guarded.
    bool always_false = false;
    expression_cnf->iterateGroups(
        [&](const CNFQuery::OrGroup & or_group)
        {
            if (always_false)
                return;

            for (const auto & atomic_formula : or_group)
            {
                CNFQuery::AtomicFormula atom{atomic_formula.negative, atomic_formula.ast->clone()};
                pushNotIn(atom);

                /// The graph only understands binary relations. Anything else cannot be
                /// disproved here, so the whole group has to be considered satisfiable.
                const auto * func = atom.ast->as<ASTFunction>();
                if (!func || !func->arguments || func->arguments->children.size() != 2)
                    return;

                const auto expected = ComparisonGraph<ASTPtr>::atomToCompareResult(atom);
                if (graph->isPossibleCompare(expected, func->arguments->children[0], func->arguments->children[1]))
                {
                    /// One satisfiable atom is enough for the group.
                    return;
                }
            }
            always_false = true;
        });

    const bool may_be_true = !always_false;

    std::lock_guard lock(cache_mutex);
    answer_cache[values] = may_be_true;
    return may_be_true;
}

/// Builds a comparison graph from the atomic constraints plus the comparison
/// hypotheses of every index whose flag in `values` is set.
///
/// values - one flag per added index; position i refers to
///          `index_to_compare_atomic_hypotheses[i]`.
std::unique_ptr<ComparisonGraph<ASTPtr>> MergeTreeIndexhypothesisMergedCondition::buildGraph(const std::vector<bool> & values) const
{
    ASTs formulas = atomic_constraints;
    for (size_t index_pos = 0; index_pos != values.size(); ++index_pos)
    {
        /// Hypotheses of indexes whose flag is not set are left out.
        if (!values[index_pos])
            continue;

        const auto & hypotheses = index_to_compare_atomic_hypotheses[index_pos];
        formulas.insert(std::end(formulas), std::begin(hypotheses), std::end(hypotheses));
    }
    return std::make_unique<ComparisonGraph<ASTPtr>>(formulas);
}

/// Returns the comparison graph for the given hypothesis flags, building it with
/// buildGraph() and storing it in `graph_cache` on first use.
///
/// The returned pointer is owned by the cache. The visible caller invokes this
/// function while holding `cache_mutex`; no locking is done here.
const ComparisonGraph<ASTPtr> * MergeTreeIndexhypothesisMergedCondition::getGraph(const std::vector<bool> & values) const
{
    auto it = graph_cache.find(values);
    if (it == graph_cache.end())
    {
        /// Build the graph before inserting: if buildGraph() throws, the cache must not
        /// keep an empty entry that later calls would hand out as a null graph.
        it = graph_cache.emplace(values, buildGraph(values)).first;
    }

    return it->second.get();
}

}