aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp
blob: 1fada88a21ca1118b6aee050b89e53d65ba6ee2f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>

#include <memory>
#include <unordered_map>
#include <vector>

#include <Common/likePatternToRegexp.h>

#include <Core/Field.h>

#include <DataTypes/DataTypesNumber.h>

#include <Functions/FunctionFactory.h>

#include <Interpreters/Context.h>

#include <Analyzer/ConstantNode.h>
#include <Analyzer/UnionNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/HashUtils.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>

namespace DB
{

namespace
{

/** Query tree visitor that looks at every `or` function node and merges its
  * `like` / `ilike` arguments that have a constant string pattern into
  * `multiMatchAny` calls - one call per distinct first argument.
  * All other arguments of the `or` are kept unchanged.
  */
class ConvertOrLikeChainVisitor : public InDepthQueryTreeVisitorWithContext<ConvertOrLikeChainVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<ConvertOrLikeChainVisitor>;
    using Base::Base;

    /// Stores the resolvers used to re-resolve the rewritten `or` and the created `multiMatchAny` nodes.
    explicit ConvertOrLikeChainVisitor(FunctionOverloadResolverPtr or_function_resolver_,
        FunctionOverloadResolverPtr match_function_resolver_,
        ContextPtr context)
        : Base(std::move(context))
        , or_function_resolver(std::move(or_function_resolver_))
        , match_function_resolver(std::move(match_function_resolver_))
    {}

    /// Children are visited only when the rewrite is switched on, hyperscan is allowed
    /// and both hyperscan regexp length settings are equal to zero.
    bool needChildVisit(VisitQueryTreeNodeType &, VisitQueryTreeNodeType &)
    {
        const auto & settings = getSettings();

        if (!settings.optimize_or_like_chain || !settings.allow_hyperscan)
            return false;

        return settings.max_hyperscan_regexp_length == 0
            && settings.max_hyperscan_regexp_total_length == 0;
    }

    /// Rewrites the arguments of `node` in place if it is an `or` function; does nothing otherwise.
    void enterImpl(QueryTreeNodePtr & node)
    {
        auto * or_node = node->as<FunctionNode>();
        const bool is_or = or_node && or_node->getFunctionName() == "or";
        if (!is_or)
            return;

        /// Arguments of the rewritten `or`, in their final order.
        QueryTreeNodes rewritten_arguments;
        /// Regexps gathered so far for each distinct first argument of `like` / `ilike`.
        QueryTreeNodePtrWithHashMap<Array> patterns_by_first_argument;
        /// Created `multiMatchAny` nodes which still lack their array of patterns.
        FunctionNodes pending_matches;

        for (auto & argument : or_node->getArguments())
        {
            QueryTreeNodePtr first_argument;
            String regexp;

            if (!tryConvertLike(argument, first_argument, regexp))
            {
                /// Not a convertible {i}like -> the argument stays as it is.
                rewritten_arguments.push_back(argument);
                continue;
            }

            auto found = patterns_by_first_argument.find(first_argument);
            if (found == patterns_by_first_argument.end())
            {
                found = patterns_by_first_argument.insert({first_argument, Array{}}).first;

                /// Only the first argument is known for now, the patterns are appended
                /// once the whole list of `or` arguments has been processed.
                auto match_node = std::make_shared<FunctionNode>("multiMatchAny");
                match_node->getArguments().getNodes().push_back(first_argument);
                pending_matches.push_back(match_node);

                rewritten_arguments.push_back(std::move(match_node));
            }

            found->second.push_back(regexp);
        }

        /// Every pattern is collected by now: complete and resolve each `multiMatchAny`.
        for (auto & match_node : pending_matches)
        {
            auto & match_arguments = match_node->getArguments().getNodes();
            auto & collected_patterns = patterns_by_first_argument.at(match_arguments[0]);
            match_arguments.push_back(std::make_shared<ConstantNode>(Field{std::move(collected_patterns)}));
            match_node->resolveAsFunction(match_function_resolver);
        }

        /// OR must have at least two arguments, so a lone argument is paired with constant 0.
        if (rewritten_arguments.size() == 1)
            rewritten_arguments.push_back(std::make_shared<ConstantNode>(static_cast<UInt8>(0)));

        or_node->getArguments().getNodes() = std::move(rewritten_arguments);
        or_node->resolveAsFunction(or_function_resolver);
    }
private:
    using FunctionNodes = std::vector<std::shared_ptr<FunctionNode>>;

    /** Checks whether `candidate` is a `like` / `ilike` function with exactly two arguments
      * whose second argument is a constant of string type.
      * On success stores the first argument of the function and the regexp built from the
      * pattern into the output parameters and returns true.
      * On failure returns false, the output parameters are left untouched.
      */
    static bool tryConvertLike(const QueryTreeNodePtr & candidate, QueryTreeNodePtr & first_argument, String & regexp)
    {
        auto * like_node = candidate->as<FunctionNode>();
        if (!like_node)
            return false;

        const auto & function_name = like_node->getFunctionName();
        const bool case_insensitive = function_name == "ilike";
        if (!case_insensitive && function_name != "like")
            return false;

        const auto & like_arguments = like_node->getArguments().getNodes();
        if (like_arguments.size() != 2)
            return false;

        const auto * pattern = like_arguments[1]->as<ConstantNode>();
        if (!pattern || !isString(pattern->getResultType()))
            return false;

        regexp = likePatternToRegexp(pattern->getValue().get<String>());
        /// Case insensitive. Works with UTF-8 as well.
        if (case_insensitive)
            regexp = "(?i)" + regexp;

        first_argument = like_arguments[0];
        return true;
    }

    const FunctionOverloadResolverPtr or_function_resolver;
    const FunctionOverloadResolverPtr match_function_resolver;
};

}

/// Looks up the `or` and `multiMatchAny` function resolvers and runs
/// ConvertOrLikeChainVisitor over the given query tree.
void ConvertOrLikeChainPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    auto & function_factory = FunctionFactory::instance();

    /// Both lookups read `context`, so they are done in separate statements
    /// before it is moved into the visitor below.
    auto resolver_for_or = function_factory.get("or", context);
    auto resolver_for_match = function_factory.get("multiMatchAny", context);

    ConvertOrLikeChainVisitor like_chain_visitor(
        std::move(resolver_for_or),
        std::move(resolver_for_match),
        std::move(context));
    like_chain_visitor.visit(query_tree_node);
}

}