aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Interpreters/RewriteAnyFunctionVisitor.cpp
blob: 163e117f93d1f447a0ec89a4b7b520475293022f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#include <Common/typeid_cast.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSubquery.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTTablesInSelectQuery.h>

namespace DB
{

namespace
{

/// Collects the argument slots of `func` that hold identifiers, descending
/// recursively into nested function calls.
///
/// Pointers to the owning `ASTPtr` slots (not to the nodes) are stored, so the
/// caller can replace every identifier in place. Using a set implicitly removes
/// duplicate slots.
///
/// Returns false as soon as a nested function is found that must not be moved
/// out of the enclosing aggregate (arrayJoin, lambda, window or aggregate
/// function). In that case `identifiers` may be partially filled and should be
/// ignored by the caller. Returns true otherwise.
bool extractIdentifiers(const ASTFunction & func, std::unordered_set<ASTPtr *> & identifiers)
{
    /// A function node without an argument list has no identifiers to collect.
    /// The top-level caller checks this for the outer function, but nested
    /// functions reach here through recursion without that check.
    if (!func.arguments)
        return true;

    for (auto & arg : func.arguments->children)
    {
        /// Skip empty slots instead of dereferencing a null pointer.
        if (!arg)
            continue;

        if (const auto * arg_func = arg->as<ASTFunction>())
        {
            /// arrayJoin() is special and should not be optimized (think about
            /// it as an aggregate function), otherwise wrong result will be
            /// produced:
            ///     SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
            ///     ┌─number─┬─arrayJoin(array(array(), array()))─┐
            ///     │      0 │ []                                 │
            ///     │      0 │ []                                 │
            ///     └────────┴────────────────────────────────────┘
            /// While should be:
            ///     ┌─number─┬─any(arrayJoin(array(array(), array())))─┐
            ///     │      0 │ []                                      │
            ///     └────────┴─────────────────────────────────────────┘
            if (arg_func->name == "arrayJoin")
                return false;

            if (arg_func->name == "lambda")
                return false;

            // We are looking for identifiers inside a function calculated inside
            // the aggregate function `any()`. Window or aggregate function can't
            // be inside `any`, but this check in GetAggregatesMatcher happens
            // later, so we have to explicitly skip these nested functions here.
            if (arg_func->is_window_function
                || AggregateUtils::isAggregateFunction(*arg_func))
            {
                return false;
            }

            if (!extractIdentifiers(*arg_func, identifiers))
                return false;
        }
        else if (arg->as<ASTIdentifier>())
            identifiers.emplace(&arg);
    }

    return true;
}

}


/// Entry point for a single AST node: only function nodes that are not window
/// functions are forwarded to the function-specific overload.
void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * function_node = ast->as<ASTFunction>();

    /// Not a function, or a window function: nothing to rewrite.
    if (!function_node || function_node->is_window_function)
        return;

    visit(*function_node, ast, data);
}

/// Moves a function out of `any` / `anyLast`:
///     any(f(x, y, g(z))) -> f(any(x), any(y), g(any(z)))
///
/// `func` is the function node held by `ast`. On success `ast` is replaced by
/// the (modified) single argument of `func`, which inherits the alias of `func`.
/// The original node is remembered in `data.rewritten`, so that if the same
/// node is visited again its argument is only unwrapped, not wrapped twice
/// (presumably this happens when one node is reachable from several places
/// in the tree).
///
/// Nothing is changed when `func` is not a single-argument `any`/`anyLast`
/// whose argument is a function with at least one argument, or when
/// extractIdentifiers() rejects the expression.
void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
{
    if (!func.arguments || func.arguments->children.empty() || !func.arguments->children[0])
        return;

    if (func.name != "any" && func.name != "anyLast")
        return;

    auto & func_arguments = func.arguments->children;

    if (func_arguments.size() != 1)
        return;

    /// The argument list of the nested function may be absent as well; check it
    /// the same way as for the outer function before looking at its children.
    const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
    if (!first_arg_func || !first_arg_func->arguments || first_arg_func->arguments->children.empty())
        return;

    /// We have rewritten this function. Just unwrap its argument.
    if (data.rewritten.contains(ast.get()))
    {
        func_arguments[0]->setAlias(func.alias);
        ast = func_arguments[0];
        return;
    }

    std::unordered_set<ASTPtr *> identifiers; /// implicit remove duplicates
    if (!extractIdentifiers(func, identifiers))
        return;

    /// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z))))
    for (auto * ast_to_change : identifiers)
    {
        /// Keep the identifier alive while its slot is overwritten with the new function node.
        ASTPtr identifier_ast = *ast_to_change;
        *ast_to_change = makeASTFunction(func.name);
        (*ast_to_change)->as<ASTFunction>()->arguments->children.emplace_back(identifier_ast);
    }

    /// Must be recorded before `ast` is reassigned below: the key is the original node.
    data.rewritten.insert(ast.get());

    /// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z)))
    func_arguments[0]->setAlias(func.alias);
    ast = func_arguments[0];
}

/// Tells the traversal whether to descend into `node`: subqueries, table
/// expressions and ARRAY JOIN nodes are not entered, everything else is.
bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
    if (node->as<ASTSubquery>())
        return false;

    if (node->as<ASTTableExpression>())
        return false;

    if (node->as<ASTArrayJoin>())
        return false;

    return true;
}

}