1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
#include <Common/typeid_cast.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSubquery.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTTablesInSelectQuery.h>
namespace DB
{
namespace
{
/// Recursively walks the arguments of `func` and records every argument slot
/// that holds an ASTIdentifier.
///
/// @param func         Function whose argument tree is scanned.
/// @param identifiers  Output set. Receives the address of each ASTPtr slot
///                     that contains an identifier, so the caller can replace
///                     the slot in place. Using a set implicitly removes
///                     duplicate slots.
/// @return false as soon as a nested function is found that must not be
///         rewritten (arrayJoin, lambda, a window function or an aggregate
///         function); true otherwise. When false is returned, `identifiers`
///         may already contain slots collected before the offending node.
bool extractIdentifiers(const ASTFunction & func, std::unordered_set<ASTPtr *> & identifiers)
{
    /// A function node without an argument list contributes no identifiers.
    /// Treat it exactly like an empty argument list instead of dereferencing
    /// a null pointer.
    if (!func.arguments)
        return true;

    for (auto & arg : func.arguments->children)
    {
        if (const auto * arg_func = arg->as<ASTFunction>())
        {
            /// arrayJoin() is special and should not be optimized (think about
            /// it as an aggregate function), otherwise a wrong result will be
            /// produced:
            ///     SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
            ///     ┌─number─┬─arrayJoin(array(array(), array()))─┐
            ///     │      0 │ []                                 │
            ///     │      0 │ []                                 │
            ///     └────────┴────────────────────────────────────┘
            /// While it should be:
            ///     ┌─number─┬─any(arrayJoin(array(array(), array())))─┐
            ///     │      0 │ []                                      │
            ///     └────────┴─────────────────────────────────────────┘
            if (arg_func->name == "arrayJoin")
                return false;

            /// Lambda expressions are not rewritten either.
            if (arg_func->name == "lambda")
                return false;

            // We are looking for identifiers inside a function calculated inside
            // the aggregate function `any()`. Window or aggregate function can't
            // be inside `any`, but this check in GetAggregatesMatcher happens
            // later, so we have to explicitly skip these nested functions here.
            if (arg_func->is_window_function
                || AggregateUtils::isAggregateFunction(*arg_func))
            {
                return false;
            }

            /// Descend into the nested function; propagate a refusal upwards.
            if (!extractIdentifiers(*arg_func, identifiers))
                return false;
        }
        else if (arg->as<ASTIdentifier>())
        {
            /// Remember the slot itself (not the node) so it can be reassigned later.
            identifiers.emplace(&arg);
        }
    }

    return true;
}
}
/// Entry point for a generic AST node: forwards to the ASTFunction overload
/// when `ast` is a function that is not a window function; does nothing otherwise.
void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
    auto * function_node = ast->as<ASTFunction>();

    /// Not a function at all, or a window function: leave the node untouched.
    if (!function_node || function_node->is_window_function)
        return;

    visit(*function_node, ast, data);
}
/// Rewrites `any(f(x, y, g(z)))` (or the same with `anyLast`) into
/// `f(any(x), any(y), g(any(z)))`.
///
/// @param func  The function node stored in `ast`.
/// @param ast   Slot holding `func`; on a successful rewrite it is reassigned
///              to the (former) single argument of `func`, which also
///              receives the alias of `func`.
/// @param data  Visitor state; `data.rewritten` records the nodes that were
///              already processed so that a second visit only unwraps them.
///
/// The node is left untouched when it is not `any`/`anyLast`, does not have
/// exactly one argument, when that argument is not a function with at least
/// one argument, or when extractIdentifiers() refuses the argument tree.
void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
{
    if (!func.arguments || func.arguments->children.empty() || !func.arguments->children[0])
        return;

    if (func.name != "any" && func.name != "anyLast")
        return;

    auto & func_arguments = func.arguments->children;
    if (func_arguments.size() != 1)
        return;

    /// The nested argument list is checked for null in the same way as the
    /// outer one above, before it is dereferenced.
    const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
    if (!first_arg_func || !first_arg_func->arguments || first_arg_func->arguments->children.empty())
        return;

    /// Replaces the `any(...)` node by its single argument, keeping the alias.
    /// `func` and `func_arguments` belong to the node that `ast` referred to
    /// before the assignment, so they must not be used after this call.
    const auto unwrap = [&]
    {
        func_arguments[0]->setAlias(func.alias);
        ast = func_arguments[0];
    };

    /// We have rewritten this function. Just unwrap its argument.
    if (data.rewritten.contains(ast.get()))
    {
        unwrap();
        return;
    }

    std::unordered_set<ASTPtr *> identifiers; /// implicit remove duplicates
    if (!extractIdentifiers(func, identifiers))
        return;

    /// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z))))
    for (auto * ast_to_change : identifiers)
    {
        ASTPtr identifier_ast = *ast_to_change;
        *ast_to_change = makeASTFunction(func.name);
        (*ast_to_change)->as<ASTFunction>()->arguments->children.emplace_back(identifier_ast);
    }

    /// Remember the original node before `ast` is redirected to the argument.
    data.rewritten.insert(ast.get());

    /// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z)))
    unwrap();
}
/// Tells the traversal whether to descend into `node`.
/// Returns false for subqueries, table expressions and ARRAY JOIN nodes,
/// true for every other node. The second parameter is unused.
bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
    if (node->as<ASTSubquery>())
        return false;

    if (node->as<ASTTableExpression>())
        return false;

    if (node->as<ASTArrayJoin>())
        return false;

    return true;
}
}
|