blob: dc58747221e1d82fa1ea44b795e93cec2743f5d8 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
#include <Analyzer/Passes/CountDistinctPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/QueryNode.h>
namespace DB
{
namespace
{
/** Rewrites a query whose only projection is `countDistinct(column)` (or `uniqExact(column)`)
  * over a single table expression:
  *
  *     SELECT countDistinct(column) FROM table_expression
  *
  * into
  *
  *     SELECT count() FROM (SELECT column FROM table_expression GROUP BY column)
  *
  * The rewrite is performed only when the `count_distinct_optimization` setting is enabled
  * and the query has no clauses other than SELECT and FROM.
  */
class CountDistinctVisitor : public InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>
{
public:
    using Base = InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>;
    using Base::Base;

    /// Called for every visited node; nodes that do not match the pattern above are left untouched.
    void enterImpl(QueryTreeNodePtr & node)
    {
        if (!getSettings().count_distinct_optimization)
            return;

        auto * query_node = node->as<QueryNode>();

        /// Check that query has only SELECT clause
        if (!query_node || (query_node->hasWith() || query_node->hasPrewhere() || query_node->hasWhere() || query_node->hasGroupBy() ||
            query_node->hasHaving() || query_node->hasWindow() || query_node->hasOrderBy() || query_node->hasLimitByLimit() || query_node->hasLimitByOffset() ||
            query_node->hasLimitBy() || query_node->hasLimit() || query_node->hasOffset()))
            return;

        /// Check that query has only single table expression
        auto join_tree_node_type = query_node->getJoinTree()->getNodeType();
        if (join_tree_node_type == QueryTreeNodeType::JOIN || join_tree_node_type == QueryTreeNodeType::ARRAY_JOIN)
            return;

        /// Check that query has only single node in projection
        auto & projection_nodes = query_node->getProjection().getNodes();
        if (projection_nodes.size() != 1)
            return;

        /// Check that query single projection node is `countDistinct` function
        auto & projection_node = projection_nodes[0];
        auto * function_node = projection_node->as<FunctionNode>();
        if (!function_node)
            return;

        /// Function name is compared case-insensitively.
        auto lower_function_name = Poco::toLower(function_node->getFunctionName());
        if (lower_function_name != "countdistinct" && lower_function_name != "uniqexact")
            return;

        /// Check that `countDistinct` function has single COLUMN argument.
        /// Both conditions must be combined with `||`: the size check has to short-circuit
        /// before element 0 is accessed (the argument list may be empty), and a single
        /// non-COLUMN argument must be rejected because it is cast to ColumnNode below.
        auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes();
        if (count_distinct_arguments_nodes.size() != 1 || count_distinct_arguments_nodes[0]->getNodeType() != QueryTreeNodeType::COLUMN)
            return;

        auto & count_distinct_argument_column = count_distinct_arguments_nodes[0];
        auto & count_distinct_argument_column_typed = count_distinct_argument_column->as<ColumnNode &>();

        /// Build subquery SELECT count_distinct_argument_column FROM table_expression GROUP BY count_distinct_argument_column
        auto subquery = std::make_shared<QueryNode>(Context::createCopy(query_node->getContext()));
        subquery->getJoinTree() = query_node->getJoinTree();
        subquery->getProjection().getNodes().push_back(count_distinct_argument_column);
        subquery->getGroupBy().getNodes().push_back(count_distinct_argument_column);
        subquery->resolveProjectionColumns({count_distinct_argument_column_typed.getColumn()});

        /// Put subquery into JOIN TREE of initial query
        query_node->getJoinTree() = std::move(subquery);

        /// Replace `countDistinct` of initial query into `count`.
        /// Arguments are cleared only after the subquery has been built, because
        /// `count_distinct_argument_column` refers to an element of the argument list.
        AggregateFunctionProperties properties;
        auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
        function_node->resolveAsAggregateFunction(std::move(aggregate_function));
        function_node->getArguments().getNodes().clear();
    }
};
}
/// Entry point of the pass: walks the query tree rooted at `query_tree_node`
/// with a CountDistinctVisitor constructed from `context`.
void CountDistinctPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    /// The context is handed over to the visitor; it is not used here afterwards.
    CountDistinctVisitor count_distinct_visitor(std::move(context));

    count_distinct_visitor.visit(query_tree_node);
}
}
|