aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/Interpreters/TreeOptimizer.cpp
diff options
context:
space:
mode:
authorvitalyisaev <vitalyisaev@ydb.tech>2023-11-14 09:58:56 +0300
committervitalyisaev <vitalyisaev@ydb.tech>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/Interpreters/TreeOptimizer.cpp
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
downloadydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/Interpreters/TreeOptimizer.cpp')
-rw-r--r--contrib/clickhouse/src/Interpreters/TreeOptimizer.cpp823
1 files changed, 823 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/Interpreters/TreeOptimizer.cpp b/contrib/clickhouse/src/Interpreters/TreeOptimizer.cpp
new file mode 100644
index 0000000000..9612a4d6c3
--- /dev/null
+++ b/contrib/clickhouse/src/Interpreters/TreeOptimizer.cpp
@@ -0,0 +1,823 @@
+#include <Core/Settings.h>
+
+#include <Interpreters/TreeOptimizer.h>
+#include <Interpreters/TreeRewriter.h>
+#include <Interpreters/OptimizeIfChains.h>
+#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
+#include <Interpreters/WhereConstraintsOptimizer.h>
+#include <Interpreters/SubstituteColumnOptimizer.h>
+#include <Interpreters/TreeCNFConverter.h>
+#include <Interpreters/ArithmeticOperationsInAgrFuncOptimize.h>
+#include <Interpreters/DuplicateOrderByVisitor.h>
+#include <Interpreters/GroupByFunctionKeysVisitor.h>
+#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
+#include <Interpreters/RewriteAnyFunctionVisitor.h>
+#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
+#include <Interpreters/FunctionMaskingArgumentCheckVisitor.h>
+#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
+#include <Interpreters/RewriteCountVariantsVisitor.h>
+#include <Interpreters/MonotonicityCheckVisitor.h>
+#include <Interpreters/ConvertStringsToEnumVisitor.h>
+#include <Interpreters/ConvertFunctionOrLikeVisitor.h>
+#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/ExternalDictionariesLoader.h>
+#include <Interpreters/GatherFunctionQuantileVisitor.h>
+#include <Interpreters/RewriteSumIfFunctionVisitor.h>
+#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
+#include <Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h>
+
+#include <Parsers/ASTExpressionList.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTOrderByElement.h>
+#include <Parsers/ASTSelectQuery.h>
+#include <Parsers/ASTSubquery.h>
+#include <Parsers/ASTSelectWithUnionQuery.h>
+#include <Parsers/ASTTablesInSelectQuery.h>
+
+#include <Functions/FunctionFactory.h>
+#include <Functions/UserDefined/UserDefinedExecutableFunctionFactory.h>
+#include <Storages/IStorage.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR; /// Used by TreeOptimizer::apply when the query is not an ASTSelectQuery.
+ extern const int UNKNOWN_TYPE_OF_AST_NODE; /// Used by the ORDER BY optimizations when a child is not a usable ASTOrderByElement.
+}
+
+namespace
+{
+
+const std::unordered_set<String> possibly_injective_function_names /// Functions whose injectivity optimizeGroupBy decides per dictionary attribute (via the dictionary's isInjective) rather than via the function factory.
+{
+ "dictGet",
+ "dictGetString",
+ "dictGetUInt8",
+ "dictGetUInt16",
+ "dictGetUInt32",
+ "dictGetUInt64",
+ "dictGetInt8",
+ "dictGetInt16",
+ "dictGetInt32",
+ "dictGetInt64",
+ "dictGetFloat32",
+ "dictGetFloat64",
+ "dictGetDate",
+ "dictGetDateTime"
+};
+
+/** GROUP BY cannot be removed completely: without aggregate functions there would then be no aggregation at all.
+  * Instead, the key list is replaced with a single constant key (`GROUP BY const`).
+  * See also the removal of constants in the analyzeAggregation method.
+  */
+void appendUnusedGroupByColumn(ASTSelectQuery * select_query)
+{
+    /// The inserted constant must not be the name of a column in the table (rare, but it happens),
+    /// and must not fall into [1, source_columns.size()], because such integers may be
+    /// positional GROUP BY arguments. Hence the negative value.
+    auto placeholder_key = std::make_shared<ASTLiteral>(static_cast<Int64>(-1));
+
+    select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::make_shared<ASTExpressionList>());
+
+    auto & group_by_keys = select_query->groupBy()->children;
+    group_by_keys.emplace_back(placeholder_key);
+}
+
+/// Simplifies the GROUP BY key list in place: a call of an injective function is replaced with its non-literal arguments, literal keys are dropped (unless kept as positional arguments), and if nothing remains a constant key is added via appendUnusedGroupByColumn.
+void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context)
+{
+ const FunctionFactory & function_factory = FunctionFactory::instance();
+
+ if (!select_query->groupBy())
+ return;
+
+ const auto is_literal = [] (const ASTPtr & ast) -> bool
+ {
+ return ast->as<ASTLiteral>();
+ };
+
+ auto & group_exprs = select_query->groupBy()->children;
+
+ /// Removes the expression at index idx by swapping it with the last one and calling .pop_back(); the order of the remaining keys is not preserved.
+ const auto remove_expr_at_index = [&group_exprs] (const size_t idx)
+ {
+ if (idx < group_exprs.size() - 1)
+ std::swap(group_exprs[idx], group_exprs.back());
+
+ group_exprs.pop_back();
+ };
+
+ const auto & settings = context->getSettingsRef();
+
+ /// Iterate over each GROUP BY expression, eliminating injective function calls and literals; `i` advances only when the current expression is kept.
+ for (size_t i = 0; i < group_exprs.size();)
+ {
+ if (const auto * function = group_exprs[i]->as<ASTFunction>())
+ {
+ /// Decide whether the function is injective; dictGet* functions are checked per dictionary attribute.
+ if (possibly_injective_function_names.contains(function->name))
+ {
+ /// Do not handle semantic errors (too few arguments) here; keep the expression as is.
+ if (function->arguments->children.size() < 2)
+ {
+ ++i;
+ continue;
+ }
+
+ const auto * dict_name_ast = function->arguments->children[0]->as<ASTLiteral>();
+ const auto * attr_name_ast = function->arguments->children[1]->as<ASTLiteral>();
+ if (!dict_name_ast || !attr_name_ast) /// dictionary or attribute name is not a literal: keep the key
+ {
+ ++i;
+ continue;
+ }
+
+ const auto & dict_name = dict_name_ast->value.safeGet<String>();
+ const auto & attr_name = attr_name_ast->value.safeGet<String>();
+
+ const auto & dict_ptr = context->getExternalDictionariesLoader().getDictionary(dict_name, context);
+ if (!dict_ptr->isInjective(attr_name))
+ {
+ ++i;
+ continue;
+ }
+ }
+ else
+ {
+ FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context); /// user-defined executable functions are looked up first
+
+ if (!function_builder)
+ function_builder = function_factory.get(function->name, context);
+
+ if (!function_builder->isInjective({}))
+ {
+ ++i;
+ continue;
+ }
+ }
+ /// Don't optimize functions whose alias shadows any of their arguments, e.g.:
+ /// SELECT toString(dummy) as dummy FROM system.one GROUP BY dummy;
+ if (!function->alias.empty())
+ {
+ FunctionMaskingArgumentCheckVisitor::Data data{.alias=function->alias};
+ FunctionMaskingArgumentCheckVisitor(data).visit(function->arguments);
+
+ if (data.is_rejected)
+ {
+ ++i;
+ continue;
+ }
+ }
+
+ /// Copy the shared pointer to the arguments so they outlive the removal of the function node below.
+ auto args_ast = function->arguments;
+
+ /** Remove the function call without advancing `i`:
+ * the element swapped into position `i` has not been processed yet.
+ */
+ remove_expr_at_index(i);
+
+ /// Append the non-literal arguments to the end of the list; later iterations of this loop visit them too.
+ std::remove_copy_if(
+ std::begin(args_ast->children), std::end(args_ast->children),
+ std::back_inserter(group_exprs), is_literal
+ );
+ }
+ else if (is_literal(group_exprs[i]))
+ {
+ bool keep_position = false; /// set when the literal is a valid positional reference into the SELECT list
+ if (settings.enable_positional_arguments)
+ {
+ const auto & value = group_exprs[i]->as<ASTLiteral>()->value;
+ if (value.getType() == Field::Types::UInt64)
+ {
+ auto pos = value.get<UInt64>();
+ if (pos > 0 && pos <= select_query->select()->children.size())
+ keep_position = true;
+ }
+ }
+
+ if (keep_position)
+ ++i;
+ else
+ remove_expr_at_index(i);
+ }
+ else
+ {
+ /// Neither a function nor a literal: advance to the next expression.
+ ++i;
+ }
+ }
+
+ if (group_exprs.empty()) /// every key was eliminated: keep a constant key so that aggregation still happens
+ appendUnusedGroupByColumn(select_query);
+}
+
+struct GroupByKeysInfo /// Summary of a GROUP BY key list, filled by getGroupByKeysInfo.
+{
+ NameSet key_names; /// Column names (getColumnName()) of the keys, including the elements of nested expression lists (grouping sets).
+ bool has_function = false; /// True if at least one top-level key is an ASTFunction.
+};
+
+/// Collects the column names of all GROUP BY keys and records whether any top-level key is a function.
+/// In the grouping sets case a key is itself an ASTExpressionList; its elements are added one by one
+/// and do not affect `has_function`.
+GroupByKeysInfo getGroupByKeysInfo(const ASTs & group_by_keys)
+{
+    GroupByKeysInfo data;
+
+    for (const auto & group_key : group_by_keys)
+    {
+        /// Grouping sets case. Use the pointer returned by the cast directly: the previous
+        /// `const auto x = as<const ASTExpressionList &>()` form copied the whole node.
+        if (const auto * expression_list = group_key->as<ASTExpressionList>())
+        {
+            for (const auto & group_elem : expression_list->children)
+                data.key_names.insert(group_elem->getColumnName());
+
+            continue;
+        }
+
+        if (group_key->as<ASTFunction>())
+            data.has_function = true;
+
+        data.key_names.insert(group_key->getColumnName());
+    }
+
+    return data;
+}
+
+/// Eliminates min/max/any aggregators applied to functions of GROUP BY keys.
+/// Does nothing when the query has no GROUP BY; otherwise runs
+/// SelectAggregateFunctionOfGroupByKeysVisitor over `node` with the set of key names.
+void optimizeAggregateFunctionsOfGroupByKeys(ASTSelectQuery * select_query, ASTPtr & node)
+{
+    const auto group_by = select_query->groupBy();
+    if (!group_by)
+        return;
+
+    GroupByKeysInfo keys_info = getGroupByKeysInfo(group_by->children);
+
+    SelectAggregateFunctionOfGroupByKeysVisitor::Data visitor_data{keys_info.key_names};
+    SelectAggregateFunctionOfGroupByKeysVisitor visitor(visitor_data);
+    visitor.visit(node);
+}
+
+/// Removes repeated sorting conditions from ORDER BY, keeping the first occurrence.
+/// Two elements are duplicates when both the column name and the collation name match.
+/// Elements WITH FILL are always kept and are not recorded as seen.
+void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
+{
+    const auto order_by = select_query->orderBy();
+    if (!order_by)
+        return;
+
+    /// (column name, collation name) pairs encountered so far.
+    std::set<std::pair<String, String>> seen_conditions;
+
+    ASTs & items = order_by->children;
+    ASTs kept_items;
+    kept_items.reserve(items.size());
+
+    for (const auto & item : items)
+    {
+        String column_name = item->children.front()->getColumnName();
+        const auto & order_elem = item->as<ASTOrderByElement &>();
+
+        /// Always keep elements WITH FILL, as they affect the others.
+        bool keep = order_elem.with_fill;
+        if (!keep)
+        {
+            String collation_name;
+            if (order_elem.collation)
+                collation_name = order_elem.collation->getColumnName();
+
+            keep = seen_conditions.emplace(column_name, collation_name).second;
+        }
+
+        if (keep)
+            kept_items.emplace_back(item);
+    }
+
+    /// Touch the AST only if something was actually removed.
+    if (kept_items.size() < items.size())
+        items = std::move(kept_items);
+}
+
+/// Returns the simple subselect (without UNIONs or JOINs or SETTINGS) that `select` reads from, if any.
+/// Returns nullptr when the FROM clause is absent, has more than one table element, is not a subquery,
+/// the subquery is a UNION of several selects, or the inner select has its own SETTINGS.
+/// Every intermediate cast is checked, so an unexpected AST shape yields nullptr instead of a null dereference.
+const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select)
+{
+    const auto tables_ast = select.tables();
+    if (!tables_ast)
+        return nullptr;
+
+    /// Exactly one table element means there are no JOINs.
+    const auto & tables = tables_ast->children;
+    if (tables.size() != 1)
+        return nullptr;
+
+    const auto * tables_element = tables[0]->as<ASTTablesInSelectQueryElement>();
+    if (!tables_element || !tables_element->table_expression)
+        return nullptr;
+
+    const auto * table_expression = tables_element->table_expression->as<ASTTableExpression>();
+    if (!table_expression || !table_expression->subquery)
+        return nullptr;
+
+    const auto * subquery = table_expression->subquery->as<ASTSubquery>();
+    if (!subquery || subquery->children.size() != 1)
+        return nullptr;
+
+    /// A single select inside the union node means there is no real UNION.
+    const auto * subselect_union = subquery->children[0]->as<ASTSelectWithUnionQuery>();
+    if (!subselect_union || !subselect_union->list_of_selects ||
+        subselect_union->list_of_selects->children.size() != 1)
+        return nullptr;
+
+    const auto * subselect = subselect_union->list_of_selects->children[0]->as<ASTSelectQuery>();
+    if (subselect && subselect->settings())
+        return nullptr;
+
+    return subselect;
+}
+
+/// Returns the result column names of `select` that are covered by DISTINCT, either the query's own
+/// DISTINCT or the one inherited from a simple subselect (see getSimpleSubselect).
+/// Returns an empty set when nothing can be concluded.
+std::unordered_set<String> getDistinctNames(const ASTSelectQuery & select)
+{
+    const auto select_list = select.select();
+    if (!select_list || select_list->children.empty())
+        return {};
+
+    const bool has_own_distinct = select.distinct;
+
+    /// Names that are distinct because of the subselect; only used when the query has no DISTINCT itself.
+    std::unordered_set<String> inherited_names;
+    if (!has_own_distinct)
+    {
+        /// SELECT a, b FROM (SELECT DISTINCT a FROM ...)
+        if (const ASTSelectQuery * inner_select = getSimpleSubselect(select))
+            inherited_names = getDistinctNames(*inner_select);
+
+        if (inherited_names.empty())
+            return {};
+    }
+
+    /// Extract result column names (prefer aliases, ignore table name).
+    std::unordered_set<String> result_names;
+    for (const auto & column : select_list->children)
+    {
+        const String alias = column->tryGetAlias();
+        const auto * identifier = column->as<ASTIdentifier>();
+
+        if (!identifier)
+        {
+            /// It's not possible to use getAliasOrColumnName() because the name is context specific (function arguments).
+            if (has_own_distinct && !alias.empty())
+                result_names.insert(alias);
+
+            continue;
+        }
+
+        const String & short_name = identifier->shortName();
+        if (has_own_distinct || inherited_names.contains(short_name))
+            result_names.insert(alias.empty() ? short_name : alias);
+    }
+
+    /// SELECT a FROM (SELECT DISTINCT a, b FROM ...)
+    if (!has_own_distinct && result_names.size() != inherited_names.size())
+        return {};
+
+    return result_names;
+}
+
+/// Replaces monotonous functions in ORDER BY with their argument, provided they do not take part
+/// in a GROUP BY expression, have a single argument and are not aggregate functions.
+/// When the function is not monotonically increasing, the sort direction of the element is flipped.
+/// Throws UNKNOWN_TYPE_OF_AST_NODE if an ORDER BY child is not a non-empty ASTOrderByElement.
+void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context,
+    const TablesWithColumns & tables_with_columns,
+    const TreeRewriterResult & result)
+{
+    auto order_by = select_query->orderBy();
+    if (!order_by)
+        return;
+
+    /// Do not apply the optimization for Distributed and Merge storages:
+    /// the sorting key of their underlying tables is not available here, and removing
+    /// monotonous functions from the prefix of the key could break the matching of
+    /// the sorting key for the `read_in_order` optimization.
+    const bool is_merge_storage = result.storage && result.storage->getName() == "Merge";
+    if (result.is_remote_storage || is_merge_storage)
+        return;
+
+    /// Validate all elements first; any WITH FILL element disables the optimization entirely.
+    for (const auto & order_by_child : order_by->children)
+    {
+        auto * element = order_by_child->as<ASTOrderByElement>();
+
+        if (!element || element->children.empty())
+            throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");
+
+        if (element->with_fill)
+            return;
+    }
+
+    /// Tree hashes (as strings) of all GROUP BY expressions, passed to the monotonicity check.
+    std::unordered_set<String> group_by_hashes;
+    const auto remember_hash = [&group_by_hashes](const ASTPtr & expression)
+    {
+        group_by_hashes.insert(toString(expression->getTreeHash()));
+    };
+
+    if (auto group_by = select_query->groupBy())
+    {
+        for (auto & key : group_by->children)
+        {
+            if (!select_query->group_by_with_grouping_sets)
+            {
+                remember_hash(key);
+                continue;
+            }
+
+            /// With grouping sets every key is a list of expressions.
+            for (auto & set_element : key->children)
+                remember_hash(set_element);
+        }
+    }
+
+    auto sorting_key_columns = result.storage_snapshot ? result.storage_snapshot->metadata->getSortingKeyColumns() : Names{};
+
+    bool is_sorting_key_prefix = true;
+    const size_t order_by_size = order_by->children.size();
+    for (size_t position = 0; position < order_by_size; ++position)
+    {
+        auto * element = order_by->children[position]->as<ASTOrderByElement>();
+
+        auto & expression = element->children[0];
+        if (!expression->as<ASTFunction>())
+            continue;
+
+        const bool matches_sorting_key = position < sorting_key_columns.size()
+            && expression->getColumnName() == sorting_key_columns[position];
+        if (!matches_sorting_key)
+            is_sorting_key_prefix = false;
+
+        /// If the ORDER BY expression matches the sorting key, do not remove
+        /// functions, to allow reading in the order of the key.
+        if (is_sorting_key_prefix)
+            continue;
+
+        MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes};
+        MonotonicityCheckVisitor(data).visit(expression);
+
+        if (data.isRejected())
+            continue;
+
+        expression = data.identifier->clone();
+        expression->setAlias("");
+        if (!data.monotonicity.is_positive)
+            element->direction *= -1;
+    }
+}
+
+/// If ORDER BY has an argument x followed by f(x), transforms it into ORDER BY x.
+/// E.g. ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) becomes ORDER BY x, y
+/// when f(), g(), h(), t() are deterministic (in the scope of the query).
+/// ORDER BY f(x), g(x), x is left alone even if f(x) is a bijection for x or g(x).
+/// Throws UNKNOWN_TYPE_OF_AST_NODE if an ORDER BY child is not a non-empty ASTOrderByElement.
+void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, ContextPtr context)
+{
+    const auto & order_by = select_query->orderBy();
+    if (!order_by)
+        return;
+
+    /// Validate all elements first; any WITH FILL element disables the optimization entirely.
+    for (const auto & order_by_child : order_by->children)
+    {
+        auto * element = order_by_child->as<ASTOrderByElement>();
+
+        if (!element || element->children.empty())
+            throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST");
+
+        if (element->with_fill)
+            return;
+    }
+
+    /// Names of plain identifiers seen earlier in ORDER BY.
+    std::unordered_set<String> preceding_keys;
+    ASTs kept_elements;
+    kept_elements.reserve(order_by->children.size());
+
+    for (auto & element : order_by->children)
+    {
+        /// ORDER BY has ASTOrderByElement children; the meaningful item is the first grandchild.
+        ASTPtr & sort_expression = element->children[0];
+
+        if (sort_expression->as<ASTFunction>() && !preceding_keys.empty())
+        {
+            RedundantFunctionsInOrderByVisitor::Data data{preceding_keys, context};
+            RedundantFunctionsInOrderByVisitor(data).visit(sort_expression);
+
+            /// The function depends only on earlier keys: drop the whole element.
+            if (data.redundant)
+                continue;
+        }
+
+        /// @note Duplicate keys are left unchanged. They are removed in optimizeDuplicatesInOrderBy().
+        if (auto * identifier = sort_expression->as<ASTIdentifier>())
+            preceding_keys.emplace(getIdentifierName(identifier));
+
+        kept_elements.push_back(element);
+    }
+
+    if (kept_elements.size() < order_by->children.size())
+        order_by->children = std::move(kept_elements);
+}
+
+/// Removes repeated expressions from LIMIT BY, keeping the first occurrence of each column name.
+void optimizeLimitBy(const ASTSelectQuery * select_query)
+{
+    const auto limit_by = select_query->limitBy();
+    if (!limit_by)
+        return;
+
+    ASTs & items = limit_by->children;
+
+    std::set<String> seen_names;
+    ASTs kept_items;
+    kept_items.reserve(items.size());
+
+    /// Copy only the items whose column name has not been seen yet.
+    std::copy_if(
+        items.begin(), items.end(), std::back_inserter(kept_items),
+        [&seen_names](const ASTPtr & item) { return seen_names.emplace(item->getColumnName()).second; });
+
+    /// Touch the AST only if something was actually removed.
+    if (kept_items.size() < items.size())
+        items = std::move(kept_items);
+}
+
+/// Uses table constraints to get rid of useless parts of the query.
+/// Delegates to WhereConstraintsOptimizer; the unnamed parameters are accepted but not used.
+void optimizeWithConstraints(ASTSelectQuery * select_query,
+    Aliases & /*aliases*/,
+    const NameSet & /*source_columns_set*/,
+    const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
+    const StorageMetadataPtr & metadata_snapshot,
+    const bool optimize_append_index)
+{
+    WhereConstraintsOptimizer constraints_optimizer(select_query, metadata_snapshot, optimize_append_index);
+    constraints_optimizer.perform();
+}
+
+/// Runs SubstituteColumnOptimizer on the query using the given metadata snapshot and storage.
+/// The unnamed parameters are accepted but not used.
+void optimizeSubstituteColumn(ASTSelectQuery * select_query,
+    Aliases & /*aliases*/,
+    const NameSet & /*source_columns_set*/,
+    const std::vector<TableWithColumnNamesAndTypes> & /*tables_with_columns*/,
+    const StorageMetadataPtr & metadata_snapshot,
+    const ConstStoragePtr & storage)
+{
+    SubstituteColumnOptimizer column_optimizer(select_query, metadata_snapshot, storage);
+    column_optimizer.perform();
+}
+
+/// Transforms the WHERE clause to CNF for more convenient optimization.
+/// Returns true if WHERE was rewritten; false if there is no WHERE or the conversion was not possible.
+bool convertQueryToCNF(ASTSelectQuery * select_query)
+{
+    if (!select_query->where())
+        return false;
+
+    auto cnf = TreeCNFConverter::tryConvertToCNF(select_query->where());
+    if (!cnf)
+        return false;
+
+    cnf->pushNotInFunctions();
+    select_query->refWhere() = TreeCNFConverter::fromCNF(*cnf);
+
+    return true;
+}
+
+/// Removes duplicated columns from USING(...), keeping the first occurrence of each name
+/// (alias if present, column name otherwise). Does nothing when the query has no JOIN
+/// or the JOIN has no USING list.
+void optimizeUsing(const ASTSelectQuery * select_query)
+{
+    if (!select_query->join())
+        return;
+
+    const auto * table_join = select_query->join()->table_join->as<ASTTableJoin>();
+    if (!(table_join && table_join->using_expression_list))
+        return;
+
+    ASTs & expression_list = table_join->using_expression_list->children;
+
+    ASTs uniq_expressions_list;
+    uniq_expressions_list.reserve(expression_list.size());
+
+    std::set<String> expressions_names;
+
+    for (const auto & expression : expression_list)
+    {
+        /// insert() reports whether the name is new, so a separate find() is not needed.
+        if (expressions_names.insert(expression->getAliasOrColumnName()).second)
+            uniq_expressions_list.push_back(expression);
+    }
+
+    /// Touch the AST only if something was removed; move to avoid copying the vector,
+    /// matching the other deduplication helpers in this file.
+    if (uniq_expressions_list.size() < expression_list.size())
+        expression_list = std::move(uniq_expressions_list);
+}
+
+/// Moves arithmetic operations out of aggregation functions
+/// by running ArithmeticOperationsInAgrFuncVisitor over the whole query.
+void optimizeAggregationFunctions(ASTPtr & query)
+{
+    ArithmeticOperationsInAgrFuncVisitor::Data visitor_data;
+    ArithmeticOperationsInAgrFuncVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+/// Runs RewriteAnyFunctionVisitor over the whole query.
+void optimizeAnyFunctions(ASTPtr & query)
+{
+    RewriteAnyFunctionVisitor::Data visitor_data = {};
+    RewriteAnyFunctionVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+/// Runs RewriteSumIfFunctionVisitor over the whole query.
+void optimizeSumIfFunctions(ASTPtr & query)
+{
+    RewriteSumIfFunctionVisitor::Data visitor_data = {};
+    RewriteSumIfFunctionVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+/// Runs RewriteArrayExistsFunctionVisitor over the whole query.
+void optimizeArrayExistsFunctions(ASTPtr & query)
+{
+    RewriteArrayExistsFunctionVisitor::Data visitor_data = {};
+    RewriteArrayExistsFunctionVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+/// Runs OptimizeMultiIfToIfVisitor over the whole query.
+void optimizeMultiIfToIf(ASTPtr & query)
+{
+    OptimizeMultiIfToIfVisitor::Data visitor_data;
+    OptimizeMultiIfToIfVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+/// Runs RemoveInjectiveFunctionsVisitor over the whole query, using `context` for the visitor data.
+void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context)
+{
+    RemoveInjectiveFunctionsVisitor::Data visitor_data(context);
+    RemoveInjectiveFunctionsVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+/// Applies OptimizeDateOrDateTimeConverterWithPreimageVisitor to the WHERE and PREWHERE
+/// clauses, each with its own fresh visitor data. Clauses that are absent are skipped.
+void optimizeDateFilters(ASTSelectQuery * select_query, const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns, ContextPtr context)
+{
+    const auto rewrite_predicate = [&tables_with_columns, &context](ASTPtr & predicate)
+    {
+        OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context};
+        OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(predicate);
+    };
+
+    /// Predicates in the HAVING clause have already been moved to the WHERE clause.
+    if (select_query->where())
+        rewrite_predicate(select_query->refWhere());
+
+    if (select_query->prewhere())
+        rewrite_predicate(select_query->refPrewhere());
+}
+
+/// Two-pass rewrite: FindUsedFunctionsVisitor first fills `used_as_argument` for the functions
+/// "if" and "transform", then ConvertStringsToEnumVisitor runs with that set as its data.
+void transformIfStringsIntoEnum(ASTPtr & query)
+{
+    std::unordered_set<String> candidate_functions = {"if", "transform"};
+    std::unordered_set<String> used_as_argument;
+
+    FindUsedFunctionsVisitor::Data find_data{candidate_functions, used_as_argument};
+    FindUsedFunctionsVisitor finder(find_data);
+    finder.visit(query);
+
+    ConvertStringsToEnumVisitor::Data convert_data{used_as_argument};
+    ConvertStringsToEnumVisitor converter(convert_data);
+    converter.visit(query);
+}
+
+/// Runs RewriteFunctionToSubcolumnVisitor over the whole query with the given metadata snapshot.
+void optimizeFunctionsToSubcolumns(ASTPtr & query, const StorageMetadataPtr & metadata_snapshot)
+{
+    RewriteFunctionToSubcolumnVisitor::Data visitor_data{metadata_snapshot};
+    RewriteFunctionToSubcolumnVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+/// Runs ConvertFunctionOrLikeVisitor over the whole query.
+void optimizeOrLikeChain(ASTPtr & query)
+{
+    ConvertFunctionOrLikeVisitor::Data visitor_data = {};
+    ConvertFunctionOrLikeVisitor visitor(visitor_data);
+    visitor.visit(query);
+}
+
+}
+
+/// Optimizations of `if`-like functions, applied in this order:
+///  1. multiIf -> if (only when `multiif_to_if` is set);
+///  2. `if` with a constant condition (always);
+///  3. chains of `if` -> multiIf (only when `if_chain_to_multiif` is set).
+void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif, bool multiif_to_if)
+{
+    if (multiif_to_if)
+        optimizeMultiIfToIf(query);
+
+    /// Optimize `if` with a constant condition after constants were substituted instead of scalar subqueries.
+    OptimizeIfWithConstantConditionVisitor constant_condition_visitor(aliases);
+    constant_condition_visitor.visit(query);
+
+    if (!if_chain_to_multiif)
+        return;
+
+    OptimizeIfChainsVisitor chains_visitor;
+    chains_visitor.visit(query);
+}
+
+/// Runs RewriteCountVariantsVisitor over the whole query.
+void TreeOptimizer::optimizeCountConstantAndSumOne(ASTPtr & query, ContextPtr context)
+{
+    RewriteCountVariantsVisitor count_variants_visitor(context);
+    count_variants_visitor.visit(query);
+}
+
+/// Eliminates GROUP BY keys that are functions of other GROUP BY keys.
+/// Does nothing when there is no GROUP BY or when no top-level key is a function.
+/// GroupByFunctionKeysVisitor is run with the set of key names as its data; afterwards only
+/// the keys whose column name is still in that set are kept, in their original order.
+void TreeOptimizer::optimizeGroupByFunctionKeys(ASTSelectQuery * select_query)
+{
+    auto group_by = select_query->groupBy();
+    if (!group_by)
+        return;
+
+    const auto & group_by_keys = group_by->children;
+
+    GroupByKeysInfo group_by_keys_data = getGroupByKeysInfo(group_by_keys);
+
+    if (!group_by_keys_data.has_function)
+        return;
+
+    GroupByFunctionKeysVisitor::Data visitor_data{group_by_keys_data.key_names};
+    GroupByFunctionKeysVisitor(visitor_data).visit(group_by);
+
+    /// Build the result only once it is known to be needed.
+    ASTs modified;
+    modified.reserve(group_by_keys.size());
+
+    for (const auto & group_key : group_by_keys)
+        if (group_by_keys_data.key_names.contains(group_key->getColumnName()))
+            modified.push_back(group_key);
+
+    /// Replace the key list; move instead of copying the vector of shared pointers.
+    group_by->children = std::move(modified);
+}
+
+/// Runs the AST-level optimizations on a single SELECT query, in a fixed order.
+/// Most passes are gated by a setting read from `context`; the date-filter rewrite, the GROUP BY
+/// injective-function elimination and the ORDER BY / LIMIT BY / USING deduplication always run.
+/// Throws LOGICAL_ERROR if `query` is not an ASTSelectQuery.
+void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
+    const std::vector<TableWithColumnNamesAndTypes> & tables_with_columns, ContextPtr context)
+{
+    const auto & settings = context->getSettingsRef();
+
+    auto * select_query = query->as<ASTSelectQuery>();
+    if (!select_query)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts.");
+
+    /// `result.storage` is dereferenced here, so it is checked explicitly in addition to the snapshot.
+    if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage
+        && result.storage->supportsSubcolumns())
+        optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata);
+
+    /// Move arithmetic operations out of aggregation functions
+    if (settings.optimize_arithmetic_operations_in_aggregate_functions)
+        optimizeAggregationFunctions(query);
+
+    bool converted_to_cnf = false;
+    if (settings.convert_query_to_cnf)
+        converted_to_cnf = convertQueryToCNF(select_query);
+
+    /// Constraint-based optimizations run only when WHERE was actually converted to CNF.
+    if (converted_to_cnf && settings.optimize_using_constraints && result.storage_snapshot)
+    {
+        optimizeWithConstraints(select_query, result.aliases, result.source_columns_set,
+            tables_with_columns, result.storage_snapshot->metadata, settings.optimize_append_index);
+
+        if (settings.optimize_substitute_columns)
+            optimizeSubstituteColumn(select_query, result.aliases, result.source_columns_set,
+                tables_with_columns, result.storage_snapshot->metadata, result.storage);
+    }
+
+    /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc.
+    optimizeDateFilters(select_query, tables_with_columns, context);
+
+    /// GROUP BY injective function elimination.
+    optimizeGroupBy(select_query, context);
+
+    /// GROUP BY functions of other keys elimination.
+    if (settings.optimize_group_by_function_keys)
+        optimizeGroupByFunctionKeys(select_query);
+
+    /// Move all operations out of any function
+    if (settings.optimize_move_functions_out_of_any)
+        optimizeAnyFunctions(query);
+
+    if (settings.optimize_normalize_count_variants)
+        optimizeCountConstantAndSumOne(query, context);
+
+    if (settings.optimize_rewrite_sum_if_to_count_if)
+        optimizeSumIfFunctions(query);
+
+    if (settings.optimize_rewrite_array_exists_to_has)
+        optimizeArrayExistsFunctions(query);
+
+    /// Remove injective functions inside uniq
+    if (settings.optimize_injective_functions_inside_uniq)
+        optimizeInjectiveFunctionsInsideUniq(query, context);
+
+    /// Eliminate min/max/any aggregators of functions of GROUP BY keys
+    if (settings.optimize_aggregators_of_group_by_keys
+        && !select_query->group_by_with_totals
+        && !select_query->group_by_with_rollup
+        && !select_query->group_by_with_cube)
+        optimizeAggregateFunctionsOfGroupByKeys(select_query, query);
+
+    /// Remove functions from ORDER BY if its argument is also in ORDER BY
+    if (settings.optimize_redundant_functions_in_order_by)
+        optimizeRedundantFunctionsInOrderBy(select_query, context);
+
+    /// Replace monotonous functions with its argument
+    if (settings.optimize_monotonous_functions_in_order_by)
+        optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns, result);
+
+    /// Remove duplicate items from ORDER BY.
+    /// Execute it after all order by optimizations,
+    /// because they can produce duplicated columns.
+    optimizeDuplicatesInOrderBy(select_query);
+
+    /// If function "if" has String-type arguments, transform them into enum
+    if (settings.optimize_if_transform_strings_to_enum)
+        transformIfStringsIntoEnum(query);
+
+    /// Remove duplicated elements from LIMIT BY clause.
+    optimizeLimitBy(select_query);
+
+    /// Remove duplicated columns from USING(...).
+    optimizeUsing(select_query);
+
+    if (settings.optimize_or_like_chain
+        && settings.allow_hyperscan
+        && settings.max_hyperscan_regexp_length == 0
+        && settings.max_hyperscan_regexp_total_length == 0)
+    {
+        optimizeOrLikeChain(query);
+    }
+}
+
+}