summaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionFactory.cpp
diff options
context:
space:
mode:
authorvitalyisaev <[email protected]>2023-11-14 09:58:56 +0300
committervitalyisaev <[email protected]>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/AggregateFunctions/AggregateFunctionFactory.cpp
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/AggregateFunctions/AggregateFunctionFactory.cpp')
-rw-r--r--contrib/clickhouse/src/AggregateFunctions/AggregateFunctionFactory.cpp294
1 files changed, 294 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionFactory.cpp b/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionFactory.cpp
new file mode 100644
index 00000000000..f52c9ac8510
--- /dev/null
+++ b/contrib/clickhouse/src/AggregateFunctions/AggregateFunctionFactory.cpp
@@ -0,0 +1,294 @@
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
+
+#include <DataTypes/DataTypeAggregateFunction.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+
+#include <IO/WriteHelpers.h>
+
+#include <Interpreters/Context.h>
+
+#include <Common/StringUtils/StringUtils.h>
+#include <Common/typeid_cast.h>
+#include <Common/CurrentThread.h>
+
+#include <Poco/String.h>
+
+#include <Functions/FunctionFactory.h>
+
+
+static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
+
+
+namespace DB
+{
+struct Settings;
+
+namespace ErrorCodes
+{
+ extern const int UNKNOWN_AGGREGATE_FUNCTION;
+ extern const int LOGICAL_ERROR;
+ extern const int ILLEGAL_AGGREGATION;
+ extern const int TOO_LARGE_STRING_SIZE;
+}
+
+const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
+{
+ return AggregateFunctionFactory::instance().getCanonicalNameIfAny(name);
+}
+
+void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness)
+{
+ if (creator_with_properties.creator == nullptr)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: "
+ "the aggregate function {} has been provided a null constructor", name);
+
+ if (!aggregate_functions.emplace(name, creator_with_properties).second)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: the aggregate function name '{}' is not unique",
+ name);
+
+ if (case_sensitiveness == CaseInsensitive)
+ {
+ auto key = Poco::toLower(name);
+ if (!case_insensitive_aggregate_functions.emplace(key, creator_with_properties).second)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionFactory: "
+ "the case insensitive aggregate function name '{}' is not unique", name);
+ case_insensitive_name_mapping[key] = name;
+ }
+}
+
+static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
+{
+ DataTypes res_types;
+ res_types.reserve(types.size());
+ for (const auto & type : types)
+ res_types.emplace_back(recursiveRemoveLowCardinality(type));
+
+ return res_types;
+}
+
+AggregateFunctionPtr AggregateFunctionFactory::get(
+ const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
+{
+ /// This to prevent costly string manipulation in parsing the aggregate function combinators.
+ /// Example: avgArrayArrayArrayArray...(1000 times)...Array
+ if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
+ throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
+
+ auto types_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types);
+
+ /// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function.
+ /// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them,
+ /// they must handle the nullability themselves
+ auto properties = tryGetProperties(name);
+ bool is_window_function = properties.has_value() && properties->is_window_function;
+ if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
+ [](const auto & type) { return type->isNullable(); }))
+ {
+ AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null");
+ if (!combinator)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: cannot find aggregate function combinator "
+ "to apply a function to Nullable arguments.");
+
+ DataTypes nested_types = combinator->transformArguments(types_without_low_cardinality);
+ Array nested_parameters = combinator->transformParameters(parameters);
+
+ bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
+ [](const auto & type) { return type->onlyNull(); });
+
+ AggregateFunctionPtr nested_function = getImpl(
+ name, nested_types, nested_parameters, out_properties, has_null_arguments);
+
+ // Pure window functions are not real aggregate functions. Applying
+ // combinators doesn't make sense for them, they must handle the
+ // nullability themselves. Another special case is functions from Nothing
+ // that are rewritten to AggregateFunctionNothing, in this case
+ // nested_function is nullptr.
+ if (!nested_function || !nested_function->isOnlyWindowFunction())
+ return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters);
+ }
+
+ auto with_original_arguments = getImpl(name, types_without_low_cardinality, parameters, out_properties, false);
+
+ if (!with_original_arguments)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr");
+ return with_original_arguments;
+}
+
+
+AggregateFunctionPtr AggregateFunctionFactory::getImpl(
+ const String & name_param,
+ const DataTypes & argument_types,
+ const Array & parameters,
+ AggregateFunctionProperties & out_properties,
+ bool has_null_arguments) const
+{
+ String name = getAliasToOrName(name_param);
+ bool is_case_insensitive = false;
+ Value found;
+
+ /// Find by exact match.
+ if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
+ {
+ found = it->second;
+ }
+
+ if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
+ {
+ found = jt->second;
+ is_case_insensitive = true;
+ }
+
+ ContextPtr query_context;
+ if (CurrentThread::isInitialized())
+ query_context = CurrentThread::get().getQueryContext();
+
+ if (found.creator)
+ {
+ out_properties = found.properties;
+
+ if (query_context && query_context->getSettingsRef().log_queries)
+ query_context->addQueryFactoriesInfo(
+ Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
+
+ /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
+ if (!out_properties.returns_default_when_only_null && has_null_arguments)
+ return nullptr;
+
+ const Settings * settings = query_context ? &query_context->getSettingsRef() : nullptr;
+ return found.creator(name, argument_types, parameters, settings);
+ }
+
+ /// Combinators of aggregate functions.
+ /// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb',
+ /// that can have different number and/or types of arguments, different result type and different behaviour.
+
+ if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
+ {
+ const std::string & combinator_name = combinator->getName();
+
+ if (combinator->isForInternalUsageOnly())
+ throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION,
+ "Aggregate function combinator '{}' is only for internal usage",
+ combinator_name);
+
+ if (query_context && query_context->getSettingsRef().log_queries)
+ query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunctionCombinator, combinator_name);
+
+ String nested_name = name.substr(0, name.size() - combinator_name.size());
+ /// Nested identical combinators (i.e. uniqCombinedIfIf) is not
+ /// supported (since they don't work -- silently).
+ ///
+ /// But non-identical is supported and works. For example,
+ /// uniqCombinedIfMergeIf is useful in cases when the underlying
+ /// storage stores AggregateFunction(uniqCombinedIf) and in SELECT you
+ /// need to filter aggregation result based on another column.
+
+ if (!combinator->supportsNesting() && nested_name.ends_with(combinator_name))
+ {
+ throw Exception(ErrorCodes::ILLEGAL_AGGREGATION,
+ "Nested identical combinator '{}' is not supported",
+ combinator_name);
+ }
+
+ DataTypes nested_types = combinator->transformArguments(argument_types);
+ Array nested_parameters = combinator->transformParameters(parameters);
+
+ AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties);
+ return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
+ }
+
+
+ String extra_info;
+ if (FunctionFactory::instance().hasNameOrAlias(name))
+ extra_info = ". There is an ordinary function with the same name, but aggregate function is expected here";
+
+ auto hints = this->getHints(name);
+ if (!hints.empty())
+ throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION,
+ "Unknown aggregate function {}{}. Maybe you meant: {}", name, extra_info, toString(hints));
+ else
+ throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info);
+}
+
+
+AggregateFunctionPtr AggregateFunctionFactory::tryGet(
+ const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
+{
+ return isAggregateFunctionName(name)
+ ? get(name, argument_types, parameters, out_properties)
+ : nullptr;
+}
+
+std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
+{
+ if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
+ throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
+
+ while (true)
+ {
+ name = getAliasToOrName(name);
+ Value found;
+
+ /// Find by exact match.
+ if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
+ {
+ found = it->second;
+ }
+
+ if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
+ found = jt->second;
+
+ if (found.creator)
+ return found.properties;
+
+ /// Combinators of aggregate functions.
+ /// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb',
+ /// that can have different number and/or types of arguments, different result type and different behaviour.
+
+ if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
+ {
+ if (combinator->isForInternalUsageOnly())
+ return {};
+
+ /// NOTE: It's reasonable to also allow to transform properties by combinator.
+ name = name.substr(0, name.size() - combinator->getName().size());
+ }
+ else
+ return {};
+ }
+}
+
+
+bool AggregateFunctionFactory::isAggregateFunctionName(String name) const
+{
+ if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
+ throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
+
+ while (true)
+ {
+ if (aggregate_functions.contains(name) || isAlias(name))
+ return true;
+
+ String name_lowercase = Poco::toLower(name);
+ if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
+ return true;
+
+ if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
+ {
+ name = name.substr(0, name.size() - combinator->getName().size());
+ }
+ else
+ return false;
+ }
+}
+
+AggregateFunctionFactory & AggregateFunctionFactory::instance()
+{
+ static AggregateFunctionFactory ret;
+ return ret;
+}
+
+}