diff options
| author | vitalyisaev <[email protected]> | 2023-11-14 09:58:56 +0300 |
|---|---|---|
| committer | vitalyisaev <[email protected]> | 2023-11-14 10:20:20 +0300 |
| commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
| tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/Functions/concat.cpp | |
| parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/Functions/concat.cpp')
| -rw-r--r-- | contrib/clickhouse/src/Functions/concat.cpp | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/Functions/concat.cpp b/contrib/clickhouse/src/Functions/concat.cpp new file mode 100644 index 00000000000..9eb222d8c09 --- /dev/null +++ b/contrib/clickhouse/src/Functions/concat.cpp @@ -0,0 +1,243 @@ +#include <Columns/ColumnString.h> +#include <DataTypes/DataTypeString.h> +#include <Functions/FunctionFactory.h> +#include <Functions/FunctionHelpers.h> +#include <Functions/GatherUtils/Algorithms.h> +#include <Functions/GatherUtils/Sinks.h> +#include <Functions/GatherUtils/Slices.h> +#include <Functions/GatherUtils/Sources.h> +#include <Functions/IFunction.h> +#include <IO/WriteHelpers.h> +#include <base/map.h> +#include <base/range.h> + +#include "formatString.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_COLUMN; +} + +using namespace GatherUtils; + +namespace +{ + +template <typename Name, bool is_injective> +class ConcatImpl : public IFunction +{ +public: + static constexpr auto name = Name::name; + explicit ConcatImpl(ContextPtr context_) : context(context_) {} + static FunctionPtr create(ContextPtr context) { return std::make_shared<ConcatImpl>(context); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isInjective(const ColumnsWithTypeAndName &) const override { return is_injective; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() < 2) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 2", + getName(), + arguments.size()); + + for (const auto arg_idx : collections::range(0, arguments.size())) + { + const auto * arg = arguments[arg_idx].get(); + if (!isStringOrFixedString(arg)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}", + arg->getName(), + arg_idx + 1, + getName()); + } + + return std::make_shared<DataTypeString>(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + /// Format function is not proven to be faster for two arguments. + /// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl. + /// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%). + /// For 3 and more arguments FormatImpl is much faster (up to 50-60%). + if (arguments.size() == 2) + return executeBinary(arguments, input_rows_count); + else + return executeFormatImpl(arguments, input_rows_count); + } + +private: + ContextWeakPtr context; + + ColumnPtr executeBinary(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + const IColumn * c0 = arguments[0].column.get(); + const IColumn * c1 = arguments[1].column.get(); + + const ColumnString * c0_string = checkAndGetColumn<ColumnString>(c0); + const ColumnString * c1_string = checkAndGetColumn<ColumnString>(c1); + const ColumnConst * c0_const_string = checkAndGetColumnConst<ColumnString>(c0); + const ColumnConst * c1_const_string = checkAndGetColumnConst<ColumnString>(c1); + + auto c_res = ColumnString::create(); + + if (c0_string && c1_string) + concat(StringSource(*c0_string), StringSource(*c1_string), StringSink(*c_res, c0->size())); + else if (c0_string && c1_const_string) + concat(StringSource(*c0_string), ConstSource<StringSource>(*c1_const_string), StringSink(*c_res, c0->size())); + else if (c0_const_string && c1_string) + concat(ConstSource<StringSource>(*c0_const_string), StringSource(*c1_string), StringSink(*c_res, c0->size())); + else + { + /// Fallback: use generic implementation for not very important cases. + return executeFormatImpl(arguments, input_rows_count); + } + + return c_res; + } + + ColumnPtr executeFormatImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + { + const size_t num_arguments = arguments.size(); + assert(num_arguments >= 2); + + auto c_res = ColumnString::create(); + std::vector<const ColumnString::Chars *> data(num_arguments); + std::vector<const ColumnString::Offsets *> offsets(num_arguments); + std::vector<size_t> fixed_string_sizes(num_arguments); + std::vector<std::optional<String>> constant_strings(num_arguments); + bool has_column_string = false; + bool has_column_fixed_string = false; + for (size_t i = 0; i < num_arguments; ++i) + { + const ColumnPtr & column = arguments[i].column; + if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get())) + { + has_column_string = true; + data[i] = &col->getChars(); + offsets[i] = &col->getOffsets(); + } + else if (const ColumnFixedString * fixed_col = checkAndGetColumn<ColumnFixedString>(column.get())) + { + has_column_fixed_string = true; + data[i] = &fixed_col->getChars(); + fixed_string_sizes[i] = fixed_col->getN(); + } + else if (const ColumnConst * const_col = checkAndGetColumnConstStringOrFixedString(column.get())) + { + constant_strings[i] = const_col->getValue<String>(); + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + column->getName(), getName()); + } + + String pattern; + pattern.reserve(2 * num_arguments); + + for (size_t i = 0; i < num_arguments; ++i) + pattern += "{}"; + + FormatImpl::formatExecute( + has_column_string, + has_column_fixed_string, + std::move(pattern), + data, + offsets, + fixed_string_sizes, + constant_strings, + c_res->getChars(), + c_res->getOffsets(), + input_rows_count); + + return c_res; + } +}; + + +struct NameConcat +{ + static constexpr auto name = "concat"; +}; +struct NameConcatAssumeInjective +{ + static constexpr auto name = "concatAssumeInjective"; +}; + +using FunctionConcat = ConcatImpl<NameConcat, false>; +using FunctionConcatAssumeInjective = ConcatImpl<NameConcatAssumeInjective, true>; + + +/// Also works with arrays. +class ConcatOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "concat"; + static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique<ConcatOverloadResolver>(context); } + + explicit ConcatOverloadResolver(ContextPtr context_) : context(context_) {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + if (isArray(arguments.at(0).type)) + { + return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments); + } + else if (isMap(arguments.at(0).type)) + { + return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments); + } + else if (isTuple(arguments.at(0).type)) + { + return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments); + } + else + return std::make_unique<FunctionToFunctionBaseAdaptor>( + FunctionConcat::create(context), collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }), return_type); + } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 2.", + getName(), arguments.size()); + + /// We always return Strings from concat, even if arguments were fixed strings. + return std::make_shared<DataTypeString>(); + } + +private: + ContextPtr context; +}; + +} + +REGISTER_FUNCTION(Concat) +{ + factory.registerFunction<ConcatOverloadResolver>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction<FunctionConcatAssumeInjective>(); +} + +} |
