summaryrefslogtreecommitdiffstats
path: root/contrib/clickhouse/src/DataTypes
diff options
context:
space:
mode:
authorvitalyisaev <[email protected]>2023-11-14 09:58:56 +0300
committervitalyisaev <[email protected]>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/clickhouse/src/DataTypes
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/clickhouse/src/DataTypes')
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.cpp271
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.h96
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeArray.cpp77
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeArray.h74
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeCustom.h56
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.cpp43
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.h32
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp170
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h42
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDate.cpp23
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDate.h27
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDate32.cpp23
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDate32.h31
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDateTime.cpp42
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDateTime.h53
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDateTime64.cpp70
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDateTime64.h50
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.cpp47
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.h210
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeDomainBool.cpp21
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeEnum.cpp348
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeEnum.h93
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeFactory.cpp301
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeFactory.h104
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeFixedString.cpp70
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeFixedString.h78
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeFunction.cpp36
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeFunction.h45
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.cpp17
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.h94
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeInterval.cpp31
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeInterval.h41
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.cpp179
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.h95
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeLowCardinalityHelpers.cpp208
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeMap.cpp158
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeMap.h62
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNested.cpp75
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNested.h33
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNothing.cpp31
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNothing.h36
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNullable.cpp118
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNullable.h59
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNumberBase.cpp76
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeNumberBase.h75
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeObject.cpp82
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeObject.h49
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeSet.h32
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeString.cpp99
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeString.h46
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeTuple.cpp372
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeTuple.h76
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeUUID.cpp34
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypeUUID.h50
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypesDecimal.cpp131
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypesDecimal.h270
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypesNumber.cpp98
-rw-r--r--contrib/clickhouse/src/DataTypes/DataTypesNumber.h53
-rw-r--r--contrib/clickhouse/src/DataTypes/EnumValues.cpp107
-rw-r--r--contrib/clickhouse/src/DataTypes/EnumValues.h90
-rw-r--r--contrib/clickhouse/src/DataTypes/FieldToDataType.cpp211
-rw-r--r--contrib/clickhouse/src/DataTypes/FieldToDataType.h50
-rw-r--r--contrib/clickhouse/src/DataTypes/IDataType.cpp254
-rw-r--r--contrib/clickhouse/src/DataTypes/IDataType.h649
-rw-r--r--contrib/clickhouse/src/DataTypes/IDataTypeDummy.h50
-rw-r--r--contrib/clickhouse/src/DataTypes/Native.cpp200
-rw-r--r--contrib/clickhouse/src/DataTypes/Native.h111
-rw-r--r--contrib/clickhouse/src/DataTypes/NestedUtils.cpp360
-rw-r--r--contrib/clickhouse/src/DataTypes/NestedUtils.h61
-rw-r--r--contrib/clickhouse/src/DataTypes/NumberTraits.h244
-rw-r--r--contrib/clickhouse/src/DataTypes/ObjectUtils.cpp992
-rw-r--r--contrib/clickhouse/src/DataTypes/ObjectUtils.h212
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/ISerialization.cpp323
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/ISerialization.h417
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.cpp281
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.h64
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/PathInData.cpp156
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/PathInData.h110
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.cpp218
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.h46
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.cpp620
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.h84
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.cpp335
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.h37
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp97
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.h59
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.cpp88
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.h29
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.cpp85
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.h27
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.cpp179
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.h29
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.cpp174
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.h29
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.cpp99
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.h30
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.cpp79
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.h32
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.cpp117
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.h40
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.cpp214
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.h50
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationIPv4andIPv6.h132
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.cpp298
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.h118
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.cpp165
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.h45
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.cpp209
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.h90
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.cpp781
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.h84
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.cpp365
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.h76
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.cpp78
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.h80
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.cpp25
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.h34
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.cpp670
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.h108
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.cpp182
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.h36
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.cpp557
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.h119
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.cpp387
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.h104
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationString.cpp365
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationString.h37
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.cpp484
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.h78
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.cpp173
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.h30
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.cpp149
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.h83
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SimpleTextSerialization.h64
-rw-r--r--contrib/clickhouse/src/DataTypes/Serializations/SubcolumnsTree.h209
-rw-r--r--contrib/clickhouse/src/DataTypes/TimezoneMixin.h32
-rw-r--r--contrib/clickhouse/src/DataTypes/convertMySQLDataType.cpp132
-rw-r--r--contrib/clickhouse/src/DataTypes/convertMySQLDataType.h15
-rw-r--r--contrib/clickhouse/src/DataTypes/getLeastSupertype.cpp668
-rw-r--r--contrib/clickhouse/src/DataTypes/getLeastSupertype.h39
-rw-r--r--contrib/clickhouse/src/DataTypes/getMostSubtype.cpp398
-rw-r--r--contrib/clickhouse/src/DataTypes/getMostSubtype.h19
-rw-r--r--contrib/clickhouse/src/DataTypes/hasNullable.cpp33
-rw-r--r--contrib/clickhouse/src/DataTypes/hasNullable.h10
-rw-r--r--contrib/clickhouse/src/DataTypes/registerDataTypeDateTime.cpp118
-rw-r--r--contrib/clickhouse/src/DataTypes/transformTypesRecursively.cpp172
-rw-r--r--contrib/clickhouse/src/DataTypes/transformTypesRecursively.h19
147 files changed, 21122 insertions, 0 deletions
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.cpp b/contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.cpp
new file mode 100644
index 00000000000..be60886d74b
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.cpp
@@ -0,0 +1,271 @@
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+
+#include <Columns/ColumnAggregateFunction.h>
+
+#include <Common/AlignedBuffer.h>
+#include <Common/FieldVisitorToString.h>
+
+#include <Formats/FormatSettings.h>
+#include <DataTypes/DataTypeAggregateFunction.h>
+#include <DataTypes/Serializations/SerializationAggregateFunction.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/transformTypesRecursively.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTIdentifier_fwd.h>
+#include <Parsers/ASTLiteral.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int SYNTAX_ERROR;
+ extern const int BAD_ARGUMENTS;
+ extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS;
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int LOGICAL_ERROR;
+}
+
+
+/// Full name of the type; the version is part of the name when it is non-zero (see getNameImpl).
+String DataTypeAggregateFunction::doGetName() const
+{
+    constexpr bool with_version = true;
+    return getNameImpl(with_version);
+}
+
+
+/// Name of the type with the version never printed.
+String DataTypeAggregateFunction::getNameWithoutVersion() const
+{
+    constexpr bool with_version = false;
+    return getNameImpl(with_version);
+}
+
+
+/// The explicitly stored version if there is one, otherwise the default version of the aggregate function.
+size_t DataTypeAggregateFunction::getVersion() const
+{
+    return version.has_value() ? version.value() : function->getDefaultVersion();
+}
+
+
+/// Renders the type as AggregateFunction([version, ]name[(parameters...)][, argument types...]).
+String DataTypeAggregateFunction::getNameImpl(bool with_version) const
+{
+    WriteBufferFromOwnString out;
+    out << "AggregateFunction(";
+
+    /// A zero version is never printed, even when the version was requested.
+    const auto current_version = getVersion();
+    const bool print_version = with_version && current_version != 0;
+    if (print_version)
+        out << current_version << ", ";
+    out << function->getName();
+
+    if (!parameters.empty())
+    {
+        out << '(';
+        bool is_first = true;
+        for (const auto & parameter : parameters)
+        {
+            if (!is_first)
+                out << ", ";
+            is_first = false;
+            out << applyVisitor(FieldVisitorToString(), parameter);
+        }
+        out << ')';
+    }
+
+    for (size_t pos = 0; pos != argument_types.size(); ++pos)
+        out << ", " << argument_types[pos]->getName();
+
+    out << ')';
+    return out.str();
+}
+
+
+/// Creates a ColumnAggregateFunction from this type's function and its effective version.
+MutableColumnPtr DataTypeAggregateFunction::createColumn() const
+{
+    const auto column_version = getVersion();
+    return ColumnAggregateFunction::create(function, column_version);
+}
+
+
+/// Default value: a freshly created (empty) aggregate function state, serialized into the field.
+Field DataTypeAggregateFunction::getDefault() const
+{
+    Field result = AggregateFunctionStateData();
+    auto & state_data = result.get<AggregateFunctionStateData &>();
+    state_data.name = getName();
+
+    AlignedBuffer state_memory(function->sizeOfData(), function->alignOfData());
+    AggregateDataPtr state = state_memory.data();
+
+    function->create(state);
+
+    try
+    {
+        /// The write buffer is scoped to this block, so it is gone before the state is destroyed below.
+        WriteBufferFromString serialized(state_data.data);
+        function->serialize(state, serialized, version);
+    }
+    catch (...)
+    {
+        /// The state was already created, so it has to be destroyed on the failure path as well.
+        function->destroy(state);
+        throw;
+    }
+
+    function->destroy(state);
+
+    return result;
+}
+
+/// Both types must be DataTypeAggregateFunction with the same function name, equal parameters
+/// and equal argument types. The version is not part of this comparison.
+bool DataTypeAggregateFunction::strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & rhs_state_type)
+{
+    const auto * left = typeid_cast<const DataTypeAggregateFunction *>(lhs_state_type.get());
+    const auto * right = typeid_cast<const DataTypeAggregateFunction *>(rhs_state_type.get());
+    if (left == nullptr || right == nullptr)
+        return false;
+
+    if (left->function->getName() != right->function->getName())
+        return false;
+
+    const auto & left_parameters = left->parameters;
+    const auto & right_parameters = right->parameters;
+    if (left_parameters.size() != right_parameters.size())
+        return false;
+
+    for (size_t pos = 0; pos != left_parameters.size(); ++pos)
+    {
+        if (left_parameters[pos] != right_parameters[pos])
+            return false;
+    }
+
+    const auto & left_arguments = left->argument_types;
+    const auto & right_arguments = right->argument_types;
+    if (left_arguments.size() != right_arguments.size())
+        return false;
+
+    for (size_t pos = 0; pos != left_arguments.size(); ++pos)
+    {
+        const bool same_argument = left_arguments[pos]->equals(*right_arguments[pos]);
+        if (!same_argument)
+            return false;
+    }
+
+    return true;
+}
+
+/// Two aggregate function types are equal when the normalized state types of their functions
+/// are strictly equal (see strictEquals).
+bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
+{
+    const bool same_class = typeid(rhs) == typeid(*this);
+    if (!same_class)
+        return false;
+
+    const auto own_state_type = function->getNormalizedStateType();
+    const auto & other = typeid_cast<const DataTypeAggregateFunction &>(rhs);
+    const auto other_state_type = other.function->getNormalizedStateType();
+
+    return strictEquals(own_state_type, other_state_type);
+}
+
+
+/// The serialization is built from the function, the full type name and the effective version.
+SerializationPtr DataTypeAggregateFunction::doGetDefaultSerialization() const
+{
+    auto type_name = getName();
+    const auto serialization_version = getVersion();
+    return std::make_shared<SerializationAggregateFunction>(function, std::move(type_name), serialization_version);
+}
+
+/** Builds DataTypeAggregateFunction from the AST arguments of AggregateFunction(...).
+ * Accepted children: [version literal (optional), function name or name(parameters...), argument types...].
+ * Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the arguments are missing, SYNTAX_ERROR or BAD_ARGUMENTS when the
+ * function part has an unexpected form, and PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS for non-literal parameters.
+ */
+static DataTypePtr create(const ASTPtr & arguments)
+{
+ String function_name;
+ AggregateFunctionPtr function;
+ DataTypes argument_types;
+ Array params_row;
+ std::optional<size_t> version;
+
+ if (!arguments || arguments->children.empty())
+ throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+ "Data type AggregateFunction requires parameters: "
+ "version(optionally), name of aggregate function and list of data types for arguments");
+ /// Without a version the function is the first child and the argument types start right after it.
+ ASTPtr data_type_ast = arguments->children[0];
+ size_t argument_types_start_idx = 1;
+
+ /* If aggregate function definition doesn't have version, it will have in AST children args [ASTFunction, types...] - in case
+ * it is parametric, or [ASTIdentifier, types...] - otherwise. If aggregate function has version in AST, then it will be:
+ * [ASTLiteral, ASTFunction (or ASTIdentifier), types...].
+ */
+ if (auto * version_ast = arguments->children[0]->as<ASTLiteral>())
+ {
+ if (arguments->children.size() < 2)
+ throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+ "Data type AggregateFunction has version, but it requires at least one more parameter - name of aggregate function");
+ version = version_ast->value.safeGet<UInt64>();
+ data_type_ast = arguments->children[1];
+ argument_types_start_idx = 2;
+ }
+ /// The function is given either as name(parameters...) or as a bare identifier.
+ if (const auto * parametric = data_type_ast->as<ASTFunction>())
+ {
+ if (parametric->parameters)
+ throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected level of parameters to aggregate function");
+
+ function_name = parametric->name;
+
+ if (parametric->arguments)
+ {
+ const ASTs & parameters = parametric->arguments->children;
+ params_row.resize(parameters.size());
+
+ for (size_t i = 0; i < parameters.size(); ++i)
+ {
+ const auto * literal = parameters[i]->as<ASTLiteral>();
+ if (!literal)
+ throw Exception(
+ ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS,
+ "Parameters to aggregate functions must be literals. "
+ "Got parameter '{}' for function '{}'",
+ parameters[i]->formatForErrorMessage(), function_name);
+
+ params_row[i] = literal->value;
+ }
+ }
+ }
+ else if (auto opt_name = tryGetIdentifierName(data_type_ast))
+ {
+ function_name = *opt_name;
+ }
+ else if (data_type_ast->as<ASTLiteral>())
+ {
+ throw Exception(ErrorCodes::BAD_ARGUMENTS,
+ "Aggregate function name for data type AggregateFunction must "
+ "be passed as identifier (without quotes) or function");
+ }
+ else
+ throw Exception(ErrorCodes::BAD_ARGUMENTS,
+ "Unexpected AST element passed as aggregate function name for data type AggregateFunction. "
+ "Must be identifier or function.");
+ /// All remaining children are the data types of the function arguments.
+ for (size_t i = argument_types_start_idx; i < arguments->children.size(); ++i)
+ argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));
+
+ if (function_name.empty())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: empty name of aggregate function passed");
+ /// Resolve the function through the factory; `version` stays empty when the AST had no version literal.
+ AggregateFunctionProperties properties;
+ function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties);
+ return std::make_shared<DataTypeAggregateFunction>(function, argument_types, params_row, version);
+}
+
+/// Visits `type` with callOnNestedSimpleTypes and, for every versioned AggregateFunction type it reports,
+/// sets the version: derived from `revision` when it is given, otherwise 0.
+/// `if_empty` is passed through to the setters unchanged.
+void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional<size_t> revision)
+{
+    auto set_version = [if_empty, revision](DataTypePtr & nested_type)
+    {
+        const auto * state_type = typeid_cast<const DataTypeAggregateFunction *>(nested_type.get());
+        if (!state_type || !state_type->isVersioned())
+            return;
+
+        if (revision.has_value())
+            state_type->updateVersionFromRevision(revision.value(), if_empty);
+        else
+            state_type->setVersion(0, if_empty);
+    };
+
+    callOnNestedSimpleTypes(type, set_version);
+}
+
+
+/// Registers the "AggregateFunction" type family together with its creator function.
+void registerDataTypeAggregateFunction(DataTypeFactory & factory)
+{
+    const String family_name = "AggregateFunction";
+    factory.registerDataType(family_name, create);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.h b/contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.h
new file mode 100644
index 00000000000..6331c23222f
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeAggregateFunction.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include <AggregateFunctions/IAggregateFunction.h>
+
+#include <DataTypes/IDataType.h>
+
+
+namespace DB
+{
+
+/** Type - the state of the aggregate function.
+ * The type parameters are an aggregate function, the types of its arguments, and its parameters (for parametric aggregate functions).
+ *
+ * Data type can support versioning for serialization of aggregate function state.
+ * Version 0 also means no versioning. When a table with versioned data type is attached, its version is parsed from AST. If
+ * there is no version in AST, then it is either an attach with no version in metadata (then version is 0) or it
+ * is a new data type (then version is default - latest).
+ *
+ * The version is kept in a mutable optional, so setVersion() and updateVersionFromRevision() can be const.
+ */
+class DataTypeAggregateFunction final : public IDataType
+{
+private:
+ AggregateFunctionPtr function;
+ DataTypes argument_types;
+ Array parameters;
+ mutable std::optional<size_t> version; /// Empty until set explicitly; getVersion() then falls back to the function's default version.
+ /// getNameImpl builds the type name with or without the version; getVersion returns the effective version.
+ String getNameImpl(bool with_version) const;
+ size_t getVersion() const;
+
+public:
+ static constexpr bool is_parametric = true;
+ /// `function_` is moved into the object; argument types and parameters are copied.
+ DataTypeAggregateFunction(AggregateFunctionPtr function_, const DataTypes & argument_types_,
+ const Array & parameters_, std::optional<size_t> version_ = std::nullopt)
+ : function(std::move(function_))
+ , argument_types(argument_types_)
+ , parameters(parameters_)
+ , version(version_)
+ {
+ }
+
+ String getFunctionName() const { return function->getName(); }
+ AggregateFunctionPtr getFunction() const { return function; }
+ /// doGetName() includes a non-zero version in the name, getNameWithoutVersion() never does.
+ String doGetName() const override;
+ String getNameWithoutVersion() const;
+ const char * getFamilyName() const override { return "AggregateFunction"; }
+ String getSQLCompatibleName() const override { return "TEXT"; }
+ TypeIndex getTypeId() const override { return TypeIndex::AggregateFunction; }
+
+ Array getParameters() const { return parameters; }
+
+ bool canBeInsideNullable() const override { return false; }
+
+ DataTypePtr getReturnType() const { return function->getResultType(); }
+ DataTypePtr getReturnTypeToPredict() const { return function->getReturnTypeToPredict(); }
+ DataTypes getArgumentsDataTypes() const { return argument_types; }
+
+ MutableColumnPtr createColumn() const override;
+ /// The default value holds the serialized state of a newly created aggregate function state.
+ Field getDefault() const override;
+ /// strictEquals compares function name, parameters and argument types; equals applies it to the normalized state types.
+ static bool strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & rhs_state_type);
+ bool equals(const IDataType & rhs) const override;
+
+ bool isParametric() const override { return true; }
+ bool haveSubtypes() const override { return false; }
+ bool shouldAlignRightInPrettyFormats() const override { return false; }
+
+ SerializationPtr doGetDefaultSerialization() const override;
+ bool supportsSparseSerialization() const override { return false; }
+
+ bool isVersioned() const { return function->isVersioned(); }
+
+ /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according
+ /// to server revision.
+ /// It is ok to have an empty version value here - then for serialization a default (latest)
+ /// version is used. This method is used to force some zero version to be used instead of
+ /// default, or to set version for serialization in distributed queries.
+ void setVersion(size_t version_, bool if_empty) const
+ {
+ if (version && if_empty) /// With if_empty an already set version is kept as is.
+ return;
+
+ version = version_;
+ }
+ /// Same as setVersion, with the version obtained from the function for the given revision.
+ void updateVersionFromRevision(size_t revision, bool if_empty) const
+ {
+ setVersion(function->getVersionFromRevision(revision), if_empty);
+ }
+};
+
+void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional<size_t> revision = std::nullopt);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeArray.cpp b/contrib/clickhouse/src/DataTypes/DataTypeArray.cpp
new file mode 100644
index 00000000000..e31f10046b7
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeArray.cpp
@@ -0,0 +1,77 @@
+#include <Columns/ColumnArray.h>
+
+#include <Formats/FormatSettings.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationArray.h>
+
+#include <Parsers/IAST.h>
+
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+
+#include <Core/NamesAndTypes.h>
+#include <Columns/ColumnConst.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+using FieldType = Array;
+
+
+/// Stores the type of the array elements.
+DataTypeArray::DataTypeArray(const DataTypePtr & nested_)
+    : nested(nested_)
+{
+}
+
+
+/// Creates a ColumnArray from a new column of the nested type and a new offsets column.
+MutableColumnPtr DataTypeArray::createColumn() const
+{
+    auto elements = nested->createColumn();
+    auto offsets = ColumnArray::ColumnOffsets::create();
+    return ColumnArray::create(std::move(elements), std::move(offsets));
+}
+
+/// The default value of an array type is an empty Array.
+Field DataTypeArray::getDefault() const
+{
+    return Field(Array{});
+}
+
+
+/// Array types are equal when `rhs` has exactly this dynamic type and the nested types are equal.
+bool DataTypeArray::equals(const IDataType & rhs) const
+{
+    if (typeid(rhs) != typeid(*this))
+        return false;
+
+    /// The typeid check above makes the static downcast valid.
+    const auto & other = static_cast<const DataTypeArray &>(rhs);
+    return nested->equals(*other.nested);
+}
+
+/// Wraps the default serialization of the nested type into SerializationArray.
+SerializationPtr DataTypeArray::doGetDefaultSerialization() const
+{
+    auto nested_serialization = nested->getDefaultSerialization();
+    return std::make_shared<SerializationArray>(std::move(nested_serialization));
+}
+
+/// Returns the nesting depth of the array: 1 for a plain array, 2 for an array of arrays and so on.
+/// Implemented as a loop: the former `1 + recursive_call()` was not a tail call, so flat stack usage
+/// depended on the optimizer rather than being guaranteed by the code.
+size_t DataTypeArray::getNumberOfDimensions() const
+{
+    size_t dimensions = 1;
+
+    /// Walk down the chain of nested types for as long as each one is itself an array.
+    const IDataType * current = nested.get();
+    while (const auto * inner_array = typeid_cast<const DataTypeArray *>(current))
+    {
+        ++dimensions;
+        current = inner_array->nested.get();
+    }
+
+    return dimensions;
+}
+
+
+/// Builds Array(T) from AST arguments; exactly one argument (the element type) is required.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    const bool has_single_argument = arguments && arguments->children.size() == 1;
+    if (!has_single_argument)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Array data type family must have exactly one argument - type of elements");
+
+    const auto & element_type_ast = arguments->children[0];
+    return std::make_shared<DataTypeArray>(DataTypeFactory::instance().get(element_type_ast));
+}
+
+
+/// Registers the "Array" type family together with its creator function.
+void registerDataTypeArray(DataTypeFactory & factory)
+{
+    const String family_name = "Array";
+    factory.registerDataType(family_name, create);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeArray.h b/contrib/clickhouse/src/DataTypes/DataTypeArray.h
new file mode 100644
index 00000000000..68b574b8ded
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeArray.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <DataTypes/Serializations/SerializationArray.h>
+#include <Columns/ColumnArray.h>
+
+
+namespace DB
+{
+
+/// Data type of arrays; parametrized by the type of the elements (`nested`).
+class DataTypeArray final : public IDataType
+{
+private:
+ /// The type of array elements.
+ DataTypePtr nested;
+
+public:
+ using FieldType = Array;
+ using ColumnType = ColumnArray;
+ static constexpr bool is_parametric = true;
+
+ explicit DataTypeArray(const DataTypePtr & nested_);
+
+ TypeIndex getTypeId() const override { return TypeIndex::Array; }
+ /// The name is "Array(" + name of the nested type + ")".
+ std::string doGetName() const override
+ {
+ return "Array(" + nested->getName() + ")";
+ }
+
+ const char * getFamilyName() const override
+ {
+ return "Array";
+ }
+ String getSQLCompatibleName() const override
+ {
+ return "TEXT";
+ }
+
+ bool canBeInsideNullable() const override
+ {
+ return false;
+ }
+ /// Creates a ColumnArray over a new column of the nested type.
+ MutableColumnPtr createColumn() const override;
+
+ /// The default value is an empty Array.
+ Field getDefault() const override;
+ /// Equal only to another DataTypeArray whose nested type is equal.
+ bool equals(const IDataType & rhs) const override;
+ /// The properties below, except the first two, are delegated to the nested type.
+ bool isParametric() const override { return true; }
+ bool haveSubtypes() const override { return true; }
+ bool cannotBeStoredInTables() const override { return nested->cannotBeStoredInTables(); }
+ bool textCanContainOnlyValidUTF8() const override { return nested->textCanContainOnlyValidUTF8(); }
+ bool isComparable() const override { return nested->isComparable(); }
+ bool canBeComparedWithCollation() const override { return nested->canBeComparedWithCollation(); }
+ bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); }
+ /// NOTE(review): this asks the nested type for the "FixedSize" variant of the property - presumably intentional; confirm.
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override
+ {
+ return nested->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion();
+ }
+
+ SerializationPtr doGetDefaultSerialization() const override;
+
+ const DataTypePtr & getNestedType() const { return nested; }
+
+ /// 1 for plain array, 2 for array of arrays and so on.
+ size_t getNumberOfDimensions() const;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeCustom.h b/contrib/clickhouse/src/DataTypes/DataTypeCustom.h
new file mode 100644
index 00000000000..cf1e943d8e9
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeCustom.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <memory>
+#include <cstddef>
+#include <Core/Types_fwd.h>
+#include <DataTypes/Serializations/ISerialization.h>
+
+namespace DB
+{
+
+class ReadBuffer;
+class WriteBuffer;
+struct FormatSettings;
+class IColumn;
+
+/** Allows customizing an existing data type by setting a different name and/or text serialization/deserialization methods.
+ * See use in IPv4 and IPv6 data types, and also in SimpleAggregateFunction.
+ *
+ * This interface covers only the name part: implementations provide it through getName().
+ */
+class IDataTypeCustomName
+{
+public:
+ virtual ~IDataTypeCustomName() = default;
+ /// Returns the custom name.
+ virtual String getName() const = 0;
+};
+
+using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
+
+/** Describes a data type customization: a custom name and a custom serialization.
+ * The serialization is optional and is nullptr unless the caller passes one.
+ * Both constructor arguments are taken by value and moved into the members.
+ */
+struct DataTypeCustomDesc
+{
+ DataTypeCustomNamePtr name;
+ SerializationPtr serialization;
+
+ explicit DataTypeCustomDesc(
+ DataTypeCustomNamePtr name_,
+ SerializationPtr serialization_ = nullptr)
+ : name(std::move(name_))
+ , serialization(std::move(serialization_)) {}
+};
+
+using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
+
+/** A simple implementation of IDataTypeCustomName: getName() always returns the name given at construction.
+ */
+class DataTypeCustomFixedName : public IDataTypeCustomName
+{
+private:
+    String name;
+
+public:
+    /// `name_` is a by-value sink parameter, so it is moved into the member instead of being copied again.
+    explicit DataTypeCustomFixedName(String name_) : name(std::move(name_)) {}
+
+    /// Returns a copy of the stored name.
+    String getName() const override { return name; }
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.cpp b/contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.cpp
new file mode 100644
index 00000000000..f7d05fa3be6
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.cpp
@@ -0,0 +1,43 @@
+#include <DataTypes/DataTypeCustomGeo.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeCustom.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypesNumber.h>
+
+namespace DB
+{
+
+/// Registers the geometry types. Each of them is an existing type with a custom name attached.
+void registerDataTypeDomainGeo(DataTypeFactory & factory)
+{
+    /// Point: coordinates stored as Tuple(Float64, Float64).
+    factory.registerSimpleDataTypeCustom("Point", []
+    {
+        auto underlying_type = DataTypeFactory::instance().get("Tuple(Float64, Float64)");
+        auto description = std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePointName>());
+        return std::make_pair(std::move(underlying_type), std::move(description));
+    });
+
+    /// Ring: a simple polygon without holes, stored as Array(Point).
+    factory.registerSimpleDataTypeCustom("Ring", []
+    {
+        auto underlying_type = DataTypeFactory::instance().get("Array(Point)");
+        auto description = std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeRingName>());
+        return std::make_pair(std::move(underlying_type), std::move(description));
+    });
+
+    /// Polygon: a polygon with holes, stored as Array(Ring).
+    /// The first element of the outer array is the outer shape, all the following ones are holes.
+    factory.registerSimpleDataTypeCustom("Polygon", []
+    {
+        auto underlying_type = DataTypeFactory::instance().get("Array(Ring)");
+        auto description = std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePolygonName>());
+        return std::make_pair(std::move(underlying_type), std::move(description));
+    });
+
+    /// MultiPolygon: several polygons with holes, stored as Array(Polygon).
+    factory.registerSimpleDataTypeCustom("MultiPolygon", []
+    {
+        auto underlying_type = DataTypeFactory::instance().get("Array(Polygon)");
+        auto description = std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeMultiPolygonName>());
+        return std::make_pair(std::move(underlying_type), std::move(description));
+    });
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.h b/contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.h
new file mode 100644
index 00000000000..c2a83b3e577
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeCustomGeo.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <DataTypes/DataTypeCustom.h>
+
+namespace DB
+{
+
+/// Fixed custom type name "Point".
+class DataTypePointName : public DataTypeCustomFixedName
+{
+public:
+ DataTypePointName() : DataTypeCustomFixedName("Point") {}
+};
+
+/// Fixed custom type name "Ring".
+class DataTypeRingName : public DataTypeCustomFixedName
+{
+public:
+ DataTypeRingName() : DataTypeCustomFixedName("Ring") {}
+};
+
+/// Fixed custom type name "Polygon".
+class DataTypePolygonName : public DataTypeCustomFixedName
+{
+public:
+ DataTypePolygonName() : DataTypeCustomFixedName("Polygon") {}
+};
+
+/// Fixed custom type name "MultiPolygon".
+class DataTypeMultiPolygonName : public DataTypeCustomFixedName
+{
+public:
+ DataTypeMultiPolygonName() : DataTypeCustomFixedName("MultiPolygon") {}
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
new file mode 100644
index 00000000000..4e50be0a0cc
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp
@@ -0,0 +1,170 @@
+#include <Common/FieldVisitorToString.h>
+#include <Common/typeid_cast.h>
+
+#include <DataTypes/DataTypeCustomSimpleAggregateFunction.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeFactory.h>
+
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTSelectWithUnionQuery.h>
+
+#include <boost/algorithm/string/join.hpp>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int SYNTAX_ERROR;
+ extern const int BAD_ARGUMENTS;
+ extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS;
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int LOGICAL_ERROR;
+}
+
+/// Verifies that `function` may be used inside SimpleAggregateFunction.
+/// Returns normally when the function name is in the allow-list below,
+/// otherwise throws BAD_ARGUMENTS listing every accepted name.
+void DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(const AggregateFunctionPtr & function)
+{
+    /// TODO Make it sane.
+    static const std::vector<String> allowed_names{
+        "any",
+        "anyLast",
+        "min",
+        "max",
+        "sum",
+        "sumWithOverflow",
+        "groupBitAnd",
+        "groupBitOr",
+        "groupBitXor",
+        "sumMap",
+        "minMap",
+        "maxMap",
+        "groupArrayArray",
+        "groupArrayLastArray",
+        "groupUniqArrayArray",
+        "sumMappedArrays",
+        "minMappedArrays",
+        "maxMappedArrays",
+    };
+
+    /// Linear scan over the allow-list; leave as soon as the name is found.
+    const String requested_name = function->getName();
+    for (const auto & allowed : allowed_names)
+    {
+        if (allowed == requested_name)
+            return;
+    }
+
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported aggregate function {}, supported functions are {}",
+        function->getName(), boost::algorithm::join(allowed_names, ","));
+}
+
+/// Renders the type name as
+///   SimpleAggregateFunction(<function>[(<param>, ...)], <arg type>, ...)
+/// The parenthesised parameter list is emitted only when there are parameters.
+String DataTypeCustomSimpleAggregateFunction::getName() const
+{
+    WriteBufferFromOwnString out;
+    out << "SimpleAggregateFunction(" << function->getName();
+
+    if (!parameters.empty())
+    {
+        out << "(";
+        bool is_first = true;
+        for (const auto & parameter : parameters)
+        {
+            if (!is_first)
+                out << ", ";
+            is_first = false;
+            out << applyVisitor(FieldVisitorToString(), parameter);
+        }
+        out << ")";
+    }
+
+    /// Every argument type is preceded by a separator, the first one included.
+    for (const auto & type : argument_types)
+        out << ", " << type->getName();
+
+    out << ")";
+    return out.str();
+}
+
+
+/// Creates the SimpleAggregateFunction custom type from its AST arguments.
+///
+/// Expected form: SimpleAggregateFunction(name[(literal, ...)], type[, type ...]).
+/// The first child is the aggregate function, given either as an identifier or as a function
+/// call whose arguments are literal parameters; every following child is an argument data type.
+///
+/// Returns the storage type (built from the name of the first argument type) paired with a
+/// custom description that carries the SimpleAggregateFunction(...) name.
+///
+/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when there are no arguments or no argument types,
+/// SYNTAX_ERROR / PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS / BAD_ARGUMENTS for a
+/// malformed function name or parameters, and BAD_ARGUMENTS when the function is unsupported
+/// or its result type differs from the storage type.
+static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
+{
+    String function_name;
+    AggregateFunctionPtr function;
+    DataTypes argument_types;
+    Array params_row;
+
+    if (!arguments || arguments->children.empty())
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "Data type SimpleAggregateFunction requires parameters: "
+            "name of aggregate function and list of data types for arguments");
+
+    if (const ASTFunction * parametric = arguments->children[0]->as<ASTFunction>())
+    {
+        /// name(params)(more) is not allowed here: only one level of parameters.
+        if (parametric->parameters)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected level of parameters to aggregate function");
+        function_name = parametric->name;
+
+        if (parametric->arguments)
+        {
+            /// In this position the "arguments" of the function node are the aggregate function parameters.
+            const ASTs & parameters = parametric->arguments->as<ASTExpressionList &>().children;
+            params_row.resize(parameters.size());
+
+            for (size_t i = 0; i < parameters.size(); ++i)
+            {
+                const ASTLiteral * lit = parameters[i]->as<ASTLiteral>();
+                if (!lit)
+                    throw Exception(
+                        ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS,
+                        "Parameters to aggregate functions must be literals. "
+                        "Got parameter '{}' for function '{}'",
+                        parameters[i]->formatForErrorMessage(),
+                        function_name);
+
+                params_row[i] = lit->value;
+            }
+        }
+    }
+    else if (auto opt_name = tryGetIdentifierName(arguments->children[0]))
+    {
+        function_name = *opt_name;
+    }
+    else if (arguments->children[0]->as<ASTLiteral>())
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+            "Aggregate function name for data type SimpleAggregateFunction must "
+            "be passed as identifier (without quotes) or function");
+    }
+    else
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+            "Unexpected AST element passed as aggregate function name for data type "
+            "SimpleAggregateFunction. Must be identifier or function.");
+
+    /// Children after the first one are the argument data types.
+    for (size_t i = 1; i < arguments->children.size(); ++i)
+        argument_types.push_back(DataTypeFactory::instance().get(arguments->children[i]));
+
+    if (function_name.empty())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: empty name of aggregate function passed");
+
+    AggregateFunctionProperties properties;
+    function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties);
+
+    DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function);
+
+    /// The first argument type is the storage type, so at least one must be present.
+    /// Without this check argument_types[0] below would index an empty vector.
+    if (argument_types.empty())
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "Data type SimpleAggregateFunction requires at least one argument data type "
+            "after the name of aggregate function");
+
+    DataTypePtr storage_type = DataTypeFactory::instance().get(argument_types[0]->getName());
+
+    /// The column stores the function result directly, so both types must match (LowCardinality aside).
+    if (!function->getResultType()->equals(*removeLowCardinality(storage_type)))
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incompatible data types between aggregate function '{}' "
+            "which returns {} and column storage type {}",
+            function->getName(), function->getResultType()->getName(), storage_type->getName());
+    }
+
+    DataTypeCustomNamePtr custom_name = std::make_unique<DataTypeCustomSimpleAggregateFunction>(function, argument_types, params_row);
+
+    return std::make_pair(storage_type, std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
+}
+
+/// Registers the "SimpleAggregateFunction" custom type in `factory`, with `create` as its creator.
+void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory)
+{
+ factory.registerDataTypeCustom("SimpleAggregateFunction", create);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h
new file mode 100644
index 00000000000..926dfd9cc82
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <DataTypes/DataTypeCustom.h>
+#include <AggregateFunctions/IAggregateFunction.h>
+
+#include <IO/ReadHelpers.h>
+
+namespace DB
+{
+
+/** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard
+ * data type but when rows are merged, an aggregation function is applied.
+ *
+ * The aggregation function is limited to simple functions whose merge state is the final result,
+ * for example: any, anyLast, min, max, sum (the full list is in checkSupportedFunctions).
+ *
+ * Examples:
+ *
+ * SimpleAggregateFunction(sum, Nullable(Float64))
+ * SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String)))
+ * SimpleAggregateFunction(anyLast, IPv4)
+ *
+ * Technically, a standard IDataType is instantiated and customized with IDataTypeCustomName and DataTypeCustomDesc.
+ */
+
+/// Custom name of a SimpleAggregateFunction(...) type. Holds the aggregate function together
+/// with its argument types and parameters; getName() renders the type name from them.
+class DataTypeCustomSimpleAggregateFunction : public IDataTypeCustomName
+{
+private:
+ const AggregateFunctionPtr function;
+ const DataTypes argument_types;
+ const Array parameters;
+
+public:
+ /// Stores copies of the function pointer, the argument types and the parameters.
+ DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_)
+ : function(function_), argument_types(argument_types_), parameters(parameters_) {}
+
+ AggregateFunctionPtr getFunction() const { return function; }
+ String getName() const override;
+ /// Throws if `function` is not in the list of functions accepted for SimpleAggregateFunction.
+ static void checkSupportedFunctions(const AggregateFunctionPtr & function);
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDate.cpp b/contrib/clickhouse/src/DataTypes/DataTypeDate.cpp
new file mode 100644
index 00000000000..ee4b0065e59
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDate.cpp
@@ -0,0 +1,23 @@
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/Serializations/SerializationDate.h>
+#include <DataTypes/DataTypeFactory.h>
+
+namespace DB
+{
+
+/// Equal when `rhs` has exactly the same dynamic type as this object.
+bool DataTypeDate::equals(const IDataType & rhs) const
+{
+    const bool same_dynamic_type = typeid(*this) == typeid(rhs);
+    return same_dynamic_type;
+}
+
+/// Returns a newly created SerializationDate.
+SerializationPtr DataTypeDate::doGetDefaultSerialization() const
+{
+ return std::make_shared<SerializationDate>();
+}
+
+/// Registers the "Date" type in `factory` under a case-insensitive name.
+void registerDataTypeDate(DataTypeFactory & factory)
+{
+    auto make_date = [] { return DataTypePtr(std::make_shared<DataTypeDate>()); };
+    factory.registerSimpleDataType("Date", make_date, DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDate.h b/contrib/clickhouse/src/DataTypes/DataTypeDate.h
new file mode 100644
index 00000000000..0d557cad5f0
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDate.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <DataTypes/DataTypeNumberBase.h>
+
+
+namespace DB
+{
+
+/// The Date data type; built on DataTypeNumberBase<UInt16>.
+class DataTypeDate final : public DataTypeNumberBase<UInt16>
+{
+public:
+ static constexpr auto family_name = "Date";
+
+ TypeIndex getTypeId() const override { return TypeIndex::Date; }
+ const char * getFamilyName() const override { return family_name; }
+ String getSQLCompatibleName() const override { return "DATE"; }
+
+ bool canBeUsedAsVersion() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+
+ /// True when `rhs` has the same dynamic type as this object.
+ bool equals(const IDataType & rhs) const override;
+
+protected:
+ /// Returns a SerializationDate.
+ SerializationPtr doGetDefaultSerialization() const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDate32.cpp b/contrib/clickhouse/src/DataTypes/DataTypeDate32.cpp
new file mode 100644
index 00000000000..83b1260eb6d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDate32.cpp
@@ -0,0 +1,23 @@
+#include <DataTypes/DataTypeDate32.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationDate32.h>
+
+namespace DB
+{
+/// Equal when `rhs` has exactly the same dynamic type as this object.
+bool DataTypeDate32::equals(const IDataType & rhs) const
+{
+    const bool same_dynamic_type = typeid(*this) == typeid(rhs);
+    return same_dynamic_type;
+}
+
+/// Returns a newly created SerializationDate32.
+SerializationPtr DataTypeDate32::doGetDefaultSerialization() const
+{
+ return std::make_shared<SerializationDate32>();
+}
+
+/// Registers the "Date32" type in `factory` under a case-insensitive name.
+void registerDataTypeDate32(DataTypeFactory & factory)
+{
+    auto make_date32 = [] { return DataTypePtr(std::make_shared<DataTypeDate32>()); };
+    factory.registerSimpleDataType("Date32", make_date32, DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDate32.h b/contrib/clickhouse/src/DataTypes/DataTypeDate32.h
new file mode 100644
index 00000000000..0879a404179
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDate32.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <Core/Field.h>
+#include <Common/DateLUT.h>
+#include <DataTypes/DataTypeNumberBase.h>
+
+namespace DB
+{
+/// The Date32 data type; built on DataTypeNumberBase<Int32>.
+class DataTypeDate32 final : public DataTypeNumberBase<Int32>
+{
+public:
+ static constexpr auto family_name = "Date32";
+
+ TypeIndex getTypeId() const override { return TypeIndex::Date32; }
+ const char * getFamilyName() const override { return family_name; }
+ /// Same SQL-compatible name as DataTypeDate.
+ String getSQLCompatibleName() const override { return "DATE"; }
+
+ /// The default is the negated value of DateLUT's getDayNumOffsetEpoch(), not zero.
+ Field getDefault() const override
+ {
+ return -static_cast<Int64>(DateLUT::instance().getDayNumOffsetEpoch());
+ }
+
+ bool canBeUsedAsVersion() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+
+ /// True when `rhs` has the same dynamic type as this object.
+ bool equals(const IDataType & rhs) const override;
+
+protected:
+ /// Returns a SerializationDate32.
+ SerializationPtr doGetDefaultSerialization() const override;
+};
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDateTime.cpp b/contrib/clickhouse/src/DataTypes/DataTypeDateTime.cpp
new file mode 100644
index 00000000000..c7722e1c1d9
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDateTime.cpp
@@ -0,0 +1,42 @@
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/Serializations/SerializationDateTime.h>
+
+#include <IO/Operators.h>
+#include <IO/WriteBufferFromString.h>
+
+namespace DB
+{
+
+/// Constructs the type from a time zone name, which is forwarded to TimezoneMixin.
+DataTypeDateTime::DataTypeDateTime(const String & time_zone_name)
+ : TimezoneMixin(time_zone_name)
+{
+}
+
+/// Constructs the type by copying the time zone information of an existing TimezoneMixin.
+DataTypeDateTime::DataTypeDateTime(const TimezoneMixin & time_zone_)
+ : TimezoneMixin(time_zone_)
+{
+}
+
+/// Returns "DateTime" when no time zone was given explicitly,
+/// otherwise "DateTime(<quoted time zone name>)".
+String DataTypeDateTime::doGetName() const
+{
+    if (has_explicit_time_zone)
+    {
+        WriteBufferFromOwnString buf;
+        buf << "DateTime(" << quote << time_zone.getTimeZone() << ")";
+        return buf.str();
+    }
+
+    return "DateTime";
+}
+
+/// Only the dynamic type is compared; the time zone is deliberately ignored because
+/// DateTime types with different time zones are equivalent and may be used interchangeably.
+bool DataTypeDateTime::equals(const IDataType & rhs) const
+{
+    const bool same_dynamic_type = typeid(*this) == typeid(rhs);
+    return same_dynamic_type;
+}
+
+/// Returns a SerializationDateTime constructed from this type object.
+SerializationPtr DataTypeDateTime::doGetDefaultSerialization() const
+{
+ return std::make_shared<SerializationDateTime>(*this);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDateTime.h b/contrib/clickhouse/src/DataTypes/DataTypeDateTime.h
new file mode 100644
index 00000000000..a473aae1faf
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDateTime.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <DataTypes/DataTypeNumberBase.h>
+#include <DataTypes/TimezoneMixin.h>
+
+namespace DB
+{
+
+/** DateTime stores time as unix timestamp.
+ * The value itself is independent of time zone.
+ *
+ * In binary format it is represented as unix timestamp.
+ * In text format it is serialized to and parsed from YYYY-MM-DD hh:mm:ss format.
+ * The text format depends on the time zone.
+ *
+ * To cast from/to text format, time zone may be specified explicitly or implicit time zone may be used.
+ *
+ * Time zone may be specified explicitly as type parameter, example: DateTime('Pacific/Pitcairn').
+ * As it does not affect the internal representation of values,
+ * all types with different time zones are equivalent and may be used interchangeably.
+ * Time zone only affects parsing and displaying in text formats.
+ *
+ * If time zone is not specified (example: DateTime without parameter),
+ * then `session_timezone` setting value is used.
+ * If `session_timezone` is not set (or empty string), server default time zone is used.
+ * The default time zone is the server time zone, both when the server is doing transformations
+ * and when the client is doing transformations, unless the 'use_client_time_zone' setting is passed to the client.
+ * The server time zone is the time zone specified in the 'timezone' parameter in the configuration file,
+ * or the system time zone at the moment of server startup.
+ */
+class DataTypeDateTime final : public DataTypeNumberBase<UInt32>, public TimezoneMixin
+{
+public:
+ /// The time zone name defaults to the empty string.
+ explicit DataTypeDateTime(const String & time_zone_name = "");
+ /// Copies the time zone information from an existing TimezoneMixin.
+ explicit DataTypeDateTime(const TimezoneMixin & time_zone);
+
+ static constexpr auto family_name = "DateTime";
+
+ const char * getFamilyName() const override { return family_name; }
+ String getSQLCompatibleName() const override { return "DATETIME"; }
+ /// "DateTime", or "DateTime(<quoted time zone>)" when the time zone is explicit.
+ String doGetName() const override;
+ TypeIndex getTypeId() const override { return TypeIndex::DateTime; }
+
+ bool canBeUsedAsVersion() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+
+ /// Compares dynamic types only; the time zone does not take part in the comparison.
+ bool equals(const IDataType & rhs) const override;
+
+ SerializationPtr doGetDefaultSerialization() const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDateTime64.cpp b/contrib/clickhouse/src/DataTypes/DataTypeDateTime64.cpp
new file mode 100644
index 00000000000..124fea1f458
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDateTime64.cpp
@@ -0,0 +1,70 @@
+#include <DataTypes/DataTypeDateTime64.h>
+#include <DataTypes/Serializations/SerializationDateTime64.h>
+#include <IO/Operators.h>
+#include <IO/WriteBufferFromString.h>
+#include <optional>
+#include <string>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ARGUMENT_OUT_OF_BOUND;
+ extern const int LOGICAL_ERROR;
+}
+
+/// Largest scale accepted by the DataTypeDateTime64 constructors (9 = nanoseconds).
+static constexpr UInt32 max_scale = 9;
+
+/// Constructs DateTime64 with the given scale and time zone name.
+/// The precision is always DecimalUtils::max_precision<DateTime64>.
+/// Throws ARGUMENT_OUT_OF_BOUND when the scale exceeds max_scale.
+DataTypeDateTime64::DataTypeDateTime64(UInt32 scale_, const std::string & time_zone_name)
+ : DataTypeDecimalBase<DateTime64>(DecimalUtils::max_precision<DateTime64>, scale_),
+ TimezoneMixin(time_zone_name)
+{
+ if (scale > max_scale)
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is too large for DateTime64. "
+ "Maximum is up to nanoseconds (9).", std::to_string(scale));
+}
+
+/// Constructs DateTime64 with the given scale, copying the time zone information from `time_zone_info`.
+/// The precision is always DecimalUtils::max_precision<DateTime64>.
+/// Throws ARGUMENT_OUT_OF_BOUND when the scale exceeds max_scale.
+DataTypeDateTime64::DataTypeDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_info)
+ : DataTypeDecimalBase<DateTime64>(DecimalUtils::max_precision<DateTime64>, scale_),
+ TimezoneMixin(time_zone_info)
+{
+ if (scale > max_scale)
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is too large for DateTime64. "
+ "Maximum is up to nanoseconds (9).", std::to_string(scale));
+}
+
+/// Returns "DateTime64(<scale>)" when no time zone was given explicitly,
+/// otherwise "DateTime64(<scale>, <quoted time zone name>)".
+std::string DataTypeDateTime64::doGetName() const
+{
+    if (has_explicit_time_zone)
+    {
+        WriteBufferFromOwnString buf;
+        buf << "DateTime64(" << this->scale << ", " << quote << time_zone.getTimeZone() << ")";
+        return buf.str();
+    }
+
+    std::string name = getFamilyName();
+    name += "(";
+    name += std::to_string(this->scale);
+    name += ")";
+    return name;
+}
+
+/// Equal when `rhs` is also a DataTypeDateTime64 with the same scale.
+/// The time zone does not take part in the comparison.
+bool DataTypeDateTime64::equals(const IDataType & rhs) const
+{
+    const auto * other = typeid_cast<const DataTypeDateTime64 *>(&rhs);
+    return other != nullptr && this->scale == other->getScale();
+}
+
+/// Returns a SerializationDateTime64 constructed from the scale and this type object.
+SerializationPtr DataTypeDateTime64::doGetDefaultSerialization() const
+{
+ return std::make_shared<SerializationDateTime64>(scale, *this);
+}
+
+/// Returns the explicit time zone name of a DateTime or DateTime64 type,
+/// or an empty string when the type has no explicit time zone.
+/// Throws LOGICAL_ERROR for any other type.
+std::string getDateTimeTimezone(const IDataType & data_type)
+{
+    if (const auto * date_time = typeid_cast<const DataTypeDateTime *>(&data_type))
+    {
+        if (date_time->hasExplicitTimeZone())
+            return date_time->getTimeZone().getTimeZone();
+        return std::string();
+    }
+
+    if (const auto * date_time64 = typeid_cast<const DataTypeDateTime64 *>(&data_type))
+    {
+        if (date_time64->hasExplicitTimeZone())
+            return date_time64->getTimeZone().getTimeZone();
+        return std::string();
+    }
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get time zone from type {}", data_type.getName());
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDateTime64.h b/contrib/clickhouse/src/DataTypes/DataTypeDateTime64.h
new file mode 100644
index 00000000000..7663518807f
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDateTime64.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeDecimalBase.h>
+
+class DateLUTImpl;
+
+namespace DB
+{
+
+/** DateTime64 is same as DateTime, but it stores values as Int64 and has configurable sub-second part.
+ *
+ * `scale` determines number of decimal places for sub-second part of the DateTime64.
+ */
+class DataTypeDateTime64 final : public DataTypeDecimalBase<DateTime64>, public TimezoneMixin
+{
+public:
+ using Base = DataTypeDecimalBase<DateTime64>;
+ static constexpr UInt8 default_scale = 3;
+
+ static constexpr auto family_name = "DateTime64";
+ static constexpr auto type_id = TypeIndex::DateTime64;
+
+ /// Throws ARGUMENT_OUT_OF_BOUND when scale_ is larger than 9 (see DataTypeDateTime64.cpp).
+ explicit DataTypeDateTime64(UInt32 scale_, const std::string & time_zone_name = "");
+
+ // reuse timezone from other DateTime/DateTime64
+ DataTypeDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_info);
+
+ const char * getFamilyName() const override { return family_name; }
+ String getSQLCompatibleName() const override { return "DATETIME"; }
+ /// "DateTime64(<scale>)", or "DateTime64(<scale>, <quoted time zone>)" when the time zone is explicit.
+ std::string doGetName() const override;
+ TypeIndex getTypeId() const override { return type_id; }
+
+ /// True when `rhs` is a DataTypeDateTime64 with the same scale; the time zone is not compared.
+ bool equals(const IDataType & rhs) const override;
+
+ bool canBePromoted() const override { return false; }
+
+ bool canBeUsedAsVersion() const override { return true; }
+
+ /// Overrides the `true` returned by DataTypeDecimalBase.
+ bool isSummable() const override { return false; }
+
+protected:
+ SerializationPtr doGetDefaultSerialization() const override;
+};
+
+std::string getDateTimeTimezone(const IDataType & data_type);
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.cpp b/contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.cpp
new file mode 100644
index 00000000000..62218694924
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.cpp
@@ -0,0 +1,47 @@
+#include <DataTypes/DataTypeDecimalBase.h>
+#include <Interpreters/Context.h>
+#include <type_traits>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+}
+
+/// Returns the value of the `decimal_check_overflow` setting of the given context.
+bool decimalCheckComparisonOverflow(ContextPtr context)
+{
+ return context->getSettingsRef().decimal_check_overflow;
+}
+/// Returns the value of the `decimal_check_overflow` setting of the given context
+/// (the same setting that decimalCheckComparisonOverflow reads).
+bool decimalCheckArithmeticOverflow(ContextPtr context)
+{
+ return context->getSettingsRef().decimal_check_overflow;
+}
+
+/// The default value is a DecimalField holding zero with this type's scale.
+template <is_decimal T>
+Field DataTypeDecimalBase<T>::getDefault() const
+{
+ return DecimalField(T(0), scale);
+}
+
+/// Creates a ColumnDecimal<T> via ColumnType::create(0, scale).
+template <is_decimal T>
+MutableColumnPtr DataTypeDecimalBase<T>::createColumn() const
+{
+ return ColumnType::create(0, scale);
+}
+
+/// Returns DecimalUtils::scaleMultiplier for the given scale, computed in T's native type.
+template <is_decimal T>
+T DataTypeDecimalBase<T>::getScaleMultiplier(UInt32 scale_)
+{
+ return DecimalUtils::scaleMultiplier<typename T::NativeType>(scale_);
+}
+
+
+/// Explicit template instantiations.
+template class DataTypeDecimalBase<Decimal32>;
+template class DataTypeDecimalBase<Decimal64>;
+template class DataTypeDecimalBase<Decimal128>;
+template class DataTypeDecimalBase<Decimal256>;
+template class DataTypeDecimalBase<DateTime64>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.h b/contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.h
new file mode 100644
index 00000000000..adbe9c95b14
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDecimalBase.h
@@ -0,0 +1,210 @@
+#pragma once
+
+#include <cmath>
+#include <type_traits>
+
+#include <Core/TypeId.h>
+#include <Core/DecimalFunctions.h>
+#include <Columns/ColumnDecimal.h>
+#include <DataTypes/IDataType.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Interpreters/Context_fwd.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ARGUMENT_OUT_OF_BOUND;
+}
+
+bool decimalCheckComparisonOverflow(ContextPtr context);
+bool decimalCheckArithmeticOverflow(ContextPtr context);
+
+/// Maps an integer type index to a decimal precision:
+/// 3 for 8-bit, 5 for 16-bit, 10 for 32-bit, 19 for Int64 and 20 for UInt64.
+/// Returns 0 for every other type index.
+inline UInt32 leastDecimalPrecisionFor(TypeIndex int_type)
+{
+    switch (int_type)
+    {
+        case TypeIndex::Int8:
+        case TypeIndex::UInt8:
+            return 3;
+        case TypeIndex::Int16:
+        case TypeIndex::UInt16:
+            return 5;
+        case TypeIndex::Int32:
+        case TypeIndex::UInt32:
+            return 10;
+        case TypeIndex::Int64:
+            return 19;
+        case TypeIndex::UInt64:
+            return 20;
+        default:
+            return 0;
+    }
+}
+
+/// Base class for decimals, like Decimal(P, S), where P is precision, S is scale.
+/// Maximum precisions for underlying types are:
+/// Int32 9
+/// Int64 18
+/// Int128 38
+/// Int256 76
+/// Operation between two decimals leads to Decimal(P, S), where
+/// P is one of (9, 18, 38, 76); equals to the maximum precision for the biggest underlying type of operands.
+/// S is maximum scale of operands. The allowed values are [0, precision]
+template <is_decimal T>
+class DataTypeDecimalBase : public IDataType
+{
+public:
+ using FieldType = T;
+ using ColumnType = ColumnDecimal<T>;
+ static constexpr auto type_id = TypeToTypeIndex<T>;
+
+ static constexpr bool is_parametric = true;
+
+ static constexpr size_t maxPrecision() { return DecimalUtils::max_precision<T>; }
+
+ /// Throws ARGUMENT_OUT_OF_BOUND when precision is outside [1, maxPrecision()]
+ /// or when scale is larger than maxPrecision().
+ /// Note: the scale is checked against maxPrecision(), not against the given precision.
+ DataTypeDecimalBase(UInt32 precision_, UInt32 scale_)
+ : precision(precision_),
+ scale(scale_)
+ {
+ if (unlikely(precision < 1 || precision > maxPrecision()))
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
+ "Precision {} is out of bounds (precision range: [1, {}])",
+ std::to_string(precision), maxPrecision());
+ if (unlikely(scale > maxPrecision()))
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds (max scale: {})",
+ std::to_string(scale), maxPrecision());
+ }
+
+ TypeIndex getTypeId() const override { return TypeToTypeIndex<T>; }
+
+ Field getDefault() const override;
+ MutableColumnPtr createColumn() const override;
+
+ bool isParametric() const override { return true; }
+ bool haveSubtypes() const override { return false; }
+ bool shouldAlignRightInPrettyFormats() const override { return true; }
+ bool textCanContainOnlyValidUTF8() const override { return true; }
+ bool isComparable() const override { return true; }
+ bool isValueRepresentedByNumber() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return true; }
+ size_t getSizeOfValueInMemory() const override { return sizeof(T); }
+
+ bool isSummable() const override { return true; }
+ bool canBeUsedInBooleanContext() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+
+ /// Decimal specific
+
+ UInt32 getPrecision() const { return precision; }
+ UInt32 getScale() const { return scale; }
+ /// Scale multiplier for this type's own scale.
+ T getScaleMultiplier() const { return getScaleMultiplier(scale); }
+
+ /// Whole part of x, as computed by DecimalUtils::getWholePart with this type's scale.
+ T wholePart(T x) const
+ {
+ return DecimalUtils::getWholePart(x, scale);
+ }
+
+ /// Fractional part of x, as computed by DecimalUtils::getFractionalPart with this type's scale.
+ T fractionalPart(T x) const
+ {
+ return DecimalUtils::getFractionalPart(x, scale);
+ }
+
+ /// Scale multiplier for (precision - scale), minus one.
+ T maxWholeValue() const { return getScaleMultiplier(precision - scale) - T(1); }
+
+ /// Checks x against maxWholeValue(): for signed U the range is [-max, max],
+ /// for unsigned U only the upper bound is checked.
+ template <typename U>
+ bool canStoreWhole(U x) const
+ {
+ static_assert(is_signed_v<typename T::NativeType>);
+ T max = maxWholeValue();
+ if constexpr (is_signed_v<U>)
+ return -max.value <= x && x <= max.value;
+ else
+ return x <= static_cast<make_unsigned_t<typename T::NativeType>>(max.value);
+ }
+
+ /// @returns multiplier for U to become T with correct scale
+ /// Throws ARGUMENT_OUT_OF_BOUND when this type's scale is smaller than x's scale.
+ /// The bool parameter is unused in this overload.
+ template <typename U>
+ T scaleFactorFor(const DataTypeDecimalBase<U> & x, bool) const
+ {
+ if (getScale() < x.getScale())
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Decimal result's scale is less than argument's one");
+ UInt32 scale_delta = getScale() - x.getScale(); /// scale_delta >= 0
+ return getScaleMultiplier(scale_delta);
+ }
+
+ /// Overload for a plain number operand: 1 when is_multiply_or_divisor is set,
+ /// otherwise this type's own scale multiplier.
+ template <typename U>
+ T scaleFactorFor(const DataTypeNumber<U> & , bool is_multiply_or_divisor) const
+ {
+ if (is_multiply_or_divisor)
+ return T(1);
+ return getScaleMultiplier();
+ }
+
+ /// Scale multiplier for an arbitrary scale.
+ static T getScaleMultiplier(UInt32 scale);
+
+ /// Packs precision and scale into a DecimalUtils::DataTypeDecimalTrait<T>.
+ inline DecimalUtils::DataTypeDecimalTrait<T> getTrait() const
+ {
+ return {precision, scale};
+ }
+
+protected:
+ const UInt32 precision;
+ const UInt32 scale;
+};
+
+
+/// Returns `data_type` cast to DataTypeDecimalBase<T> when isColumnedAsDecimalT<T> accepts it,
+/// otherwise nullptr.
+template <typename T>
+inline const DataTypeDecimalBase<T> * checkDecimalBase(const IDataType & data_type)
+{
+    return isColumnedAsDecimalT<T>(data_type)
+        ? static_cast<const DataTypeDecimalBase<T> *>(&data_type)
+        : nullptr;
+}
+
+/// Result type of a binary operation on two decimal types.
+/// Field type, precision and scale are taken from DecimalUtils::binaryOpResult.
+template <bool is_multiply, bool is_division, typename T, typename U, template <typename> typename DecimalType>
+inline auto decimalResultType(const DecimalType<T> & tx, const DecimalType<U> & ty)
+{
+ const auto result_trait = DecimalUtils::binaryOpResult<is_multiply, is_division>(tx, ty);
+ return DecimalType<typename decltype(result_trait)::FieldType>(result_trait.precision, result_trait.scale);
+}
+
+/// Result type of a binary operation with a decimal on the left and a plain number on the right.
+/// Precision and scale are taken from DecimalUtils::binaryOpResult.
+template <bool is_multiply, bool is_division, typename T, typename U, template <typename> typename DecimalType>
+inline DecimalType<T> decimalResultType(const DecimalType<T> & tx, const DataTypeNumber<U> & ty)
+{
+ const auto result_trait = DecimalUtils::binaryOpResult<is_multiply, is_division>(tx, ty);
+ return DecimalType<typename decltype(result_trait)::FieldType>(result_trait.precision, result_trait.scale);
+}
+
+/// Result type of a binary operation with a plain number on the left and a decimal on the right.
+/// Precision and scale are taken from DecimalUtils::binaryOpResult.
+template <bool is_multiply, bool is_division, typename T, typename U, template <typename> typename DecimalType>
+inline DecimalType<U> decimalResultType(const DataTypeNumber<T> & tx, const DecimalType<U> & ty)
+{
+ const auto result_trait = DecimalUtils::binaryOpResult<is_multiply, is_division>(tx, ty);
+ return DecimalType<typename decltype(result_trait)::FieldType>(result_trait.precision, result_trait.scale);
+}
+
+/// Creates a decimal data type of the narrowest width (Decimal32/64/128/256) that can hold
+/// `precision_value` digits.
+/// Throws ARGUMENT_OUT_OF_BOUND when the precision is outside
+/// [DecimalUtils::min_precision, DecimalUtils::max_precision<Decimal256>]
+/// or when the scale is larger than the precision.
+template <template <typename> typename DecimalType>
+inline DataTypePtr createDecimal(UInt64 precision_value, UInt64 scale_value)
+{
+    const bool precision_in_range = DecimalUtils::min_precision <= precision_value
+        && precision_value <= DecimalUtils::max_precision<Decimal256>;
+    if (!precision_in_range)
+        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Wrong precision: it must be between {} and {}, got {}",
+            DecimalUtils::min_precision, DecimalUtils::max_precision<Decimal256>, precision_value);
+
+    if (scale_value > precision_value)
+        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Negative scales and scales larger than precision are not supported");
+
+    /// Pick the smallest underlying type whose maximum precision covers the requested one.
+    if (precision_value <= DecimalUtils::max_precision<Decimal32>)
+        return std::make_shared<DecimalType<Decimal32>>(precision_value, scale_value);
+
+    if (precision_value <= DecimalUtils::max_precision<Decimal64>)
+        return std::make_shared<DecimalType<Decimal64>>(precision_value, scale_value);
+
+    if (precision_value <= DecimalUtils::max_precision<Decimal128>)
+        return std::make_shared<DecimalType<Decimal128>>(precision_value, scale_value);
+
+    return std::make_shared<DecimalType<Decimal256>>(precision_value, scale_value);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeDomainBool.cpp b/contrib/clickhouse/src/DataTypes/DataTypeDomainBool.cpp
new file mode 100644
index 00000000000..245c5495299
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeDomainBool.cpp
@@ -0,0 +1,21 @@
+#include <DataTypes/Serializations/SerializationBool.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeCustom.h>
+
+namespace DB
+{
+
+/// Registers the "Bool" domain type: the UInt8 type with the fixed custom name "Bool" and a
+/// SerializationBool wrapping UInt8's default serialization.
+/// Also registers the case-insensitive aliases "bool" and "boolean".
+void registerDataTypeDomainBool(DataTypeFactory & factory)
+{
+ factory.registerSimpleDataTypeCustom("Bool", []
+ {
+ auto type = DataTypeFactory::instance().get("UInt8");
+ return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
+ std::make_unique<DataTypeCustomFixedName>("Bool"), std::make_unique<SerializationBool>(type->getDefaultSerialization())));
+ });
+
+ factory.registerAlias("bool", "Bool", DataTypeFactory::CaseInsensitive);
+ factory.registerAlias("boolean", "Bool", DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeEnum.cpp b/contrib/clickhouse/src/DataTypes/DataTypeEnum.cpp
new file mode 100644
index 00000000000..1750ae785bf
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeEnum.cpp
@@ -0,0 +1,348 @@
+#include <IO/WriteBufferFromString.h>
+#include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/Serializations/SerializationEnum.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <Parsers/IAST.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+#include <Common/UTF8Helpers.h>
+#include <Poco/UTF8Encoding.h>
+
+#include <limits>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int BAD_TYPE_OF_FIELD;
+ extern const int EMPTY_DATA_PASSED;
+ extern const int UNEXPECTED_AST_STRUCTURE;
+ extern const int ARGUMENT_OUT_OF_BOUND;
+}
+
+
+template <typename FieldType> struct EnumName; /// Maps the underlying integer type of an enum to its family name; only Int8 and Int16 are specialized.
+template <> struct EnumName<Int8> { static constexpr auto value = "Enum8"; };
+template <> struct EnumName<Int16> { static constexpr auto value = "Enum16"; };
+
+
+/// Family name of the type: the EnumName constant for the underlying integer type ("Enum8" or "Enum16").
+template <typename Type>
+const char * DataTypeEnum<Type>::getFamilyName() const
+{
+    const char * family_name = EnumName<FieldType>::value;
+    return family_name;
+}
+
+/// Renders the MySQL-style spelling of the type: ENUM('a', 'b', ...).
+/// Only the element names are written (through writeQuotedString); the numeric values are left out.
+template <typename Type>
+std::string DataTypeEnum<Type>::generateMySQLName(const Values & values)
+{
+    WriteBufferFromOwnString buf;
+    writeString("ENUM", buf);
+    writeChar('(', buf);
+
+    for (auto it = values.begin(); it != values.end(); ++it)
+    {
+        /// The separator precedes every element except the first one.
+        if (it != values.begin())
+            writeString(", ", buf);
+        writeQuotedString(it->first, buf);
+    }
+
+    writeChar(')', buf);
+    return buf.str();
+}
+
+/// Renders the full type name: the family name followed by the element list,
+/// e.g. Enum8('a' = 1, 'b' = 2). Names are written through writeQuotedString, values through writeText.
+template <typename Type>
+std::string DataTypeEnum<Type>::generateName(const Values & values)
+{
+    WriteBufferFromOwnString buf;
+    writeString(EnumName<FieldType>::value, buf);
+    writeChar('(', buf);
+
+    for (auto it = values.begin(); it != values.end(); ++it)
+    {
+        /// The separator precedes every element except the first one.
+        if (it != values.begin())
+            writeString(", ", buf);
+
+        writeQuotedString(it->first, buf);
+        writeString(" = ", buf);
+        writeText(it->second, buf);
+    }
+
+    writeChar(')', buf);
+    return buf.str();
+}
+
+/// Hands the name/value pairs to the EnumValues base, then caches the textual type name.
+/// The name is generated from what the base reports through getValues(), not from `values_` directly.
+template <typename Type>
+DataTypeEnum<Type>::DataTypeEnum(const Values & values_)
+    : EnumValues<Type>(values_), type_name(generateName(this->getValues()))
+{
+}
+
+/// The default is the numeric value of the first element in getValues().
+template <typename Type>
+Field DataTypeEnum<Type>::getDefault() const
+{
+    const auto & first_element = this->getValues().front();
+    return first_element.second;
+}
+
+/// Appends the default (the value of the first element in getValues()) to `column`,
+/// which is cast to ColumnType with assert_cast.
+template <typename Type>
+void DataTypeEnum<Type>::insertDefaultInto(IColumn & column) const
+{
+    auto & data = assert_cast<ColumnType &>(column).getData();
+    data.push_back(this->getValues().front().second);
+}
+
+/// Two enum types are equal when they have the same dynamic type and the same generated type name.
+template <typename Type>
+bool DataTypeEnum<Type>::equals(const IDataType & rhs) const
+{
+    if (typeid(rhs) != typeid(*this))
+        return false;
+
+    /// The typeid check above makes the downcast valid.
+    const auto & other = static_cast<const DataTypeEnum<Type> &>(rhs);
+    return type_name == other.type_name;
+}
+
+
+/// Returns true when every element name consists solely of sequences that
+/// Poco::UTF8Encoding::isLegal accepts and none of them runs past the end of the name.
+template <typename Type>
+bool DataTypeEnum<Type>::textCanContainOnlyValidUTF8() const
+{
+    for (const auto & element : this->getValues())
+    {
+        const auto & name = element.first;
+        const char * cursor = name.data();
+        const char * const name_end = cursor + name.size();
+
+        while (cursor < name_end)
+        {
+            /// Sequence length as reported for the leading byte.
+            const size_t seq_length = UTF8::seqLength(*cursor);
+            if (cursor + seq_length > name_end)
+                return false;
+
+            const bool legal = Poco::UTF8Encoding::isLegal(
+                reinterpret_cast<const unsigned char *>(cursor), static_cast<int>(seq_length));
+            if (!legal)
+                return false;
+
+            cursor += seq_length;
+        }
+    }
+    return true;
+}
+
+/// Throws BAD_TYPE_OF_FIELD unless `value` lies within the representable range of `Type`.
+template <typename Type>
+static void checkOverflow(Int64 value)
+{
+    const bool fits = std::numeric_limits<Type>::min() <= value && value <= std::numeric_limits<Type>::max();
+    if (fits)
+        return;
+
+    throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "DataTypeEnum: Unexpected value {}", toString(value));
+}
+
+/// Converts a field holding either an element name (String) or an element value (Int64)
+/// into a field holding the element name.
+/// Any other field type is rejected with BAD_TYPE_OF_FIELD; note that, unlike castToValue,
+/// a UInt64 field is not accepted here.
+template <typename Type>
+Field DataTypeEnum<Type>::castToName(const Field & value_or_name) const
+{
+    const auto field_type = value_or_name.getType();
+
+    if (field_type == Field::Types::String)
+    {
+        const auto & name = value_or_name.get<String>();
+        this->getValue(name); /// Check correctness (the result itself is not needed)
+        return name;
+    }
+
+    if (field_type == Field::Types::Int64)
+    {
+        const Int64 value = value_or_name.get<Int64>();
+        checkOverflow<Type>(value);
+        return this->getNameForValue(static_cast<Type>(value)).toString();
+    }
+
+    throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD,
+        "DataTypeEnum: Unsupported type of field {}", value_or_name.getTypeName());
+}
+
+/// Converts a field holding either an element name (String) or an element value (Int64 / UInt64)
+/// into a field holding the element value. Any other field type is rejected with BAD_TYPE_OF_FIELD.
+template <typename Type>
+Field DataTypeEnum<Type>::castToValue(const Field & value_or_name) const
+{
+    const auto field_type = value_or_name.getType();
+
+    if (field_type == Field::Types::String)
+        return this->getValue(value_or_name.get<String>());
+
+    const bool is_integer = field_type == Field::Types::Int64 || field_type == Field::Types::UInt64;
+    if (!is_integer)
+        throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD,
+            "DataTypeEnum: Unsupported type of field {}", value_or_name.getTypeName());
+
+    /// Both integer field kinds are read as Int64.
+    const Int64 value = value_or_name.get<Int64>();
+    checkOverflow<Type>(value);
+    this->getNameForValue(static_cast<Type>(value)); /// Check correctness (the result itself is not needed)
+    return value;
+}
+
+
+/// Delegates to containsAll() with the values of `rhs` when `rhs` is an Enum8 or Enum16 type;
+/// any other type is never contained.
+template <typename Type>
+bool DataTypeEnum<Type>::contains(const IDataType & rhs) const
+{
+    const auto * as_enum8 = typeid_cast<const DataTypeEnum8 *>(&rhs);
+    if (as_enum8)
+        return this->containsAll(as_enum8->getValues());
+
+    const auto * as_enum16 = typeid_cast<const DataTypeEnum16 *>(&rhs);
+    if (as_enum16)
+        return this->containsAll(as_enum16->getValues());
+
+    return false;
+}
+
+/// The default serialization is a SerializationEnum built over this type's name/value pairs.
+template <typename Type>
+SerializationPtr DataTypeEnum<Type>::doGetDefaultSerialization() const
+{
+    using Serialization = SerializationEnum<Type>;
+    return std::make_shared<Serialization>(this->getValues());
+}
+
+
+/// Explicit instantiations.
+template class DataTypeEnum<Int8>;
+template class DataTypeEnum<Int16>;
+
+/// Verifies that `child` is a function named `equals` without parameters and with exactly two arguments;
+/// throws UNEXPECTED_AST_STRUCTURE otherwise. The argument kinds are not inspected here.
+static void checkASTStructure(const ASTPtr & child)
+{
+    const auto * func = child->as<ASTFunction>();
+
+    const bool well_formed = func
+        && func->name == "equals"
+        && !func->parameters
+        && func->arguments
+        && func->arguments->children.size() == 2;
+    if (well_formed)
+        return;
+
+    throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Elements of Enum data type must be of form: "
+        "'name' = number, where name is string literal and number is an integer");
+}
+
+/// Rewrites the element list of an Enum declaration in place so that bare literals become `equals` functions.
+///
+/// A bare literal gets the value `base + k`: `base` is the explicit value of the first element
+/// (1 when the first element is itself a bare literal) and `k` is the number of bare literals
+/// seen so far that are not the first element. Elements that are already functions are kept as is;
+/// only the first one is validated here, because its value is needed as the base.
+///
+/// Throws UNEXPECTED_AST_STRUCTURE for an element that is neither a literal nor a function,
+/// and when only some of the non-first elements are bare literals.
+static void autoAssignNumberForEnum(const ASTPtr & arguments)
+{
+    const size_t total = arguments->children.size();
+
+    Int64 base_value = 1;
+    size_t auto_numbered = 0;
+
+    ASTs normalized;
+    normalized.reserve(total);
+
+    bool at_first_element = true;
+    for (const ASTPtr & element : arguments->children)
+    {
+        if (element->as<ASTLiteral>())
+        {
+            if (!at_first_element)
+                ++auto_numbered;
+
+            normalized.emplace_back(
+                makeASTFunction("equals", element, std::make_shared<ASTLiteral>(base_value + auto_numbered)));
+        }
+        else if (element->as<ASTFunction>())
+        {
+            if (at_first_element)
+            {
+                checkASTStructure(element);
+                const auto * first_func = element->as<ASTFunction>();
+                const auto * value_literal = first_func->arguments->children[1]->as<ASTLiteral>();
+
+                const bool is_integer = value_literal
+                    && (value_literal->value.getType() == Field::Types::UInt64
+                        || value_literal->value.getType() == Field::Types::Int64);
+                if (!is_integer)
+                    throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+                        "Elements of Enum data type must be of form: "
+                        "'name' = number or 'name', where name is string literal and number is an integer");
+
+                base_value = value_literal->value.get<Int64>();
+            }
+            normalized.emplace_back(element);
+        }
+        else
+        {
+            throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+                "Elements of Enum data type must be of form: "
+                "'name' = number or 'name', where name is string literal and number is an integer");
+        }
+
+        at_first_element = false;
+    }
+
+    /// Among the non-first elements either none or all must have been auto-numbered.
+    const bool mixed_forms = auto_numbered != 0 && auto_numbered != total - 1;
+    if (mixed_forms)
+        throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+            "All elements of Enum data type must be of form: "
+            "'name' = number or 'name', where name is string literal and number is an integer");
+
+    arguments->children = normalized;
+}
+
+/// Builds an enum type of exactly the requested width (`DataTypeEnum`) from the declaration arguments.
+/// Throws EMPTY_DATA_PASSED for an empty list, UNEXPECTED_AST_STRUCTURE for a malformed element
+/// and ARGUMENT_OUT_OF_BOUND for a value outside the range of the underlying integer type.
+template <typename DataTypeEnum>
+static DataTypePtr createExact(const ASTPtr & arguments)
+{
+    if (!arguments || arguments->children.empty())
+        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Enum data type cannot be empty");
+
+    using FieldType = typename DataTypeEnum::FieldType;
+
+    /// Turns bare 'name' elements into 'name' = number; the element count stays the same.
+    autoAssignNumberForEnum(arguments);
+
+    typename DataTypeEnum::Values values;
+    values.reserve(arguments->children.size());
+
+    /// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument.
+    for (const ASTPtr & element : arguments->children)
+    {
+        checkASTStructure(element);
+
+        const auto & equals_args = element->as<ASTFunction>()->arguments->children;
+        const auto * name_literal = equals_args[0]->as<ASTLiteral>();
+        const auto * value_literal = equals_args[1]->as<ASTLiteral>();
+
+        const bool name_ok = name_literal && name_literal->value.getType() == Field::Types::String;
+        const bool value_ok = value_literal
+            && (value_literal->value.getType() == Field::Types::UInt64
+                || value_literal->value.getType() == Field::Types::Int64);
+        if (!name_ok || !value_ok)
+            throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+                "Elements of Enum data type must be of form: "
+                "'name' = number or 'name', where name is string literal and number is an integer");
+
+        const String & field_name = name_literal->value.get<String>();
+        const auto value = value_literal->value.get<FieldType>();
+
+        const bool out_of_range
+            = value > std::numeric_limits<FieldType>::max() || value < std::numeric_limits<FieldType>::min();
+        if (out_of_range)
+            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value {} for element '{}' exceeds range of {}",
+                toString(value), field_name, EnumName<FieldType>::value);
+
+        values.emplace_back(field_name, value);
+    }
+
+    return std::make_shared<DataTypeEnum>(values);
+}
+
+/// Creator for the width-agnostic `Enum` family: yields Enum8 when every value fits into Int8,
+/// and Enum16 as soon as one value does not. The actual construction is done by createExact.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    if (!arguments || arguments->children.empty())
+        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Enum data type cannot be empty");
+
+    autoAssignNumberForEnum(arguments);
+
+    /// Children must be functions 'equals' with string literal as left argument and numeric literal as right argument.
+    for (const ASTPtr & element : arguments->children)
+    {
+        checkASTStructure(element);
+
+        const auto * value_literal = element->as<ASTFunction>()->arguments->children[1]->as<ASTLiteral>();
+
+        const bool is_integer = value_literal
+            && (value_literal->value.getType() == Field::Types::UInt64
+                || value_literal->value.getType() == Field::Types::Int64);
+        if (!is_integer)
+            throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+                "Elements of Enum data type must be of form: "
+                "'name' = number or 'name', where name is string literal and number is an integer");
+
+        const Int64 value = value_literal->value.get<Int64>();
+
+        const bool fits_enum8
+            = value <= std::numeric_limits<Int8>::max() && value >= std::numeric_limits<Int8>::min();
+        if (!fits_enum8)
+            return createExact<DataTypeEnum16>(arguments);
+    }
+
+    return createExact<DataTypeEnum8>(arguments);
+}
+
+/// Registers the families Enum8, Enum16 and the width-agnostic Enum,
+/// plus the case-insensitive alias ENUM for MySQL compatibility.
+void registerDataTypeEnum(DataTypeFactory & factory)
+{
+    /// DataTypeEnum8 / DataTypeEnum16 are aliases of DataTypeEnum<Int8> / DataTypeEnum<Int16>.
+    factory.registerDataType("Enum8", createExact<DataTypeEnum8>);
+    factory.registerDataType("Enum16", createExact<DataTypeEnum16>);
+    factory.registerDataType("Enum", create);
+
+    /// MySQL
+    factory.registerAlias("ENUM", "Enum", DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeEnum.h b/contrib/clickhouse/src/DataTypes/DataTypeEnum.h
new file mode 100644
index 00000000000..d148f753c82
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeEnum.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <DataTypes/EnumValues.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnConst.h>
+#include <Common/HashTable/HashMap.h>
+#include <vector>
+#include <unordered_map>
+
+
+namespace DB
+{
+
+class IDataTypeEnum : public IDataType /// Width-independent interface of the Enum data types; DataTypeEnum<Type> implements it.
+{
+public:
+ virtual Field castToName(const Field & value_or_name) const = 0; /// Takes a field with a name or a value, returns a field with the element name.
+ virtual Field castToValue(const Field & value_or_name) const = 0; /// Takes a field with a name or a value, returns a field with the element value.
+
+ bool isParametric() const override { return true; } /// The fixed answers below apply to every enum type.
+ bool haveSubtypes() const override { return false; }
+ bool isValueRepresentedByNumber() const override { return true; }
+ bool isValueRepresentedByInteger() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return true; }
+ bool isCategorial() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+ bool isComparable() const override { return true; }
+
+ virtual bool contains(const IDataType & rhs) const = 0; /// Whether this enum type extends the enum type `rhs`.
+};
+
+
+template <typename Type>
+class DataTypeEnum final : public IDataTypeEnum, public EnumValues<Type> /// Enum data type stored as the integer `Type`; instantiated for Int8 and Int16.
+{
+public:
+ using FieldType = Type;
+ using ColumnType = ColumnVector<FieldType>;
+ static constexpr auto type_id = sizeof(FieldType) == 1 ? TypeIndex::Enum8 : TypeIndex::Enum16; /// Enum8 for a one-byte FieldType, Enum16 otherwise.
+ using typename EnumValues<Type>::Values;
+
+ static constexpr bool is_parametric = true;
+
+private:
+ std::string type_name; /// Full type name; what doGetName() returns.
+ static std::string generateName(const Values & values);
+ static std::string generateMySQLName(const Values & values);
+
+public:
+ explicit DataTypeEnum(const Values & values_);
+
+ std::string doGetName() const override { return type_name; }
+ const char * getFamilyName() const override;
+ String getSQLCompatibleName() const override { return generateMySQLName(this->getValues()); } /// Regenerated on every call, not cached.
+
+ TypeIndex getTypeId() const override { return type_id; }
+
+ FieldType readValue(ReadBuffer & istr) const /// Reads a value in text form and returns the `first` member of the entry found by findByValue().
+ {
+ FieldType x;
+ readText(x, istr);
+ return this->findByValue(x)->first;
+ }
+
+ Field castToName(const Field & value_or_name) const override;
+ Field castToValue(const Field & value_or_name) const override;
+
+ MutableColumnPtr createColumn() const override { return ColumnType::create(); }
+
+ Field getDefault() const override;
+ void insertDefaultInto(IColumn & column) const override;
+
+ bool equals(const IDataType & rhs) const override;
+
+ bool textCanContainOnlyValidUTF8() const override;
+ size_t getSizeOfValueInMemory() const override { return sizeof(FieldType); }
+
+ /// Check current Enum type extends another Enum type (contains all the same values and doesn't override name's with other values)
+ /// Example:
+ /// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK
+ /// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK
+ bool contains(const IDataType & rhs) const override;
+
+ SerializationPtr doGetDefaultSerialization() const override;
+};
+
+
+using DataTypeEnum8 = DataTypeEnum<Int8>;
+using DataTypeEnum16 = DataTypeEnum<Int16>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeFactory.cpp b/contrib/clickhouse/src/DataTypes/DataTypeFactory.cpp
new file mode 100644
index 00000000000..415f24d8151
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeFactory.cpp
@@ -0,0 +1,301 @@
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeCustom.h>
+#include <Parsers/parseQuery.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTIdentifier.h>
+#include <Parsers/ASTLiteral.h>
+#include <Common/typeid_cast.h>
+#include <Poco/String.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <IO/WriteHelpers.h>
+#include <Core/Defines.h>
+#include <Common/CurrentThread.h>
+#include <Interpreters/Context.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int UNKNOWN_TYPE;
+ extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE;
+ extern const int UNEXPECTED_AST_STRUCTURE;
+ extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
+}
+
+/// Parses `full_name` as a data type expression and creates the type; failures are reported by exceptions.
+DataTypePtr DataTypeFactory::get(const String & full_name) const
+{
+    constexpr bool nullptr_on_error = false;
+    return getImpl<nullptr_on_error>(full_name);
+}
+
+DataTypePtr DataTypeFactory::tryGet(const String & full_name) const
+{
+ return getImpl<true>(full_name);
+}
+
+/// Parses `full_name` with ParserDataType and forwards the resulting AST to the AST overload.
+/// With `nullptr_on_error` a parse failure yields nullptr; otherwise parseQuery reports it.
+template <bool nullptr_on_error>
+DataTypePtr DataTypeFactory::getImpl(const String & full_name) const
+{
+    /// Data type parser can be invoked from coroutines with small stack.
+    /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers)
+    /// let's make the threshold significantly lower.
+    /// It is impractical for user to have complex data types with this depth.
+
+#if !defined(SANITIZER) && defined(NDEBUG)
+    static constexpr size_t data_type_max_parse_depth = 300;
+#else
+    static constexpr size_t data_type_max_parse_depth = 150;
+#endif
+
+    const char * const begin = full_name.data();
+    const char * const end = begin + full_name.size();
+
+    ParserDataType parser;
+    ASTPtr ast;
+    if constexpr (!nullptr_on_error)
+    {
+        ast = parseQuery(parser, begin, end, "data type", false, data_type_max_parse_depth);
+    }
+    else
+    {
+        String out_err;
+        /// tryParseQuery needs a modifiable position, so it gets its own copy of the start pointer.
+        const char * pos = begin;
+        ast = tryParseQuery(parser, pos, end, out_err, false, "data type", false, DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth);
+        if (!ast)
+            return nullptr;
+    }
+
+    return getImpl<nullptr_on_error>(ast);
+}
+
+/// Creates the data type described by an already parsed AST; failures are reported by exceptions.
+DataTypePtr DataTypeFactory::get(const ASTPtr & ast) const
+{
+    constexpr bool nullptr_on_error = false;
+    return getImpl<nullptr_on_error>(ast);
+}
+
+DataTypePtr DataTypeFactory::tryGet(const ASTPtr & ast) const
+{
+ return getImpl<true>(ast);
+}
+
+/// Dispatches on the kind of AST node:
+///  - function: family name with arguments (a second, parenthesized parameter list is rejected);
+///  - identifier: family name without arguments;
+///  - literal holding a null value: the type named "Null".
+/// Anything else is an error: nullptr with `nullptr_on_error`, UNEXPECTED_AST_STRUCTURE otherwise.
+template <bool nullptr_on_error>
+DataTypePtr DataTypeFactory::getImpl(const ASTPtr & ast) const
+{
+    const auto * func = ast->as<ASTFunction>();
+    if (func && func->parameters)
+    {
+        if constexpr (nullptr_on_error)
+            return nullptr;
+        throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Data type cannot have multiple parenthesized parameters.");
+    }
+    if (func)
+        return getImpl<nullptr_on_error>(func->name, func->arguments);
+
+    if (const auto * ident = ast->as<ASTIdentifier>())
+        return getImpl<nullptr_on_error>(ident->name(), {});
+
+    const auto * lit = ast->as<ASTLiteral>();
+    if (lit && lit->value.isNull())
+        return getImpl<nullptr_on_error>("Null", {});
+
+    if constexpr (nullptr_on_error)
+        return nullptr;
+    throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST element for data type.");
+}
+
+/// Creates a data type from a family name (or alias) and its argument list; failures are reported by exceptions.
+DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr & parameters) const
+{
+    constexpr bool nullptr_on_error = false;
+    return getImpl<nullptr_on_error>(family_name_param, parameters);
+}
+
+DataTypePtr DataTypeFactory::tryGet(const String & family_name_param, const ASTPtr & parameters) const
+{
+ return getImpl<true>(family_name_param, parameters);
+}
+
+/// Resolves the family name (through aliases) and invokes the registered creator with `parameters`.
+///
+/// A name ending in "WithDictionary" is rewritten to LowCardinality(<name without the suffix>),
+/// passing `parameters` on to the inner type when they are present.
+/// With `nullptr_on_error`, an unknown family and any exception thrown by the creator yield nullptr.
+template <bool nullptr_on_error>
+DataTypePtr DataTypeFactory::getImpl(const String & family_name_param, const ASTPtr & parameters) const
+{
+    const String family_name = getAliasToOrName(family_name_param);
+
+    if (endsWith(family_name, "WithDictionary"))
+    {
+        const String param_name = family_name.substr(0, family_name.size() - strlen("WithDictionary"));
+
+        /// The inner type is a function node when it has arguments and a plain identifier otherwise.
+        ASTPtr inner_type;
+        if (parameters)
+        {
+            auto func = std::make_shared<ASTFunction>();
+            func->name = param_name;
+            func->arguments = parameters;
+            inner_type = func;
+        }
+        else
+        {
+            inner_type = std::make_shared<ASTIdentifier>(param_name);
+        }
+
+        ASTPtr low_cardinality_params = std::make_shared<ASTExpressionList>();
+        low_cardinality_params->children.push_back(inner_type);
+
+        return getImpl<nullptr_on_error>("LowCardinality", low_cardinality_params);
+    }
+
+    const auto * creator = findCreatorByName<nullptr_on_error>(family_name);
+    if constexpr (!nullptr_on_error)
+    {
+        assert(creator);
+        return (*creator)(parameters);
+    }
+    else
+    {
+        if (!creator)
+            return nullptr;
+
+        try
+        {
+            return (*creator)(parameters);
+        }
+        catch (...)
+        {
+            return nullptr;
+        }
+    }
+}
+
+/// Creates the type named by `customization->name` and attaches the customization to it.
+/// Throws LOGICAL_ERROR when the customization carries no name.
+DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) const
+{
+    if (!customization->name)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create custom type without name");
+
+    const String custom_name = customization->name->getName();
+    auto type = get(custom_name);
+    type->setCustomization(std::move(customization));
+    return type;
+}
+
+
+/// Adds `creator` under `family_name`, and additionally under the lowercased name when the
+/// family is case-insensitive. Throws LOGICAL_ERROR for a null creator, for a name (or its
+/// lowercased form) that is already an alias, and for a name that is already registered.
+void DataTypeFactory::registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness)
+{
+    if (creator == nullptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type family {} has been provided a null constructor", family_name);
+
+    const String family_name_lowercase = Poco::toLower(family_name);
+
+    const bool clashes_with_alias = isAlias(family_name) || isAlias(family_name_lowercase);
+    if (clashes_with_alias)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type family name '{}' is already registered as alias", family_name);
+
+    const bool inserted = data_types.emplace(family_name, creator).second;
+    if (!inserted)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type family name '{}' is not unique",
+            family_name);
+
+    if (case_sensitiveness != CaseInsensitive)
+        return;
+
+    const bool inserted_lowercase = case_insensitive_data_types.emplace(family_name_lowercase, creator).second;
+    if (!inserted_lowercase)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the case insensitive data type family name '{}' is not unique", family_name);
+}
+
+/// Registers a type that takes no arguments: the stored creator throws
+/// DATA_TYPE_CANNOT_HAVE_ARGUMENTS when it is given an argument list and calls `creator` otherwise.
+/// A null `creator` is rejected with LOGICAL_ERROR.
+void DataTypeFactory::registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness)
+{
+    if (!creator)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type {} has been provided a null constructor",
+            name);
+
+    auto argumentless_creator = [name, creator](const ASTPtr & ast)
+    {
+        if (ast)
+            throw Exception(ErrorCodes::DATA_TYPE_CANNOT_HAVE_ARGUMENTS, "Data type {} cannot have arguments", name);
+        return creator();
+    };
+
+    registerDataType(name, argumentless_creator, case_sensitiveness);
+}
+
+/// Registers a type family whose creator returns the base type together with a customization.
+/// The stored creator calls `creator`, attaches the returned customization to the returned type
+/// and hands back that type.
+/// Throws LOGICAL_ERROR when `creator` is empty, in addition to the errors of registerDataType.
+void DataTypeFactory::registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
+{
+    /// The wrapper lambda below is never empty, so the null check inside registerDataType cannot
+    /// see an empty `creator`. Reject it at registration instead of failing with
+    /// std::bad_function_call on first use.
+    if (creator == nullptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type family {} has been provided a null constructor", family_name);
+
+    registerDataType(family_name, [creator](const ASTPtr & ast)
+    {
+        auto res = creator(ast);
+        res.first->setCustomization(std::move(res.second));
+
+        return res.first;
+    }, case_sensitiveness);
+}
+
+/// Registers a customized type that takes no arguments: the stored creator throws
+/// DATA_TYPE_CANNOT_HAVE_ARGUMENTS when it is given an argument list and calls `creator` otherwise.
+/// Throws LOGICAL_ERROR when `creator` is empty, in addition to the errors of registerDataTypeCustom.
+void DataTypeFactory::registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness)
+{
+    /// The wrapper lambda below is never empty, so no later null check can see an empty `creator`.
+    /// Reject it at registration instead of failing with std::bad_function_call on first use.
+    if (creator == nullptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeFactory: the data type {} has been provided a null constructor",
+            name);
+
+    registerDataTypeCustom(name, [name, creator](const ASTPtr & ast)
+    {
+        if (ast)
+            throw Exception(ErrorCodes::DATA_TYPE_CANNOT_HAVE_ARGUMENTS, "Data type {} cannot have arguments", name);
+        return creator();
+    }, case_sensitiveness);
+}
+
+/// Looks up the creator for `family_name`: first by the exact name, then by the lowercased name
+/// among the case-insensitive families. A hit is recorded in the query's factories info when a
+/// query context exists and its `log_queries` setting is on.
+/// On a miss: nullptr with `nullptr_on_error`, otherwise UNKNOWN_TYPE (with hints when there are any).
+template <bool nullptr_on_error>
+const DataTypeFactory::Value * DataTypeFactory::findCreatorByName(const String & family_name) const
+{
+    ContextPtr query_context;
+    if (CurrentThread::isInitialized())
+        query_context = CurrentThread::get().getQueryContext();
+
+    const auto record_usage = [&query_context](const String & matched_name)
+    {
+        if (query_context && query_context->getSettingsRef().log_queries)
+            query_context->addQueryFactoriesInfo(Context::QueryLogFactories::DataType, matched_name);
+    };
+
+    if (const auto exact = data_types.find(family_name); exact != data_types.end())
+    {
+        record_usage(family_name);
+        return &exact->second;
+    }
+
+    const String family_name_lowercase = Poco::toLower(family_name);
+
+    if (const auto relaxed = case_insensitive_data_types.find(family_name_lowercase);
+        relaxed != case_insensitive_data_types.end())
+    {
+        /// The lowercased name is what gets recorded for a case-insensitive match.
+        record_usage(family_name_lowercase);
+        return &relaxed->second;
+    }
+
+    if constexpr (nullptr_on_error)
+        return nullptr;
+
+    auto hints = this->getHints(family_name);
+    if (hints.empty())
+        throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown data type family: {}", family_name);
+
+    throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown data type family: {}. Maybe you meant: {}", family_name, toString(hints));
+}
+
+DataTypeFactory::DataTypeFactory() /// Populates the factory by passing this instance to each registerDataType* function below.
+{
+ registerDataTypeNumbers(*this);
+ registerDataTypeDecimal(*this);
+ registerDataTypeDate(*this);
+ registerDataTypeDate32(*this);
+ registerDataTypeDateTime(*this);
+ registerDataTypeString(*this);
+ registerDataTypeFixedString(*this);
+ registerDataTypeEnum(*this);
+ registerDataTypeArray(*this);
+ registerDataTypeTuple(*this);
+ registerDataTypeNullable(*this);
+ registerDataTypeNothing(*this);
+ registerDataTypeUUID(*this);
+ registerDataTypeIPv4andIPv6(*this);
+ registerDataTypeAggregateFunction(*this);
+ registerDataTypeNested(*this);
+ registerDataTypeInterval(*this);
+ registerDataTypeLowCardinality(*this);
+ registerDataTypeDomainBool(*this);
+ registerDataTypeDomainSimpleAggregateFunction(*this);
+ registerDataTypeDomainGeo(*this);
+ registerDataTypeMap(*this);
+ registerDataTypeObject(*this);
+}
+
+/// Returns the single factory object, a function-local static constructed on the first call.
+DataTypeFactory & DataTypeFactory::instance()
+{
+    static DataTypeFactory factory;
+    return factory;
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeFactory.h b/contrib/clickhouse/src/DataTypes/DataTypeFactory.h
new file mode 100644
index 00000000000..ba7c1a3d7fe
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeFactory.h
@@ -0,0 +1,104 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <Parsers/IAST_fwd.h>
+#include <Common/IFactoryWithAliases.h>
+#include <DataTypes/DataTypeCustom.h>
+
+
+#include <functional>
+#include <memory>
+#include <unordered_map>
+
+
+namespace DB
+{
+
+class IDataType;
+using DataTypePtr = std::shared_ptr<const IDataType>;
+
+
+/** Creates a data type by name of data type family and parameters.
+ */
+class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAliases<std::function<DataTypePtr(const ASTPtr & parameters)>>
+{
+private:
+ using SimpleCreator = std::function<DataTypePtr()>; /// Creator of a type that takes no arguments.
+ using DataTypesDictionary = std::unordered_map<String, Value>; /// Family name -> creator.
+ using CreatorWithCustom = std::function<std::pair<DataTypePtr, DataTypeCustomDescPtr>(const ASTPtr & parameters)>; /// Creator returning a type together with its customization.
+ using SimpleCreatorWithCustom = std::function<std::pair<DataTypePtr,DataTypeCustomDescPtr>()>; /// Same, for a type that takes no arguments.
+
+public:
+ static DataTypeFactory & instance(); /// Access to the single factory object.
+
+ DataTypePtr get(const String & full_name) const; /// `full_name` is parsed as a data type expression.
+ DataTypePtr get(const String & family_name, const ASTPtr & parameters) const;
+ DataTypePtr get(const ASTPtr & ast) const;
+ DataTypePtr getCustom(DataTypeCustomDescPtr customization) const; /// Creates the type named by the customization and attaches the customization to it.
+
+ /// Return nullptr in case of error.
+ DataTypePtr tryGet(const String & full_name) const;
+ DataTypePtr tryGet(const String & family_name, const ASTPtr & parameters) const;
+ DataTypePtr tryGet(const ASTPtr & ast) const;
+
+ /// Register a type family by its name.
+ void registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+
+ /// Register a simple data type, that have no parameters.
+ void registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+
+ /// Register a customized type family
+ void registerDataTypeCustom(const String & family_name, CreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+
+ /// Register a simple customized data type
+ void registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
+
+private:
+ template <bool nullptr_on_error>
+ DataTypePtr getImpl(const String & full_name) const; /// Shared implementations of get / tryGet; `nullptr_on_error` selects the tryGet behaviour.
+ template <bool nullptr_on_error>
+ DataTypePtr getImpl(const String & family_name, const ASTPtr & parameters) const;
+ template <bool nullptr_on_error>
+ DataTypePtr getImpl(const ASTPtr & ast) const;
+ template <bool nullptr_on_error>
+ const Value * findCreatorByName(const String & family_name) const;
+
+ DataTypesDictionary data_types; /// Every registered family, keyed by the name exactly as registered.
+
+ /// Case insensitive data types will be additionally added here with lowercased name.
+ DataTypesDictionary case_insensitive_data_types;
+
+ DataTypeFactory(); /// Private: the factory is obtained through instance().
+
+ const DataTypesDictionary & getMap() const override { return data_types; }
+
+ const DataTypesDictionary & getCaseInsensitiveMap() const override { return case_insensitive_data_types; }
+
+ String getFactoryName() const override { return "DataTypeFactory"; }
+};
+
+void registerDataTypeNumbers(DataTypeFactory & factory);
+void registerDataTypeDecimal(DataTypeFactory & factory);
+void registerDataTypeDate(DataTypeFactory & factory);
+void registerDataTypeDate32(DataTypeFactory & factory);
+void registerDataTypeDateTime(DataTypeFactory & factory);
+void registerDataTypeString(DataTypeFactory & factory);
+void registerDataTypeFixedString(DataTypeFactory & factory);
+void registerDataTypeEnum(DataTypeFactory & factory);
+void registerDataTypeArray(DataTypeFactory & factory);
+void registerDataTypeTuple(DataTypeFactory & factory);
+void registerDataTypeMap(DataTypeFactory & factory);
+void registerDataTypeNullable(DataTypeFactory & factory);
+void registerDataTypeNothing(DataTypeFactory & factory);
+void registerDataTypeUUID(DataTypeFactory & factory);
+void registerDataTypeIPv4andIPv6(DataTypeFactory & factory);
+void registerDataTypeAggregateFunction(DataTypeFactory & factory);
+void registerDataTypeNested(DataTypeFactory & factory);
+void registerDataTypeInterval(DataTypeFactory & factory);
+void registerDataTypeLowCardinality(DataTypeFactory & factory);
+void registerDataTypeDomainBool(DataTypeFactory & factory);
+void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
+void registerDataTypeDomainGeo(DataTypeFactory & factory);
+void registerDataTypeObject(DataTypeFactory & factory);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeFixedString.cpp b/contrib/clickhouse/src/DataTypes/DataTypeFixedString.cpp
new file mode 100644
index 00000000000..85af59e852d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeFixedString.cpp
@@ -0,0 +1,70 @@
+#include <Columns/ColumnFixedString.h>
+
+#include <DataTypes/DataTypeFixedString.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationFixedString.h>
+
+#include <Parsers/IAST.h>
+#include <Parsers/ASTLiteral.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int UNEXPECTED_AST_STRUCTURE;
+}
+
+
+/// Type name in the form FixedString(n).
+std::string DataTypeFixedString::doGetName() const
+{
+    std::string name = "FixedString(";
+    name += toString(n);
+    name += ")";
+    return name;
+}
+
+/// Creates a ColumnFixedString, passing this type's size `n` to it.
+MutableColumnPtr DataTypeFixedString::createColumn() const
+{
+    const size_t string_size = n;
+    return ColumnFixedString::create(string_size);
+}
+
+/// The default is a field holding an empty String.
+Field DataTypeFixedString::getDefault() const
+{
+    return String{};
+}
+
+/// Two FixedString types are equal when they have the same dynamic type and the same size `n`.
+bool DataTypeFixedString::equals(const IDataType & rhs) const
+{
+    if (typeid(rhs) != typeid(*this))
+        return false;
+
+    /// The typeid check above makes the downcast valid.
+    const auto & other = static_cast<const DataTypeFixedString &>(rhs);
+    return n == other.n;
+}
+
+/// The default serialization is a SerializationFixedString constructed with the size `n`.
+SerializationPtr DataTypeFixedString::doGetDefaultSerialization() const
+{
+    using Serialization = SerializationFixedString;
+    return std::make_shared<Serialization>(n);
+}
+
+
+/// Creator for the FixedString family. Expects exactly one argument: a UInt64 literal greater than zero.
+/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH for a wrong argument count and
+/// UNEXPECTED_AST_STRUCTURE for an argument of any other form.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    const bool has_single_argument = arguments && arguments->children.size() == 1;
+    if (!has_single_argument)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "FixedString data type family must have exactly one argument - size in bytes");
+
+    const auto * size_literal = arguments->children[0]->as<ASTLiteral>();
+    const bool is_positive_integer = size_literal
+        && size_literal->value.getType() == Field::Types::UInt64
+        && size_literal->value.get<UInt64>() != 0;
+    if (!is_positive_integer)
+        throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+            "FixedString data type family must have a number (positive integer) as its argument");
+
+    return std::make_shared<DataTypeFixedString>(size_literal->value.get<UInt64>());
+}
+
+
+/// Registers the FixedString family and its case-insensitive compatibility alias BINARY.
+void registerDataTypeFixedString(DataTypeFactory & factory)
+{
+    static constexpr auto family_name = "FixedString";
+
+    factory.registerDataType(family_name, create);
+
+    /// Compatibility alias.
+    factory.registerAlias("BINARY", family_name, DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeFixedString.h b/contrib/clickhouse/src/DataTypes/DataTypeFixedString.h
new file mode 100644
index 00000000000..22ec793208d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeFixedString.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <Common/PODArray_fwd.h>
+#include <Common/Exception.h>
+
+#define MAX_FIXEDSTRING_SIZE 0xFFFFFF
+#define MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS 256
+
+
+namespace DB
+{
+
+class ColumnFixedString;
+
+namespace ErrorCodes
+{
+ extern const int ARGUMENT_OUT_OF_BOUND;
+}
+
+
+class DataTypeFixedString final : public IDataType /// Data type FixedString(n), parameterized by the size `n`.
+{
+private:
+ size_t n; /// Between 1 and MAX_FIXEDSTRING_SIZE; enforced by the constructor.
+
+public:
+ using ColumnType = ColumnFixedString;
+
+ static constexpr bool is_parametric = true;
+ static constexpr auto type_id = TypeIndex::FixedString;
+
+ explicit DataTypeFixedString(size_t n_) : n(n_) /// Throws ARGUMENT_OUT_OF_BOUND when n_ is 0 or exceeds MAX_FIXEDSTRING_SIZE.
+ {
+ if (n == 0)
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "FixedString size must be positive");
+ if (n > MAX_FIXEDSTRING_SIZE)
+ throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "FixedString size is too large");
+ }
+
+ std::string doGetName() const override;
+ TypeIndex getTypeId() const override { return type_id; }
+
+ const char * getFamilyName() const override { return "FixedString"; }
+ /// Use TEXT for compatibility with MySQL to allow arbitrary bytes.
+ String getSQLCompatibleName() const override { return "TEXT"; }
+
+ size_t getN() const /// The size this type was constructed with.
+ {
+ return n;
+ }
+
+ MutableColumnPtr createColumn() const override;
+
+ Field getDefault() const override;
+
+ bool equals(const IDataType & rhs) const override;
+
+ SerializationPtr doGetDefaultSerialization() const override;
+
+ bool isParametric() const override { return true; }
+ bool haveSubtypes() const override { return false; }
+ bool isComparable() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return true; }
+ size_t getSizeOfValueInMemory() const override { return n; } /// Every value occupies exactly `n` bytes.
+ bool isCategorial() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+ bool canBeInsideLowCardinality() const override { return true; }
+
+ /// Makes sure that the length of a newly inserted string to `chars` is equal to getN().
+ /// If the length is less than getN() the function will add zero characters up to getN().
+ /// If the length is greater than getN() the function will throw an exception.
+ void alignStringLength(PaddedPODArray<UInt8> & chars, size_t old_size) const;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeFunction.cpp b/contrib/clickhouse/src/DataTypes/DataTypeFunction.cpp
new file mode 100644
index 00000000000..82f3d7ee515
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeFunction.cpp
@@ -0,0 +1,36 @@
+#include <DataTypes/DataTypeFunction.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+
+
+namespace DB
+{
+
+ /// Renders the type name as "Function(<arguments> -> <result>)".
+ /// Arguments are comma-separated and wrapped in parentheses only when there is more than one;
+ /// a type that is not known yet (null pointer) is printed as "?".
+ std::string DataTypeFunction::doGetName() const
+ {
+     const bool parenthesize_arguments = argument_types.size() > 1;
+
+     WriteBufferFromOwnString out;
+     out << "Function(";
+
+     if (parenthesize_arguments)
+         out << "(";
+
+     bool is_first_argument = true;
+     for (const DataTypePtr & argument : argument_types)
+     {
+         if (!is_first_argument)
+             out << ", ";
+         is_first_argument = false;
+
+         out << (argument ? argument->getName() : "?");
+     }
+
+     if (parenthesize_arguments)
+         out << ")";
+
+     out << " -> ";
+     out << (return_type ? return_type->getName() : "?");
+     out << ")";
+
+     return out.str();
+ }
+
+ /// Two Function types are equal when they are of the same dynamic class and have identical names
+ /// (the name encodes the argument types and the return type).
+ bool DataTypeFunction::equals(const IDataType & rhs) const
+ {
+     if (typeid(rhs) != typeid(*this))
+         return false;
+
+     return getName() == rhs.getName();
+ }
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeFunction.h b/contrib/clickhouse/src/DataTypes/DataTypeFunction.h
new file mode 100644
index 00000000000..9acec676ce0
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeFunction.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <DataTypes/IDataTypeDummy.h>
+
+
+namespace DB
+{
+
+ /** Special data type, representing lambda expression.
+ * Stores the argument types and the return type of the lambda.
+ * Any of them may be a null pointer while the type is still unknown.
+ */
+ class DataTypeFunction final : public IDataTypeDummy
+ {
+ private:
+ /// Types of the lambda arguments, in order; individual entries may be null.
+ DataTypes argument_types;
+ /// Type of the lambda result; may be null.
+ DataTypePtr return_type;
+
+ public:
+ static constexpr bool is_parametric = true;
+ bool isParametric() const override { return true; }
+
+ /// Some types could be still unknown.
+ explicit DataTypeFunction(const DataTypes & argument_types_ = DataTypes(), const DataTypePtr & return_type_ = nullptr)
+ : argument_types(argument_types_), return_type(return_type_) {}
+
+ /// Full name of the form "Function(args -> result)"; defined in DataTypeFunction.cpp.
+ std::string doGetName() const override;
+ const char * getFamilyName() const override { return "Function"; }
+ String getSQLCompatibleName() const override { return "TEXT"; }
+ TypeIndex getTypeId() const override { return TypeIndex::Function; }
+
+ /// Argument types as passed to the constructor (entries may be null).
+ const DataTypes & getArgumentTypes() const
+ {
+ return argument_types;
+ }
+
+ /// Return type as passed to the constructor (may be null).
+ const DataTypePtr & getReturnType() const
+ {
+ return return_type;
+ }
+
+ /// Compares the dynamic class and the full type name; defined in DataTypeFunction.cpp.
+ bool equals(const IDataType & rhs) const override;
+
+ bool supportsSparseSerialization() const override { return false; }
+ };
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.cpp b/contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.cpp
new file mode 100644
index 00000000000..4c0b45f472a
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.cpp
@@ -0,0 +1,17 @@
+#include <DataTypes/DataTypeIPv4andIPv6.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationIPv4andIPv6.h>
+
+
+namespace DB
+{
+
+ /// Registers the IPv4 and IPv6 data types in the factory together with their
+ /// case-insensitive aliases INET4 and INET6.
+ void registerDataTypeIPv4andIPv6(DataTypeFactory & factory)
+ {
+     const auto make_ipv4 = [] { return DataTypePtr(std::make_shared<DataTypeIPv4>()); };
+     const auto make_ipv6 = [] { return DataTypePtr(std::make_shared<DataTypeIPv6>()); };
+
+     factory.registerSimpleDataType("IPv4", make_ipv4);
+     factory.registerAlias("INET4", "IPv4", DataTypeFactory::CaseInsensitive);
+
+     factory.registerSimpleDataType("IPv6", make_ipv6);
+     factory.registerAlias("INET6", "IPv6", DataTypeFactory::CaseInsensitive);
+ }
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.h b/contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.h
new file mode 100644
index 00000000000..487ce04f67c
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeIPv4andIPv6.h
@@ -0,0 +1,94 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <Columns/ColumnVector.h>
+#include <base/IPv4andIPv6.h>
+#include <DataTypes/Serializations/SerializationIPv4andIPv6.h>
+
+
+namespace DB
+{
+
+ /// Data type for IPv4 addresses. Values have field type IPv4 and are stored in ColumnVector<IPv4>.
+ class DataTypeIPv4 : public IDataType
+ {
+ public:
+ static constexpr bool is_parametric = false;
+
+ using FieldType = IPv4;
+ using ColumnType = ColumnVector<IPv4>;
+ static constexpr auto type_id = TypeToTypeIndex<IPv4>;
+
+ const char * getFamilyName() const override { return TypeName<IPv4>.data(); }
+ /// The SQL-compatible name of this type is TEXT.
+ String getSQLCompatibleName() const override { return "TEXT"; }
+
+ TypeIndex getTypeId() const override { return type_id; }
+
+ /// Default value is a value-initialized IPv4.
+ Field getDefault() const override { return IPv4{}; }
+
+ MutableColumnPtr createColumn() const override {return ColumnVector<IPv4>::create();}
+
+ bool isParametric() const override { return false; }
+ bool haveSubtypes() const override { return false; }
+
+ /// The type has no parameters, so equality is decided by the dynamic class alone.
+ bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); }
+
+ /// Type traits queried through the IDataType interface.
+ bool canBeUsedInBitOperations() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+ bool canBePromoted() const override { return false; }
+ bool shouldAlignRightInPrettyFormats() const override { return false; }
+ bool textCanContainOnlyValidUTF8() const override { return true; }
+ bool isComparable() const override { return true; }
+ /// Unlike DataTypeIPv6, IPv4 declares itself as representable by an unsigned integer number.
+ bool isValueRepresentedByNumber() const override { return true; }
+ bool isValueRepresentedByInteger() const override { return true; }
+ bool isValueRepresentedByUnsignedInteger() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return true; }
+ size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); }
+ bool isCategorial() const override { return true; }
+ bool canBeInsideLowCardinality() const override { return true; }
+
+ /// Default serialization is SerializationIP specialized for IPv4.
+ SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv4>>(); }
+ };
+
+ /// Data type for IPv6 addresses. Values have field type IPv6 and are stored in ColumnVector<IPv6>.
+ class DataTypeIPv6 : public IDataType
+ {
+ public:
+ static constexpr bool is_parametric = false;
+
+ using FieldType = IPv6;
+ using ColumnType = ColumnVector<IPv6>;
+ static constexpr auto type_id = TypeToTypeIndex<IPv6>;
+
+ const char * getFamilyName() const override { return TypeName<IPv6>.data(); }
+ /// The SQL-compatible name of this type is TEXT.
+ String getSQLCompatibleName() const override { return "TEXT"; }
+
+ TypeIndex getTypeId() const override { return type_id; }
+
+ /// Default value is a value-initialized IPv6.
+ Field getDefault() const override { return IPv6{}; }
+
+ MutableColumnPtr createColumn() const override {return ColumnVector<IPv6>::create();}
+
+ bool isParametric() const override { return false; }
+ bool haveSubtypes() const override { return false; }
+
+ /// The type has no parameters, so equality is decided by the dynamic class alone.
+ bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); }
+
+ /// Type traits queried through the IDataType interface.
+ /// Note: the isValueRepresentedBy*() traits are not overridden here, so the IDataType defaults apply.
+ bool canBeUsedInBitOperations() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+ bool canBePromoted() const override { return false; }
+ bool shouldAlignRightInPrettyFormats() const override { return false; }
+ bool textCanContainOnlyValidUTF8() const override { return true; }
+ bool isComparable() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return true; }
+ size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); }
+ bool isCategorial() const override { return true; }
+ bool canBeInsideLowCardinality() const override { return true; }
+
+ /// Default serialization is SerializationIP specialized for IPv6.
+ SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv6>>(); }
+ };
+
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeInterval.cpp b/contrib/clickhouse/src/DataTypes/DataTypeInterval.cpp
new file mode 100644
index 00000000000..f8fe8bb3b4b
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeInterval.cpp
@@ -0,0 +1,31 @@
+#include <DataTypes/DataTypeInterval.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationInterval.h>
+
+
+namespace DB
+{
+
+ /// The default serialization is SerializationInterval parameterized by the interval kind of this type.
+ SerializationPtr DataTypeInterval::doGetDefaultSerialization() const
+ {
+     return std::make_shared<SerializationInterval>(kind);
+ }
+
+ /// Interval types are equal when both are DataTypeInterval and carry the same interval kind.
+ bool DataTypeInterval::equals(const IDataType & rhs) const
+ {
+     if (typeid(rhs) != typeid(*this))
+         return false;
+
+     /// The typeid check above makes the downcast safe.
+     const auto & rhs_interval = static_cast<const DataTypeInterval &>(rhs);
+     return kind == rhs_interval.kind;
+ }
+
+ /// Registers one simple (non-parametric in SQL syntax) data type per interval kind,
+ /// from IntervalNanosecond up to IntervalYear.
+ void registerDataTypeInterval(DataTypeFactory & factory)
+ {
+     /// Registers `name` with a creator that builds a DataTypeInterval of the given kind.
+     const auto register_kind = [&factory](const char * name, IntervalKind interval_kind)
+     {
+         factory.registerSimpleDataType(name, [interval_kind] { return DataTypePtr(std::make_shared<DataTypeInterval>(interval_kind)); });
+     };
+
+     register_kind("IntervalNanosecond", IntervalKind::Nanosecond);
+     register_kind("IntervalMicrosecond", IntervalKind::Microsecond);
+     register_kind("IntervalMillisecond", IntervalKind::Millisecond);
+     register_kind("IntervalSecond", IntervalKind::Second);
+     register_kind("IntervalMinute", IntervalKind::Minute);
+     register_kind("IntervalHour", IntervalKind::Hour);
+     register_kind("IntervalDay", IntervalKind::Day);
+     register_kind("IntervalWeek", IntervalKind::Week);
+     register_kind("IntervalMonth", IntervalKind::Month);
+     register_kind("IntervalQuarter", IntervalKind::Quarter);
+     register_kind("IntervalYear", IntervalKind::Year);
+ }
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeInterval.h b/contrib/clickhouse/src/DataTypes/DataTypeInterval.h
new file mode 100644
index 00000000000..c398a54268e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeInterval.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <DataTypes/DataTypeNumberBase.h>
+#include <Common/IntervalKind.h>
+
+
+namespace DB
+{
+
+ /** Data type to deal with INTERVAL in SQL (arithmetic on time intervals).
+ *
+ * Mostly the same as Int64.
+ * But also tagged with interval kind.
+ */
+ class DataTypeInterval final : public DataTypeNumberBase<Int64>
+ {
+ private:
+ /// Unit of the interval (e.g. Second, Day, Year); fixed at construction.
+ IntervalKind kind;
+
+ public:
+ static constexpr bool is_parametric = true;
+
+ IntervalKind getKind() const { return kind; }
+
+ explicit DataTypeInterval(IntervalKind kind_) : kind(kind_) {}
+
+ SerializationPtr doGetDefaultSerialization() const override;
+ /// Full name is "Interval" followed by the textual form of the kind.
+ std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); }
+ const char * getFamilyName() const override { return "Interval"; }
+ String getSQLCompatibleName() const override { return "TEXT"; }
+ TypeIndex getTypeId() const override { return TypeIndex::Interval; }
+
+ /// Equal only to another DataTypeInterval with the same kind; defined in DataTypeInterval.cpp.
+ bool equals(const IDataType & rhs) const override;
+
+ bool isParametric() const override { return true; }
+ bool isCategorial() const override { return false; }
+ bool canBeInsideNullable() const override { return true; }
+ };
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.cpp b/contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.cpp
new file mode 100644
index 00000000000..8293455cabc
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.cpp
@@ -0,0 +1,179 @@
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnLowCardinality.h>
+#include <Columns/ColumnUnique.h>
+#include <Columns/ColumnsCommon.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/assert_cast.h>
+#include <Common/typeid_cast.h>
+#include <Core/Field.h>
+#include <base/TypeLists.h>
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeInterval.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/Serializations/SerializationLowCardinality.h>
+#include <Parsers/IAST.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int LOGICAL_ERROR;
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+ /// Wraps `dictionary_type_` into LowCardinality.
+ /// Throws ILLEGAL_TYPE_OF_ARGUMENT if the dictionary type (with a Nullable wrapper looked through)
+ /// reports that it cannot be used inside LowCardinality.
+ DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_)
+     : dictionary_type(std::move(dictionary_type_))
+ {
+     /// For Nullable(T) the check is performed on T.
+     DataTypePtr checked_type = dictionary_type;
+     if (dictionary_type->isNullable())
+     {
+         const auto & nullable_type = static_cast<const DataTypeNullable &>(*dictionary_type);
+         checked_type = nullable_type.getNestedType();
+     }
+
+     if (checked_type->canBeInsideLowCardinality())
+         return;
+
+     throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+         "DataTypeLowCardinality is supported only for numbers, strings, Date or DateTime, but got {}",
+         dictionary_type->getName());
+ }
+
+ namespace
+ {
+ /// Functor for TypeListUtils::forEach: for each candidate numeric type T it checks whether
+ /// `keys_type` is DataTypeNumber<T> and, if so, stores creator's result for ColumnVector<T> in `column`.
+ /// When no candidate matches, `column` is left untouched.
+ template <typename Creator>
+ struct CreateColumnVector
+ {
+ /// Output: receives the created column on a match.
+ MutableColumnUniquePtr & column;
+ /// Type of the dictionary keys being matched against the candidates.
+ const IDataType & keys_type;
+ /// Callable invoked with a typed null pointer that selects the column class.
+ const Creator & creator;
+
+ CreateColumnVector(MutableColumnUniquePtr & column_, const IDataType & keys_type_, const Creator & creator_)
+ : column(column_), keys_type(keys_type_), creator(creator_)
+ {
+ }
+
+ template <typename T>
+ void operator()(TypeList<T>)
+ {
+ if (typeid_cast<const DataTypeNumber<T> *>(&keys_type))
+ column = creator(static_cast<ColumnVector<T> *>(nullptr));
+ }
+ };
+ }
+
+ /// Selects the concrete column class used to store dictionary keys of `keys_type` and passes a typed
+ /// null pointer of that class to `creator`, which builds the unique column.
+ /// A Nullable wrapper around the key type is looked through before dispatching.
+ /// Throws LOGICAL_ERROR for key types that have no matching column class.
+ template <typename Creator>
+ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDataType & keys_type,
+     const Creator & creator)
+ {
+     const IDataType * key = &keys_type;
+     if (const auto * nullable_key = typeid_cast<const DataTypeNullable *>(&keys_type))
+         key = nullable_key->getNestedType().get();
+
+     WhichDataType which(key);
+
+     if (which.isString())
+         return creator(static_cast<ColumnString *>(nullptr));
+
+     if (which.isFixedString())
+         return creator(static_cast<ColumnFixedString *>(nullptr));
+
+     if (which.isDate())
+         return creator(static_cast<ColumnVector<UInt16> *>(nullptr));
+
+     if (which.isDate32())
+         return creator(static_cast<ColumnVector<Int32> *>(nullptr));
+
+     if (which.isDateTime())
+         return creator(static_cast<ColumnVector<UInt32> *>(nullptr));
+
+     if (which.isUUID())
+         return creator(static_cast<ColumnVector<UUID> *>(nullptr));
+
+     if (which.isIPv4())
+         return creator(static_cast<ColumnVector<IPv4> *>(nullptr));
+
+     if (which.isIPv6())
+         return creator(static_cast<ColumnVector<IPv6> *>(nullptr));
+
+     if (which.isInterval())
+         return creator(static_cast<DataTypeInterval::ColumnType *>(nullptr));
+
+     if (which.isInt() || which.isUInt() || which.isFloat())
+     {
+         /// Try every integer and floating-point type; exactly one is expected to match.
+         MutableColumnUniquePtr result;
+         TypeListUtils::forEach(TypeListIntAndFloat{}, CreateColumnVector(result, *key, creator));
+
+         if (!result)
+             throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected numeric type: {}", key->getName());
+
+         return result;
+     }
+
+     throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected dictionary type for DataTypeLowCardinality: {}",
+         key->getName());
+ }
+
+
+ /// Creates an empty unique (dictionary) column suitable for keys of type `keys_type`.
+ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUnique(const IDataType & keys_type)
+ {
+     /// `typed_null` is only a tag: its pointee type selects the ColumnUnique specialization.
+     const auto make_empty = [&keys_type](auto typed_null)
+     {
+         using ColumnType = std::remove_pointer_t<decltype(typed_null)>;
+         return ColumnUnique<ColumnType>::create(keys_type);
+     };
+
+     return createColumnUniqueImpl(keys_type, make_empty);
+ }
+
+ /// Creates a unique (dictionary) column for keys of type `keys_type` that takes ownership of
+ /// the already filled `keys` column.
+ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys)
+ {
+     /// `typed_null` is only a tag: its pointee type selects the ColumnUnique specialization.
+     /// `keys` is moved from inside the callable, so the callable must be invoked at most once.
+     const auto make_from_keys = [&keys, &keys_type](auto typed_null)
+     {
+         using ColumnType = std::remove_pointer_t<decltype(typed_null)>;
+         return ColumnUnique<ColumnType>::create(std::move(keys), keys_type.isNullable());
+     };
+
+     return createColumnUniqueImpl(keys_type, make_from_keys);
+ }
+
+ /// Creates an empty ColumnLowCardinality: an empty dictionary for the dictionary type
+ /// plus an index column that starts out as UInt8.
+ MutableColumnPtr DataTypeLowCardinality::createColumn() const
+ {
+     MutableColumnPtr index_column = DataTypeUInt8().createColumn();
+     MutableColumnPtr dictionary_column = createColumnUnique(*dictionary_type);
+
+     return ColumnLowCardinality::create(std::move(dictionary_column), std::move(index_column));
+ }
+
+ /// The default value of LowCardinality(T) is the default value of T.
+ Field DataTypeLowCardinality::getDefault() const
+ {
+     const IDataType & dictionary = *dictionary_type;
+     return dictionary.getDefault();
+ }
+
+ /// LowCardinality types are equal when both are DataTypeLowCardinality and their dictionary types are equal.
+ bool DataTypeLowCardinality::equals(const IDataType & rhs) const
+ {
+     const bool same_class = typeid(rhs) == typeid(*this);
+     if (!same_class)
+         return false;
+
+     /// Safe after the typeid check above.
+     const auto & other = static_cast<const DataTypeLowCardinality &>(rhs);
+     return dictionary_type->equals(*other.dictionary_type);
+ }
+
+ /// The default serialization is SerializationLowCardinality built for the dictionary type.
+ SerializationPtr DataTypeLowCardinality::doGetDefaultSerialization() const
+ {
+     SerializationPtr serialization = std::make_shared<SerializationLowCardinality>(dictionary_type);
+     return serialization;
+ }
+
+
+ /// Factory callback: builds LowCardinality(T) from the AST of its single argument T.
+ /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless exactly one argument is given.
+ static DataTypePtr create(const ASTPtr & arguments)
+ {
+     const bool has_single_argument = arguments && arguments->children.size() == 1;
+     if (!has_single_argument)
+         throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+             "LowCardinality data type family must have single argument - type of elements");
+
+     DataTypePtr element_type = DataTypeFactory::instance().get(arguments->children[0]);
+     return std::make_shared<DataTypeLowCardinality>(element_type);
+ }
+
+ /// Registers the "LowCardinality" type family in the factory, using create() above as its creator.
+ void registerDataTypeLowCardinality(DataTypeFactory & factory)
+ {
+ factory.registerDataType("LowCardinality", create);
+ }
+
+
+ /// Strips a top-level LowCardinality wrapper: returns the dictionary type for LowCardinality(T)
+ /// and the type itself for anything else. Nested types are not inspected.
+ DataTypePtr removeLowCardinality(const DataTypePtr & type)
+ {
+     const auto * as_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(type.get());
+     if (!as_low_cardinality)
+         return type;
+
+     return as_low_cardinality->getDictionaryType();
+ }
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.h b/contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.h
new file mode 100644
index 00000000000..d2a414cb073
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeLowCardinality.h
@@ -0,0 +1,95 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <Columns/IColumnUnique.h>
+
+
+namespace DB
+{
+
+ /// Data type LowCardinality(T): a wrapper around the dictionary type T.
+ /// Columns of this type are ColumnLowCardinality (a unique dictionary column plus an index column).
+ /// Most type traits are forwarded to the dictionary type.
+ class DataTypeLowCardinality : public IDataType
+ {
+ private:
+ /// The wrapped type T; may itself be Nullable.
+ DataTypePtr dictionary_type;
+
+
+ public:
+ /// Throws if the dictionary type (with Nullable looked through) cannot be inside LowCardinality.
+ explicit DataTypeLowCardinality(DataTypePtr dictionary_type_);
+
+ const DataTypePtr & getDictionaryType() const { return dictionary_type; }
+
+ String doGetName() const override
+ {
+ return "LowCardinality(" + dictionary_type->getName() + ")";
+ }
+ const char * getFamilyName() const override { return "LowCardinality"; }
+ String getSQLCompatibleName() const override { return dictionary_type->getSQLCompatibleName(); }
+
+ TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; }
+
+ MutableColumnPtr createColumn() const override;
+
+ /// Same default value as the dictionary type.
+ Field getDefault() const override;
+
+ bool equals(const IDataType & rhs) const override;
+
+ /// Traits below are forwarded to the dictionary type unless a constant is returned.
+ bool isParametric() const override { return true; }
+ bool haveSubtypes() const override { return true; }
+ bool cannotBeStoredInTables() const override { return dictionary_type->cannotBeStoredInTables(); }
+ bool shouldAlignRightInPrettyFormats() const override { return dictionary_type->shouldAlignRightInPrettyFormats(); }
+ bool textCanContainOnlyValidUTF8() const override { return dictionary_type->textCanContainOnlyValidUTF8(); }
+ bool isComparable() const override { return dictionary_type->isComparable(); }
+ bool canBeComparedWithCollation() const override { return dictionary_type->canBeComparedWithCollation(); }
+ bool canBeUsedAsVersion() const override { return dictionary_type->canBeUsedAsVersion(); }
+ bool isSummable() const override { return dictionary_type->isSummable(); }
+ bool canBeUsedInBitOperations() const override { return dictionary_type->canBeUsedInBitOperations(); }
+ bool canBeUsedInBooleanContext() const override { return dictionary_type->canBeUsedInBooleanContext(); }
+ bool isValueRepresentedByNumber() const override { return dictionary_type->isValueRepresentedByNumber(); }
+ bool isValueRepresentedByInteger() const override { return dictionary_type->isValueRepresentedByInteger(); }
+ bool isValueRepresentedByUnsignedInteger() const override { return dictionary_type->isValueRepresentedByUnsignedInteger(); }
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return dictionary_type->haveMaximumSizeOfValue(); }
+ size_t getMaximumSizeOfValueInMemory() const override { return dictionary_type->getMaximumSizeOfValueInMemory(); }
+ size_t getSizeOfValueInMemory() const override { return dictionary_type->getSizeOfValueInMemory(); }
+ bool isCategorial() const override { return false; }
+ /// LowCardinality(Nullable(T)) is not itself reported as Nullable; see isLowCardinalityNullable().
+ bool isNullable() const override { return false; }
+ bool onlyNull() const override { return false; }
+ bool lowCardinality() const override { return true; }
+ bool supportsSparseSerialization() const override { return false; }
+ bool isLowCardinalityNullable() const override { return dictionary_type->isNullable(); }
+
+ /// Create a unique (dictionary) column for the given key type: empty, or adopting existing `keys`.
+ static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type);
+ static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys);
+
+ private:
+ SerializationPtr doGetDefaultSerialization() const override;
+
+ /// NOTE(review): serializeImpl / deserializeImpl are only declared here; no definition is present
+ /// in DataTypeLowCardinality.cpp as added by this change. They look like unused leftovers - confirm.
+ template <typename ... Params>
+ using SerializeFunctionPtr = void (IDataType::*)(const IColumn &, size_t, Params ...) const;
+
+ template <typename... Params, typename... Args>
+ void serializeImpl(const IColumn & column, size_t row_num, SerializeFunctionPtr<Params...> func, Args &&... args) const;
+
+ template <typename ... Params>
+ using DeserializeFunctionPtr = void (IDataType::*)(IColumn &, Params ...) const;
+
+ template <typename ... Params, typename... Args>
+ void deserializeImpl(IColumn & column, DeserializeFunctionPtr<Params...> func, Args &&... args) const;
+
+ /// Dispatches on the key type and calls `creator` with a typed null pointer selecting the column class.
+ template <typename Creator>
+ static MutableColumnUniquePtr createColumnUniqueImpl(const IDataType & keys_type, const Creator & creator);
+ };
+
+ /// Returns dictionary type if type is DataTypeLowCardinality, type otherwise.
+ /// Only the top level is inspected.
+ DataTypePtr removeLowCardinality(const DataTypePtr & type);
+
+ /// Remove LowCardinality recursively from all nested types.
+ /// The implementation descends into Array, Tuple and Map types; a null pointer is returned as is.
+ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type);
+
+ /// Remove LowCardinality recursively from all nested columns.
+ /// The implementation descends into Array, Const, Tuple and Map columns; a null pointer is returned as is.
+ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column);
+
+ /// Convert column of type from_type to type to_type by converting nested LowCardinality columns.
+ /// Throws if the two types differ in anything other than LowCardinality wrappers.
+ ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/contrib/clickhouse/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
new file mode 100644
index 00000000000..98eb76267a4
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeLowCardinalityHelpers.cpp
@@ -0,0 +1,208 @@
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnConst.h>
+#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnMap.h>
+#include <Columns/ColumnLowCardinality.h>
+#include <Columns/ColumnFunction.h>
+
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeMap.h>
+
+#include <Common/assert_cast.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_COLUMN;
+ extern const int TYPE_MISMATCH;
+}
+
+ /// Returns `type` with LowCardinality wrappers removed, descending into Array, Tuple and Map.
+ /// Tuple element names are preserved when they were given explicitly.
+ /// A null pointer and any other type are returned unchanged.
+ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
+ {
+     if (!type)
+         return type;
+
+     const IDataType * raw_type = type.get();
+
+     if (const auto * as_array = typeid_cast<const DataTypeArray *>(raw_type))
+     {
+         auto nested_without_lc = recursiveRemoveLowCardinality(as_array->getNestedType());
+         return std::make_shared<DataTypeArray>(nested_without_lc);
+     }
+
+     if (const auto * as_tuple = typeid_cast<const DataTypeTuple *>(raw_type))
+     {
+         DataTypes converted_elements = as_tuple->getElements();
+         for (size_t i = 0; i < converted_elements.size(); ++i)
+             converted_elements[i] = recursiveRemoveLowCardinality(converted_elements[i]);
+
+         if (!as_tuple->haveExplicitNames())
+             return std::make_shared<DataTypeTuple>(converted_elements);
+
+         return std::make_shared<DataTypeTuple>(converted_elements, as_tuple->getElementNames());
+     }
+
+     if (const auto * as_map = typeid_cast<const DataTypeMap *>(raw_type))
+     {
+         return std::make_shared<DataTypeMap>(recursiveRemoveLowCardinality(as_map->getKeyType()), recursiveRemoveLowCardinality(as_map->getValueType()));
+     }
+
+     if (const auto * as_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(raw_type))
+         return as_low_cardinality->getDictionaryType();
+
+     return type;
+ }
+
+ /// Returns `column` with all LowCardinality columns converted to full columns, descending into
+ /// Array, Const, Tuple and Map columns.
+ /// When nothing inside a container had to be converted, the original column pointer is returned
+ /// so that no new container column is allocated.
+ /// A null pointer and any other column are returned unchanged.
+ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
+ {
+     if (!column)
+         return column;
+
+     if (const auto * column_array = typeid_cast<const ColumnArray *>(column.get()))
+     {
+         const auto & data = column_array->getDataPtr();
+         auto data_no_lc = recursiveRemoveLowCardinality(data);
+         if (data.get() == data_no_lc.get())
+             return column;
+
+         return ColumnArray::create(data_no_lc, column_array->getOffsetsPtr());
+     }
+
+     if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
+     {
+         const auto & nested = column_const->getDataColumnPtr();
+         auto nested_no_lc = recursiveRemoveLowCardinality(nested);
+         if (nested.get() == nested_no_lc.get())
+             return column;
+
+         return ColumnConst::create(nested_no_lc, column_const->size());
+     }
+
+     if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
+     {
+         auto columns = column_tuple->getColumns();
+
+         /// Same "unchanged -> return the original" shortcut as in the Array/Const/Map branches:
+         /// rebuild the tuple only if at least one element was actually converted.
+         bool has_converted = false;
+
+         for (auto & element : columns)
+         {
+             auto element_no_lc = recursiveRemoveLowCardinality(element);
+             if (element.get() != element_no_lc.get())
+             {
+                 element = element_no_lc;
+                 has_converted = true;
+             }
+         }
+
+         if (!has_converted)
+             return column;
+
+         return ColumnTuple::create(columns);
+     }
+
+     if (const auto * column_map = typeid_cast<const ColumnMap *>(column.get()))
+     {
+         const auto & nested = column_map->getNestedColumnPtr();
+         auto nested_no_lc = recursiveRemoveLowCardinality(nested);
+         if (nested.get() == nested_no_lc.get())
+             return column;
+
+         return ColumnMap::create(nested_no_lc);
+     }
+
+     /// Special case when column is a lazy argument of short circuit function.
+     /// We should call recursiveRemoveLowCardinality on the result column
+     /// when function will be executed.
+     if (const auto * column_function = typeid_cast<const ColumnFunction *>(column.get()))
+     {
+         if (!column_function->isShortCircuitArgument())
+             return column;
+
+         return column_function->recursivelyConvertResultToFullColumnIfLowCardinality();
+     }
+
+     if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get()))
+         return column_low_cardinality->convertToFullColumn();
+
+     return column;
+ }
+
+ /// Converts `column` from `from_type` to `to_type`, where the two types are expected to differ only
+ /// by LowCardinality wrappers (possibly nested inside Array or Tuple).
+ /// Returns the original column when no conversion is needed.
+ /// Throws ILLEGAL_COLUMN if the column class does not match an Array/Tuple `from_type`,
+ /// and TYPE_MISMATCH if no conversion rule applies.
+ /// The order of the checks below matters: earlier rules take precedence over later ones.
+ ColumnPtr recursiveLowCardinalityTypeConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
+ {
+ if (!column)
+ return column;
+
+ /// Identical types: nothing to do.
+ if (from_type->equals(*to_type))
+ return column;
+
+ /// We can allow insert enum column if it's numeric type is the same as the column's type in table.
+ if (WhichDataType(to_type).isEnum() && from_type->getTypeId() == to_type->getTypeId())
+ return column;
+
+ /// Constant column: convert the single nested value and re-wrap it with the same size.
+ if (const auto * column_const = typeid_cast<const ColumnConst *>(column.get()))
+ {
+ const auto & nested = column_const->getDataColumnPtr();
+ auto nested_no_lc = recursiveLowCardinalityTypeConversion(nested, from_type, to_type);
+ if (nested.get() == nested_no_lc.get())
+ return column;
+
+ return ColumnConst::create(nested_no_lc, column_const->size());
+ }
+
+ /// LowCardinality(T) -> T: materialize the full column.
+ if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(from_type.get()))
+ {
+ if (to_type->equals(*low_cardinality_type->getDictionaryType()))
+ return column->convertToFullColumnIfLowCardinality();
+ }
+
+ /// T -> LowCardinality(T): build a new LowCardinality column from all rows of the full column.
+ if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(to_type.get()))
+ {
+ if (from_type->equals(*low_cardinality_type->getDictionaryType()))
+ {
+ auto col = low_cardinality_type->createColumn();
+ assert_cast<ColumnLowCardinality &>(*col).insertRangeFromFullColumn(*column, 0, column->size());
+ return col;
+ }
+ }
+
+ /// Array -> Array: convert the nested data, keep the offsets.
+ if (const auto * from_array_type = typeid_cast<const DataTypeArray *>(from_type.get()))
+ {
+ if (const auto * to_array_type = typeid_cast<const DataTypeArray *>(to_type.get()))
+ {
+ const auto * column_array = typeid_cast<const ColumnArray *>(column.get());
+ if (!column_array)
+ throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column {} for type {}",
+ column->getName(), from_type->getName());
+
+ const auto & nested_from = from_array_type->getNestedType();
+ const auto & nested_to = to_array_type->getNestedType();
+
+ return ColumnArray::create(
+ recursiveLowCardinalityTypeConversion(column_array->getDataPtr(), nested_from, nested_to),
+ column_array->getOffsetsPtr());
+ }
+ }
+
+ /// Tuple -> Tuple: convert element by element; a new tuple is built only if something changed.
+ if (const auto * from_tuple_type = typeid_cast<const DataTypeTuple *>(from_type.get()))
+ {
+ if (const auto * to_tuple_type = typeid_cast<const DataTypeTuple *>(to_type.get()))
+ {
+ const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get());
+ if (!column_tuple)
+ throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column {} for type {}",
+ column->getName(), from_type->getName());
+
+ auto columns = column_tuple->getColumns();
+ const auto & from_elements = from_tuple_type->getElements();
+ const auto & to_elements = to_tuple_type->getElements();
+
+ bool has_converted = false;
+
+ for (size_t i = 0; i < columns.size(); ++i)
+ {
+ auto & element = columns[i];
+ /// .at() is bounds-checked, so a tuple type with fewer elements than the column has throws here.
+ auto element_no_lc = recursiveLowCardinalityTypeConversion(element, from_elements.at(i), to_elements.at(i));
+ if (element.get() != element_no_lc.get())
+ {
+ element = element_no_lc;
+ has_converted = true;
+ }
+ }
+
+ if (!has_converted)
+ return column;
+
+ return ColumnTuple::create(columns);
+ }
+ }
+
+ /// No rule matched.
+ throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert: {} to {}", from_type->getName(), to_type->getName());
+ }
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeMap.cpp b/contrib/clickhouse/src/DataTypes/DataTypeMap.cpp
new file mode 100644
index 00000000000..90561857fad
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeMap.cpp
@@ -0,0 +1,158 @@
+#include <base/map.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <Columns/ColumnMap.h>
+#include <Core/Field.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationMap.h>
+#include <Parsers/IAST.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int BAD_ARGUMENTS;
+}
+
+ /// Builds a Map type from its nested representation, which must be Array(Tuple(key, value)).
+ /// Throws BAD_ARGUMENTS if `nested_` does not have that shape or if the key type is not allowed.
+ DataTypeMap::DataTypeMap(const DataTypePtr & nested_)
+     : nested(nested_)
+ {
+     const auto * array_of_pairs = typeid_cast<const DataTypeArray *>(nested.get());
+
+     const DataTypeTuple * pair = nullptr;
+     if (array_of_pairs)
+         pair = typeid_cast<const DataTypeTuple *>(array_of_pairs->getNestedType().get());
+
+     /// Not an array, not an array of tuples, or a tuple of the wrong arity: same error in all cases.
+     if (!pair || pair->getElements().size() != 2)
+         throw Exception(ErrorCodes::BAD_ARGUMENTS,
+             "Expected Array(Tuple(key, value)) type, got {}", nested->getName());
+
+     key_type = pair->getElement(0);
+     value_type = pair->getElement(1);
+     assertKeyType();
+ }
+
+ /// Builds Map(K, V) from a two-element list {key type, value type}.
+ /// The nested representation is Array(Tuple(keys K, values V)).
+ /// Throws BAD_ARGUMENTS if the list does not contain exactly two types or if the key type is not allowed.
+ DataTypeMap::DataTypeMap(const DataTypes & elems_)
+ {
+     /// Checked at runtime rather than with assert(): when assert() is compiled out,
+     /// a list of the wrong size would otherwise be indexed out of bounds below.
+     if (elems_.size() != 2)
+         throw Exception(ErrorCodes::BAD_ARGUMENTS,
+             "Map data type must have exactly two element types: key and value, but {} given", elems_.size());
+
+     key_type = elems_[0];
+     value_type = elems_[1];
+
+     assertKeyType();
+
+     nested = std::make_shared<DataTypeArray>(
+         std::make_shared<DataTypeTuple>(DataTypes{key_type, value_type}, Names{"keys", "values"}));
+ }
+
+ /// Builds Map(K, V) from explicit key and value types.
+ /// The nested representation is Array(Tuple(keys K, values V)).
+ /// Throws BAD_ARGUMENTS if the key type is not allowed (see checkKeyType).
+ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_)
+ : key_type(key_type_), value_type(value_type_)
+ , nested(std::make_shared<DataTypeArray>(
+ std::make_shared<DataTypeTuple>(DataTypes{key_type_, value_type_}, Names{"keys", "values"})))
+ {
+ assertKeyType();
+ }
+
+ /// Throws BAD_ARGUMENTS when the key type of this Map is rejected by checkKeyType().
+ void DataTypeMap::assertKeyType() const
+ {
+     if (checkKeyType(key_type))
+         return;
+
+     throw Exception(ErrorCodes::BAD_ARGUMENTS,
+         "Type of Map key must be a type, that can be represented by integer "
+         "or String or FixedString (possibly LowCardinality) or UUID or IPv6,"
+         " but {} given", key_type->getName());
+ }
+
+
+ /// Full type name of the form "Map(<key type>, <value type>)".
+ std::string DataTypeMap::doGetName() const
+ {
+     WriteBufferFromOwnString out;
+
+     out << "Map(";
+     out << key_type->getName();
+     out << ", ";
+     out << value_type->getName();
+     out << ")";
+
+     return out.str();
+ }
+
+ /// Creates an empty ColumnMap wrapping an empty column of the nested Array(Tuple(key, value)) type.
+ MutableColumnPtr DataTypeMap::createColumn() const
+ {
+ return ColumnMap::create(nested->createColumn());
+ }
+
+ /// The default value is a default-constructed Map field.
+ Field DataTypeMap::getDefault() const
+ {
+ return Map();
+ }
+
+ /// The default serialization is SerializationMap composed of the default serializations
+ /// of the key type, the value type and the nested Array(Tuple(key, value)) type.
+ SerializationPtr DataTypeMap::doGetDefaultSerialization() const
+ {
+     auto key_serialization = key_type->getDefaultSerialization();
+     auto value_serialization = value_type->getDefaultSerialization();
+     auto nested_serialization = nested->getDefaultSerialization();
+
+     return std::make_shared<SerializationMap>(key_serialization, value_serialization, nested_serialization);
+ }
+
+ /// Map types are equal when both are DataTypeMap and their nested Array(Tuple(...)) types are equal.
+ bool DataTypeMap::equals(const IDataType & rhs) const
+ {
+     const bool same_class = typeid(rhs) == typeid(*this);
+     if (!same_class)
+         return false;
+
+     /// Safe after the typeid check above.
+     const auto & other = static_cast<const DataTypeMap &>(rhs);
+     return nested->equals(*other.nested);
+ }
+
+ /// Returns true if `key_type` may be used as a Map key:
+ ///  - LowCardinality is accepted only over String or FixedString;
+ ///  - otherwise the type must be representable by an integer, or be String, FixedString,
+ ///    Nothing, IPv6 or UUID.
+ bool DataTypeMap::checkKeyType(DataTypePtr key_type)
+ {
+     if (key_type->getTypeId() == TypeIndex::LowCardinality)
+     {
+         const auto & low_cardinality_key = assert_cast<const DataTypeLowCardinality &>(*key_type);
+         return isStringOrFixedString(*(low_cardinality_key.getDictionaryType()));
+     }
+
+     if (key_type->isValueRepresentedByInteger())
+         return true;
+
+     if (isStringOrFixedString(*key_type))
+         return true;
+
+     return WhichDataType(key_type).isNothing()
+         || WhichDataType(key_type).isIPv6()
+         || WhichDataType(key_type).isUUID();
+ }
+
+ /// Returns the nested type rebuilt as Array(Tuple(K, V)) with the tuple constructed from the
+ /// element types only, i.e. without passing explicit element names.
+ DataTypePtr DataTypeMap::getNestedTypeWithUnnamedTuple() const
+ {
+     const auto & nested_array = assert_cast<const DataTypeArray &>(*nested);
+     const auto & named_tuple = assert_cast<const DataTypeTuple &>(*nested_array.getNestedType());
+
+     auto unnamed_tuple = std::make_shared<DataTypeTuple>(named_tuple.getElements());
+     return std::make_shared<DataTypeArray>(unnamed_tuple);
+ }
+
+ /// Factory callback: builds Map(K, V) from the ASTs of its two arguments.
+ /// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH unless exactly two arguments are given.
+ static DataTypePtr create(const ASTPtr & arguments)
+ {
+     const bool has_two_arguments = arguments && arguments->children.size() == 2;
+     if (!has_two_arguments)
+         throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Map data type family must have two arguments: key and value types");
+
+     const auto & type_asts = arguments->children;
+
+     DataTypes key_and_value;
+     key_and_value.reserve(type_asts.size());
+
+     /// Resolve the key type first, then the value type.
+     for (size_t i = 0; i < type_asts.size(); ++i)
+         key_and_value.emplace_back(DataTypeFactory::instance().get(type_asts[i]));
+
+     return std::make_shared<DataTypeMap>(key_and_value);
+ }
+
+
+ /// Registers the "Map" type family in the factory, using create() above as its creator.
+ void registerDataTypeMap(DataTypeFactory & factory)
+ {
+ factory.registerDataType("Map", create);
+ }
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeMap.h b/contrib/clickhouse/src/DataTypes/DataTypeMap.h
new file mode 100644
index 00000000000..294c5d7ac77
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeMap.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+
+
+namespace DB
+{
+
+/** Map data type.
+ * Map is implemented as two arrays of keys and values.
+ * Serialization of type 'Map(K, V)' is similar to serialization
+ * of 'Array(Tuple(keys K, values V))' or in other words of 'Nested(keys K, values V)'.
+ */
+class DataTypeMap final : public IDataType
+{
+private:
+ DataTypePtr key_type;
+ DataTypePtr value_type;
+
+ /// 'nested' is an Array(Tuple(key_type, value_type))
+ DataTypePtr nested;
+
+public:
+ static constexpr bool is_parametric = true;
+
+ explicit DataTypeMap(const DataTypePtr & nested_); /// NOTE(review): presumably takes the Array(Tuple(key, value)) representation - confirm in DataTypeMap.cpp
+ explicit DataTypeMap(const DataTypes & elems); /// Used by the factory creator, which passes the two parsed argument types.
+ DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_);
+
+ TypeIndex getTypeId() const override { return TypeIndex::Map; }
+ std::string doGetName() const override;
+ const char * getFamilyName() const override { return "Map"; }
+ String getSQLCompatibleName() const override { return "JSON"; } /// Map is reported as "JSON" when an SQL-compatible name is requested.
+
+ bool canBeInsideNullable() const override { return false; } /// Map reports that it cannot be nested inside Nullable.
+
+ MutableColumnPtr createColumn() const override;
+
+ Field getDefault() const override;
+
+ bool equals(const IDataType & rhs) const override;
+ bool isComparable() const override { return key_type->isComparable() && value_type->isComparable(); } /// Comparable only if both key and value types are.
+ bool isParametric() const override { return true; }
+ bool haveSubtypes() const override { return true; }
+ bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); } /// Delegated to the nested Array(Tuple(...)) type.
+
+ const DataTypePtr & getKeyType() const { return key_type; }
+ const DataTypePtr & getValueType() const { return value_type; }
+ DataTypes getKeyValueTypes() const { return {key_type, value_type}; } /// Returns {key_type, value_type}, in that order.
+ const DataTypePtr & getNestedType() const { return nested; }
+ DataTypePtr getNestedTypeWithUnnamedTuple() const; /// Array(Tuple(...)) rebuilt from the nested tuple's element types only, without passing names.
+
+ SerializationPtr doGetDefaultSerialization() const override;
+
+ static bool checkKeyType(DataTypePtr key_type); /// Returns whether the given type is acceptable as a Map key type.
+
+private:
+ void assertKeyType() const;
+};
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNested.cpp b/contrib/clickhouse/src/DataTypes/DataTypeNested.cpp
new file mode 100644
index 00000000000..a4b7442393c
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNested.cpp
@@ -0,0 +1,75 @@
+#include <DataTypes/DataTypeNested.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <IO/Operators.h>
+#include <Common/quoteString.h>
+#include <Parsers/ASTNameTypePair.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int EMPTY_DATA_PASSED;
+ extern const int BAD_ARGUMENTS;
+}
+
+/// Renders the type name as "Nested(name1 Type1, name2 Type2, ...)".
+/// Element names are back-quoted when needed.
+String DataTypeNestedCustomName::getName() const
+{
+    WriteBufferFromOwnString out;
+    out << "Nested(";
+
+    const size_t count = elems.size();
+    for (size_t pos = 0; pos < count; ++pos)
+    {
+        /// Separator goes before every element except the first one.
+        if (pos > 0)
+            out << ", ";
+
+        out << backQuoteIfNeed(names[pos]) << ' ';
+        out << elems[pos]->getName();
+    }
+
+    out << ")";
+    return out.str();
+}
+
+/// Creator for the "Nested" family. Every argument must be a (name, type) pair.
+/// The resulting data type is Array(Tuple(types, names)) accompanied by a custom name
+/// descriptor that renders it as Nested(...).
+static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
+{
+    if (!arguments || arguments->children.empty())
+        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Nested cannot be empty");
+
+    const auto & children = arguments->children;
+
+    DataTypes element_types;
+    Strings element_names;
+    element_types.reserve(children.size());
+    element_names.reserve(children.size());
+
+    for (const auto & child : children)
+    {
+        const auto * name_and_type = child->as<ASTNameTypePair>();
+        if (name_and_type == nullptr)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type Nested accepts only pairs with name and type");
+
+        element_types.emplace_back(DataTypeFactory::instance().get(name_and_type->type));
+        element_names.emplace_back(name_and_type->name);
+    }
+
+    auto tuple_type = std::make_shared<DataTypeTuple>(element_types, element_names);
+    auto array_type = std::make_shared<DataTypeArray>(std::move(tuple_type));
+
+    auto name_desc = std::make_unique<DataTypeNestedCustomName>(element_types, element_names);
+    auto custom_desc = std::make_unique<DataTypeCustomDesc>(std::move(name_desc));
+
+    return std::make_pair(std::move(array_type), std::move(custom_desc));
+}
+
+/// Registers the "Nested" family as a custom data type produced by `create`.
+void registerDataTypeNested(DataTypeFactory & factory)
+{
+    factory.registerDataTypeCustom("Nested", create);
+}
+
+/// Obtains a Nested data type for the given element types and names
+/// by handing a Nested custom-name descriptor to the factory.
+DataTypePtr createNested(const DataTypes & types, const Names & names)
+{
+    auto nested_name = std::make_unique<DataTypeNestedCustomName>(types, names);
+    auto custom_desc = std::make_unique<DataTypeCustomDesc>(std::move(nested_name));
+
+    return DataTypeFactory::instance().getCustom(std::move(custom_desc));
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNested.h b/contrib/clickhouse/src/DataTypes/DataTypeNested.h
new file mode 100644
index 00000000000..1ad06477a6e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNested.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+
+
+namespace DB
+{
+
+class DataTypeNestedCustomName final : public IDataTypeCustomName /// Custom name descriptor that makes a type print as Nested(name Type, ...).
+{
+private:
+ DataTypes elems; /// Types of the nested elements.
+ Strings names; /// Names of the nested elements; getName() indexes it in parallel with `elems`.
+
+public:
+ DataTypeNestedCustomName(const DataTypes & elems_, const Strings & names_) /// Stores copies of both lists; no validation is done here.
+ : elems(elems_), names(names_)
+ {
+ }
+
+ String getName() const override;
+};
+
+DataTypePtr createNested(const DataTypes & types, const Names & names);
+
+/// True if the custom name attached to `data_type` is a DataTypeNestedCustomName.
+/// `data_type` must be pointer-like (it is dereferenced with ->).
+template <typename DataType>
+inline bool isNested(const DataType & data_type)
+{
+    const auto * nested_name = typeid_cast<const DataTypeNestedCustomName *>(data_type->getCustomName());
+    return nested_name != nullptr;
+}
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNothing.cpp b/contrib/clickhouse/src/DataTypes/DataTypeNothing.cpp
new file mode 100644
index 00000000000..c2b552035a0
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNothing.cpp
@@ -0,0 +1,31 @@
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/Serializations/SerializationNothing.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <Columns/ColumnNothing.h>
+
+
+namespace DB
+{
+
+/// Creates a ColumnNothing constructed with the argument 0.
+MutableColumnPtr DataTypeNothing::createColumn() const
+{
+    auto empty_column = ColumnNothing::create(0);
+    return empty_column;
+}
+
+/// Nothing has no parameters, so equality is just a dynamic type match.
+bool DataTypeNothing::equals(const IDataType & rhs) const
+{
+    const bool same_dynamic_type = typeid(rhs) == typeid(*this);
+    return same_dynamic_type;
+}
+
+/// The default serialization of Nothing is SerializationNothing.
+SerializationPtr DataTypeNothing::doGetDefaultSerialization() const
+{
+    auto serialization = std::make_shared<SerializationNothing>();
+    return serialization;
+}
+
+
+/// Registers "Nothing" as a simple (argument-less) data type.
+void registerDataTypeNothing(DataTypeFactory & factory)
+{
+    auto creator = [] { return DataTypePtr(std::make_shared<DataTypeNothing>()); };
+    factory.registerSimpleDataType("Nothing", creator);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNothing.h b/contrib/clickhouse/src/DataTypes/DataTypeNothing.h
new file mode 100644
index 00000000000..c3a7e2d09f0
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNothing.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <DataTypes/IDataTypeDummy.h>
+
+
+namespace DB
+{
+
+/** Data type that cannot have any values.
+ * Used to represent NULL of unknown type as Nullable(Nothing),
+ * and possibly for empty array of unknown type as Array(Nothing).
+ */
+class DataTypeNothing final : public IDataTypeDummy
+{
+public:
+ static constexpr bool is_parametric = false;
+
+ const char * getFamilyName() const override { return "Nothing"; }
+ String getSQLCompatibleName() const override { return "TEXT"; } /// Reported as "TEXT" when an SQL-compatible name is requested.
+
+ TypeIndex getTypeId() const override { return TypeIndex::Nothing; }
+
+ MutableColumnPtr createColumn() const override;
+
+ bool equals(const IDataType & rhs) const override;
+
+ bool isParametric() const override { return false; }
+ bool textCanContainOnlyValidUTF8() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return true; }
+ size_t getSizeOfValueInMemory() const override { return 0; } /// A value of this type is reported to occupy zero bytes.
+ bool canBeInsideNullable() const override { return true; } /// Needed for Nullable(Nothing), see the class comment.
+
+ SerializationPtr doGetDefaultSerialization() const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNullable.cpp b/contrib/clickhouse/src/DataTypes/DataTypeNullable.cpp
new file mode 100644
index 00000000000..41a9a1de543
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNullable.cpp
@@ -0,0 +1,118 @@
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <Columns/ColumnNullable.h>
+#include <Core/Field.h>
+#include <Parsers/IAST.h>
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+
+/// Wraps `nested_data_type_` into Nullable.
+/// Throws ILLEGAL_TYPE_OF_ARGUMENT if the nested type reports that it cannot be inside Nullable.
+DataTypeNullable::DataTypeNullable(const DataTypePtr & nested_data_type_)
+    : nested_data_type{nested_data_type_}
+{
+    const bool nesting_allowed = nested_data_type->canBeInsideNullable();
+    if (nesting_allowed)
+        return;
+
+    throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Nested type {} cannot be inside Nullable type", nested_data_type->getName());
+}
+
+
+/// True when the nested type is DataTypeNothing, i.e. this is Nullable(Nothing).
+bool DataTypeNullable::onlyNull() const
+{
+    const auto * nothing_type = typeid_cast<const DataTypeNothing *>(nested_data_type.get());
+    return nothing_type != nullptr;
+}
+
+
+/// Creates a ColumnNullable from a fresh column of the nested type and a fresh ColumnUInt8.
+MutableColumnPtr DataTypeNullable::createColumn() const
+{
+    auto nested_column = nested_data_type->createColumn();
+    auto null_map_column = ColumnUInt8::create();
+
+    return ColumnNullable::create(std::move(nested_column), std::move(null_map_column));
+}
+
+Field DataTypeNullable::getDefault() const /// The default value of any Nullable type is Null.
+{
+ return Null();
+}
+
+size_t DataTypeNullable::getSizeOfValueInMemory() const /// Always throws LOGICAL_ERROR: this implementation never returns a size.
+{
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Value of type {} in memory is not of fixed size.", getName());
+}
+
+
+/// Equal when `rhs` is also nullable and the nested types are equal.
+bool DataTypeNullable::equals(const IDataType & rhs) const
+{
+    if (!rhs.isNullable())
+        return false;
+
+    const auto & other_nullable = static_cast<const DataTypeNullable &>(rhs);
+    return nested_data_type->equals(*other_nullable.nested_data_type);
+}
+
+/// SerializationNullable built around the nested type's default serialization.
+SerializationPtr DataTypeNullable::doGetDefaultSerialization() const
+{
+    auto nested_serialization = nested_data_type->getDefaultSerialization();
+    return std::make_shared<SerializationNullable>(nested_serialization);
+}
+
+
+/// Creator for the "Nullable" family: exactly one argument, the nested type.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    const bool has_single_argument = arguments && arguments->children.size() == 1;
+    if (!has_single_argument)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Nullable data type family must have exactly one argument - nested type");
+
+    const ASTPtr & nested_ast = arguments->children[0];
+    DataTypePtr nested_type = DataTypeFactory::instance().get(nested_ast);
+
+    return std::make_shared<DataTypeNullable>(nested_type);
+}
+
+
+void registerDataTypeNullable(DataTypeFactory & factory) /// Registers the "Nullable" family name in the factory with `create` as its creator.
+{
+ factory.registerDataType("Nullable", create);
+}
+
+
+/// Returns `type` itself if it is already nullable, otherwise Nullable(type).
+/// The DataTypeNullable constructor may throw if the type cannot be inside Nullable.
+DataTypePtr makeNullable(const DataTypePtr & type)
+{
+    if (!type->isNullable())
+        return std::make_shared<DataTypeNullable>(type);
+
+    return type;
+}
+
+/// Like makeNullable, but leaves the type untouched when it cannot be inside Nullable.
+DataTypePtr makeNullableSafe(const DataTypePtr & type)
+{
+    if (!type->canBeInsideNullable())
+        return type;
+
+    return makeNullable(type);
+}
+
+/// Returns the nested type of a nullable type; any other type is returned as is.
+DataTypePtr removeNullable(const DataTypePtr & type)
+{
+    if (!type->isNullable())
+        return type;
+
+    const auto & nullable_type = static_cast<const DataTypeNullable &>(*type);
+    return nullable_type.getNestedType();
+}
+
+/// Makes the type nullable, keeping a LowCardinality wrapper on the outside:
+/// LowCardinality(T) becomes LowCardinality(Nullable(T)), any other T becomes Nullable(T).
+/// A type for which isNullableOrLowCardinalityNullable() already holds is returned unchanged.
+DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type)
+{
+    if (isNullableOrLowCardinalityNullable(type))
+        return type;
+
+    if (!type->lowCardinality())
+        return std::make_shared<DataTypeNullable>(type);
+
+    const auto & low_cardinality_type = assert_cast<const DataTypeLowCardinality &>(*type);
+    const auto & dictionary_type = low_cardinality_type.getDictionaryType();
+    return std::make_shared<DataTypeLowCardinality>(makeNullable(dictionary_type));
+}
+
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNullable.h b/contrib/clickhouse/src/DataTypes/DataTypeNullable.h
new file mode 100644
index 00000000000..e3165414c07
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNullable.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+
+/// A nullable data type is an ordinary data type provided with a tag
+/// indicating that it also contains the NULL value. The following class
+/// embodies this concept. Most properties below are forwarded to the nested type.
+class DataTypeNullable final : public IDataType
+{
+public:
+ static constexpr bool is_parametric = true;
+
+ explicit DataTypeNullable(const DataTypePtr & nested_data_type_); /// Throws ILLEGAL_TYPE_OF_ARGUMENT if the nested type cannot be inside Nullable.
+ std::string doGetName() const override { return "Nullable(" + nested_data_type->getName() + ")"; }
+ const char * getFamilyName() const override { return "Nullable"; }
+ String getSQLCompatibleName() const override { return nested_data_type->getSQLCompatibleName(); } /// Same SQL-compatible name as the nested type.
+ TypeIndex getTypeId() const override { return TypeIndex::Nullable; }
+
+ MutableColumnPtr createColumn() const override;
+
+ Field getDefault() const override; /// Returns Null.
+
+ bool equals(const IDataType & rhs) const override;
+
+ bool isParametric() const override { return true; }
+ bool haveSubtypes() const override { return true; }
+ bool cannotBeStoredInTables() const override { return nested_data_type->cannotBeStoredInTables(); }
+ bool shouldAlignRightInPrettyFormats() const override { return nested_data_type->shouldAlignRightInPrettyFormats(); }
+ bool textCanContainOnlyValidUTF8() const override { return nested_data_type->textCanContainOnlyValidUTF8(); }
+ bool isComparable() const override { return nested_data_type->isComparable(); }
+ bool canBeComparedWithCollation() const override { return nested_data_type->canBeComparedWithCollation(); }
+ bool canBeUsedAsVersion() const override { return false; } /// Not forwarded: always false for Nullable.
+ bool isSummable() const override { return nested_data_type->isSummable(); }
+ bool canBeUsedInBooleanContext() const override { return nested_data_type->canBeUsedInBooleanContext() || onlyNull(); } /// Also true for Nullable(Nothing).
+ bool haveMaximumSizeOfValue() const override { return nested_data_type->haveMaximumSizeOfValue(); }
+ size_t getMaximumSizeOfValueInMemory() const override { return 1 + nested_data_type->getMaximumSizeOfValueInMemory(); } /// NOTE(review): the extra 1 is presumably the null flag byte - confirm.
+ bool isNullable() const override { return true; }
+ size_t getSizeOfValueInMemory() const override; /// Throws LOGICAL_ERROR (see DataTypeNullable.cpp).
+ bool onlyNull() const override; /// True when the nested type is DataTypeNothing.
+ bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
+ bool canBePromoted() const override { return nested_data_type->canBePromoted(); }
+
+ const DataTypePtr & getNestedType() const { return nested_data_type; }
+private:
+ SerializationPtr doGetDefaultSerialization() const override;
+
+ DataTypePtr nested_data_type;
+};
+
+
+DataTypePtr makeNullable(const DataTypePtr & type);
+DataTypePtr makeNullableSafe(const DataTypePtr & type);
+DataTypePtr removeNullable(const DataTypePtr & type);
+DataTypePtr makeNullableOrLowCardinalityNullable(const DataTypePtr & type);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNumberBase.cpp b/contrib/clickhouse/src/DataTypes/DataTypeNumberBase.cpp
new file mode 100644
index 00000000000..4cefc4945c6
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNumberBase.cpp
@@ -0,0 +1,76 @@
+#include <type_traits>
+#include <DataTypes/DataTypeNumberBase.h>
+#include <Columns/ColumnVector.h>
+
+
+namespace DB
+{
+
+template <typename T>
+Field DataTypeNumberBase<T>::getDefault() const /// Default is a value-initialized NearestFieldType<FieldType>, returned as a Field.
+{
+ return NearestFieldType<FieldType>();
+}
+template <typename T>
+String DataTypeNumberBase<T>::getSQLCompatibleName() const /// Maps T to an SQL type name; the branch is chosen at compile time via if constexpr.
+{
+ if constexpr (std::is_same_v<T, Int8>)
+ return "TINYINT";
+ else if constexpr (std::is_same_v<T, Int16>)
+ return "SMALLINT";
+ else if constexpr (std::is_same_v<T, Int32>)
+ return "INTEGER";
+ else if constexpr (std::is_same_v<T, Int64>)
+ return "BIGINT";
+ else if constexpr (std::is_same_v<T, UInt8>)
+ return "TINYINT UNSIGNED";
+ else if constexpr (std::is_same_v<T, UInt16>)
+ return "SMALLINT UNSIGNED";
+ else if constexpr (std::is_same_v<T, UInt32>)
+ return "INTEGER UNSIGNED";
+ else if constexpr (std::is_same_v<T, UInt64>)
+ return "BIGINT UNSIGNED";
+ else if constexpr (std::is_same_v<T, Float32>)
+ return "FLOAT";
+ else if constexpr (std::is_same_v<T, Float64>)
+ return "DOUBLE";
+ /// Unsupported types (every T not listed above) are converted to TEXT
+ else
+ return "TEXT";
+}
+
+template <typename T>
+MutableColumnPtr DataTypeNumberBase<T>::createColumn() const /// Values of this type are stored in a ColumnVector<T>.
+{
+ return ColumnVector<T>::create();
+}
+
+template <typename T>
+bool DataTypeNumberBase<T>::isValueRepresentedByInteger() const /// Determined solely by the is_integer<T> trait.
+{
+ return is_integer<T>;
+}
+
+template <typename T>
+bool DataTypeNumberBase<T>::isValueRepresentedByUnsignedInteger() const /// True only when T is both an integer and unsigned according to the traits.
+{
+ return is_integer<T> && is_unsigned_v<T>;
+}
+
+/// Explicit template instantiations - to avoid code bloat in headers. The header declares the same set as `extern template`.
+template class DataTypeNumberBase<UInt8>;
+template class DataTypeNumberBase<UInt16>;
+template class DataTypeNumberBase<UInt32>;
+template class DataTypeNumberBase<UInt64>;
+template class DataTypeNumberBase<UInt128>;
+template class DataTypeNumberBase<UInt256>;
+template class DataTypeNumberBase<Int8>;
+template class DataTypeNumberBase<Int16>;
+template class DataTypeNumberBase<Int32>;
+template class DataTypeNumberBase<Int64>;
+template class DataTypeNumberBase<Int128>;
+template class DataTypeNumberBase<Int256>;
+template class DataTypeNumberBase<Float32>;
+template class DataTypeNumberBase<Float64>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeNumberBase.h b/contrib/clickhouse/src/DataTypes/DataTypeNumberBase.h
new file mode 100644
index 00000000000..d902c62505e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeNumberBase.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include <base/TypeName.h>
+#include <Core/TypeId.h>
+#include <DataTypes/IDataType.h>
+#include <DataTypes/Serializations/SerializationNumber.h>
+
+
+namespace DB
+{
+
+/** Implements part of the IDataType interface, common to all numbers and for Date and DateTime.
+ * T is the underlying arithmetic type (enforced by the static_assert below).
+ */
+template <typename T>
+class DataTypeNumberBase : public IDataType
+{
+ static_assert(is_arithmetic_v<T>);
+
+public:
+ static constexpr bool is_parametric = false;
+ static constexpr auto family_name = TypeName<T>;
+ static constexpr auto type_id = TypeToTypeIndex<T>;
+
+ using FieldType = T;
+ using ColumnType = ColumnVector<T>;
+
+ const char * getFamilyName() const override { return TypeName<T>.data(); }
+ String getSQLCompatibleName() const override; /// SQL type name for T, or "TEXT" for types without a mapping (see DataTypeNumberBase.cpp).
+ TypeIndex getTypeId() const override { return TypeToTypeIndex<T>; }
+
+ Field getDefault() const override;
+
+ MutableColumnPtr createColumn() const override;
+
+ bool isParametric() const override { return false; }
+ bool haveSubtypes() const override { return false; }
+
+ bool shouldAlignRightInPrettyFormats() const override
+ {
+ /// Just a number, without customizations. Counterexample: IPv4.
+ return !custom_serialization;
+ }
+
+ bool textCanContainOnlyValidUTF8() const override { return true; }
+ bool isComparable() const override { return true; }
+ bool isValueRepresentedByNumber() const override { return true; }
+ bool isValueRepresentedByInteger() const override;
+ bool isValueRepresentedByUnsignedInteger() const override;
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool haveMaximumSizeOfValue() const override { return true; }
+ size_t getSizeOfValueInMemory() const override { return sizeof(T); } /// Fixed size: exactly sizeof(T).
+ bool isCategorial() const override { return isValueRepresentedByInteger(); } /// Only integer-represented types are treated as categorial.
+ bool canBeInsideLowCardinality() const override { return true; }
+
+ SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationNumber<T>>(); }
+};
+
+/// Prevent implicit template instantiation of DataTypeNumberBase for common numeric types; the explicit instantiations are in DataTypeNumberBase.cpp.
+
+extern template class DataTypeNumberBase<UInt8>;
+extern template class DataTypeNumberBase<UInt16>;
+extern template class DataTypeNumberBase<UInt32>;
+extern template class DataTypeNumberBase<UInt64>;
+extern template class DataTypeNumberBase<UInt128>;
+extern template class DataTypeNumberBase<UInt256>;
+extern template class DataTypeNumberBase<Int16>;
+extern template class DataTypeNumberBase<Int8>;
+extern template class DataTypeNumberBase<Int32>;
+extern template class DataTypeNumberBase<Int64>;
+extern template class DataTypeNumberBase<Int128>;
+extern template class DataTypeNumberBase<Int256>;
+extern template class DataTypeNumberBase<Float32>;
+extern template class DataTypeNumberBase<Float64>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeObject.cpp b/contrib/clickhouse/src/DataTypes/DataTypeObject.cpp
new file mode 100644
index 00000000000..720436d0e0d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeObject.cpp
@@ -0,0 +1,82 @@
+#include <DataTypes/DataTypeObject.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationObject.h>
+
+#include <Parsers/IAST.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTFunction.h>
+#include <IO/Operators.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int UNEXPECTED_AST_STRUCTURE;
+}
+
+DataTypeObject::DataTypeObject(const String & schema_format_, bool is_nullable_) /// The schema format name is stored lower-cased.
+ : schema_format(Poco::toLower(schema_format_))
+ , is_nullable(is_nullable_)
+{
+}
+
+/// Equal when `rhs` is a DataTypeObject with the same schema format and the same nullability flag.
+bool DataTypeObject::equals(const IDataType & rhs) const
+{
+    const auto * other_object = typeid_cast<const DataTypeObject *>(&rhs);
+    if (other_object == nullptr)
+        return false;
+
+    return schema_format == other_object->schema_format && is_nullable == other_object->is_nullable;
+}
+
+SerializationPtr DataTypeObject::doGetDefaultSerialization() const /// The serialization is looked up by the (lower-cased) schema format name.
+{
+ return getObjectSerialization(schema_format);
+}
+
+/// Renders the name as Object('<schema>') or, for the nullable variant,
+/// Object(Nullable('<schema>')); the schema name is written through the `quote` manipulator.
+String DataTypeObject::doGetName() const
+{
+    WriteBufferFromOwnString name_buf;
+
+    if (!is_nullable)
+        name_buf << "Object(" << quote << schema_format << ")";
+    else
+        name_buf << "Object(Nullable(" << quote << schema_format << "))";
+
+    return name_buf.str();
+}
+
+/// Creator for the "Object" family.
+/// Accepts exactly one argument: either a string literal with the schema format name,
+/// or Nullable(<string literal>), which additionally sets the nullability flag.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    const bool has_single_argument = arguments && arguments->children.size() == 1;
+    if (!has_single_argument)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "Object data type family must have one argument - name of schema format");
+
+    ASTPtr schema_ast = arguments->children[0];
+    bool nullable_requested = false;
+
+    /// Unwrap Nullable(...) if the argument is a function call.
+    if (const auto * wrapper = schema_ast->as<ASTFunction>())
+    {
+        const bool is_nullable_wrapper = wrapper->name == "Nullable" && wrapper->arguments->children.size() == 1;
+        if (!is_nullable_wrapper)
+            throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+                "Expected 'Nullable(<schema_name>)' as parameter for type Object (function: {})", wrapper->name);
+
+        schema_ast = wrapper->arguments->children[0];
+        nullable_requested = true;
+    }
+
+    const auto * schema_literal = schema_ast->as<ASTLiteral>();
+    const bool is_string_literal = schema_literal && schema_literal->value.getType() == Field::Types::String;
+    if (!is_string_literal)
+        throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE,
+            "Object data type family must have a const string as its schema name parameter");
+
+    return std::make_shared<DataTypeObject>(schema_literal->value.get<const String &>(), nullable_requested);
+}
+
+/// Registers the "Object" family and the case-insensitive simple type "JSON",
+/// which is created as DataTypeObject("JSON", false).
+void registerDataTypeObject(DataTypeFactory & factory)
+{
+    factory.registerDataType("Object", create);
+
+    auto json_creator = [] { return std::make_shared<DataTypeObject>("JSON", false); };
+    factory.registerSimpleDataType("JSON", json_creator, DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeObject.h b/contrib/clickhouse/src/DataTypes/DataTypeObject.h
new file mode 100644
index 00000000000..2e1e5398f7e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeObject.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <Core/Field.h>
+#include <Columns/ColumnObject.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+}
+
+class DataTypeObject : public IDataType /// Data type parameterized by a schema format name and a nullability flag; exposed as Object(...) and JSON.
+{
+private:
+ String schema_format; /// Stored lower-cased by the constructor.
+ bool is_nullable; /// Passed to ColumnObject::create and reported by hasNullableSubcolumns().
+
+public:
+ DataTypeObject(const String & schema_format_, bool is_nullable_);
+
+ const char * getFamilyName() const override { return "Object"; }
+ String getSQLCompatibleName() const override { return "JSON"; }
+ String doGetName() const override;
+ TypeIndex getTypeId() const override { return TypeIndex::Object; }
+
+ MutableColumnPtr createColumn() const override { return ColumnObject::create(is_nullable); }
+
+ Field getDefault() const override /// Not supported: always throws NOT_IMPLEMENTED.
+ {
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDefault() is not implemented for data type {}", getName());
+ }
+
+ bool haveSubtypes() const override { return false; }
+ bool equals(const IDataType & rhs) const override;
+ bool isParametric() const override { return true; }
+ bool hasDynamicSubcolumns() const override { return true; }
+
+ SerializationPtr doGetDefaultSerialization() const override;
+
+ bool hasNullableSubcolumns() const { return is_nullable; }
+
+ const String & getSchemaFormat() const { return schema_format; }
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeSet.h b/contrib/clickhouse/src/DataTypes/DataTypeSet.h
new file mode 100644
index 00000000000..e71a345a195
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeSet.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <DataTypes/IDataTypeDummy.h>
+#include <Columns/ColumnSet.h>
+
+
+namespace DB
+{
+
+/** The data type corresponding to the set of values in the IN section.
+ * Used only as an intermediate when evaluating expressions.
+ */
+class DataTypeSet final : public IDataTypeDummy
+{
+public:
+ static constexpr bool is_parametric = true;
+ const char * getFamilyName() const override { return "Set"; }
+ String getSQLCompatibleName() const override { return "TEXT"; }
+
+ TypeIndex getTypeId() const override { return TypeIndex::Set; }
+ bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); } /// Equality is a dynamic type match only; set contents are not part of the type.
+ bool isParametric() const override { return true; }
+
+ // Used for expressions analysis. The column is created with arguments (0, nullptr).
+ MutableColumnPtr createColumn() const override { return ColumnSet::create(0, nullptr); }
+
+ // Used only for debugging, making it DUMPABLE. The default is an empty Tuple.
+ Field getDefault() const override { return Tuple(); }
+};
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeString.cpp b/contrib/clickhouse/src/DataTypes/DataTypeString.cpp
new file mode 100644
index 00000000000..95e49420009
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeString.cpp
@@ -0,0 +1,99 @@
+#include <Columns/ColumnString.h>
+#include <Core/Field.h>
+
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationString.h>
+
+#include <Parsers/IAST.h>
+#include <Parsers/ASTLiteral.h>
+
+namespace DB
+{
+
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int UNEXPECTED_AST_STRUCTURE;
+}
+
+Field DataTypeString::getDefault() const /// The default value is an empty string.
+{
+ return String();
+}
+
+MutableColumnPtr DataTypeString::createColumn() const /// Values of this type are stored in a ColumnString.
+{
+ return ColumnString::create();
+}
+
+
+/// String has no parameters, so equality is just a dynamic type match.
+bool DataTypeString::equals(const IDataType & rhs) const
+{
+    const bool same_dynamic_type = typeid(rhs) == typeid(*this);
+    return same_dynamic_type;
+}
+
+SerializationPtr DataTypeString::doGetDefaultSerialization() const /// The default serialization of String is SerializationString.
+{
+ return std::make_shared<SerializationString>();
+}
+
+/// Creator for the "String" family.
+/// An optional single argument is accepted and validated (it must be a UInt64 literal),
+/// but it does not affect the created type.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    const bool has_arguments = arguments && !arguments->children.empty();
+    if (has_arguments)
+    {
+        const auto & children = arguments->children;
+        if (children.size() > 1)
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "String data type family mustn't have more than one argument - size in characters");
+
+        const auto * size_literal = children[0]->as<ASTLiteral>();
+        const bool is_unsigned_number = size_literal && size_literal->value.getType() == Field::Types::UInt64;
+        if (!is_unsigned_number)
+            throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "String data type family may have only a number (positive integer) as its argument");
+    }
+
+    return std::make_shared<DataTypeString>();
+}
+
+
+/// Registers the "String" family together with its compatibility aliases.
+void registerDataTypeString(DataTypeFactory & factory)
+{
+    factory.registerDataType("String", create);
+
+    /// These synonyms are added for compatibility.
+    /// All of them are case-insensitive and are registered in the order listed.
+    static constexpr const char * aliases[] =
+    {
+        "CHAR",
+        "NCHAR",
+        "CHARACTER",
+        "VARCHAR",
+        "NVARCHAR",
+        "VARCHAR2", /// Oracle
+        "TEXT",
+        "TINYTEXT",
+        "MEDIUMTEXT",
+        "LONGTEXT",
+        "BLOB",
+        "CLOB",
+        "TINYBLOB",
+        "MEDIUMBLOB",
+        "LONGBLOB",
+        "BYTEA", /// PostgreSQL
+
+        "CHARACTER LARGE OBJECT",
+        "CHARACTER VARYING",
+        "CHAR LARGE OBJECT",
+        "CHAR VARYING",
+        "NATIONAL CHAR",
+        "NATIONAL CHARACTER",
+        "NATIONAL CHARACTER LARGE OBJECT",
+        "NATIONAL CHARACTER VARYING",
+        "NATIONAL CHAR VARYING",
+        "NCHAR VARYING",
+        "NCHAR LARGE OBJECT",
+        "BINARY LARGE OBJECT",
+        "BINARY VARYING",
+        "VARBINARY",
+        "GEOMETRY", /// MySQL
+    };
+
+    for (const char * alias : aliases)
+        factory.registerAlias(alias, "String", DataTypeFactory::CaseInsensitive);
+}
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeString.h b/contrib/clickhouse/src/DataTypes/DataTypeString.h
new file mode 100644
index 00000000000..c39fa90f6e7
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeString.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+
+
+namespace DB
+{
+
+class ColumnString;
+
+class DataTypeString final : public IDataType /// The String data type; it has no parameters.
+{
+public:
+ using FieldType = String;
+ using ColumnType = ColumnString;
+ static constexpr bool is_parametric = false;
+ static constexpr auto type_id = TypeIndex::String;
+
+ const char * getFamilyName() const override
+ {
+ return "String";
+ }
+
+ String getSQLCompatibleName() const override { return "BLOB"; } /// String is reported as "BLOB" when an SQL-compatible name is requested.
+
+ TypeIndex getTypeId() const override { return type_id; }
+
+ MutableColumnPtr createColumn() const override;
+
+ Field getDefault() const override; /// Returns an empty string.
+
+ bool equals(const IDataType & rhs) const override;
+
+ bool isParametric() const override { return false; }
+ bool haveSubtypes() const override { return false; }
+ bool isComparable() const override { return true; }
+ bool canBeComparedWithCollation() const override { return true; }
+ bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+ bool isCategorial() const override { return true; }
+ bool canBeInsideNullable() const override { return true; }
+ bool canBeInsideLowCardinality() const override { return true; }
+
+ SerializationPtr doGetDefaultSerialization() const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeTuple.cpp b/contrib/clickhouse/src/DataTypes/DataTypeTuple.cpp
new file mode 100644
index 00000000000..768f87fe3d4
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeTuple.cpp
@@ -0,0 +1,372 @@
+#include <base/map.h>
+#include <base/range.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnConst.h>
+#include <Core/Field.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationInfo.h>
+#include <DataTypes/Serializations/SerializationTuple.h>
+#include <DataTypes/Serializations/SerializationNamed.h>
+#include <DataTypes/Serializations/SerializationInfoTuple.h>
+#include <DataTypes/NestedUtils.h>
+#include <Parsers/IAST.h>
+#include <Parsers/ASTNameTypePair.h>
+#include <Common/assert_cast.h>
+#include <Common/quoteString.h>
+#include <IO/WriteHelpers.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+ extern const int DUPLICATE_COLUMN;
+ extern const int EMPTY_DATA_PASSED;
+ extern const int NOT_FOUND_COLUMN_IN_BLOCK;
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
+ extern const int ILLEGAL_INDEX;
+ extern const int LOGICAL_ERROR;
+}
+
+
+/// Builds a tuple without explicit names: each element is named after its 1-based position.
+DataTypeTuple::DataTypeTuple(const DataTypes & elems_)
+    : elems(elems_), have_explicit_names(false)
+{
+    /// Generated names have the form '1', '2', ...
+    const size_t count = elems.size();
+    names.reserve(count);
+    for (size_t position = 1; position <= count; ++position)
+        names.emplace_back(toString(position));
+}
+
+/// Validates explicit element names.
+/// Returns the exception for the first empty or repeated name, or an empty optional if all names are fine.
+static std::optional<Exception> checkTupleNames(const Strings & names)
+{
+    std::unordered_set<String> seen;
+    for (const auto & candidate : names)
+    {
+        if (candidate.empty())
+            return Exception(ErrorCodes::BAD_ARGUMENTS, "Names of tuple elements cannot be empty");
+
+        const bool inserted = seen.insert(candidate).second;
+        if (!inserted)
+            return Exception(ErrorCodes::DUPLICATE_COLUMN, "Names of tuple elements must be unique");
+    }
+
+    return std::nullopt;
+}
+
+/// Builds a tuple with explicit element names.
+/// Throws if the number of names differs from the number of elements,
+/// or if checkTupleNames() reports an empty or duplicated name.
+DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_)
+    : elems(elems_), names(names_), have_explicit_names(true)
+{
+    if (elems.size() != names.size())
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of names passed to constructor of DataTypeTuple");
+
+    auto name_error = checkTupleNames(names);
+    if (name_error)
+        throw std::move(*name_error);
+}
+
+/// Renders the type name as "Tuple(<elem>, <elem>, ...)".
+/// When names are explicit, each element is written as "<back-quoted-if-needed name> <type name>".
+std::string DataTypeTuple::doGetName() const
+{
+    WriteBufferFromOwnString out;
+
+    out << "Tuple(";
+    for (size_t pos = 0, count = elems.size(); pos < count; ++pos)
+    {
+        if (pos > 0)
+            out << ", ";
+
+        if (have_explicit_names)
+            out << backQuoteIfNeed(names[pos]) << ' ';
+
+        out << elems[pos]->getName();
+    }
+    out << ")";
+
+    return out.str();
+}
+
+
+/// Returns the nested column number `idx` of `column`, which is cast to ColumnTuple via assert_cast.
+static inline IColumn & extractElementColumn(IColumn & column, size_t idx)
+{
+    auto & tuple_column = assert_cast<ColumnTuple &>(column);
+    return tuple_column.getColumn(idx);
+}
+
+/// Runs `impl` (expected to append one value to every element column of the tuple `column`)
+/// and verifies afterwards that all element columns have the same size.
+/// If `impl` or the check throws, one value is popped from every element column
+/// that grew beyond its previous size, and the exception is rethrown.
+template <typename F>
+static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
+{
+    /// We use the assumption that tuples of zero size do not exist.
+    const size_t size_before = column.size();
+    const size_t num_elems = elems.size();
+
+    try
+    {
+        impl();
+
+        /// Every element column must now agree with the size of the tuple column.
+        const size_t size_after = column.size();
+        for (size_t pos = 0; pos < num_elems; ++pos)
+        {
+            if (extractElementColumn(column, pos).size() == size_after)
+                continue;
+
+            /// This is not a logical error because it may work with user-supplied data.
+            throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
+                "Cannot read a tuple because not all elements are present");
+        }
+    }
+    catch (...)
+    {
+        /// Roll back the partially inserted value.
+        for (size_t pos = 0; pos < num_elems; ++pos)
+        {
+            auto & element_column = extractElementColumn(column, pos);
+            if (element_column.size() > size_before)
+                element_column.popBack(1);
+        }
+
+        throw;
+    }
+}
+
+/// Creates an empty ColumnTuple holding one freshly created column per element type.
+MutableColumnPtr DataTypeTuple::createColumn() const
+{
+    MutableColumns element_columns;
+    element_columns.reserve(elems.size());
+    for (const auto & elem : elems)
+        element_columns.emplace_back(elem->createColumn());
+    return ColumnTuple::create(std::move(element_columns));
+}
+
+/// Creates an empty ColumnTuple whose element columns are created for the given serialization.
+/// Throws LOGICAL_ERROR if, after unwrapping, the serialization is not a SerializationTuple.
+MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const
+{
+    /// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed
+    /// several times to allow to reconstruct the substream path name.
+    /// The substream path name is not needed here, so the leading wrappers are dropped.
+    const ISerialization * unwrapped = &serialization;
+    while (const auto * named = typeid_cast<const SerializationNamed *>(unwrapped))
+        unwrapped = named->getNested().get();
+
+    const auto * tuple_serialization = typeid_cast<const SerializationTuple *>(unwrapped);
+    if (tuple_serialization == nullptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected serialization to create column of type Tuple");
+
+    const auto & element_serializations = tuple_serialization->getElementsSerializations();
+
+    const size_t count = elems.size();
+    assert(element_serializations.size() == count);
+
+    MutableColumns element_columns(count);
+    for (size_t pos = 0; pos != count; ++pos)
+    {
+        const auto & nested = element_serializations[pos]->getNested();
+        element_columns[pos] = elems[pos]->createColumn(*nested);
+    }
+
+    return ColumnTuple::create(std::move(element_columns));
+}
+
+/// The default value is a Tuple made of the default value of every element type, in order.
+Field DataTypeTuple::getDefault() const
+{
+    const auto default_of = [] (const DataTypePtr & elem) { return elem->getDefault(); };
+    return Tuple(collections::map<Tuple>(elems, default_of));
+}
+
+/// Appends a default value to every element column of `column`.
+/// Goes through addElementSafe(), which rolls back the element columns if anything throws.
+void DataTypeTuple::insertDefaultInto(IColumn & column) const
+{
+    const auto append_defaults = [&]
+    {
+        for (size_t pos = 0; pos < elems.size(); ++pos)
+            elems[pos]->insertDefaultInto(extractElementColumn(column, pos));
+    };
+
+    addElementSafe(elems, column, append_defaults);
+}
+
+/// Two tuple types are equal when they have the same dynamic type, the same number of elements,
+/// and every position has an equal element type and an identical element name.
+bool DataTypeTuple::equals(const IDataType & rhs) const
+{
+    if (typeid(*this) != typeid(rhs))
+        return false;
+
+    const auto & other = static_cast<const DataTypeTuple &>(rhs);
+
+    const size_t count = elems.size();
+    if (other.elems.size() != count)
+        return false;
+
+    for (size_t pos = 0; pos != count; ++pos)
+    {
+        if (!elems[pos]->equals(*other.elems[pos]))
+            return false;
+        if (names[pos] != other.names[pos])
+            return false;
+    }
+
+    return true;
+}
+
+
+/// Returns the 0-based position of the element called `name`.
+/// Throws NOT_FOUND_COLUMN_IN_BLOCK when no element has that name.
+size_t DataTypeTuple::getPositionByName(const String & name) const
+{
+    if (const auto position = tryGetPositionByName(name))
+        return *position;
+
+    throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}'", name);
+}
+
+/// Returns the 0-based position of the first element called `name`, or an empty optional if there is none.
+std::optional<size_t> DataTypeTuple::tryGetPositionByName(const String & name) const
+{
+    for (size_t pos = 0, count = elems.size(); pos != count; ++pos)
+        if (name == names[pos])
+            return pos;
+
+    return {};
+}
+
+/// Returns the name of the element at 1-based position `i`.
+/// Throws ILLEGAL_INDEX when `i` is 0 or larger than the number of element names.
+String DataTypeTuple::getNameByPosition(size_t i) const
+{
+    const size_t count = names.size();
+    if (i == 0 || i > count)
+        throw Exception(ErrorCodes::ILLEGAL_INDEX, "Index of tuple element ({}) is out of range ([1, {}])", i, count);
+
+    /// Positions are 1-based for callers, while `names` is 0-based.
+    return names[i - 1];
+}
+
+
+/// True only if every element type reports textCanContainOnlyValidUTF8().
+bool DataTypeTuple::textCanContainOnlyValidUTF8() const
+{
+    for (const auto & elem : elems)
+        if (!elem->textCanContainOnlyValidUTF8())
+            return false;
+    return true;
+}
+
+/// True only if every element type reports haveMaximumSizeOfValue().
+bool DataTypeTuple::haveMaximumSizeOfValue() const
+{
+    for (const auto & elem : elems)
+        if (!elem->haveMaximumSizeOfValue())
+            return false;
+    return true;
+}
+
+/// True if at least one element type reports hasDynamicSubcolumns().
+bool DataTypeTuple::hasDynamicSubcolumns() const
+{
+    for (const auto & elem : elems)
+        if (elem->hasDynamicSubcolumns())
+            return true;
+    return false;
+}
+
+/// True only if every element type reports isComparable().
+bool DataTypeTuple::isComparable() const
+{
+    for (const auto & elem : elems)
+        if (!elem->isComparable())
+            return false;
+    return true;
+}
+
+/// Sum of getMaximumSizeOfValueInMemory() over all element types.
+size_t DataTypeTuple::getMaximumSizeOfValueInMemory() const
+{
+    size_t total = 0;
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+        total += elems[pos]->getMaximumSizeOfValueInMemory();
+    return total;
+}
+
+/// Sum of getSizeOfValueInMemory() over all element types.
+size_t DataTypeTuple::getSizeOfValueInMemory() const
+{
+    size_t total = 0;
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+        total += elems[pos]->getSizeOfValueInMemory();
+    return total;
+}
+
+/// Builds a SerializationTuple from the default serialization of every element,
+/// each wrapped in SerializationNamed with the element's name (explicit, or its 1-based position).
+SerializationPtr DataTypeTuple::doGetDefaultSerialization() const
+{
+    const size_t count = elems.size();
+    SerializationTuple::ElementSerializations element_serializations(count);
+
+    for (size_t pos = 0; pos != count; ++pos)
+    {
+        String elem_name = have_explicit_names ? names[pos] : toString(pos + 1);
+        auto nested = elems[pos]->getDefaultSerialization();
+        element_serializations[pos] = std::make_shared<SerializationNamed>(nested, elem_name);
+    }
+
+    return std::make_shared<SerializationTuple>(std::move(element_serializations), have_explicit_names);
+}
+
+/// Same as doGetDefaultSerialization(), but each element serialization is chosen
+/// from the matching element info of `info`, which is cast to SerializationInfoTuple via assert_cast.
+SerializationPtr DataTypeTuple::getSerialization(const SerializationInfo & info) const
+{
+    const auto & tuple_info = assert_cast<const SerializationInfoTuple &>(info);
+
+    const size_t count = elems.size();
+    SerializationTuple::ElementSerializations element_serializations(count);
+
+    for (size_t pos = 0; pos != count; ++pos)
+    {
+        String elem_name = have_explicit_names ? names[pos] : toString(pos + 1);
+        auto nested = elems[pos]->getSerialization(*tuple_info.getElementInfo(pos));
+        element_serializations[pos] = std::make_shared<SerializationNamed>(nested, elem_name);
+    }
+
+    return std::make_shared<SerializationTuple>(std::move(element_serializations), have_explicit_names);
+}
+
+/// Creates a SerializationInfoTuple holding one serialization info per element type, built with `settings`.
+MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const SerializationInfo::Settings & settings) const
+{
+    MutableSerializationInfos element_infos;
+    element_infos.reserve(elems.size());
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+        element_infos.push_back(elems[pos]->createSerializationInfo(settings));
+
+    return std::make_shared<SerializationInfoTuple>(std::move(element_infos), names, settings);
+}
+
+/// Collects the serialization info of every element column of `column` into a SerializationInfoTuple
+/// created with default settings. A ColumnConst is unwrapped to its data column first.
+SerializationInfoPtr DataTypeTuple::getSerializationInfo(const IColumn & column) const
+{
+    if (const auto * const_column = checkAndGetColumn<ColumnConst>(&column))
+        return getSerializationInfo(const_column->getDataColumn());
+
+    const size_t count = elems.size();
+    MutableSerializationInfos element_infos;
+    element_infos.reserve(count);
+
+    const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
+    assert(count == tuple_column.getColumns().size());
+
+    for (size_t pos = 0; pos != count; ++pos)
+    {
+        auto info = elems[pos]->getSerializationInfo(tuple_column.getColumn(pos));
+        element_infos.push_back(const_pointer_cast<SerializationInfo>(info));
+    }
+
+    return std::make_shared<SerializationInfoTuple>(std::move(element_infos), names, SerializationInfo::Settings{});
+}
+
+
+/// Factory callback for "Tuple": builds the type from the AST argument list.
+/// Each argument is either a plain type or a name-type pair; names must be given for all elements or for none.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    if (!arguments || arguments->children.empty())
+        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Tuple cannot be empty");
+
+    const size_t num_children = arguments->children.size();
+
+    DataTypes nested_types;
+    nested_types.reserve(num_children);
+
+    Strings names;
+    names.reserve(num_children);
+
+    for (const ASTPtr & child : arguments->children)
+    {
+        const auto * pair = child->as<ASTNameTypePair>();
+        if (pair == nullptr)
+        {
+            nested_types.emplace_back(DataTypeFactory::instance().get(child));
+            continue;
+        }
+
+        nested_types.emplace_back(DataTypeFactory::instance().get(pair->type));
+        names.emplace_back(pair->name);
+    }
+
+    if (names.empty())
+        return std::make_shared<DataTypeTuple>(nested_types);
+
+    if (names.size() != nested_types.size())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Names are specified not for all elements of Tuple type");
+
+    return std::make_shared<DataTypeTuple>(nested_types, names);
+}
+
+
+/// Registers the "Tuple" type family in the factory, using create() above as its constructor callback.
+void registerDataTypeTuple(DataTypeFactory & factory)
+{
+    factory.registerDataType("Tuple", &create);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeTuple.h b/contrib/clickhouse/src/DataTypes/DataTypeTuple.h
new file mode 100644
index 00000000000..0bf3f3ac8b3
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeTuple.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <optional>
+
+
+namespace DB
+{
+
+/** Tuple data type.
+ * Used as an intermediate result when evaluating expressions.
+ * Also can be used as a column - the result of the query execution.
+ *
+ * Tuple elements can have names.
+ * If an element is unnamed, it will have automatically assigned name like '1', '2', '3' corresponding to its position.
+ * Manually assigned names must not begin with digit. Names must be unique.
+ *
+ * All tuples with the same size and element types are equivalent for expressions, regardless of the names of the elements.
+ */
+class DataTypeTuple final : public IDataType
+{
+private:
+    DataTypes elems;           /// Element types, in order.
+    Strings names;             /// One name per element.
+    bool have_explicit_names;  /// Set by the two-argument constructor, cleared by the one-argument one.
+
+public:
+    static constexpr bool is_parametric = true;
+
+    /// Tuple without explicit element names.
+    explicit DataTypeTuple(const DataTypes & elems);
+    /// Tuple with explicit element names.
+    DataTypeTuple(const DataTypes & elems, const Strings & names);
+
+    /// Identification.
+    TypeIndex getTypeId() const override { return TypeIndex::Tuple; }
+    std::string doGetName() const override;
+    const char * getFamilyName() const override { return "Tuple"; }
+    String getSQLCompatibleName() const override { return "JSON"; }
+
+    /// Fixed properties of the type.
+    bool canBeInsideNullable() const override { return false; }
+    bool supportsSparseSerialization() const override { return true; }
+    bool canBeInsideSparseColumns() const override { return false; }
+    bool isParametric() const override { return is_parametric; }
+
+    /// Properties derived from the element types.
+    bool haveSubtypes() const override { return !elems.empty(); }
+    bool isComparable() const override;
+    bool textCanContainOnlyValidUTF8() const override;
+    bool haveMaximumSizeOfValue() const override;
+    bool hasDynamicSubcolumns() const override;
+    size_t getMaximumSizeOfValueInMemory() const override;
+    size_t getSizeOfValueInMemory() const override;
+
+    /// Columns and values.
+    MutableColumnPtr createColumn() const override;
+    MutableColumnPtr createColumn(const ISerialization & serialization) const override;
+    Field getDefault() const override;
+    void insertDefaultInto(IColumn & column) const override;
+
+    bool equals(const IDataType & rhs) const override;
+
+    /// Serialization.
+    SerializationPtr doGetDefaultSerialization() const override;
+    SerializationPtr getSerialization(const SerializationInfo & info) const override;
+    MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const override;
+    SerializationInfoPtr getSerializationInfo(const IColumn & column) const override;
+
+    /// Element access. getElement() does no bounds checking.
+    const DataTypePtr & getElement(size_t i) const { return elems[i]; }
+    const DataTypes & getElements() const { return elems; }
+    const Strings & getElementNames() const { return names; }
+
+    /// Name <-> position lookup.
+    size_t getPositionByName(const String & name) const;
+    std::optional<size_t> tryGetPositionByName(const String & name) const;
+    String getNameByPosition(size_t i) const;
+
+    bool haveExplicitNames() const { return have_explicit_names; }
+};
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeUUID.cpp b/contrib/clickhouse/src/DataTypes/DataTypeUUID.cpp
new file mode 100644
index 00000000000..44182a700b4
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeUUID.cpp
@@ -0,0 +1,34 @@
+#include <DataTypes/DataTypeUUID.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/Serializations/SerializationUUID.h>
+
+
+namespace DB
+{
+
+/// UUID has no parameters, so two types are equal exactly when their dynamic types match.
+bool DataTypeUUID::equals(const IDataType & rhs) const
+{
+    const bool same_dynamic_type = typeid(*this) == typeid(rhs);
+    return same_dynamic_type;
+}
+
+/// The default serialization is a new SerializationUUID.
+SerializationPtr DataTypeUUID::doGetDefaultSerialization() const
+{
+    SerializationPtr serialization = std::make_shared<SerializationUUID>();
+    return serialization;
+}
+
+/// The default value is a value-initialized UUID.
+Field DataTypeUUID::getDefault() const
+{
+    UUID default_uuid{};
+    return default_uuid;
+}
+
+/// Creates an empty ColumnVector<UUID>.
+MutableColumnPtr DataTypeUUID::createColumn() const
+{
+    using Column = ColumnVector<UUID>;
+    return Column::create();
+}
+
+/// Registers the parameterless "UUID" type in the factory.
+void registerDataTypeUUID(DataTypeFactory & factory)
+{
+    const auto make_uuid_type = [] { return DataTypePtr(std::make_shared<DataTypeUUID>()); };
+    factory.registerSimpleDataType("UUID", make_uuid_type);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypeUUID.h b/contrib/clickhouse/src/DataTypes/DataTypeUUID.h
new file mode 100644
index 00000000000..8664c3bcfd1
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypeUUID.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <Columns/ColumnVector.h>
+#include <Core/UUID.h>
+
+
+namespace DB
+{
+
+/// The UUID data type: family name "UUID", SQL-compatible name "CHAR", stored in ColumnVector<UUID>.
+class DataTypeUUID : public IDataType
+{
+public:
+    using FieldType = UUID;
+    using ColumnType = ColumnVector<UUID>;
+
+    static constexpr bool is_parametric = false;
+    static constexpr auto type_id = TypeIndex::UUID;
+
+    /// Identification.
+    const char * getFamilyName() const override { return "UUID"; }
+    String getSQLCompatibleName() const override { return "CHAR"; }
+    TypeIndex getTypeId() const override { return type_id; }
+
+    /// Implemented out of line.
+    Field getDefault() const override;
+    MutableColumnPtr createColumn() const override;
+    bool equals(const IDataType & rhs) const override;
+    SerializationPtr doGetDefaultSerialization() const override;
+
+    /// Fixed properties of the type.
+    bool isParametric() const override { return is_parametric; }
+    bool haveSubtypes() const override { return false; }
+    bool canBeUsedInBitOperations() const override { return true; }
+    bool canBeInsideNullable() const override { return true; }
+    bool canBePromoted() const override { return false; }
+    bool shouldAlignRightInPrettyFormats() const override { return false; }
+    bool textCanContainOnlyValidUTF8() const override { return true; }
+    bool isComparable() const override { return true; }
+    bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
+    bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
+    bool haveMaximumSizeOfValue() const override { return true; }
+    size_t getSizeOfValueInMemory() const override { return sizeof(UUID); }
+    bool isCategorial() const override { return true; }
+    bool canBeInsideLowCardinality() const override { return true; }
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypesDecimal.cpp b/contrib/clickhouse/src/DataTypes/DataTypesDecimal.cpp
new file mode 100644
index 00000000000..fa044d4ac9c
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypesDecimal.cpp
@@ -0,0 +1,131 @@
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/Serializations/SerializationDecimal.h>
+
+#include <Common/typeid_cast.h>
+#include <Core/DecimalFunctions.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/readDecimalText.h>
+#include <Parsers/ASTLiteral.h>
+
+#include <type_traits>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+ extern const int DECIMAL_OVERFLOW;
+}
+
+
+/// Full type name in the form "Decimal(precision, scale)".
+template <is_decimal T>
+std::string DataTypeDecimal<T>::doGetName() const
+{
+    const auto & type_precision = this->precision;
+    const auto & type_scale = this->scale;
+    return fmt::format("Decimal({}, {})", type_precision, type_scale);
+}
+
+/// SQL-compatible type name in the form "DECIMAL(precision, scale)".
+template <is_decimal T>
+std::string DataTypeDecimal<T>::getSQLCompatibleName() const
+{
+    const auto & type_precision = this->precision;
+    const auto & type_scale = this->scale;
+    return fmt::format("DECIMAL({}, {})", type_precision, type_scale);
+}
+
+/// Equal when `rhs` is a DataTypeDecimal over the same T and has the same scale.
+/// Note that precision is not part of the comparison.
+template <is_decimal T>
+bool DataTypeDecimal<T>::equals(const IDataType & rhs) const
+{
+    const auto * other = typeid_cast<const DataTypeDecimal<T> *>(&rhs);
+    if (other == nullptr)
+        return false;
+    return this->scale == other->getScale();
+}
+
+/// Promotes to a wider decimal with the same scale and the maximum precision of the target:
+/// Decimal128 when T is not larger than Decimal128, otherwise Decimal256.
+template <is_decimal T>
+DataTypePtr DataTypeDecimal<T>::promoteNumericType() const
+{
+    if constexpr (sizeof(T) <= sizeof(Decimal128))
+    {
+        using Promoted = DataTypeDecimal<Decimal128>;
+        return std::make_shared<Promoted>(Promoted::maxPrecision(), this->scale);
+    }
+    else
+    {
+        using Promoted = DataTypeDecimal<Decimal256>;
+        return std::make_shared<Promoted>(Promoted::maxPrecision(), this->scale);
+    }
+}
+
+/// Parses a decimal from text using this type's precision and scale.
+/// readDecimalText() updates the scale variable passed to it; the parsed value is then multiplied by
+/// the scale multiplier for that remaining scale. Throws DECIMAL_OVERFLOW if the multiplication overflows.
+template <is_decimal T>
+T DataTypeDecimal<T>::parseFromString(const String & str) const
+{
+    T parsed;
+    UInt32 remaining_scale = this->scale;
+
+    ReadBufferFromMemory in(str.data(), str.size());
+    readDecimalText(in, parsed, this->precision, remaining_scale, true);
+
+    const auto multiplier = DecimalUtils::scaleMultiplier<T>(remaining_scale);
+    if (common::mulOverflow(parsed.value, multiplier, parsed.value))
+        throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
+
+    return parsed;
+}
+
+/// The default serialization is a SerializationDecimal<T> with this type's precision and scale.
+template <is_decimal T>
+SerializationPtr DataTypeDecimal<T>::doGetDefaultSerialization() const
+{
+    using Serialization = SerializationDecimal<T>;
+    return std::make_shared<Serialization>(this->precision, this->scale);
+}
+
+
+/// Factory callback for "Decimal(P, S)".
+/// Requires exactly two literal arguments: precision (UInt64) and scale (Int64 or UInt64).
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    if (!arguments || arguments->children.size() != 2)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "Decimal data type family must have exactly two arguments: precision and scale");
+
+    const auto * precision = arguments->children[0]->as<ASTLiteral>();
+    const auto * scale = arguments->children[1]->as<ASTLiteral>();
+
+    const bool precision_ok = precision && precision->value.getType() == Field::Types::UInt64;
+    const bool scale_ok = scale
+        && (scale->value.getType() == Field::Types::Int64 || scale->value.getType() == Field::Types::UInt64);
+
+    if (!precision_ok || !scale_ok)
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Decimal data type family must have two numbers as its arguments");
+
+    const UInt64 precision_value = precision->value.get<UInt64>();
+    const UInt64 scale_value = scale->value.get<UInt64>();
+
+    return createDecimal<DataTypeDecimal>(precision_value, scale_value);
+}
+
+/// Factory callback for Decimal32/64/128/256(S): precision is fixed to the maximum for T,
+/// and the single literal argument (Int64 or UInt64) is the scale.
+template <typename T>
+static DataTypePtr createExact(const ASTPtr & arguments)
+{
+    if (!arguments || arguments->children.size() != 1)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have exactly one arguments: scale");
+
+    const auto * scale_arg = arguments->children[0]->as<ASTLiteral>();
+
+    const bool scale_ok = scale_arg
+        && (scale_arg->value.getType() == Field::Types::Int64 || scale_arg->value.getType() == Field::Types::UInt64);
+
+    if (!scale_ok)
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+            "Decimal32 | Decimal64 | Decimal128 | Decimal256 data type family must have a one number as its argument");
+
+    const UInt64 precision = DecimalUtils::max_precision<T>;
+    const UInt64 scale = scale_arg->value.get<UInt64>();
+
+    return createDecimal<DataTypeDecimal>(precision, scale);
+}
+
+/// Registers the fixed-width decimal families, the generic "Decimal" family and its aliases.
+/// All names are registered as case-insensitive.
+void registerDataTypeDecimal(DataTypeFactory & factory)
+{
+    const auto sensitivity = DataTypeFactory::CaseInsensitive;
+
+    /// Fixed-width families take only a scale.
+    factory.registerDataType("Decimal32", createExact<Decimal32>, sensitivity);
+    factory.registerDataType("Decimal64", createExact<Decimal64>, sensitivity);
+    factory.registerDataType("Decimal128", createExact<Decimal128>, sensitivity);
+    factory.registerDataType("Decimal256", createExact<Decimal256>, sensitivity);
+
+    /// The generic family takes precision and scale.
+    factory.registerDataType("Decimal", create, sensitivity);
+
+    for (const char * alias : {"DEC", "NUMERIC", "FIXED"})
+        factory.registerAlias(alias, "Decimal", sensitivity);
+}
+
+/// Explicit template instantiations for each decimal type; the member templates above are defined in this file.
+template class DataTypeDecimal<Decimal32>;
+template class DataTypeDecimal<Decimal64>;
+template class DataTypeDecimal<Decimal128>;
+template class DataTypeDecimal<Decimal256>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypesDecimal.h b/contrib/clickhouse/src/DataTypes/DataTypesDecimal.h
new file mode 100644
index 00000000000..5e4cfab7928
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypesDecimal.h
@@ -0,0 +1,270 @@
+#pragma once
+
+#include <base/arithmeticOverflow.h>
+#include <base/extended_types.h>
+#include <Common/typeid_cast.h>
+#include <DataTypes/IDataType.h>
+#include <DataTypes/DataTypeDecimalBase.h>
+#include <DataTypes/DataTypeDateTime64.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int DECIMAL_OVERFLOW;
+ extern const int LOGICAL_ERROR;
+}
+
+/// Implements Decimal(P, S), where P is precision, S is scale.
+/// Maximum precisions for underlying types are:
+/// Int32 9
+/// Int64 18
+/// Int128 38
+/// Int256 76
+/// Operation between two decimals leads to Decimal(P, S), where
+/// P is one of (9, 18, 38, 76); equals to the maximum precision for the biggest underlying type of operands.
+/// S is the maximum scale of the operands. The allowed values are [0, precision].
+template <is_decimal T>
+class DataTypeDecimal final : public DataTypeDecimalBase<T>
+{
+    using Base = DataTypeDecimalBase<T>;
+
+public:
+    using typename Base::FieldType;
+    using typename Base::ColumnType;
+    /// Constructors are inherited from the base class.
+    using Base::Base;
+
+    static constexpr auto family_name = "Decimal";
+
+    /// Identification.
+    const char * getFamilyName() const override { return family_name; }
+    String getSQLCompatibleName() const override;
+    std::string doGetName() const override;
+    TypeIndex getTypeId() const override { return TypeToTypeIndex<T>; }
+
+    /// Numeric promotion.
+    bool canBePromoted() const override { return true; }
+    DataTypePtr promoteNumericType() const override;
+
+    bool equals(const IDataType & rhs) const override;
+
+    /// Parses a value of this type from its text form.
+    T parseFromString(const String & str) const;
+
+    SerializationPtr doGetDefaultSerialization() const override;
+};
+
+using DataTypeDecimal32 = DataTypeDecimal<Decimal32>; /// Shorthand aliases, one per decimal field type.
+using DataTypeDecimal64 = DataTypeDecimal<Decimal64>;
+using DataTypeDecimal128 = DataTypeDecimal<Decimal128>;
+using DataTypeDecimal256 = DataTypeDecimal<Decimal256>;
+
+/// Returns `data_type` as DataTypeDecimal<T> if it is exactly that type, otherwise nullptr.
+template <typename T>
+inline const DataTypeDecimal<T> * checkDecimal(const IDataType & data_type)
+{
+    using Target = const DataTypeDecimal<T> *;
+    return typeid_cast<Target>(&data_type);
+}
+
+/// Returns the scale of a Decimal32/64/128/256 or DateTime64 type.
+/// Throws LOGICAL_ERROR for any other type.
+inline UInt32 getDecimalScale(const IDataType & data_type)
+{
+    const auto * as_decimal32 = checkDecimal<Decimal32>(data_type);
+    if (as_decimal32)
+        return as_decimal32->getScale();
+
+    const auto * as_decimal64 = checkDecimal<Decimal64>(data_type);
+    if (as_decimal64)
+        return as_decimal64->getScale();
+
+    const auto * as_decimal128 = checkDecimal<Decimal128>(data_type);
+    if (as_decimal128)
+        return as_decimal128->getScale();
+
+    const auto * as_decimal256 = checkDecimal<Decimal256>(data_type);
+    if (as_decimal256)
+        return as_decimal256->getScale();
+
+    const auto * as_date_time64 = typeid_cast<const DataTypeDateTime64 *>(&data_type);
+    if (as_date_time64)
+        return as_date_time64->getScale();
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get decimal scale from type {}", data_type.getName());
+}
+
+/// Returns the precision of a Decimal32/64/128/256 or DateTime64 type.
+/// Throws LOGICAL_ERROR for any other type.
+inline UInt32 getDecimalPrecision(const IDataType & data_type)
+{
+    const auto * as_decimal32 = checkDecimal<Decimal32>(data_type);
+    if (as_decimal32)
+        return as_decimal32->getPrecision();
+
+    const auto * as_decimal64 = checkDecimal<Decimal64>(data_type);
+    if (as_decimal64)
+        return as_decimal64->getPrecision();
+
+    const auto * as_decimal128 = checkDecimal<Decimal128>(data_type);
+    if (as_decimal128)
+        return as_decimal128->getPrecision();
+
+    const auto * as_decimal256 = checkDecimal<Decimal256>(data_type);
+    if (as_decimal256)
+        return as_decimal256->getPrecision();
+
+    const auto * as_date_time64 = typeid_cast<const DataTypeDateTime64 *>(&data_type);
+    if (as_date_time64)
+        return as_date_time64->getPrecision();
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get decimal precision from type {}", data_type.getName());
+}
+
+/// Overload for a statically known decimal type: forwards to its getScale().
+template <typename T>
+inline UInt32 getDecimalScale(const DataTypeDecimal<T> & data_type)
+{
+    const UInt32 scale = data_type.getScale();
+    return scale;
+}
+
+/// Rescales the decimal `value` from `scale_from` to `scale_to` and stores it in `result`.
+///
+/// The arithmetic is done in the native type of the larger of the two field types.
+/// - scale_to > scale_from: multiplies by DecimalUtils::scaleMultiplier(scale_to - scale_from), with an overflow check;
+/// - scale_to == scale_from: the value is taken as is;
+/// - scale_to < scale_from: integer division by DecimalUtils::scaleMultiplier(scale_from - scale_to).
+/// If the source field type is larger than the target one, the converted value is also checked
+/// against the range of the target native type.
+///
+/// With ReturnType == void a failure throws DECIMAL_OVERFLOW; with any other ReturnType the function
+/// returns ReturnType(false) on failure. On success it returns ReturnType(true).
+template <typename FromDataType, typename ToDataType, typename ReturnType = void>
+requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
+inline ReturnType convertDecimalsImpl(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result)
+{
+    using FromFieldType = typename FromDataType::FieldType;
+    using ToFieldType = typename ToDataType::FieldType;
+    using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>;
+    using MaxNativeType = typename MaxFieldType::NativeType;
+
+    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
+
+    MaxNativeType converted_value;
+    if (scale_to > scale_from)
+    {
+        /// Keep the multiplier in its own variable: mulOverflow() writes the (wrapped) product into its
+        /// output argument even on overflow, so reusing one variable would report a wrong scale below.
+        const MaxNativeType multiplier = DecimalUtils::scaleMultiplier<MaxNativeType>(scale_to - scale_from);
+        if (common::mulOverflow(static_cast<MaxNativeType>(value.value), multiplier, converted_value))
+        {
+            if constexpr (throw_exception)
+                throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow while multiplying {} by scale {}",
+                                std::string(ToDataType::family_name), toString(value.value), toString(multiplier));
+            else
+                return ReturnType(false);
+        }
+    }
+    else if (scale_to == scale_from)
+    {
+        converted_value = value.value;
+    }
+    else
+    {
+        converted_value = value.value / DecimalUtils::scaleMultiplier<MaxNativeType>(scale_from - scale_to);
+    }
+
+    /// Narrowing conversion: make sure the value fits into the target native type.
+    if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType))
+    {
+        if (converted_value < std::numeric_limits<typename ToFieldType::NativeType>::min() ||
+            converted_value > std::numeric_limits<typename ToFieldType::NativeType>::max())
+        {
+            if constexpr (throw_exception)
+                throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow: {} is not in range ({}, {})",
+                                std::string(ToDataType::family_name), toString(converted_value),
+                                toString(std::numeric_limits<typename ToFieldType::NativeType>::min()),
+                                toString(std::numeric_limits<typename ToFieldType::NativeType>::max()));
+            else
+                return ReturnType(false);
+        }
+    }
+
+    result = static_cast<typename ToFieldType::NativeType>(converted_value);
+
+    return ReturnType(true);
+}
+
+/// Throwing variant of decimal-to-decimal conversion: returns the rescaled value,
+/// letting convertDecimalsImpl() throw on overflow.
+template <typename FromDataType, typename ToDataType>
+requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
+inline typename ToDataType::FieldType convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to)
+{
+    typename ToDataType::FieldType converted;
+    convertDecimalsImpl<FromDataType, ToDataType, void>(value, scale_from, scale_to, converted);
+    return converted;
+}
+
+/// Non-throwing variant of decimal-to-decimal conversion: returns false on overflow instead of throwing.
+template <typename FromDataType, typename ToDataType>
+requires (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
+inline bool tryConvertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to, typename ToDataType::FieldType & result)
+{
+    const bool converted = convertDecimalsImpl<FromDataType, ToDataType, bool>(value, scale_from, scale_to, result);
+    return converted;
+}
+
+/// Converts a decimal with the given scale to an arithmetic type by forwarding to DecimalUtils::convertToImpl().
+template <typename FromDataType, typename ToDataType, typename ReturnType>
+requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
+inline ReturnType convertFromDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
+{
+    using SourceDecimal = typename FromDataType::FieldType;
+    using TargetNumber = typename ToDataType::FieldType;
+
+    return DecimalUtils::convertToImpl<TargetNumber, SourceDecimal, ReturnType>(value, scale, result);
+}
+
+/// Throwing variant of decimal-to-number conversion: returns the converted value.
+template <typename FromDataType, typename ToDataType>
+requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
+inline typename ToDataType::FieldType convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
+{
+    using TargetNumber = typename ToDataType::FieldType;
+
+    TargetNumber converted;
+    convertFromDecimalImpl<FromDataType, ToDataType, void>(value, scale, converted);
+    return converted;
+}
+
+/// Non-throwing variant of decimal-to-number conversion: the bool result of the implementation is returned.
+template <typename FromDataType, typename ToDataType>
+requires (IsDataTypeDecimal<FromDataType> && is_arithmetic_v<typename ToDataType::FieldType>)
+inline bool tryConvertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
+{
+    const bool converted = convertFromDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
+    return converted;
+}
+
+/// Converts an arithmetic `value` to a decimal with the given `scale` and stores it in `result`.
+///
+/// Floating-point input: non-finite values are rejected; the value is multiplied by the scale multiplier
+/// and rejected when the product is <= min or >= max of the target native type.
+/// Integer input: the value is treated as a decimal of scale 0 (Decimal256 for big integers,
+/// Decimal128 for UInt64, Decimal64 otherwise) and passed to convertDecimalsImpl().
+///
+/// With ReturnType == void failures throw DECIMAL_OVERFLOW; otherwise ReturnType(false) is returned.
+template <typename FromDataType, typename ToDataType, typename ReturnType>
+requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
+inline ReturnType convertToDecimalImpl(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
+{
+    using SourceNumber = typename FromDataType::FieldType;
+    using TargetDecimal = typename ToDataType::FieldType;
+    using TargetNative = typename TargetDecimal::NativeType;
+
+    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
+
+    if constexpr (!std::is_floating_point_v<SourceNumber>)
+    {
+        if constexpr (is_big_int_v<SourceNumber>)
+            return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal256>, ToDataType, ReturnType>(static_cast<Int256>(value), 0, scale, result));
+        else if constexpr (std::is_same_v<SourceNumber, UInt64>)
+            return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal128>, ToDataType, ReturnType>(static_cast<Int128>(value), 0, scale, result));
+        else
+            return ReturnType(convertDecimalsImpl<DataTypeDecimal<Decimal64>, ToDataType, ReturnType>(static_cast<Int64>(value), 0, scale, result));
+    }
+    else
+    {
+        if (!std::isfinite(value))
+        {
+            if constexpr (throw_exception)
+                throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Cannot convert infinity or NaN to decimal", ToDataType::family_name);
+            else
+                return ReturnType(false);
+        }
+
+        const auto scaled = value * static_cast<SourceNumber>(DecimalUtils::scaleMultiplier<TargetNative>(scale));
+
+        const auto lower_bound = static_cast<SourceNumber>(std::numeric_limits<TargetNative>::min());
+        const auto upper_bound = static_cast<SourceNumber>(std::numeric_limits<TargetNative>::max());
+
+        if (scaled <= lower_bound || scaled >= upper_bound)
+        {
+            if constexpr (throw_exception)
+                throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "{} convert overflow. Float is out of Decimal range", ToDataType::family_name);
+            else
+                return ReturnType(false);
+        }
+
+        result = static_cast<TargetNative>(scaled);
+        return ReturnType(true);
+    }
+}
+
+/// Throwing variant of number-to-decimal conversion: returns the converted decimal.
+template <typename FromDataType, typename ToDataType>
+requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
+inline typename ToDataType::FieldType convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale)
+{
+    using TargetDecimal = typename ToDataType::FieldType;
+
+    TargetDecimal converted;
+    convertToDecimalImpl<FromDataType, ToDataType, void>(value, scale, converted);
+    return converted;
+}
+
+/// Non-throwing variant of number-to-decimal conversion: returns false when the value cannot be converted.
+template <typename FromDataType, typename ToDataType>
+requires (is_arithmetic_v<typename FromDataType::FieldType> && IsDataTypeDecimal<ToDataType>)
+inline bool tryConvertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale, typename ToDataType::FieldType& result)
+{
+    const bool converted = convertToDecimalImpl<FromDataType, ToDataType, bool>(value, scale, result);
+    return converted;
+}
+
+/// Creates DataTypeDecimal<T> with the maximum precision for T and the given scale.
+template <typename T>
+inline DataTypePtr createDecimalMaxPrecision(UInt64 scale)
+{
+    using DecimalType = DataTypeDecimal<T>;
+    return std::make_shared<DecimalType>(DecimalUtils::max_precision<T>, scale);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypesNumber.cpp b/contrib/clickhouse/src/DataTypes/DataTypesNumber.cpp
new file mode 100644
index 00000000000..232a5101cbe
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypesNumber.cpp
@@ -0,0 +1,98 @@
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeFactory.h>
+
+
+#include <Parsers/IAST.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+/// Factory callback creating DataTypeNumber<T>.
+///
+/// `arguments` may be null. When present, only the number of children is validated:
+/// integral types accept at most one argument, other types accept at most two.
+/// The argument values themselves are not read here.
+///
+/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH when too many arguments are given.
+template <typename T>
+static DataTypePtr createNumericDataType(const ASTPtr & arguments)
+{
+    if (arguments)
+    {
+        /// The check depends only on T, so select the branch at compile time.
+        if constexpr (std::is_integral_v<T>)
+        {
+            if (arguments->children.size() > 1)
+                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                                "{} data type family must not have more than one argument - display width", TypeName<T>);
+        }
+        else
+        {
+            if (arguments->children.size() > 2)
+                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                                "{} data type family must not have more than two arguments - total number "
+                                "of digits and number of digits following the decimal point", TypeName<T>);
+        }
+    }
+    return std::make_shared<DataTypeNumber<T>>();
+}
+
+
+/// Registers every numeric data type and its compatibility aliases in `factory`.
+void registerDataTypeNumbers(DataTypeFactory & factory)
+{
+    /// Types created through createNumericDataType (which validates the argument count).
+    factory.registerDataType("UInt8", createNumericDataType<UInt8>);
+    factory.registerDataType("UInt16", createNumericDataType<UInt16>);
+    factory.registerDataType("UInt32", createNumericDataType<UInt32>);
+    factory.registerDataType("UInt64", createNumericDataType<UInt64>);
+
+    factory.registerDataType("Int8", createNumericDataType<Int8>);
+    factory.registerDataType("Int16", createNumericDataType<Int16>);
+    factory.registerDataType("Int32", createNumericDataType<Int32>);
+    factory.registerDataType("Int64", createNumericDataType<Int64>);
+
+    factory.registerDataType("Float32", createNumericDataType<Float32>);
+    factory.registerDataType("Float64", createNumericDataType<Float64>);
+
+    /// Wide integers are registered as simple (argument-less) types.
+    factory.registerSimpleDataType("UInt128", [] { return DataTypePtr(std::make_shared<DataTypeUInt128>()); });
+    factory.registerSimpleDataType("UInt256", [] { return DataTypePtr(std::make_shared<DataTypeUInt256>()); });
+
+    factory.registerSimpleDataType("Int128", [] { return DataTypePtr(std::make_shared<DataTypeInt128>()); });
+    factory.registerSimpleDataType("Int256", [] { return DataTypePtr(std::make_shared<DataTypeInt256>()); });
+
+    /// These synonyms are added for compatibility.
+    /// All of them are case-insensitive and are registered in the order listed below.
+    struct AliasEntry
+    {
+        const char * alias;
+        const char * real_name;
+    };
+
+    static constexpr AliasEntry aliases[] =
+    {
+        {"TINYINT", "Int8"},
+        {"INT1", "Int8"}, /// MySQL
+        {"BYTE", "Int8"}, /// MS Access
+        {"SMALLINT", "Int16"},
+        {"INT", "Int32"},
+        {"INTEGER", "Int32"},
+        {"BIGINT", "Int64"},
+        {"FLOAT", "Float32"},
+        {"REAL", "Float32"},
+        {"SINGLE", "Float32"}, /// MS Access
+        {"DOUBLE", "Float64"},
+        {"MEDIUMINT", "Int32"}, /// MySQL
+
+        {"DOUBLE PRECISION", "Float64"},
+
+        /// MySQL
+        {"TINYINT SIGNED", "Int8"},
+        {"INT1 SIGNED", "Int8"},
+        {"SMALLINT SIGNED", "Int16"},
+        {"MEDIUMINT SIGNED", "Int32"},
+        {"INT SIGNED", "Int32"},
+        {"INTEGER SIGNED", "Int32"},
+        {"BIGINT SIGNED", "Int64"},
+        {"TINYINT UNSIGNED", "UInt8"},
+        {"INT1 UNSIGNED", "UInt8"},
+        {"SMALLINT UNSIGNED", "UInt16"},
+        {"MEDIUMINT UNSIGNED", "UInt32"},
+        {"INT UNSIGNED", "UInt32"},
+        {"INTEGER UNSIGNED", "UInt32"},
+        {"BIGINT UNSIGNED", "UInt64"},
+        {"BIT", "UInt64"}, /// MySQL
+        {"SET", "UInt64"}, /// MySQL
+        {"YEAR", "UInt16"},
+        {"TIME", "Int64"},
+    };
+
+    for (const auto & entry : aliases)
+        factory.registerAlias(entry.alias, entry.real_name, DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/DataTypesNumber.h b/contrib/clickhouse/src/DataTypes/DataTypesNumber.h
new file mode 100644
index 00000000000..5843086248c
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/DataTypesNumber.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <type_traits>
+#include <Core/Field.h>
+#include <DataTypes/DataTypeNumberBase.h>
+#include <DataTypes/Serializations/SerializationNumber.h>
+
+
+namespace DB
+{
+
+/// Concrete data type for a number with underlying C++ type T, built on DataTypeNumberBase<T>.
+template <typename T>
+class DataTypeNumber final : public DataTypeNumberBase<T>
+{
+public:
+    /// Types are considered equal when their dynamic C++ types coincide.
+    bool equals(const IDataType & rhs) const override { return typeid(*this) == typeid(rhs); }
+
+    /// Capability flags: all of them are enabled for number types.
+    bool canBeUsedAsVersion() const override { return true; }
+    bool isSummable() const override { return true; }
+    bool canBeUsedInBitOperations() const override { return true; }
+    bool canBeUsedInBooleanContext() const override { return true; }
+    bool canBeInsideNullable() const override { return true; }
+    bool canBePromoted() const override { return true; }
+
+    /// The promoted type is the number type over NearestFieldType<T>.
+    DataTypePtr promoteNumericType() const override
+    {
+        return std::make_shared<DataTypeNumber<NearestFieldType<T>>>();
+    }
+
+    /// Default serialization is SerializationNumber<T>.
+    SerializationPtr doGetDefaultSerialization() const override
+    {
+        return std::make_shared<SerializationNumber<T>>();
+    }
+};
+
+/// Convenience aliases for DataTypeNumber over the 8- to 64-bit integer types and the float types.
+using DataTypeUInt8 = DataTypeNumber<UInt8>;
+using DataTypeUInt16 = DataTypeNumber<UInt16>;
+using DataTypeUInt32 = DataTypeNumber<UInt32>;
+using DataTypeUInt64 = DataTypeNumber<UInt64>;
+using DataTypeInt8 = DataTypeNumber<Int8>;
+using DataTypeInt16 = DataTypeNumber<Int16>;
+using DataTypeInt32 = DataTypeNumber<Int32>;
+using DataTypeInt64 = DataTypeNumber<Int64>;
+using DataTypeFloat32 = DataTypeNumber<Float32>;
+using DataTypeFloat64 = DataTypeNumber<Float64>;
+
+/// Aliases for the 128- and 256-bit integer types.
+using DataTypeUInt128 = DataTypeNumber<UInt128>;
+using DataTypeInt128 = DataTypeNumber<Int128>;
+using DataTypeUInt256 = DataTypeNumber<UInt256>;
+using DataTypeInt256 = DataTypeNumber<Int256>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/EnumValues.cpp b/contrib/clickhouse/src/DataTypes/EnumValues.cpp
new file mode 100644
index 00000000000..9df49e765a7
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/EnumValues.cpp
@@ -0,0 +1,107 @@
+#include <DataTypes/EnumValues.h>
+#include <boost/algorithm/string.hpp>
+#include <base/sort.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int SYNTAX_ERROR;
+ extern const int EMPTY_DATA_PASSED;
+ extern const int UNKNOWN_ELEMENT_OF_ENUM;
+}
+
+/// Builds the enum description from (name, value) pairs.
+/// The pairs are copied, sorted by numeric value, and then indexed by fillMaps().
+/// Throws EMPTY_DATA_PASSED when `values_` is empty.
+template <typename T>
+EnumValues<T>::EnumValues(const Values & values_)
+    : values(values_)
+{
+    if (values.empty())
+        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "DataTypeEnum enumeration cannot be empty");
+
+    /// The comparator only reads its operands, so take them by const reference.
+    ::sort(std::begin(values), std::end(values), [] (const auto & left, const auto & right)
+    {
+        return left.second < right.second;
+    });
+
+    fillMaps();
+}
+
+/// Populates name_to_value_map and value_to_name_map from `values`.
+/// The StringRef keys reference the strings stored inside `values`.
+/// Throws SYNTAX_ERROR when a name or a value occurs more than once.
+template <typename T>
+void EnumValues<T>::fillMaps()
+{
+    for (const auto & entry : values)
+    {
+        const auto & name = entry.first;
+        const auto & id = entry.second;
+
+        const auto by_name = name_to_value_map.insert({ StringRef{name}, id });
+        if (!by_name.second)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Duplicate names in enum: '{}' = {} and {}",
+                            name, toString(id), toString(by_name.first->getMapped()));
+
+        const auto by_value = value_to_name_map.insert({ id, StringRef{name} });
+        if (!by_value.second)
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Duplicate values in enum: '{}' = {} and '{}'",
+                            name, toString(id), toString((*by_value.first).first));
+    }
+}
+
+/// Returns the numeric value registered for `field_name`.
+/// When the name is unknown and `try_treat_as_id` is set, the name is parsed as a number and
+/// accepted if the whole string was consumed and the number is a registered enum value.
+/// Otherwise throws UNKNOWN_ELEMENT_OF_ENUM, with name hints appended when there are any.
+template <typename T>
+T EnumValues<T>::getValue(StringRef field_name, bool try_treat_as_id) const
+{
+    if (const auto found = name_to_value_map.find(field_name))
+        return found->getMapped();
+
+    /// It is used in CSV and TSV input formats. If we fail to find given string in
+    /// enum names, we will try to treat it as enum id.
+    if (try_treat_as_id)
+    {
+        T id;
+        ReadBufferFromMemory id_buf(field_name.data, field_name.size);
+        readText(id, id_buf);
+
+        /// If the buffer is not exhausted, field_name is not a plain number.
+        const bool whole_name_is_number = id_buf.eof();
+        if (whole_name_is_number && value_to_name_map.find(id) != value_to_name_map.end())
+            return id;
+    }
+
+    const auto hints = this->getHints(field_name.toString());
+    String hints_string;
+    if (!hints.empty())
+        hints_string = ", maybe you meant: " + toString(hints);
+
+    throw Exception(ErrorCodes::UNKNOWN_ELEMENT_OF_ENUM, "Unknown element '{}' for enum{}", field_name.toString(), hints_string);
+}
+
+/// Returns the names of all enum elements, in the order they are stored in `values`.
+template <typename T>
+Names EnumValues<T>::getAllRegisteredNames() const
+{
+    Names result;
+    /// The final size is known up front; reserve to avoid repeated reallocations.
+    result.reserve(values.size());
+    for (const auto & value : values)
+        result.emplace_back(value.first);
+    return result;
+}
+
+/// Returns the set of all enum element names; names are lower-cased first when `to_lower` is set.
+template <typename T>
+std::unordered_set<String> EnumValues<T>::getSetOfAllNames(bool to_lower) const
+{
+    std::unordered_set<String> names;
+    for (const auto & entry : values)
+    {
+        if (to_lower)
+            names.insert(boost::algorithm::to_lower_copy(entry.first));
+        else
+            names.insert(entry.first);
+    }
+    return names;
+}
+
+/// Returns the set of all numeric values used by the enum elements.
+template <typename T>
+std::unordered_set<T> EnumValues<T>::getSetOfAllValues() const
+{
+    std::unordered_set<T> ids;
+    for (const auto & entry : values)
+    {
+        const T & id = entry.second;
+        ids.insert(id);
+    }
+    return ids;
+}
+
+/// Explicit instantiations: the member definitions above live in this .cpp file,
+/// so the Int8 and Int16 specializations are instantiated here.
+template class EnumValues<Int8>;
+template class EnumValues<Int16>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/EnumValues.h b/contrib/clickhouse/src/DataTypes/EnumValues.h
new file mode 100644
index 00000000000..2e6628adcf3
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/EnumValues.h
@@ -0,0 +1,90 @@
+#pragma once
+
+#include <unordered_map>
+#include <Common/HashTable/HashMap.h>
+#include <Common/NamePrompter.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+}
+
+/// Holds the (name, value) pairs of an enum together with lookup maps in both directions.
+/// T is the underlying numeric type of the enum values.
+template <typename T>
+class EnumValues : public IHints<1, EnumValues<T>>
+{
+public:
+    using Value = std::pair<std::string, T>;
+    using Values = std::vector<Value>;
+    using NameToValueMap = HashMap<StringRef, T, StringRefHash>;
+    using ValueToNameMap = std::unordered_map<T, StringRef>;
+
+private:
+    Values values;
+    NameToValueMap name_to_value_map;
+    ValueToNameMap value_to_name_map;
+
+    /// Fills both lookup maps from `values`.
+    void fillMaps();
+
+public:
+    explicit EnumValues(const Values & values_);
+
+    const Values & getValues() const { return values; }
+
+    /// Returns an iterator into the value -> name map; throws BAD_ARGUMENTS for an unknown value.
+    auto findByValue(const T & value) const
+    {
+        const auto found = value_to_name_map.find(value);
+        if (found == value_to_name_map.end())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value {} in enum", toString(value));
+
+        return found;
+    }
+
+    /// throws exception if value is not valid
+    const StringRef & getNameForValue(const T & value) const
+    {
+        return findByValue(value)->second;
+    }
+
+    /// returns false if value is not valid
+    bool getNameForValue(const T & value, StringRef & result) const
+    {
+        const auto found = value_to_name_map.find(value);
+        const bool is_known = found != value_to_name_map.end();
+        if (is_known)
+            result = found->second;
+
+        return is_known;
+    }
+
+    T getValue(StringRef field_name, bool try_treat_as_id = false) const;
+
+    /// Checks that every (name, value) pair of `rhs_values` is compatible with this enum.
+    template <typename TValues>
+    bool containsAll(const TValues & rhs_values) const
+    {
+        auto is_compatible = [&](const auto & candidate)
+        {
+            auto known_name = name_to_value_map.find(candidate.first);
+
+            /// If we have this name, then it should have the same value
+            if (known_name != name_to_value_map.end())
+                return known_name->value.second == candidate.second;
+
+            /// If we don't have this name, then we have to be sure
+            /// that this value exists in enum
+            return value_to_name_map.count(candidate.second) > 0;
+        };
+
+        return std::all_of(rhs_values.begin(), rhs_values.end(), is_compatible);
+    }
+
+    Names getAllRegisteredNames() const override;
+
+    std::unordered_set<String> getSetOfAllNames(bool to_lower) const;
+
+    std::unordered_set<T> getSetOfAllValues() const;
+};
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/FieldToDataType.cpp b/contrib/clickhouse/src/DataTypes/FieldToDataType.cpp
new file mode 100644
index 00000000000..210dab9921e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/FieldToDataType.cpp
@@ -0,0 +1,211 @@
+#include <DataTypes/FieldToDataType.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeObject.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeUUID.h>
+#include <DataTypes/DataTypeIPv4andIPv6.h>
+#include <DataTypes/getLeastSupertype.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <Common/Exception.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int EMPTY_DATA_PASSED;
+ extern const int NOT_IMPLEMENTED;
+}
+
+/// Null maps to Nullable(Nothing).
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Null &) const
+{
+    auto nothing = std::make_shared<DataTypeNothing>();
+    return std::make_shared<DataTypeNullable>(nothing);
+}
+
+/// Picks the narrowest unsigned integer type whose maximum is not less than `x`.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const UInt64 & x) const
+{
+    if (x <= std::numeric_limits<UInt8>::max())
+        return std::make_shared<DataTypeUInt8>();
+    else if (x <= std::numeric_limits<UInt16>::max())
+        return std::make_shared<DataTypeUInt16>();
+    else if (x <= std::numeric_limits<UInt32>::max())
+        return std::make_shared<DataTypeUInt32>();
+    else
+        return std::make_shared<DataTypeUInt64>();
+}
+
+/// Picks the narrowest signed integer type whose [min, max] range contains `x`.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Int64 & x) const
+{
+    using Limits8 = std::numeric_limits<Int8>;
+    using Limits16 = std::numeric_limits<Int16>;
+    using Limits32 = std::numeric_limits<Int32>;
+
+    if (x <= Limits8::max() && x >= Limits8::min())
+        return std::make_shared<DataTypeInt8>();
+    if (x <= Limits16::max() && x >= Limits16::min())
+        return std::make_shared<DataTypeInt16>();
+    if (x <= Limits32::max() && x >= Limits32::min())
+        return std::make_shared<DataTypeInt32>();
+
+    return std::make_shared<DataTypeInt64>();
+}
+
+/// Any floating point field maps to Float64; the value itself is not inspected.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Float64 &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeFloat64>());
+}
+
+/// Wide integer fields map to the data type of the same width and signedness;
+/// the value itself is not inspected.
+
+/// UInt128 -> DataTypeUInt128.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const UInt128 &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeUInt128>());
+}
+
+/// Int128 -> DataTypeInt128.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Int128 &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeInt128>());
+}
+
+/// UInt256 -> DataTypeUInt256.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const UInt256 &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeUInt256>());
+}
+
+/// Int256 -> DataTypeInt256.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Int256 &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeInt256>());
+}
+
+/// UUID -> DataTypeUUID.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const UUID &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeUUID>());
+}
+
+/// IPv4 -> DataTypeIPv4.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const IPv4 &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeIPv4>());
+}
+
+/// IPv6 -> DataTypeIPv6.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const IPv6 &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeIPv6>());
+}
+
+/// String -> DataTypeString.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const String &) const
+{
+    return DataTypePtr(std::make_shared<DataTypeString>());
+}
+
+/// Decimal fields map to the decimal data type of the same width, created with
+/// that type's maxPrecision() and the scale carried by the field.
+
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal32> & x) const
+{
+    const auto scale = x.getScale();
+    return std::make_shared<DataTypeDecimal<Decimal32>>(DataTypeDecimal<Decimal32>::maxPrecision(), scale);
+}
+
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal64> & x) const
+{
+    const auto scale = x.getScale();
+    return std::make_shared<DataTypeDecimal<Decimal64>>(DataTypeDecimal<Decimal64>::maxPrecision(), scale);
+}
+
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal128> & x) const
+{
+    const auto scale = x.getScale();
+    return std::make_shared<DataTypeDecimal<Decimal128>>(DataTypeDecimal<Decimal128>::maxPrecision(), scale);
+}
+
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal256> & x) const
+{
+    const auto scale = x.getScale();
+    return std::make_shared<DataTypeDecimal<Decimal256>>(DataTypeDecimal<Decimal256>::maxPrecision(), scale);
+}
+
+/// Array maps to Array(T), where T is the least supertype of the types inferred for its elements.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Array & x) const
+{
+    DataTypes nested_types;
+    nested_types.reserve(x.size());
+
+    for (const Field & item : x)
+        nested_types.push_back(applyVisitor(*this, item));
+
+    const auto common_type = getLeastSupertype<on_error>(nested_types);
+    return std::make_shared<DataTypeArray>(common_type);
+}
+
+/// Tuple maps to a DataTypeTuple of the types inferred for each element, in order.
+/// Throws EMPTY_DATA_PASSED for an empty tuple.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Tuple & tuple) const
+{
+    if (tuple.empty())
+        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot infer type of an empty tuple");
+
+    DataTypes types_of_elements;
+    types_of_elements.reserve(tuple.size());
+
+    for (const auto & item : tuple)
+    {
+        auto item_type = applyVisitor(*this, item);
+        types_of_elements.push_back(item_type);
+    }
+
+    return std::make_shared<DataTypeTuple>(types_of_elements);
+}
+
+/// Map maps to Map(K, V), where K and V are the least supertypes of the inferred key and value types.
+/// Every element of the map field is read as a Tuple, asserted to have exactly two elements.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Map & map) const
+{
+    const auto entries = map.size();
+
+    DataTypes types_of_keys;
+    DataTypes types_of_values;
+    types_of_keys.reserve(entries);
+    types_of_values.reserve(entries);
+
+    for (const auto & entry : map)
+    {
+        const auto & key_and_value = entry.safeGet<const Tuple &>();
+        assert(key_and_value.size() == 2);
+        types_of_keys.push_back(applyVisitor(*this, key_and_value[0]));
+        types_of_values.push_back(applyVisitor(*this, key_and_value[1]));
+    }
+
+    auto key_type = getLeastSupertype<on_error>(types_of_keys);
+    auto value_type = getLeastSupertype<on_error>(types_of_values);
+    return std::make_shared<DataTypeMap>(key_type, value_type);
+}
+
+/// Object maps to DataTypeObject constructed with the arguments ("json", false).
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const Object &) const
+{
+    /// TODO: Do we need different parameters for type Object?
+    return DataTypePtr(std::make_shared<DataTypeObject>("json", false));
+}
+
+/// The `name` member of the state data is resolved to a data type through DataTypeFactory.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const AggregateFunctionStateData & x) const
+{
+    return DataTypeFactory::instance().get(x.name);
+}
+
+/// Type inference is not supported for CustomType fields: always throws NOT_IMPLEMENTED.
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator() (const CustomType &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
+}
+
+/// bool maps to the type registered in DataTypeFactory under the name "Bool".
+template <LeastSupertypeOnError on_error>
+DataTypePtr FieldToDataType<on_error>::operator()(const bool &) const
+{
+    auto & factory = DataTypeFactory::instance();
+    return factory.get("Bool");
+}
+
+/// Explicit instantiations for the three LeastSupertypeOnError modes,
+/// since the member definitions above live in this .cpp file.
+template class FieldToDataType<LeastSupertypeOnError::Throw>;
+template class FieldToDataType<LeastSupertypeOnError::String>;
+template class FieldToDataType<LeastSupertypeOnError::Null>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/FieldToDataType.h b/contrib/clickhouse/src/DataTypes/FieldToDataType.h
new file mode 100644
index 00000000000..8febadc1a0d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/FieldToDataType.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <memory>
+#include <Core/Types.h>
+#include <Core/Field.h>
+#include <Common/FieldVisitors.h>
+#include <DataTypes/getLeastSupertype.h>
+
+
+namespace DB
+{
+
+class IDataType;
+using DataTypePtr = std::shared_ptr<const IDataType>;
+
+
+/** Visitor that, for a given Field, returns the minimum data type able to store its value.
+  * Note that the Field still has to be converted to the corresponding data type before it is
+  * inserted into a column (for example, elements of an Array must be converted to a common type).
+  * `on_error` is forwarded to getLeastSupertype when element types of containers are combined.
+  */
+template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
+class FieldToDataType : public StaticVisitor<DataTypePtr>
+{
+public:
+    /// Null and bool.
+    DataTypePtr operator() (const Null & x) const;
+    DataTypePtr operator() (const bool & x) const;
+
+    /// Integers.
+    DataTypePtr operator() (const UInt64 & x) const;
+    DataTypePtr operator() (const UInt128 & x) const;
+    DataTypePtr operator() (const UInt256 & x) const;
+    DataTypePtr operator() (const Int64 & x) const;
+    DataTypePtr operator() (const Int128 & x) const;
+    DataTypePtr operator() (const Int256 & x) const;
+
+    /// Floating point and decimals.
+    DataTypePtr operator() (const Float64 & x) const;
+    DataTypePtr operator() (const DecimalField<Decimal32> & x) const;
+    DataTypePtr operator() (const DecimalField<Decimal64> & x) const;
+    DataTypePtr operator() (const DecimalField<Decimal128> & x) const;
+    DataTypePtr operator() (const DecimalField<Decimal256> & x) const;
+
+    /// Other scalar types.
+    DataTypePtr operator() (const UUID & x) const;
+    DataTypePtr operator() (const IPv4 & x) const;
+    DataTypePtr operator() (const IPv6 & x) const;
+    DataTypePtr operator() (const String & x) const;
+
+    /// Containers.
+    DataTypePtr operator() (const Array & x) const;
+    DataTypePtr operator() (const Tuple & tuple) const;
+    DataTypePtr operator() (const Map & map) const;
+    DataTypePtr operator() (const Object & map) const;
+
+    /// Special fields.
+    DataTypePtr operator() (const AggregateFunctionStateData & x) const;
+    DataTypePtr operator() (const CustomType & x) const;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/IDataType.cpp b/contrib/clickhouse/src/DataTypes/IDataType.cpp
new file mode 100644
index 00000000000..4ffe82039b2
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/IDataType.cpp
@@ -0,0 +1,254 @@
+#include <cstddef>
+#include <Columns/IColumn.h>
+#include <Columns/ColumnConst.h>
+#include <Columns/ColumnSparse.h>
+
+#include <Common/Exception.h>
+#include <Common/SipHash.h>
+
+#include <IO/WriteHelpers.h>
+#include <IO/Operators.h>
+
+#include <DataTypes/IDataType.h>
+#include <DataTypes/DataTypeCustom.h>
+#include <DataTypes/NestedUtils.h>
+#include <DataTypes/Serializations/SerializationSparse.h>
+#include <DataTypes/Serializations/SerializationInfo.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int DATA_TYPE_CANNOT_BE_PROMOTED;
+ extern const int ILLEGAL_COLUMN;
+}
+
+IDataType::~IDataType() = default;
+
+/// Updates `avg_value_size_hint` from the average per-row byte size of `column`.
+/// Columns with 10 rows or fewer leave the hint untouched.
+void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint)
+{
+    /// Update the average value size hint only if the amount of read rows isn't too small.
+    const size_t rows = column.size();
+    if (rows <= 10)
+        return;
+
+    const double observed_avg = static_cast<double>(column.byteSize()) / rows;
+
+    /// Heuristic is chosen so that avg_value_size_hint increases rapidly but decreases slowly.
+    if (observed_avg > avg_value_size_hint)
+    {
+        /// Cap at 1024 to avoid overestimation.
+        avg_value_size_hint = std::min(1024., observed_avg);
+    }
+    else if (observed_avg * 2 < avg_value_size_hint)
+    {
+        avg_value_size_hint = (observed_avg + avg_value_size_hint * 3) / 4;
+    }
+}
+
+/// Creates an empty column for this type; it is wrapped into ColumnSparse
+/// when `serialization` is of the SPARSE kind.
+MutableColumnPtr IDataType::createColumn(const ISerialization & serialization) const
+{
+    auto plain_column = createColumn();
+
+    const bool is_sparse = serialization.getKind() == ISerialization::Kind::SPARSE;
+    if (!is_sparse)
+        return plain_column;
+
+    return ColumnSparse::create(std::move(plain_column));
+}
+
+/// Creates a ColumnConst of `size` rows over a one-element column holding `field`.
+ColumnPtr IDataType::createColumnConst(size_t size, const Field & field) const
+{
+    auto single_value = createColumn();
+    single_value->insert(field);
+
+    return ColumnConst::create(std::move(single_value), size);
+}
+
+
+/// Creates a constant column of `size` rows filled with the value returned by getDefault().
+ColumnPtr IDataType::createColumnConstWithDefaultValue(size_t size) const
+{
+    const auto default_value = getDefault();
+    return createColumnConst(size, default_value);
+}
+
+/// Base implementation: always throws DATA_TYPE_CANNOT_BE_PROMOTED.
+DataTypePtr IDataType::promoteNumericType() const
+{
+    const auto type_name = getName();
+    throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_PROMOTED, "Data type {} can't be promoted.", type_name);
+}
+
+/// Base implementation: always throws LOGICAL_ERROR.
+size_t IDataType::getSizeOfValueInMemory() const
+{
+    const auto type_name = getName();
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Value of type {} in memory is not of fixed size.", type_name);
+}
+
+/// Enumerates the streams of `data.serialization` and invokes `callback` for path prefixes
+/// that correspond to a subcolumn.
+/// `callback` receives the full substream path, the subcolumn name and the substream data
+/// created for that prefix.
+void IDataType::forEachSubcolumn(
+ const SubcolumnCallback & callback,
+ const SubstreamData & data)
+{
+ ISerialization::StreamCallback callback_with_data = [&](const auto & subpath)
+ {
+ for (size_t i = 0; i < subpath.size(); ++i)
+ {
+ size_t prefix_len = i + 1;
+ /// Only path elements not yet marked as visited are considered.
+ if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, prefix_len))
+ {
+ auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
+ auto subdata = ISerialization::createFromPath(subpath, prefix_len);
+ callback(subpath, name, subdata);
+ }
+ /// Mark the element as visited whether or not the callback was invoked for it.
+ subpath[i].visited = true;
+ }
+ };
+
+ ISerialization::EnumerateStreamsSettings settings;
+ settings.position_independent_encoding = false;
+ data.serialization->enumerateStreams(settings, callback_with_data, data);
+}
+
+/// Walks all subcolumns of `data` and returns the `member` of the substream data whose
+/// subcolumn name equals `subcolumn_name`. If several subcolumns match, the last one wins.
+/// When nothing (or a null pointer) was found, throws ILLEGAL_COLUMN if `throw_if_null` is set,
+/// otherwise returns a null pointer.
+template <typename Ptr>
+Ptr IDataType::getForSubcolumn(
+    std::string_view subcolumn_name,
+    const SubstreamData & data,
+    Ptr SubstreamData::*member,
+    bool throw_if_null) const
+{
+    Ptr found;
+
+    auto pick_matching = [&](const auto &, const auto & name, const auto & subdata)
+    {
+        if (name == subcolumn_name)
+            found = subdata.*member;
+    };
+    forEachSubcolumn(pick_matching, data);
+
+    if (throw_if_null && !found)
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
+
+    return found;
+}
+
+/// Returns true when tryGetSubcolumnType finds a type for `subcolumn_name`.
+bool IDataType::hasSubcolumn(std::string_view subcolumn_name) const
+{
+    const auto subcolumn_type = tryGetSubcolumnType(subcolumn_name);
+    return subcolumn_type != nullptr;
+}
+
+/// Returns the type of the subcolumn `subcolumn_name`, or a null pointer if there is none.
+DataTypePtr IDataType::tryGetSubcolumnType(std::string_view subcolumn_name) const
+{
+    auto substream_data = SubstreamData(getDefaultSerialization()).withType(getPtr());
+    return getForSubcolumn<DataTypePtr>(subcolumn_name, substream_data, &SubstreamData::type, /* throw_if_null = */ false);
+}
+
+/// Same as tryGetSubcolumnType, but a missing subcolumn is reported by getForSubcolumn as an exception.
+DataTypePtr IDataType::getSubcolumnType(std::string_view subcolumn_name) const
+{
+    auto substream_data = SubstreamData(getDefaultSerialization()).withType(getPtr());
+    return getForSubcolumn<DataTypePtr>(subcolumn_name, substream_data, &SubstreamData::type, /* throw_if_null = */ true);
+}
+
+/// Extracts the subcolumn `subcolumn_name` from `column`, or returns a null pointer if there is none.
+ColumnPtr IDataType::tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const
+{
+    auto substream_data = SubstreamData(getDefaultSerialization()).withColumn(column);
+    return getForSubcolumn<ColumnPtr>(subcolumn_name, substream_data, &SubstreamData::column, /* throw_if_null = */ false);
+}
+
+/// Same as tryGetSubcolumn, but a missing subcolumn is reported by getForSubcolumn as an exception.
+ColumnPtr IDataType::getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const
+{
+    auto substream_data = SubstreamData(getDefaultSerialization()).withColumn(column);
+    return getForSubcolumn<ColumnPtr>(subcolumn_name, substream_data, &SubstreamData::column, /* throw_if_null = */ true);
+}
+
+/// Returns the serialization of the subcolumn `subcolumn_name`, derived from `serialization`.
+/// A missing subcolumn is reported by getForSubcolumn as an exception.
+SerializationPtr IDataType::getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const
+{
+    auto substream_data = SubstreamData(serialization);
+    return getForSubcolumn<SerializationPtr>(
+        subcolumn_name, substream_data, &SubstreamData::serialization, /* throw_if_null = */ true);
+}
+
+/// Collects the names of all subcolumns enumerated for the default serialization.
+Names IDataType::getSubcolumnNames() const
+{
+    Names subcolumn_names;
+
+    auto collect_name = [&](const auto &, const auto & name, const auto &)
+    {
+        subcolumn_names.push_back(name);
+    };
+    forEachSubcolumn(collect_name, SubstreamData(getDefaultSerialization()));
+
+    return subcolumn_names;
+}
+
+/// Appends one default value to `column` by delegating to IColumn::insertDefault.
+void IDataType::insertDefaultInto(IColumn & column) const
+{
+    column.insertDefault();
+}
+
+/// Appends `n` default values to `column`, one insertDefaultInto call per value.
+void IDataType::insertManyDefaultsInto(IColumn & column, size_t n) const
+{
+    for (size_t remaining = n; remaining > 0; --remaining)
+        insertDefaultInto(column);
+}
+
+/// Takes over the custom name and/or custom serialization from `custom_desc_`.
+/// Parts that are null in the descriptor leave the current customization unchanged.
+void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
+{
+    auto & desc = *custom_desc_;
+
+    /// replace only if not null
+    if (desc.name)
+        custom_name = std::move(desc.name);
+
+    if (desc.serialization)
+        custom_serialization = std::move(desc.serialization);
+}
+
+/// Creates a SerializationInfo of the DEFAULT kind with the given settings.
+MutableSerializationInfoPtr IDataType::createSerializationInfo(const SerializationInfo::Settings & settings) const
+{
+    constexpr auto kind = ISerialization::Kind::DEFAULT;
+    return std::make_shared<SerializationInfo>(kind, settings);
+}
+
+/// Builds a SerializationInfo for `column` with default-constructed settings.
+/// A ColumnConst is unwrapped first, so the kind is taken from its data column.
+SerializationInfoPtr IDataType::getSerializationInfo(const IColumn & column) const
+{
+    const auto * as_const = checkAndGetColumn<ColumnConst>(&column);
+    if (as_const)
+        return getSerializationInfo(as_const->getDataColumn());
+
+    const auto kind = ISerialization::getKind(column);
+    return std::make_shared<SerializationInfo>(kind, SerializationInfo::Settings{});
+}
+
+/// Returns the custom serialization if one is set, otherwise the type's own default serialization.
+SerializationPtr IDataType::getDefaultSerialization() const
+{
+    return custom_serialization ? custom_serialization : doGetDefaultSerialization();
+}
+
+/// Wraps the default serialization into SerializationSparse.
+SerializationPtr IDataType::getSparseSerialization() const
+{
+    auto nested = getDefaultSerialization();
+    return std::make_shared<SerializationSparse>(nested);
+}
+
+/// Returns the sparse serialization when it is both supported by the type and requested by `kind`;
+/// otherwise returns the default serialization.
+SerializationPtr IDataType::getSerialization(ISerialization::Kind kind) const
+{
+    const bool use_sparse = supportsSparseSerialization() && kind == ISerialization::Kind::SPARSE;
+    if (!use_sparse)
+        return getDefaultSerialization();
+
+    return getSparseSerialization();
+}
+
+/// Chooses the serialization according to the kind stored in `info`.
+SerializationPtr IDataType::getSerialization(const SerializationInfo & info) const
+{
+    const auto kind = info.getKind();
+    return getSerialization(kind);
+}
+
+// static
+/// Returns the serialization for `column` chosen according to `info`.
+/// For a subcolumn, the serialization of the type in storage is obtained first and the
+/// subcolumn serialization is derived from it.
+SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, const SerializationInfo & info)
+{
+    if (!column.isSubcolumn())
+        return column.type->getSerialization(info);
+
+    const auto & storage_type = column.getTypeInStorage();
+    auto storage_serialization = storage_type->getSerialization(info);
+    return storage_type->getSubcolumnSerialization(column.getSubcolumnName(), storage_serialization);
+}
+
+// static
+/// Same as above, but based on the default serialization instead of one chosen by SerializationInfo.
+SerializationPtr IDataType::getSerialization(const NameAndTypePair & column)
+{
+    if (!column.isSubcolumn())
+        return column.type->getDefaultSerialization();
+
+    const auto & storage_type = column.getTypeInStorage();
+    auto storage_serialization = storage_type->getDefaultSerialization();
+    return storage_type->getSubcolumnSerialization(column.getSubcolumnName(), storage_serialization);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/IDataType.h b/contrib/clickhouse/src/DataTypes/IDataType.h
new file mode 100644
index 00000000000..54cb3d0d5c2
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/IDataType.h
@@ -0,0 +1,649 @@
+#pragma once
+
+#include <memory>
+#include <boost/noncopyable.hpp>
+#include <Core/Names.h>
+#include <Core/TypeId.h>
+#include <Common/COW.h>
+#include <DataTypes/DataTypeCustom.h>
+#include <DataTypes/Serializations/ISerialization.h>
+#include <DataTypes/Serializations/SerializationInfo.h>
+
+
+namespace DB
+{
+
+class ReadBuffer;
+class WriteBuffer;
+
+class IDataType;
+struct FormatSettings;
+
+class IColumn;
+using ColumnPtr = COW<IColumn>::Ptr;
+using MutableColumnPtr = COW<IColumn>::MutablePtr;
+
+class Field;
+
+using DataTypePtr = std::shared_ptr<const IDataType>;
+using DataTypes = std::vector<DataTypePtr>;
+
+struct NameAndTypePair;
+
+struct DataTypeWithConstInfo /// A data type paired with an `is_const` flag.
+{
+ DataTypePtr type;
+ bool is_const; /// NOTE(review): presumably tells whether the corresponding value is constant; has no default initializer, so it must be set explicitly - confirm at call sites.
+};
+
+using DataTypesWithConstInfo = std::vector<DataTypeWithConstInfo>;
+
+/** Properties of data type.
+ *
+ * Contains methods for getting serialization instances.
+ * One data type may have different serializations, which can be chosen
+ * dynamically before reading or writing, according to information about
+ * column content (see `getSerialization` methods).
+ *
+ * Implementations of this interface represent a data type (example: UInt8)
+ * or parametric family of data types (example: Array(...)).
+ *
+ * DataType is totally immutable object. You can always share them.
+ */
+class IDataType : private boost::noncopyable, public std::enable_shared_from_this<IDataType>
+{
+public:
+ IDataType() = default;
+ virtual ~IDataType();
+
+ /// Compile time flag. If false, then if C++ types are the same, then SQL types are also the same.
+ /// Example: DataTypeString is not parametric: thus all instances of DataTypeString are the same SQL type.
+ /// Example: DataTypeFixedString is parametric: different instances of DataTypeFixedString may be different SQL types.
+ /// Place it in descendants:
+ /// static constexpr bool is_parametric = false;
+
+ /// Name of data type (examples: UInt64, Array(String)). A custom name, if set, takes precedence over doGetName().
+ String getName() const
+ {
+ if (custom_name)
+ return custom_name->getName();
+ else
+ return doGetName();
+ }
+
+ DataTypePtr getPtr() const { return shared_from_this(); } /// Shared pointer to this object, obtained via shared_from_this().
+
+ /// Name of data type family (example: FixedString, Array).
+ virtual const char * getFamilyName() const = 0;
+ /// Name of corresponding data type in MySQL (example: Bigint, Blob, etc)
+ virtual String getSQLCompatibleName() const = 0;
+
+ /// Data type id. It's used for runtime type checks.
+ virtual TypeIndex getTypeId() const = 0;
+
+ bool hasSubcolumn(std::string_view subcolumn_name) const;
+
+ DataTypePtr tryGetSubcolumnType(std::string_view subcolumn_name) const;
+ DataTypePtr getSubcolumnType(std::string_view subcolumn_name) const;
+
+ ColumnPtr tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const;
+ ColumnPtr getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const;
+
+ SerializationPtr getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const;
+
+ using SubstreamData = ISerialization::SubstreamData;
+ using SubstreamPath = ISerialization::SubstreamPath;
+
+ using SubcolumnCallback = std::function<void(
+ const SubstreamPath &,
+ const String &,
+ const SubstreamData &)>;
+
+ static void forEachSubcolumn(
+ const SubcolumnCallback & callback,
+ const SubstreamData & data);
+
+ Names getSubcolumnNames() const;
+
+ virtual MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const;
+ virtual SerializationInfoPtr getSerializationInfo(const IColumn & column) const;
+
+ /// TODO: support more types. By default only types without subtypes support sparse serialization.
+ virtual bool supportsSparseSerialization() const { return !haveSubtypes(); }
+ virtual bool canBeInsideSparseColumns() const { return supportsSparseSerialization(); }
+
+ SerializationPtr getDefaultSerialization() const;
+ SerializationPtr getSparseSerialization() const;
+
+ /// Chooses serialization according to serialization kind.
+ SerializationPtr getSerialization(ISerialization::Kind kind) const;
+
+ /// Chooses serialization according to collected information about content of column.
+ virtual SerializationPtr getSerialization(const SerializationInfo & info) const;
+
+ /// Chooses between subcolumn serialization and regular serialization according to @column.
+ /// This method typically should be used to get serialization for reading column or subcolumn.
+ static SerializationPtr getSerialization(const NameAndTypePair & column, const SerializationInfo & info);
+
+ static SerializationPtr getSerialization(const NameAndTypePair & column); /// Same choice between subcolumn and regular serialization, based on the default serialization.
+
+protected:
+ virtual String doGetName() const { return getFamilyName(); } /// Defaults to the family name.
+ virtual SerializationPtr doGetDefaultSerialization() const = 0;
+
+public:
+ /** Create empty column for corresponding type and default serialization.
+ */
+ virtual MutableColumnPtr createColumn() const = 0;
+
+ /** Create empty column for corresponding type and serialization.
+ */
+ virtual MutableColumnPtr createColumn(const ISerialization & serialization) const;
+
+ /** Create ColumnConst for corresponding type, with specified size and value.
+ */
+ ColumnPtr createColumnConst(size_t size, const Field & field) const;
+ ColumnPtr createColumnConstWithDefaultValue(size_t size) const;
+
+ /** Get default value of data type.
+ * It is the "default" default, regardless the fact that a table could contain different user-specified default.
+ */
+ virtual Field getDefault() const = 0;
+
+ /** The data type can be promoted in order to try to avoid overflows.
+ * Data types which can be promoted are typically Number or Decimal data types.
+ */
+ virtual bool canBePromoted() const { return false; }
+
+ /** Return the promoted numeric data type of the current data type. Throw an exception if `canBePromoted() == false`.
+ */
+ virtual DataTypePtr promoteNumericType() const;
+
+ /** Directly insert default value into a column. Default implementation use method IColumn::insertDefault.
+ * This should be overridden if data type default value differs from column default value (example: Enum data types).
+ */
+ virtual void insertDefaultInto(IColumn & column) const;
+
+ void insertManyDefaultsInto(IColumn & column, size_t n) const;
+
+ /// Checks that two instances belong to the same type
+ virtual bool equals(const IDataType & rhs) const = 0;
+
+ /// Various properties on behaviour of data type.
+
+ /** The data type is dependent on parameters and types with different parameters are different.
+ * Examples: FixedString(N), Tuple(T1, T2), Nullable(T).
+ * Otherwise all instances of the same class are the same types.
+ */
+ virtual bool isParametric() const = 0;
+
+ /** The data type is dependent on parameters and at least one of them is another type.
+ * Examples: Tuple(T1, T2), Nullable(T). But FixedString(N) is not.
+ */
+ virtual bool haveSubtypes() const = 0;
+
+ /** Can appear in table definition.
+ * Counterexamples: Interval, Nothing.
+ */
+ virtual bool cannotBeStoredInTables() const { return false; }
+
+ /** In text formats that render "pretty" tables,
+ * is it better to align value right in table cell.
+ * Examples: numbers, even nullable.
+ */
+ virtual bool shouldAlignRightInPrettyFormats() const { return false; }
+
+ /** Does formatted value in any text format can contain anything but valid UTF8 sequences.
+ * Example: String (because it can contain arbitrary bytes).
+ * Counterexamples: numbers, Date, DateTime.
+ * For Enum, it depends.
+ */
+ virtual bool textCanContainOnlyValidUTF8() const { return false; }
+
+ /** Is it possible to compare for less/greater, to calculate min/max?
+ * Not necessarily totally comparable. For example, floats are comparable despite the fact that NaNs compares to nothing.
+ * The same for nullable of comparable types: they are comparable (but not totally-comparable).
+ */
+ virtual bool isComparable() const { return false; }
+
+ /** Does it make sense to use this type with COLLATE modifier in ORDER BY.
+ * Example: String, but not FixedString.
+ */
+ virtual bool canBeComparedWithCollation() const { return false; }
+
+ /** If the type is totally comparable (Ints, Date, DateTime, DateTime64, not nullable, not floats)
+ * and "simple" enough (not String, FixedString) to be used as version number
+ * (to select rows with maximum version).
+ */
+ virtual bool canBeUsedAsVersion() const { return false; }
+
+ /** Values of data type can be summed (possibly with overflow, within the same data type).
+ * Example: numbers, even nullable. Not Date/DateTime. Not Enum.
+ * Enums can be passed to aggregate function 'sum', but the result is Int64, not Enum, so they are not summable.
+ */
+ virtual bool isSummable() const { return false; }
+
+ /** Can be used in operations like bit and, bit shift, bit not, etc.
+ */
+ virtual bool canBeUsedInBitOperations() const { return false; }
+
+ /** Can be used in boolean context (WHERE, HAVING).
+ * UInt8, maybe nullable.
+ */
+ virtual bool canBeUsedInBooleanContext() const { return false; }
+
+ /** Numbers, Enums, Date, DateTime. Not nullable.
+ */
+ virtual bool isValueRepresentedByNumber() const { return false; }
+
+ /** Integers, Enums, Date, DateTime. Not nullable.
+ */
+ virtual bool isValueRepresentedByInteger() const { return false; }
+
+ /** Unsigned Integers, Date, DateTime. Not nullable.
+ */
+ virtual bool isValueRepresentedByUnsignedInteger() const { return false; }
+
+ /** Values are unambiguously identified by contents of contiguous memory region,
+ * that can be obtained by IColumn::getDataAt method.
+ * Examples: numbers, Date, DateTime, String, FixedString,
+ * and Arrays of numbers, Date, DateTime, FixedString, Enum, but not String.
+ * (because Array(String) values became ambiguous if you concatenate Strings).
+ * Counterexamples: Nullable, Tuple.
+ */
+ virtual bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const { return false; }
+
+ virtual bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const
+ {
+ return isValueRepresentedByNumber();
+ }
+
+ /** Example: numbers, Date, DateTime, FixedString, Enum... Nullable and Tuple of such types.
+ * Counterexamples: String, Array.
+ * It's Ok to return false for AggregateFunction despite the fact that some of them have fixed size state.
+ */
+ virtual bool haveMaximumSizeOfValue() const { return false; }
+
+ /** Size in amount of bytes in memory. Throws an exception if not haveMaximumSizeOfValue.
+ */
+ virtual size_t getMaximumSizeOfValueInMemory() const { return getSizeOfValueInMemory(); }
+
+ /** Throws an exception if value is not of fixed size.
+ */
+ virtual size_t getSizeOfValueInMemory() const;
+
+ /** Integers (not floats), Enum, String, FixedString.
+ */
+ virtual bool isCategorial() const { return false; }
+
+ virtual bool isNullable() const { return false; }
+
+ /** Can this type represent only the NULL value? (It also implies isNullable)
+ */
+ virtual bool onlyNull() const { return false; }
+
+ /** Whether this data type can be wrapped in Nullable data type (false by default).
+ */
+ virtual bool canBeInsideNullable() const { return false; }
+
+ virtual bool lowCardinality() const { return false; }
+
+ /// Checks if this type is LowCardinality(Nullable(...))
+ virtual bool isLowCardinalityNullable() const { return false; }
+
+ /// Strings, Numbers, Date, DateTime, Nullable
+ virtual bool canBeInsideLowCardinality() const { return false; }
+
+ /// Object, Array(Object), Tuple(..., Object, ...)
+ virtual bool hasDynamicSubcolumns() const { return false; }
+
+ /// Updates avg_value_size_hint for newly read column. Used to optimize deserialization. Zero expected for first column.
+ static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint);
+
+protected:
+ friend class DataTypeFactory;
+ friend class AggregateFunctionSimpleState;
+
+ /// Customize this DataType
+ void setCustomization(DataTypeCustomDescPtr custom_desc_) const;
+
+ /// This is mutable to allow setting custom name and serialization on `const IDataType` post construction.
+ mutable DataTypeCustomNamePtr custom_name;
+ mutable SerializationPtr custom_serialization;
+
+public:
+ const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } /// Null when no custom name is set.
+ const ISerialization * getCustomSerialization() const { return custom_serialization.get(); } /// Null when no custom serialization is set.
+
+private:
+ template <typename Ptr>
+ Ptr getForSubcolumn(
+ std::string_view subcolumn_name,
+ const SubstreamData & data,
+ Ptr SubstreamData::*member,
+ bool throw_if_null) const;
+};
+
+
+/// Some sugar to check data type of IDataType
+struct WhichDataType
+{
+ TypeIndex idx; /// Type id being inspected by the predicates below.
+
+ constexpr WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) : idx(idx_) {} /// NOLINT
+ constexpr WhichDataType(const IDataType & data_type) : idx(data_type.getTypeId()) {} /// NOLINT
+ constexpr WhichDataType(const IDataType * data_type) : idx(data_type->getTypeId()) {} /// NOLINT
+
+ // shared ptr -> is non-constexpr in gcc, hence this constructor is not constexpr
+ WhichDataType(const DataTypePtr & data_type) : idx(data_type->getTypeId()) {} /// NOLINT
+
+ constexpr bool isUInt8() const { return idx == TypeIndex::UInt8; }
+ constexpr bool isUInt16() const { return idx == TypeIndex::UInt16; }
+ constexpr bool isUInt32() const { return idx == TypeIndex::UInt32; }
+ constexpr bool isUInt64() const { return idx == TypeIndex::UInt64; }
+ constexpr bool isUInt128() const { return idx == TypeIndex::UInt128; }
+ constexpr bool isUInt256() const { return idx == TypeIndex::UInt256; }
+ constexpr bool isNativeUInt() const { return isUInt8() || isUInt16() || isUInt32() || isUInt64(); } /// 8..64 bit only, excludes UInt128/UInt256.
+ constexpr bool isUInt() const { return isNativeUInt() || isUInt128() || isUInt256(); }
+
+ constexpr bool isInt8() const { return idx == TypeIndex::Int8; }
+ constexpr bool isInt16() const { return idx == TypeIndex::Int16; }
+ constexpr bool isInt32() const { return idx == TypeIndex::Int32; }
+ constexpr bool isInt64() const { return idx == TypeIndex::Int64; }
+ constexpr bool isInt128() const { return idx == TypeIndex::Int128; }
+ constexpr bool isInt256() const { return idx == TypeIndex::Int256; }
+ constexpr bool isNativeInt() const { return isInt8() || isInt16() || isInt32() || isInt64(); } /// 8..64 bit only, excludes Int128/Int256.
+ constexpr bool isInt() const { return isNativeInt() || isInt128() || isInt256(); }
+
+ constexpr bool isDecimal32() const { return idx == TypeIndex::Decimal32; }
+ constexpr bool isDecimal64() const { return idx == TypeIndex::Decimal64; }
+ constexpr bool isDecimal128() const { return idx == TypeIndex::Decimal128; }
+ constexpr bool isDecimal256() const { return idx == TypeIndex::Decimal256; }
+ constexpr bool isDecimal() const { return isDecimal32() || isDecimal64() || isDecimal128() || isDecimal256(); }
+
+ constexpr bool isFloat32() const { return idx == TypeIndex::Float32; }
+ constexpr bool isFloat64() const { return idx == TypeIndex::Float64; }
+ constexpr bool isFloat() const { return isFloat32() || isFloat64(); }
+
+ constexpr bool isEnum8() const { return idx == TypeIndex::Enum8; }
+ constexpr bool isEnum16() const { return idx == TypeIndex::Enum16; }
+ constexpr bool isEnum() const { return isEnum8() || isEnum16(); }
+
+ constexpr bool isDate() const { return idx == TypeIndex::Date; }
+ constexpr bool isDate32() const { return idx == TypeIndex::Date32; }
+ constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; }
+ constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; }
+ constexpr bool isDateOrDate32() const { return isDate() || isDate32(); }
+
+ constexpr bool isString() const { return idx == TypeIndex::String; }
+ constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; }
+ constexpr bool isStringOrFixedString() const { return isString() || isFixedString(); }
+
+ constexpr bool isUUID() const { return idx == TypeIndex::UUID; }
+ constexpr bool isIPv4() const { return idx == TypeIndex::IPv4; }
+ constexpr bool isIPv6() const { return idx == TypeIndex::IPv6; }
+ constexpr bool isArray() const { return idx == TypeIndex::Array; }
+ constexpr bool isTuple() const { return idx == TypeIndex::Tuple; }
+ constexpr bool isMap() const {return idx == TypeIndex::Map; }
+ constexpr bool isSet() const { return idx == TypeIndex::Set; }
+ constexpr bool isInterval() const { return idx == TypeIndex::Interval; }
+ constexpr bool isObject() const { return idx == TypeIndex::Object; }
+
+ constexpr bool isNothing() const { return idx == TypeIndex::Nothing; }
+ constexpr bool isNullable() const { return idx == TypeIndex::Nullable; }
+ constexpr bool isFunction() const { return idx == TypeIndex::Function; }
+ constexpr bool isAggregateFunction() const { return idx == TypeIndex::AggregateFunction; }
+ constexpr bool isSimple() const { return isInt() || isUInt() || isFloat() || isString(); } /// Any Int/UInt (including 128/256 bit), Float or String.
+
+ constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; }
+};
+
+/// IDataType helpers (alternative for IDataType virtual methods with single point of truth)
+
+/// Each helper below builds a WhichDataType from @data_type (anything
+/// WhichDataType is constructible from) and forwards to the predicate
+/// of the same name.
+
+template <typename T>
+inline bool isDate(const T & data_type)
+{
+    return WhichDataType(data_type).isDate();
+}
+
+template <typename T>
+inline bool isDate32(const T & data_type)
+{
+    return WhichDataType(data_type).isDate32();
+}
+
+template <typename T>
+inline bool isDateOrDate32(const T & data_type)
+{
+    return WhichDataType(data_type).isDateOrDate32();
+}
+
+template <typename T>
+inline bool isDateTime(const T & data_type)
+{
+    return WhichDataType(data_type).isDateTime();
+}
+
+template <typename T>
+inline bool isDateTime64(const T & data_type)
+{
+    return WhichDataType(data_type).isDateTime64();
+}
+
+template <typename T>
+inline bool isEnum(const T & data_type)
+{
+    return WhichDataType(data_type).isEnum();
+}
+
+template <typename T>
+inline bool isDecimal(const T & data_type)
+{
+    return WhichDataType(data_type).isDecimal();
+}
+
+template <typename T>
+inline bool isTuple(const T & data_type)
+{
+    return WhichDataType(data_type).isTuple();
+}
+
+template <typename T>
+inline bool isArray(const T & data_type)
+{
+    return WhichDataType(data_type).isArray();
+}
+
+template <typename T>
+inline bool isMap(const T & data_type)
+{
+    return WhichDataType(data_type).isMap();
+}
+
+template <typename T>
+inline bool isInterval(const T & data_type)
+{
+    return WhichDataType(data_type).isInterval();
+}
+
+template <typename T>
+inline bool isNothing(const T & data_type)
+{
+    return WhichDataType(data_type).isNothing();
+}
+
+template <typename T>
+inline bool isUUID(const T & data_type)
+{
+    return WhichDataType(data_type).isUUID();
+}
+
+template <typename T>
+inline bool isIPv4(const T & data_type)
+{
+    return WhichDataType(data_type).isIPv4();
+}
+
+template <typename T>
+inline bool isIPv6(const T & data_type)
+{
+    return WhichDataType(data_type).isIPv6();
+}
+
+template <typename T>
+inline bool isObject(const T & data_type)
+{
+    return WhichDataType(data_type).isObject();
+}
+
+/// True if @data_type is UInt8.
+template <typename T>
+inline bool isUInt8(const T & data_type)
+{
+    const WhichDataType which(data_type);
+    return which.isUInt8();
+}
+
+/// True if @data_type is UInt64.
+template <typename T>
+inline bool isUInt64(const T & data_type)
+{
+    const WhichDataType which(data_type);
+    return which.isUInt64();
+}
+
+/// True if @data_type is any unsigned integer, including UInt128 and UInt256.
+template <typename T>
+inline bool isUnsignedInteger(const T & data_type)
+{
+    const WhichDataType which(data_type);
+    return which.isUInt();
+}
+
+/// True for any signed or unsigned integer, including the 128 and 256 bit ones.
+template <typename T>
+inline bool isInteger(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    if (kind.isInt())
+        return true;
+    return kind.isUInt();
+}
+
+/// True for Float32 and Float64.
+template <typename T>
+inline bool isFloat(const T & data_type)
+{
+    return WhichDataType(data_type).isFloat();
+}
+
+/// True for signed and unsigned integers of 8 to 64 bits.
+template <typename T>
+inline bool isNativeInteger(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    if (kind.isNativeInt())
+        return true;
+    return kind.isNativeUInt();
+}
+
+
+/// True for integers of 8 to 64 bits and for floats.
+template <typename T>
+inline bool isNativeNumber(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    if (kind.isNativeInt() || kind.isNativeUInt())
+        return true;
+    return kind.isFloat();
+}
+
+/// True for any integer, float or decimal.
+template <typename T>
+inline bool isNumber(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    if (kind.isInt() || kind.isUInt())
+        return true;
+    return kind.isFloat() || kind.isDecimal();
+}
+
+/// True for integers, floats, Date, Date32, DateTime, DateTime64, UUID, IPv4 and IPv6.
+template <typename T>
+inline bool isColumnedAsNumber(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+
+    if (kind.isInt() || kind.isUInt() || kind.isFloat())
+        return true;
+
+    if (kind.isDateOrDate32() || kind.isDateTime() || kind.isDateTime64())
+        return true;
+
+    return kind.isUUID() || kind.isIPv4() || kind.isIPv6();
+}
+
+/// True for Decimal types and DateTime64.
+template <typename T>
+inline bool isColumnedAsDecimal(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    if (kind.isDecimal())
+        return true;
+    return kind.isDateTime64();
+}
+
+// Same as isColumnedAsDecimal but also checks value type of underlying column.
+template <typename T, typename DataType>
+inline bool isColumnedAsDecimalT(const DataType & data_type)
+{
+    const WhichDataType kind(data_type);
+
+    if (!(kind.isDecimal() || kind.isDateTime64()))
+        return false;
+
+    return kind.idx == TypeToTypeIndex<T>;
+}
+
+/// True if @data_type is String.
+template <typename T>
+inline bool isString(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    return kind.isString();
+}
+
+/// True if @data_type is FixedString.
+template <typename T>
+inline bool isFixedString(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    return kind.isFixedString();
+}
+
+/// True if @data_type is String or FixedString.
+template <typename T>
+inline bool isStringOrFixedString(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    return kind.isStringOrFixedString();
+}
+
+/// True for Nothing, Function and Set.
+template <typename T>
+inline bool isNotCreatable(const T & data_type)
+{
+    const WhichDataType kind(data_type);
+    if (kind.isNothing())
+        return true;
+    return kind.isFunction() || kind.isSet();
+}
+
+/// True for any integer or float type.
+inline bool isNotDecimalButComparableToDecimal(const DataTypePtr & data_type)
+{
+    const WhichDataType kind(data_type);
+    if (kind.isInt() || kind.isUInt())
+        return true;
+    return kind.isFloat();
+}
+
+/// True if the name of the type is exactly "Bool".
+inline bool isBool(const DataTypePtr & data_type)
+{
+    const auto type_name = data_type->getName();
+    return type_name == "Bool";
+}
+
+/// True if @data_type is AggregateFunction.
+inline bool isAggregateFunction(const DataTypePtr & data_type)
+{
+    return WhichDataType(data_type).isAggregateFunction();
+}
+
+/// True if the type is Nullable or LowCardinality(Nullable(...)).
+inline bool isNullableOrLowCardinalityNullable(const DataTypePtr & data_type)
+{
+    if (data_type->isNullable())
+        return true;
+    return data_type->isLowCardinalityNullable();
+}
+
+template <typename DataType> constexpr bool IsDataTypeDecimal = false; /// Primary templates: false unless specialized below.
+template <typename DataType> constexpr bool IsDataTypeNumber = false;
+template <typename DataType> constexpr bool IsDataTypeDateOrDateTime = false;
+template <typename DataType> constexpr bool IsDataTypeDate = false;
+template <typename DataType> constexpr bool IsDataTypeEnum = false;
+
+template <typename DataType> constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal<DataType> || IsDataTypeNumber<DataType>;
+
+template <is_decimal T>
+class DataTypeDecimal;
+
+template <typename T>
+class DataTypeNumber;
+
+class DataTypeDate;
+class DataTypeDate32;
+class DataTypeDateTime;
+class DataTypeDateTime64;
+
+template <is_decimal T> constexpr bool IsDataTypeDecimal<DataTypeDecimal<T>> = true;
+template <> inline constexpr bool IsDataTypeDecimal<DataTypeDateTime64> = true; /// DateTime64 is also counted as a decimal data type.
+
+template <typename T> constexpr bool IsDataTypeNumber<DataTypeNumber<T>> = true;
+
+template <> inline constexpr bool IsDataTypeDate<DataTypeDate> = true;
+template <> inline constexpr bool IsDataTypeDate<DataTypeDate32> = true;
+
+template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDate> = true;
+template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDate32> = true;
+template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime> = true;
+template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime64> = true;
+
+template <typename T>
+class DataTypeEnum;
+
+template <typename T> inline constexpr bool IsDataTypeEnum<DataTypeEnum<T>> = true; /// True for every DataTypeEnum instantiation.
+
+#define FOR_BASIC_NUMERIC_TYPES(M) /* Expands M(Type) for each 8..64 bit integer type and both float types. */ \
+ M(UInt8) \
+ M(UInt16) \
+ M(UInt32) \
+ M(UInt64) \
+ M(Int8) \
+ M(Int16) \
+ M(Int32) \
+ M(Int64) \
+ M(Float32) \
+ M(Float64)
+
+#define FOR_NUMERIC_TYPES(M) /* Same list as FOR_BASIC_NUMERIC_TYPES plus the 128 and 256 bit integers. */ \
+ M(UInt8) \
+ M(UInt16) \
+ M(UInt32) \
+ M(UInt64) \
+ M(UInt128) \
+ M(UInt256) \
+ M(Int8) \
+ M(Int16) \
+ M(Int32) \
+ M(Int64) \
+ M(Int128) \
+ M(Int256) \
+ M(Float32) \
+ M(Float64)
+}
+
+/// See https://fmt.dev/latest/api.html#formatting-user-defined-types
+template <>
+struct fmt::formatter<DB::DataTypePtr>
+{
+    /// Parses the format specification. Only the empty specification ("{}")
+    /// is accepted; anything else throws fmt::format_error.
+    constexpr static auto parse(format_parse_context & ctx)
+    {
+        const auto * it = ctx.begin();
+        const auto * end = ctx.end();
+
+        /// Only support {}.
+        if (it != end && *it != '}')
+            throw fmt::format_error("invalid format");
+
+        return it;
+    }
+
+    /// Writes the name of the data type (IDataType::getName()) to the output.
+    /// The method is const-qualified, as fmt expects from formatter::format:
+    /// the formatter holds no state, and const formatter objects must be
+    /// usable (fmt invokes format() on const formatters in some code paths).
+    template <typename FormatContext>
+    auto format(const DB::DataTypePtr & type, FormatContext & ctx) const
+    {
+        return fmt::format_to(ctx.out(), "{}", type->getName());
+    }
+};
diff --git a/contrib/clickhouse/src/DataTypes/IDataTypeDummy.h b/contrib/clickhouse/src/DataTypes/IDataTypeDummy.h
new file mode 100644
index 00000000000..fcfcbe43375
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/IDataTypeDummy.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <Core/Field.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+}
+
+/** The base class for data types that do not support serialization and deserialization,
+ * but arise only as an intermediate result of the calculations.
+ *
+ * That is, this class is used just to distinguish the corresponding data type from the others.
+ */
+class IDataTypeDummy : public IDataType
+{
+public:
+    /// Dummy types have no column representation: always throws NOT_IMPLEMENTED.
+    MutableColumnPtr createColumn() const override
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method createColumn() is not implemented for data type {}", getName());
+    }
+
+    /// Dummy types have no default value: always throws NOT_IMPLEMENTED.
+    Field getDefault() const override
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDefault() is not implemented for data type {}", getName());
+    }
+
+    /// Nothing can be inserted into a column of a dummy type: always throws NOT_IMPLEMENTED.
+    void insertDefaultInto(IColumn &) const override
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertDefaultInto() is not implemented for data type {}", getName());
+    }
+
+    bool haveSubtypes() const override { return false; }
+    bool cannotBeStoredInTables() const override { return true; }
+
+    /// Dummy types cannot be serialized: always throws NOT_IMPLEMENTED.
+    SerializationPtr doGetDefaultSerialization() const override
+    {
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization is not implemented for data type {}", getName());
+    }
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Native.cpp b/contrib/clickhouse/src/DataTypes/Native.cpp
new file mode 100644
index 00000000000..fd3716c2291
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Native.cpp
@@ -0,0 +1,200 @@
+#include <DataTypes/Native.h>
+
+#if USE_EMBEDDED_COMPILER
+# include <DataTypes/DataTypeNullable.h>
+# include <Columns/ColumnConst.h>
+# include <Columns/ColumnNullable.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+ extern const int LOGICAL_ERROR;
+}
+
+/// A type counts as signed when it is a native (8..64 bit) signed integer,
+/// a float, an enum or Date32.
+bool typeIsSigned(const IDataType & type)
+{
+    const WhichDataType which(type);
+
+    if (which.isNativeInt() || which.isFloat())
+        return true;
+
+    return which.isEnum() || which.isDate32();
+}
+
+/// Builds the LLVM struct used for nullable values: { value of @type, i1 is_null }.
+llvm::Type * toNullableType(llvm::IRBuilderBase & builder, llvm::Type * type)
+{
+    return llvm::StructType::get(type, builder.getInt1Ty());
+}
+
+/// Native types are: 8..64 bit integers, floats, Date, Date32, DateTime, enums,
+/// and Nullable of any native type.
+bool canBeNativeType(const IDataType & type)
+{
+    const WhichDataType which(type);
+
+    /// Nullable is native exactly when its nested type is.
+    if (which.isNullable())
+        return canBeNativeType(*static_cast<const DataTypeNullable &>(type).getNestedType());
+
+    if (which.isNativeInt() || which.isNativeUInt() || which.isFloat())
+        return true;
+
+    if (which.isDate() || which.isDate32() || which.isDateTime())
+        return true;
+
+    return which.isEnum();
+}
+
+/// Convenience overload: dereferences @type and forwards to the IDataType overload.
+bool canBeNativeType(const DataTypePtr & type)
+{
+    const IDataType & data_type = *type;
+    return canBeNativeType(data_type);
+}
+
+/// Maps a data type to the LLVM type used to hold its values.
+/// Nullable maps to the struct produced by toNullableType around the nested type.
+/// Throws LOGICAL_ERROR for types that have no native representation.
+llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type)
+{
+    const WhichDataType which(type);
+
+    if (which.isNullable())
+    {
+        const auto & nullable_type = static_cast<const DataTypeNullable&>(type);
+        return toNullableType(builder, toNativeType(builder, *nullable_type.getNestedType()));
+    }
+
+    /// LLVM doesn't have unsigned types, it has unsigned instructions.
+    /// The checks below are therefore grouped by bit width only.
+    if (which.isInt8() || which.isUInt8() || which.isEnum8())
+        return builder.getInt8Ty();
+
+    if (which.isInt16() || which.isUInt16() || which.isDate() || which.isEnum16())
+        return builder.getInt16Ty();
+
+    if (which.isInt32() || which.isUInt32() || which.isDate32() || which.isDateTime())
+        return builder.getInt32Ty();
+
+    if (which.isInt64() || which.isUInt64())
+        return builder.getInt64Ty();
+
+    if (which.isFloat32())
+        return builder.getFloatTy();
+
+    if (which.isFloat64())
+        return builder.getDoubleTy();
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid cast to native type");
+}
+
+/// Convenience overload: dereferences @type and forwards to the IDataType overload.
+llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type)
+{
+    const IDataType & data_type = *type;
+    return toNativeType(builder, data_type);
+}
+
+/// Emits IR that converts @value to a boolean: "value != 0" for integers and
+/// floats. A nullable value is true only if it is not null and its nested
+/// value is true. Throws NOT_IMPLEMENTED for other native types.
+llvm::Value * nativeBoolCast(llvm::IRBuilderBase & b, const DataTypePtr & from_type, llvm::Value * value)
+{
+    if (from_type->isNullable())
+    {
+        /// Element 0 of the nullable struct is the nested value, element 1 is the null flag.
+        auto * nested_as_bool = nativeBoolCast(b, removeNullable(from_type), b.CreateExtractValue(value, {0}));
+        auto * is_not_null = b.CreateNot(b.CreateExtractValue(value, {1}));
+        return b.CreateAnd(is_not_null, nested_as_bool);
+    }
+
+    auto * value_type = value->getType();
+    auto * zero = llvm::Constant::getNullValue(value_type);
+
+    if (value_type->isIntegerTy())
+        return b.CreateICmpNE(value, zero);
+
+    if (value_type->isFloatingPointTy())
+        return b.CreateFCmpUNE(value, zero);
+
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot cast non-number {} to bool", from_type->getName());
+}
+
+/// Convenience overload: unpacks the type and the value and forwards them.
+llvm::Value * nativeBoolCast(llvm::IRBuilderBase & b, const ValueWithType & value_with_type)
+{
+    const auto & source_type = value_with_type.type;
+    auto * source_value = value_with_type.value;
+    return nativeBoolCast(b, source_type, source_value);
+}
+
+/** Emits IR that converts @value of type @from_type into @to_type and returns the result.
+  *
+  * - Equal types: the value is returned as is.
+  * - Nullable -> Nullable: the nested value is cast to @to_type and the original
+  *   null flag (struct element 1) is copied into the result.
+  * - Nullable -> non-nullable: only the nested value (struct element 0) is cast.
+  * - non-nullable -> Nullable: the cast value is stored as element 0 of a
+  *   zero-initialized nullable struct.
+  * - Otherwise a numeric conversion is chosen from the native LLVM types;
+  *   signedness for integer <-> float conversions comes from typeIsSigned().
+  *
+  * Throws LOGICAL_ERROR if no conversion applies.
+  */
+llvm::Value * nativeCast(llvm::IRBuilderBase & b, const DataTypePtr & from_type, llvm::Value * value, const DataTypePtr & to_type)
+{
+    if (from_type->equals(*to_type))
+    {
+        return value;
+    }
+    else if (from_type->isNullable() && to_type->isNullable())
+    {
+        auto * inner = nativeCast(b, removeNullable(from_type), b.CreateExtractValue(value, {0}), to_type);
+        return b.CreateInsertValue(inner, b.CreateExtractValue(value, {1}), {1});
+    }
+    else if (from_type->isNullable())
+    {
+        return nativeCast(b, removeNullable(from_type), b.CreateExtractValue(value, {0}), to_type);
+    }
+    else if (to_type->isNullable())
+    {
+        auto * to_native_type = toNativeType(b, to_type);
+        auto * inner = nativeCast(b, from_type, value, removeNullable(to_type));
+        return b.CreateInsertValue(llvm::Constant::getNullValue(to_native_type), inner, {0});
+    }
+    else
+    {
+        auto * from_native_type = toNativeType(b, from_type);
+        auto * to_native_type = toNativeType(b, to_type);
+
+        if (from_native_type == to_native_type)
+            return value;
+        else if (from_native_type->isIntegerTy() && to_native_type->isFloatingPointTy())
+            return typeIsSigned(*from_type) ? b.CreateSIToFP(value, to_native_type) : b.CreateUIToFP(value, to_native_type);
+        else if (from_native_type->isFloatingPointTy() && to_native_type->isIntegerTy())
+            return typeIsSigned(*to_type) ? b.CreateFPToSI(value, to_native_type) : b.CreateFPToUI(value, to_native_type);
+        /// Both sides are checked explicitly so that each branch stays correct
+        /// on its own, independently of the order of the branches above.
+        else if (from_native_type->isIntegerTy() && to_native_type->isIntegerTy())
+            return b.CreateIntCast(value, to_native_type, typeIsSigned(*from_type));
+        else if (from_native_type->isFloatingPointTy() && to_native_type->isFloatingPointTy())
+            return b.CreateFPCast(value, to_native_type);
+    }
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR,
+        "Invalid cast to native value from type {} to type {}",
+        from_type->getName(),
+        to_type->getName());
+}
+
+/// Convenience overload: unpacks the type and the value and forwards them.
+llvm::Value * nativeCast(llvm::IRBuilderBase & b, const ValueWithType & value, const DataTypePtr & to_type)
+{
+    const auto & source_type = value.type;
+    auto * source_value = value.value;
+    return nativeCast(b, source_type, source_value, to_type);
+}
+
+/// Builds an LLVM constant from the element of @column at @index.
+/// ColumnConst is unwrapped and its single data element (index 0) is used.
+/// Throws LOGICAL_ERROR for column types that are not handled below.
+llvm::Constant * getColumnNativeValue(llvm::IRBuilderBase & builder, const DataTypePtr & column_type, const IColumn & column, size_t index)
+{
+    if (const auto * const_column = typeid_cast<const ColumnConst *>(&column))
+        return getColumnNativeValue(builder, column_type, const_column->getDataColumn(), 0);
+
+    auto * native_type = toNativeType(builder, column_type);
+    const WhichDataType which(column_type);
+
+    if (which.isNullable())
+    {
+        const auto & nullable_type = assert_cast<const DataTypeNullable &>(*column_type);
+        const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
+
+        /// The nullable struct is { nested value, is_null flag }.
+        auto * nested_value = getColumnNativeValue(builder, nullable_type.getNestedType(), nullable_column.getNestedColumn(), index);
+        auto * null_flag = llvm::ConstantInt::get(native_type->getContainedType(1), nullable_column.isNullAt(index));
+
+        return llvm::ConstantStruct::get(static_cast<llvm::StructType *>(native_type), nested_value, null_flag);
+    }
+
+    if (which.isFloat32())
+        return llvm::ConstantFP::get(native_type, assert_cast<const ColumnVector<Float32> &>(column).getElement(index));
+
+    if (which.isFloat64())
+        return llvm::ConstantFP::get(native_type, assert_cast<const ColumnVector<Float64> &>(column).getElement(index));
+
+    /// Unsigned representation.
+    if (which.isNativeUInt() || which.isDate() || which.isDateTime())
+        return llvm::ConstantInt::get(native_type, column.getUInt(index));
+
+    /// Signed representation.
+    if (which.isNativeInt() || which.isEnum() || which.isDate32())
+        return llvm::ConstantInt::get(native_type, column.getInt(index));
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR,
+        "Cannot get native value for column with type {}",
+        column_type->getName());
+}
+
+}
+
+#endif
diff --git a/contrib/clickhouse/src/DataTypes/Native.h b/contrib/clickhouse/src/DataTypes/Native.h
new file mode 100644
index 00000000000..875248103c5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Native.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include "clickhouse_config.h"
+
+#if USE_EMBEDDED_COMPILER
+# include <Common/Exception.h>
+# include <Core/ValueWithType.h>
+# include <DataTypes/IDataType.h>
+# error #include <llvm/IR/IRBuilder.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+/// Returns true if type is signed, false otherwise
+bool typeIsSigned(const IDataType & type);
+
+/// Cast LLVM type to nullable LLVM type
+llvm::Type * toNullableType(llvm::IRBuilderBase & builder, llvm::Type * type);
+
+/// Returns true if type can be native LLVM type, false otherwise
+bool canBeNativeType(const IDataType & type);
+
+/// Returns true if type can be native LLVM type, false otherwise
+bool canBeNativeType(const DataTypePtr & type);
+
+/// Compile-time check by C++ type: true for the 8/16/32/64-bit signed and unsigned
+/// integer types and for Float32/Float64, false for every other Type.
+template <typename Type>
+static inline bool canBeNativeType()
+{
+    constexpr bool is_native_integer =
+        std::is_same_v<Type, Int8> || std::is_same_v<Type, UInt8>
+        || std::is_same_v<Type, Int16> || std::is_same_v<Type, UInt16>
+        || std::is_same_v<Type, Int32> || std::is_same_v<Type, UInt32>
+        || std::is_same_v<Type, Int64> || std::is_same_v<Type, UInt64>;
+
+    constexpr bool is_native_float = std::is_same_v<Type, Float32> || std::is_same_v<Type, Float64>;
+
+    return is_native_integer || is_native_float;
+}
+
+/// Cast type to native LLVM type
+llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const IDataType & type);
+
+/// Cast type to native LLVM type
+llvm::Type * toNativeType(llvm::IRBuilderBase & builder, const DataTypePtr & type);
+
+/// Returns the LLVM type for the native C++ type ToType: an 8/16/32/64-bit integer type
+/// (the same one for signed and unsigned), float for Float32, double for Float64.
+/// Throws LOGICAL_ERROR for any other ToType.
+template <typename ToType>
+static inline llvm::Type * toNativeType(llvm::IRBuilderBase & builder)
+{
+    constexpr bool is_8_bit = std::is_same_v<ToType, Int8> || std::is_same_v<ToType, UInt8>;
+    constexpr bool is_16_bit = std::is_same_v<ToType, Int16> || std::is_same_v<ToType, UInt16>;
+    constexpr bool is_32_bit = std::is_same_v<ToType, Int32> || std::is_same_v<ToType, UInt32>;
+    constexpr bool is_64_bit = std::is_same_v<ToType, Int64> || std::is_same_v<ToType, UInt64>;
+
+    if constexpr (is_8_bit)
+        return builder.getInt8Ty();
+    else if constexpr (is_16_bit)
+        return builder.getInt16Ty();
+    else if constexpr (is_32_bit)
+        return builder.getInt32Ty();
+    else if constexpr (is_64_bit)
+        return builder.getInt64Ty();
+    else if constexpr (std::is_same_v<ToType, Float32>)
+        return builder.getFloatTy();
+    else if constexpr (std::is_same_v<ToType, Float64>)
+        return builder.getDoubleTy();
+    else
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid cast to native type");
+}
+
+/// Creates DataTypeNumber<ToType> for the native integer and floating-point types.
+/// Throws LOGICAL_ERROR for any other ToType.
+template <typename ToType>
+static inline DataTypePtr toNativeDataType()
+{
+    constexpr bool is_native_number =
+        std::is_same_v<ToType, Int8> || std::is_same_v<ToType, UInt8>
+        || std::is_same_v<ToType, Int16> || std::is_same_v<ToType, UInt16>
+        || std::is_same_v<ToType, Int32> || std::is_same_v<ToType, UInt32>
+        || std::is_same_v<ToType, Int64> || std::is_same_v<ToType, UInt64>
+        || std::is_same_v<ToType, Float32> || std::is_same_v<ToType, Float64>;
+
+    if constexpr (is_native_number)
+        return std::make_shared<DataTypeNumber<ToType>>();
+    else
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid cast to native data type");
+}
+
+/// Cast LLVM value with type to bool
+llvm::Value * nativeBoolCast(llvm::IRBuilderBase & b, const DataTypePtr & from_type, llvm::Value * value);
+
+/// Cast LLVM value with type to bool
+llvm::Value * nativeBoolCast(llvm::IRBuilderBase & b, const ValueWithType & value_with_type);
+
+/// Cast LLVM value with type to specified type
+llvm::Value * nativeCast(llvm::IRBuilderBase & b, const DataTypePtr & from_type, llvm::Value * value, const DataTypePtr & to_type);
+
+/// Cast LLVM value with type to specified type
+llvm::Value * nativeCast(llvm::IRBuilderBase & b, const ValueWithType & value, const DataTypePtr & to_type);
+
+/// Casts `value`, whose source type is given by the C++ type FromType, to the data type `to`.
+/// The source data type is obtained with toNativeDataType<FromType>().
+template <typename FromType>
+static inline llvm::Value * nativeCast(llvm::IRBuilderBase & b, llvm::Value * value, const DataTypePtr & to)
+{
+    const DataTypePtr from_type = toNativeDataType<FromType>();
+    return nativeCast(b, from_type, value, to);
+}
+
+/// Get column value for specified index as LLVM constant
+llvm::Constant * getColumnNativeValue(llvm::IRBuilderBase & builder, const DataTypePtr & column_type, const IColumn & column, size_t index);
+
+}
+
+#endif
diff --git a/contrib/clickhouse/src/DataTypes/NestedUtils.cpp b/contrib/clickhouse/src/DataTypes/NestedUtils.cpp
new file mode 100644
index 00000000000..9ee803c4235
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/NestedUtils.cpp
@@ -0,0 +1,360 @@
+#include <cstring>
+#include <memory>
+
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+#include <Common/StringUtils/StringUtils.h>
+#include "Columns/IColumn.h"
+
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/NestedUtils.h>
+#include <DataTypes/DataTypeNested.h>
+
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnConst.h>
+
+#include <Parsers/IAST.h>
+
+#include <boost/algorithm/string/case_conv.hpp>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_COLUMN;
+ extern const int SIZES_OF_ARRAYS_DONT_MATCH;
+}
+
+namespace Nested
+{
+
+/// Joins a nested table name and a field name with a dot.
+/// If either part is empty, the other part is returned unchanged.
+std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name)
+{
+    const bool has_table = !nested_table_name.empty();
+    const bool has_field = !nested_field_name.empty();
+
+    if (has_table && has_field)
+        return nested_table_name + "." + nested_field_name;
+
+    return has_table ? nested_table_name : nested_field_name;
+}
+
+
+/** Splits a compound name at a dot: the last dot if `reverse` is set, the first one otherwise.
+  * The name is returned whole, with an empty second part, when it contains no dot
+  * or when the chosen dot is the first or the last character of the name.
+  */
+std::pair<std::string, std::string> splitName(const std::string & name, bool reverse)
+{
+    const auto dot_pos = reverse ? name.rfind('.') : name.find('.');
+
+    const bool no_dot = (dot_pos == std::string::npos);
+    if (no_dot || dot_pos == 0 || dot_pos + 1 == name.size())
+        return {name, {}};
+
+    return {name.substr(0, dot_pos), name.substr(dot_pos + 1)};
+}
+
+/// Same split as the std::string overload, but on views: no characters are copied.
+/// Both returned views refer to the characters of `name`.
+std::pair<std::string_view, std::string_view> splitName(std::string_view name, bool reverse)
+{
+    const auto dot_pos = reverse ? name.rfind('.') : name.find('.');
+
+    const bool no_dot = (dot_pos == std::string_view::npos);
+    if (no_dot || dot_pos == 0 || dot_pos + 1 == name.size())
+        return {name, {}};
+
+    return {name.substr(0, dot_pos), name.substr(dot_pos + 1)};
+}
+
+
+/// Returns the first part produced by splitName for `nested_name`:
+/// the text before the first dot, or the whole name when it is not split.
+std::string extractTableName(const std::string & nested_name)
+{
+    return splitName(nested_name).first;
+}
+
+
+/// Builds a new block in which columns of nested types are replaced by their element columns.
+///  - Array(Tuple with explicit names): one Array column per tuple element, named via concatenateName(elem.name, element name).
+///  - Tuple with explicit names: one column per element, only when `flatten_named_tuple` is set.
+///  - Every other column is inserted unchanged.
+static Block flattenImpl(const Block & block, bool flatten_named_tuple)
+{
+ Block res;
+
+ for (const auto & elem : block)
+ {
+ if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(elem.type.get()))
+ {
+ const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get());
+ if (type_tuple && type_tuple->haveExplicitNames())
+ {
+ const DataTypes & element_types = type_tuple->getElements();
+ const Strings & names = type_tuple->getElementNames();
+ size_t tuple_size = element_types.size();
+
+ /// For a constant column, work with its underlying data column.
+ bool is_const = isColumnConst(*elem.column);
+ const ColumnArray * column_array;
+ if (is_const)
+ column_array = typeid_cast<const ColumnArray *>(&assert_cast<const ColumnConst &>(*elem.column).getDataColumn());
+ else
+ column_array = typeid_cast<const ColumnArray *>(elem.column.get());
+
+ /// NOTE(review): column_array is dereferenced without a null check - confirm the column is always a ColumnArray here.
+ const ColumnPtr & column_offsets = column_array->getOffsetsPtr();
+
+ const ColumnTuple & column_tuple = typeid_cast<const ColumnTuple &>(column_array->getData());
+ const auto & element_columns = column_tuple.getColumns();
+
+ for (size_t i = 0; i < tuple_size; ++i)
+ {
+ String nested_name = concatenateName(elem.name, names[i]);
+ /// Every resulting array is created with the offsets of the original array column.
+ ColumnPtr column_array_of_element = ColumnArray::create(element_columns[i], column_offsets);
+
+ /// A constant source column yields constant result columns of block.rows() rows.
+ res.insert(ColumnWithTypeAndName(
+ is_const
+ ? ColumnConst::create(std::move(column_array_of_element), block.rows())
+ : column_array_of_element,
+ std::make_shared<DataTypeArray>(element_types[i]),
+ nested_name));
+ }
+ }
+ else
+ res.insert(elem);
+ }
+ else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(elem.type.get()); type_tuple && flatten_named_tuple)
+ {
+ if (type_tuple->haveExplicitNames())
+ {
+ const DataTypes & element_types = type_tuple->getElements();
+ const Strings & names = type_tuple->getElementNames();
+ const ColumnTuple * column_tuple;
+ if (isColumnConst(*elem.column))
+ column_tuple = typeid_cast<const ColumnTuple *>(&assert_cast<const ColumnConst &>(*elem.column).getDataColumn());
+ else
+ column_tuple = typeid_cast<const ColumnTuple *>(elem.column.get());
+ /// NOTE(review): unlike the Array branch, elements of a constant tuple are inserted
+ /// without being wrapped back into ColumnConst - confirm this is intended.
+ size_t tuple_size = column_tuple->tupleSize();
+ for (size_t i = 0; i < tuple_size; ++i)
+ {
+ const auto & element_column = column_tuple->getColumn(i);
+ String nested_name = concatenateName(elem.name, names[i]);
+ res.insert(ColumnWithTypeAndName(element_column.getPtr(), element_types[i], nested_name));
+ }
+ }
+ else
+ res.insert(elem);
+ }
+ else
+ res.insert(elem);
+ }
+
+ return res;
+}
+
+/// Flattens both Array(named Tuple) columns and named Tuple columns of the block.
+Block flatten(const Block & block)
+{
+    constexpr bool flatten_named_tuple = true;
+    return flattenImpl(block, flatten_named_tuple);
+}
+
+
+/// Flattens only Array(named Tuple) columns; named Tuple columns are left as they are.
+Block flattenArrayOfTuples(const Block & block)
+{
+    constexpr bool flatten_named_tuple = false;
+    return flattenImpl(block, flatten_named_tuple);
+}
+
+namespace
+{
+
+using NameToDataType = std::map<String, DataTypePtr>;
+
+/// Groups Array columns named `prefix.suffix` by prefix and creates one Nested type per prefix.
+/// Returns a map from the prefix to the created type.
+NameToDataType getSubcolumnsOfNested(const NamesAndTypesList & names_and_types)
+{
+    std::unordered_map<String, NamesAndTypesList> elements_by_prefix;
+    for (const auto & column : names_and_types)
+    {
+        const auto * array_type = typeid_cast<const DataTypeArray *>(column.type.get());
+
+        /// Ignore true Nested type, but try to unite flattened arrays into a Nested type.
+        if (isNested(column.type) || !array_type)
+            continue;
+
+        auto [prefix, suffix] = splitName(column.name);
+        if (suffix.empty())
+            continue;
+
+        elements_by_prefix[prefix].emplace_back(suffix, array_type->getNestedType());
+    }
+
+    NameToDataType result;
+    for (const auto & [prefix, elements] : elements_by_prefix)
+        result.emplace(prefix, createNested(elements.getTypes(), elements.getNames()));
+
+    return result;
+}
+
+}
+
+/// Replaces groups of Array columns named `prefix.element` with one column per prefix
+/// that has the Nested type built by getSubcolumnsOfNested. Other columns keep their order;
+/// the collected columns are appended after them.
+NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
+{
+    NamesAndTypesList result;
+    const auto nested_types = getSubcolumnsOfNested(names_and_types);
+
+    for (const auto & column : names_and_types)
+    {
+        /// Array columns whose prefix was collected are represented by the collected column instead.
+        const bool belongs_to_nested = isArray(column.type) && nested_types.contains(splitName(column.name).first);
+        if (!belongs_to_nested)
+            result.push_back(column);
+    }
+
+    for (const auto & [nested_name, nested_type] : nested_types)
+        result.emplace_back(nested_name, nested_type);
+
+    return result;
+}
+
+/// Returns a copy of the list in which Array columns named `prefix.suffix` are rewritten
+/// as subcolumn `suffix` of the Nested type created for `prefix`.
+/// Columns that are already subcolumns, are not arrays, or have no suffix are left untouched.
+NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types)
+{
+    const auto nested_types = getSubcolumnsOfNested(names_and_types);
+    NamesAndTypesList result = names_and_types;
+
+    for (auto & column : result)
+    {
+        if (!isArray(column.type))
+            continue;
+
+        const auto [prefix, suffix] = splitName(column.name);
+        if (column.isSubcolumn() || suffix.empty())
+            continue;
+
+        if (const auto it = nested_types.find(prefix); it != nested_types.end())
+            column = NameAndTypePair{prefix, suffix, it->second, it->second->getSubcolumnType(suffix)};
+    }
+
+    return result;
+}
+
+
+/// Checks that Array columns sharing the same Nested prefix (`prefix.name`) have equal offsets.
+/// Throws ILLEGAL_COLUMN if a column of Array type is not a ColumnArray, and
+/// SIZES_OF_ARRAYS_DONT_MATCH if two columns of one Nested structure have different array sizes.
+void validateArraySizes(const Block & block)
+{
+    /// Nested prefix -> position of first column in block.
+    std::map<std::string, size_t> first_position_by_prefix;
+
+    const size_t num_columns = block.columns();
+    for (size_t pos = 0; pos < num_columns; ++pos)
+    {
+        const auto & elem = block.getByPosition(pos);
+        if (!isArray(elem.type))
+            continue;
+
+        if (!typeid_cast<const ColumnArray *>(elem.column.get()))
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
+                "Column with Array type is not represented by ColumnArray column: {}",
+                elem.column->dumpStructure());
+
+        auto split = splitName(elem.name);
+
+        /// A name without a second part is not a column of a Nested data structure.
+        if (split.second.empty())
+            continue;
+
+        auto [it, inserted] = first_position_by_prefix.emplace(split.first, pos);
+
+        /// The first column seen for a prefix has nothing to be compared with yet.
+        if (inserted)
+            continue;
+
+        const auto & first_elem = block.getByPosition(it->second);
+        const ColumnArray & first_array_column = assert_cast<const ColumnArray &>(*first_elem.column);
+        const ColumnArray & another_array_column = assert_cast<const ColumnArray &>(*elem.column);
+
+        if (!first_array_column.hasEqualOffsets(another_array_column))
+            throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
+                "Elements '{}' and '{}' "
+                "of Nested data structure '{}' (Array columns) have different array sizes.",
+                first_elem.name, elem.name, split.first);
+    }
+}
+
+
+/// Collects the result of extractTableName for every column name of the block,
+/// lower-cased when `to_lower_case` is set. Empty results are skipped.
+std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
+{
+    std::unordered_set<String> table_names;
+    for (const auto & column_name : block.getNames())
+    {
+        auto table_name = Nested::extractTableName(column_name);
+        if (to_lower_case)
+            boost::to_lower(table_name);
+
+        if (table_name.empty())
+            continue;
+
+        table_names.insert(std::move(table_name));
+    }
+    return table_names;
+}
+
+/// Returns, in block order, the names of all columns for which extractTableName gives `table_name`.
+Names getAllNestedColumnsForTable(const Block & block, const std::string & table_name)
+{
+    Names matching_names;
+    for (const auto & column_name : block.getNames())
+    {
+        if (extractTableName(column_name) != table_name)
+            continue;
+
+        matching_names.push_back(column_name);
+    }
+    return matching_names;
+}
+
+}
+
+/// Stores a reference to `block_` (the block is not copied) and the case-sensitivity flag
+/// that is passed to every block lookup made by extractColumn.
+NestedColumnExtractHelper::NestedColumnExtractHelper(const Block & block_, bool case_insentive_)
+ : block(block_)
+ , case_insentive(case_insentive_)
+{}
+
+/// Looks up `column_name` in the block. If there is no such column, the name is split at its
+/// first dot and the column is searched among the flattened elements of the column named by the prefix.
+/// Returns an empty optional when neither lookup succeeds.
+std::optional<ColumnWithTypeAndName> NestedColumnExtractHelper::extractColumn(const String & column_name)
+{
+    /// The column is present in the block as is.
+    if (block.has(column_name, case_insentive))
+        return {block.getByName(column_name, case_insentive)};
+
+    auto [table_name, element_name] = Nested::splitName(column_name);
+    if (case_insentive)
+    {
+        boost::to_lower(table_name);
+        boost::to_lower(element_name);
+    }
+
+    if (!block.has(table_name, case_insentive))
+        return {};
+
+    /// Flatten the column once and keep the result for later lookups.
+    if (!nested_tables.contains(table_name))
+    {
+        ColumnsWithTypeAndName columns = {block.getByName(table_name, case_insentive)};
+        nested_tables[table_name] = std::make_shared<Block>(Nested::flatten(columns));
+    }
+
+    return extractColumn(column_name, table_name, element_name);
+}
+
+/// Recursive step of the lookup: searches for `column_name_suffix` inside the flattened block
+/// stored in nested_tables under `column_name_prefix`.
+/// `original_column_name` is the name requested by the caller; it is only used to name the result
+/// in case-insensitive mode. Returns an empty optional if any level of the path is missing.
+std::optional<ColumnWithTypeAndName> NestedColumnExtractHelper::extractColumn(
+ const String & original_column_name, const String & column_name_prefix, const String & column_name_suffix)
+{
+ auto table_iter = nested_tables.find(column_name_prefix);
+ if (table_iter == nested_tables.end())
+ {
+ return {};
+ }
+
+ auto & nested_table = table_iter->second;
+ /// Take the next path component off the suffix and append it to the prefix.
+ auto nested_names = Nested::splitName(column_name_suffix);
+ auto new_column_name_prefix = Nested::concatenateName(column_name_prefix, nested_names.first);
+ /// Nothing is left after this component: it must name the requested column itself.
+ if (nested_names.second.empty())
+ {
+ if (auto * column_ref = nested_table->findByName(new_column_name_prefix, case_insentive))
+ {
+ ColumnWithTypeAndName column = *column_ref;
+ /// In case-insensitive mode the result is renamed to the name the caller asked for.
+ if (case_insentive)
+ column.name = original_column_name;
+ return {std::move(column)};
+ }
+ else
+ {
+ return {};
+ }
+ }
+
+ if (!nested_table->has(new_column_name_prefix, case_insentive))
+ {
+ return {};
+ }
+
+ /// Flatten the intermediate column, store it under the extended prefix and descend one level.
+ /// NOTE(review): the entry is overwritten on every call that reaches this point, even if it already exists.
+ ColumnsWithTypeAndName columns = {nested_table->getByName(new_column_name_prefix, case_insentive)};
+ Block sub_block(columns);
+ nested_tables[new_column_name_prefix] = std::make_shared<Block>(Nested::flatten(sub_block));
+ return extractColumn(original_column_name, new_column_name_prefix, nested_names.second);
+}
+}
diff --git a/contrib/clickhouse/src/DataTypes/NestedUtils.h b/contrib/clickhouse/src/DataTypes/NestedUtils.h
new file mode 100644
index 00000000000..e009ceb18fe
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/NestedUtils.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <Core/Block.h>
+#include <Core/NamesAndTypes.h>
+
+
+namespace DB
+{
+
+namespace Nested
+{
+ /// Joins the two names with a dot; if either one is empty, the other is returned as is.
+ std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name);
+
+ /// Splits the name of a compound identifier at the first dot, or at the last dot if 'reverse' is set.
+ std::pair<std::string, std::string> splitName(const std::string & name, bool reverse = false);
+ std::pair<std::string_view, std::string_view> splitName(std::string_view name, bool reverse = false);
+
+ /// Returns the prefix of the name up to the first '.', or the name unchanged if it cannot be split.
+ std::string extractTableName(const std::string & nested_name);
+
+ /// Flattens columns of nested types into separate columns:
+ /// 1) A named tuple column `t Tuple(x ..., y ..., ...)` is replaced with columns t.x ..., t.y ..., ...
+ /// 2) An array of named tuples `a Array(Tuple(x ..., y ..., ...))` is replaced with multiple Array columns a.x ..., a.y ..., ...
+ Block flatten(const Block & block);
+
+ /// Same as flatten, but only for Array columns with named Tuple elements.
+ Block flattenArrayOfTuples(const Block & block);
+
+ /// Collects Array columns named `column_name.element_name` into a single Array(Tuple(...)) column.
+ NamesAndTypesList collect(const NamesAndTypesList & names_and_types);
+
+ /// Converts old-style nested columns (separate arrays with the same prefix: `n.a`, `n.b`, ...) to subcolumns of data type Nested.
+ NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types);
+
+ /// Checks that the sizes of arrays that are elements of the same nested data structure are equal.
+ void validateArraySizes(const Block & block);
+
+ /// Gets the names of all nested tables from a block.
+ std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
+
+ /// Extracts the names of all columns that are nested in the specified table.
+ Names getAllNestedColumnsForTable(const Block & block, const std::string & table_name);
+}
+
+/// Use this class to extract element columns from columns of nested types in a block, e.g. a named Tuple.
+/// It can extract a column through several levels of nesting, e.g. a named Tuple inside a named Tuple.
+/// Keeps some intermediate data to avoid rebuilding it multiple times.
+class NestedColumnExtractHelper
+{
+public:
+ /// The block is stored by reference, so it must outlive the helper.
+ explicit NestedColumnExtractHelper(const Block & block_, bool case_insentive_);
+ /// Returns the column with the given (possibly dotted) name, or an empty optional if it cannot be found.
+ std::optional<ColumnWithTypeAndName> extractColumn(const String & column_name);
+private:
+ /// Recursive lookup of `column_name_suffix` inside the flattened block stored for `column_name_prefix`.
+ std::optional<ColumnWithTypeAndName>
+ extractColumn(const String & original_column_name, const String & column_name_prefix, const String & column_name_suffix);
+ const Block & block;
+ bool case_insentive;
+ /// Flattened blocks, keyed by the name prefix they were built for.
+ std::map<String, BlockPtr> nested_tables;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/NumberTraits.h b/contrib/clickhouse/src/DataTypes/NumberTraits.h
new file mode 100644
index 00000000000..cf283d3358c
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/NumberTraits.h
@@ -0,0 +1,244 @@
+#pragma once
+
+#include <type_traits>
+
+#include <Core/Types.h>
+
+
+namespace DB
+{
+
+/** Allows getting the result type of the functions +, -, *, /, %, intDiv (integer division).
+ * The rules are different from those used in C++.
+ */
+
+namespace NumberTraits
+{
+
+/// Placeholder result type for combinations that have no valid result (see the primary Construct template).
+struct Error {};
+
+/// Returns the larger of the two sizes; usable in constant expressions.
+constexpr size_t max(size_t x, size_t y)
+{
+    if (x > y)
+        return x;
+    return y;
+}
+
+/// Returns the smaller of the two sizes; usable in constant expressions.
+constexpr size_t min(size_t x, size_t y)
+{
+    if (x < y)
+        return x;
+    return y;
+}
+
+/// Doubles a size that is smaller than 8 bytes and returns any other size unchanged.
+/// @note There is no automatic scaling to larger big integers, only to the ordinary integral types.
+/// This is because of (U)Int64 backward compatibility and very big performance penalties.
+constexpr size_t nextSize(size_t size)
+{
+    return size < 8 ? size * 2 : size;
+}
+
+/// Maps (signedness, floating-point flag, size in bytes) to a numeric type.
+/// Combinations without a specialization below yield Error.
+template <bool is_signed, bool is_floating, size_t size>
+struct Construct
+{
+ using Type = Error;
+};
+
+/// Unsigned integers.
+template <> struct Construct<false, false, 1> { using Type = UInt8; };
+template <> struct Construct<false, false, 2> { using Type = UInt16; };
+template <> struct Construct<false, false, 4> { using Type = UInt32; };
+template <> struct Construct<false, false, 8> { using Type = UInt64; };
+template <> struct Construct<false, false, 16> { using Type = UInt128; };
+template <> struct Construct<false, false, 32> { using Type = UInt256; };
+/// Floating point: the signedness flag is ignored and sizes up to 4 bytes give Float32.
+template <> struct Construct<false, true, 1> { using Type = Float32; };
+template <> struct Construct<false, true, 2> { using Type = Float32; };
+template <> struct Construct<false, true, 4> { using Type = Float32; };
+template <> struct Construct<false, true, 8> { using Type = Float64; };
+/// Signed integers.
+template <> struct Construct<true, false, 1> { using Type = Int8; };
+template <> struct Construct<true, false, 2> { using Type = Int16; };
+template <> struct Construct<true, false, 4> { using Type = Int32; };
+template <> struct Construct<true, false, 8> { using Type = Int64; };
+template <> struct Construct<true, false, 16> { using Type = Int128; };
+template <> struct Construct<true, false, 32> { using Type = Int256; };
+/// Floating point again, for the signed flag set.
+template <> struct Construct<true, true, 1> { using Type = Float32; };
+template <> struct Construct<true, true, 2> { using Type = Float32; };
+template <> struct Construct<true, true, 4> { using Type = Float32; };
+template <> struct Construct<true, true, 8> { using Type = Float64; };
+
+
+/** The result of addition or multiplication is calculated according to the following rules:
+ * - if one of the arguments is floating-point, the result is floating-point, otherwise it is an integer;
+ * - if one of the arguments is signed, the result is signed, otherwise it is unsigned;
+ * - the result is one size step wider (see nextSize) than the larger of the arguments
+ * (for example, UInt8 + Int32 = Int64).
+ */
+template <typename A, typename B> struct ResultOfAdditionMultiplication
+{
+ using Type = typename Construct<
+ is_signed_v<A> || is_signed_v<B>,
+ std::is_floating_point_v<A> || std::is_floating_point_v<B>,
+ nextSize(max(sizeof(A), sizeof(B)))>::Type;
+};
+
+/** The result of subtraction follows the same size and floating-point rules as addition,
+ * but it is always signed, whatever the signedness of the arguments.
+ */
+template <typename A, typename B> struct ResultOfSubtraction
+{
+ using Type = typename Construct<
+ true,
+ std::is_floating_point_v<A> || std::is_floating_point_v<B>,
+ nextSize(max(sizeof(A), sizeof(B)))>::Type;
+};
+
+/** Floating-point division always gives Float64, regardless of the argument types.
+ */
+template <typename A, typename B> struct ResultOfFloatingPointDivision
+{
+ using Type = Float64;
+};
+
+/** For integer division, we get an integer with the same number of bits as the dividend (A).
+ * The result is signed if either argument is signed.
+ */
+template <typename A, typename B> struct ResultOfIntegerDivision
+{
+ using Type = typename Construct<
+ is_signed_v<A> || is_signed_v<B>,
+ false,
+ sizeof(A)>::Type;
+};
+
+/** For division with remainder, we get a number with the same number of bits as the divisor (B),
+ * or a wider one if the dividend (A) is signed. If either argument is floating-point, the result is Float64.
+ */
+template <typename A, typename B> struct ResultOfModulo
+{
+ /// The sign of the result is taken from the dividend only.
+ static constexpr bool result_is_signed = is_signed_v<A>;
+ /// If modulo of division can yield negative number, we need larger type to accommodate it.
+ /// Example: toInt32(-199) % toUInt8(200) will return -199 that does not fit in Int8, only in Int16.
+ static constexpr size_t size_of_result = result_is_signed ? nextSize(sizeof(B)) : sizeof(B);
+ using Type0 = typename Construct<result_is_signed, false, size_of_result>::Type;
+ using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
+};
+
+/** Result of positive modulo: an unsigned integer of the divisor's (B) size,
+ * or Float64 if either argument is floating-point.
+ */
+template <typename A, typename B> struct ResultOfPositiveModulo
+{
+ /// The positive_modulo function always returns a non-negative number.
+ static constexpr size_t size_of_result = sizeof(B);
+ using Type0 = typename Construct<false, false, size_of_result>::Type;
+ using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
+};
+
+
+/** Legacy modulo rule: an integer of the divisor's (B) size, signed if either argument is signed
+ * and never widened; Float64 if either argument is floating-point.
+ */
+template <typename A, typename B> struct ResultOfModuloLegacy
+{
+ using Type0 = typename Construct<is_signed_v<A> || is_signed_v<B>, false, sizeof(B)>::Type;
+ using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
+};
+
+/** The result of negation is always signed. A signed argument keeps its size;
+ * an unsigned one is widened with nextSize.
+ */
+template <typename A> struct ResultOfNegate
+{
+ using Type = typename Construct<
+ true,
+ std::is_floating_point_v<A>,
+ is_signed_v<A> ? sizeof(A) : nextSize(sizeof(A))>::Type;
+};
+
+/** The result of abs has the same size as the argument and is constructed as unsigned;
+ * a floating-point argument stays floating-point.
+ */
+template <typename A> struct ResultOfAbs
+{
+ using Type = typename Construct<
+ false,
+ std::is_floating_point_v<A>,
+ sizeof(A)>::Type;
+};
+
+/** For bitwise operations, the result is an integer whose size is the maximum of the argument sizes,
+ * or 8 bytes if either argument is floating-point. It is signed if either argument is signed.
+ */
+template <typename A, typename B> struct ResultOfBit
+{
+ using Type = typename Construct<
+ is_signed_v<A> || is_signed_v<B>,
+ false,
+ std::is_floating_point_v<A> || std::is_floating_point_v<B> ? 8 : max(sizeof(A), sizeof(B))>::Type;
+};
+
+/** The result of bitwise NOT is an integer with the signedness and size of the argument.
+ */
+template <typename A> struct ResultOfBitNot
+{
+ using Type = typename Construct<
+ is_signed_v<A>,
+ false,
+ sizeof(A)>::Type;
+};
+
+
+/** Type casting for `if` function:
+ * UInt<x>, UInt<y> -> UInt<max(x,y)>
+ * Int<x>, Int<y> -> Int<max(x,y)>
+ * Float<x>, Float<y> -> Float<max(x, y)>
+ * UInt<x>, Int<y> -> Int<max(x*2, y)>
+ * Float<x>, [U]Int<y> -> Float<max(x, y*2)>
+ * Decimal<x>, Decimal<y> -> Decimal<max(x,y)>
+ * UUID, UUID -> UUID
+ * UInt64, Int<x> -> Error
+ * Float<x>, [U]Int64 -> Error
+ */
+template <typename A, typename B>
+struct ResultOfIf
+{
+ /// Properties of the pair of argument types.
+ static constexpr bool has_float = std::is_floating_point_v<A> || std::is_floating_point_v<B>;
+ static constexpr bool has_integer = is_integer<A> || is_integer<B>;
+ static constexpr bool has_signed = is_signed_v<A> || is_signed_v<B>;
+ static constexpr bool has_unsigned = !is_signed_v<A> || !is_signed_v<B>;
+ static constexpr bool has_big_int = is_big_int_v<A> || is_big_int_v<B>;
+
+ /// Largest size per category; a type that is not in the category contributes 0.
+ static constexpr size_t max_size_of_unsigned_integer = max(is_signed_v<A> ? 0 : sizeof(A), is_signed_v<B> ? 0 : sizeof(B));
+ static constexpr size_t max_size_of_signed_integer = max(is_signed_v<A> ? sizeof(A) : 0, is_signed_v<B> ? sizeof(B) : 0);
+ static constexpr size_t max_size_of_integer = max(is_integer<A> ? sizeof(A) : 0, is_integer<B> ? sizeof(B) : 0);
+ static constexpr size_t max_size_of_float = max(std::is_floating_point_v<A> ? sizeof(A) : 0, std::is_floating_point_v<B> ? sizeof(B) : 0);
+
+ /// The size is doubled when the integer is at least as large as the float,
+ /// or when the unsigned type is at least as large as the signed one.
+ using ConstructedType = typename Construct<has_signed, has_float,
+ ((has_float && has_integer && max_size_of_integer >= max_size_of_float)
+ || (has_signed && has_unsigned && max_size_of_unsigned_integer >= max_size_of_signed_integer))
+ ? max(sizeof(A), sizeof(B)) * 2
+ : max(sizeof(A), sizeof(B))>::Type;
+
+ /// Identical types are kept; two decimals give the larger one; mixing a decimal with a non-decimal gives Error.
+ using Type =
+ std::conditional_t<std::is_same_v<A, B>, A,
+ std::conditional_t<is_decimal<A> && is_decimal<B>,
+ std::conditional_t<(sizeof(A) > sizeof(B)), A, B>,
+ std::conditional_t<!is_decimal<A> && !is_decimal<B>,
+ ConstructedType, Error>>>;
+};
+
+/** Before applying operator `%` and bitwise operations, operands are cast to integers.
+ * The integer keeps the signedness of A; its size is sizeof(A), or 8 bytes for floating-point A.
+ */
+template <typename A> struct ToInteger
+{
+ using Type = typename Construct<
+ is_signed_v<A>,
+ false,
+ std::is_floating_point_v<A> ? 8 : sizeof(A)>::Type;
+};
+
+
+// CLICKHOUSE-29. The same depth, different signs
+// NOTE: This case is applied for 64-bit integers only (for backward compatibility), but could be used for any-bit integers
+// True when A and B are both integral, both 8 bytes wide, and exactly one of them is signed.
+template <typename A, typename B>
+constexpr bool LeastGreatestSpecialCase =
+ std::is_integral_v<A> && std::is_integral_v<B>
+ && (8 == sizeof(A) && sizeof(A) == sizeof(B))
+ && (is_signed_v<A> ^ is_signed_v<B>);
+
+/// Result type of `least`: the signed integer of A's size in the special case above,
+/// otherwise the same type as for `if`.
+template <typename A, typename B>
+using ResultOfLeast = std::conditional_t<LeastGreatestSpecialCase<A, B>,
+ typename Construct<true, false, sizeof(A)>::Type,
+ typename ResultOfIf<A, B>::Type>;
+
+/// Result type of `greatest`: the unsigned integer of A's size in the special case above,
+/// otherwise the same type as for `if`.
+template <typename A, typename B>
+using ResultOfGreatest = std::conditional_t<LeastGreatestSpecialCase<A, B>,
+ typename Construct<false, false, sizeof(A)>::Type,
+ typename ResultOfIf<A, B>::Type>;
+
+}
+
+/// Converts `x` to UInt8 with a static_cast and returns the result.
+template <typename T>
+static inline auto littleBits(const T & x)
+{
+    const auto low_bits = static_cast<UInt8>(x);
+    return low_bits;
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/ObjectUtils.cpp b/contrib/clickhouse/src/DataTypes/ObjectUtils.cpp
new file mode 100644
index 00000000000..28f000b6f0d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/ObjectUtils.cpp
@@ -0,0 +1,992 @@
+#include <DataTypes/ObjectUtils.h>
+#include <DataTypes/DataTypeObject.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeNested.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/getLeastSupertype.h>
+#include <DataTypes/NestedUtils.h>
+#include <Storages/StorageSnapshot.h>
+#include <Columns/ColumnObject.h>
+#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnMap.h>
+#include <Columns/ColumnNullable.h>
+#include <Parsers/ASTSelectQuery.h>
+#include <Parsers/ASTExpressionList.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTFunction.h>
+#include <IO/Operators.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int TYPE_MISMATCH;
+ extern const int LOGICAL_ERROR;
+ extern const int INCOMPATIBLE_COLUMNS;
+ extern const int NOT_IMPLEMENTED;
+}
+
+/// Number of dimensions reported by the Array type; 0 when @type is not a DataTypeArray.
+size_t getNumberOfDimensions(const IDataType & type)
+{
+    const auto * as_array = typeid_cast<const DataTypeArray *>(&type);
+    if (!as_array)
+        return 0;
+
+    return as_array->getNumberOfDimensions();
+}
+
+/// Number of dimensions reported by the Array column; 0 when @column is not a ColumnArray.
+size_t getNumberOfDimensions(const IColumn & column)
+{
+    const auto * as_array = checkAndGetColumn<ColumnArray>(column);
+    if (!as_array)
+        return 0;
+
+    return as_array->getNumberOfDimensions();
+}
+
+/// Strips all Array levels from @type and returns the innermost nested type
+/// (or @type itself when it is not an Array).
+DataTypePtr getBaseTypeOfArray(const DataTypePtr & type)
+{
+    /// Descend through raw pointers so that no shared pointer is copied on the way down.
+    const DataTypeArray * innermost_array = nullptr;
+    for (const auto * cursor = type.get();;)
+    {
+        const auto * as_array = typeid_cast<const DataTypeArray *>(cursor);
+        if (!as_array)
+            break;
+
+        innermost_array = as_array;
+        cursor = as_array->getNestedType().get();
+    }
+
+    if (!innermost_array)
+        return type;
+
+    return innermost_array->getNestedType();
+}
+
+/// Strips all Array levels from @column and returns the innermost data column
+/// (or @column itself when it is not an Array).
+ColumnPtr getBaseColumnOfArray(const ColumnPtr & column)
+{
+    /// Descend through raw pointers so that no column pointer is copied on the way down.
+    const ColumnArray * innermost_array = nullptr;
+    const auto * cursor = column.get();
+    for (;;)
+    {
+        const auto * as_array = checkAndGetColumn<ColumnArray>(cursor);
+        if (!as_array)
+            break;
+
+        innermost_array = as_array;
+        cursor = &as_array->getData();
+    }
+
+    if (!innermost_array)
+        return column;
+
+    return innermost_array->getDataPtr();
+}
+
+/// Wraps @type into @num_dimensions levels of DataTypeArray.
+DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions)
+{
+    size_t levels_left = num_dimensions;
+    while (levels_left > 0)
+    {
+        type = std::make_shared<DataTypeArray>(std::move(type));
+        --levels_left;
+    }
+
+    return type;
+}
+
+/// Wraps @column into @num_dimensions levels of ColumnArray; each level is created
+/// from the nested column only.
+/// The column is taken by const reference to match the declaration in ObjectUtils.h:
+/// a by-value parameter would define a separate overload and leave the declared
+/// function without a definition.
+ColumnPtr createArrayOfColumn(const ColumnPtr & column, size_t num_dimensions)
+{
+    ColumnPtr result = column;
+    for (size_t i = 0; i < num_dimensions; ++i)
+        result = ColumnArray::create(result);
+    return result;
+}
+
+/// Builds an Array field nested @num_dimensions levels deep whose innermost Array is empty.
+/// Throws LOGICAL_ERROR for zero dimensions.
+Array createEmptyArrayField(size_t num_dimensions)
+{
+    if (num_dimensions == 0)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create array field with 0 dimensions");
+
+    Array outermost;
+    Array * innermost = &outermost;
+
+    /// The outermost level already exists, so only the remaining levels are appended.
+    size_t levels_to_add = num_dimensions - 1;
+    while (levels_to_add > 0)
+    {
+        innermost->push_back(Array());
+        innermost = &innermost->back().get<Array &>();
+        --levels_to_add;
+    }
+
+    return outermost;
+}
+
+/// Deduces a data type from @column. Handles simple types (looked up in the factory by the
+/// name of the type index), Nothing, Array and Nullable; throws NOT_IMPLEMENTED for anything else.
+DataTypePtr getDataTypeByColumn(const IColumn & column)
+{
+    auto type_index = column.getDataType();
+    WhichDataType kind(type_index);
+
+    if (kind.isSimple())
+    {
+        String type_name(magic_enum::enum_name(type_index));
+        return DataTypeFactory::instance().get(type_name);
+    }
+
+    if (kind.isNothing())
+        return std::make_shared<DataTypeNothing>();
+
+    if (const auto * as_array = checkAndGetColumn<ColumnArray>(&column))
+    {
+        auto nested_type = getDataTypeByColumn(as_array->getData());
+        return std::make_shared<DataTypeArray>(nested_type);
+    }
+
+    if (const auto * as_nullable = checkAndGetColumn<ColumnNullable>(&column))
+    {
+        auto nested_type = getDataTypeByColumn(as_nullable->getNestedColumn());
+        return makeNullable(nested_type);
+    }
+
+    /// TODO: add more types.
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get data type of column {}", column.getFamilyName());
+}
+
+/// Returns a vector with the I-th element of every tuple in @vec, in the same order.
+template <size_t I, typename Tuple>
+static auto extractVector(const std::vector<Tuple> & vec)
+{
+    static_assert(I < std::tuple_size_v<Tuple>);
+
+    const size_t count = vec.size();
+    std::vector<std::tuple_element_t<I, Tuple>> projected;
+    projected.reserve(count);
+
+    for (size_t pos = 0; pos < count; ++pos)
+        projected.push_back(std::get<I>(vec[pos]));
+
+    return projected;
+}
+
+/// Creates a Tuple type from @elements, reusing the element names of @type_tuple
+/// when that tuple has explicit names.
+static DataTypePtr recreateTupleWithElements(const DataTypeTuple & type_tuple, const DataTypes & elements)
+{
+    if (type_tuple.haveExplicitNames())
+        return std::make_shared<DataTypeTuple>(elements, type_tuple.getElementNames());
+
+    return std::make_shared<DataTypeTuple>(elements);
+}
+
+/// Converts an Object column to a Tuple column and type built from its subcolumns.
+/// A column that is not finalized is first cloned in finalized form.
+static std::pair<ColumnPtr, DataTypePtr> convertObjectColumnToTuple(
+    const ColumnObject & column_object, const DataTypeObject & type_object)
+{
+    if (!column_object.isFinalized())
+    {
+        auto finalized_holder = column_object.cloneFinalized();
+        return convertObjectColumnToTuple(assert_cast<const ColumnObject &>(*finalized_holder), type_object);
+    }
+
+    PathsInData paths;
+    DataTypes types;
+    Columns columns;
+
+    /// Gather path, least common type and finalized column of every subcolumn.
+    for (const auto & subcolumn : column_object.getSubcolumns())
+    {
+        paths.emplace_back(subcolumn->path);
+        types.emplace_back(subcolumn->data.getLeastCommonType());
+        columns.emplace_back(subcolumn->data.getFinalizedColumnPtr());
+    }
+
+    return unflattenTuple(paths, types, columns);
+}
+
+/// Walks @type together with @column and replaces every Object found inside
+/// Array, Map and Tuple wrappers by the Tuple produced by convertObjectColumnToTuple.
+/// Types without dynamic subcolumns are returned unchanged.
+static std::pair<ColumnPtr, DataTypePtr> recursivlyConvertDynamicColumnToTuple(
+    const ColumnPtr & column, const DataTypePtr & type)
+{
+    if (!type->hasDynamicSubcolumns())
+        return {column, type};
+
+    const auto * raw_type = type.get();
+
+    if (const auto * object_type = typeid_cast<const DataTypeObject *>(raw_type))
+        return convertObjectColumnToTuple(assert_cast<const ColumnObject &>(*column), *object_type);
+
+    if (const auto * array_type = typeid_cast<const DataTypeArray *>(raw_type))
+    {
+        const auto & array_column = assert_cast<const ColumnArray &>(*column);
+        auto converted = recursivlyConvertDynamicColumnToTuple(
+            array_column.getDataPtr(), array_type->getNestedType());
+
+        /// Offsets are reused as is; only the nested data is replaced.
+        return
+        {
+            ColumnArray::create(converted.first, array_column.getOffsetsPtr()),
+            std::make_shared<DataTypeArray>(std::move(converted.second)),
+        };
+    }
+
+    if (const auto * map_type = typeid_cast<const DataTypeMap *>(raw_type))
+    {
+        const auto & map_column = assert_cast<const ColumnMap &>(*column);
+        auto converted = recursivlyConvertDynamicColumnToTuple(
+            map_column.getNestedColumnPtr(), map_type->getNestedType());
+
+        return
+        {
+            ColumnMap::create(converted.first),
+            std::make_shared<DataTypeMap>(std::move(converted.second)),
+        };
+    }
+
+    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(raw_type))
+    {
+        const auto & element_columns = assert_cast<const ColumnTuple &>(*column).getColumns();
+        const auto & element_types = tuple_type->getElements();
+
+        assert(element_columns.size() == element_types.size());
+        const size_t num_elements = element_types.size();
+
+        Columns converted_columns(num_elements);
+        DataTypes converted_types(num_elements);
+
+        for (size_t pos = 0; pos < num_elements; ++pos)
+        {
+            auto converted = recursivlyConvertDynamicColumnToTuple(element_columns[pos], element_types[pos]);
+            converted_columns[pos] = std::move(converted.first);
+            converted_types[pos] = std::move(converted.second);
+        }
+
+        return
+        {
+            ColumnTuple::create(converted_columns),
+            recreateTupleWithElements(*tuple_type, converted_types)
+        };
+    }
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type->getName());
+}
+
+/// Replaces every column of @block that has dynamic subcolumns by its Tuple representation
+/// and verifies the resulting type against the column known to @storage_snapshot.
+void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot)
+{
+    for (auto & entry : block)
+    {
+        if (entry.type->hasDynamicSubcolumns())
+        {
+            auto converted = recursivlyConvertDynamicColumnToTuple(entry.column, entry.type);
+            entry.column = std::move(converted.first);
+            entry.type = std::move(converted.second);
+
+            GetColumnsOptions options(GetColumnsOptions::AllPhysical);
+            auto column_in_storage = storage_snapshot->tryGetColumn(options, entry.name);
+            if (!column_in_storage)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", entry.name);
+
+            auto concrete_column_in_storage = storage_snapshot->getColumn(options.withExtendedObjects(), entry.name);
+
+            /// The call is made only for its checks: it throws when the constructed
+            /// Tuple type and the type in storage are not compatible.
+            getLeastCommonTypeForDynamicColumns(
+                column_in_storage->type, {entry.type, concrete_column_in_storage.type}, true);
+        }
+    }
+}
+
+/// Returns true if the keys of @prefix match the keys of the first parts of @parts.
+/// Only keys are compared.
+static bool isPrefix(const PathInData::Parts & prefix, const PathInData::Parts & parts)
+{
+    const size_t prefix_length = prefix.size();
+    if (parts.size() < prefix_length)
+        return false;
+
+    for (size_t pos = 0; pos != prefix_length; ++pos)
+    {
+        if (prefix[pos].key != parts[pos].key)
+            return false;
+    }
+
+    return true;
+}
+
+/// Scans the common leading parts of @lhs and @rhs while their keys are equal and
+/// returns true as soon as two parts with equal keys compare unequal as a whole
+/// (is Nested, number of dimensions). Returns false at the first key mismatch.
+static bool hasDifferentStructureInPrefix(const PathInData::Parts & lhs, const PathInData::Parts & rhs)
+{
+    const size_t common_length = std::min(lhs.size(), rhs.size());
+    for (size_t pos = 0; pos < common_length; ++pos)
+    {
+        const auto & left = lhs[pos];
+        const auto & right = rhs[pos];
+
+        if (left.key != right.key)
+            return false;
+
+        if (left != right)
+            return true;
+    }
+
+    return false;
+}
+
+/// Compares every pair of @paths and throws INCOMPATIBLE_COLUMNS if one path is a prefix
+/// of the other, or if the two paths share a prefix by names with a different structure.
+void checkObjectHasNoAmbiguosPaths(const PathsInData & paths)
+{
+    const size_t num_paths = paths.size();
+    for (size_t cur = 1; cur < num_paths; ++cur)
+    {
+        const auto & cur_parts = paths[cur].getParts();
+
+        for (size_t prev = 0; prev < cur; ++prev)
+        {
+            const auto & prev_parts = paths[prev].getParts();
+
+            const bool one_is_prefix_of_other = isPrefix(cur_parts, prev_parts) || isPrefix(prev_parts, cur_parts);
+            if (one_is_prefix_of_other)
+                throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
+                    "Data in Object has ambiguous paths: '{}' and '{}'",
+                    paths[cur].getPath(), paths[prev].getPath());
+
+            if (hasDifferentStructureInPrefix(cur_parts, prev_parts))
+                throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS,
+                    "Data in Object has ambiguous paths: '{}' and '{}'. "
+                    "Paths have prefixes matched by names, but different in structure",
+                    paths[cur].getPath(), paths[prev].getPath());
+        }
+    }
+}
+
+/// Deduces one Tuple type from several Tuple types: tuples are flattened into paths,
+/// a common type is chosen per path, and the paths are unflattened back into a Tuple.
+/// Throws LOGICAL_ERROR if some type is not a Tuple and TYPE_MISMATCH if the types
+/// of one path differ in the number of array dimensions.
+static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambiguos_paths)
+{
+    /// For every path: the types of that subcolumn gathered from all tuples.
+    std::unordered_map<PathInData, DataTypes, PathInData::Hash> types_by_path;
+
+    for (const auto & type : types)
+    {
+        if (!typeid_cast<const DataTypeTuple *>(type.get()))
+            throw Exception(ErrorCodes::LOGICAL_ERROR,
+                "Least common type for object can be deduced only from tuples, but {} given", type->getName());
+
+        auto flattened = flattenTuple(type);
+        const auto & flat_paths = flattened.first;
+        const auto & flat_types = flattened.second;
+        assert(flat_paths.size() == flat_types.size());
+
+        for (size_t pos = 0; pos < flat_paths.size(); ++pos)
+            types_by_path[flat_paths[pos]].push_back(flat_types[pos]);
+    }
+
+    PathsInData result_paths;
+    DataTypes result_types;
+
+    for (const auto & [path, candidates] : types_by_path)
+    {
+        assert(!candidates.empty());
+
+        /// The dummy column does not take part in the resulting type.
+        if (path.getPath() == ColumnObject::COLUMN_NAME_DUMMY)
+            continue;
+
+        const size_t expected_dimensions = getNumberOfDimensions(*candidates[0]);
+        for (size_t pos = 1; pos < candidates.size(); ++pos)
+        {
+            if (getNumberOfDimensions(*candidates[pos]) != expected_dimensions)
+                throw Exception(ErrorCodes::TYPE_MISMATCH,
+                    "Uncompatible types of subcolumn '{}': {} and {}",
+                    path.getPath(), candidates[0]->getName(), candidates[pos]->getName());
+        }
+
+        result_paths.emplace_back(path);
+        result_types.emplace_back(getLeastSupertypeOrString(candidates));
+    }
+
+    /// Nothing but dummy columns was found: produce a tuple with a single dummy UInt8 element.
+    if (result_paths.empty())
+    {
+        result_paths.emplace_back(ColumnObject::COLUMN_NAME_DUMMY);
+        result_types.emplace_back(std::make_shared<DataTypeUInt8>());
+    }
+
+    if (check_ambiguos_paths)
+        checkObjectHasNoAmbiguosPaths(result_paths);
+
+    return unflattenTuple(result_paths, result_types);
+}
+
+static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
+ const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths);
+
+/// Handles wrapper types that expose getNestedType() (used for Array and Map):
+/// requires every concrete type to be the same wrapper `Type`, deduces the common
+/// nested type and wraps it into `Type` again. Throws TYPE_MISMATCH otherwise.
+template<typename Type>
+static DataTypePtr getLeastCommonTypeForColumnWithNestedType(
+    const Type & type, const DataTypes & concrete_types, bool check_ambiguos_paths)
+{
+    DataTypes nested_concrete_types;
+    nested_concrete_types.reserve(concrete_types.size());
+
+    for (const auto & concrete_type : concrete_types)
+    {
+        if (const auto * concrete_wrapper = typeid_cast<const Type *>(concrete_type.get()))
+        {
+            nested_concrete_types.push_back(concrete_wrapper->getNestedType());
+            continue;
+        }
+
+        throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected {} type, got {}", demangle(typeid(Type).name()), concrete_type->getName());
+    }
+
+    auto common_nested_type = getLeastCommonTypeForDynamicColumnsImpl(
+        type.getNestedType(), nested_concrete_types, check_ambiguos_paths);
+
+    return std::make_shared<Type>(std::move(common_nested_type));
+}
+
+/// Deduces the common type element by element: for each position of the Tuple in storage
+/// the elements at that position are taken from every concrete Tuple and combined.
+/// Throws TYPE_MISMATCH if some concrete type is not a Tuple.
+static DataTypePtr getLeastCommonTypeForTuple(
+    const DataTypeTuple & type, const DataTypes & concrete_types, bool check_ambiguos_paths)
+{
+    const auto & storage_elements = type.getElements();
+    const size_t num_elements = storage_elements.size();
+
+    DataTypes common_elements(num_elements);
+
+    for (size_t pos = 0; pos < num_elements; ++pos)
+    {
+        DataTypes elements_at_pos;
+        elements_at_pos.reserve(concrete_types.size());
+
+        for (const auto & concrete_type : concrete_types)
+        {
+            const auto * concrete_tuple = typeid_cast<const DataTypeTuple *>(concrete_type.get());
+            if (concrete_tuple == nullptr)
+                throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected Tuple type, got {}", concrete_type->getName());
+
+            elements_at_pos.push_back(concrete_tuple->getElement(pos));
+        }
+
+        common_elements[pos] = getLeastCommonTypeForDynamicColumnsImpl(
+            storage_elements[pos], elements_at_pos, check_ambiguos_paths);
+    }
+
+    return recreateTupleWithElements(type, common_elements);
+}
+
+/// Dispatches on the kind of @type_in_storage: types without dynamic subcolumns are returned
+/// as is, Object is deduced from the concrete Tuples, while Array, Map and Tuple are processed
+/// recursively. Any other type with dynamic subcolumns is a LOGICAL_ERROR.
+static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
+    const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths)
+{
+    if (!type_in_storage->hasDynamicSubcolumns())
+        return type_in_storage;
+
+    if (isObject(type_in_storage))
+        return getLeastCommonTypeForObject(concrete_types, check_ambiguos_paths);
+
+    const auto * raw_type = type_in_storage.get();
+
+    if (const auto * as_array = typeid_cast<const DataTypeArray *>(raw_type))
+        return getLeastCommonTypeForColumnWithNestedType(*as_array, concrete_types, check_ambiguos_paths);
+
+    if (const auto * as_map = typeid_cast<const DataTypeMap *>(raw_type))
+        return getLeastCommonTypeForColumnWithNestedType(*as_map, concrete_types, check_ambiguos_paths);
+
+    if (const auto * as_tuple = typeid_cast<const DataTypeTuple *>(raw_type))
+        return getLeastCommonTypeForTuple(*as_tuple, concrete_types, check_ambiguos_paths);
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
+}
+
+/// Returns nullptr for an empty list, the first type when all concrete types are equal,
+/// and otherwise the type deduced by getLeastCommonTypeForDynamicColumnsImpl.
+DataTypePtr getLeastCommonTypeForDynamicColumns(
+    const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths)
+{
+    if (concrete_types.empty())
+        return nullptr;
+
+    /// Find the first type that differs from the first one, if any.
+    const size_t num_types = concrete_types.size();
+    size_t pos = 1;
+    while (pos < num_types && concrete_types[pos]->equals(*concrete_types[0]))
+        ++pos;
+
+    const bool all_equal = (pos == num_types);
+    if (all_equal)
+        return concrete_types[0];
+
+    return getLeastCommonTypeForDynamicColumnsImpl(type_in_storage, concrete_types, check_ambiguos_paths);
+}
+
+/// Builds the concrete type of an empty dynamic column: every Object inside @type_in_storage
+/// becomes a Tuple with a single dummy UInt8 element; Array, Map and Tuple are rebuilt
+/// around the converted nested types. Types without dynamic subcolumns are returned as is.
+DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage)
+{
+    if (!type_in_storage->hasDynamicSubcolumns())
+        return type_in_storage;
+
+    if (isObject(type_in_storage))
+    {
+        DataTypes dummy_types{std::make_shared<DataTypeUInt8>()};
+        Names dummy_names{ColumnObject::COLUMN_NAME_DUMMY};
+        return std::make_shared<DataTypeTuple>(dummy_types, dummy_names);
+    }
+
+    const auto * raw_type = type_in_storage.get();
+
+    if (const auto * as_array = typeid_cast<const DataTypeArray *>(raw_type))
+    {
+        auto nested = createConcreteEmptyDynamicColumn(as_array->getNestedType());
+        return std::make_shared<DataTypeArray>(std::move(nested));
+    }
+
+    if (const auto * as_map = typeid_cast<const DataTypeMap *>(raw_type))
+    {
+        auto nested = createConcreteEmptyDynamicColumn(as_map->getNestedType());
+        return std::make_shared<DataTypeMap>(std::move(nested));
+    }
+
+    if (const auto * as_tuple = typeid_cast<const DataTypeTuple *>(raw_type))
+    {
+        const auto & old_elements = as_tuple->getElements();
+
+        DataTypes converted_elements;
+        converted_elements.reserve(old_elements.size());
+        for (size_t pos = 0; pos < old_elements.size(); ++pos)
+            converted_elements.push_back(createConcreteEmptyDynamicColumn(old_elements[pos]));
+
+        return recreateTupleWithElements(*as_tuple, converted_elements);
+    }
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
+}
+
+/// Returns true if the type of at least one column in @columns has dynamic subcolumns.
+bool hasDynamicSubcolumns(const ColumnsDescription & columns)
+{
+    for (auto it = columns.begin(); it != columns.end(); ++it)
+    {
+        if (it->type->hasDynamicSubcolumns())
+            return true;
+    }
+
+    return false;
+}
+
+/// For every column of @columns_list that is present in @object_columns, replaces its type
+/// with the type from @object_columns. With @with_subcolumns set, the subcolumns of those
+/// columns are appended to the end of @columns_list.
+void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns)
+{
+    NamesAndTypesList collected_subcolumns;
+
+    for (auto & column : columns_list)
+    {
+        auto extended = object_columns.tryGetColumn(GetColumnsOptions::All, column.name);
+        if (!extended)
+            continue;
+
+        column.type = extended->type;
+
+        if (with_subcolumns)
+            collected_subcolumns.splice(collected_subcolumns.end(), object_columns.getSubcolumns(column.name));
+    }
+
+    /// Appended only after the loop, so the list is not modified while it is iterated.
+    columns_list.splice(columns_list.end(), std::move(collected_subcolumns));
+}
+
+/// For every column of @new_columns that is already present in @object_columns with a
+/// different type, replaces the stored type by the least common type of the old and new ones.
+void updateObjectColumns(
+    ColumnsDescription & object_columns,
+    const ColumnsDescription & storage_columns,
+    const NamesAndTypesList & new_columns)
+{
+    for (const auto & new_column : new_columns)
+    {
+        auto known_column = object_columns.tryGetColumn(GetColumnsOptions::All, new_column.name);
+
+        /// Nothing to do for unknown columns and for columns whose type did not change.
+        if (!known_column || known_column->type->equals(*new_column.type))
+            continue;
+
+        auto storage_column = storage_columns.getColumn(GetColumnsOptions::All, new_column.name);
+        object_columns.modify(new_column.name, [&](auto & column)
+        {
+            column.type = getLeastCommonTypeForDynamicColumns(storage_column.type, {known_column->type, new_column.type});
+        });
+    }
+}
+
+namespace
+{
+
+/// Recursively collects paths and types of the leaves of @type into @new_paths and @new_types.
+/// Tuple elements extend the current path in @builder; for an Array the nested type is flattened
+/// with a fresh builder and every resulting path is appended to the current one, with the
+/// element type wrapped into Array. Any other type is emitted as a leaf at the current path.
+void flattenTupleImpl(
+    PathInDataBuilder & builder,
+    DataTypePtr type,
+    std::vector<PathInData::Parts> & new_paths,
+    DataTypes & new_types)
+{
+    if (const auto * as_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
+    {
+        const auto & names = as_tuple->getElementNames();
+        const auto & elements = as_tuple->getElements();
+
+        for (size_t pos = 0; pos < names.size(); ++pos)
+        {
+            builder.append(names[pos], false);
+            flattenTupleImpl(builder, elements[pos], new_paths, new_types);
+            builder.popBack();
+        }
+
+        return;
+    }
+
+    if (const auto * as_array = typeid_cast<const DataTypeArray *>(type.get()))
+    {
+        PathInDataBuilder nested_builder;
+        std::vector<PathInData::Parts> nested_paths;
+        DataTypes nested_types;
+
+        flattenTupleImpl(nested_builder, as_array->getNestedType(), nested_paths, nested_types);
+        assert(nested_paths.size() == nested_types.size());
+
+        for (size_t pos = 0; pos < nested_paths.size(); ++pos)
+        {
+            const auto & nested_path = nested_paths[pos];
+
+            builder.append(nested_path, true);
+            new_paths.emplace_back(builder.getParts());
+            new_types.emplace_back(std::make_shared<DataTypeArray>(nested_types[pos]));
+            builder.popBack(nested_path.size());
+        }
+
+        return;
+    }
+
+    new_paths.emplace_back(builder.getParts());
+    new_types.emplace_back(type);
+}
+
+/// Recursively collects the leaf columns of @column into @new_columns.
+/// @offsets_columns is used as a stack of array offsets: it holds the offsets of all Array
+/// levels above the current column and allows to recreate Array columns around each leaf.
+void flattenTupleImpl(const ColumnPtr & column, Columns & new_columns, Columns & offsets_columns)
+{
+    if (const auto * as_tuple = checkAndGetColumn<ColumnTuple>(column.get()))
+    {
+        for (const auto & element : as_tuple->getColumns())
+            flattenTupleImpl(element, new_columns, offsets_columns);
+        return;
+    }
+
+    if (const auto * as_array = checkAndGetColumn<ColumnArray>(column.get()))
+    {
+        offsets_columns.push_back(as_array->getOffsetsPtr());
+        flattenTupleImpl(as_array->getDataPtr(), new_columns, offsets_columns);
+        offsets_columns.pop_back();
+        return;
+    }
+
+    if (offsets_columns.empty())
+    {
+        new_columns.push_back(column);
+        return;
+    }
+
+    /// Wrap the leaf with the innermost offsets first, then with every outer level.
+    auto wrapped = ColumnArray::create(column, offsets_columns.back());
+    for (size_t level = offsets_columns.size() - 1; level-- > 0;)
+        wrapped = ColumnArray::create(wrapped, offsets_columns[level]);
+
+    new_columns.push_back(std::move(wrapped));
+}
+
+/// Removes @dimensions_to_reduce outer Array levels from @type.
+/// Throws LOGICAL_ERROR if a non-Array type is reached too early.
+DataTypePtr reduceNumberOfDimensions(DataTypePtr type, size_t dimensions_to_reduce)
+{
+    for (size_t level = 0; level < dimensions_to_reduce; ++level)
+    {
+        const auto * as_array = typeid_cast<const DataTypeArray *>(type.get());
+        if (as_array == nullptr)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce");
+
+        type = as_array->getNestedType();
+    }
+
+    return type;
+}
+
+/// Removes @dimensions_to_reduce outer Array levels from @column.
+/// Throws LOGICAL_ERROR if a non-Array column is reached too early.
+ColumnPtr reduceNumberOfDimensions(ColumnPtr column, size_t dimensions_to_reduce)
+{
+    for (size_t level = 0; level < dimensions_to_reduce; ++level)
+    {
+        const auto * as_array = typeid_cast<const ColumnArray *>(column.get());
+        if (as_array == nullptr)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce");
+
+        column = as_array->getDataPtr();
+    }
+
+    return column;
+}
+
+/// Payload stored in each node of the subcolumns tree built by unflattenTuple:
+/// the column, its type and the number of array dimensions recorded for that node.
+struct ColumnWithTypeAndDimensions
+{
+ ColumnPtr column; /// Column as seen at this node of the path.
+ DataTypePtr type; /// Data type that goes together with `column`.
+ size_t array_dimensions; /// Array levels recorded for this node; createTypeFromNode strips them from elements of Nested nodes.
+};
+
+using SubcolumnsTreeWithColumns = SubcolumnsTree<ColumnWithTypeAndDimensions>;
+using Node = SubcolumnsTreeWithColumns::Node;
+
+/// Recursively creates data type and column from the subtree of subcolumns rooted at @node: SCALAR nodes return their stored data, NESTED nodes produce an Array of the type made by createNested, all other nodes produce a Tuple of their children.
+ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
+{
+ auto collect_tuple_elemets = [](const auto & children) /// Builds children recursively; returns their names and data, both ordered by name.
+ {
+ if (children.empty())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create type from empty Tuple or Nested node");
+
+ std::vector<std::tuple<String, ColumnWithTypeAndDimensions>> tuple_elements;
+ tuple_elements.reserve(children.size());
+ for (const auto & [name, child] : children)
+ {
+ assert(child);
+ auto column = createTypeFromNode(*child);
+ tuple_elements.emplace_back(name, std::move(column));
+ }
+
+ /// Sort by element name, so that the same set of subcolumns always produces the same type.
+ ::sort(tuple_elements.begin(), tuple_elements.end(),
+ [](const auto & lhs, const auto & rhs) { return std::get<0>(lhs) < std::get<0>(rhs); });
+
+ auto tuple_names = extractVector<0>(tuple_elements);
+ auto tuple_columns = extractVector<1>(tuple_elements);
+
+ return std::make_tuple(std::move(tuple_names), std::move(tuple_columns));
+ };
+
+ if (node.kind == Node::SCALAR)
+ {
+ return node.data; /// A leaf already carries its column, type and dimensions.
+ }
+ else if (node.kind == Node::NESTED)
+ {
+ auto [tuple_names, tuple_columns] = collect_tuple_elemets(node.children);
+
+ Columns offsets_columns; /// Offsets of all array levels, outermost first.
+ offsets_columns.reserve(tuple_columns[0].array_dimensions + 1);
+
+ /// If we have a Nested node and a child node with anonymous array levels,
+ /// we need to push the Nested type through all array levels.
+ /// Example: { "k1": [[{"k2": 1, "k3": 2}]] } should be parsed as
+ /// `k1 Array(Nested(k2 Int, k3 Int))`: k1 is marked as Nested,
+ /// and `k2` and `k3` have anonymous_array_level = 1 in that case.
+
+ const auto & current_array = assert_cast<const ColumnArray &>(*node.data.column);
+ offsets_columns.push_back(current_array.getOffsetsPtr());
+
+ auto first_column = tuple_columns[0].column; /// Offsets of the extra levels are taken from the first child only.
+ for (size_t i = 0; i < tuple_columns[0].array_dimensions; ++i)
+ {
+ const auto & column_array = assert_cast<const ColumnArray &>(*first_column);
+ offsets_columns.push_back(column_array.getOffsetsPtr());
+ first_column = column_array.getDataPtr();
+ }
+
+ size_t num_elements = tuple_columns.size();
+ Columns tuple_elements_columns(num_elements);
+ DataTypes tuple_elements_types(num_elements);
+
+ /// Strip the extra array dimensions to get the columns and types of the Nested elements.
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ assert(tuple_columns[i].array_dimensions == tuple_columns[0].array_dimensions);
+ tuple_elements_columns[i] = reduceNumberOfDimensions(tuple_columns[i].column, tuple_columns[i].array_dimensions);
+ tuple_elements_types[i] = reduceNumberOfDimensions(tuple_columns[i].type, tuple_columns[i].array_dimensions);
+ }
+
+ auto result_column = ColumnArray::create(ColumnTuple::create(tuple_elements_columns), offsets_columns.back());
+ auto result_type = createNested(tuple_elements_types, tuple_names);
+
+ /// Wrap the result back into the remaining array levels, from the inner level to the outer one.
+ for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it)
+ {
+ result_column = ColumnArray::create(result_column, *it);
+ result_type = std::make_shared<DataTypeArray>(result_type);
+ }
+
+ return {result_column, result_type, tuple_columns[0].array_dimensions};
+ }
+ else
+ {
+ auto [tuple_names, tuple_columns] = collect_tuple_elemets(node.children); /// Any other node kind is built as a named Tuple of its children.
+
+ size_t num_elements = tuple_columns.size();
+ Columns tuple_elements_columns(num_elements);
+ DataTypes tuple_elements_types(num_elements);
+
+ for (size_t i = 0; i < tuple_columns.size(); ++i)
+ {
+ assert(tuple_columns[i].array_dimensions == tuple_columns[0].array_dimensions);
+ tuple_elements_columns[i] = tuple_columns[i].column;
+ tuple_elements_types[i] = tuple_columns[i].type;
+ }
+
+ auto result_column = ColumnTuple::create(tuple_elements_columns);
+ auto result_type = std::make_shared<DataTypeTuple>(tuple_elements_types, tuple_names);
+
+ return {result_column, result_type, tuple_columns[0].array_dimensions};
+ }
+}
+
+}
+
+/// Flattens a (possibly nested) Tuple type into the list of paths to its leaves
+/// and the list of leaf types; the i-th type corresponds to the i-th path.
+std::pair<PathsInData, DataTypes> flattenTuple(const DataTypePtr & type)
+{
+    std::vector<PathInData::Parts> new_path_parts;
+    DataTypes new_types;
+    PathInDataBuilder builder;
+
+    flattenTupleImpl(builder, type, new_path_parts, new_types);
+
+    PathsInData new_paths(new_path_parts.begin(), new_path_parts.end());
+
+    /// Both vectors are local and no longer needed: move them into the result instead of copying.
+    return {std::move(new_paths), std::move(new_types)};
+}
+
+/// Flattens a (possibly nested) Tuple column into a Tuple column made of its leaf columns.
+ColumnPtr flattenTuple(const ColumnPtr & column)
+{
+    Columns leaf_columns;
+
+    /// Stack of array offsets used by the implementation; starts empty.
+    Columns offsets_stack;
+    flattenTupleImpl(column, leaf_columns, offsets_stack);
+
+    return ColumnTuple::create(leaf_columns);
+}
+
+/// Type-only variant of unflattenTuple: creates a column for every type with createColumn(),
+/// runs the variant that works with columns and returns only the resulting type.
+DataTypePtr unflattenTuple(const PathsInData & paths, const DataTypes & tuple_types)
+{
+    assert(paths.size() == tuple_types.size());
+
+    const size_t num_types = tuple_types.size();
+    Columns created_columns;
+    created_columns.reserve(num_types);
+    for (size_t pos = 0; pos < num_types; ++pos)
+        created_columns.emplace_back(tuple_types[pos]->createColumn());
+
+    auto unflattened = unflattenTuple(paths, tuple_types, created_columns);
+    return std::move(unflattened.second);
+}
+
+/// Converts an Object column to a Tuple column and type built from its subcolumns.
+/// An Object without subcolumns becomes a Tuple with a single dummy UInt8 element
+/// resized to the number of rows of @column.
+std::pair<ColumnPtr, DataTypePtr> unflattenObjectToTuple(const ColumnObject & column)
+{
+    const auto & subcolumns = column.getSubcolumns();
+
+    if (subcolumns.empty())
+    {
+        DataTypes dummy_types{std::make_shared<DataTypeUInt8>()};
+        Names dummy_names{ColumnObject::COLUMN_NAME_DUMMY};
+        auto dummy_type = std::make_shared<DataTypeTuple>(dummy_types, dummy_names);
+
+        return {dummy_type->createColumn()->cloneResized(column.size()), dummy_type};
+    }
+
+    const size_t num_subcolumns = subcolumns.size();
+
+    PathsInData paths;
+    paths.reserve(num_subcolumns);
+
+    DataTypes types;
+    types.reserve(num_subcolumns);
+
+    Columns columns;
+    columns.reserve(num_subcolumns);
+
+    /// Gather path, least common type and finalized column of every subcolumn.
+    for (const auto & subcolumn : subcolumns)
+    {
+        paths.emplace_back(subcolumn->path);
+        types.emplace_back(subcolumn->data.getLeastCommonType());
+        columns.emplace_back(subcolumn->data.getFinalizedColumnPtr());
+    }
+
+    return unflattenTuple(paths, types, columns);
+}
+
+std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
+ const PathsInData & paths,
+ const DataTypes & tuple_types,
+ const Columns & tuple_columns)
+{
+ assert(paths.size() == tuple_types.size());
+ assert(paths.size() == tuple_columns.size());
+
+ if (paths.empty())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unflatten empty Tuple");
+
+ /// Builds one (possibly nested) Tuple column and type from flat lists of paths, types and columns.
+ /// All paths are added to the subcolumn tree and then a type is created from its root.
+ /// The tree stores column, type and number of array dimensions for each intermediate node.
+ SubcolumnsTreeWithColumns tree;
+
+ for (size_t i = 0; i < paths.size(); ++i)
+ {
+ auto column = tuple_columns[i]; /// Copies: the callback below strips array levels from them step by step.
+ auto type = tuple_types[i];
+
+ const auto & parts = paths[i].getParts();
+ size_t num_parts = parts.size();
+
+ size_t pos = 0; /// Index of the path part handled by the next callback call. NOTE(review): presumably the tree calls the callback once per part, in path order - confirm in SubcolumnsTree::add.
+ tree.add(paths[i], [&](Node::Kind kind, bool exists) -> std::shared_ptr<Node>
+ {
+ if (pos >= num_parts)
+ throw Exception(ErrorCodes::LOGICAL_ERROR,
+ "Not enough name parts for path {}. Expected at least {}, got {}",
+ paths[i].getPath(), pos + 1, num_parts);
+
+ size_t array_dimensions = kind == Node::NESTED ? 1 : parts[pos].anonymous_array_level; /// A Nested node always accounts for exactly one array level.
+ ColumnWithTypeAndDimensions current_column{column, type, array_dimensions};
+
+ /// Strip the array levels of this node to get the type and column for the next node.
+ if (array_dimensions)
+ {
+ type = reduceNumberOfDimensions(type, array_dimensions);
+ column = reduceNumberOfDimensions(column, array_dimensions);
+ }
+
+ ++pos; /// Must advance even when the node already exists, to stay in sync with the path.
+ if (exists)
+ return nullptr;
+
+ return kind == Node::SCALAR
+ ? std::make_shared<Node>(kind, current_column, paths[i])
+ : std::make_shared<Node>(kind, current_column);
+ });
+ }
+
+ auto [column, type, _] = createTypeFromNode(tree.getRoot()); /// The number of dimensions of the root is not needed.
+ return std::make_pair(std::move(column), std::move(type));
+}
+
+/// Appends `materialize(CAST(<default value of data_type>, '<name of data_type>')) AS column_name`
+/// to the WITH clause of the select @query, creating the clause if it is absent.
+static void addConstantToWithClause(const ASTPtr & query, const String & column_name, const DataTypePtr & data_type)
+{
+    auto & select = query->as<ASTSelectQuery &>();
+    if (!select.with())
+        select.setExpression(ASTSelectQuery::Expression::WITH, std::make_shared<ASTExpressionList>());
+
+    auto default_value_literal = std::make_shared<ASTLiteral>(data_type->getDefault());
+    auto type_name_literal = std::make_shared<ASTLiteral>(data_type->getName());
+    auto cast_to_type = makeASTFunction("CAST", std::move(default_value_literal), std::move(type_name_literal));
+
+    /// TODO: avoid materialize
+    auto constant = makeASTFunction("materialize", std::move(cast_to_type));
+    constant->alias = column_name;
+    constant->prefer_alias_to_column_name = true;
+
+    select.with()->children.push_back(std::move(constant));
+}
+
+/// @expected_columns and @available_columns contain descriptions
+/// of extended Object columns.
+/// Subcolumns that are expected but not available and that are referenced in @query
+/// are added to the WITH clause of the query as constants with default values of their types.
+void replaceMissedSubcolumnsByConstants(
+    const ColumnsDescription & expected_columns,
+    const ColumnsDescription & available_columns,
+    ASTPtr query)
+{
+    NamesAndTypes missed_names_types;
+
+    /// Find all subcolumns that are in @expected_columns, but not in @available_columns.
+    for (const auto & column : available_columns)
+    {
+        auto expected_column = expected_columns.getColumn(GetColumnsOptions::All, column.name);
+
+        /// Flattens a type into the sorted list of full subcolumn names with their types,
+        /// which makes it easy to check the existence of subcolumns.
+        auto collect_sorted_subcolumns = [&column](const DataTypePtr & type)
+        {
+            auto flattened = flattenTuple(type);
+            const auto & paths = flattened.first;
+            const auto & types = flattened.second;
+
+            NamesAndTypes res;
+            res.reserve(paths.size());
+            for (size_t pos = 0; pos < paths.size(); ++pos)
+                res.emplace_back(Nested::concatenateName(column.name, paths[pos].getPath()), types[pos]);
+
+            ::sort(res.begin(), res.end());
+            return res;
+        };
+
+        auto available_names_types = collect_sorted_subcolumns(column.type);
+        auto expected_names_types = collect_sorted_subcolumns(expected_column.type);
+
+        /// Only names are compared here, types are ignored.
+        auto less_by_name = [](const auto & lhs, const auto & rhs) { return lhs.name < rhs.name; };
+
+        std::set_difference(
+            expected_names_types.begin(), expected_names_types.end(),
+            available_names_types.begin(), available_names_types.end(),
+            std::back_inserter(missed_names_types),
+            less_by_name);
+    }
+
+    if (missed_names_types.empty())
+        return;
+
+    IdentifierNameSet identifiers;
+    query->collectIdentifierNames(identifiers);
+
+    /// Replace missed subcolumns with default literals of their types.
+    for (const auto & [name, type] : missed_names_types)
+    {
+        if (!identifiers.contains(name))
+            continue;
+
+        addConstantToWithClause(query, name, type);
+    }
+}
+
+/// Keeps `num_dimensions_to_keep` levels of the array structure of @x and replaces
+/// everything below them by `replacement`.
+Field FieldVisitorReplaceScalars::operator()(const Array & x) const
+{
+    if (num_dimensions_to_keep == 0)
+        return replacement;
+
+    Array res;
+    res.reserve(x.size());
+    for (const auto & element : x)
+        res.push_back(applyVisitor(FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), element));
+
+    return res;
+}
+
+/// Returns one plus the maximum number of dimensions among the elements of @x.
+/// Sets `need_fold_dimension` when an element has a number of dimensions different
+/// from the maximum over the elements before it.
+size_t FieldVisitorToNumberOfDimensions::operator()(const Array & x)
+{
+    size_t max_nested_dimensions = 0;
+    bool is_first = true;
+
+    for (const auto & element : x)
+    {
+        const size_t nested_dimensions = applyVisitor(*this, element);
+
+        if (!is_first && nested_dimensions != max_nested_dimensions)
+            need_fold_dimension = true;
+
+        if (max_nested_dimensions < nested_dimensions)
+            max_nested_dimensions = nested_dimensions;
+
+        is_first = false;
+    }
+
+    return 1 + max_nested_dimensions;
+}
+
+/// Applies the visitor with one dimension less to every element of @x and collects the
+/// results into a new Array. With nothing left to fold, @x is returned as is.
+Field FieldVisitorFoldDimension::operator()(const Array & x) const
+{
+    if (num_dimensions_to_fold == 0)
+        return x;
+
+    Array res;
+    res.reserve(x.size());
+    for (const auto & element : x)
+        res.push_back(applyVisitor(FieldVisitorFoldDimension(num_dimensions_to_fold - 1), element));
+
+    return res;
+}
+
+/// Replaces the type of every column that has dynamic subcolumns by the type
+/// returned from createConcreteEmptyDynamicColumn.
+void setAllObjectsToDummyTupleType(NamesAndTypesList & columns)
+{
+    for (auto & column : columns)
+    {
+        if (!column.type->hasDynamicSubcolumns())
+            continue;
+
+        column.type = createConcreteEmptyDynamicColumn(column.type);
+    }
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/ObjectUtils.h b/contrib/clickhouse/src/DataTypes/ObjectUtils.h
new file mode 100644
index 00000000000..2bfcaae09ca
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/ObjectUtils.h
@@ -0,0 +1,212 @@
+#pragma once
+
+#include <Core/Block.h>
+#include <Core/NamesAndTypes.h>
+#include <Common/FieldVisitors.h>
+#include <Storages/ColumnsDescription.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Columns/ColumnObject.h>
+
+namespace DB
+{
+
+struct StorageSnapshot;
+using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;
+
+/// Returns number of dimensions in Array type. 0 if type is not array.
+size_t getNumberOfDimensions(const IDataType & type);
+
+/// Returns number of dimensions in Array column. 0 if column is not array.
+size_t getNumberOfDimensions(const IColumn & column);
+
+/// Returns type of scalars of Array of arbitrary dimensions.
+DataTypePtr getBaseTypeOfArray(const DataTypePtr & type);
+
+/// Returns Array type with requested scalar type and number of dimensions.
+DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions);
+
+/// Returns column of scalars of Array of arbitrary dimensions.
+ColumnPtr getBaseColumnOfArray(const ColumnPtr & column);
+
+/// Returns empty Array column with requested scalar column and number of dimensions.
+ColumnPtr createArrayOfColumn(const ColumnPtr & column, size_t num_dimensions);
+
+/// Returns Array with requested number of dimensions and no scalars.
+Array createEmptyArrayField(size_t num_dimensions);
+
+/// Tries to get data type by column. Only limited subset of types is supported
+DataTypePtr getDataTypeByColumn(const IColumn & column);
+
+/// Converts Object types and columns to Tuples in @block
+/// and checks that types are consistent with types in @storage_snapshot.
+void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot);
+
+/// Checks that each path is not the prefix of any other path.
+void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);
+
+/// Receives several Tuple types and deduces the least common type among them.
+DataTypePtr getLeastCommonTypeForDynamicColumns(
+ const DataTypePtr & type_in_storage, const DataTypes & types, bool check_ambiguos_paths = false);
+
+DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage);
+
+/// Converts types of object columns to tuples in @columns_list
+/// according to @object_columns and adds all tuple's subcolumns if needed.
+void extendObjectColumns(NamesAndTypesList & columns_list, const ColumnsDescription & object_columns, bool with_subcolumns);
+
+/// Checks whether @columns contain any column with dynamic subcolumns.
+bool hasDynamicSubcolumns(const ColumnsDescription & columns);
+
+/// Updates types of objects in @object_columns inplace
+/// according to types in new_columns.
+void updateObjectColumns(
+ ColumnsDescription & object_columns,
+ const ColumnsDescription & storage_columns,
+ const NamesAndTypesList & new_columns);
+
+using DataTypeTuplePtr = std::shared_ptr<DataTypeTuple>;
+
+/// Flattens nested Tuple to plain Tuple. I.e extracts all paths and types from tuple.
+/// E.g. Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64) -> Tuple(t.c1 UInt32, t.c2 String, c3 UInt64)
+std::pair<PathsInData, DataTypes> flattenTuple(const DataTypePtr & type);
+
+/// Flattens nested Tuple column to plain Tuple column.
+ColumnPtr flattenTuple(const ColumnPtr & column);
+
+/// The reverse operation to 'flattenTuple'.
+/// Creates nested Tuple from all paths and types.
+/// E.g. Tuple(t.c1 UInt32, t.c2 String, c3 UInt64) -> Tuple(t Tuple(c1 UInt32, c2 String), c3 UInt64)
+DataTypePtr unflattenTuple(
+ const PathsInData & paths,
+ const DataTypes & tuple_types);
+
+std::pair<ColumnPtr, DataTypePtr> unflattenObjectToTuple(const ColumnObject & column);
+
+std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
+ const PathsInData & paths,
+ const DataTypes & tuple_types,
+ const Columns & tuple_columns);
+
+
+/// For all columns which exist in @expected_columns and
+/// don't exist in @available_columns adds to WITH clause
+/// an alias with column name to literal of default value of column type.
+void replaceMissedSubcolumnsByConstants(
+ const ColumnsDescription & expected_columns,
+ const ColumnsDescription & available_columns,
+ ASTPtr query);
+
+/// Visitor that preserves the outer @num_dimensions_to_keep array levels of a field
+/// and puts @replacement in place of whatever is found below them
+/// (a scalar or a deeper nested array).
+/// Only a reference to @replacement is stored, so the referenced Field
+/// has to stay alive for as long as the visitor is used.
+class FieldVisitorReplaceScalars : public StaticVisitor<Field>
+{
+public:
+    FieldVisitorReplaceScalars(const Field & replacement_, size_t num_dimensions_to_keep_)
+        : replacement(replacement_)
+        , num_dimensions_to_keep(num_dimensions_to_keep_)
+    {}
+
+    /// Arrays are handled out of line.
+    Field operator()(const Array & x) const;
+
+    /// Every non-array value is replaced, regardless of the remaining depth.
+    template <typename T>
+    Field operator()(const T & /* x */) const
+    {
+        return replacement;
+    }
+
+private:
+    const Field & replacement;
+    size_t num_dimensions_to_keep;
+};
+
+/// Visitor that computes the number of array dimensions of a field.
+/// Non-array (scalar) fields have 0 dimensions.
+class FieldVisitorToNumberOfDimensions : public StaticVisitor<size_t>
+{
+public:
+    /// Arrays are handled out of line; this overload is non-const
+    /// because it may update `need_fold_dimension`.
+    size_t operator()(const Array & x);
+
+    /// Anything that is not an Array contributes no dimensions.
+    template <typename T>
+    size_t operator()(const T & /* x */) const
+    {
+        return 0;
+    }
+
+    /// Public output flag of the visitor, initially false.
+    bool need_fold_dimension = false;
+};
+
+/// Wraps a field (except Null) into additional array dimensions,
+/// e.g. folding `1` by 2 dimensions gives `[[1]]`.
+/// Used to normalize the dimensions of elements in an array, e.g. [1, [2]] --> [[1], [2]].
+class FieldVisitorFoldDimension : public StaticVisitor<Field>
+{
+public:
+    explicit FieldVisitorFoldDimension(size_t num_dimensions_to_fold_)
+        : num_dimensions_to_fold(num_dimensions_to_fold_)
+    {}
+
+    /// Arrays are handled out of line.
+    Field operator()(const Array & x) const;
+
+    /// Null is never wrapped, it is returned as is.
+    Field operator()(const Null & x) const { return x; }
+
+    /// Scalars: wrap the value into `num_dimensions_to_fold` nested one-element arrays.
+    template <typename T>
+    Field operator()(const T & x) const
+    {
+        if (num_dimensions_to_fold == 0)
+            return x;
+
+        /// The innermost array holds the value itself ...
+        Array res(1, x);
+
+        /// ... and every remaining dimension adds one more enclosing array.
+        for (size_t left = num_dimensions_to_fold - 1; left != 0; --left)
+        {
+            Array wrapper;
+            wrapper.push_back(std::move(res));
+            res = std::move(wrapper);
+        }
+
+        return res;
+    }
+
+private:
+    size_t num_dimensions_to_fold;
+};
+
+void setAllObjectsToDummyTupleType(NamesAndTypesList & columns);
+
+/// Receives a range of entries, each of which holds a collection of
+/// column-like objects (e.g. ColumnsDescription or NamesAndTypesList),
+/// and deduces the common types of object columns over all entries.
+/// @entry_columns_getter is called with the dereferenced Iterator and
+/// should return (a reference to) the collection of column-like objects of that entry.
+/// A column-like object should have the fields "name" and "type".
+template <typename Iterator, typename EntryColumnsGetter>
+ColumnsDescription getConcreteObjectColumns(
+    Iterator begin, Iterator end,
+    const ColumnsDescription & storage_columns,
+    EntryColumnsGetter && entry_columns_getter)
+{
+    /// Column name -> all types collected for that column.
+    std::unordered_map<String, DataTypes> types_in_entries;
+
+    /// Seed every column that has dynamic subcolumns with a dummy type,
+    /// so that such a column is present in the result
+    /// even if none of the entries contains it.
+    for (const auto & column : storage_columns)
+    {
+        if (!column.type->hasDynamicSubcolumns())
+            continue;
+
+        types_in_entries[column.name].push_back(createConcreteEmptyDynamicColumn(column.type));
+    }
+
+    /// Collect the types from the entries, but only for those columns that are
+    /// physical columns with dynamic subcolumns in the storage.
+    for (auto it = begin; it != end; ++it)
+    {
+        const auto & entry_columns = entry_columns_getter(*it);
+        for (const auto & column : entry_columns)
+        {
+            auto storage_column = storage_columns.tryGetPhysical(column.name);
+            if (!storage_column)
+                continue;
+            if (!storage_column->type->hasDynamicSubcolumns())
+                continue;
+
+            types_in_entries[column.name].push_back(column.type);
+        }
+    }
+
+    /// Reduce the collected types of each column to a single common type.
+    ColumnsDescription res;
+    for (const auto & [name, types] : types_in_entries)
+    {
+        auto storage_column = storage_columns.getPhysical(name);
+        res.add({name, getLeastCommonTypeForDynamicColumns(storage_column.type, types)});
+    }
+
+    return res;
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/ISerialization.cpp b/contrib/clickhouse/src/DataTypes/Serializations/ISerialization.cpp
new file mode 100644
index 00000000000..782b890841a
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/ISerialization.cpp
@@ -0,0 +1,323 @@
+#include <DataTypes/Serializations/ISerialization.h>
+#include <Compression/CompressionFactory.h>
+#include <Columns/IColumn.h>
+#include <IO/WriteHelpers.h>
+#include <IO/Operators.h>
+#include <IO/ReadBufferFromString.h>
+#include <Common/escapeForFileName.h>
+#include <DataTypes/NestedUtils.h>
+#include <base/EnumReflection.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int MULTIPLE_STREAMS_REQUIRED;
+ extern const int UNEXPECTED_DATA_AFTER_PARSED_VALUE;
+ extern const int LOGICAL_ERROR;
+}
+
+/// Deduces the serialization kind from a column:
+/// SPARSE for sparse columns, DEFAULT for everything else.
+ISerialization::Kind ISerialization::getKind(const IColumn & column)
+{
+    return column.isSparse() ? Kind::SPARSE : Kind::DEFAULT;
+}
+
+/// Returns the textual name of a serialization kind ("Default" or "Sparse").
+/// The names are the ones accepted by stringToKind.
+String ISerialization::kindToString(Kind kind)
+{
+    switch (kind)
+    {
+        case Kind::DEFAULT: return "Default";
+        case Kind::SPARSE:  return "Sparse";
+    }
+
+    /// All enumerators are handled above.
+    UNREACHABLE();
+}
+
+/// Parses the name of a serialization kind ("Default" or "Sparse").
+/// Throws LOGICAL_ERROR for any other string.
+ISerialization::Kind ISerialization::stringToKind(const String & str)
+{
+    if (str == "Default")
+        return Kind::DEFAULT;
+
+    if (str == "Sparse")
+        return Kind::SPARSE;
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown serialization kind '{}'", str);
+}
+
+/// Human-readable description of a substream: the name of its type,
+/// and for tuple elements additionally the element name and the escaping flag.
+String ISerialization::Substream::toString() const
+{
+    if (type != TupleElement)
+        return String(magic_enum::enum_name(type));
+
+    const char * escape_flag = escape_tuple_delimiter ? "true" : "false";
+    return fmt::format("TupleElement({}, escape_tuple_delimiter = {})", tuple_element_name, escape_flag);
+}
+
+/// Human-readable description of a path: the descriptions of all its
+/// substreams, separated by ", " and enclosed in curly braces.
+String ISerialization::SubstreamPath::toString() const
+{
+    WriteBufferFromOwnString wb;
+    wb << "{";
+
+    bool is_first = true;
+    for (const auto & substream : *this)
+    {
+        if (!is_first)
+            wb << ", ";
+        is_first = false;
+
+        wb << substream.toString();
+    }
+
+    wb << "}";
+    return wb.str();
+}
+
+/// Default implementation for serializations with a single stream:
+/// temporarily appends a Regular substream carrying @data to the path,
+/// invokes @callback with that path and then removes the substream again.
+void ISerialization::enumerateStreams(
+    EnumerateStreamsSettings & settings,
+    const StreamCallback & callback,
+    const SubstreamData & data) const
+{
+    auto & path = settings.path;
+
+    path.push_back(Substream::Regular);
+    path.back().data = data;
+
+    callback(path);
+
+    path.pop_back();
+}
+
+/// Convenience overload: enumerates streams with default settings,
+/// starting from substream data made of this serialization, @type and @column.
+void ISerialization::enumerateStreams(
+    const StreamCallback & callback,
+    const DataTypePtr & type,
+    const ColumnPtr & column) const
+{
+    SubstreamData data(getPtr());
+    data.withType(type).withColumn(column);
+
+    EnumerateStreamsSettings settings;
+    enumerateStreams(settings, callback, data);
+}
+
+/// Base implementation of single-stream bulk serialization: always throws
+/// MULTIPLE_STREAMS_REQUIRED. Single-stream serializations override it.
+void ISerialization::serializeBinaryBulk(
+    const IColumn & column, WriteBuffer & /* ostr */, size_t /* offset */, size_t /* limit */) const
+{
+    throw Exception(
+        ErrorCodes::MULTIPLE_STREAMS_REQUIRED,
+        "Column {} must be serialized with multiple streams",
+        column.getName());
+}
+
+/// Base implementation of single-stream bulk deserialization: always throws
+/// MULTIPLE_STREAMS_REQUIRED. Single-stream serializations override it.
+void ISerialization::deserializeBinaryBulk(
+    IColumn & column, ReadBuffer & /* istr */, size_t /* limit */, double /* avg_value_size_hint */) const
+{
+    throw Exception(
+        ErrorCodes::MULTIPLE_STREAMS_REQUIRED,
+        "Column {} must be deserialized with multiple streams",
+        column.getName());
+}
+
+/// Default implementation for the single-stream case: asks the getter for the
+/// stream of the current path and, if there is one, writes the requested
+/// range of the column into it. Does nothing when the getter returns no stream.
+void ISerialization::serializeBinaryBulkWithMultipleStreams(
+    const IColumn & column,
+    size_t offset,
+    size_t limit,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & /* state */) const
+{
+    WriteBuffer * stream = settings.getter(settings.path);
+    if (!stream)
+        return;
+
+    serializeBinaryBulk(column, *stream, offset, limit);
+}
+
+/// Default implementation for the single-stream case.
+/// A column found in the substreams cache for the current path is used as is.
+/// Otherwise, if the getter provides a stream, values are read from it into
+/// @column and the resulting column is put into the cache.
+/// Without a cached column and without a stream @column is left unchanged.
+void ISerialization::deserializeBinaryBulkWithMultipleStreams(
+    ColumnPtr & column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & /* state */,
+    SubstreamsCache * cache) const
+{
+    if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
+    {
+        column = cached_column;
+        return;
+    }
+
+    ReadBuffer * stream = settings.getter(settings.path);
+    if (!stream)
+        return;
+
+    auto mutable_column = column->assumeMutable();
+    deserializeBinaryBulk(*mutable_column, *stream, limit, settings.avg_value_size_hint);
+    column = std::move(mutable_column);
+
+    addToSubstreamsCache(cache, settings.path, column);
+}
+
+namespace
+{
+
+using SubstreamIterator = ISerialization::SubstreamPath::const_iterator;
+
+/// Appends to @stream_name the suffixes that correspond to the substreams in [begin, end).
+/// Substream types that are not listed below do not contribute to the name.
+/// @escape_tuple_delimiter allows escaping of the tuple delimiter; it takes effect
+/// only for those tuple elements that request escaping themselves.
+String getNameForSubstreamPath(
+    String stream_name,
+    SubstreamIterator begin,
+    SubstreamIterator end,
+    bool escape_tuple_delimiter)
+{
+    using Substream = ISerialization::Substream;
+
+    /// Number of ArrayElements substreams passed so far; used to number the sizes streams.
+    size_t array_level = 0;
+
+    for (auto it = begin; it != end; ++it)
+    {
+        const auto & substream = *it;
+
+        switch (substream.type)
+        {
+            case Substream::NullMap:
+                stream_name += ".null";
+                break;
+
+            case Substream::ArraySizes:
+                stream_name += ".size" + toString(array_level);
+                break;
+
+            case Substream::ArrayElements:
+                ++array_level;
+                break;
+
+            case Substream::DictionaryKeys:
+                stream_name += ".dict";
+                break;
+
+            case Substream::SparseOffsets:
+                stream_name += ".sparse.idx";
+                break;
+
+            case Substream::TupleElement:
+            {
+                /// For compatibility reasons, we use %2E (escaped dot) instead of dot.
+                /// Because nested data may be represented not by Array of Tuple,
+                /// but by separate Array columns with names in a form of a.b,
+                /// and name is encoded as a whole.
+                if (escape_tuple_delimiter && substream.escape_tuple_delimiter)
+                    stream_name += escapeForFileName("." + substream.tuple_element_name);
+                else
+                    stream_name += "." + substream.tuple_element_name;
+                break;
+            }
+
+            default:
+                break;
+        }
+    }
+
+    return stream_name;
+}
+
+}
+
+/// Overload taking a column: uses the name of the column in storage
+/// and forwards to the overload that takes a name.
+String ISerialization::getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path)
+{
+    const auto & name_in_storage = column.getNameInStorage();
+    return getFileNameForStream(name_in_storage, path);
+}
+
+/// Returns true if the path ends with an ArraySizes substream
+/// and contains no ArrayElements substream at all.
+/// An empty path yields false.
+bool isOffsetsOfNested(const ISerialization::SubstreamPath & path)
+{
+    using Substream = ISerialization::Substream;
+
+    if (path.empty() || path.back().type != Substream::ArraySizes)
+        return false;
+
+    for (const auto & substream : path)
+    {
+        if (substream.type == Substream::ArrayElements)
+            return false;
+    }
+
+    return true;
+}
+
+/// Builds the file name of a stream from the name of a column in storage and a substream path.
+/// When the column name has a Nested table name that differs from the full name
+/// and the path denotes offsets of Nested (see isOffsetsOfNested), the name starts
+/// with the escaped table name, otherwise with the escaped full name.
+/// Suffixes for the substreams of the path are appended afterwards.
+String ISerialization::getFileNameForStream(const String & name_in_storage, const SubstreamPath & path)
+{
+    const auto nested_storage_name = Nested::extractTableName(name_in_storage);
+    const bool use_nested_name = name_in_storage != nested_storage_name && isOffsetsOfNested(path);
+
+    String stream_name = use_nested_name
+        ? escapeForFileName(nested_storage_name)
+        : escapeForFileName(name_in_storage);
+
+    return getNameForSubstreamPath(std::move(stream_name), path.begin(), path.end(), true);
+}
+
+/// Subcolumn name for the whole path, i.e. for a prefix as long as the path itself.
+String ISerialization::getSubcolumnNameForStream(const SubstreamPath & path)
+{
+    const size_t whole_path_len = path.size();
+    return getSubcolumnNameForStream(path, whole_path_len);
+}
+
+/// Returns the name of the subcolumn that corresponds to the first @prefix_len
+/// substreams of @path, or an empty string if those substreams do not
+/// contribute anything to the name.
+/// @prefix_len must not exceed path.size().
+String ISerialization::getSubcolumnNameForStream(const SubstreamPath & path, size_t prefix_len)
+{
+    /// `path.begin() + prefix_len` below is undefined behaviour for a prefix longer
+    /// than the path, so check the precondition the same way createFromPath does.
+    assert(prefix_len <= path.size());
+
+    auto subcolumn_name = getNameForSubstreamPath("", path.begin(), path.begin() + prefix_len, false);
+    if (!subcolumn_name.empty())
+        subcolumn_name = subcolumn_name.substr(1); // It starts with a dot.
+
+    return subcolumn_name;
+}
+
+/// Stores @column in @cache under the subcolumn name of @path.
+/// Does nothing if there is no cache or the path is empty.
+/// An entry that already exists under that name is not overwritten (emplace semantics).
+void ISerialization::addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column)
+{
+    if (!cache || path.empty())
+        return;
+
+    /// @column is taken by value, so hand it over to the map instead of copying the pointer once more.
+    cache->emplace(getSubcolumnNameForStream(path), std::move(column));
+}
+
+/// Looks up the column stored in @cache under the subcolumn name of @path.
+/// Returns nullptr if there is no cache, the path is empty or nothing is stored under that name.
+ColumnPtr ISerialization::getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path)
+{
+    if (cache && !path.empty())
+    {
+        auto found = cache->find(getSubcolumnNameForStream(path));
+        if (found != cache->end())
+            return found->second;
+    }
+
+    return nullptr;
+}
+
+/// Returns false if the path contains at least one substream of the types
+/// NullMap, ArraySizes, DictionaryIndexes or SparseOffsets, and true otherwise.
+bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path)
+{
+    for (const auto & substream : path)
+    {
+        switch (substream.type)
+        {
+            case Substream::NullMap:
+            case Substream::ArraySizes:
+            case Substream::DictionaryIndexes:
+            case Substream::SparseOffsets:
+                return false;
+
+            default:
+                break;
+        }
+    }
+
+    return true;
+}
+
+/// Default implementation of raw text deserialization: the value is first read
+/// into a temporary string and then parsed from it as a whole text.
+void ISerialization::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    /// Read until \t or \n.
+    String raw_value;
+    readString(raw_value, istr);
+
+    ReadBufferFromString raw_value_buf(raw_value);
+    deserializeWholeText(column, raw_value_buf, settings);
+}
+
+/// Default implementation of raw text serialization: identical to serializeText.
+void ISerialization::serializeTextRaw(
+    const IColumn & column,
+    size_t row_num,
+    WriteBuffer & ostr,
+    const FormatSettings & settings) const
+{
+    serializeText(column, row_num, ostr, settings);
+}
+
+/// Counts the ArrayElements substreams in the path.
+size_t ISerialization::getArrayLevel(const SubstreamPath & path)
+{
+    size_t array_elements_count = 0;
+
+    for (const auto & substream : path)
+    {
+        if (substream.type == Substream::ArrayElements)
+            ++array_elements_count;
+    }
+
+    return array_elements_count;
+}
+
+/// Returns true if the last substream of the prefix of length @prefix_len
+/// is a NullMap, a TupleElement or an ArraySizes substream.
+/// An empty prefix and a prefix longer than the path yield false.
+bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len)
+{
+    const bool is_valid_prefix = prefix_len != 0 && prefix_len <= path.size();
+    if (!is_valid_prefix)
+        return false;
+
+    const auto last_type = path[prefix_len - 1].type;
+
+    return last_type == Substream::NullMap
+        || last_type == Substream::TupleElement
+        || last_type == Substream::ArraySizes;
+}
+
+/// Builds substream data for the prefix of @path of length @prefix_len.
+/// Starts from the data of the last substream of the prefix and then walks
+/// towards the beginning of the path, applying the creator of every preceding
+/// substream (if it has one) to the type, serialization and column that are set.
+/// An empty prefix yields empty data.
+ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
+{
+    assert(prefix_len <= path.size());
+    if (prefix_len == 0)
+        return {};
+
+    auto res = path[prefix_len - 1].data;
+
+    /// Visits the indices prefix_len - 2, ..., 0.
+    for (size_t i = prefix_len - 1; i-- > 0;)
+    {
+        const auto & creator = path[i].creator;
+        if (!creator)
+            continue;
+
+        /// Parts of the data that are not set stay unset.
+        if (res.type)
+            res.type = creator->create(res.type);
+        if (res.serialization)
+            res.serialization = creator->create(res.serialization);
+        if (res.column)
+            res.column = creator->create(res.column);
+    }
+
+    return res;
+}
+
+/// Throws UNEXPECTED_DATA_AFTER_PARSED_VALUE. The message contains the text
+/// representation of the last row of @column, @type_name and up to 10 bytes
+/// that are available in @istr at its current position.
+void ISerialization::throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const String & type_name) const
+{
+    WriteBufferFromOwnString parsed_value;
+    serializeText(column, column.size() - 1, parsed_value, settings);
+
+    const size_t unexpected_size = std::min(size_t(10), istr.available());
+
+    throw Exception(
+        ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE,
+        "Unexpected data '{}' after parsed {} value '{}'",
+        std::string(istr.position(), unexpected_size),
+        type_name,
+        parsed_value.str());
+}
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/ISerialization.h b/contrib/clickhouse/src/DataTypes/Serializations/ISerialization.h
new file mode 100644
index 00000000000..17e6dfb85bc
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/ISerialization.h
@@ -0,0 +1,417 @@
+#pragma once
+
+#include <Common/COW.h>
+#include <Core/Types_fwd.h>
+#include <base/demangle.h>
+#include <Common/typeid_cast.h>
+#include <Columns/IColumn.h>
+
+#include <boost/noncopyable.hpp>
+#include <unordered_map>
+#include <memory>
+#include <variant>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+class IDataType;
+
+class ReadBuffer;
+class WriteBuffer;
+class ProtobufReader;
+class ProtobufWriter;
+
+class IDataType;
+using DataTypePtr = std::shared_ptr<const IDataType>;
+
+class ISerialization;
+using SerializationPtr = std::shared_ptr<const ISerialization>;
+
+class SerializationInfo;
+using SerializationInfoPtr = std::shared_ptr<const SerializationInfo>;
+
+class Field;
+
+struct FormatSettings;
+struct NameAndTypePair;
+
+/** Represents serialization of data type.
+ * Has methods to serialize/deserialize column in binary and several text formats.
+ * Every data type has default serialization, but can be serialized in different representations.
+ * Default serialization can be wrapped to one of the special kind of serializations.
+ * Currently there is only one special serialization: Sparse.
+ * Each serialization has its own implementation of IColumn as its in-memory representation.
+ */
+class ISerialization : private boost::noncopyable, public std::enable_shared_from_this<ISerialization>
+{
+public:
+ ISerialization() = default;
+ virtual ~ISerialization() = default;
+
+ enum class Kind : UInt8
+ {
+ DEFAULT = 0,
+ SPARSE = 1,
+ };
+
+ virtual Kind getKind() const { return Kind::DEFAULT; }
+ SerializationPtr getPtr() const { return shared_from_this(); }
+
+ static Kind getKind(const IColumn & column);
+ static String kindToString(Kind kind);
+ static Kind stringToKind(const String & str);
+
+ /** Binary serialization for range of values in column - for writing to disk/network, etc.
+ *
+ * Some data types are represented in multiple streams while being serialized.
+ * Example:
+ * - Arrays are represented as stream of all elements and stream of array sizes.
+ * - Nullable types are represented as stream of values (with unspecified values in place of NULLs) and stream of NULL flags.
+ *
+ * Different streams are identified by "path".
+ * If the data type requires a single stream (it's true for most of data types), the stream will have empty path.
+ * Otherwise, the path can have components like "array elements", "array sizes", etc.
+ *
+ * For multidimensional arrays, path can have arbitrary length.
+ * As an example, for 2-dimensional arrays of numbers we have at least three streams:
+ * - array sizes; (sizes of top level arrays)
+ * - array elements / array sizes; (sizes of second level (nested) arrays)
+ * - array elements / array elements; (the most deep elements, placed contiguously)
+ *
+ * Descendants must override either serializeBinaryBulk, deserializeBinaryBulk methods (for simple cases with single stream)
+ * or serializeBinaryBulkWithMultipleStreams, deserializeBinaryBulkWithMultipleStreams, enumerateStreams methods (for cases with multiple streams).
+ *
+ * Default implementations of ...WithMultipleStreams methods will call serializeBinaryBulk, deserializeBinaryBulk for single stream.
+ */
+
+ struct ISubcolumnCreator
+ {
+ virtual DataTypePtr create(const DataTypePtr & prev) const = 0;
+ virtual SerializationPtr create(const SerializationPtr & prev) const = 0;
+ virtual ColumnPtr create(const ColumnPtr & prev) const = 0;
+ virtual ~ISubcolumnCreator() = default;
+ };
+
+ using SubcolumnCreatorPtr = std::shared_ptr<const ISubcolumnCreator>;
+
+ /// Data attached to a substream: a serialization and, optionally,
+ /// a type, a column and serialization info.
+ /// All members are pointers that are empty unless set explicitly.
+ /// The with* methods set one member and return *this, so calls can be chained.
+ struct SubstreamData
+ {
+ SubstreamData() = default;
+ /// Creates data with only the serialization set.
+ SubstreamData(SerializationPtr serialization_)
+ : serialization(std::move(serialization_))
+ {
+ }
+
+ /// Sets the type.
+ SubstreamData & withType(DataTypePtr type_)
+ {
+ type = std::move(type_);
+ return *this;
+ }
+
+ /// Sets the column.
+ SubstreamData & withColumn(ColumnPtr column_)
+ {
+ column = std::move(column_);
+ return *this;
+ }
+
+ /// Sets the serialization info.
+ SubstreamData & withSerializationInfo(SerializationInfoPtr serialization_info_)
+ {
+ serialization_info = std::move(serialization_info_);
+ return *this;
+ }
+
+ SerializationPtr serialization;
+ DataTypePtr type;
+ ColumnPtr column;
+ SerializationInfoPtr serialization_info;
+ };
+
+ /// One component of a substream path.
+ struct Substream
+ {
+ /// Kind of the substream.
+ enum Type
+ {
+ ArrayElements,
+ ArraySizes,
+
+ NullableElements,
+ NullMap,
+
+ TupleElement,
+
+ DictionaryKeys,
+ DictionaryIndexes,
+
+ SparseElements,
+ SparseOffsets,
+
+ ObjectStructure,
+ ObjectData,
+
+ Regular,
+ };
+
+ Type type;
+
+ /// Name of the tuple element, or its index (starting at 1).
+ String tuple_element_name;
+
+ /// Do we need to escape a dot in filenames for tuple elements.
+ bool escape_tuple_delimiter = true;
+
+ /// Data for current substream.
+ SubstreamData data;
+
+ /// Creator of subcolumn for current substream.
+ SubcolumnCreatorPtr creator = nullptr;
+
+ /// Flag, that may help to traverse substream paths.
+ /// It is mutable, so it can be changed through a const reference to the substream.
+ mutable bool visited = false;
+
+ /// Intentionally not explicit: a Type converts implicitly to a Substream.
+ Substream(Type type_) : type(type_) {} /// NOLINT
+
+ /// Human-readable description of the substream.
+ String toString() const;
+ };
+
+ struct SubstreamPath : public std::vector<Substream>
+ {
+ String toString() const;
+ };
+
+ /// Cache for substreams that are common to (possibly different) subcolumns of one type.
+ /// E.g. sizes of arrays of Nested data type.
+ using SubstreamsCache = std::unordered_map<String, ColumnPtr>;
+
+ using StreamCallback = std::function<void(const SubstreamPath &)>;
+
+ struct EnumerateStreamsSettings
+ {
+ SubstreamPath path;
+ bool position_independent_encoding = true;
+ };
+
+ virtual void enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const;
+
+ /// Enumerate streams with default settings.
+ void enumerateStreams(
+ const StreamCallback & callback,
+ const DataTypePtr & type = nullptr,
+ const ColumnPtr & column = nullptr) const;
+
+ using OutputStreamGetter = std::function<WriteBuffer*(const SubstreamPath &)>;
+ using InputStreamGetter = std::function<ReadBuffer*(const SubstreamPath &)>;
+
+ struct SerializeBinaryBulkState
+ {
+ virtual ~SerializeBinaryBulkState() = default;
+ };
+
+ struct DeserializeBinaryBulkState
+ {
+ virtual ~DeserializeBinaryBulkState() = default;
+ };
+
+ using SerializeBinaryBulkStatePtr = std::shared_ptr<SerializeBinaryBulkState>;
+ using DeserializeBinaryBulkStatePtr = std::shared_ptr<DeserializeBinaryBulkState>;
+
+ struct SerializeBinaryBulkSettings
+ {
+ OutputStreamGetter getter;
+ SubstreamPath path;
+
+ size_t low_cardinality_max_dictionary_size = 0;
+ bool low_cardinality_use_single_dictionary_for_part = true;
+
+ bool position_independent_encoding = true;
+ };
+
+ struct DeserializeBinaryBulkSettings
+ {
+ InputStreamGetter getter;
+ SubstreamPath path;
+
+ /// True if continue reading from previous positions in file. False if made fseek to the start of new granule.
+ bool continuous_reading = true;
+
+ bool position_independent_encoding = true;
+
+ bool native_format = false;
+
+ /// If not zero, may be used to avoid reallocations while reading column of String type.
+ double avg_value_size_hint = 0;
+ };
+
+ /// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark.
+ /// Column may be used only to retrieve the structure.
+ virtual void serializeBinaryBulkStatePrefix(
+ const IColumn & /*column*/,
+ SerializeBinaryBulkSettings & /*settings*/,
+ SerializeBinaryBulkStatePtr & /*state*/) const {}
+
+ /// Call after serializeBinaryBulkWithMultipleStreams chain to finish serialization.
+ virtual void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & /*settings*/,
+ SerializeBinaryBulkStatePtr & /*state*/) const {}
+
+ /// Call before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr.
+ virtual void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & /*settings*/,
+ DeserializeBinaryBulkStatePtr & /*state*/) const {}
+
+ /** 'offset' and 'limit' are used to specify range.
+ * limit = 0 - means no limit.
+ * offset must be not greater than size of column.
+ * offset + limit could be greater than size of column
+ * - in that case, column is serialized till the end.
+ */
+ virtual void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const;
+
+ /// Read no more than limit values and append them into column.
+ virtual void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const;
+
+ /** Override these methods for data types that require just single stream (most of data types).
+ */
+ virtual void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const;
+ virtual void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const;
+
+ /** Serialization/deserialization of individual values.
+ *
+ * These are helper methods for implementation of various formats to input/output for user (like CSV, JSON, etc.).
+ * There is no one-to-one correspondence between formats and these methods.
+ * For example, TabSeparated and Pretty formats could use same helper method serializeTextEscaped.
+ *
+ * For complex data types (like arrays) binary serde for individual values may differ from bulk serde.
+ * For example, if you serialize single array, it will be represented as its size and elements in single contiguous stream,
+ * but if you bulk serialize column with arrays, then sizes and elements will be written to separate streams.
+ */
+
+ /// There are two variants of binary serde. The first variant works with Field.
+ virtual void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const = 0;
+ virtual void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const = 0;
+
+ /// The other variants take a column, to avoid creating a temporary Field object.
+ /// Column must be non-constant.
+
+ /// Serialize one value of a column at specified row number.
+ virtual void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
+ /// Deserialize one value and insert into a column.
+ /// If method will throw an exception, then column will be in same state as before call to method.
+ virtual void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
+
+ /** Text serialization with escaping but without quoting.
+ */
+ virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
+
+ virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
+
+ /** Text serialization as a literal that may be inserted into a query.
+ */
+ virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
+
+ virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
+
+ /** Text serialization for the CSV format.
+ */
+ virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
+ virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
+
+ /** Text serialization for displaying on a terminal or saving into a text file, and the like.
+ * Without escaping or quoting.
+ */
+ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
+
+ /** Text deserialization in case when buffer contains only one value, without any escaping and delimiters.
+ */
+ virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
+
+ /** Text serialization intended for using in JSON format.
+ */
+ virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
+ virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
+ virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const
+ {
+ serializeTextJSON(column, row_num, ostr, settings);
+ }
+
+
+ /** Text serialization for putting into the XML format.
+ */
+ virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+ {
+ serializeText(column, row_num, ostr, settings);
+ }
+
+ /** Text deserialization without escaping and quoting. Reads all data until first \n or \t
+ * into a temporary string and then call deserializeWholeText. It was implemented this way
+ * because this function is rarely used and because proper implementation requires a lot of
+ * additional code in data types serialization and ReadHelpers.
+ */
+ virtual void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
+ virtual void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
+
+ static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path);
+ static String getFileNameForStream(const String & name_in_storage, const SubstreamPath & path);
+ static String getSubcolumnNameForStream(const SubstreamPath & path);
+ static String getSubcolumnNameForStream(const SubstreamPath & path, size_t prefix_len);
+
+ static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column);
+ static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path);
+
+ static bool isSpecialCompressionAllowed(const SubstreamPath & path);
+
+ static size_t getArrayLevel(const SubstreamPath & path);
+ static bool hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len);
+ static SubstreamData createFromPath(const SubstreamPath & path, size_t prefix_len);
+
+protected:
+ template <typename State, typename StatePtr>
+ State * checkAndGetState(const StatePtr & state) const;
+
+ [[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const;
+};
+
+using SerializationPtr = std::shared_ptr<const ISerialization>;
+using Serializations = std::vector<SerializationPtr>;
+using SerializationByName = std::unordered_map<String, SerializationPtr>;
+
+/// Checks that @state is not empty and that it points to an object of type State.
+/// Returns a raw pointer to the state cast to State, throws LOGICAL_ERROR otherwise.
+template <typename State, typename StatePtr>
+State * ISerialization::checkAndGetState(const StatePtr & state) const
+{
+    if (!state)
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+            "Got empty state for {}", demangle(typeid(*this).name()));
+    }
+
+    if (auto * state_concrete = typeid_cast<State *>(state.get()))
+        return state_concrete;
+
+    /// Dereference outside of typeid() to report the dynamic type of the state.
+    auto & state_ref = *state;
+    throw Exception(ErrorCodes::LOGICAL_ERROR,
+        "Invalid State for {}. Expected: {}, got {}",
+        demangle(typeid(*this).name()),
+        demangle(typeid(State).name()),
+        demangle(typeid(state_ref).name()));
+}
+
+bool isOffsetsOfNested(const ISerialization::SubstreamPath & path);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.cpp b/contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.cpp
new file mode 100644
index 00000000000..56641424396
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.cpp
@@ -0,0 +1,281 @@
+#include <DataTypes/Serializations/JSONDataParser.h>
+#include <Common/JSONParsers/SimdJSONParser.h>
+#include <Common/JSONParsers/RapidJSONParser.h>
+#include <Common/checkStackSize.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+}
+
+/// Parses the JSON document of `length` bytes starting at `begin`.
+/// Returns an empty optional if the underlying parser rejects the input; otherwise
+/// returns every extracted path together with the value found at it.
+template <typename ParserImpl>
+std::optional<ParseResult> JSONDataParser<ParserImpl>::parse(const char * begin, size_t length)
+{
+    Element root;
+    if (!parser.parse(std::string_view{begin, length}, root))
+        return std::nullopt;
+
+    ParseContext context;
+    traverse(root, context);
+
+    ParseResult result;
+    result.values = std::move(context.values);
+
+    /// Each collected list of parts becomes a PathInData.
+    result.paths.reserve(context.paths.size());
+    for (auto & parts : context.paths)
+        result.paths.emplace_back(std::move(parts));
+
+    return result;
+}
+
+/// Dispatches on the kind of `element`: objects and arrays are traversed recursively,
+/// any other element is recorded as a (current path, value) pair in `ctx`.
+template <typename ParserImpl>
+void JSONDataParser<ParserImpl>::traverse(const Element & element, ParseContext & ctx)
+{
+    /// The traversal is recursive, so guard against too deeply nested documents.
+    checkStackSize();
+
+    if (element.isObject())
+    {
+        traverseObject(element.getObject(), ctx);
+        return;
+    }
+
+    if (element.isArray())
+    {
+        traverseArray(element.getArray(), ctx);
+        return;
+    }
+
+    /// Leaf value: remember the path built so far and the value itself.
+    ctx.paths.push_back(ctx.builder.getParts());
+    ctx.values.push_back(getValueAsField(element));
+}
+
+/// Traverses all key-value pairs of a JSON object, extending the current path
+/// with each key while its value is being traversed.
+template <typename ParserImpl>
+void JSONDataParser<ParserImpl>::traverseObject(const JSONObject & object, ParseContext & ctx)
+{
+    ctx.paths.reserve(ctx.paths.size() + object.size());
+    ctx.values.reserve(ctx.values.size() + object.size());
+
+    for (auto it = object.begin(); it != object.end(); ++it)
+    {
+        const auto & [key, value] = *it;
+
+        /// PathInDataBuilder::append() adds nothing for an empty key, so popBack() must be
+        /// called only if a part was really added. Otherwise the part of the enclosing
+        /// object would be removed (or pop_back() would be called on an empty vector).
+        const size_t depth_before = ctx.builder.getParts().size();
+        ctx.builder.append(key, false);
+        const bool key_appended = ctx.builder.getParts().size() != depth_before;
+
+        traverse(value, ctx);
+
+        if (key_appended)
+            ctx.builder.popBack();
+    }
+}
+
+/// Traverses a JSON array: for every path met inside its elements an Array of values
+/// is collected, and each such array is added to `ctx` under the prefix path merged
+/// with the path inside the element.
+template <typename ParserImpl>
+void JSONDataParser<ParserImpl>::traverseArray(const JSONArray & array, ParseContext & ctx)
+{
+    ParseArrayContext array_ctx;
+    array_ctx.total_size = array.size();
+
+    /// current_size counts the elements that have been fully processed.
+    for (auto it = array.begin(); it != array.end(); ++it, ++array_ctx.current_size)
+        traverseArrayElement(*it, array_ctx);
+
+    auto && collected = array_ctx.arrays_by_path;
+
+    /// Nothing was collected: the array itself is stored as an empty Array at the current path.
+    if (collected.empty())
+    {
+        ctx.paths.push_back(ctx.builder.getParts());
+        ctx.values.push_back(Array());
+        return;
+    }
+
+    ctx.paths.reserve(ctx.paths.size() + collected.size());
+    ctx.values.reserve(ctx.values.size() + collected.size());
+
+    for (auto && [_, entry] : collected)
+    {
+        auto && [element_path, element_values] = entry;
+
+        /// Temporarily extend the prefix path with the path of the array element.
+        ctx.builder.append(element_path, true);
+        ctx.paths.push_back(ctx.builder.getParts());
+        ctx.values.push_back(std::move(element_values));
+        ctx.builder.popBack(element_path.size());
+    }
+}
+/** Traverses one element of a JSON array and merges its (path, value) pairs into `ctx`.
+ * `ctx.arrays_by_path` keeps, for every path seen so far, an Array that gets one entry
+ * per array element. Null values of the element are skipped. A path seen for the first
+ * time gets an array pre-filled for the `ctx.current_size` already processed elements.
+ * For values that belong to a Nested part (see getHashOfNestedPath) the array size is
+ * recorded per element in `ctx.nested_sizes_by_path`; a different size for the same
+ * Nested within one element throws LOGICAL_ERROR.
+ */
+template <typename ParserImpl>
+void JSONDataParser<ParserImpl>::traverseArrayElement(const Element & element, ParseArrayContext & ctx)
+{
+ ParseContext element_ctx;
+ traverse(element, element_ctx);
+
+ auto & [_, paths, values] = element_ctx;
+ size_t size = paths.size();
+ size_t keys_to_update = ctx.arrays_by_path.size(); /// Number of already known paths not yet met in this element.
+
+ for (size_t i = 0; i < size; ++i)
+ {
+ if (values[i].isNull())
+ continue;
+
+ UInt128 hash = PathInData::getPartsHash(paths[i].begin(), paths[i].end());
+ if (auto * found = ctx.arrays_by_path.find(hash))
+ {
+ auto & path_array = found->getMapped().second;
+ assert(path_array.size() == ctx.current_size);
+
+ /// If current element of array is part of Nested,
+ /// collect its size or check it if the size of
+ /// the Nested has been already collected.
+ auto nested_hash = getHashOfNestedPath(paths[i], values[i]);
+ if (nested_hash)
+ {
+ size_t array_size = values[i].template get<const Array &>().size();
+ auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash];
+
+ if (current_nested_sizes.size() == ctx.current_size)
+ current_nested_sizes.push_back(array_size);
+ else if (array_size != current_nested_sizes.back())
+ throw Exception(ErrorCodes::LOGICAL_ERROR,
+ "Array sizes mismatched ({} and {})", array_size, current_nested_sizes.back());
+ }
+
+ path_array.push_back(std::move(values[i]));
+ --keys_to_update;
+ }
+ else
+ {
+ /// We found a new key. Add an empty array with current size,
+ /// i.e. with default values for all previously processed elements.
+ Array path_array;
+ path_array.reserve(ctx.total_size);
+ path_array.resize(ctx.current_size);
+
+ auto nested_hash = getHashOfNestedPath(paths[i], values[i]);
+ if (nested_hash)
+ {
+ size_t array_size = values[i].template get<const Array &>().size();
+ auto & current_nested_sizes = ctx.nested_sizes_by_path[*nested_hash];
+
+ if (current_nested_sizes.empty())
+ {
+ current_nested_sizes.resize(ctx.current_size); /// This Nested was absent in all previous elements: their sizes are zero.
+ }
+ else
+ {
+ /// If newly added element is part of the Nested then
+ /// resize its elements to keep correct sizes of Nested arrays.
+ for (size_t j = 0; j < ctx.current_size; ++j)
+ path_array[j] = Array(current_nested_sizes[j]);
+ }
+
+ if (current_nested_sizes.size() == ctx.current_size)
+ current_nested_sizes.push_back(array_size);
+ else if (array_size != current_nested_sizes.back())
+ throw Exception(ErrorCodes::LOGICAL_ERROR,
+ "Array sizes mismatched ({} and {})", array_size, current_nested_sizes.back());
+ }
+
+ path_array.push_back(std::move(values[i]));
+
+ auto & elem = ctx.arrays_by_path[hash];
+ elem.first = std::move(paths[i]);
+ elem.second = std::move(path_array);
+ }
+ }
+
+ /// If some of the keys are missed in current element,
+ /// add default values for them.
+ if (keys_to_update)
+ fillMissedValuesInArrays(ctx);
+}
+
+/// Appends a default value to every collected array that has not received
+/// a value from the element currently being processed.
+template <typename ParserImpl>
+void JSONDataParser<ParserImpl>::fillMissedValuesInArrays(ParseArrayContext & ctx)
+{
+    for (auto & [_, entry] : ctx.arrays_by_path)
+    {
+        auto & [entry_path, entry_values] = entry;
+        assert(entry_values.size() == ctx.current_size || entry_values.size() == ctx.current_size + 1);
+
+        /// The array already has a value for the current element.
+        if (entry_values.size() != ctx.current_size)
+            continue;
+
+        /// Prefer an array of the size collected for the Nested; otherwise insert a default Field.
+        if (!tryInsertDefaultFromNested(ctx, entry_path, entry_values))
+            entry_values.emplace_back();
+    }
+}
+
+/// If `path` belongs to a Nested whose sizes have been collected, appends to `array`
+/// an Array of the size collected for the current element and returns true.
+/// Returns false (and leaves `array` untouched) otherwise.
+template <typename ParserImpl>
+bool JSONDataParser<ParserImpl>::tryInsertDefaultFromNested(
+    ParseArrayContext & ctx, const PathInData::Parts & path, Array & array)
+{
+    if (path.empty() || array.empty())
+        return false;
+
+    /// Last element is not Null, because otherwise this path wouldn't exist.
+    const auto nested_hash = getHashOfNestedPath(path, array.back());
+    if (!nested_hash)
+        return false;
+
+    auto * sizes_cell = ctx.nested_sizes_by_path.find(*nested_hash);
+    if (!sizes_cell)
+        return false;
+
+    auto & nested_sizes = sizes_cell->getMapped();
+    assert(nested_sizes.size() == ctx.current_size || nested_sizes.size() == ctx.current_size + 1);
+
+    /// None of the keys of this Nested were present in the current element: its size is zero.
+    if (nested_sizes.size() == ctx.current_size)
+        nested_sizes.push_back(0);
+
+    array.push_back(Array(nested_sizes.back()));
+    return true;
+}
+
+/// Converts a scalar JSON element to a Field. JSON null becomes a default (Null) Field.
+/// Throws LOGICAL_ERROR for an element of any other kind.
+template <typename ParserImpl>
+Field JSONDataParser<ParserImpl>::getValueAsField(const Element & element)
+{
+    /// The order of the checks is significant and must be kept.
+    if (element.isBool())
+        return element.getBool();
+
+    if (element.isInt64())
+        return element.getInt64();
+
+    if (element.isUInt64())
+        return element.getUInt64();
+
+    if (element.isDouble())
+        return element.getDouble();
+
+    if (element.isString())
+        return element.getString();
+
+    if (element.isNull())
+        return Field();
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unsupported type of JSON field");
+}
+
+/// Returns the hash of the shortest prefix of `path` that ends with a part marked as nested,
+/// or an empty optional if `value` is not an Array or no part of `path` is nested.
+template <typename ParserImpl>
+std::optional<UInt128> JSONDataParser<ParserImpl>::getHashOfNestedPath(const PathInData::Parts & path, const Field & value)
+{
+    if (path.empty() || value.getType() != Field::Types::Array)
+        return std::nullopt;
+
+    /// The first nested key is used because there may be a tuple of Nested, i.e.
+    /// several arrays with the same prefix but with independent sizes.
+    /// E.g. for an array element of type `k2 Tuple(k3 Nested(...), k5 Nested(...))`
+    /// subcolumns `k2.k3` and `k2.k5` may have independent sizes, so the keys
+    /// `k3` and `k5` must be extracted instead of `k2`.
+    for (auto it = path.begin(); it != path.end(); ++it)
+    {
+        if (it->is_nested)
+            return PathInData::getPartsHash(path.begin(), std::next(it));
+    }
+
+    return std::nullopt;
+}
+
+#if USE_SIMDJSON
+ template class JSONDataParser<SimdJSONParser>;
+#endif
+
+#if USE_RAPIDJSON
+ template class JSONDataParser<RapidJSONParser>;
+#endif
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.h b/contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.h
new file mode 100644
index 00000000000..b22014df72a
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/JSONDataParser.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <IO/ReadHelpers.h>
+#include <Common/HashTable/HashMap.h>
+#include <DataTypes/Serializations/PathInData.h>
+
+namespace DB
+{
+
+class ReadBuffer;
+/** Extracts all paths and the values at them from a JSON document.
+ * `ParserImpl` provides the JSON parser itself and its Element/Object/Array types.
+ */
+template <typename ParserImpl>
+class JSONDataParser
+{
+public:
+ static void readJSON(String & s, ReadBuffer & buf) /// Reads the text of one JSON object from `buf` into `s`.
+ {
+ readJSONObjectPossiblyInvalid(s, buf);
+ }
+
+ std::optional<ParseResult> parse(const char * begin, size_t length); /// Empty optional if the parser rejects the input.
+
+private:
+ using Element = typename ParserImpl::Element;
+ using JSONObject = typename ParserImpl::Object;
+ using JSONArray = typename ParserImpl::Array;
+
+ struct ParseContext /// State of traversal of a document (or of one array element).
+ {
+ PathInDataBuilder builder; /// Path from the root to the element being traversed.
+ std::vector<PathInData::Parts> paths; /// paths[i] is the path of values[i].
+ std::vector<Field> values;
+ };
+
+ using PathPartsWithArray = std::pair<PathInData::Parts, Array>;
+ using PathToArray = HashMapWithStackMemory<UInt128, PathPartsWithArray, UInt128TrivialHash, 5>; /// Keyed by PathInData::getPartsHash of the path.
+ using PathToSizes = HashMapWithStackMemory<UInt128, std::vector<size_t>, UInt128TrivialHash, 5>; /// Keyed by the hash returned by getHashOfNestedPath.
+
+ struct ParseArrayContext /// State of traversal of one JSON array.
+ {
+ size_t current_size = 0; /// Number of array elements processed so far.
+ size_t total_size = 0; /// Total number of elements in the array.
+
+ PathToArray arrays_by_path; /// Values collected per path, one entry per array element.
+ PathToSizes nested_sizes_by_path; /// Sizes of Nested arrays, one entry per array element.
+ Arena strings_pool; /// NOTE(review): not referenced by the implementation in JSONDataParser.cpp - confirm it is still needed.
+ };
+
+ void traverse(const Element & element, ParseContext & ctx);
+ void traverseObject(const JSONObject & object, ParseContext & ctx);
+ void traverseArray(const JSONArray & array, ParseContext & ctx);
+ void traverseArrayElement(const Element & element, ParseArrayContext & ctx);
+
+ static void fillMissedValuesInArrays(ParseArrayContext & ctx);
+ static bool tryInsertDefaultFromNested(
+ ParseArrayContext & ctx, const PathInData::Parts & path, Array & array);
+
+ static Field getValueAsField(const Element & element);
+ static std::optional<UInt128> getHashOfNestedPath(const PathInData::Parts & path, const Field & value);
+
+ ParserImpl parser;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/PathInData.cpp b/contrib/clickhouse/src/DataTypes/Serializations/PathInData.cpp
new file mode 100644
index 00000000000..cf78d7cbb14
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/PathInData.cpp
@@ -0,0 +1,156 @@
+#include <DataTypes/Serializations/PathInData.h>
+#include <DataTypes/NestedUtils.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeArray.h>
+#include <Columns/ColumnTuple.h>
+#include <Columns/ColumnArray.h>
+#include <Common/SipHash.h>
+
+#include <boost/algorithm/string/split.hpp>
+#include <boost/algorithm/string.hpp>
+
+namespace DB
+{
+
+/// Creates a path from its dot-separated string representation.
+/// Every substring between dots becomes a part that is not nested and has no anonymous
+/// array levels. An empty string produces a single part with an empty key.
+PathInData::PathInData(std::string_view path_)
+    : path(path_)
+{
+    /// Parts must point to the own copy of the string, not to `path_`.
+    const char * begin = path.data();
+    const char * end = path.data() + path.size();
+
+    for (const char * it = path.data(); it != end; ++it)
+    {
+        if (*it == '.')
+        {
+            size_t size = static_cast<size_t>(it - begin);
+            parts.emplace_back(std::string_view{begin, size}, false, 0);
+            begin = it + 1;
+        }
+    }
+
+    /// The last part (or the only one, if there are no dots).
+    /// anonymous_array_level is an integer, so an integer literal is passed, like in the loop above.
+    size_t size = static_cast<size_t>(end - begin);
+    parts.emplace_back(std::string_view{begin, size}, false, 0);
+}
+
+/// Creates a path from parts: first the full dot-separated string is built,
+/// then own parts are created that point to that string.
+PathInData::PathInData(const Parts & parts_)
+{
+    /// The order matters: buildParts() relies on `path` being already built.
+    buildPath(parts_);
+    buildParts(parts_);
+}
+
+/// Copy constructor. The parts cannot be copied as is, because their string_view-s
+/// would point to the string owned by `other`, so they are rebuilt over the own copy.
+PathInData::PathInData(const PathInData & other)
+    : path(other.path)
+{
+    buildParts(other.parts);
+}
+
+/// Copy assignment. Like in the copy constructor, parts are rebuilt to point to the own string.
+PathInData & PathInData::operator=(const PathInData & other)
+{
+    /// Nothing to do on self-assignment.
+    if (this == &other)
+        return *this;
+
+    path = other.path;
+    buildParts(other.parts);
+    return *this;
+}
+
+/// Calculates a 128-bit hash of the range of parts [begin, end).
+/// The number of parts and all fields of every part contribute to the hash.
+UInt128 PathInData::getPartsHash(const Parts::const_iterator & begin, const Parts::const_iterator & end)
+{
+    SipHash hash;
+    hash.update(std::distance(begin, end));
+
+    auto it = begin;
+    while (it != end)
+    {
+        const auto & part = *it;
+        hash.update(part.key.data(), part.key.length());
+        hash.update(part.is_nested);
+        hash.update(part.anonymous_array_level);
+        ++it;
+    }
+
+    return hash.get128();
+}
+
+/// Builds the full path: keys of `other_parts` joined by dots.
+/// Does nothing (the current path is kept) if `other_parts` is empty.
+void PathInData::buildPath(const Parts & other_parts)
+{
+    if (other_parts.empty())
+        return;
+
+    path.clear();
+
+    const auto first = other_parts.begin();
+    for (auto it = first; it != other_parts.end(); ++it)
+    {
+        /// The separator goes before every key except the first one.
+        if (it != first)
+            path += ".";
+        path += it->key;
+    }
+}
+
+/// Rebuilds own parts from `other_parts`, so that every key is a string_view into own `path`.
+/// `path` must already contain the keys of `other_parts` joined by dots.
+/// Also recalculates `has_nested`.
+void PathInData::buildParts(const Parts & other_parts)
+{
+    /// The state is always rebuilt from scratch, also for empty `other_parts`.
+    /// Otherwise, after assignment of an empty path, the old parts would be kept with
+    /// string_view-s into the already changed `path`, and `has_nested` could stay true
+    /// after assignment of a path without nested parts.
+    Parts new_parts;
+    new_parts.reserve(other_parts.size());
+    bool new_has_nested = false;
+
+    const char * begin = path.data();
+    for (const auto & part : other_parts)
+    {
+        new_has_nested |= part.is_nested;
+        new_parts.emplace_back(std::string_view{begin, part.key.length()}, part.is_nested, part.anonymous_array_level);
+
+        /// Skip the key and the dot after it.
+        begin += part.key.length() + 1;
+    }
+
+    /// Built in a temporary and swapped in at the end, so the function also works if `other_parts` is `parts`.
+    parts = std::move(new_parts);
+    has_nested = new_has_nested;
+}
+
+/// Hash for using PathInData in hash tables: the 128-bit hash of all parts
+/// folded to size_t by XOR of its two halves.
+size_t PathInData::Hash::operator()(const PathInData & value) const
+{
+    const auto & value_parts = value.parts;
+    auto parts_hash = getPartsHash(value_parts.begin(), value_parts.end());
+    return parts_hash.items[0] ^ parts_hash.items[1];
+}
+
+/// Appends one key to the path. If `is_array` is true, the previous part (if any) is marked
+/// as nested; if there is no previous part, an anonymous array level is accumulated instead.
+/// An empty key adds no part.
+PathInDataBuilder & PathInDataBuilder::append(std::string_view key, bool is_array)
+{
+    const bool had_parts = !parts.empty();
+
+    if (!had_parts)
+        current_anonymous_array_level += is_array;
+
+    if (key.empty())
+        return *this;
+
+    if (had_parts)
+        parts.back().is_nested = is_array;
+
+    /// The accumulated anonymous array levels are attached to the new part.
+    parts.emplace_back(key, false, current_anonymous_array_level);
+    current_anonymous_array_level = 0;
+
+    return *this;
+}
+
+/// Appends several parts to the path. If `is_array` is true, the previous part (if any) is marked
+/// as nested; if there is no previous part, an anonymous array level is accumulated instead.
+/// The accumulated anonymous array levels are added to every appended part.
+PathInDataBuilder & PathInDataBuilder::append(const PathInData::Parts & path, bool is_array)
+{
+    const bool had_parts = !parts.empty();
+
+    if (!had_parts)
+        current_anonymous_array_level += is_array;
+
+    if (path.empty())
+        return *this;
+
+    if (had_parts)
+        parts.back().is_nested = is_array;
+
+    const size_t first_appended = parts.size();
+    parts.insert(parts.end(), path.begin(), path.end());
+
+    for (size_t i = first_appended; i < parts.size(); ++i)
+        parts[i].anonymous_array_level += current_anonymous_array_level;
+
+    current_anonymous_array_level = 0;
+    return *this;
+}
+
+/// Removes the last part of the path. The path must not be empty.
+void PathInDataBuilder::popBack()
+{
+    parts.pop_back();
+}
+
+/// Removes the last `n` parts of the path. `n` must not exceed the number of parts.
+void PathInDataBuilder::popBack(size_t n)
+{
+    assert(n <= parts.size());
+
+    const size_t remaining = parts.size() - n;
+    parts.resize(remaining);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/PathInData.h b/contrib/clickhouse/src/DataTypes/Serializations/PathInData.h
new file mode 100644
index 00000000000..5624348bee3
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/PathInData.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <Core/Field.h>
+#include <bitset>
+
+namespace DB
+{
+
+/// Class that represents path in document, e.g. JSON. Owns the full dot-separated path and a list of parts that refer to it.
+class PathInData
+{
+public:
+ struct Part
+ {
+ Part() = default;
+ Part(std::string_view key_, bool is_nested_, UInt8 anonymous_array_level_)
+ : key(key_), is_nested(is_nested_), anonymous_array_level(anonymous_array_level_)
+ {
+ }
+
+ /// Name of part of path. It is a view: the string is owned by someone else.
+ std::string_view key;
+
+ /// If this part is Nested, i.e. element
+ /// related to this key is the array of objects.
+ bool is_nested = false;
+
+ /// Number of array levels between current key and previous key.
+ /// E.g. in JSON {"k1": [[[{"k2": 1, "k3": 2}]]]}
+ /// "k1" is nested and has anonymous_array_level = 0.
+ /// "k2" and "k3" are not nested and have anonymous_array_level = 2.
+ UInt8 anonymous_array_level = 0;
+
+ bool operator==(const Part & other) const = default;
+ };
+
+ using Parts = std::vector<Part>;
+
+ PathInData() = default;
+ explicit PathInData(std::string_view path_); /// Splits `path_` by dots; no part is marked as nested.
+ explicit PathInData(const Parts & parts_); /// Joins keys of `parts_` by dots and keeps their flags.
+
+ PathInData(const PathInData & other); /// Custom copy: parts are rebuilt to point to the own `path`.
+ PathInData & operator=(const PathInData & other);
+
+ static UInt128 getPartsHash(const Parts::const_iterator & begin, const Parts::const_iterator & end); /// Hash of the range of parts [begin, end).
+
+ bool empty() const { return parts.empty(); }
+
+ const String & getPath() const { return path; }
+ const Parts & getParts() const { return parts; }
+
+ bool isNested(size_t i) const { return parts[i].is_nested; } /// No bounds check: `i` must be less than the number of parts.
+ bool hasNested() const { return has_nested; }
+
+ bool operator==(const PathInData & other) const { return parts == other.parts; }
+ struct Hash { size_t operator()(const PathInData & value) const; };
+
+private:
+ /// Creates full path from parts.
+ void buildPath(const Parts & other_parts);
+
+ /// Creates own parts from other_parts with string_view-s that point to own @path.
+ void buildParts(const Parts & other_parts);
+
+ /// The full path. Parts are separated by dots.
+ String path;
+
+ /// Parts of the path. All string_view-s in parts must point to the @path.
+ Parts parts;
+
+ /// True if at least one part is nested.
+ /// Cached to avoid linear complexity at 'hasNested'.
+ bool has_nested = false;
+};
+/// Builds a list of path parts incrementally, e.g. during traversal of a document.
+class PathInDataBuilder
+{
+public:
+ const PathInData::Parts & getParts() const { return parts; }
+
+ PathInDataBuilder & append(std::string_view key, bool is_array); /// An empty key adds no part.
+ PathInDataBuilder & append(const PathInData::Parts & path, bool is_array);
+
+ void popBack(); /// Removes the last part; the list of parts must not be empty.
+ void popBack(size_t n); /// Removes the last `n` parts.
+
+private:
+ PathInData::Parts parts;
+
+ /// Number of array levels without key to which
+ /// next non-empty key will be nested.
+ /// Example: for JSON { "k1": [[{"k2": 1, "k3": 2}]] }
+ /// `k2` and `k3` have anonymous_array_level = 1 in that case.
+ size_t current_anonymous_array_level = 0;
+};
+
+using PathsInData = std::vector<PathInData>;
+
+/// Result of parsing of a document.
+/// Contains all paths extracted from document
+/// and values which are related to them.
+struct ParseResult
+{
+ std::vector<PathInData> paths; /// paths[i] is the path of values[i].
+ std::vector<Field> values;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
new file mode 100644
index 00000000000..c482c9623e9
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.cpp
@@ -0,0 +1,218 @@
+#include <DataTypes/Serializations/SerializationAggregateFunction.h>
+
+#include <IO/WriteHelpers.h>
+
+#include <Columns/ColumnAggregateFunction.h>
+
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+#include <Common/AlignedBuffer.h>
+#include <Common/Arena.h>
+
+#include <Formats/FormatSettings.h>
+#include <Formats/ProtobufReader.h>
+#include <Formats/ProtobufWriter.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+
+namespace DB
+{
+
+/// Writes the serialized state stored in the Field as a binary string.
+void SerializationAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
+{
+    writeBinary(field.get<const AggregateFunctionStateData &>().data, ostr);
+}
+
+/// Reads a serialized state as a binary string into the Field and sets the type name for it.
+void SerializationAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const
+{
+    field = AggregateFunctionStateData();
+
+    auto & state_data = field.get<AggregateFunctionStateData &>();
+    readBinary(state_data.data, istr);
+    state_data.name = type_name;
+}
+
+/// Serializes the state at row `row_num` of the column with the aggregate function.
+void SerializationAggregateFunction::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & states = assert_cast<const ColumnAggregateFunction &>(column).getData();
+    function->serialize(states[row_num], ostr, version);
+}
+
+/// Creates a new state in the arena of the column, deserializes it from `istr`
+/// and appends it to the column. The state is destroyed if deserialization throws.
+void SerializationAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto & aggregate_column = assert_cast<ColumnAggregateFunction &>(column);
+    Arena & arena = aggregate_column.createOrGetArena();
+
+    const size_t state_size = function->sizeOfData();
+    AggregateDataPtr state = arena.alignedAlloc(state_size, function->alignOfData());
+    function->create(state);
+
+    try
+    {
+        function->deserialize(state, istr, version, &arena);
+    }
+    catch (...)
+    {
+        /// The state was not added to the column, so nobody else will destroy it.
+        function->destroy(state);
+        throw;
+    }
+
+    aggregate_column.getData().push_back(state);
+}
+
+/// Serializes states of rows [offset, offset + limit) one after another.
+/// If `limit` is zero or the range goes beyond the end of the column, serializes up to the end.
+/// Writes nothing if `offset` is not less than the size of the column.
+void SerializationAggregateFunction::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+    const ColumnAggregateFunction & real_column = typeid_cast<const ColumnAggregateFunction &>(column);
+    const ColumnAggregateFunction::Container & vec = real_column.getData();
+
+    /// Bounds are calculated on indices: forming an iterator beyond end() is undefined behaviour.
+    const size_t size = vec.size();
+    if (offset >= size)
+        return;
+
+    /// Compared as `limit < size - offset` to avoid overflow of `offset + limit`.
+    const size_t end = (limit && limit < size - offset) ? offset + limit : size;
+
+    for (size_t i = offset; i < end; ++i)
+        function->serialize(vec[i], ostr, version);
+}
+
+/// Deserializes at most `limit` states and appends them to the column.
+/// Stops earlier if the end of `istr` is reached. The states are allocated in the arena
+/// of the column; a state that failed to be deserialized is destroyed before rethrowing.
+void SerializationAggregateFunction::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
+{
+    auto & aggregate_column = typeid_cast<ColumnAggregateFunction &>(column);
+    auto & states = aggregate_column.getData();
+
+    Arena & arena = aggregate_column.createOrGetArena();
+    aggregate_column.set(function, version);
+    states.reserve(states.size() + limit);
+
+    const size_t state_size = function->sizeOfData();
+    const size_t state_alignment = function->alignOfData();
+
+    for (size_t i = 0; i < limit && !istr.eof(); ++i)
+    {
+        AggregateDataPtr state = arena.alignedAlloc(state_size, state_alignment);
+        function->create(state);
+
+        try
+        {
+            function->deserialize(state, istr, version, &arena);
+        }
+        catch (...)
+        {
+            function->destroy(state);
+            throw;
+        }
+
+        states.push_back(state);
+    }
+}
+
+/// Returns the binary serialization of the state at row `row_num` as a string.
+static String serializeToString(const AggregateFunctionPtr & function, const IColumn & column, size_t row_num, size_t version)
+{
+    const auto & states = assert_cast<const ColumnAggregateFunction &>(column).getData();
+
+    WriteBufferFromOwnString out;
+    function->serialize(states[row_num], out, version);
+    return out.str();
+}
+
+/// Creates a new state in the arena of the column, deserializes it from the string `s`
+/// and appends it to the column. The state is destroyed if deserialization throws.
+static void deserializeFromString(const AggregateFunctionPtr & function, IColumn & column, const String & s, size_t version)
+{
+    auto & aggregate_column = assert_cast<ColumnAggregateFunction &>(column);
+    Arena & arena = aggregate_column.createOrGetArena();
+
+    const size_t state_size = function->sizeOfData();
+    AggregateDataPtr state = arena.alignedAlloc(state_size, function->alignOfData());
+    function->create(state);
+
+    try
+    {
+        ReadBufferFromString in(s);
+        function->deserialize(state, in, version, &arena);
+    }
+    catch (...)
+    {
+        /// The state was not added to the column, so nobody else will destroy it.
+        function->destroy(state);
+        throw;
+    }
+
+    aggregate_column.getData().push_back(state);
+}
+
+/// Writes the binary serialization of the state as is.
+void SerializationAggregateFunction::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const String serialized_state = serializeToString(function, column, row_num, version);
+    writeString(serialized_state, ostr);
+}
+
+
+/// Writes the binary serialization of the state as an escaped string.
+void SerializationAggregateFunction::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const String serialized_state = serializeToString(function, column, row_num, version);
+    writeEscapedString(serialized_state, ostr);
+}
+
+
+/// Reads an escaped string and deserializes the state from it.
+void SerializationAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    String serialized_state;
+    readEscapedString(serialized_state, istr);
+
+    deserializeFromString(function, column, serialized_state, version);
+}
+
+
+/// Writes the binary serialization of the state as a quoted string.
+void SerializationAggregateFunction::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const String serialized_state = serializeToString(function, column, row_num, version);
+    writeQuotedString(serialized_state, ostr);
+}
+
+
+/// Reads a quoted string (SQL style) and deserializes the state from it.
+void SerializationAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    String serialized_state;
+    readQuotedStringWithSQLStyle(serialized_state, istr);
+
+    deserializeFromString(function, column, serialized_state, version);
+}
+
+
+/// Reads everything up to the end of `istr` and deserializes the state from it.
+void SerializationAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    String serialized_state;
+    readStringUntilEOF(serialized_state, istr);
+
+    deserializeFromString(function, column, serialized_state, version);
+}
+
+
+/// Writes the binary serialization of the state as a JSON string.
+void SerializationAggregateFunction::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const String serialized_state = serializeToString(function, column, row_num, version);
+    writeJSONString(serialized_state, ostr, settings);
+}
+
+
+/// Reads a JSON string and deserializes the state from it.
+void SerializationAggregateFunction::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    String serialized_state;
+    readJSONString(serialized_state, istr);
+
+    deserializeFromString(function, column, serialized_state, version);
+}
+
+
+/// Writes the binary serialization of the state as the text of an XML element.
+void SerializationAggregateFunction::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const String serialized_state = serializeToString(function, column, row_num, version);
+    writeXMLStringForTextElement(serialized_state, ostr);
+}
+
+
+/// Writes the binary serialization of the state as a CSV string.
+void SerializationAggregateFunction::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const String serialized_state = serializeToString(function, column, row_num, version);
+    writeCSV(serialized_state, ostr);
+}
+
+
+/// Reads a CSV field according to the CSV settings and deserializes the state from it.
+void SerializationAggregateFunction::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    String serialized_state;
+    readCSV(serialized_state, istr, settings.csv);
+
+    deserializeFromString(function, column, serialized_state, version);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.h
new file mode 100644
index 00000000000..4212298bbc1
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationAggregateFunction.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <AggregateFunctions/IAggregateFunction.h>
+
+#include <DataTypes/Serializations/ISerialization.h>
+
+
+namespace DB
+{
+/// Serialization of states of an aggregate function. Text formats carry the binary serialization of the state as a string.
+class SerializationAggregateFunction final : public ISerialization
+{
+private:
+ AggregateFunctionPtr function; /// Used to create, serialize, deserialize and destroy states.
+ String type_name; /// Stored as the name of states deserialized into a Field.
+ size_t version; /// Passed to every serialize/deserialize call of the function.
+
+public:
+ static constexpr bool is_parametric = true;
+
+ SerializationAggregateFunction(const AggregateFunctionPtr & function_, String type_name_, size_t version_)
+ : function(function_), type_name(std::move(type_name_)), version(version_) {}
+
+ /// NOTE These two functions for serializing single values are incompatible with the functions below.
+ /// They read and write the state as a binary string, while the functions below use the format of the aggregate function itself.
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; /// limit == 0 means up to the end of the column.
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; /// avg_value_size_hint is not used.
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.cpp
new file mode 100644
index 00000000000..e01c1aea0e9
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.cpp
@@ -0,0 +1,620 @@
+#include <DataTypes/Serializations/SerializationArray.h>
+#include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/Serializations/SerializationNumber.h>
+#include <DataTypes/Serializations/SerializationNamed.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Columns/ColumnArray.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadBufferFromString.h>
+#include <IO/WriteBufferFromString.h>
+
+#include <Formats/FormatSettings.h>
+#include <Formats/ProtobufReader.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_READ_ALL_DATA;
+ extern const int CANNOT_READ_ARRAY_FROM_TEXT;
+ extern const int LOGICAL_ERROR;
+ extern const int TOO_LARGE_ARRAY_SIZE;
+}
+
+/// Largest size accepted for a single array by deserializeArraySizesPositionIndependent (2^30 elements).
+static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
+/// Largest number of nested elements that one deserializeBinaryBulkWithMultipleStreams call agrees to read (2^40).
+static constexpr size_t MAX_ARRAYS_SIZE = 1ULL << 40;
+
+
+/// Writes an Array field as a VarUInt element count followed by every element,
+/// each one written by the nested serialization.
+void SerializationArray::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const Array & elements = field.get<const Array &>();
+
+    writeVarUInt(elements.size(), ostr);
+    for (const auto & element : elements)
+        nested->serializeBinary(element, ostr, settings);
+}
+
+
+/// Reads an Array field: a VarUInt element count followed by that many nested values.
+/// Throws TOO_LARGE_ARRAY_SIZE when the count exceeds settings.max_binary_array_size
+/// (a limit of 0 disables the check).
+void SerializationArray::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    size_t count = 0;
+    readVarUInt(count, istr);
+
+    const bool limit_enabled = settings.max_binary_array_size != 0;
+    if (limit_enabled && count > settings.max_binary_array_size)
+        throw Exception(
+            ErrorCodes::TOO_LARGE_ARRAY_SIZE,
+            "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting "
+            "format_binary_max_array_size",
+            count,
+            settings.max_binary_array_size);
+
+    field = Array();
+    Array & result = field.get<Array &>();
+    result.reserve(count);
+
+    /// Each element is default-constructed in place and then filled by the nested serialization.
+    for (size_t remaining = count; remaining != 0; --remaining)
+        nested->deserializeBinary(result.emplace_back(), istr, settings);
+}
+
+
+/// Writes row `row_num` of an array column: a VarUInt element count followed by the elements,
+/// each one written by the nested serialization.
+void SerializationArray::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & array_column = assert_cast<const ColumnArray &>(column);
+    const auto & row_ends = array_column.getOffsets();
+
+    /// The row occupies [row_ends[row_num - 1], row_ends[row_num]) in the nested column.
+    /// NOTE(review): for row 0 this reads index -1; presumably the offsets container yields 0 there - confirm.
+    const size_t begin = row_ends[row_num - 1];
+    const size_t end = row_ends[row_num];
+
+    writeVarUInt(end - begin, ostr);
+
+    const IColumn & elements = array_column.getData();
+    for (size_t pos = begin; pos < end; ++pos)
+        nested->serializeBinary(elements, pos, ostr, settings);
+}
+
+
+/// Reads one array row (VarUInt element count + elements) and appends it to `column`.
+/// Throws TOO_LARGE_ARRAY_SIZE when the count exceeds settings.max_binary_array_size
+/// (a limit of 0 disables the check). If reading an element throws, the elements already
+/// appended for this row are removed before the exception is rethrown.
+void SerializationArray::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    auto & array_column = assert_cast<ColumnArray &>(column);
+    auto & row_ends = array_column.getOffsets();
+
+    size_t count = 0;
+    readVarUInt(count, istr);
+
+    const bool limit_enabled = settings.max_binary_array_size != 0;
+    if (limit_enabled && count > settings.max_binary_array_size)
+        throw Exception(
+            ErrorCodes::TOO_LARGE_ARRAY_SIZE,
+            "Too large array size: {}. The maximum is: {}. To increase the maximum, use setting "
+            "format_binary_max_array_size",
+            count,
+            settings.max_binary_array_size);
+
+    IColumn & elements = array_column.getData();
+
+    size_t appended = 0;
+    try
+    {
+        while (appended < count)
+        {
+            nested->deserializeBinary(elements, istr, settings);
+            ++appended;
+        }
+    }
+    catch (...)
+    {
+        /// Roll back the partially read row so that data and offsets stay consistent.
+        if (appended != 0)
+            elements.popBack(appended);
+        throw;
+    }
+
+    row_ends.push_back(row_ends.back() + count);
+}
+
+
+namespace
+{
+ /// Writes the sizes (differences of consecutive offsets) of arrays [offset, end) in little-endian binary form.
+ /// `end` is offset + limit when limit is non-zero and that stays below the row count; otherwise it is the row count.
+ /// Writes nothing when the column has no rows.
+ void serializeArraySizesPositionIndependent(const IColumn & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
+ {
+ const ColumnArray & column_array = typeid_cast<const ColumnArray &>(column);
+ const ColumnArray::Offsets & offset_values = column_array.getOffsets();
+ size_t size = offset_values.size();
+
+ if (!size)
+ return;
+
+ size_t end = limit && (offset + limit < size)
+ ? offset + limit
+ : size;
+
+ /// NOTE(review): for offset == 0 this reads index -1; presumably the offsets container yields 0 there - confirm.
+ ColumnArray::Offset prev_offset = offset_values[offset - 1];
+ for (size_t i = offset; i < end; ++i)
+ {
+ ColumnArray::Offset current_offset = offset_values[i];
+ writeBinaryLittleEndian(current_offset - prev_offset, ostr);
+ prev_offset = current_offset;
+ }
+ }
+
+ /// Reads up to `limit` little-endian array sizes from `istr` and appends the matching cumulative offsets
+ /// to the offsets of `column_array`. Stops early at end of stream.
+ /// Throws TOO_LARGE_ARRAY_SIZE when a single size exceeds MAX_ARRAY_SIZE or the running offset overflows.
+ /// NOTE(review): when an exception escapes, the offsets keep the enlarged size set by the resize below.
+ void deserializeArraySizesPositionIndependent(ColumnArray & column_array, ReadBuffer & istr, UInt64 limit)
+ {
+ ColumnArray::Offsets & offset_values = column_array.getOffsets();
+ size_t initial_size = offset_values.size();
+ /// Reserve room for the maximum number of rows; shrunk to the number actually read at the end.
+ offset_values.resize(initial_size + limit);
+
+ size_t i = initial_size;
+ /// Continue the running total from the last existing offset (0 for an empty column).
+ ColumnArray::Offset current_offset = initial_size ? offset_values[initial_size - 1] : 0;
+ while (i < initial_size + limit && !istr.eof())
+ {
+ ColumnArray::Offset current_size = 0;
+ readBinaryLittleEndian(current_size, istr);
+
+ if (unlikely(current_size > MAX_ARRAY_SIZE))
+ throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size is too large: {}", current_size);
+ /// Adds current_size to current_offset in place and reports overflow.
+ if (unlikely(__builtin_add_overflow(current_offset, current_size, &current_offset)))
+ throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Deserialization of array offsets will lead to overflow");
+
+ offset_values[i] = current_offset;
+ ++i;
+ }
+
+ offset_values.resize(i);
+ }
+
+ /// Converts a column of per-row array sizes into a column of cumulative offsets (running sum).
+ /// An empty input yields an empty column of the same type.
+ ColumnPtr arraySizesToOffsets(const IColumn & column)
+ {
+     const auto & sizes_column = assert_cast<const ColumnArray::ColumnOffsets &>(column);
+     MutableColumnPtr result = sizes_column.cloneEmpty();
+
+     if (!sizes_column.empty())
+     {
+         const auto & sizes = sizes_column.getData();
+         auto & offsets = assert_cast<ColumnArray::ColumnOffsets &>(*result).getData();
+
+         const size_t rows = sizes.size();
+         offsets.resize(rows);
+
+         IColumn::Offset running_total = 0;
+         for (size_t row = 0; row < rows; ++row)
+         {
+             running_total += sizes[row];
+             offsets[row] = running_total;
+         }
+     }
+
+     return result;
+ }
+
+ /// Converts a column of cumulative offsets into a column of per-row array sizes
+ /// (difference between each offset and the previous one, starting from 0).
+ /// An empty input yields an empty column of the same type.
+ ColumnPtr arrayOffsetsToSizes(const IColumn & column)
+ {
+     const auto & offsets_column = assert_cast<const ColumnArray::ColumnOffsets &>(column);
+     MutableColumnPtr result = offsets_column.cloneEmpty();
+
+     if (!offsets_column.empty())
+     {
+         const auto & offsets = offsets_column.getData();
+         auto & sizes = assert_cast<ColumnArray::ColumnOffsets &>(*result).getData();
+
+         const size_t rows = offsets.size();
+         sizes.resize(rows);
+
+         IColumn::Offset previous = 0;
+         for (size_t row = 0; row < rows; ++row)
+         {
+             const auto current = offsets[row];
+             sizes[row] = current - previous;
+             previous = current;
+         }
+     }
+
+     return result;
+ }
+}
+
+/// Wraps the data type `prev` into Array(prev).
+DataTypePtr SerializationArray::SubcolumnCreator::create(const DataTypePtr & prev) const
+{
+ return std::make_shared<DataTypeArray>(prev);
+}
+
+/// Wraps the serialization `prev` into an array serialization over it.
+SerializationPtr SerializationArray::SubcolumnCreator::create(const SerializationPtr & prev) const
+{
+ return std::make_shared<SerializationArray>(prev);
+}
+
+/// Builds an array column that uses `prev` as data and the offsets stored in this creator.
+ColumnPtr SerializationArray::SubcolumnCreator::create(const ColumnPtr & prev) const
+{
+ return ColumnArray::create(prev, offsets);
+}
+
+/// Reports the substreams of an array: first the ArraySizes stream, then (recursively) the streams
+/// of the nested serialization under ArrayElements. `data.type` and `data.column` may be null; the
+/// corresponding parts of the substream data are then null as well.
+void SerializationArray::enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const
+{
+ const auto * type_array = data.type ? &assert_cast<const DataTypeArray &>(*data.type) : nullptr;
+ const auto * column_array = data.column ? &assert_cast<const ColumnArray &>(*data.column) : nullptr;
+ auto offsets = column_array ? column_array->getOffsetsPtr() : nullptr;
+
+ /// The sizes stream is a UInt64 serialization named "size<N>", N being the array level of the current path.
+ auto offsets_serialization =
+ std::make_shared<SerializationNamed>(
+ std::make_shared<SerializationNumber<UInt64>>(),
+ "size" + std::to_string(getArrayLevel(settings.path)), false);
+
+ /// Without position-independent encoding the column exposed for this stream holds sizes, not offsets.
+ auto offsets_column = offsets && !settings.position_independent_encoding
+ ? arrayOffsetsToSizes(*offsets)
+ : offsets;
+
+ settings.path.push_back(Substream::ArraySizes);
+ settings.path.back().data = SubstreamData(offsets_serialization)
+ .withType(type_array ? std::make_shared<DataTypeUInt64>() : nullptr)
+ .withColumn(std::move(offsets_column))
+ .withSerializationInfo(data.serialization_info);
+
+ callback(settings.path);
+
+ /// Reuse the same path slot for the elements; the creator lets subcolumns be wrapped back into arrays.
+ settings.path.back() = Substream::ArrayElements;
+ settings.path.back().data = data;
+ settings.path.back().creator = std::make_shared<SubcolumnCreator>(offsets);
+
+ auto next_data = SubstreamData(nested)
+ .withType(type_array ? type_array->getNestedType() : nullptr)
+ .withColumn(column_array ? column_array->getDataPtr() : nullptr)
+ .withSerializationInfo(data.serialization_info);
+
+ nested->enumerateStreams(settings, callback, next_data);
+ settings.path.pop_back();
+}
+
+/// Forwards the state-prefix call to the nested serialization for the array's data column,
+/// with Substream::ArrayElements appended to the path for the duration of the call.
+void SerializationArray::serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::ArrayElements);
+ const auto & column_array = assert_cast<const ColumnArray &>(column);
+ nested->serializeBinaryBulkStatePrefix(column_array.getData(), settings, state);
+ settings.path.pop_back();
+}
+
+
+/// Forwards the state-suffix call to the nested serialization,
+/// with Substream::ArrayElements appended to the path for the duration of the call.
+void SerializationArray::serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::ArrayElements);
+ nested->serializeBinaryBulkStateSuffix(settings, state);
+ settings.path.pop_back();
+}
+
+
+/// Forwards the deserialization state-prefix call to the nested serialization,
+/// with Substream::ArrayElements appended to the path for the duration of the call.
+void SerializationArray::deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::ArrayElements);
+ nested->deserializeBinaryBulkStatePrefix(settings, state);
+ settings.path.pop_back();
+}
+
+
+/// Serializes arrays [offset, offset + limit) of `column` into two substreams:
+/// the array sizes (ArraySizes) and then the flattened elements (ArrayElements, via the nested serialization).
+/// `limit` == 0 means "all arrays starting from `offset`".
+/// `settings.path` is restored to its incoming state on every normal return path.
+void SerializationArray::serializeBinaryBulkWithMultipleStreams(
+    const IColumn & column,
+    size_t offset,
+    size_t limit,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    const ColumnArray & column_array = typeid_cast<const ColumnArray &>(column);
+
+    /// First serialize array sizes.
+    settings.path.push_back(Substream::ArraySizes);
+    if (auto * stream = settings.getter(settings.path))
+    {
+        if (settings.position_independent_encoding)
+            serializeArraySizesPositionIndependent(column, *stream, offset, limit);
+        else
+            SerializationNumber<ColumnArray::Offset>().serializeBinaryBulk(*column_array.getOffsetsPtr(), *stream, offset, limit);
+    }
+
+    /// Then serialize contents of arrays.
+    settings.path.back() = Substream::ArrayElements;
+    const ColumnArray::Offsets & offset_values = column_array.getOffsets();
+
+    if (offset > offset_values.size())
+    {
+        /// Nothing to write, but the path element pushed above must still be removed;
+        /// otherwise the caller continues with a path that is one element too long.
+        settings.path.pop_back();
+        return;
+    }
+
+    /** offset - from which array to write.
+      * limit - how many arrays should be written, or 0, if you write everything that is.
+      * end - up to which array the recorded piece ends.
+      *
+      * nested_offset - from which element of the innards to write.
+      * nested_limit - how many elements of the innards to write, or 0, if you write everything that is.
+      */
+
+    size_t end = std::min(offset + limit, offset_values.size());
+
+    size_t nested_offset = offset ? offset_values[offset - 1] : 0;
+    size_t nested_limit = limit
+        ? offset_values[end - 1] - nested_offset
+        : 0;
+
+    /// With a non-zero limit and no elements in range there is nothing to pass to the nested serialization.
+    if (limit == 0 || nested_limit)
+        nested->serializeBinaryBulkWithMultipleStreams(column_array.getData(), nested_offset, nested_limit, settings, state);
+    settings.path.pop_back();
+}
+
+
+/// Reads up to `limit` arrays into `column`: first the offsets (from the substreams cache or from the
+/// ArraySizes stream), then as many nested elements as the last offset requires.
+/// Throws LOGICAL_ERROR if the nested column is already longer than the last offset,
+/// TOO_LARGE_ARRAY_SIZE if more than MAX_ARRAYS_SIZE elements would have to be read, and
+/// CANNOT_READ_ALL_DATA if the nested column ends up non-empty but shorter or longer than the last offset.
+void SerializationArray::deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const
+{
+ auto mutable_column = column->assumeMutable();
+ ColumnArray & column_array = typeid_cast<ColumnArray &>(*mutable_column);
+ settings.path.push_back(Substream::ArraySizes);
+
+ /// The cache stores sizes, so they are converted to offsets on the way out and back to sizes on the way in.
+ if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
+ {
+ column_array.getOffsetsPtr() = arraySizesToOffsets(*cached_column);
+ }
+ else if (auto * stream = settings.getter(settings.path))
+ {
+ if (settings.position_independent_encoding)
+ deserializeArraySizesPositionIndependent(column_array, *stream, limit);
+ else
+ SerializationNumber<ColumnArray::Offset>().deserializeBinaryBulk(column_array.getOffsetsColumn(), *stream, limit, 0);
+
+ addToSubstreamsCache(cache, settings.path, arrayOffsetsToSizes(column_array.getOffsetsColumn()));
+ }
+
+ settings.path.back() = Substream::ArrayElements;
+
+ ColumnArray::Offsets & offset_values = column_array.getOffsets();
+ ColumnPtr & nested_column = column_array.getDataPtr();
+
+ /// Number of values corresponding with `offset_values` must be read.
+ /// NOTE(review): back() is called even when no offsets exist; presumably the container yields 0 then - confirm.
+ size_t last_offset = offset_values.back();
+ if (last_offset < nested_column->size())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Nested column is longer than last offset");
+ size_t nested_limit = last_offset - nested_column->size();
+
+ if (unlikely(nested_limit > MAX_ARRAYS_SIZE))
+ throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array sizes are too large: {}", nested_limit);
+
+ /// Adjust value size hint. Divide it to the average array size.
+ settings.avg_value_size_hint = nested_limit ? settings.avg_value_size_hint / nested_limit * offset_values.size() : 0;
+
+ nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state, cache);
+
+ settings.path.pop_back();
+
+ /// Check consistency between offsets and elements subcolumns.
+ /// But if elements column is empty - it's ok for columns of Nested types that was added by ALTER.
+ if (!nested_column->empty() && nested_column->size() != last_offset)
+ throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all array values: read just {} of {}",
+ toString(nested_column->size()), toString(last_offset));
+
+ column = std::move(mutable_column);
+}
+
+
+/// Writes row `row_num` of an array column as '[' element ',' element ... ']'.
+/// Each element is emitted by `write_nested(nested_column, index)`.
+template <typename Writer>
+static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested)
+{
+    const auto & array_column = assert_cast<const ColumnArray &>(column);
+    const auto & row_ends = array_column.getOffsets();
+
+    const size_t begin = row_ends[row_num - 1];
+    const size_t end = row_ends[row_num];
+
+    const IColumn & elements = array_column.getData();
+
+    writeChar('[', ostr);
+    if (begin < end)
+    {
+        /// The first element has no leading comma; every following one does.
+        write_nested(elements, begin);
+        for (size_t pos = begin + 1; pos < end; ++pos)
+        {
+            writeChar(',', ostr);
+            write_nested(elements, pos);
+        }
+    }
+    writeChar(']', ostr);
+}
+
+
+/// Parses one array from text and appends it to `column`.
+/// Elements are read by `read_nested(nested_column)` and are separated by commas; whitespace around
+/// elements is skipped. A ']' right after a comma also ends the array.
+/// With `allow_unenclosed` the leading '[' may be absent; the array then has to extend to the end of the buffer.
+/// Without it a missing '[' throws CANNOT_READ_ARRAY_FROM_TEXT.
+/// On any exception the elements already appended for this row are removed before rethrowing.
+template <typename Reader>
+static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
+{
+ ColumnArray & column_array = assert_cast<ColumnArray &>(column);
+ ColumnArray::Offsets & offsets = column_array.getOffsets();
+
+ IColumn & nested_column = column_array.getData();
+
+ /// Number of elements appended to nested_column so far for this row.
+ size_t size = 0;
+
+ bool has_braces = false;
+ if (checkChar('[', istr))
+ has_braces = true;
+ else if (!allow_unenclosed)
+ throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
+
+ try
+ {
+ bool first = true;
+ while (!istr.eof() && *istr.position() != ']')
+ {
+ if (!first)
+ {
+ if (*istr.position() == ',')
+ ++istr.position();
+ else
+ throw ParsingException(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
+ "Cannot read array from text, expected comma or end of array, found '{}'",
+ *istr.position());
+ }
+
+ first = false;
+
+ skipWhitespaceIfAny(istr);
+
+ /// NOTE(review): the buffer is dereferenced here without an eof() check - confirm this is safe at end of input.
+ if (*istr.position() == ']')
+ break;
+
+ read_nested(nested_column);
+ ++size;
+
+ skipWhitespaceIfAny(istr);
+ }
+
+ if (has_braces)
+ assertChar(']', istr);
+ else /// If array is not enclosed in braces, we read until EOF.
+ assertEOF(istr);
+ }
+ catch (...)
+ {
+ if (size)
+ nested_column.popBack(size);
+ throw;
+ }
+
+ offsets.push_back(offsets.back() + size);
+}
+
+
+/// Writes row `row_num` as a bracketed, comma-separated list; elements use the nested quoted text form.
+void SerializationArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    auto write_element = [&](const IColumn & elements, size_t pos)
+    {
+        nested->serializeTextQuoted(elements, pos, ostr, settings);
+    };
+
+    serializeTextImpl(column, row_num, ostr, write_element);
+}
+
+
+/// Parses a bracketed array whose elements are in the nested quoted text form and appends it to `column`.
+/// When `whole` is set, any input left after the array is reported through throwUnexpectedDataAfterParsedValue.
+void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
+{
+    auto read_element = [&](IColumn & elements)
+    {
+        nested->deserializeTextQuoted(elements, istr, settings);
+    };
+
+    /// Brackets are mandatory here (allow_unenclosed = false).
+    deserializeTextImpl(column, istr, read_element, false);
+
+    if (!whole || istr.eof())
+        return;
+
+    throwUnexpectedDataAfterParsedValue(column, istr, settings, "Array");
+}
+
+/// Writes row `row_num` as a JSON array; elements are written by the nested JSON serialization.
+void SerializationArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    /// The bracket/comma framing is the same as for the plain text form, only the element writer differs.
+    serializeTextImpl(column, row_num, ostr,
+        [&](const IColumn & elements, size_t pos)
+        {
+            nested->serializeTextJSON(elements, pos, ostr, settings);
+        });
+}
+
+/// Writes row `row_num` as a pretty-printed JSON array: one element per line, indented by
+/// 4 spaces per nesting level (`indent` is the level of the array itself).
+/// An empty array is written as "[]" on a single line.
+void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
+{
+    const auto & array_column = assert_cast<const ColumnArray &>(column);
+    const auto & row_ends = array_column.getOffsets();
+
+    const size_t begin = row_ends[row_num - 1];
+    const size_t end = row_ends[row_num];
+
+    const IColumn & elements = array_column.getData();
+
+    if (begin == end)
+    {
+        writeCString("[]", ostr);
+        return;
+    }
+
+    const size_t element_indent = indent + 1;
+
+    writeCString("[\n", ostr);
+    for (size_t pos = begin; pos < end; ++pos)
+    {
+        if (pos != begin)
+            writeCString(",\n", ostr);
+        writeChar(' ', element_indent * 4, ostr);
+        nested->serializeTextJSONPretty(elements, pos, ostr, settings, element_indent);
+    }
+    writeChar('\n', ostr);
+    writeChar(' ', indent * 4, ostr);
+    writeChar(']', ostr);
+}
+
+
+/// Parses a JSON array and appends it to `column`.
+/// With settings.null_as_default, elements go through SerializationNullable::deserializeTextJSONImpl;
+/// otherwise they are read directly by the nested JSON deserialization.
+void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    auto read_element = [&](IColumn & elements)
+    {
+        if (!settings.null_as_default)
+        {
+            nested->deserializeTextJSON(elements, istr, settings);
+            return;
+        }
+
+        SerializationNullable::deserializeTextJSONImpl(elements, istr, settings, nested);
+    };
+
+    /// Brackets are mandatory here (allow_unenclosed = false).
+    deserializeTextImpl(column, istr, read_element, false);
+}
+
+
+/// Writes row `row_num` as <array><elem>...</elem>...</array>; element contents come from the nested XML serialization.
+void SerializationArray::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & array_column = assert_cast<const ColumnArray &>(column);
+    const auto & row_ends = array_column.getOffsets();
+
+    const size_t begin = row_ends[row_num - 1];
+    const size_t end = row_ends[row_num];
+
+    const IColumn & elements = array_column.getData();
+
+    writeCString("<array>", ostr);
+    size_t pos = begin;
+    while (pos < end)
+    {
+        writeCString("<elem>", ostr);
+        nested->serializeTextXML(elements, pos, ostr, settings);
+        writeCString("</elem>", ostr);
+        ++pos;
+    }
+    writeCString("</array>", ostr);
+}
+
+
+/// Writes row `row_num` as a single CSV field.
+void SerializationArray::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    /// CSV has no native array notation, so the array is first rendered in its plain text form
+    /// and that text is then written as one CSV string value.
+    WriteBufferFromOwnString text_form;
+    serializeText(column, row_num, text_form, settings);
+    writeCSV(text_form.str(), ostr);
+}
+
+
+/// Reads one CSV field and parses its contents as an array (the enclosing brackets are optional).
+/// With settings.csv.arrays_as_nested_csv the elements are parsed as CSV values,
+/// otherwise in the nested quoted text form.
+void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    String field_text;
+    readCSV(field_text, istr, settings.csv);
+    ReadBufferFromString field_buf(field_text);
+
+    const bool elements_are_csv = settings.csv.arrays_as_nested_csv;
+
+    auto read_element = [&](IColumn & elements)
+    {
+        if (elements_are_csv)
+            nested->deserializeTextCSV(elements, field_buf, settings);
+        else
+            nested->deserializeTextQuoted(elements, field_buf, settings);
+    };
+
+    /// allow_unenclosed = true: the field may hold the elements without surrounding brackets.
+    deserializeTextImpl(column, field_buf, read_element, true);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.h
new file mode 100644
index 00000000000..de331169db5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationArray.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+
+namespace DB
+{
+
+/// Serialization of array values. Element handling is delegated to the `nested` serialization;
+/// this class adds the array framing (element counts, brackets, sizes substream).
+class SerializationArray final : public SimpleTextSerialization
+{
+private:
+ /// Serialization used for the elements of the array.
+ SerializationPtr nested;
+
+public:
+ explicit SerializationArray(const SerializationPtr & nested_) : nested(nested_) {}
+
+ /// Single-value binary form: VarUInt element count followed by the elements.
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ /// Text form: '[' elem ',' elem ... ']' with elements in the nested quoted text form.
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
+
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+ /// CSV form: the text form of the array written/read as one CSV field.
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Streaming serialization of arrays is arranged in a special way:
+ * - elements placed in a row are written/read without array sizes;
+ * - the sizes are written/read in a separate stream,
+ * This is necessary, because when implementing nested structures, several arrays can have common sizes.
+ */
+
+ void enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const override;
+
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+
+private:
+ /// Rebuilds array-typed objects (type, serialization, column) around a nested subcolumn.
+ struct SubcolumnCreator : public ISubcolumnCreator
+ {
+ /// Offsets column shared with the arrays created by create(const ColumnPtr &).
+ const ColumnPtr offsets;
+
+ explicit SubcolumnCreator(const ColumnPtr & offsets_) : offsets(offsets_) {}
+
+ DataTypePtr create(const DataTypePtr & prev) const override;
+ SerializationPtr create(const SerializationPtr & prev) const override;
+ ColumnPtr create(const ColumnPtr & prev) const override;
+ };
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.cpp
new file mode 100644
index 00000000000..41b5bf806e5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.cpp
@@ -0,0 +1,335 @@
+#include <DataTypes/Serializations/SerializationBool.h>
+
+#include <Columns/ColumnsNumber.h>
+#include <Common/Exception.h>
+#include <IO/WriteBuffer.h>
+#include <IO/ReadBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/PeekableReadBuffer.h>
+
+#include <unordered_set>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_COLUMN;
+ extern const int CANNOT_PARSE_BOOL;
+}
+
+namespace
+{
+
+/// Fixed lowercase spellings written by serializeSimple; the array sizes include the terminating zero.
+constexpr char str_true[5] = "true";
+constexpr char str_false[6] = "false";
+
+/// Returns `column` as a ColumnUInt8 pointer.
+/// Throws ILLEGAL_COLUMN when the column has any other type.
+const ColumnUInt8 * checkAndGetSerializeColumnType(const IColumn & column)
+{
+    const auto * col = checkAndGetColumn<ColumnUInt8>(&column);
+    /// Test the pointer obtained above instead of repeating the same type lookup.
+    if (!col)
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Bool type can only serialize columns of type UInt8.{}", column.getName());
+    return col;
+}
+
+/// Returns `column` as a mutable ColumnUInt8 pointer.
+/// Throws ILLEGAL_COLUMN when the column has any other type.
+ColumnUInt8 * checkAndGetDeserializeColumnType(IColumn & column)
+{
+    auto * col = typeid_cast<ColumnUInt8 *>(&column);
+    /// The pointer cast already yields nullptr on a type mismatch, so a second type lookup is not needed.
+    if (!col)
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Bool type can only deserialize columns of type UInt8.{}",
+                        column.getName());
+    return col;
+}
+
+/// Writes row `row_num` using the user-configurable spellings:
+/// settings.bool_true_representation for non-zero values, settings.bool_false_representation otherwise.
+/// Throws ILLEGAL_COLUMN when `column` is not ColumnUInt8.
+void serializeCustom(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings)
+{
+    const auto * bool_column = checkAndGetSerializeColumnType(column);
+    const bool value = bool_column->getData()[row_num];
+
+    writeString(value ? settings.bool_true_representation : settings.bool_false_representation, ostr);
+}
+
+/// Writes row `row_num` as the fixed literal "true" or "false", ignoring the format settings.
+/// Throws ILLEGAL_COLUMN when `column` is not ColumnUInt8.
+void serializeSimple(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &)
+{
+    const auto * bool_column = checkAndGetSerializeColumnType(column);
+    const bool value = bool_column->getData()[row_num];
+
+    /// sizeof includes the terminating zero, which must not be written.
+    const char * literal = value ? str_true : str_false;
+    const size_t length = value ? sizeof(str_true) - 1 : sizeof(str_false) - 1;
+    ostr.write(literal, length);
+}
+
+/// Tries to read one of the built-in boolean spellings (case-insensitive) from `istr`:
+/// 1/0, true/t, false/f, yes/y, no/n, on/off, enable/enabled, disable/disabled.
+/// On success appends the value to `column` and returns true.
+/// Returns false without inserting anything when no spelling matches.
+/// NOTE(review): presumably the check* helpers advance the buffer on a match, so part of the input
+/// may already be consumed when false is returned; the caller in this file rolls back to a checkpoint.
+/// This function does not verify what follows the matched spelling.
+bool tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr)
+{
+ if (checkCharCaseInsensitive('1', istr))
+ {
+ column->insert(true);
+ }
+ else if (checkCharCaseInsensitive('0', istr))
+ {
+ column->insert(false);
+ }
+ /// 'True' and 'T'
+ else if (checkCharCaseInsensitive('t', istr))
+ {
+ /// Check if it's just short form `T` or full form `True`
+ if (checkCharCaseInsensitive('r', istr))
+ {
+ if (!checkStringCaseInsensitive("ue", istr))
+ return false;
+ }
+ column->insert(true);
+ }
+ /// 'False' and 'F'
+ else if (checkCharCaseInsensitive('f', istr))
+ {
+ /// Check if it's just short form `F` or full form `False`
+ if (checkCharCaseInsensitive('a', istr))
+ {
+ if (!checkStringCaseInsensitive("lse", istr))
+ return false;
+ }
+ column->insert(false);
+ }
+ /// 'Yes' and 'Y'
+ else if (checkCharCaseInsensitive('y', istr))
+ {
+ /// Check if it's just short form `Y` or full form `Yes`
+ if (checkCharCaseInsensitive('e', istr))
+ {
+ if (!checkCharCaseInsensitive('s', istr))
+ return false;
+ }
+ column->insert(true);
+ }
+ /// 'No' and 'N'
+ else if (checkCharCaseInsensitive('n', istr))
+ {
+ /// Check if it's just short form `N` or full form `No`
+ checkCharCaseInsensitive('o', istr);
+ column->insert(false);
+ }
+ /// 'On' and 'Off'
+ else if (checkCharCaseInsensitive('o', istr))
+ {
+ if (checkCharCaseInsensitive('n', istr))
+ column->insert(true);
+ else if (checkStringCaseInsensitive("ff", istr))
+ {
+ column->insert(false);
+ }
+ else
+ return false;
+ }
+ /// 'Enable' and 'Enabled'
+ else if (checkStringCaseInsensitive("enable", istr))
+ {
+ /// Check if it's 'enable' or 'enabled'
+ checkCharCaseInsensitive('d', istr);
+ column->insert(true);
+ }
+ /// 'Disable' and 'Disabled'
+ else if (checkStringCaseInsensitive("disable", istr))
+ {
+ /// Check if it's 'disable' or 'disabled'
+ checkCharCaseInsensitive('d', istr);
+ column->insert(false);
+ }
+ else
+ {
+ return false;
+ }
+
+ return true;
+}
+
+/// Parses one Bool value from `istr` and appends it to `column`.
+///
+/// Candidates are tried in this order: settings.bool_true_representation,
+/// settings.bool_false_representation, then the built-in spellings of tryDeserializeAllVariants.
+/// A candidate is accepted only if `check_end_of_value` returns true for the buffer positioned
+/// right after it.
+///
+/// Throws ILLEGAL_COLUMN when `column` is not ColumnUInt8, and CANNOT_PARSE_BOOL when nothing matches
+/// or when the peekable buffer still holds unread data after a match.
+/// When this function itself throws CANNOT_PARSE_BOOL, no value is left behind in the column.
+void deserializeImpl(
+    IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function<bool(ReadBuffer &)> check_end_of_value)
+{
+    ColumnUInt8 * col = checkAndGetDeserializeColumnType(column);
+
+    PeekableReadBuffer buf(istr);
+    buf.setCheckpoint();
+    if (checkString(settings.bool_true_representation, buf) && check_end_of_value(buf))
+    {
+        col->insert(true);
+        return;
+    }
+
+    buf.rollbackToCheckpoint();
+    if (checkString(settings.bool_false_representation, buf) && check_end_of_value(buf))
+    {
+        buf.dropCheckpoint();
+        if (buf.hasUnreadData())
+            throw Exception(
+                ErrorCodes::CANNOT_PARSE_BOOL,
+                "Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
+                "bool_true_representation or bool_false_representation contains some delimiters of input format");
+        /// Insert only after every check has passed, so a failure leaves the column untouched.
+        col->insert(false);
+        return;
+    }
+
+    buf.rollbackToCheckpoint();
+    if (tryDeserializeAllVariants(col, buf))
+    {
+        /// tryDeserializeAllVariants has already appended the value; it has to be taken back
+        /// on every path below that ends in an exception.
+        if (check_end_of_value(buf))
+        {
+            buf.dropCheckpoint();
+            if (buf.hasUnreadData())
+            {
+                col->popBack(1);
+                throw Exception(
+                    ErrorCodes::CANNOT_PARSE_BOOL,
+                    "Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
+                    "bool_true_representation or bool_false_representation contains some delimiters of input format");
+            }
+            return;
+        }
+
+        /// Only a prefix of the input was a valid spelling (e.g. "trueX"): discard the value.
+        col->popBack(1);
+    }
+
+    /// Make the bytes read since the checkpoint contiguous so that a snippet can be shown in the message.
+    buf.makeContinuousMemoryFromCheckpointToPos();
+    buf.rollbackToCheckpoint();
+    throw Exception(
+        ErrorCodes::CANNOT_PARSE_BOOL,
+        "Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and "
+        "bool_false_representation or one of "
+        "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0",
+        String(buf.position(), std::min(10lu, buf.available())),
+        settings.bool_true_representation, settings.bool_false_representation);
+}
+
+}
+
+
+/// Passes `nested_` to the SerializationWrapper base; the constructor does nothing else.
+SerializationBool::SerializationBool(const SerializationPtr &nested_)
+ : SerializationWrapper(nested_)
+{
+}
+
+/// Plain text output uses the configurable true/false representations from `settings`.
+void SerializationBool::serializeText(const IColumn & column, size_t row_num, WriteBuffer &ostr, const FormatSettings & settings) const
+{
+ serializeCustom(column, row_num, ostr, settings);
+}
+
+/// Escaped text output uses the configurable true/false representations from `settings`.
+void SerializationBool::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeCustom(column, row_num, ostr, settings);
+}
+
+/// Parses a Bool value from escaped text; the value must be followed by a tab, a newline or end of input.
+/// Throws CANNOT_PARSE_BOOL on empty input or on an unrecognized value.
+void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (istr.eof())
+        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
+
+    auto at_end_of_value = [](ReadBuffer & buf)
+    {
+        return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n';
+    };
+
+    deserializeImpl(column, istr, settings, at_end_of_value);
+}
+
+/// JSON output always uses the fixed literals true / false.
+void SerializationBool::serializeTextJSON(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &settings) const
+{
+ serializeSimple(column, row_num, ostr, settings);
+}
+
+/// Parses a JSON Bool value. The first character selects the reader:
+/// 't' / 'f' go to readBoolTextWord, '1' / '0' go to readBoolText.
+/// Throws CANNOT_PARSE_BOOL on empty input or on any other first character,
+/// and ILLEGAL_COLUMN when `column` is not ColumnUInt8.
+void SerializationBool::deserializeTextJSON(IColumn &column, ReadBuffer &istr, const FormatSettings &) const
+{
+    if (istr.eof())
+        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
+
+    ColumnUInt8 * bool_column = checkAndGetDeserializeColumnType(column);
+    bool parsed = false;
+
+    const char first_char = *istr.position();
+    switch (first_char)
+    {
+        case 't':
+        case 'f':
+            readBoolTextWord(parsed, istr);
+            break;
+        case '1':
+        case '0':
+            readBoolText(parsed, istr);
+            break;
+        default:
+            throw Exception(ErrorCodes::CANNOT_PARSE_BOOL,
+                "Invalid boolean value, should be true/false, 1/0, but it starts with the '{}' character.", first_char);
+    }
+
+    bool_column->insert(parsed);
+}
+
+/// CSV output uses the configurable true/false representations from `settings`.
+void SerializationBool::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeCustom(column, row_num, ostr, settings);
+}
+
+/// Parses a Bool value from CSV; the value must be followed by the CSV delimiter,
+/// '\n', '\r' or end of input.
+/// Throws CANNOT_PARSE_BOOL on empty input or on an unrecognized value.
+void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (istr.eof())
+        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
+
+    auto at_end_of_value = [&](ReadBuffer & buf)
+    {
+        return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n' || *buf.position() == '\r';
+    };
+
+    deserializeImpl(column, istr, settings, at_end_of_value);
+}
+
+/// Raw text output uses the configurable true/false representations from `settings`.
+void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeCustom(column, row_num, ostr, settings);
+}
+
+/// Parses a Bool value from raw text; the value must be followed by a tab, a newline or end of input.
+/// Throws CANNOT_PARSE_BOOL on empty input or on an unrecognized value.
+void SerializationBool::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (istr.eof())
+        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
+
+    auto at_end_of_value = [&](ReadBuffer & buf)
+    {
+        return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n';
+    };
+
+    deserializeImpl(column, istr, settings, at_end_of_value);
+}
+
+/// Quoted text output always uses the fixed literals true / false (written without quotes).
+void SerializationBool::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeSimple(column, row_num, ostr, settings);
+}
+
+/// Parses a Bool value in the "quoted" text form and appends it to `column`.
+///
+/// Accepted input, selected by the first character:
+///  - unquoted true / false (case-insensitive);
+///  - unquoted 1 / 0;
+///  - a single-quoted value, whose contents are parsed by deserializeImpl
+///    (configured representations or any built-in spelling).
+/// In every accepted case the characters of the value are consumed from `istr`.
+///
+/// Throws CANNOT_PARSE_BOOL on empty input or an unexpected first character,
+/// and ILLEGAL_COLUMN when `column` is not ColumnUInt8.
+void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (istr.eof())
+        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
+
+    auto * col = checkAndGetDeserializeColumnType(column);
+
+    char symbol = toLowerIfAlphaASCII(*istr.position());
+    switch (symbol)
+    {
+        case 't':
+            assertStringCaseInsensitive("true", istr);
+            col->insert(true);
+            break;
+        case 'f':
+            assertStringCaseInsensitive("false", istr);
+            col->insert(false);
+            break;
+        case '1':
+            /// Consume the digit; otherwise the caller would see it again as the start of the next token.
+            ++istr.position();
+            col->insert(true);
+            break;
+        case '0':
+            /// Same as above: the digit has to be consumed.
+            ++istr.position();
+            col->insert(false);
+            break;
+        case '\'':
+            ++istr.position();
+            /// Inside quotes the value must be directly followed by the closing quote.
+            deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return !buf.eof() && *buf.position() == '\''; });
+            assertChar('\'', istr);
+            break;
+        default:
+            throw Exception(
+                ErrorCodes::CANNOT_PARSE_BOOL,
+                "Cannot parse boolean value here: '{}', should be true/false, 1/0 or on of "
+                "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0 in quotes",
+                String(istr.position(), std::min(10ul, istr.available())));
+    }
+}
+
+/// Parses a Bool value that must span the whole remaining input.
+/// Throws CANNOT_PARSE_BOOL on empty input or on an unrecognized value.
+void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (istr.eof())
+        throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");
+
+    auto at_end_of_value = [&](ReadBuffer & buf)
+    {
+        return buf.eof();
+    };
+
+    deserializeImpl(column, istr, settings, at_end_of_value);
+}
+
+/// XML output always uses the fixed literals true / false.
+void SerializationBool::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeSimple(column, row_num, ostr, settings);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.h
new file mode 100644
index 00000000000..a5aa0ca80a2
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationBool.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationWrapper.h>
+#include <Columns/ColumnsNumber.h>
+#include <unordered_set>
+
+namespace DB
+{
+
+class SerializationBool final : public SerializationWrapper /// Text (de)serialization overrides for Bool, layered over a wrapped serialization.
+{
+public:
+ explicit SerializationBool(const SerializationPtr & nested_); /// nested_ is the serialization being wrapped.
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; /// Plain text form.
+
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; /// Escaped text form.
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; /// JSON form.
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; /// CSV form.
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; /// Raw text form.
+ void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; /// Quoted (literal) form.
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; /// Buffer holds exactly one value.
+
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; /// XML form; there is no XML reader override here.
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp
new file mode 100644
index 00000000000..c35e1120ce8
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp
@@ -0,0 +1,97 @@
+#include <DataTypes/Serializations/SerializationCustomSimpleText.h>
+
+#include <IO/ReadBufferFromString.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/WriteHelpers.h>
+
+namespace
+{
+
+using namespace DB;
+
+String serializeToString(const SerializationCustomSimpleText & domain, const IColumn & column, size_t row_num, const FormatSettings & settings)
+{ /// Renders one row through domain.serializeText into an in-memory buffer and returns the resulting text.
+ WriteBufferFromOwnString buffer;
+ domain.serializeText(column, row_num, buffer, settings);
+
+ return buffer.str();
+}
+
+void deserializeFromString(const SerializationCustomSimpleText & domain, IColumn & column, const String & s, const FormatSettings & settings)
+{ /// Parses the string s through domain.deserializeText using a read buffer over s.
+ ReadBufferFromString istr(s);
+ domain.deserializeText(column, istr, settings, true); /// The last argument is `whole`: s is passed as containing exactly one value.
+}
+
+}
+
+namespace DB
+{
+
+SerializationCustomSimpleText::SerializationCustomSimpleText(const SerializationPtr & nested_)
+ : SerializationWrapper(nested_) /// Only forwards the nested serialization to the base class.
+{
+}
+
+void SerializationCustomSimpleText::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Reads everything up to EOF and parses it as one value.
+ String str;
+ readStringUntilEOF(str, istr);
+ deserializeFromString(*this, column, str, settings);
+}
+
+void SerializationCustomSimpleText::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Text form of the row, written with writeEscapedString.
+ writeEscapedString(serializeToString(*this, column, row_num, settings), ostr);
+}
+
+void SerializationCustomSimpleText::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Reads an escaped string first, then parses the resulting text.
+ String str;
+ readEscapedString(str, istr);
+ deserializeFromString(*this, column, str, settings);
+}
+
+void SerializationCustomSimpleText::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Text form of the row, written with writeQuotedString.
+ writeQuotedString(serializeToString(*this, column, row_num, settings), ostr);
+}
+
+void SerializationCustomSimpleText::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Reads a quoted string first, then parses the resulting text.
+ String str;
+ readQuotedString(str, istr);
+ deserializeFromString(*this, column, str, settings);
+}
+
+void SerializationCustomSimpleText::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Text form of the row, written with writeCSVString.
+ writeCSVString(serializeToString(*this, column, row_num, settings), ostr);
+}
+
+void SerializationCustomSimpleText::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Reads one CSV string according to settings.csv, then parses the resulting text.
+ String str;
+ readCSVString(str, istr, settings.csv);
+ deserializeFromString(*this, column, str, settings);
+}
+
+void SerializationCustomSimpleText::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Text form of the row, written with writeJSONString.
+ writeJSONString(serializeToString(*this, column, row_num, settings), ostr, settings);
+}
+
+void SerializationCustomSimpleText::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Reads a JSON string first, then parses the resulting text.
+ String str;
+ readJSONString(str, istr);
+ deserializeFromString(*this, column, str, settings);
+}
+
+void SerializationCustomSimpleText::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Text form of the row, written with writeXMLStringForTextElement.
+ writeXMLStringForTextElement(serializeToString(*this, column, row_num, settings), ostr);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.h
new file mode 100644
index 00000000000..21d6f8af650
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationCustomSimpleText.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationWrapper.h>
+
+namespace DB
+{
+
+class ReadBuffer;
+class WriteBuffer;
+struct FormatSettings;
+class IColumn;
+
+/** Simple ISerialization that uses serializeText/deserializeText
+ * for all serialization and deserialization. */
+class SerializationCustomSimpleText : public SerializationWrapper
+{
+public:
+ explicit SerializationCustomSimpleText(const SerializationPtr & nested_);
+
+ // Methods that subclasses must override in order to get full serialization/deserialization support.
+ virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0;
+ /// whole = true means that buffer contains only one value, so we should read until EOF.
+ /// It's needed to check if there is garbage after parsed field.
+ virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const = 0;
+
+ /** Text deserialization in case when buffer contains only one value, without any escaping and delimiters.
+ */
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization with escaping but without quoting.
+ */
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization as a literal that may be inserted into a query.
+ */
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization for the CSV format.
+ */
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ /** Reads one CSV string and parses it with deserializeText.
+ * There is no delimiter parameter; the CSV settings come from FormatSettings.
+ */
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization intended for using in JSON format.
+ * The value is written and read as a JSON string; JSON-related options come from FormatSettings.
+ */
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Text serialization for putting into the XML format.
+ */
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.cpp
new file mode 100644
index 00000000000..1ed48fdd31d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.cpp
@@ -0,0 +1,88 @@
+#include <DataTypes/Serializations/SerializationDate.h>
+
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+
+#include <Columns/ColumnsNumber.h>
+#include <Formats/ProtobufReader.h>
+
+#include <Common/assert_cast.h>
+
+namespace DB
+{
+
+void SerializationDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{ /// Writes the UInt16 value at row_num, taken as a DayNum, as date text using time_zone.
+ writeDateText(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num]), ostr, time_zone);
+}
+
+void SerializationDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Parses one date, then requires that nothing is left in the buffer.
+ deserializeTextEscaped(column, istr, settings);
+ if (!istr.eof())
+ throwUnexpectedDataAfterParsedValue(column, istr, settings, "Date");
+}
+
+void SerializationDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// Reads date text into a DayNum and appends it to the UInt16 column.
+ DayNum x;
+ readDateText(x, istr, time_zone);
+ assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
+}
+
+void SerializationDate::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Same output as serializeText.
+ serializeText(column, row_num, ostr, settings);
+}
+
+void SerializationDate::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in single quotes.
+ writeChar('\'', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('\'', ostr);
+}
+
+void SerializationDate::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// Requires the date to be enclosed in single quotes.
+ DayNum x;
+ assertChar('\'', istr);
+ readDateText(x, istr, time_zone);
+ assertChar('\'', istr);
+ assert_cast<ColumnUInt16 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
+}
+
+void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// Requires the date to be enclosed in double quotes; the column is touched only after both quotes matched.
+ DayNum x;
+ assertChar('"', istr);
+ readDateText(x, istr, time_zone);
+ assertChar('"', istr);
+ assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
+}
+
+void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// Field reading is delegated to readCSV with time_zone; the result is appended to the column.
+ DayNum value;
+ readCSV(value, istr, time_zone);
+ assert_cast<ColumnUInt16 &>(column).getData().push_back(value);
+}
+
+SerializationDate::SerializationDate(const DateLUTImpl & time_zone_) : time_zone(time_zone_) /// Only initialises the time_zone member.
+{
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.h
new file mode 100644
index 00000000000..f751b06fba6
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationNumber.h>
+#include <Common/DateLUT.h>
+
+namespace DB
+{
+
+class SerializationDate final : public SerializationNumber<UInt16> /// Date text formats on top of the UInt16 number serialization.
+{
+public:
+ explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance()); /// Defaults to DateLUT::instance() when no time zone is given.
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+protected:
+ const DateLUTImpl & time_zone; /// Held by reference, not copied; the referenced object must outlive this serialization.
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.cpp
new file mode 100644
index 00000000000..851710de839
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.cpp
@@ -0,0 +1,85 @@
+#include <DataTypes/Serializations/SerializationDate32.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+
+#include <Columns/ColumnsNumber.h>
+
+#include <Common/assert_cast.h>
+
+namespace DB
+{
+
+void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{ /// Writes the Int32 value at row_num, taken as an ExtendedDayNum, as date text using time_zone.
+ writeDateText(ExtendedDayNum(assert_cast<const ColumnInt32 &>(column).getData()[row_num]), ostr, time_zone);
+}
+
+void SerializationDate32::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Parses one date, then requires that nothing is left in the buffer.
+ deserializeTextEscaped(column, istr, settings);
+ if (!istr.eof())
+ throwUnexpectedDataAfterParsedValue(column, istr, settings, "Date32");
+}
+
+void SerializationDate32::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// Reads date text into an ExtendedDayNum and appends it to the Int32 column.
+ ExtendedDayNum x;
+ readDateText(x, istr, time_zone);
+ assert_cast<ColumnInt32 &>(column).getData().push_back(x);
+}
+
+void SerializationDate32::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Same output as serializeText.
+ serializeText(column, row_num, ostr, settings);
+}
+
+void SerializationDate32::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in single quotes.
+ writeChar('\'', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('\'', ostr);
+}
+
+void SerializationDate32::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// Requires the date to be enclosed in single quotes.
+ ExtendedDayNum x;
+ assertChar('\'', istr);
+ readDateText(x, istr, time_zone);
+ assertChar('\'', istr);
+ assert_cast<ColumnInt32 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
+}
+
+void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// Requires the date to be enclosed in double quotes; the column is touched only after both quotes matched.
+ ExtendedDayNum x;
+ assertChar('"', istr);
+ readDateText(x, istr, time_zone);
+ assertChar('"', istr);
+ assert_cast<ColumnInt32 &>(column).getData().push_back(x);
+}
+
+void SerializationDate32::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDate32::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{ /// NOTE(review): unlike the other readers here, this path reads a LocalDate and does not pass time_zone - confirm this is intended.
+ LocalDate value;
+ readCSV(value, istr);
+ assert_cast<ColumnInt32 &>(column).getData().push_back(value.getExtenedDayNum());
+}
+
+SerializationDate32::SerializationDate32(const DateLUTImpl & time_zone_) : time_zone(time_zone_) /// Only initialises the time_zone member.
+{
+}
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.h
new file mode 100644
index 00000000000..49560fb6c7d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDate32.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationNumber.h>
+#include <Common/DateLUT.h>
+
+namespace DB
+{
+class SerializationDate32 final : public SerializationNumber<Int32> /// Date32 text formats on top of the Int32 number serialization.
+{
+public:
+ explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance()); /// Defaults to DateLUT::instance() when no time zone is given.
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+protected:
+ const DateLUTImpl & time_zone; /// Held by reference, not copied; the referenced object must outlive this serialization.
+};
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.cpp
new file mode 100644
index 00000000000..2ba24f5351b
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.cpp
@@ -0,0 +1,179 @@
+#include <DataTypes/Serializations/SerializationDateTime.h>
+
+#include <Columns/ColumnVector.h>
+#include <Common/assert_cast.h>
+#include <Common/DateLUT.h>
+#include <Formats/FormatSettings.h>
+#include <Formats/ProtobufReader.h>
+#include <Formats/ProtobufWriter.h>
+#include <IO/Operators.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/parseDateTimeBestEffort.h>
+#include <IO/ReadBufferFromString.h>
+
+namespace DB
+{
+
+namespace
+{
+
+inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
+{ /// Picks the parser according to settings.date_time_input_format and stores the result in x.
+ switch (settings.date_time_input_format)
+ {
+ case FormatSettings::DateTimeInputFormat::Basic:
+ readDateTimeText(x, istr, time_zone);
+ return;
+ case FormatSettings::DateTimeInputFormat::BestEffort:
+ parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone);
+ return;
+ case FormatSettings::DateTimeInputFormat::BestEffortUS:
+ parseDateTimeBestEffortUS(x, istr, time_zone, utc_time_zone);
+ return;
+ }
+}
+
+}
+
+SerializationDateTime::SerializationDateTime(const TimezoneMixin & time_zone_)
+ : TimezoneMixin(time_zone_) /// Copy-constructs the TimezoneMixin base from the argument.
+{
+}
+
+void SerializationDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Output form is chosen by settings.date_time_output_format.
+ auto value = assert_cast<const ColumnType &>(column).getData()[row_num];
+ switch (settings.date_time_output_format)
+ {
+ case FormatSettings::DateTimeOutputFormat::Simple:
+ writeDateTimeText(value, ostr, time_zone);
+ return;
+ case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
+ writeIntText(value, ostr); /// The stored number is written as is.
+ return;
+ case FormatSettings::DateTimeOutputFormat::ISO:
+ writeDateTimeTextISO(value, ostr, utc_time_zone); /// This branch uses utc_time_zone, not time_zone.
+ return;
+ }
+}
+
+void SerializationDateTime::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Same output as serializeText.
+ serializeText(column, row_num, ostr, settings);
+}
+
+void SerializationDateTime::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Parses one value, then requires that nothing is left in the buffer.
+ deserializeTextEscaped(column, istr, settings);
+ if (!istr.eof())
+ throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime");
+}
+
+void SerializationDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Parses with readText and appends the result narrowed to UInt32.
+ time_t x = 0;
+ readText(x, istr, settings, time_zone, utc_time_zone);
+ if (x < 0) /// Negative results are clamped to 0 before the cast to UInt32.
+ x = 0;
+ assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
+}
+
+void SerializationDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in single quotes.
+ writeChar('\'', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('\'', ostr);
+}
+
+void SerializationDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Accepts either a single-quoted value or a bare integer.
+ time_t x = 0;
+ if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808'
+ {
+ readText(x, istr, settings, time_zone, utc_time_zone);
+ assertChar('\'', istr);
+ }
+ else /// Just 1504193808 or 01504193808
+ {
+ readIntText(x, istr);
+ }
+ if (x < 0) /// Negative results are clamped to 0 before the cast to UInt32.
+ x = 0;
+
+ /// It's important to do this at the end - for exception safety.
+ assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
+}
+
+void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Accepts either a double-quoted value or a bare integer.
+ time_t x = 0;
+ if (checkChar('"', istr))
+ {
+ readText(x, istr, settings, time_zone, utc_time_zone);
+ assertChar('"', istr);
+ }
+ else
+ {
+ readIntText(x, istr);
+ }
+ if (x < 0) /// Negative results are clamped to 0 before the cast to UInt32.
+ x = 0;
+ assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
+}
+
+void SerializationDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Handles a quoted field, an unquoted field parsed in place, or an unquoted field first extracted as a CSV string.
+ time_t x = 0;
+
+ if (istr.eof())
+ throwReadAfterEOF();
+
+ char maybe_quote = *istr.position();
+
+ if (maybe_quote == '\'' || maybe_quote == '\"')
+ {
+ ++istr.position(); /// Skip the opening quote; the same character must close the value.
+ readText(x, istr, settings, time_zone, utc_time_zone);
+ assertChar(maybe_quote, istr);
+ }
+ else
+ {
+ if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
+ {
+ readText(x, istr, settings, time_zone, utc_time_zone);
+ }
+ /// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00"
+ /// and can mistakenly read comma as a part of datetime.
+ /// For example data "...,01.01.2000,some string,..." cannot be parsed correctly.
+ /// To fix this problem we first read CSV string and then try to parse it as datetime.
+ else
+ {
+ String datetime_str;
+ readCSVString(datetime_str, istr, settings.csv);
+ ReadBufferFromString buf(datetime_str);
+ readText(x, buf, settings, time_zone, utc_time_zone);
+ }
+ }
+
+ if (x < 0) /// Negative results are clamped to 0 before the cast to UInt32.
+ x = 0;
+
+ assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.h
new file mode 100644
index 00000000000..f4a142483e5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationNumber.h>
+#include <DataTypes/TimezoneMixin.h>
+
+class DateLUTImpl;
+
+namespace DB
+{
+
+class SerializationDateTime final : public SerializationNumber<UInt32>, public TimezoneMixin /// DateTime text formats on top of the UInt32 number serialization.
+{
+public:
+ explicit SerializationDateTime(const TimezoneMixin & time_zone_); /// Time zone data is taken from another TimezoneMixin.
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+};
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.cpp
new file mode 100644
index 00000000000..c5964f1bd97
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.cpp
@@ -0,0 +1,174 @@
+#include <DataTypes/Serializations/SerializationDateTime64.h>
+
+#include <Columns/ColumnVector.h>
+#include <Common/assert_cast.h>
+#include <Common/DateLUT.h>
+#include <Formats/FormatSettings.h>
+#include <Formats/ProtobufReader.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/WriteHelpers.h>
+#include <IO/parseDateTimeBestEffort.h>
+#include <IO/ReadBufferFromString.h>
+
+namespace DB
+{
+
+SerializationDateTime64::SerializationDateTime64(
+ UInt32 scale_, const TimezoneMixin & time_zone_)
+ : SerializationDecimalBase<DateTime64>(DecimalUtils::max_precision<DateTime64>, scale_) /// Precision is fixed to the DateTime64 maximum; only the scale varies.
+ , TimezoneMixin(time_zone_)
+{
+}
+
+void SerializationDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Output form is chosen by settings.date_time_output_format; scale is passed to every writer.
+ auto value = assert_cast<const ColumnType &>(column).getData()[row_num];
+ switch (settings.date_time_output_format)
+ {
+ case FormatSettings::DateTimeOutputFormat::Simple:
+ writeDateTimeText(value, scale, ostr, time_zone);
+ return;
+ case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
+ writeDateTimeUnixTimestamp(value, scale, ostr);
+ return;
+ case FormatSettings::DateTimeOutputFormat::ISO:
+ writeDateTimeTextISO(value, scale, ostr, utc_time_zone); /// This branch uses utc_time_zone, not time_zone.
+ return;
+ }
+}
+
+void SerializationDateTime64::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
+{ /// Always uses readDateTime64Text; settings.date_time_input_format is not consulted on this path.
+ DateTime64 result = 0;
+ readDateTime64Text(result, scale, istr, time_zone);
+ assert_cast<ColumnType &>(column).getData().push_back(result); /// NOTE(review): appended before the check below, and the helper receives the column - confirm before reordering.
+
+ if (whole && !istr.eof())
+ throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime64");
+}
+
+void SerializationDateTime64::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Parses one value via deserializeTextEscaped, then requires that nothing is left in the buffer.
+ deserializeTextEscaped(column, istr, settings);
+ if (!istr.eof())
+ throwUnexpectedDataAfterParsedValue(column, istr, settings, "DateTime64");
+}
+
+void SerializationDateTime64::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Same output as serializeText.
+ serializeText(column, row_num, ostr, settings);
+}
+
+static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
+{ /// Picks the parser according to settings.date_time_input_format and stores the result in x.
+ switch (settings.date_time_input_format)
+ {
+ case FormatSettings::DateTimeInputFormat::Basic:
+ readDateTime64Text(x, scale, istr, time_zone);
+ return;
+ case FormatSettings::DateTimeInputFormat::BestEffort:
+ parseDateTime64BestEffort(x, scale, istr, time_zone, utc_time_zone);
+ return;
+ case FormatSettings::DateTimeInputFormat::BestEffortUS:
+ parseDateTime64BestEffortUS(x, scale, istr, time_zone, utc_time_zone);
+ return;
+ }
+}
+
+void SerializationDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Parses with readText and appends the result.
+ DateTime64 x = 0;
+ readText(x, scale, istr, settings, time_zone, utc_time_zone);
+ assert_cast<ColumnType &>(column).getData().push_back(x);
+}
+
+void SerializationDateTime64::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in single quotes.
+ writeChar('\'', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('\'', ostr);
+}
+
+void SerializationDateTime64::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Accepts either a single-quoted value or a bare integer.
+ DateTime64 x = 0;
+ if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808'
+ {
+ readText(x, scale, istr, settings, time_zone, utc_time_zone);
+ assertChar('\'', istr);
+ }
+ else /// Just 1504193808 or 01504193808
+ {
+ readIntText(x, istr);
+ }
+ assert_cast<ColumnType &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
+}
+
+void SerializationDateTime64::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Accepts either a double-quoted value or a bare integer.
+ DateTime64 x = 0;
+ if (checkChar('"', istr))
+ {
+ readText(x, scale, istr, settings, time_zone, utc_time_zone);
+ assertChar('"', istr);
+ }
+ else
+ {
+ readIntText(x, istr); /// The integer is stored as read; scale is not passed on this path.
+ }
+ assert_cast<ColumnType &>(column).getData().push_back(x);
+}
+
+void SerializationDateTime64::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{ /// Wraps the text form in double quotes.
+ writeChar('"', ostr);
+ serializeText(column, row_num, ostr, settings);
+ writeChar('"', ostr);
+}
+
+void SerializationDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{ /// Handles a quoted field, an unquoted field parsed in place, or an unquoted field first extracted as a CSV string.
+ DateTime64 x = 0;
+
+ if (istr.eof())
+ throwReadAfterEOF();
+
+ char maybe_quote = *istr.position();
+
+ if (maybe_quote == '\'' || maybe_quote == '\"')
+ {
+ ++istr.position(); /// Skip the opening quote; the same character must close the value.
+ readText(x, scale, istr, settings, time_zone, utc_time_zone);
+ assertChar(maybe_quote, istr);
+ }
+ else
+ {
+ if (settings.csv.delimiter != ',' || settings.date_time_input_format == FormatSettings::DateTimeInputFormat::Basic)
+ {
+ readText(x, scale, istr, settings, time_zone, utc_time_zone);
+ }
+ /// Best effort parsing supports datetime in format like "01.01.2000, 00:00:00"
+ /// and can mistakenly read comma as a part of datetime.
+ /// For example data "...,01.01.2000,some string,..." cannot be parsed correctly.
+ /// To fix this problem we first read CSV string and then try to parse it as datetime.
+ else
+ {
+ String datetime_str;
+ readCSVString(datetime_str, istr, settings.csv);
+ ReadBufferFromString buf(datetime_str);
+ readText(x, scale, buf, settings, time_zone, utc_time_zone);
+ }
+ }
+
+ assert_cast<ColumnType &>(column).getData().push_back(x);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.h
new file mode 100644
index 00000000000..f817edbf0dd
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDateTime64.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationDecimalBase.h>
+#include <DataTypes/TimezoneMixin.h>
+
+class DateLUTImpl;
+
+namespace DB
+{
+
+/// Text serialization (plain, escaped, quoted, JSON, CSV) for DateTime64 columns.
+/// This class declares only text methods; the binary methods are declared by the
+/// SerializationDecimalBase<DateTime64> base. The TimezoneMixin base supplies the
+/// time zone members used by the text readers.
+class SerializationDateTime64 final : public SerializationDecimalBase<DateTime64>, public TimezoneMixin
+{
+public:
+ /// `scale_` is the DateTime64 scale; `time_zone_` is copied into the TimezoneMixin base
+ /// (NOTE(review): the constructor body is not in this header - confirm in the .cpp).
+ SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_);
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ /// CSV output wraps the text form in double quotes; CSV input accepts a single-quoted,
+ /// double-quoted or unquoted field.
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.cpp
new file mode 100644
index 00000000000..9de85d338e9
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.cpp
@@ -0,0 +1,99 @@
+#include <DataTypes/Serializations/SerializationDecimal.h>
+
+#include <Columns/ColumnVector.h>
+#include <Common/assert_cast.h>
+#include <Common/typeid_cast.h>
+#include <Formats/ProtobufReader.h>
+#include <Formats/ProtobufWriter.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/readDecimalText.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int DECIMAL_OVERFLOW;
+}
+
+/// Non-throwing decimal parse. Returns false when tryReadDecimalText() fails or when
+/// multiplying by the scale multiplier for the remaining scale overflows; true otherwise.
+/// `x` may be modified even when false is returned.
+template <typename T>
+bool SerializationDecimal<T>::tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale)
+{
+    /// Starts at the full scale; passed to the parser, which presumably reduces it by the
+    /// fractional digits it consumed (NOTE(review): confirm in readDecimalText.h).
+    UInt32 remaining_scale = scale;
+
+    return tryReadDecimalText(istr, x, precision, remaining_scale)
+        && !common::mulOverflow(x.value, DecimalUtils::scaleMultiplier<T>(remaining_scale), x.value);
+}
+
+/// Throwing decimal parse. Uses the CSV-aware reader when `csv` is set, the plain reader otherwise.
+/// Throws DECIMAL_OVERFLOW when applying the remaining scale overflows the underlying value.
+template <typename T>
+void SerializationDecimal<T>::readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv)
+{
+    UInt32 remaining_scale = scale;
+
+    if (!csv)
+        readDecimalText(istr, x, precision, remaining_scale);
+    else
+        readCSVDecimalText(istr, x, precision, remaining_scale);
+
+    const bool overflowed = common::mulOverflow(x.value, DecimalUtils::scaleMultiplier<T>(remaining_scale), x.value);
+    if (overflowed)
+        throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
+}
+
+/// Writes row `row_num` as decimal text using this serialization's scale;
+/// trailing zeros are controlled by settings.decimal_trailing_zeros.
+template <typename T>
+void SerializationDecimal<T>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & values = assert_cast<const ColumnType &>(column).getData();
+    T element = values[row_num];
+    writeText(element, this->scale, ostr, settings.decimal_trailing_zeros);
+}
+
+/// Parses one decimal and appends it to `column`. When `whole` is set and input remains
+/// after the value, reports it through throwUnexpectedDataAfterParsedValue().
+template <typename T>
+void SerializationDecimal<T>::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
+{
+    T parsed;
+    readText(parsed, istr);
+
+    /// The value is appended before the trailing-data check, so the helper is called with the new row in place.
+    auto & values = assert_cast<ColumnType &>(column).getData();
+    values.push_back(parsed);
+
+    const bool has_trailing_data = whole && !istr.eof();
+    if (has_trailing_data)
+        ISerialization::throwUnexpectedDataAfterParsedValue(column, istr, settings, "Decimal");
+}
+
+/// Parses one decimal with the CSV-aware reader and appends it to `column`.
+template <typename T>
+void SerializationDecimal<T>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    T parsed;
+    readText(parsed, istr, /* csv = */ true);
+
+    auto & values = assert_cast<ColumnType &>(column).getData();
+    values.push_back(parsed);
+}
+
+/// JSON output: the text form, wrapped in double quotes when settings.json.quote_decimals is set.
+template <typename T>
+void SerializationDecimal<T>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const bool quoted = settings.json.quote_decimals;
+
+    if (quoted)
+        writeChar('"', ostr);
+
+    serializeText(column, row_num, ostr, settings);
+
+    if (quoted)
+        writeChar('"', ostr);
+}
+
+/// JSON input: accepts the decimal either bare or wrapped in double quotes.
+/// If an opening quote was consumed, a closing quote is required after the value.
+template <typename T>
+void SerializationDecimal<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    const bool opened_quote = checkChar('"', istr);
+
+    deserializeText(column, istr, settings, false);
+
+    if (opened_quote)
+        assertChar('"', istr);
+}
+
+
+template class SerializationDecimal<Decimal32>;
+template class SerializationDecimal<Decimal64>;
+template class SerializationDecimal<Decimal128>;
+template class SerializationDecimal<Decimal256>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.h
new file mode 100644
index 00000000000..57decdd0973
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimal.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationDecimalBase.h>
+
+namespace DB
+{
+
+/// Text and JSON/CSV serialization for Decimal columns with underlying type T.
+/// Binary methods are declared by the SerializationDecimalBase<T> base.
+template <typename T>
+class SerializationDecimal final : public SerializationDecimalBase<T>
+{
+public:
+ using typename SerializationDecimalBase<T>::ColumnType;
+
+ SerializationDecimal(UInt32 precision_, UInt32 scale_)
+ : SerializationDecimalBase<T>(precision_, scale_) {}
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ /// Convenience overload: parses with this serialization's own precision and scale.
+ void readText(T & x, ReadBuffer & istr, bool csv = false) const { readText(x, istr, this->precision, this->scale, csv); }
+
+ /// Parses a decimal with explicit precision/scale; throws DECIMAL_OVERFLOW if scaling overflows.
+ static void readText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_, bool csv = false);
+ /// Same parse without throwing on failure: returns false on a parse failure or scaling overflow.
+ static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision_, UInt32 scale_);
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.cpp
new file mode 100644
index 00000000000..b7f91e6833e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.cpp
@@ -0,0 +1,79 @@
+#include <DataTypes/Serializations/SerializationDecimalBase.h>
+
+#include <Common/assert_cast.h>
+#include <Common/typeid_cast.h>
+#include <Formats/ProtobufReader.h>
+#include <Formats/ProtobufWriter.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+
+#include <ranges>
+
+namespace DB
+{
+
+/// Writes the decimal held by `field` in little-endian binary form.
+template <typename T>
+void SerializationDecimalBase<T>::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
+{
+    FieldType decimal_value = field.get<DecimalField<T>>();
+    writeBinaryLittleEndian(decimal_value, ostr);
+}
+
+/// Writes row `row_num` of `column` in little-endian binary form.
+template <typename T>
+void SerializationDecimalBase<T>::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & decimal_column = assert_cast<const ColumnType &>(column);
+    const FieldType & element = decimal_column.getElement(row_num);
+    writeBinaryLittleEndian(element, ostr);
+}
+
+/// Writes `limit` rows starting at `offset` in little-endian binary form.
+/// `limit == 0`, or a range running past the end, means "everything from `offset` to the end".
+template <typename T>
+void SerializationDecimalBase<T>::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+    const typename ColumnType::Container & values = typeid_cast<const ColumnType &>(column).getData();
+
+    const size_t total_rows = values.size();
+    if (limit == 0 || offset + limit > total_rows)
+        limit = total_rows - offset;
+
+    if constexpr (std::endian::native == std::endian::big)
+    {
+        /// Big-endian host: convert element by element.
+        const size_t end = offset + limit;
+        for (size_t row = offset; row < end; ++row)
+            writeBinaryLittleEndian(values[row], ostr);
+    }
+    else
+    {
+        /// Host layout already matches the wire format: write the range as one byte block.
+        ostr.write(reinterpret_cast<const char *>(&values[offset]), sizeof(FieldType) * limit);
+    }
+}
+
+/// Reads one little-endian native value and stores it in `field` as a DecimalField
+/// carrying this serialization's scale.
+template <typename T>
+void SerializationDecimalBase<T>::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const
+{
+    typename FieldType::NativeType raw;
+    readBinaryLittleEndian(raw, istr);
+
+    field = DecimalField(T(raw), this->scale);
+}
+
+/// Reads one little-endian native value and appends it to `column`.
+template <typename T>
+void SerializationDecimalBase<T>::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    typename FieldType::NativeType raw;
+    readBinaryLittleEndian(raw, istr);
+
+    auto & values = assert_cast<ColumnType &>(column).getData();
+    values.push_back(FieldType(raw));
+}
+
+/// Reads up to `limit` values from `istr` and appends them to `column`.
+/// The column is grown by `limit` rows up front and then shrunk to the number of whole
+/// values actually read; on big-endian hosts the new values are byte-swapped afterwards.
+template <typename T>
+void SerializationDecimalBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const
+{
+    typename ColumnType::Container & values = typeid_cast<ColumnType &>(column).getData();
+
+    const size_t rows_before = values.size();
+    values.resize(rows_before + limit);
+
+    char * const destination = reinterpret_cast<char *>(&values[rows_before]);
+    const size_t bytes_read = istr.readBig(destination, sizeof(FieldType) * limit);
+    values.resize(rows_before + bytes_read / sizeof(FieldType));
+
+    if constexpr (std::endian::native == std::endian::big)
+    {
+        for (size_t row = rows_before; row < values.size(); ++row)
+            transformEndianness<std::endian::big, std::endian::little>(values[row]);
+    }
+}
+
+template class SerializationDecimalBase<Decimal32>;
+template class SerializationDecimalBase<Decimal64>;
+template class SerializationDecimalBase<Decimal128>;
+template class SerializationDecimalBase<Decimal256>;
+template class SerializationDecimalBase<DateTime64>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.h
new file mode 100644
index 00000000000..08f963cedbb
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationDecimalBase.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+#include <Columns/ColumnDecimal.h>
+
+namespace DB
+{
+
+/// Common base for decimal-like serializations: stores precision/scale and declares the
+/// binary (single value and bulk) methods for values of type T stored in ColumnDecimal<T>.
+template <typename T>
+class SerializationDecimalBase : public SimpleTextSerialization
+{
+protected:
+ /// Fixed at construction; read by derived classes through this->precision / this->scale.
+ const UInt32 precision;
+ const UInt32 scale;
+
+public:
+ using FieldType = T;
+ using ColumnType = ColumnDecimal<T>;
+
+ SerializationDecimalBase(UInt32 precision_, UInt32 scale_)
+ : precision(precision_), scale(scale_) {}
+
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ /// `limit == 0` means "all rows from `offset` to the end of the column".
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
+
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ /// Appends at most `limit` values; `avg_value_size_hint` is ignored by the implementation.
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.cpp
new file mode 100644
index 00000000000..a1bd63d4327
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.cpp
@@ -0,0 +1,117 @@
+#include <DataTypes/Serializations/SerializationEnum.h>
+
+#include <Columns/ColumnVector.h>
+#include <Common/assert_cast.h>
+#include <IO/WriteBufferFromString.h>
+#include <Formats/FormatSettings.h>
+#include <Formats/ProtobufReader.h>
+#include <Formats/ProtobufWriter.h>
+
+namespace DB
+{
+
+/// Writes the name registered for the enum value stored in row `row_num`.
+template <typename Type>
+void SerializationEnum<Type>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & values = assert_cast<const ColumnType &>(column).getData();
+    const auto & name = this->getNameForValue(values[row_num]);
+    writeString(name, ostr);
+}
+
+/// Writes the enum name for row `row_num` with escaping applied.
+template <typename Type>
+void SerializationEnum<Type>::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & values = assert_cast<const ColumnType &>(column).getData();
+    const auto & name = this->getNameForValue(values[row_num]);
+    writeEscapedString(name.toView(), ostr);
+}
+
+/// Reads an enum from escaped text: as a number when settings.tsv.enum_as_number is set,
+/// otherwise as an escaped name that is looked up with getValue(name, true).
+template <typename Type>
+void SerializationEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (settings.tsv.enum_as_number)
+    {
+        auto & values = assert_cast<ColumnType &>(column).getData();
+        values.push_back(readValue(istr));
+        return;
+    }
+
+    /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
+    std::string name;
+    readEscapedString(name, istr);
+
+    auto & values = assert_cast<ColumnType &>(column).getData();
+    values.push_back(this->getValue(StringRef(name), true));
+}
+
+/// Writes the enum name for row `row_num` as a quoted string.
+template <typename Type>
+void SerializationEnum<Type>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & values = assert_cast<const ColumnType &>(column).getData();
+    const auto & name = this->getNameForValue(values[row_num]);
+    writeQuotedString(name, ostr);
+}
+
+/// Reads a quoted enum name (SQL-style quoting) and appends the matching value.
+template <typename Type>
+void SerializationEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    std::string name;
+    readQuotedStringWithSQLStyle(name, istr);
+
+    auto & values = assert_cast<ColumnType &>(column).getData();
+    values.push_back(this->getValue(StringRef(name)));
+}
+
+/// Reads an enum that must occupy the entire remaining input.
+/// Name mode: everything up to EOF is the name. Number mode (settings.tsv.enum_as_number):
+/// the number is read and any remaining input is reported as unexpected data.
+template <typename Type>
+void SerializationEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (!settings.tsv.enum_as_number)
+    {
+        std::string name;
+        readStringUntilEOF(name, istr);
+
+        auto & values = assert_cast<ColumnType &>(column).getData();
+        values.push_back(this->getValue(StringRef(name), true));
+        return;
+    }
+
+    auto & values = assert_cast<ColumnType &>(column).getData();
+    values.push_back(readValue(istr));
+
+    if (!istr.eof())
+        ISerialization::throwUnexpectedDataAfterParsedValue(column, istr, settings, "Enum");
+}
+
+/// Writes the enum name for row `row_num` as a JSON string.
+template <typename Type>
+void SerializationEnum<Type>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & values = assert_cast<const ColumnType &>(column).getData();
+    const auto & name = this->getNameForValue(values[row_num]);
+    writeJSONString(name.toView(), ostr, settings);
+}
+
+/// Writes the enum name for row `row_num` as the text of an XML element.
+template <typename Type>
+void SerializationEnum<Type>::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & values = assert_cast<const ColumnType &>(column).getData();
+    const auto & name = this->getNameForValue(values[row_num]);
+    writeXMLStringForTextElement(name.toView(), ostr);
+}
+
+/// Reads an enum from JSON. Input that starts with a double quote (or an exhausted buffer)
+/// goes through the JSON string reader and is looked up by name; anything else is read as a number.
+template <typename Type>
+void SerializationEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    const bool read_as_string = istr.eof() || *istr.position() == '"';
+
+    if (read_as_string)
+    {
+        std::string name;
+        readJSONString(name, istr);
+
+        auto & values = assert_cast<ColumnType &>(column).getData();
+        values.push_back(this->getValue(StringRef(name)));
+    }
+    else
+    {
+        auto & values = assert_cast<ColumnType &>(column).getData();
+        values.push_back(readValue(istr));
+    }
+}
+
+/// Writes the enum name for row `row_num` as a CSV string.
+template <typename Type>
+void SerializationEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & values = assert_cast<const ColumnType &>(column).getData();
+    const auto & name = this->getNameForValue(values[row_num]);
+    writeCSVString(name, ostr);
+}
+
+/// Reads an enum from CSV: as a number when settings.csv.enum_as_number is set,
+/// otherwise as a CSV string that is looked up with getValue(name, true).
+template <typename Type>
+void SerializationEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (settings.csv.enum_as_number)
+    {
+        auto & values = assert_cast<ColumnType &>(column).getData();
+        values.push_back(readValue(istr));
+        return;
+    }
+
+    std::string name;
+    readCSVString(name, istr, settings.csv);
+
+    auto & values = assert_cast<ColumnType &>(column).getData();
+    values.push_back(this->getValue(StringRef(name), true));
+}
+
+template class SerializationEnum<Int8>;
+template class SerializationEnum<Int16>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.h
new file mode 100644
index 00000000000..bdd769b59c5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationEnum.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationNumber.h>
+#include <DataTypes/EnumValues.h>
+
+namespace DB
+{
+
+/// Serialization for Enum columns stored as `Type`. Text formats write and read the
+/// enum's name; the name <-> value mapping comes from the EnumValues<Type> base.
+/// Methods not overridden here come from SerializationNumber<Type>.
+template <typename Type>
+class SerializationEnum : public SerializationNumber<Type>, public EnumValues<Type>
+{
+public:
+ using typename SerializationNumber<Type>::FieldType;
+ using typename SerializationNumber<Type>::ColumnType;
+ using typename EnumValues<Type>::Values;
+
+ explicit SerializationEnum(const Values & values_) : EnumValues<Type>(values_) {}
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ /// Reads a numeric enum value as text and returns the value stored in the entry that
+ /// findByValue() locates for it.
+ /// NOTE(review): behaviour for a number that is not a declared enum value depends on
+ /// EnumValues::findByValue, which is not visible here - confirm it throws rather than
+ /// returning an end iterator.
+ FieldType readValue(ReadBuffer & istr) const
+ {
+ FieldType x;
+ readText(x, istr);
+ return this->findByValue(x)->first;
+ }
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.cpp
new file mode 100644
index 00000000000..3b405f6ec08
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.cpp
@@ -0,0 +1,214 @@
+#include <DataTypes/Serializations/SerializationFixedString.h>
+
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnConst.h>
+
+#include <Formats/FormatSettings.h>
+#include <Formats/ProtobufReader.h>
+#include <Formats/ProtobufWriter.h>
+
+#include <IO/WriteBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/VarInt.h>
+
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_READ_ALL_DATA;
+ extern const int TOO_LARGE_STRING_SIZE;
+}
+
+/// Upper bound (2^30 bytes) on the number of bytes a single bulk deserialization may request;
+/// deserializeBinaryBulk() throws TOO_LARGE_STRING_SIZE above it.
+static constexpr size_t MAX_STRINGS_SIZE = 1ULL << 30;
+
+/// Writes exactly `n` bytes for the string in `field`: the first min(size, n) bytes of the
+/// string, followed by zero bytes when the string is shorter than `n`.
+void SerializationFixedString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const String & str = field.get<const String &>();
+
+    const size_t copied = std::min(str.size(), n);
+    ostr.write(str.data(), copied);
+
+    /// Zero padding; runs zero times when the string already has at least `n` bytes.
+    for (size_t pos = copied; pos < n; ++pos)
+        ostr.write(0);
+}
+
+
+/// Reads exactly `n` bytes into a String stored in `field`.
+void SerializationFixedString::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const
+{
+    field = String();
+
+    String & target = field.get<String &>();
+    target.resize(n);
+    istr.readStrict(target.data(), n);
+}
+
+
+/// Writes the `n` bytes of row `row_num` as they are stored in the column.
+void SerializationFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & chars = assert_cast<const ColumnFixedString &>(column).getChars();
+    const char * row_begin = reinterpret_cast<const char *>(&chars[n * row_num]);
+    ostr.write(row_begin, n);
+}
+
+
+/// Appends one row by reading exactly `n` bytes. If the read throws, the column's byte
+/// array is shrunk back to its previous size before the exception is rethrown.
+void SerializationFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    ColumnFixedString::Chars & chars = assert_cast<ColumnFixedString &>(column).getChars();
+
+    const size_t size_before = chars.size();
+    chars.resize(size_before + n);
+
+    try
+    {
+        char * row_begin = reinterpret_cast<char *>(chars.data() + size_before);
+        istr.readStrict(row_begin, n);
+    }
+    catch (...)
+    {
+        chars.resize_assume_reserved(size_before);
+        throw;
+    }
+}
+
+
+/// Writes `limit` rows starting at `offset` as one contiguous byte block.
+/// `limit == 0`, or a range running past the end, means "everything from `offset` to the end".
+void SerializationFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+    const ColumnFixedString::Chars & chars = typeid_cast<const ColumnFixedString &>(column).getChars();
+
+    const size_t rows = chars.size() / n;
+    if (limit == 0 || offset + limit > rows)
+        limit = rows - offset;
+
+    if (limit == 0)
+        return;
+
+    ostr.write(reinterpret_cast<const char *>(&chars[n * offset]), n * limit);
+}
+
+
+/// Appends up to `limit` rows read from `istr`.
+/// Throws TOO_LARGE_STRING_SIZE when `limit * n` overflows, exceeds MAX_STRINGS_SIZE, or
+/// overflows when added to the current size; throws CANNOT_READ_ALL_DATA when the number
+/// of bytes read is not a multiple of `n`.
+void SerializationFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
+{
+    ColumnFixedString::Chars & chars = typeid_cast<ColumnFixedString &>(column).getChars();
+    size_t size_before = chars.size();
+
+    size_t max_bytes;
+    if (unlikely(__builtin_mul_overflow(limit, n, &max_bytes)))
+        throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Deserializing FixedString will lead to overflow");
+    if (unlikely(max_bytes > MAX_STRINGS_SIZE))
+        throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large sizes of FixedString to deserialize: {}", max_bytes);
+
+    size_t grown_size;
+    if (unlikely(__builtin_add_overflow(size_before, max_bytes, &grown_size)))
+        throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Deserializing FixedString will lead to overflow");
+
+    /// Grow for the maximum first, read, then shrink to what was actually read.
+    chars.resize(grown_size);
+    size_t read_bytes = istr.readBig(reinterpret_cast<char *>(&chars[size_before]), max_bytes);
+
+    if (read_bytes % n != 0)
+        throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data of type FixedString. "
+                        "Bytes read:{}. String size:{}.", read_bytes, toString(n));
+
+    chars.resize(size_before + read_bytes);
+}
+
+
+/// Writes the `n` bytes of row `row_num` verbatim.
+void SerializationFixedString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & chars = assert_cast<const ColumnFixedString &>(column).getChars();
+    const char * row_begin = reinterpret_cast<const char *>(&chars[n * row_num]);
+    writeString(row_begin, n, ostr);
+}
+
+
+/// Writes the `n` bytes of row `row_num` with escaping applied.
+void SerializationFixedString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & chars = assert_cast<const ColumnFixedString &>(column).getChars();
+    const char * row_begin = reinterpret_cast<const char *>(&chars[n * row_num]);
+    const char * row_end = row_begin + n;
+    writeAnyEscapedString<'\''>(row_begin, row_end, ostr);
+}
+
+
+/// Makes the string that starts at `string_start` in `data` exactly `n` bytes long.
+/// A shorter string is extended with resize_fill(); a longer one is removed from `data`
+/// and TOO_LARGE_STRING_SIZE is thrown.
+void SerializationFixedString::alignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start)
+{
+    const size_t length = data.size() - string_start;
+
+    if (length > n)
+    {
+        data.resize_assume_reserved(string_start);
+        throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large value for FixedString({})", n);
+    }
+
+    if (length < n)
+        data.resize_fill(string_start + n);
+}
+
+/// Shared driver for the text deserializers: lets `reader` append raw bytes to the column's
+/// byte array, then aligns the new string to the fixed length. On any exception the byte
+/// array is shrunk back to its size on entry and the exception is rethrown.
+template <typename Reader>
+static inline void read(const SerializationFixedString & self, IColumn & column, Reader && reader)
+{
+    ColumnFixedString::Chars & chars = typeid_cast<ColumnFixedString &>(column).getChars();
+    const size_t size_on_entry = chars.size();
+
+    try
+    {
+        reader(chars);
+        SerializationFixedString::alignStringLength(self.getN(), chars, size_on_entry);
+    }
+    catch (...)
+    {
+        chars.resize_assume_reserved(size_on_entry);
+        throw;
+    }
+}
+
+
+/// Reads an escaped string into the column and aligns it to the fixed length.
+void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_escaped = [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); };
+    read(*this, column, read_escaped);
+}
+
+
+/// Writes the `n` bytes of row `row_num` as a single-quoted string.
+void SerializationFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & chars = assert_cast<const ColumnFixedString &>(column).getChars();
+    const char * row_begin = reinterpret_cast<const char *>(&chars[n * row_num]);
+    const char * row_end = row_begin + n;
+    writeAnyQuotedString<'\''>(row_begin, row_end, ostr);
+}
+
+
+/// Reads a quoted string into the column and aligns it to the fixed length.
+void SerializationFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_quoted = [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); };
+    read(*this, column, read_quoted);
+}
+
+
+/// Reads everything up to EOF into the column and aligns it to the fixed length.
+void SerializationFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_until_eof = [&istr](ColumnFixedString::Chars & data) { readStringUntilEOFInto(data, istr); };
+    read(*this, column, read_until_eof);
+}
+
+
+/// Writes the `n` bytes of row `row_num` as a JSON string.
+void SerializationFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & chars = assert_cast<const ColumnFixedString &>(column).getChars();
+    const char * row_begin = reinterpret_cast<const char *>(&chars[n * row_num]);
+    const char * row_end = row_begin + n;
+    writeJSONString(row_begin, row_end, ostr, settings);
+}
+
+
+/// Reads a JSON string into the column and aligns it to the fixed length.
+void SerializationFixedString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_json = [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); };
+    read(*this, column, read_json);
+}
+
+
+/// Writes the `n` bytes of row `row_num` as the text of an XML element.
+void SerializationFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & chars = assert_cast<const ColumnFixedString &>(column).getChars();
+    const char * row_begin = reinterpret_cast<const char *>(&chars[n * row_num]);
+    const char * row_end = row_begin + n;
+    writeXMLStringForTextElement(row_begin, row_end, ostr);
+}
+
+
+/// Writes the `n` bytes of row `row_num` as a CSV string.
+void SerializationFixedString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & chars = assert_cast<const ColumnFixedString &>(column).getChars();
+    const char * row_begin = reinterpret_cast<const char *>(&chars[n * row_num]);
+    const char * row_end = row_begin + n;
+    writeCSVString(row_begin, row_end, ostr);
+}
+
+
+/// Reads a CSV string (using settings.csv) into the column and aligns it to the fixed length.
+void SerializationFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    auto read_csv = [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); };
+    read(*this, column, read_csv);
+}
+
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.h
new file mode 100644
index 00000000000..3db31ab02cb
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationFixedString.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <DataTypes/Serializations/ISerialization.h>
+#include <Common/PODArray.h>
+
+namespace DB
+{
+
+/// Serialization for FixedString(n) columns: every value occupies exactly `n` bytes.
+class SerializationFixedString : public ISerialization
+{
+private:
+ /// Fixed length of each value, in bytes.
+ size_t n;
+
+public:
+ explicit SerializationFixedString(size_t n_) : n(n_) {}
+ size_t getN() const { return n; }
+
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /// Makes sure that the length of the string newly appended to `data` (the bytes from
+ /// `string_start` to the end) is equal to `n`.
+ /// If the length is less than `n` the function extends the string up to `n` bytes.
+ /// If the length is greater than `n` the function removes the string from `data` and throws an exception.
+ static void alignStringLength(size_t n, PaddedPODArray<UInt8> & data, size_t string_start);
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationIPv4andIPv6.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationIPv4andIPv6.h
new file mode 100644
index 00000000000..61464962f1c
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationIPv4andIPv6.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#include <base/TypeName.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Columns/ColumnsNumber.h>
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+
+namespace DB
+{
+
+/// Serialization for IP address columns (ColumnVector<IPv>); instantiated below for IPv4 and IPv6.
+/// Text formats delegate to readText()/writeText() for the IPv type; quoted, JSON and CSV output
+/// wrap the text form in quotes. Binary formats read and write the raw value.
+template <typename IPv>
+class SerializationIP : public SimpleTextSerialization
+{
+public:
+    void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
+    {
+        writeText(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
+    }
+
+    /// Parses one address and appends it. With `whole` set, any input left after the address
+    /// is reported through throwUnexpectedDataAfterParsedValue().
+    void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override
+    {
+        IPv x;
+        readText(x, istr);
+
+        /// Append before reporting trailing data, as the Decimal and Enum serializations do:
+        /// the reporting helper is handed the column and is expected to find the value that
+        /// was just parsed as its last row.
+        /// NOTE(review): confirm against ISerialization::throwUnexpectedDataAfterParsedValue
+        /// that it renders and removes the last row.
+        assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
+
+        if (whole && !istr.eof())
+            throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
+    }
+
+    void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        serializeText(column, row_num, ostr, settings);
+    }
+
+    void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
+    {
+        deserializeText(column, istr, settings, false);
+    }
+
+    void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        writeChar('\'', ostr);
+        serializeText(column, row_num, ostr, settings);
+        writeChar('\'', ostr);
+    }
+
+    void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
+    {
+        IPv x;
+        assertChar('\'', istr);
+        readText(x, istr);
+        assertChar('\'', istr);
+        assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
+    }
+
+    void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        writeChar('"', ostr);
+        serializeText(column, row_num, ostr, settings);
+        writeChar('"', ostr);
+    }
+
+    /// Reads a double-quoted address. A missing closing quote at EOF fails in assertChar();
+    /// any other character after the address is reported as unexpected trailing data.
+    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
+    {
+        IPv x;
+        assertChar('"', istr);
+        readText(x, istr);
+        /// this code looks weird, but we want to throw specific exception to match original behavior...
+        if (istr.eof())
+            assertChar('"', istr);
+
+        /// Append before the trailing-data check so that the reporting helper sees the parsed
+        /// value as the last row of the column (same order as in deserializeText()).
+        assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
+
+        if (*istr.position() != '"')
+            throwUnexpectedDataAfterParsedValue(column, istr, settings, TypeName<IPv>.data());
+        istr.ignore();
+    }
+
+    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        writeChar('"', ostr);
+        serializeText(column, row_num, ostr, settings);
+        writeChar('"', ostr);
+    }
+
+    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override
+    {
+        IPv value;
+        readCSV(value, istr);
+
+        assert_cast<ColumnVector<IPv> &>(column).getData().push_back(value);
+    }
+
+    void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override
+    {
+        IPv x = field.get<IPv>();
+        writeBinary(x, ostr);
+    }
+
+    void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override
+    {
+        IPv x;
+        readBinary(x.toUnderType(), istr);
+        field = NearestFieldType<IPv>(x);
+    }
+
+    void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
+    {
+        writeBinary(assert_cast<const ColumnVector<IPv> &>(column).getData()[row_num], ostr);
+    }
+
+    void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
+    {
+        IPv x;
+        readBinary(x.toUnderType(), istr);
+        assert_cast<ColumnVector<IPv> &>(column).getData().push_back(x);
+    }
+
+    /// Writes `limit` rows starting at `offset` as one byte block.
+    /// `limit == 0`, or a range running past the end, means "everything from `offset` to the end".
+    void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override
+    {
+        const typename ColumnVector<IPv>::Container & x = typeid_cast<const ColumnVector<IPv> &>(column).getData();
+
+        size_t size = x.size();
+
+        if (limit == 0 || offset + limit > size)
+            limit = size - offset;
+
+        if (limit)
+            ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(IPv) * limit);
+    }
+
+    /// Appends up to `limit` values: grows the column by `limit` rows, reads, then shrinks
+    /// to the number of whole values actually read.
+    void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const override
+    {
+        typename ColumnVector<IPv>::Container & x = typeid_cast<ColumnVector<IPv> &>(column).getData();
+        size_t initial_size = x.size();
+        x.resize(initial_size + limit);
+        size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(IPv) * limit);
+        x.resize(initial_size + size / sizeof(IPv));
+    }
+};
+
+using SerializationIPv4 = SerializationIP<IPv4>;
+using SerializationIPv6 = SerializationIP<IPv6>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.cpp
new file mode 100644
index 00000000000..4e9b9905454
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.cpp
@@ -0,0 +1,298 @@
+#include <DataTypes/Serializations/SerializationInfo.h>
+#include <Columns/ColumnSparse.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/VarInt.h>
+#include <Core/Block.h>
+#include <base/EnumReflection.h>
+
+#include <Poco/JSON/JSON.h>
+#include <Poco/JSON/Object.h>
+#include <Poco/JSON/Stringifier.h>
+#include <Poco/JSON/Parser.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CORRUPTED_DATA;
+}
+
+namespace // File-local names of the JSON fields used by the serialization info format.
+{
+
+constexpr auto KEY_VERSION = "version"; ///< Top-level field: format version, checked in readJSON().
+constexpr auto KEY_NUM_ROWS = "num_rows"; ///< Per-info field: Data::num_rows.
+constexpr auto KEY_COLUMNS = "columns"; ///< Top-level field: array with one object per column.
+constexpr auto KEY_NUM_DEFAULTS = "num_defaults"; ///< Per-info field: Data::num_defaults.
+constexpr auto KEY_KIND = "kind"; ///< Per-info field: serialization kind as a string.
+constexpr auto KEY_NAME = "name"; ///< Per-column field: column name, added in writeJSON().
+
+}
+
+/// Accounts for the rows of `column`. The number of default rows is derived from
+/// the ratio of defaults reported by the column itself.
+void SerializationInfo::Data::add(const IColumn & column)
+{
+    const size_t column_rows = column.size();
+    const double default_ratio = column.getRatioOfDefaultRows(ColumnSparse::DEFAULT_ROWS_SEARCH_SAMPLE_RATIO);
+
+    /// The product is truncated towards zero when converted to an integer counter.
+    num_defaults += static_cast<size_t>(default_ratio * column_rows);
+    num_rows += column_rows;
+}
+
+/// Adds the counters of `other` to the counters of this object.
+void SerializationInfo::Data::add(const Data & other)
+{
+    num_defaults += other.num_defaults;
+    num_rows += other.num_rows;
+}
+
+/// Accounts for `length` additional rows, all of which hold the default value.
+void SerializationInfo::Data::addDefaults(size_t length)
+{
+    num_defaults += length;
+    num_rows += length;
+}
+
+SerializationInfo::SerializationInfo(ISerialization::Kind kind_, const Settings & settings_)
+ : settings(settings_)
+ , kind(kind_)
+{
+}
+
+SerializationInfo::SerializationInfo(ISerialization::Kind kind_, const Settings & settings_, const Data & data_) // Creates an info with the given kind and settings, starting from a copy of the counters in data_.
+ : settings(settings_)
+ , kind(kind_)
+ , data(data_)
+{
+}
+
+/// Accounts for the rows of `column` and, if the settings ask for it, re-evaluates the kind.
+void SerializationInfo::add(const IColumn & column)
+{
+    data.add(column);
+
+    if (!settings.choose_kind)
+        return;
+
+    kind = chooseKind(data, settings);
+}
+
+/// Merges the counters of `other` and, if the settings ask for it, re-evaluates the kind.
+void SerializationInfo::add(const SerializationInfo & other)
+{
+    data.add(other.data);
+
+    if (!settings.choose_kind)
+        return;
+
+    kind = chooseKind(data, settings);
+}
+
+/// Accounts for `length` default rows and, if the settings ask for it, re-evaluates the kind.
+void SerializationInfo::addDefaults(size_t length)
+{
+    data.addDefaults(length);
+
+    if (!settings.choose_kind)
+        return;
+
+    kind = chooseKind(data, settings);
+}
+
+void SerializationInfo::replaceData(const SerializationInfo & other) // Overwrites the row counters with those of other.
+{
+ data = other.data; // kind and settings are left as they are
+}
+
+MutableSerializationInfoPtr SerializationInfo::clone() const // Returns a new SerializationInfo carrying the same kind, settings and counters.
+{
+ return std::make_shared<SerializationInfo>(kind, settings, data);
+}
+
+/// Tells whether converting a column from type 'lhs' to type 'rhs' is known
+/// to map every default value of 'lhs' to a default value of 'rhs'.
+/// This is assumed for equal types, for any pair of types stored as numbers
+/// or decimals, and for any pair of String / FixedString types.
+static bool preserveDefaultsAfterConversion(const IDataType & lhs, const IDataType & rhs)
+{
+    if (lhs.equals(rhs))
+        return true;
+
+    const auto is_columned_as_numeric = [](const IDataType & type)
+    {
+        return isColumnedAsNumber(type) || isColumnedAsDecimal(type);
+    };
+
+    if (is_columned_as_numeric(lhs) && is_columned_as_numeric(rhs))
+        return true;
+
+    return isStringOrFixedString(lhs) && isStringOrFixedString(rhs);
+}
+
+/// Creates a fresh info (zeroed counters) for a column whose type changes from `old_type` to `new_type`.
+/// The current kind is kept, except that SPARSE falls back to DEFAULT when the new type
+/// does not support sparse serialization or when defaults may not survive the conversion.
+std::shared_ptr<SerializationInfo> SerializationInfo::createWithType(
+    const IDataType & old_type,
+    const IDataType & new_type,
+    const Settings & new_settings) const
+{
+    const bool sparse_is_lost = kind == ISerialization::Kind::SPARSE
+        && !(new_type.supportsSparseSerialization() && preserveDefaultsAfterConversion(old_type, new_type));
+
+    const auto new_kind = sparse_is_lost ? ISerialization::Kind::DEFAULT : kind;
+    return std::make_shared<SerializationInfo>(new_kind, new_settings);
+}
+
+/// Writes the serialization kind to `out` as a single UInt8.
+void SerializationInfo::serialializeKindBinary(WriteBuffer & out) const
+{
+    const auto kind_num = static_cast<UInt8>(kind);
+    writeBinary(kind_num, out);
+}
+
+/// Reads the serialization kind written by serialializeKindBinary().
+/// Throws CORRUPTED_DATA if the stored number is not a known kind; `kind` is left unchanged then.
+void SerializationInfo::deserializeFromKindsBinary(ReadBuffer & in)
+{
+    UInt8 raw_kind;
+    readBinary(raw_kind, in);
+
+    if (const auto parsed_kind = magic_enum::enum_cast<ISerialization::Kind>(raw_kind))
+    {
+        kind = *parsed_kind;
+        return;
+    }
+
+    throw Exception(ErrorCodes::CORRUPTED_DATA, "Unknown serialization kind {}", std::to_string(raw_kind));
+}
+
+Poco::JSON::Object SerializationInfo::toJSON() const // Returns a JSON object with the kind and both row counters; fromJSON() reads the same three fields.
+{
+ Poco::JSON::Object object;
+ object.set(KEY_KIND, ISerialization::kindToString(kind)); // kind is stored as a string
+ object.set(KEY_NUM_DEFAULTS, data.num_defaults);
+ object.set(KEY_NUM_ROWS, data.num_rows);
+ return object;
+}
+
+/// Restores the kind and the row counters from a JSON object produced by toJSON().
+/// Throws CORRUPTED_DATA if any of the three expected fields is absent.
+void SerializationInfo::fromJSON(const Poco::JSON::Object & object)
+{
+    const bool has_all_fields = object.has(KEY_KIND) && object.has(KEY_NUM_DEFAULTS) && object.has(KEY_NUM_ROWS);
+    if (!has_all_fields)
+        throw Exception(ErrorCodes::CORRUPTED_DATA,
+            "Missed field '{}' or '{}' or '{}' in SerializationInfo of columns",
+            KEY_KIND, KEY_NUM_DEFAULTS, KEY_NUM_ROWS);
+
+    data.num_rows = object.getValue<size_t>(KEY_NUM_ROWS);
+    data.num_defaults = object.getValue<size_t>(KEY_NUM_DEFAULTS);
+    kind = ISerialization::stringToKind(object.getValue<String>(KEY_KIND));
+}
+
+/// Picks SPARSE when the share of default rows is strictly above the configured threshold,
+/// DEFAULT otherwise. With no rows at all the share is taken as 0; it is never above 1.
+ISerialization::Kind SerializationInfo::chooseKind(const Data & data, const Settings & settings)
+{
+    double ratio = 0.0;
+    if (data.num_rows)
+        ratio = std::min(static_cast<double>(data.num_defaults) / data.num_rows, 1.0);
+
+    if (ratio > settings.ratio_of_defaults_for_sparse)
+        return ISerialization::Kind::SPARSE;
+
+    return ISerialization::Kind::DEFAULT;
+}
+
+/// Creates one info per column that supports sparse serialization.
+/// Nothing is created when the settings can never lead to a non-default kind.
+SerializationInfoByName::SerializationInfoByName(
+    const NamesAndTypesList & columns,
+    const SerializationInfo::Settings & settings)
+{
+    if (settings.isAlwaysDefault())
+        return;
+
+    for (const auto & column : columns)
+    {
+        if (!column.type->supportsSparseSerialization())
+            continue;
+
+        emplace(column.name, column.type->createSerializationInfo(settings));
+    }
+}
+
+/// Feeds every column of `block` to the info registered under the same name.
+/// Columns without a registered info are ignored.
+void SerializationInfoByName::add(const Block & block)
+{
+    for (const auto & column : block)
+    {
+        if (auto found = find(column.name); found != end())
+            found->second->add(*column.column);
+    }
+}
+
+/// Merges the infos of `other` into the infos registered here under the same names.
+/// Entries of `other` with no counterpart here are ignored.
+void SerializationInfoByName::add(const SerializationInfoByName & other)
+{
+    for (const auto & [name, other_info] : other)
+    {
+        if (auto found = find(name); found != end())
+            found->second->add(*other_info);
+    }
+}
+
+/// Takes the row statistics from `other`. Infos that already exist here get their data
+/// replaced; infos that exist only in `other` are cloned into this map.
+void SerializationInfoByName::replaceData(const SerializationInfoByName & other)
+{
+    for (const auto & [name, new_info] : other)
+    {
+        /// operator[] creates an empty slot when the name is not present yet.
+        auto & slot = (*this)[name];
+
+        if (!slot)
+        {
+            slot = new_info->clone();
+            continue;
+        }
+
+        slot->replaceData(*new_info);
+    }
+}
+
+/// Writes all infos to `out` as one JSON document: the format version plus
+/// an array with one object (info fields and the column name) per column.
+void SerializationInfoByName::writeJSON(WriteBuffer & out) const
+{
+    Poco::JSON::Array columns_json;
+    for (const auto & [column_name, column_info] : *this)
+    {
+        auto column_json = column_info->toJSON();
+        column_json.set(KEY_NAME, column_name);
+        columns_json.add(std::move(column_json)); /// NOLINT
+    }
+
+    Poco::JSON::Object root;
+    root.set(KEY_VERSION, SERIALIZATION_INFO_VERSION);
+    root.set(KEY_COLUMNS, std::move(columns_json)); /// NOLINT
+
+    std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+    oss.exceptions(std::ios::failbit);
+    Poco::JSON::Stringifier::stringify(root, oss);
+
+    writeString(oss.str(), out);
+}
+
+/// Parses the JSON document written by writeJSON() and builds the infos for `columns`.
+/// Throws CORRUPTED_DATA when the version is missing or newer than supported, when a
+/// column entry has no name, or when it names a column that is not in `columns`.
+SerializationInfoByName SerializationInfoByName::readJSON(
+    const NamesAndTypesList & columns, const Settings & settings, ReadBuffer & in)
+{
+    String json_str;
+    readString(json_str, in);
+
+    Poco::JSON::Parser parser;
+    auto root = parser.parse(json_str).extract<Poco::JSON::Object::Ptr>();
+
+    if (!root->has(KEY_VERSION))
+        throw Exception(ErrorCodes::CORRUPTED_DATA, "Missed version of serialization infos");
+
+    const auto version = root->getValue<size_t>(KEY_VERSION);
+    if (version > SERIALIZATION_INFO_VERSION)
+        throw Exception(ErrorCodes::CORRUPTED_DATA,
+            "Unknown version of serialization infos ({}). Should be less or equal than {}",
+            version, SERIALIZATION_INFO_VERSION);
+
+    SerializationInfoByName infos;
+    if (!root->has(KEY_COLUMNS))
+        return infos;
+
+    /// The keys are views into the names stored in `columns`, which outlives this map.
+    std::unordered_map<std::string_view, const IDataType *> column_type_by_name;
+    for (const auto & [name, type] : columns)
+        column_type_by_name.emplace(name, type.get());
+
+    auto columns_json = root->getArray(KEY_COLUMNS);
+    for (const auto & entry : *columns_json)
+    {
+        auto entry_object = entry.extract<Poco::JSON::Object::Ptr>();
+
+        if (!entry_object->has(KEY_NAME))
+            throw Exception(ErrorCodes::CORRUPTED_DATA,
+                "Missed field '{}' in serialization infos", KEY_NAME);
+
+        auto name = entry_object->getValue<String>(KEY_NAME);
+        auto found = column_type_by_name.find(name);
+
+        if (found == column_type_by_name.end())
+            throw Exception(ErrorCodes::CORRUPTED_DATA,
+                "Found unexpected column '{}' in serialization infos", name);
+
+        /// The type creates the right kind of info object; the JSON entry then fills it in.
+        auto info = found->second->createSerializationInfo(settings);
+        info->fromJSON(*entry_object);
+        infos.emplace(name, std::move(info));
+    }
+
+    return infos;
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.h
new file mode 100644
index 00000000000..3d8f4f1d00c
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfo.h
@@ -0,0 +1,118 @@
+#pragma once
+
+#include <Core/Types_fwd.h>
+#include <DataTypes/Serializations/ISerialization.h>
+#include <Poco/JSON/Object.h>
+
+
+namespace DB
+{
+
+/// Forward declarations of the types that this header only uses by reference.
+class ReadBuffer;
+class WriteBuffer;
+class NamesAndTypesList;
+class Block;
+
+constexpr auto SERIALIZATION_INFO_VERSION = 0;
+
+/** Contains information about kind of serialization of column and its subcolumns.
+ * Also contains information about content of columns,
+ * that helps to choose kind of serialization of column.
+ *
+ * Currently has only information about number of default rows,
+ * that helps to choose sparse serialization.
+ *
+ * Should be extended, when new kinds of serialization will be implemented.
+ */
+class SerializationInfo
+{
+public:
+ struct Data
+ {
+ size_t num_rows = 0; ///< Number of rows accounted for so far.
+ size_t num_defaults = 0; ///< Number of those rows counted as holding the default value.
+
+ void add(const IColumn & column); ///< Accounts for the rows of a column.
+ void add(const Data & other); ///< Adds the counters of another Data.
+ void addDefaults(size_t length); ///< Accounts for `length` rows that are all defaults.
+ };
+
+ struct Settings
+ {
+ const double ratio_of_defaults_for_sparse = 1.0; ///< chooseKind() picks SPARSE only when the share of defaults is strictly above this.
+ const bool choose_kind = false; ///< When true, add() and addDefaults() re-run chooseKind() after updating the counters.
+
+ bool isAlwaysDefault() const { return ratio_of_defaults_for_sparse >= 1.0; } ///< True when the threshold cannot be exceeded: the share of defaults is capped at 1.0.
+ };
+
+ SerializationInfo(ISerialization::Kind kind_, const Settings & settings_);
+ SerializationInfo(ISerialization::Kind kind_, const Settings & settings_, const Data & data_);
+
+ virtual ~SerializationInfo() = default;
+
+ virtual bool hasCustomSerialization() const { return kind != ISerialization::Kind::DEFAULT; } ///< True for any kind other than DEFAULT.
+ virtual bool structureEquals(const SerializationInfo & rhs) const { return typeid(SerializationInfo) == typeid(rhs); } ///< True only when the dynamic type of rhs is exactly SerializationInfo.
+
+ virtual void add(const IColumn & column); ///< Accounts for the rows of a column.
+ virtual void add(const SerializationInfo & other); ///< Merges the counters of another info.
+ virtual void addDefaults(size_t length); ///< Accounts for `length` default rows.
+ virtual void replaceData(const SerializationInfo & other); ///< Overwrites the counters with those of other.
+
+ virtual std::shared_ptr<SerializationInfo> clone() const;
+
+ virtual std::shared_ptr<SerializationInfo> createWithType( // Creates an info for a column whose type changes from old_type to new_type.
+ const IDataType & old_type,
+ const IDataType & new_type,
+ const Settings & new_settings) const;
+
+ virtual void serialializeKindBinary(WriteBuffer & out) const; ///< Writes the kind as one UInt8.
+ virtual void deserializeFromKindsBinary(ReadBuffer & in); ///< Reads the kind back; throws on an unknown value.
+
+ virtual Poco::JSON::Object toJSON() const;
+ virtual void fromJSON(const Poco::JSON::Object & object);
+
+ void setKind(ISerialization::Kind kind_) { kind = kind_; }
+ const Settings & getSettings() const { return settings; }
+ const Data & getData() const { return data; }
+ ISerialization::Kind getKind() const { return kind; }
+
+ static ISerialization::Kind chooseKind(const Data & data, const Settings & settings); ///< SPARSE when the share of defaults exceeds the threshold in settings, DEFAULT otherwise.
+
+protected:
+ const Settings settings; ///< Fixed at construction.
+
+ ISerialization::Kind kind;
+ Data data;
+};
+
+using SerializationInfoPtr = std::shared_ptr<const SerializationInfo>; ///< Shared pointer to an info that cannot be modified through it.
+using MutableSerializationInfoPtr = std::shared_ptr<SerializationInfo>; ///< Shared pointer to a modifiable info.
+
+using SerializationInfos = std::vector<SerializationInfoPtr>;
+using MutableSerializationInfos = std::vector<MutableSerializationInfoPtr>;
+
+/// The order is important because info is serialized to part metadata.
+class SerializationInfoByName : public std::map<String, MutableSerializationInfoPtr>
+{
+public:
+ using Settings = SerializationInfo::Settings;
+
+ SerializationInfoByName() = default;
+ SerializationInfoByName(const NamesAndTypesList & columns, const Settings & settings); ///< Creates infos only for columns whose type supports sparse serialization; creates none if settings.isAlwaysDefault().
+
+ void add(const Block & block); ///< Feeds each column of the block to the info with the same name, if there is one.
+ void add(const SerializationInfoByName & other); ///< Merges infos that have the same name; names missing here are skipped.
+
+ /// Takes data from @other, but keeps current serialization kinds.
+ /// If column exists in @other infos, but not in current infos,
+ /// it's cloned to current infos.
+ void replaceData(const SerializationInfoByName & other);
+
+ void writeJSON(WriteBuffer & out) const; ///< Writes the version and all per-column infos as one JSON document.
+
+ static SerializationInfoByName readJSON( // Parses what writeJSON() produced; throws CORRUPTED_DATA on a missing/unsupported version or an unknown column.
+ const NamesAndTypesList & columns, const Settings & settings, ReadBuffer & in);
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.cpp
new file mode 100644
index 00000000000..d36668f03b6
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.cpp
@@ -0,0 +1,165 @@
+#include <DataTypes/Serializations/SerializationInfoTuple.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <Columns/ColumnTuple.h>
+#include <Common/assert_cast.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CORRUPTED_DATA;
+ extern const int THERE_IS_NO_COLUMN;
+}
+
+/// Builds a tuple info from per-element infos and the element names (same order, same count).
+/// The tuple-level kind is always created as DEFAULT.
+SerializationInfoTuple::SerializationInfoTuple(
+    MutableSerializationInfos elems_, Names names_, const Settings & settings_)
+    : SerializationInfo(ISerialization::Kind::DEFAULT, settings_)
+    , elems(std::move(elems_))
+    , names(std::move(names_))
+{
+    assert(names.size() == elems.size());
+
+    /// Index the element infos by name for the name-based merges.
+    size_t pos = 0;
+    for (const auto & name : names)
+        name_to_elem[name] = elems[pos++];
+}
+
+/// A tuple has custom serialization as soon as one of its elements has.
+bool SerializationInfoTuple::hasCustomSerialization() const
+{
+    for (const auto & elem : elems)
+        if (elem->hasCustomSerialization())
+            return true;
+
+    return false;
+}
+
+/// Two tuple infos have equal structure when `rhs` is a tuple info too, has the same
+/// number of elements, and the elements are pairwise structure-equal.
+bool SerializationInfoTuple::structureEquals(const SerializationInfo & rhs) const
+{
+    const auto * rhs_tuple = typeid_cast<const SerializationInfoTuple *>(&rhs);
+    if (!rhs_tuple)
+        return false;
+
+    const auto & rhs_elems = rhs_tuple->elems;
+    if (elems.size() != rhs_elems.size())
+        return false;
+
+    for (size_t pos = 0; pos != elems.size(); ++pos)
+    {
+        if (!elems[pos]->structureEquals(*rhs_elems[pos]))
+            return false;
+    }
+
+    return true;
+}
+
+/// Accounts for a tuple column: updates the tuple-level counters, then feeds
+/// every nested column to the element info at the same position.
+void SerializationInfoTuple::add(const IColumn & column)
+{
+    SerializationInfo::add(column);
+
+    const auto & tuple_columns = assert_cast<const ColumnTuple &>(column).getColumns();
+    assert(elems.size() == tuple_columns.size());
+
+    size_t pos = 0;
+    for (const auto & elem : elems)
+        elem->add(*tuple_columns[pos++]);
+}
+
+/// Merges another tuple info into this one. Elements are matched by name; an element
+/// that `other` does not have is treated as if all rows of `other` were defaults.
+void SerializationInfoTuple::add(const SerializationInfo & other)
+{
+    SerializationInfo::add(other);
+
+    const auto & other_tuple = assert_cast<const SerializationInfoTuple &>(other);
+    const auto & other_elems = other_tuple.name_to_elem;
+
+    for (const auto & [name, elem] : name_to_elem)
+    {
+        const auto found = other_elems.find(name);
+        if (found == other_elems.end())
+        {
+            elem->addDefaults(other_tuple.getData().num_rows);
+            continue;
+        }
+
+        elem->add(*found->second);
+    }
+}
+
+/// Accounts for `length` default rows, both at the tuple level and in every element.
+void SerializationInfoTuple::addDefaults(size_t length)
+{
+    /// Keep the tuple-level counters in step with the elements: add(const SerializationInfo &)
+    /// reads the tuple-level num_rows of the other side to pad elements that are missing there.
+    SerializationInfo::addDefaults(length);
+
+    for (const auto & elem : elems)
+        elem->addDefaults(length);
+}
+
+/// Replaces the row statistics of this tuple and of its elements with those of `other`.
+/// Elements are matched by name; elements that `other` does not have keep their data.
+void SerializationInfoTuple::replaceData(const SerializationInfo & other)
+{
+    /// Overwrite the tuple-level counters. SerializationInfo::add() would instead sum them
+    /// with the current ones and, with choose_kind set, could also change the kind.
+    SerializationInfo::replaceData(other);
+
+    const auto & other_info = assert_cast<const SerializationInfoTuple &>(other);
+    for (const auto & [name, elem] : name_to_elem)
+    {
+        auto it = other_info.name_to_elem.find(name);
+        if (it != other_info.name_to_elem.end())
+            elem->replaceData(*it->second);
+    }
+}
+
+/// Returns a deep copy: every element info is cloned, and the tuple-level kind and
+/// row counters are carried over, matching what SerializationInfo::clone() preserves.
+MutableSerializationInfoPtr SerializationInfoTuple::clone() const
+{
+    MutableSerializationInfos elems_cloned;
+    elems_cloned.reserve(elems.size());
+    for (const auto & elem : elems)
+        elems_cloned.push_back(elem->clone());
+
+    auto cloned = std::make_shared<SerializationInfoTuple>(std::move(elems_cloned), names, settings);
+
+    /// The constructor always starts from Kind::DEFAULT and zeroed counters,
+    /// so the tuple-level state has to be copied explicitly.
+    cloned->kind = kind;
+    cloned->data = data;
+    return cloned;
+}
+
+/// Creates a tuple info for a column converted from `old_type` to `new_type` (both tuples
+/// with as many elements as this info) by converting every element info at its position.
+MutableSerializationInfoPtr SerializationInfoTuple::createWithType(
+    const IDataType & old_type,
+    const IDataType & new_type,
+    const Settings & new_settings) const
+{
+    const auto & old_elements = assert_cast<const DataTypeTuple &>(old_type).getElements();
+    const auto & new_elements = assert_cast<const DataTypeTuple &>(new_type).getElements();
+
+    assert(elems.size() == old_elements.size());
+    assert(elems.size() == new_elements.size());
+
+    MutableSerializationInfos converted;
+    converted.reserve(elems.size());
+
+    size_t pos = 0;
+    for (const auto & elem : elems)
+    {
+        converted.push_back(elem->createWithType(*old_elements[pos], *new_elements[pos], new_settings));
+        ++pos;
+    }
+
+    return std::make_shared<SerializationInfoTuple>(std::move(converted), names, new_settings);
+}
+
+void SerializationInfoTuple::serialializeKindBinary(WriteBuffer & out) const // Writes the tuple-level kind first, then the kind of each element in order.
+{
+ SerializationInfo::serialializeKindBinary(out);
+ for (const auto & elem : elems)
+ elem->serialializeKindBinary(out);
+}
+
+void SerializationInfoTuple::deserializeFromKindsBinary(ReadBuffer & in) // Reads kinds in the order serialializeKindBinary() writes them: tuple first, then elements.
+{
+ SerializationInfo::deserializeFromKindsBinary(in);
+ for (const auto & elem : elems)
+ elem->deserializeFromKindsBinary(in);
+}
+
+Poco::JSON::Object SerializationInfoTuple::toJSON() const // Base fields plus a "subcolumns" array holding the JSON of every element, in element order.
+{
+ auto object = SerializationInfo::toJSON();
+ Poco::JSON::Array subcolumns;
+ for (const auto & elem : elems)
+ subcolumns.add(elem->toJSON());
+
+ object.set("subcolumns", subcolumns);
+ return object;
+}
+
+/// Restores the tuple-level fields and then every element from the "subcolumns" array.
+/// Throws CORRUPTED_DATA if "subcolumns" is absent and THERE_IS_NO_COLUMN if its size
+/// differs from the number of elements of this info.
+void SerializationInfoTuple::fromJSON(const Poco::JSON::Object & object)
+{
+    SerializationInfo::fromJSON(object);
+
+    if (!object.has("subcolumns"))
+        throw Exception(ErrorCodes::CORRUPTED_DATA,
+            "Missed field 'subcolumns' in SerializationInfo of columns SerializationInfoTuple");
+
+    auto subcolumns = object.getArray("subcolumns");
+    if (elems.size() != subcolumns->size())
+        throw Exception(ErrorCodes::THERE_IS_NO_COLUMN,
+            "Mismatched number of subcolumns between JSON and SerializationInfoTuple. "
+            "Expected: {}, got: {}", elems.size(), subcolumns->size());
+
+    /// Elements are matched with the array entries by position.
+    for (size_t i = 0; i < elems.size(); ++i)
+        elems[i]->fromJSON(*subcolumns->getObject(static_cast<unsigned>(i)));
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.h
new file mode 100644
index 00000000000..a9f3bdb6c6e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInfoTuple.h
@@ -0,0 +1,45 @@
+#pragma once
+#include <Core/Names.h>
+#include <DataTypes/Serializations/SerializationInfo.h>
+
+namespace DB
+{
+
+class SerializationInfoTuple : public SerializationInfo // Serialization info of a tuple: the base info plus one info per tuple element.
+{
+public:
+ SerializationInfoTuple(MutableSerializationInfos elems_, Names names_, const Settings & settings_); ///< elems_ and names_ must have the same size and order.
+
+ bool hasCustomSerialization() const override; ///< True if any element has custom serialization.
+ bool structureEquals(const SerializationInfo & rhs) const override;
+
+ void add(const IColumn & column) override;
+ void add(const SerializationInfo & other) override; ///< Elements are matched by name.
+ void addDefaults(size_t length) override;
+ void replaceData(const SerializationInfo & other) override; ///< Elements are matched by name.
+
+ MutableSerializationInfoPtr clone() const override;
+
+ MutableSerializationInfoPtr createWithType(
+ const IDataType & old_type,
+ const IDataType & new_type,
+ const Settings & new_settings) const override;
+
+ void serialializeKindBinary(WriteBuffer & out) const override;
+ void deserializeFromKindsBinary(ReadBuffer & in) override;
+
+ Poco::JSON::Object toJSON() const override;
+ void fromJSON(const Poco::JSON::Object & object) override;
+
+ const MutableSerializationInfoPtr & getElementInfo(size_t i) const { return elems[i]; } ///< No bounds check on i.
+ ISerialization::Kind getElementKind(size_t i) const { return elems[i]->getKind(); } ///< No bounds check on i.
+
+private:
+ MutableSerializationInfos elems; ///< Element infos in tuple order.
+ Names names; ///< Element names, positionally aligned with elems.
+
+ using NameToElem = std::unordered_map<String, MutableSerializationInfoPtr>;
+ NameToElem name_to_elem; ///< The same element infos, indexed by name; filled in the constructor.
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.cpp
new file mode 100644
index 00000000000..59086d8aef3
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.cpp
@@ -0,0 +1,209 @@
+#include "SerializationInterval.h"
+
+#include <Columns/ColumnsNumber.h>
+#include <IO/WriteBuffer.h>
+#include <Parsers/Kusto/Formatters.h>
+
+namespace DB
+{
+using ColumnInterval = DataTypeInterval::ColumnType;
+
+namespace ErrorCodes
+{
+ extern const int ILLEGAL_COLUMN;
+ extern const int NOT_IMPLEMENTED;
+}
+
+/// Writes the interval at `row` as a KQL timespan string.
+/// Throws ILLEGAL_COLUMN if `column` is not of the column type used by Interval.
+void SerializationKustoInterval::serializeText(
+    const IColumn & column, const size_t row, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto * interval_column = checkAndGetColumn<ColumnInterval>(column);
+    if (interval_column == nullptr)
+        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected column of underlying type of Interval");
+
+    /// Convert to units of 100 nanoseconds using the average length of the interval kind.
+    const auto & interval_value = interval_column->getData()[row];
+    const auto ticks = kind.toAvgNanoseconds() * interval_value / 100;
+
+    const auto text = formatKQLTimespan(ticks);
+    ostr.write(text.c_str(), text.length());
+}
+
+void SerializationKustoInterval::deserializeText( // Not supported: always throws NOT_IMPLEMENTED, none of the arguments is used.
+ [[maybe_unused]] IColumn & column,
+ [[maybe_unused]] ReadBuffer & istr,
+ [[maybe_unused]] const FormatSettings & settings,
+ [[maybe_unused]] const bool whole) const
+{
+ throw Exception(
+ ErrorCodes::NOT_IMPLEMENTED, "Deserialization is not implemented for {}", kind.toNameOfFunctionToIntervalDataType());
+}
+
+SerializationInterval::SerializationInterval(IntervalKind interval_kind_) : interval_kind(std::move(interval_kind_)) // Stores the interval kind; the nested serializations are set up by their member initializers in the class.
+{
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    /// The cast picks the Field overload of the overloaded ISerialization::deserializeBinary.
+    using FieldMethod = void (ISerialization::*)(Field &, ReadBuffer &, const FormatSettings &) const;
+    dispatch(static_cast<FieldMethod>(&ISerialization::deserializeBinary), settings.interval.output_format, field, istr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    /// The cast picks the IColumn overload of the overloaded ISerialization::deserializeBinary.
+    using ColumnMethod = void (ISerialization::*)(IColumn &, ReadBuffer &, const FormatSettings &) const;
+    dispatch(static_cast<ColumnMethod>(&ISerialization::deserializeBinary), settings.interval.output_format, column, istr, settings);
+}
+
+/// Bulk reading always goes through the numeric serialization, whatever the format settings are.
+void SerializationInterval::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const
+{
+    constexpr auto format = FormatSettings::IntervalOutputFormat::Numeric;
+    dispatch(&ISerialization::deserializeBinaryBulk, format, column, istr, limit, avg_value_size_hint);
+}
+
+/// Always handled by the numeric serialization.
+void SerializationInterval::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const
+{
+    constexpr auto format = FormatSettings::IntervalOutputFormat::Numeric;
+    dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, format, settings, state);
+}
+
+
+/// Always handled by the numeric serialization.
+void SerializationInterval::deserializeBinaryBulkWithMultipleStreams(
+    ColumnPtr & column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsCache * cache) const
+{
+    constexpr auto format = FormatSettings::IntervalOutputFormat::Numeric;
+    dispatch(&ISerialization::deserializeBinaryBulkWithMultipleStreams, format, column, limit, settings, state, cache);
+}
+
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::deserializeTextCSV, format, column, istr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::deserializeTextEscaped, format, column, istr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::deserializeTextJSON, format, column, istr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::deserializeTextQuoted, format, column, istr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::deserializeTextRaw, format, column, istr, settings);
+}
+
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::deserializeWholeText, format, column, istr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    /// The cast picks the Field overload of the overloaded ISerialization::serializeBinary.
+    using FieldMethod = void (ISerialization::*)(const Field &, WriteBuffer &, const FormatSettings &) const;
+    dispatch(static_cast<FieldMethod>(&ISerialization::serializeBinary), settings.interval.output_format, field, ostr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeBinary(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    /// The cast picks the IColumn overload of the overloaded ISerialization::serializeBinary.
+    using ColumnMethod = void (ISerialization::*)(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const;
+    dispatch(static_cast<ColumnMethod>(&ISerialization::serializeBinary), settings.interval.output_format, column, row, ostr, settings);
+}
+
+/// Bulk writing always goes through the numeric serialization, whatever the format settings are.
+void SerializationInterval::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+    constexpr auto format = FormatSettings::IntervalOutputFormat::Numeric;
+    dispatch(&ISerialization::serializeBinaryBulk, format, column, ostr, offset, limit);
+}
+
+/// Always handled by the numeric serialization.
+void SerializationInterval::serializeBinaryBulkStatePrefix(
+    const IColumn & column, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const
+{
+    constexpr auto format = FormatSettings::IntervalOutputFormat::Numeric;
+    dispatch(&ISerialization::serializeBinaryBulkStatePrefix, format, column, settings, state);
+}
+
+/// Always handled by the numeric serialization.
+void SerializationInterval::serializeBinaryBulkStateSuffix(
+    SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const
+{
+    constexpr auto format = FormatSettings::IntervalOutputFormat::Numeric;
+    dispatch(&ISerialization::serializeBinaryBulkStateSuffix, format, settings, state);
+}
+
+/// Always handled by the numeric serialization.
+void SerializationInterval::serializeBinaryBulkWithMultipleStreams(
+    const IColumn & column, size_t offset, size_t limit, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const
+{
+    constexpr auto format = FormatSettings::IntervalOutputFormat::Numeric;
+    dispatch(&ISerialization::serializeBinaryBulkWithMultipleStreams, format, column, offset, limit, settings, state);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeText(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::serializeText, format, column, row, ostr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeTextCSV(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::serializeTextCSV, format, column, row, ostr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeTextEscaped(
+    const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::serializeTextEscaped, format, column, row, ostr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeTextJSON(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::serializeTextJSON, format, column, row, ostr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeTextQuoted(
+    const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::serializeTextQuoted, format, column, row, ostr, settings);
+}
+
+/// Forwards to the serialization selected by settings.interval.output_format.
+void SerializationInterval::serializeTextRaw(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto format = settings.interval.output_format;
+    dispatch(&ISerialization::serializeTextRaw, format, column, row, ostr, settings);
+}
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.h
new file mode 100644
index 00000000000..a4e6c204e4f
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationInterval.h
@@ -0,0 +1,90 @@
+#pragma once
+
+#include "ISerialization.h"
+#include "SerializationCustomSimpleText.h"
+
+#include <DataTypes/DataTypeInterval.h>
+#include <Formats/FormatSettings.h>
+#include <Common/IntervalKind.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+}
+
+class SerializationKustoInterval : public SerializationCustomSimpleText // Text serialization of intervals as KQL timespans; writing only.
+{
+public:
+ explicit SerializationKustoInterval(IntervalKind kind_) : SerializationCustomSimpleText(nullptr), kind(kind_) { } ///< The base class is given a null nested serialization.
+
+ void serializeText(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override; ///< Always throws NOT_IMPLEMENTED.
+
+private:
+ IntervalKind kind; ///< Interval unit used to convert stored values to nanoseconds.
+};
+
+class SerializationInterval : public ISerialization // Forwards every call to either the Kusto or the numeric serialization, see dispatch().
+{
+public:
+ explicit SerializationInterval(IntervalKind kind_);
+
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
+ void deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const override;
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeBinary(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override;
+ void serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override;
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+ void serializeText(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextCSV(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextJSON(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextRaw(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
+
+private:
+ template <typename... Args, std::invocable<const ISerialization *, Args...> Method>
+ void dispatch(const Method method, const FormatSettings::IntervalOutputFormat format, Args &&... args) const // Calls `method` with `args` on the nested serialization that corresponds to `format`.
+ {
+ const ISerialization * serialization = nullptr;
+ if (format == FormatSettings::IntervalOutputFormat::Kusto)
+ serialization = &serialization_kusto;
+ else if (format == FormatSettings::IntervalOutputFormat::Numeric)
+ serialization = &serialization_numeric;
+
+ if (!serialization) // any format value other than the two handled above
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Option {} is not implemented", magic_enum::enum_name(format));
+
+ (serialization->*method)(std::forward<Args>(args)...);
+ }
+
+ IntervalKind interval_kind;
+ SerializationKustoInterval serialization_kusto{interval_kind}; ///< Built from interval_kind, which is declared above it and so is initialized first.
+ SerializationNumber<typename DataTypeInterval::FieldType> serialization_numeric; ///< Serializes the interval as a plain number of the Interval field type.
+};
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.cpp
new file mode 100644
index 00000000000..3e1cbdb00f5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.cpp
@@ -0,0 +1,781 @@
+#include <DataTypes/Serializations/SerializationLowCardinality.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+
+#include <Columns/ColumnLowCardinality.h>
+#include <Columns/ColumnUnique.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnsCommon.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+#include <Core/Field.h>
+
+namespace DB
+{
+/// Error codes thrown from this file; they are only declared here.
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR; /// Internal invariant is broken (missing stream, unexpected column type, ...).
+ extern const int INCORRECT_DATA; /// Serialized input is malformed (unknown version or index type, ...).
+}
+/// File-local helpers that view a generic column as ColumnLowCardinality by means of typeid_cast.
+namespace
+{
+ const ColumnLowCardinality & getColumnLowCardinality(const IColumn & column) /// Read-only view.
+ {
+ return typeid_cast<const ColumnLowCardinality &>(column);
+ }
+
+ ColumnLowCardinality & getColumnLowCardinality(IColumn & column) /// Mutable view.
+ {
+ return typeid_cast<ColumnLowCardinality &>(column);
+ }
+}
+/// Stores the dictionary type and the default serialization of that type with Nullable removed.
+SerializationLowCardinality::SerializationLowCardinality(const DataTypePtr & dictionary_type_)
+ : dictionary_type(dictionary_type_)
+ , dict_inner_serialization(removeNullable(dictionary_type_)->getDefaultSerialization())
+{
+}
+/** Enumerates the substreams of a LowCardinality column.
+  * First the dictionary serialization enumerates its streams under a DictionaryKeys path element,
+  * then `callback` is invoked once for the DictionaryIndexes stream.
+  * The path element pushed here is popped again before returning.
+  */
+void SerializationLowCardinality::enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const
+{
+ const auto * column_lc = data.column ? &getColumnLowCardinality(*data.column) : nullptr; /// data.column may be absent.
+ /// Dictionary keys: type and column are passed on only when the caller supplied them.
+ settings.path.push_back(Substream::DictionaryKeys);
+ auto dict_data = SubstreamData(dict_inner_serialization)
+ .withType(data.type ? dictionary_type : nullptr)
+ .withColumn(column_lc ? column_lc->getDictionary().getNestedColumn() : nullptr)
+ .withSerializationInfo(data.serialization_info);
+
+ settings.path.back().data = dict_data;
+ dict_inner_serialization->enumerateStreams(settings, callback, dict_data);
+ /// Indexes: the same path slot is reused, now describing the whole LowCardinality column.
+ settings.path.back() = Substream::DictionaryIndexes;
+ settings.path.back().data = data;
+
+ callback(settings.path);
+ settings.path.pop_back();
+}
+
+/// Version tag of the dictionary keys stream. Only one version exists so far.
+struct KeysSerializationVersion
+{
+    enum Value
+    {
+        /// Version is written at the start of <name.dict.bin>.
+        /// Dictionary is written as number N and N keys after them.
+        /// Dictionary can be shared for continuous range of granules, so some marks may point to the same position.
+        /// Shared dictionary is stored in state and is read once.
+        SharedDictionariesWithAdditionalKeys = 1,
+    };
+
+    Value value;
+
+    /// Throws INCORRECT_DATA unless `version` is the single supported version.
+    static void checkVersion(UInt64 version)
+    {
+        const bool is_supported = version == SharedDictionariesWithAdditionalKeys;
+        if (is_supported)
+            return;
+
+        throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for SerializationLowCardinality key column.");
+    }
+
+    /// Stores the version and validates it (throws on an unsupported value).
+    explicit KeysSerializationVersion(UInt64 version) : value(static_cast<Value>(version))
+    {
+        checkVersion(version);
+    }
+};
+
+/// Header stored at the start of each granule of the indexes stream.
+/// It encodes the integer type of the indexes together with three flag bits (bits 8, 9 and 10).
+struct IndexesSerializationType
+{
+    using SerializationType = UInt64;
+    /// Need to read dictionary if it wasn't.
+    static constexpr SerializationType NeedGlobalDictionaryBit = 1u << 8u;
+    /// Need to read additional keys. Additional keys are stored before indexes as value N and N keys after them.
+    static constexpr SerializationType HasAdditionalKeysBit = 1u << 9u;
+    /// Need to update dictionary. It means that previous granule has different dictionary.
+    static constexpr SerializationType NeedUpdateDictionary = 1u << 10u;
+
+    /// Width of the stored indexes.
+    enum Type
+    {
+        TUInt8 = 0,
+        TUInt16,
+        TUInt32,
+        TUInt64,
+    };
+
+    Type type;
+    bool has_additional_keys;
+    bool need_global_dictionary;
+    bool need_update_dictionary;
+
+    /// Returns the header value with all flag bits cleared, i.e. only the index type part.
+    static constexpr SerializationType resetFlags(SerializationType type)
+    {
+        constexpr SerializationType all_flags = HasAdditionalKeysBit | NeedGlobalDictionaryBit | NeedUpdateDictionary;
+        return type & ~all_flags;
+    }
+
+    /// Throws INCORRECT_DATA if the type part of the header is not one of the known index types.
+    static void checkType(SerializationType type)
+    {
+        if (resetFlags(type) > TUInt64)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid type for SerializationLowCardinality index column.");
+    }
+
+    /// Packs the type and the flags into one little-endian UInt64 and writes it to `buffer`.
+    void serialize(WriteBuffer & buffer) const
+    {
+        SerializationType encoded = type;
+        encoded |= has_additional_keys ? HasAdditionalKeysBit : SerializationType{0};
+        encoded |= need_global_dictionary ? NeedGlobalDictionaryBit : SerializationType{0};
+        encoded |= need_update_dictionary ? NeedUpdateDictionary : SerializationType{0};
+        writeBinaryLittleEndian(encoded, buffer);
+    }
+
+    /// Reads and validates a header from `buffer`.
+    /// In Native format a header that requests a global dictionary is rejected with INCORRECT_DATA.
+    void deserialize(ReadBuffer & buffer, const ISerialization::DeserializeBinaryBulkSettings & settings)
+    {
+        SerializationType raw;
+        readBinaryLittleEndian(raw, buffer);
+
+        /// Validate before touching any member.
+        checkType(raw);
+
+        type = static_cast<Type>(resetFlags(raw));
+        has_additional_keys = (raw & HasAdditionalKeysBit) != 0;
+        need_global_dictionary = (raw & NeedGlobalDictionaryBit) != 0;
+        need_update_dictionary = (raw & NeedUpdateDictionary) != 0;
+
+        if (settings.native_format && need_global_dictionary)
+            throw Exception(ErrorCodes::INCORRECT_DATA,
+                            "LowCardinality indexes serialization type for Native format "
+                            "cannot use global dictionary");
+    }
+
+    /// Builds the header for writing: the index type is deduced from the concrete type of `column`,
+    /// which must be one of ColumnUInt8/16/32/64 (LOGICAL_ERROR otherwise).
+    IndexesSerializationType(const IColumn & column,
+                             bool has_additional_keys_,
+                             bool need_global_dictionary_,
+                             bool enumerate_dictionaries)
+        : has_additional_keys(has_additional_keys_)
+        , need_global_dictionary(need_global_dictionary_)
+        , need_update_dictionary(enumerate_dictionaries)
+    {
+        if (typeid_cast<const ColumnUInt8 *>(&column))
+            type = TUInt8;
+        else if (typeid_cast<const ColumnUInt16 *>(&column))
+            type = TUInt16;
+        else if (typeid_cast<const ColumnUInt32 *>(&column))
+            type = TUInt32;
+        else if (typeid_cast<const ColumnUInt64 *>(&column))
+            type = TUInt64;
+        else
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Indexes column for IndexesSerializationType. "
+                            "Expected ColumnUInt*, got {}", column.getName());
+    }
+
+    /// Returns the unsigned integer data type matching `type`.
+    DataTypePtr getDataType() const
+    {
+        switch (type)
+        {
+            case TUInt8:
+                return std::make_shared<DataTypeUInt8>();
+            case TUInt16:
+                return std::make_shared<DataTypeUInt16>();
+            case TUInt32:
+                return std::make_shared<DataTypeUInt32>();
+            case TUInt64:
+                return std::make_shared<DataTypeUInt64>();
+        }
+
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't create DataType from IndexesSerializationType.");
+    }
+
+    /// Leaves all members uninitialized; they are expected to be filled by deserialize().
+    IndexesSerializationType() = default;
+};
+/// State kept between the bulk serialization calls for one LowCardinality column.
+struct SerializeStateLowCardinality : public ISerialization::SerializeBinaryBulkState
+{
+ KeysSerializationVersion key_version; /// Version that was written by the state prefix.
+ MutableColumnUniquePtr shared_dictionary; /// Dictionary accumulated so far; nullptr when nothing is pending.
+
+ explicit SerializeStateLowCardinality(UInt64 key_version_) : key_version(key_version_) {}
+};
+/// State kept between the bulk deserialization calls for one LowCardinality column.
+struct DeserializeStateLowCardinality : public ISerialization::DeserializeBinaryBulkState
+{
+ KeysSerializationVersion key_version; /// Version that was read by the state prefix.
+ ColumnUniquePtr global_dictionary; /// Last dictionary read from the keys stream, if any.
+
+ IndexesSerializationType index_type; /// Header of the granule that is currently being read.
+ ColumnPtr additional_keys; /// Additional keys of the current granule, or nullptr.
+ ColumnPtr null_map; /// Null map for additional_keys; built for a Nullable type when no global dictionary is used.
+ UInt64 num_pending_rows = 0; /// Rows of the current granule that were not returned yet.
+
+ /// If dictionary should be updated.
+ /// Can happen if some granules were skipped while reading from MergeTree.
+ /// We should store this flag in State because
+ /// in case of a long block of empty arrays we may not need to read the dictionary at first reading.
+ bool need_update_dictionary = false;
+
+ explicit DeserializeStateLowCardinality(UInt64 key_version_) : key_version(key_version_) {}
+};
+
+/// Writes the keys serialization version to the dictionary keys stream and creates the serialization state.
+/// Throws LOGICAL_ERROR if the getter provides no stream for the dictionary keys.
+void SerializationLowCardinality::serializeBinaryBulkStatePrefix(
+    const IColumn & /*column*/,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    /// Ask for the keys stream; the path is restored right after the lookup.
+    settings.path.push_back(Substream::DictionaryKeys);
+    auto * keys_stream = settings.getter(settings.path);
+    settings.path.pop_back();
+
+    if (keys_stream == nullptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream in SerializationLowCardinality::serializeBinaryBulkStatePrefix");
+
+    /// The version goes first; the same value is remembered in the state.
+    const UInt64 version_to_write = KeysSerializationVersion::SharedDictionariesWithAdditionalKeys;
+    writeBinaryLittleEndian(version_to_write, *keys_stream);
+
+    state = std::make_shared<SerializeStateLowCardinality>(version_to_write);
+}
+
+/// Flushes the shared dictionary that is still pending in the state (if any) to the dictionary keys stream:
+/// the number of keys followed by the keys themselves. Nothing is written when there is no pending
+/// dictionary or when low_cardinality_max_dictionary_size is zero.
+/// Throws LOGICAL_ERROR if a dictionary has to be written but there is no keys stream.
+void SerializationLowCardinality::serializeBinaryBulkStateSuffix(
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    auto * lc_state = checkAndGetState<SerializeStateLowCardinality>(state);
+    KeysSerializationVersion::checkVersion(lc_state->key_version.value);
+
+    const bool has_pending_dictionary = lc_state->shared_dictionary && settings.low_cardinality_max_dictionary_size;
+    if (!has_pending_dictionary)
+        return;
+
+    auto dictionary_keys = lc_state->shared_dictionary->getNestedNotNullableColumn();
+
+    settings.path.push_back(Substream::DictionaryKeys);
+    auto * keys_stream = settings.getter(settings.path);
+    settings.path.pop_back();
+
+    if (keys_stream == nullptr)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream in SerializationLowCardinality::serializeBinaryBulkStateSuffix");
+
+    const UInt64 keys_count = dictionary_keys->size();
+    writeBinaryLittleEndian(keys_count, *keys_stream);
+    dict_inner_serialization->serializeBinaryBulk(*dictionary_keys, *keys_stream, 0, keys_count);
+
+    /// The dictionary is written, so it must not be written again.
+    lc_state->shared_dictionary = nullptr;
+}
+
+/// Reads the keys serialization version from the dictionary keys stream and creates the deserialization state.
+/// When there is no keys stream, `state` is left untouched.
+/// An unsupported version is rejected by the state constructor (INCORRECT_DATA).
+void SerializationLowCardinality::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state) const
+{
+    settings.path.push_back(Substream::DictionaryKeys);
+    auto * keys_stream = settings.getter(settings.path);
+    settings.path.pop_back();
+
+    if (keys_stream == nullptr)
+        return;
+
+    UInt64 stored_version;
+    readBinaryLittleEndian(stored_version, *keys_stream);
+
+    state = std::make_shared<DeserializeStateLowCardinality>(stored_version);
+}
+
+namespace
+{
+ /// Returns the underlying data array of `indexes` if it is a ColumnVector<T>, nullptr otherwise.
+ template <typename T>
+ PaddedPODArray<T> * getIndexesData(IColumn & indexes)
+ {
+     if (auto * typed_column = typeid_cast<ColumnVector<T> *>(&indexes))
+         return &typed_column->getData();
+
+     return nullptr;
+ }
+ /// Result of mapIndexWithAdditionalKeys: for each new position, the old value it stands for.
+ struct IndexMapsWithAdditionalKeys
+ {
+ MutableColumnPtr dictionary_map; /// Old positions in the dictionary (values < dict_size).
+ MutableColumnPtr additional_keys_map; /// Old positions among additional keys (value minus dict_size).
+ };
+ /** Compacts `index` in place and returns the maps from the new positions back to the old values.
+   * Values below dict_size refer to the dictionary, values >= dict_size refer to additional keys.
+   * New positions are handed out in order of first appearance, separately for the two groups:
+   *  - a value < dict_size becomes p, where dictionary_map[p] is the old value;
+   *  - a value >= dict_size becomes N + q, where additional_keys_map[q] is the old value minus dict_size
+   *    and N is the number of distinct values < dict_size.
+   * For an empty index both returned maps are empty.
+   */
+ template <typename T>
+ IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeys(PaddedPODArray<T> & index, size_t dict_size)
+ {
+ T max_less_dict_size = 0;
+ T max_value = 0;
+
+ auto size = index.size();
+ if (size == 0)
+ return {ColumnVector<T>::create(), ColumnVector<T>::create()};
+ /// First pass: find the largest value on each side of dict_size to size the lookup tables.
+ for (size_t i = 0; i < size; ++i)
+ {
+ auto val = index[i];
+ if (val < dict_size)
+ max_less_dict_size = std::max(max_less_dict_size, val);
+
+ max_value = std::max(max_value, val);
+ }
+
+ auto map_size = UInt64(max_less_dict_size) + 1;
+ auto overflow_map_size = max_value >= dict_size ? (UInt64(max_value - dict_size) + 1) : 0;
+ PaddedPODArray<T> map(map_size, 0);
+ PaddedPODArray<T> overflow_map(overflow_map_size, 0);
+ /// 0 in a lookup table means "no position yet", so the value that gets position 0 is tracked separately.
+ T zero_pos_value = 0;
+ T zero_pos_overflowed_value = 0;
+ UInt64 cur_pos = 0;
+ UInt64 cur_overflowed_pos = 0;
+ /// Second pass: hand out new positions in order of first appearance.
+ for (size_t i = 0; i < size; ++i)
+ {
+ T val = index[i];
+ if (val < dict_size)
+ {
+ if (cur_pos == 0)
+ {
+ zero_pos_value = val;
+ ++cur_pos;
+ }
+ else if (map[val] == 0 && val != zero_pos_value)
+ {
+ map[val] = static_cast<T>(cur_pos);
+ ++cur_pos;
+ }
+ }
+ else
+ {
+ T shifted_val = static_cast<T>(val - dict_size);
+ if (cur_overflowed_pos == 0)
+ {
+ zero_pos_overflowed_value = shifted_val;
+ ++cur_overflowed_pos;
+ }
+ else if (overflow_map[shifted_val] == 0 && shifted_val != zero_pos_overflowed_value)
+ {
+ overflow_map[shifted_val] = static_cast<T>(cur_overflowed_pos);
+ ++cur_overflowed_pos;
+ }
+ }
+ }
+ /// Invert the lookup tables: new position -> old value.
+ auto dictionary_map = ColumnVector<T>::create(cur_pos);
+ auto additional_keys_map = ColumnVector<T>::create(cur_overflowed_pos);
+ auto & dict_data = dictionary_map->getData();
+ auto & add_keys_data = additional_keys_map->getData();
+
+ for (size_t i = 0; i < map_size; ++i)
+ if (map[i])
+ dict_data[map[i]] = static_cast<T>(i);
+
+ for (size_t i = 0; i < overflow_map_size; ++i)
+ if (overflow_map[i])
+ add_keys_data[overflow_map[i]] = static_cast<T>(i);
+ /// Position 0 was skipped by the loops above because its table entry is 0.
+ if (cur_pos)
+ dict_data[0] = zero_pos_value;
+ if (cur_overflowed_pos)
+ add_keys_data[0] = zero_pos_overflowed_value;
+ /// Third pass: rewrite the indexes; additional keys are placed after the cur_pos dictionary positions.
+ for (size_t i = 0; i < size; ++i)
+ {
+ T & val = index[i];
+ if (val < dict_size)
+ val = map[val];
+ else
+ val = overflow_map[val - dict_size] + static_cast<T>(cur_pos);
+ }
+
+ return {std::move(dictionary_map), std::move(additional_keys_map)};
+ }
+
+ /// Update column and return maps with old indexes.
+ /// Let N be the number of distinct values which are less than dict_size;
+ /// old_column - column before function call;
+ /// new_column - column after function call:
+ /// * if old_column[i] < dict_size, then
+ ///     dictionary_map[new_column[i]] = old_column[i]
+ /// * else
+ ///     additional_keys_map[new_column[i] - N] = old_column[i] - dict_size
+ /// The column must be one of ColumnUInt8/16/32/64, otherwise LOGICAL_ERROR is thrown.
+ IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeys(IColumn & column, size_t dict_size)
+ {
+     /// Try every supported index width in turn and delegate to the typed implementation.
+     if (auto * data_uint8 = getIndexesData<UInt8>(column))
+         return mapIndexWithAdditionalKeys(*data_uint8, dict_size);
+
+     if (auto * data_uint16 = getIndexesData<UInt16>(column))
+         return mapIndexWithAdditionalKeys(*data_uint16, dict_size);
+
+     if (auto * data_uint32 = getIndexesData<UInt32>(column))
+         return mapIndexWithAdditionalKeys(*data_uint32, dict_size);
+
+     if (auto * data_uint64 = getIndexesData<UInt64>(column))
+         return mapIndexWithAdditionalKeys(*data_uint64, dict_size);
+
+     throw Exception(ErrorCodes::LOGICAL_ERROR, "Indexes column for mapIndexWithAdditionalKeys must be UInt, got {}",
+                     column.getName());
+ }
+}
+/** Writes rows [offset, offset + limit) of a LowCardinality column.
+  * The indexes stream receives a granule header, optional additional keys, the number of rows and the positions.
+  * The keys stream receives the shared dictionary when it has to be flushed.
+  * Does nothing if neither stream is available; throws LOGICAL_ERROR if only one of them is.
+  */
+void SerializationLowCardinality::serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::DictionaryKeys);
+ auto * keys_stream = settings.getter(settings.path);
+ settings.path.back() = Substream::DictionaryIndexes;
+ auto * indexes_stream = settings.getter(settings.path);
+ settings.path.pop_back();
+
+ if (!keys_stream && !indexes_stream)
+ return;
+
+ if (!keys_stream)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for SerializationLowCardinality keys.");
+
+ if (!indexes_stream)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for SerializationLowCardinality indexes.");
+
+ const ColumnLowCardinality & low_cardinality_column = typeid_cast<const ColumnLowCardinality &>(column);
+
+ auto * low_cardinality_state = checkAndGetState<SerializeStateLowCardinality>(state);
+ auto & global_dictionary = low_cardinality_state->shared_dictionary;
+ KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
+ /// No shared dictionary in the state means a new one starts here; this is recorded in the granule header.
+ bool need_update_dictionary = global_dictionary == nullptr;
+ if (need_update_dictionary)
+ global_dictionary = DataTypeLowCardinality::createColumnUnique(*dictionary_type);
+ /// Clamp the range to the rows available after `offset`; limit == 0 means "up to the end".
+ size_t max_limit = column.size() - offset;
+ limit = limit ? std::min(limit, max_limit) : max_limit;
+
+ /// Do not write anything for empty column. (May happen while writing empty arrays.)
+ if (limit == 0)
+ return;
+ /// Take the requested range together with its own dictionary and positions.
+ auto sub_column = low_cardinality_column.cutAndCompact(offset, limit);
+ ColumnPtr positions = sub_column->getIndexesPtr();
+ ColumnPtr keys = sub_column->getDictionary().getNestedColumn();
+
+ if (settings.low_cardinality_max_dictionary_size)
+ {
+ /// Insert used_keys into global dictionary and update sub_index.
+ auto indexes_with_overflow = global_dictionary->uniqueInsertRangeWithOverflow(*keys, 0, keys->size(),
+ settings.low_cardinality_max_dictionary_size);
+
+ if (global_dictionary->size() > settings.low_cardinality_max_dictionary_size)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Got dictionary with size {} but max dictionary size is {}",
+ global_dictionary->size(), settings.low_cardinality_max_dictionary_size);
+
+ positions = indexes_with_overflow.indexes->index(*positions, 0);
+ keys = std::move(indexes_with_overflow.overflowed_keys); /// Only the keys that did not fit remain "additional".
+
+ if (global_dictionary->size() < settings.low_cardinality_max_dictionary_size && !keys->empty())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Has additional keys, but dict size is {} which is less "
+ "then max dictionary size ({})", global_dictionary->size(),
+ settings.low_cardinality_max_dictionary_size);
+ }
+ /// Keys are written without the Nullable wrapper.
+ if (const auto * nullable_keys = checkAndGetColumn<ColumnNullable>(*keys))
+ keys = nullable_keys->getNestedColumnPtr();
+
+ bool need_additional_keys = !keys->empty();
+ bool need_dictionary = settings.low_cardinality_max_dictionary_size != 0;
+ bool need_write_dictionary = !settings.low_cardinality_use_single_dictionary_for_part
+ && global_dictionary->size() >= settings.low_cardinality_max_dictionary_size;
+ /// The granule header goes first in the indexes stream.
+ IndexesSerializationType index_version(*positions, need_additional_keys, need_dictionary, need_update_dictionary);
+ index_version.serialize(*indexes_stream);
+ /// Flush the shared dictionary to the keys stream and drop it from the state.
+ if (need_write_dictionary)
+ {
+ const auto & nested_column = global_dictionary->getNestedNotNullableColumn();
+ UInt64 num_keys = nested_column->size();
+ writeBinaryLittleEndian(num_keys, *keys_stream);
+ dict_inner_serialization->serializeBinaryBulk(*nested_column, *keys_stream, 0, num_keys);
+ low_cardinality_state->shared_dictionary = nullptr;
+ }
+ /// Additional keys are stored in the indexes stream, before the positions.
+ if (need_additional_keys)
+ {
+ UInt64 num_keys = keys->size();
+ writeBinaryLittleEndian(num_keys, *indexes_stream);
+ dict_inner_serialization->serializeBinaryBulk(*keys, *indexes_stream, 0, num_keys);
+ }
+ /// Finally the number of rows followed by the positions themselves.
+ UInt64 num_rows = positions->size();
+ writeBinaryLittleEndian(num_rows, *indexes_stream);
+ auto index_serialization = index_version.getDataType()->getDefaultSerialization();
+ index_serialization->serializeBinaryBulk(*positions, *indexes_stream, 0, num_rows);
+}
+/** Reads up to `limit` rows from the keys and indexes streams and appends them to `column`.
+  * The data is consumed granule by granule; the header, the additional keys and the number of
+  * not yet returned rows of the current granule are kept in the state between calls.
+  * Does nothing if neither stream is available; throws LOGICAL_ERROR if only one of them is.
+  */
+void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * /* cache */) const
+{
+ auto mutable_column = column->assumeMutable();
+ ColumnLowCardinality & low_cardinality_column = typeid_cast<ColumnLowCardinality &>(*mutable_column);
+
+ settings.path.push_back(Substream::DictionaryKeys);
+ auto * keys_stream = settings.getter(settings.path);
+ settings.path.back() = Substream::DictionaryIndexes;
+ auto * indexes_stream = settings.getter(settings.path);
+ settings.path.pop_back();
+
+ if (!keys_stream && !indexes_stream)
+ return;
+
+ if (!keys_stream)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for SerializationLowCardinality keys.");
+
+ if (!indexes_stream)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty stream for SerializationLowCardinality indexes.");
+
+ auto * low_cardinality_state = checkAndGetState<DeserializeStateLowCardinality>(state);
+ KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
+ /// Reads a complete dictionary from the keys stream and stores it in the state as the global dictionary.
+ auto read_dictionary = [this, low_cardinality_state, keys_stream]()
+ {
+ UInt64 num_keys;
+ readBinaryLittleEndian(num_keys, *keys_stream);
+
+ auto keys_type = removeNullable(dictionary_type);
+ auto global_dict_keys = keys_type->createColumn();
+ dict_inner_serialization->deserializeBinaryBulk(*global_dict_keys, *keys_stream, num_keys, 0);
+
+ auto column_unique = DataTypeLowCardinality::createColumnUnique(*dictionary_type, std::move(global_dict_keys));
+ low_cardinality_state->global_dictionary = std::move(column_unique);
+ };
+ /// Reads the additional keys of the current granule from the indexes stream into the state.
+ auto read_additional_keys = [this, low_cardinality_state, indexes_stream]()
+ {
+ UInt64 num_keys;
+ readBinaryLittleEndian(num_keys, *indexes_stream);
+
+ auto keys_type = removeNullable(dictionary_type);
+ auto additional_keys = keys_type->createColumn();
+ dict_inner_serialization->deserializeBinaryBulk(*additional_keys, *indexes_stream, num_keys, 0);
+ low_cardinality_state->additional_keys = std::move(additional_keys);
+ /// Without a global dictionary and with a Nullable type, a null map marking only key 0 as NULL is built.
+ if (!low_cardinality_state->index_type.need_global_dictionary && dictionary_type->isNullable())
+ {
+ auto null_map = ColumnUInt8::create(num_keys, 0);
+ if (num_keys)
+ null_map->getElement(0) = 1;
+
+ low_cardinality_state->null_map = std::move(null_map);
+ }
+ };
+ /// Reads num_rows positions from the indexes stream and appends the rows they encode to the result column.
+ auto read_indexes = [this, low_cardinality_state, indexes_stream, &low_cardinality_column](UInt64 num_rows)
+ {
+ auto indexes_type = low_cardinality_state->index_type.getDataType();
+ MutableColumnPtr indexes_column = indexes_type->createColumn();
+ indexes_type->getDefaultSerialization()->deserializeBinaryBulk(*indexes_column, *indexes_stream, num_rows, 0);
+
+ auto & global_dictionary = low_cardinality_state->global_dictionary;
+ const auto & additional_keys = low_cardinality_state->additional_keys;
+
+ bool has_additional_keys = low_cardinality_state->index_type.has_additional_keys;
+ bool column_is_empty = low_cardinality_column.empty();
+
+ if (!low_cardinality_state->index_type.need_global_dictionary) /// Case 1: positions refer to additional keys only.
+ {
+ if (additional_keys == nullptr)
+ throw Exception(ErrorCodes::INCORRECT_DATA, "No additional keys found.");
+
+ ColumnPtr keys_column = additional_keys;
+ if (low_cardinality_state->null_map)
+ keys_column = ColumnNullable::create(additional_keys, low_cardinality_state->null_map);
+ low_cardinality_column.insertRangeFromDictionaryEncodedColumn(*keys_column, *indexes_column);
+ }
+ else if (!has_additional_keys) /// Case 2: positions refer to the global dictionary only.
+ {
+ if (column_is_empty)
+ low_cardinality_column.setSharedDictionary(global_dictionary);
+
+ auto local_column = ColumnLowCardinality::create(global_dictionary, std::move(indexes_column));
+ low_cardinality_column.insertRangeFrom(*local_column, 0, num_rows);
+ }
+ else /// Case 3: positions refer to both; positions >= dictionary size address additional keys.
+ {
+ auto maps = mapIndexWithAdditionalKeys(*indexes_column, global_dictionary->size());
+
+ auto used_keys = IColumn::mutate(global_dictionary->getNestedColumn()->index(*maps.dictionary_map, 0));
+
+ if (!maps.additional_keys_map->empty())
+ {
+ if (additional_keys == nullptr)
+ throw Exception(ErrorCodes::INCORRECT_DATA, "No additional keys found.");
+
+ auto used_add_keys = additional_keys->index(*maps.additional_keys_map, 0);
+
+ if (dictionary_type->isNullable())
+ {
+ ColumnPtr null_map = ColumnUInt8::create(used_add_keys->size(), 0);
+ used_add_keys = ColumnNullable::create(used_add_keys, null_map);
+ }
+
+ used_keys->insertRangeFrom(*used_add_keys, 0, used_add_keys->size());
+ }
+
+ low_cardinality_column.insertRangeFromDictionaryEncodedColumn(*used_keys, *indexes_column);
+ }
+ };
+
+ if (!settings.continuous_reading)
+ {
+ low_cardinality_state->num_pending_rows = 0;
+
+ /// Remember in state that some granules were skipped and we need to update dictionary.
+ low_cardinality_state->need_update_dictionary = true;
+ }
+ /// Read granule by granule until `limit` rows are produced or the indexes stream ends.
+ while (limit)
+ {
+ if (low_cardinality_state->num_pending_rows == 0) /// A new granule starts: read its header first.
+ {
+ if (indexes_stream->eof())
+ break;
+
+ auto & index_type = low_cardinality_state->index_type;
+ auto & global_dictionary = low_cardinality_state->global_dictionary;
+
+ index_type.deserialize(*indexes_stream, settings);
+
+ bool need_update_dictionary =
+ !global_dictionary || index_type.need_update_dictionary || low_cardinality_state->need_update_dictionary;
+ if (index_type.need_global_dictionary && need_update_dictionary)
+ {
+ read_dictionary();
+ low_cardinality_state->need_update_dictionary = false;
+ }
+
+ if (low_cardinality_state->index_type.has_additional_keys)
+ read_additional_keys();
+ else
+ low_cardinality_state->additional_keys = nullptr;
+
+ readBinaryLittleEndian(low_cardinality_state->num_pending_rows, *indexes_stream);
+ }
+
+ size_t num_rows_to_read = std::min<UInt64>(limit, low_cardinality_state->num_pending_rows);
+ read_indexes(num_rows_to_read);
+ limit -= num_rows_to_read;
+ low_cardinality_state->num_pending_rows -= num_rows_to_read;
+ }
+
+ column = std::move(mutable_column);
+}
+/// Field-level binary (de)serialization is delegated to the default serialization of the dictionary type.
+void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ dictionary_type->getDefaultSerialization()->serializeBinary(field, ostr, settings);
+}
+void SerializationLowCardinality::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ dictionary_type->getDefaultSerialization()->deserializeBinary(field, istr, settings);
+}
+/** Row-level (de)serialization methods.
+  * Each one passes the ISerialization method of the same name to serializeImpl / deserializeImpl,
+  * which call it on the default serialization of the dictionary type.
+  */
+void SerializationLowCardinality::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeBinary, ostr, settings);
+}
+void SerializationLowCardinality::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeImpl(column, &ISerialization::deserializeBinary, istr, settings);
+}
+
+void SerializationLowCardinality::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeTextEscaped, ostr, settings);
+}
+
+void SerializationLowCardinality::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeImpl(column, &ISerialization::deserializeTextEscaped, istr, settings);
+}
+
+void SerializationLowCardinality::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeTextQuoted, ostr, settings);
+}
+
+void SerializationLowCardinality::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeImpl(column, &ISerialization::deserializeTextQuoted, istr, settings);
+}
+
+void SerializationLowCardinality::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeImpl(column, &ISerialization::deserializeWholeText, istr, settings);
+}
+
+void SerializationLowCardinality::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeTextCSV, ostr, settings);
+}
+
+void SerializationLowCardinality::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeImpl(column, &ISerialization::deserializeTextCSV, istr, settings);
+}
+
+void SerializationLowCardinality::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeText, ostr, settings);
+}
+
+void SerializationLowCardinality::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeTextJSON, ostr, settings);
+}
+
+void SerializationLowCardinality::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeImpl(column, &ISerialization::deserializeTextJSON, istr, settings);
+}
+
+void SerializationLowCardinality::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeTextXML, ostr, settings);
+}
+
+void SerializationLowCardinality::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeImpl(column, &ISerialization::deserializeTextRaw, istr, settings);
+}
+
+void SerializationLowCardinality::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeImpl(column, row_num, &ISerialization::serializeTextRaw, ostr, settings);
+}
+
+/// Serializes one row: the row's index selects an entry of the dictionary, and that entry is written
+/// by calling `func` on the default serialization of the dictionary type with `args` forwarded.
+template <typename... Params, typename... Args>
+void SerializationLowCardinality::serializeImpl(
+    const IColumn & column, size_t row_num, SerializationLowCardinality::SerializeFunctionPtr<Params...> func, Args &&... args) const
+{
+    const auto & lc_column = getColumnLowCardinality(column);
+
+    /// Position of the row's value inside the dictionary.
+    const size_t dictionary_row = lc_column.getIndexes().getUInt(row_num);
+
+    const auto nested_serialization = dictionary_type->getDefaultSerialization();
+    const auto & dictionary_values = lc_column.getDictionary().getNestedColumn();
+
+    (nested_serialization.get()->*func)(*dictionary_values, dictionary_row, std::forward<Args>(args)...);
+}
+
+/// Deserializes one value: `func` is called on the default serialization of the dictionary type to parse
+/// the value into an empty clone of the dictionary's nested column, and the parsed value is then
+/// inserted into the LowCardinality column.
+template <typename... Params, typename... Args>
+void SerializationLowCardinality::deserializeImpl(
+    IColumn & column, SerializationLowCardinality::DeserializeFunctionPtr<Params...> func, Args &&... args) const
+{
+    auto & lc_column = getColumnLowCardinality(column);
+
+    /// Scratch column that receives exactly one parsed value.
+    auto scratch_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty();
+
+    const auto nested_serialization = dictionary_type->getDefaultSerialization();
+    (nested_serialization.get()->*func)(*scratch_column, std::forward<Args>(args)...);
+
+    lc_column.insertFromFullColumn(*scratch_column, 0);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.h
new file mode 100644
index 00000000000..5f56bcf8108
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationLowCardinality.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include <DataTypes/Serializations/ISerialization.h>
+
+namespace DB
+{
+
+class IDataType;
+using DataTypePtr = std::shared_ptr<const IDataType>;
+/** Serialization of LowCardinality columns.
+  * Bulk methods work with two substreams (dictionary keys and dictionary indexes);
+  * per-value methods are forwarded to the serialization of the dictionary type.
+  */
+class SerializationLowCardinality : public ISerialization
+{
+private:
+ DataTypePtr dictionary_type; /// Type of the dictionary values.
+ SerializationPtr dict_inner_serialization; /// Serialization used for the dictionary keys in bulk methods.
+
+public:
+ explicit SerializationLowCardinality(const DataTypePtr & dictionary_type);
+
+ void enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const override;
+ /// Bulk (de)serialization with state kept between calls.
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+ /// Per-value (de)serialization in binary and text formats.
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+
+private:
+ template <typename ... Params> /// Pointer to an ISerialization method that writes one row of a column.
+ using SerializeFunctionPtr = void (ISerialization::*)(const IColumn &, size_t, Params ...) const;
+
+ template <typename... Params, typename... Args>
+ void serializeImpl(const IColumn & column, size_t row_num, SerializeFunctionPtr<Params...> func, Args &&... args) const;
+
+ template <typename ... Params> /// Pointer to an ISerialization method that reads one value into a column.
+ using DeserializeFunctionPtr = void (ISerialization::*)(IColumn &, Params ...) const;
+
+ template <typename ... Params, typename... Args>
+ void deserializeImpl(IColumn & column, DeserializeFunctionPtr<Params...> func, Args &&... args) const;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.cpp
new file mode 100644
index 00000000000..af1d96c4ca7
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.cpp
@@ -0,0 +1,365 @@
+#include <DataTypes/Serializations/SerializationMap.h>
+#include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/DataTypeMap.h>
+
+#include <Common/StringUtils/StringUtils.h>
+#include <Columns/ColumnMap.h>
+#include <Core/Field.h>
+#include <Formats/FormatSettings.h>
+#include <Common/assert_cast.h>
+#include <Common/quoteString.h>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromString.h>
+#include <IO/ReadBufferFromString.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_READ_MAP_FROM_TEXT;
+ extern const int TOO_LARGE_ARRAY_SIZE;
+}
+
+/// Stores the serializations used for keys, for values and for the nested column.
+SerializationMap::SerializationMap(const SerializationPtr & key_, const SerializationPtr & value_, const SerializationPtr & nested_)
+    : key(key_)
+    , value(value_)
+    , nested(nested_)
+{
+}
+
+/// Read-only access to the nested column held by a ColumnMap.
+static const IColumn & extractNestedColumn(const IColumn & column)
+{
+    const auto & map_column = assert_cast<const ColumnMap &>(column);
+    return map_column.getNestedColumn();
+}
+
+/// Mutable access to the nested column held by a ColumnMap.
+static IColumn & extractNestedColumn(IColumn & column)
+{
+    auto & map_column = assert_cast<ColumnMap &>(column);
+    return map_column.getNestedColumn();
+}
+
+/// Writes a Map field: the number of entries as a varint, then each entry as key followed by value.
+void SerializationMap::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & entries = field.get<const Map &>();
+
+    writeVarUInt(entries.size(), ostr);
+    for (const auto & entry : entries)
+    {
+        /// Every entry is expected to be a (key, value) tuple.
+        const auto & key_value = entry.safeGet<const Tuple>();
+        assert(key_value.size() == 2);
+
+        key->serializeBinary(key_value[0], ostr, settings);
+        value->serializeBinary(key_value[1], ostr, settings);
+    }
+}
+
+/// Reads a Map field: a varint entry count, then that many (key, value) pairs.
+/// Throws TOO_LARGE_ARRAY_SIZE when a non-zero settings.max_binary_array_size is exceeded.
+void SerializationMap::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    size_t entry_count;
+    readVarUInt(entry_count, istr);
+
+    /// A limit of zero disables the size check.
+    const bool limit_enabled = settings.max_binary_array_size != 0;
+    if (limit_enabled && entry_count > settings.max_binary_array_size)
+        throw Exception(
+            ErrorCodes::TOO_LARGE_ARRAY_SIZE,
+            "Too large map size: {}. The maximum is: {}. To increase the maximum, use setting "
+            "format_binary_max_array_size",
+            entry_count,
+            settings.max_binary_array_size);
+
+    field = Map();
+    Map & entries = field.get<Map &>();
+    entries.reserve(entry_count);
+
+    for (size_t remaining = entry_count; remaining > 0; --remaining)
+    {
+        Tuple key_value(2);
+        key->deserializeBinary(key_value[0], istr, settings);
+        value->deserializeBinary(key_value[1], istr, settings);
+        entries.push_back(std::move(key_value));
+    }
+}
+
+/// A map row is written exactly as the matching row of its nested column.
+void SerializationMap::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const IColumn & nested_column = extractNestedColumn(column);
+    nested->serializeBinary(nested_column, row_num, ostr, settings);
+}
+
+/// A map row is read by letting the nested serialization append to the nested column.
+void SerializationMap::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    IColumn & nested_column = extractNestedColumn(column);
+    nested->deserializeBinary(nested_column, istr, settings);
+}
+
+
+/// Writes row `row_num` as `{key:value,key:value,...}`.
+/// `key_writer` and `value_writer` are called as writer(ostr, serialization, column, position).
+template <typename KeyWriter, typename ValueWriter>
+void SerializationMap::serializeTextImpl(
+    const IColumn & column,
+    size_t row_num,
+    WriteBuffer & ostr,
+    KeyWriter && key_writer,
+    ValueWriter && value_writer) const
+{
+    const auto & map_column = assert_cast<const ColumnMap &>(column);
+    const auto & entries = map_column.getNestedData();
+    const auto & row_ends = map_column.getNestedColumn().getOffsets();
+
+    /// Entries of the row occupy positions [begin, end) of the key and value columns.
+    const size_t begin = row_ends[row_num - 1];
+    const size_t end = row_ends[row_num];
+
+    const auto & keys = entries.getColumn(0);
+    const auto & values = entries.getColumn(1);
+
+    writeChar('{', ostr);
+    for (size_t pos = begin; pos < end; ++pos)
+    {
+        if (pos > begin)
+            writeChar(',', ostr);
+
+        key_writer(ostr, key, keys, pos);
+        writeChar(':', ostr);
+        value_writer(ostr, value, values, pos);
+    }
+    writeChar('}', ostr);
+}
+
+/// Parses one map value written as `{key:value,key:value,...}` and appends it to `column`.
+///
+/// `reader` is called as reader(istr, serialization, target_column), first for a key and then
+/// for its value. Whitespace is skipped before every key, around ':' and after every value.
+/// If parsing fails after at least one key was read, the key and value columns are cut back
+/// to the previous offset before the exception is rethrown.
+template <typename Reader>
+void SerializationMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const
+{
+    auto & column_map = assert_cast<ColumnMap &>(column);
+
+    auto & nested_array = column_map.getNestedColumn();
+    auto & nested_tuple = column_map.getNestedData();
+    auto & offsets = nested_array.getOffsets();
+
+    auto & key_column = nested_tuple.getColumn(0);
+    auto & value_column = nested_tuple.getColumn(1);
+
+    /// Number of keys read so far for this row.
+    size_t size = 0;
+    assertChar('{', istr);
+
+    try
+    {
+        bool first = true;
+        while (!istr.eof() && *istr.position() != '}')
+        {
+            if (!first)
+            {
+                if (*istr.position() == ',')
+                    ++istr.position();
+                else
+                    throw Exception(ErrorCodes::CANNOT_READ_MAP_FROM_TEXT, "Cannot read Map from text");
+            }
+
+            first = false;
+
+            skipWhitespaceIfAny(istr);
+
+            /// Skipping whitespace may have consumed the rest of the input, so check eof()
+            /// before looking at the current character.
+            if (!istr.eof() && *istr.position() == '}')
+                break;
+
+            reader(istr, key, key_column);
+            ++size;
+
+            skipWhitespaceIfAny(istr);
+            assertChar(':', istr);
+            skipWhitespaceIfAny(istr);
+
+            reader(istr, value, value_column);
+
+            skipWhitespaceIfAny(istr);
+        }
+
+        assertChar('}', istr);
+    }
+    catch (...)
+    {
+        /// Roll back the partially inserted row so keys and values stay aligned with the offsets.
+        if (size)
+        {
+            nested_tuple.getColumnPtr(0) = key_column.cut(0, offsets.back());
+            nested_tuple.getColumnPtr(1) = value_column.cut(0, offsets.back());
+        }
+        throw;
+    }
+
+    offsets.push_back(offsets.back() + size);
+}
+
+/// Plain text form: keys and values are both written in their quoted text representation.
+void SerializationMap::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    auto write_quoted = [&settings](WriteBuffer & out, const SerializationPtr & element_serialization, const IColumn & elements, size_t pos)
+    {
+        element_serialization->serializeTextQuoted(elements, pos, out, settings);
+    };
+
+    /// The same writer serves keys and values.
+    serializeTextImpl(column, row_num, ostr, write_quoted, write_quoted);
+}
+
+/// Parses a map whose keys and values are in quoted text form.
+/// With `whole` set, any input left after the closing brace is reported as an error.
+void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
+{
+    auto read_quoted = [&settings](ReadBuffer & in, const SerializationPtr & element_serialization, IColumn & elements)
+    {
+        element_serialization->deserializeTextQuoted(elements, in, settings);
+    };
+
+    deserializeTextImpl(column, istr, read_quoted);
+
+    const bool has_trailing_data = whole && !istr.eof();
+    if (has_trailing_data)
+        throwUnexpectedDataAfterParsedValue(column, istr, settings, "Map");
+}
+
+/// JSON form: an object whose member names are the keys rendered as JSON strings.
+void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    auto write_key = [&settings](WriteBuffer & out, const SerializationPtr & key_serialization, const IColumn & keys, size_t pos)
+    {
+        /// We need to double-quote all keys (including integers) to produce valid JSON.
+        WriteBufferFromOwnString key_text;
+        key_serialization->serializeText(keys, pos, key_text, settings);
+        writeJSONString(key_text.str(), out, settings);
+    };
+
+    auto write_value = [&settings](WriteBuffer & out, const SerializationPtr & value_serialization, const IColumn & values, size_t pos)
+    {
+        value_serialization->serializeTextJSON(values, pos, out, settings);
+    };
+
+    serializeTextImpl(column, row_num, ostr, write_key, write_value);
+}
+
+/// Pretty JSON form: one member per line, indented by four spaces per nesting level.
+/// An empty map is written as `{}` on a single line.
+void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
+{
+    const auto & map_column = assert_cast<const ColumnMap &>(column);
+    const auto & entries = map_column.getNestedData();
+    const auto & row_ends = map_column.getNestedColumn().getOffsets();
+
+    const size_t begin = row_ends[row_num - 1];
+    const size_t end = row_ends[row_num];
+
+    if (begin == end)
+    {
+        writeCString("{}", ostr);
+        return;
+    }
+
+    const size_t member_indent = indent + 1;
+
+    writeCString("{\n", ostr);
+    for (size_t pos = begin; pos < end; ++pos)
+    {
+        if (pos > begin)
+            writeCString(",\n", ostr);
+
+        /// Keys are rendered as text first and then emitted as JSON strings.
+        WriteBufferFromOwnString key_text;
+        key->serializeText(entries.getColumn(0), pos, key_text, settings);
+
+        writeChar(' ', member_indent * 4, ostr);
+        writeJSONString(key_text.str(), ostr, settings);
+        writeCString(": ", ostr);
+        value->serializeTextJSONPretty(entries.getColumn(1), pos, ostr, settings, member_indent);
+    }
+    writeChar('\n', ostr);
+    writeChar(' ', indent * 4, ostr);
+    writeChar('}', ostr);
+}
+
+
+/// Parses a JSON object into a map row. With settings.null_as_default enabled, keys and
+/// values are read through SerializationNullable::deserializeTextJSONImpl.
+void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    auto read_json = [&settings](ReadBuffer & in, const SerializationPtr & element_serialization, IColumn & elements)
+    {
+        if (!settings.null_as_default)
+        {
+            element_serialization->deserializeTextJSON(elements, in, settings);
+            return;
+        }
+
+        SerializationNullable::deserializeTextJSONImpl(elements, in, settings, element_serialization);
+    };
+
+    deserializeTextImpl(column, istr, read_json);
+}
+
+/// XML form: <map><elem><key>..</key><value>..</value></elem>...</map>
+void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & map_column = assert_cast<const ColumnMap &>(column);
+    const auto & row_ends = map_column.getNestedColumn().getOffsets();
+
+    const size_t begin = row_ends[row_num - 1];
+    const size_t end = row_ends[row_num];
+
+    const auto & entries = map_column.getNestedData();
+
+    writeCString("<map>", ostr);
+    for (size_t pos = begin; pos < end; ++pos)
+    {
+        writeCString("<elem>", ostr);
+
+        writeCString("<key>", ostr);
+        key->serializeTextXML(entries.getColumn(0), pos, ostr, settings);
+        writeCString("</key>", ostr);
+
+        writeCString("<value>", ostr);
+        value->serializeTextXML(entries.getColumn(1), pos, ostr, settings);
+        writeCString("</value>", ostr);
+
+        writeCString("</elem>", ostr);
+    }
+    writeCString("</map>", ostr);
+}
+
+/// CSV form: the plain text representation of the map, written as a single CSV field.
+void SerializationMap::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    WriteBufferFromOwnString text_form;
+    serializeText(column, row_num, text_form, settings);
+
+    writeCSV(text_form.str(), ostr);
+}
+
+/// Reads one CSV field and parses its whole content as the text form of a map.
+void SerializationMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    String field_text;
+    readCSV(field_text, istr, settings.csv);
+
+    /// `whole = true`: nothing may follow the map inside the field.
+    ReadBufferFromString field_buf(field_text);
+    deserializeText(column, field_buf, settings, true);
+}
+
+/// Enumerates the streams of the nested serialization, passing it the nested type and
+/// nested column of `data` when those are present.
+void SerializationMap::enumerateStreams(
+    EnumerateStreamsSettings & settings,
+    const StreamCallback & callback,
+    const SubstreamData & data) const
+{
+    /// A missing type or column in `data` stays missing in the nested data.
+    auto nested_type = data.type ? assert_cast<const DataTypeMap &>(*data.type).getNestedType() : nullptr;
+    auto nested_column = data.column ? assert_cast<const ColumnMap &>(*data.column).getNestedColumnPtr() : nullptr;
+
+    auto next_data = SubstreamData(nested)
+        .withType(nested_type)
+        .withColumn(nested_column)
+        .withSerializationInfo(data.serialization_info);
+
+    nested->enumerateStreams(settings, callback, next_data);
+}
+
+/// Forwards to the nested serialization, using the nested column of the map.
+void SerializationMap::serializeBinaryBulkStatePrefix(
+    const IColumn & column,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    const IColumn & nested_column = extractNestedColumn(column);
+    nested->serializeBinaryBulkStatePrefix(nested_column, settings, state);
+}
+
+/// The bulk state suffix is written entirely by the nested serialization.
+void SerializationMap::serializeBinaryBulkStateSuffix(
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    const auto & delegate = *nested;
+    delegate.serializeBinaryBulkStateSuffix(settings, state);
+}
+
+/// The bulk state prefix is read entirely by the nested serialization.
+void SerializationMap::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state) const
+{
+    const auto & delegate = *nested;
+    delegate.deserializeBinaryBulkStatePrefix(settings, state);
+}
+
+
+/// Bulk-writes the requested rows by delegating to the nested serialization
+/// with the nested column of the map.
+void SerializationMap::serializeBinaryBulkWithMultipleStreams(
+    const IColumn & column,
+    size_t offset,
+    size_t limit,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    const IColumn & nested_column = extractNestedColumn(column);
+    nested->serializeBinaryBulkWithMultipleStreams(nested_column, offset, limit, settings, state);
+}
+
+/// Bulk-reads rows by letting the nested serialization fill the nested column pointer
+/// of the map in place.
+void SerializationMap::deserializeBinaryBulkWithMultipleStreams(
+    ColumnPtr & column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsCache * cache) const
+{
+    auto & map_column = assert_cast<ColumnMap &>(*column->assumeMutable());
+    auto & nested_column = map_column.getNestedColumnPtr();
+
+    nested->deserializeBinaryBulkWithMultipleStreams(nested_column, limit, settings, state, cache);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.h
new file mode 100644
index 00000000000..f32c656757d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationMap.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+
+
+namespace DB
+{
+
+/// Serialization of the Map type.
+/// Per-column binary and bulk operations are forwarded to `nested`; text formats are
+/// produced from the separate key and value serializations.
+class SerializationMap final : public SimpleTextSerialization
+{
+private:
+    /// Serialization used for map keys.
+    SerializationPtr key;
+    /// Serialization used for map values.
+    SerializationPtr value;
+
+    /// 'nested' is an Array(Tuple(key_type, value_type))
+    SerializationPtr nested;
+
+public:
+    /// The arguments are serializations (not data types): one for keys, one for values
+    /// and one for the nested column.
+    SerializationMap(const SerializationPtr & key_, const SerializationPtr & value_, const SerializationPtr & nested_);
+
+    void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
+    void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+    void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+    void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+    void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
+    void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+    void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
+    void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+    void enumerateStreams(
+        EnumerateStreamsSettings & settings,
+        const StreamCallback & callback,
+        const SubstreamData & data) const override;
+
+    void serializeBinaryBulkStatePrefix(
+        const IColumn & column,
+        SerializeBinaryBulkSettings & settings,
+        SerializeBinaryBulkStatePtr & state) const override;
+
+    void serializeBinaryBulkStateSuffix(
+        SerializeBinaryBulkSettings & settings,
+        SerializeBinaryBulkStatePtr & state) const override;
+
+    void deserializeBinaryBulkStatePrefix(
+        DeserializeBinaryBulkSettings & settings,
+        DeserializeBinaryBulkStatePtr & state) const override;
+
+    void serializeBinaryBulkWithMultipleStreams(
+        const IColumn & column,
+        size_t offset,
+        size_t limit,
+        SerializeBinaryBulkSettings & settings,
+        SerializeBinaryBulkStatePtr & state) const override;
+
+    void deserializeBinaryBulkWithMultipleStreams(
+        ColumnPtr & column,
+        size_t limit,
+        DeserializeBinaryBulkSettings & settings,
+        DeserializeBinaryBulkStatePtr & state,
+        SubstreamsCache * cache) const override;
+
+private:
+    /// Writes one row as `{key:value,...}` using the given key and value writers.
+    template <typename KeyWriter, typename ValueWriter>
+    void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
+
+    /// Parses one row from `{key:value,...}`, calling `reader` for every key and every value.
+    template <typename Reader>
+    void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
+};
+
+}
+
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.cpp
new file mode 100644
index 00000000000..ca60948ce68
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.cpp
@@ -0,0 +1,78 @@
+#include <DataTypes/Serializations/SerializationNamed.h>
+
+namespace DB
+{
+
+/// Enumerates the nested streams underneath a path element carrying this name.
+/// The element also records `data` and a creator that re-wraps subcolumn serializations
+/// into SerializationNamed.
+void SerializationNamed::enumerateStreams(
+    EnumerateStreamsSettings & settings,
+    const StreamCallback & callback,
+    const SubstreamData & data) const
+{
+    addToPath(settings.path);
+
+    auto & element = settings.path.back();
+    element.data = data;
+    element.creator = std::make_shared<SubcolumnCreator>(name, escape_delimiter);
+
+    nested_serialization->enumerateStreams(settings, callback, data);
+    settings.path.pop_back();
+}
+
+/// Forwards to the nested serialization with this element's entry pushed onto the substream path.
+/// The entry is popped after the nested call returns; nothing removes it if that call throws.
+void SerializationNamed::serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ addToPath(settings.path);
+ nested_serialization->serializeBinaryBulkStatePrefix(column, settings, state);
+ settings.path.pop_back();
+}
+
+/// Forwards to the nested serialization with this element's entry pushed onto the substream path.
+/// The entry is popped after the nested call returns; nothing removes it if that call throws.
+void SerializationNamed::serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ addToPath(settings.path);
+ nested_serialization->serializeBinaryBulkStateSuffix(settings, state);
+ settings.path.pop_back();
+}
+
+/// Forwards to the nested serialization with this element's entry pushed onto the substream path.
+/// The entry is popped after the nested call returns; nothing removes it if that call throws.
+void SerializationNamed::deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const
+{
+ addToPath(settings.path);
+ nested_serialization->deserializeBinaryBulkStatePrefix(settings, state);
+ settings.path.pop_back();
+}
+
+/// Bulk-writes `column` through the nested serialization while this element's entry is on the substream path.
+/// `offset` and `limit` are passed through unchanged.
+void SerializationNamed::serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ addToPath(settings.path);
+ nested_serialization->serializeBinaryBulkWithMultipleStreams(column, offset, limit, settings, state);
+ settings.path.pop_back();
+}
+
+/// Bulk-reads into `column` through the nested serialization while this element's entry is on the substream path.
+/// `limit` and `cache` are passed through unchanged.
+void SerializationNamed::deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const
+{
+ addToPath(settings.path);
+ nested_serialization->deserializeBinaryBulkWithMultipleStreams(column, limit, settings, state, cache);
+ settings.path.pop_back();
+}
+
+/// Appends a TupleElement entry carrying this serialization's name and escaping flag to `path`.
+void SerializationNamed::addToPath(SubstreamPath & path) const
+{
+    path.push_back(Substream::TupleElement);
+
+    auto & element = path.back();
+    element.tuple_element_name = name;
+    element.escape_tuple_delimiter = escape_delimiter;
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.h
new file mode 100644
index 00000000000..52bbb039442
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNamed.h
@@ -0,0 +1,80 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationWrapper.h>
+
+namespace DB
+{
+
+/// Serialization wrapper that acts like nested serialization,
+/// but adds a passed name to the substream path like the
+/// read column was the tuple element with this name.
+/// It's used while reading subcolumns of complex types.
+/// In particular while reading components of named tuples.
+class SerializationNamed final : public SerializationWrapper
+{
+private:
+ /// Name stored as `tuple_element_name` in the path entry added by addToPath().
+ String name;
+ /// Stored as `escape_tuple_delimiter` in the path entry added by addToPath().
+ bool escape_delimiter;
+
+public:
+ /// Wraps `nested_` and remembers the element name; `escape_delimiter_` defaults to true.
+ SerializationNamed(const SerializationPtr & nested_, const String & name_, bool escape_delimiter_ = true)
+ : SerializationWrapper(nested_)
+ , name(name_), escape_delimiter(escape_delimiter_)
+ {
+ }
+
+ /// Returns the name passed to the constructor.
+ const String & getElementName() const { return name; }
+
+ void enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const override;
+
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+
+private:
+ /// Creator attached to the path entry in enumerateStreams(): data types and columns pass
+ /// through unchanged, serializations are wrapped into SerializationNamed with the same name.
+ struct SubcolumnCreator : public ISubcolumnCreator
+ {
+ const String name;
+ const bool escape_delimiter;
+
+ SubcolumnCreator(const String & name_, bool escape_delimiter_)
+ : name(name_), escape_delimiter(escape_delimiter_) {}
+
+ DataTypePtr create(const DataTypePtr & prev) const override { return prev; }
+ ColumnPtr create(const ColumnPtr & prev) const override { return prev; }
+ SerializationPtr create(const SerializationPtr & prev) const override
+ {
+ return std::make_shared<SerializationNamed>(prev, name, escape_delimiter);
+ }
+ };
+
+ /// Pushes a TupleElement entry with this name onto `path`.
+ void addToPath(SubstreamPath & path) const;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.cpp
new file mode 100644
index 00000000000..6b11ea6d252
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.cpp
@@ -0,0 +1,25 @@
+#include <DataTypes/Serializations/SerializationNothing.h>
+#include <Columns/ColumnNothing.h>
+#include <IO/ReadBuffer.h>
+#include <IO/WriteBuffer.h>
+
+namespace DB
+{
+
+/// Writes one placeholder byte (the character '0') for every serialized row, so that the
+/// number of rows can be recovered from the number of bytes.
+/// A `limit` of zero, or one that runs past the end of the column, means "up to the end".
+void SerializationNothing::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+    const size_t size = column.size();
+
+    /// Nothing to write when the offset is at or past the end; this also keeps
+    /// `size - offset` below from wrapping around.
+    if (offset >= size)
+        return;
+
+    /// Compare against the remaining row count instead of computing `offset + limit`,
+    /// which could overflow.
+    const size_t available = size - offset;
+    if (limit == 0 || limit > available)
+        limit = available;
+
+    for (size_t i = 0; i < limit; ++i)
+        ostr.write('0');
+}
+
+/// Skips up to `limit` bytes of input and grows the column by the value tryIgnore() returns.
+void SerializationNothing::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
+{
+    auto & nothing_column = typeid_cast<ColumnNothing &>(column);
+    const auto skipped = istr.tryIgnore(limit);
+    nothing_column.addSize(skipped);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.h
new file mode 100644
index 00000000000..02974d1ca76
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNothing.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+#include <Common/Exception.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+}
+
+/// Serialization for the Nothing type: every per-value operation throws NOT_IMPLEMENTED,
+/// only the bulk binary methods are implemented.
+class SerializationNothing : public SimpleTextSerialization
+{
+private:
+ /// Common throw site for all unsupported per-value operations.
+ [[noreturn]] static void throwNoSerialization()
+ {
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Serialization is not implemented for type Nothing");
+ }
+public:
+ void serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
+ void deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
+ void serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
+ void deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
+ void serializeText(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const override { throwNoSerialization(); }
+ void deserializeText(IColumn &, ReadBuffer &, const FormatSettings &, bool) const override { throwNoSerialization(); }
+
+ /// These methods write one placeholder byte per row (the character '0', not a zero byte)
+ /// and ignore bytes when reading, just to allow figuring out the size of the column.
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.cpp
new file mode 100644
index 00000000000..774b86472be
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.cpp
@@ -0,0 +1,670 @@
+#include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/Serializations/SerializationNumber.h>
+#include <DataTypes/Serializations/SerializationNamed.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypesNumber.h>
+
+#include <Columns/ColumnNullable.h>
+#include <Core/Field.h>
+#include <IO/ReadBuffer.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBuffer.h>
+#include <IO/WriteHelpers.h>
+#include <IO/PeekableReadBuffer.h>
+#include <Common/assert_cast.h>
+#include <base/scope_guard.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_READ_ALL_DATA;
+}
+
+/// Wraps the given data type into Nullable.
+DataTypePtr SerializationNullable::SubcolumnCreator::create(const DataTypePtr & prev) const
+{
+    auto nullable_type = std::make_shared<DataTypeNullable>(prev);
+    return nullable_type;
+}
+
+/// Wraps the given serialization into SerializationNullable.
+SerializationPtr SerializationNullable::SubcolumnCreator::create(const SerializationPtr & prev) const
+{
+    auto nullable_serialization = std::make_shared<SerializationNullable>(prev);
+    return nullable_serialization;
+}
+
+/// Builds a ColumnNullable from the given column and the null map stored in this creator.
+ColumnPtr SerializationNullable::SubcolumnCreator::create(const ColumnPtr & prev) const
+{
+ return ColumnNullable::create(prev, null_map);
+}
+
+/// Enumerates the null map stream first and then the streams of the nested serialization.
+/// One path element is pushed for the null map, reused for the nested elements and popped at the end.
+void SerializationNullable::enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const
+{
+ /// Either pointer is null when `data` carries no type / no column.
+ const auto * type_nullable = data.type ? &assert_cast<const DataTypeNullable &>(*data.type) : nullptr;
+ const auto * column_nullable = data.column ? &assert_cast<const ColumnNullable &>(*data.column) : nullptr;
+
+ /// The null map is described as a UInt8 serialization named "null" (escape_delimiter = false).
+ auto null_map_serialization = std::make_shared<SerializationNamed>(std::make_shared<SerializationNumber<UInt8>>(), "null", false);
+
+ settings.path.push_back(Substream::NullMap);
+ auto null_map_data = SubstreamData(null_map_serialization)
+ .withType(type_nullable ? std::make_shared<DataTypeUInt8>() : nullptr)
+ .withColumn(column_nullable ? column_nullable->getNullMapColumnPtr() : nullptr)
+ .withSerializationInfo(data.serialization_info);
+
+ settings.path.back().data = null_map_data;
+ callback(settings.path);
+
+ /// Replace the NullMap element with NullableElements instead of popping and pushing again.
+ settings.path.back() = Substream::NullableElements;
+ settings.path.back().creator = std::make_shared<SubcolumnCreator>(null_map_data.column);
+ settings.path.back().data = data;
+
+ auto next_data = SubstreamData(nested)
+ .withType(type_nullable ? type_nullable->getNestedType() : nullptr)
+ .withColumn(column_nullable ? column_nullable->getNestedColumnPtr() : nullptr)
+ .withSerializationInfo(data.serialization_info);
+
+ nested->enumerateStreams(settings, callback, next_data);
+ settings.path.pop_back();
+}
+
+/// Writes the bulk state prefix of the nested serialization for the nested column,
+/// with a NullableElements entry on the substream path during the call.
+void SerializationNullable::serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::NullableElements);
+ const auto & column_nullable = assert_cast<const ColumnNullable &>(column);
+ nested->serializeBinaryBulkStatePrefix(column_nullable.getNestedColumn(), settings, state);
+ settings.path.pop_back();
+}
+
+
+/// Writes the bulk state suffix of the nested serialization,
+/// with a NullableElements entry on the substream path during the call.
+void SerializationNullable::serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::NullableElements);
+ nested->serializeBinaryBulkStateSuffix(settings, state);
+ settings.path.pop_back();
+}
+
+
+/// Reads the bulk state prefix of the nested serialization,
+/// with a NullableElements entry on the substream path during the call.
+void SerializationNullable::deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::NullableElements);
+ nested->deserializeBinaryBulkStatePrefix(settings, state);
+ settings.path.pop_back();
+}
+
+
+/// Bulk-writes a nullable column: the null map goes to the NullMap substream (when the getter
+/// returns a stream for it), the nested column is written by the nested serialization.
+void SerializationNullable::serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ const ColumnNullable & col = assert_cast<const ColumnNullable &>(column);
+ col.checkConsistency();
+
+ /// First serialize null map.
+ settings.path.push_back(Substream::NullMap);
+ if (auto * stream = settings.getter(settings.path))
+ SerializationNumber<UInt8>().serializeBinaryBulk(col.getNullMapColumn(), *stream, offset, limit);
+
+ /// Then serialize the nested column, reusing the same path element.
+ settings.path.back() = Substream::NullableElements;
+ nested->serializeBinaryBulkWithMultipleStreams(col.getNestedColumn(), offset, limit, settings, state);
+ settings.path.pop_back();
+}
+
+
+/// Bulk-reads a nullable column: first the null map (from the substreams cache if present there,
+/// otherwise from the NullMap stream), then the nested column through the nested serialization.
+void SerializationNullable::deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const
+{
+ auto mutable_column = column->assumeMutable();
+ ColumnNullable & col = assert_cast<ColumnNullable &>(*mutable_column);
+
+ settings.path.push_back(Substream::NullMap);
+ if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
+ {
+ /// A null map cached for this path replaces the column's null map instead of being read again.
+ col.getNullMapColumnPtr() = cached_column;
+ }
+ else if (auto * stream = settings.getter(settings.path))
+ {
+ SerializationNumber<UInt8>().deserializeBinaryBulk(col.getNullMapColumn(), *stream, limit, 0);
+ addToSubstreamsCache(cache, settings.path, col.getNullMapColumnPtr());
+ }
+
+ /// Reuse the path element for the nested data.
+ settings.path.back() = Substream::NullableElements;
+ nested->deserializeBinaryBulkWithMultipleStreams(col.getNestedColumnPtr(), limit, settings, state, cache);
+ settings.path.pop_back();
+}
+
+
+/// Writes a null flag, followed by the nested value when the field is not NULL.
+void SerializationNullable::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const bool is_null = field.isNull();
+    writeBinary(is_null, ostr);
+
+    if (!is_null)
+        nested->serializeBinary(field, ostr, settings);
+}
+
+/// Reads a null flag; the field becomes Null when it is set, otherwise the nested value is read.
+void SerializationNullable::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    bool is_null = false;
+    readBinary(is_null, istr);
+
+    if (is_null)
+    {
+        field = Null();
+        return;
+    }
+
+    nested->deserializeBinary(field, istr, settings);
+}
+
+/// Writes the null flag of row `row_num`, followed by the nested value when the row is not NULL.
+void SerializationNullable::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable = assert_cast<const ColumnNullable &>(column);
+
+    const bool row_is_null = nullable.isNullAt(row_num);
+    writeBinary(row_is_null, ostr);
+
+    if (row_is_null)
+        return;
+
+    nested->serializeBinary(nullable.getNestedColumn(), row_num, ostr, settings);
+}
+
+/// Deserializes one value into a ColumnNullable (overload selected for ReturnType = void).
+/// Either both the nested column and the null map receive an entry, or, if appending to the
+/// null map throws, the nested value is removed again.
+template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, ReturnType * = nullptr>
+requires std::same_as<ReturnType, void>
+static ReturnType
+safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
+{
+    ColumnNullable & nullable = assert_cast<ColumnNullable &>(column);
+
+    if (check_for_null())
+    {
+        nullable.insertDefault();
+        return;
+    }
+
+    deserialize_nested(nullable.getNestedColumn());
+
+    try
+    {
+        nullable.getNullMapData().push_back(0);
+    }
+    catch (...)
+    {
+        /// Undo the nested insert so both parts keep the same size.
+        nullable.getNestedColumn().popBack(1);
+        throw;
+    }
+}
+
+/// Deserializes one value into a non-nullable column (overload selected for ReturnType = bool).
+/// A NULL inserts the default value and yields false; otherwise the value is read and true is returned.
+template <typename ReturnType = void, typename CheckForNull, typename DeserializeNested, ReturnType * = nullptr>
+requires std::same_as<ReturnType, bool>
+static ReturnType
+safeDeserialize(IColumn & column, const ISerialization &, CheckForNull && check_for_null, DeserializeNested && deserialize_nested)
+{
+    if (check_for_null())
+    {
+        column.insertDefault();
+        return false;
+    }
+
+    deserialize_nested(column);
+    return true;
+}
+
+
+/// Reads one value in binary form: a null flag and, when the flag is false, the nested value.
+void SerializationNullable::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    /// Both callbacks run synchronously inside safeDeserialize, so `settings` is captured by
+    /// reference rather than copying the whole FormatSettings object on every call.
+    safeDeserialize(column, *nested,
+        [&istr] { bool is_null = false; readBinary(is_null, istr); return is_null; },
+        [this, &istr, &settings] (IColumn & nested_column) { nested->deserializeBinary(nested_column, istr, settings); });
+}
+
+
+/// Escaped text form: NULL rows are written as settings.tsv.null_representation,
+/// other rows are written by the nested serialization.
+void SerializationNullable::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable = assert_cast<const ColumnNullable &>(column);
+
+    if (!nullable.isNullAt(row_num))
+    {
+        nested->serializeTextEscaped(nullable.getNestedColumn(), row_num, ostr, settings);
+        return;
+    }
+
+    writeString(settings.tsv.null_representation, ostr);
+}
+
+
+/// Read one escaped-text value into a Nullable column.
+/// Forwards to the void instantiation of deserializeTextEscapedImpl with this object's nested serialization.
+void SerializationNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeTextEscapedImpl<void>(column, istr, settings, nested);
+}
+
+/// Read one raw-text value into a Nullable column.
+/// Forwards to the void instantiation of deserializeTextRawImpl with this object's nested serialization.
+void SerializationNullable::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeTextRawImpl<void>(column, istr, settings, nested);
+}
+
+/// Raw text output: a NULL row is written as settings.tsv.null_representation,
+/// any other row is delegated to the nested serialization.
+void SerializationNullable::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
+
+    if (!nullable_column.isNullAt(row_num))
+    {
+        nested->serializeTextRaw(nullable_column.getNestedColumn(), row_num, ostr, settings);
+        return;
+    }
+
+    writeString(settings.tsv.null_representation, ostr);
+}
+
+/// Raw-text flavour of the shared escaped/raw implementation (escaped = false).
+template<typename ReturnType>
+ReturnType SerializationNullable::deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested)
+{
+ return deserializeTextEscapedAndRawImpl<ReturnType, false>(column, istr, settings, nested);
+}
+
+/// Escaped-text flavour of the shared escaped/raw implementation (escaped = true).
+template<typename ReturnType>
+ReturnType SerializationNullable::deserializeTextEscapedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
+ const SerializationPtr & nested)
+{
+ return deserializeTextEscapedAndRawImpl<ReturnType, true>(column, istr, settings, nested);
+}
+
+/// Shared implementation of escaped / raw text deserialization for Nullable.
+/// `escaped` selects which nested deserializer is used; the NULL marker is settings.tsv.null_representation.
+/// With ReturnType = void the value goes into a ColumnNullable; with ReturnType = bool it goes into a
+/// non-nullable column and the result is false when NULL was read (a default value is inserted instead).
+/// Throws ParsingException (CANNOT_READ_ALL_DATA) when the nested deserializer leaves unread data
+/// in the peekable buffer's own memory.
+template<typename ReturnType, bool escaped>
+ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
+    const SerializationPtr & nested_serialization)
+{
+    const String & null_representation = settings.tsv.null_representation;
+
+    /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok.
+    if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
+    {
+        /// This is not null, surely.
+        return safeDeserialize<ReturnType>(column, *nested_serialization,
+            [] { return false; },
+            [&nested_serialization, &istr, &settings] (IColumn & nested_column)
+            {
+                if constexpr (escaped)
+                    nested_serialization->deserializeTextEscaped(nested_column, istr, settings);
+                else
+                    nested_serialization->deserializeTextRaw(nested_column, istr, settings);
+            });
+    }
+
+    /// Check if we have enough data in buffer to check if it's a null.
+    /// Strictly more than the marker length is required because the byte after the marker is inspected too.
+    if (istr.available() > null_representation.size())
+    {
+        auto check_for_null = [&istr, &null_representation]()
+        {
+            auto * pos = istr.position();
+            if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n'))
+                return true;
+            /// Not a NULL marker: rewind so that the nested deserializer sees the value from its start.
+            istr.position() = pos;
+            return false;
+        };
+        auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column)
+        {
+            if constexpr (escaped)
+                nested_serialization->deserializeTextEscaped(nested_column, istr, settings);
+            else
+                nested_serialization->deserializeTextRaw(nested_column, istr, settings);
+        };
+        return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
+    }
+
+    /// We don't have enough data in buffer to check if it's a null.
+    /// Use PeekableReadBuffer to make a checkpoint before checking null
+    /// representation and rollback if check was failed.
+    PeekableReadBuffer buf(istr, true);
+    auto check_for_null = [&buf, &null_representation]()
+    {
+        buf.setCheckpoint();
+        SCOPE_EXIT(buf.dropCheckpoint());
+        if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'))
+            return true;
+
+        buf.rollbackToCheckpoint();
+        return false;
+    };
+
+    auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column)
+    {
+        auto * pos = buf.position();
+        if constexpr (escaped)
+            nested_serialization->deserializeTextEscaped(nested_column, buf, settings);
+        else
+            nested_serialization->deserializeTextRaw(nested_column, buf, settings);
+        /// Check that we don't have any unread data in PeekableReadBuffer own memory.
+        if (likely(!buf.hasUnreadData()))
+            return;
+
+        /// We have some unread data in PeekableReadBuffer own memory.
+        /// It can happen only if there is a string instead of a number
+        /// or if someone uses tab or LF in TSV null_representation.
+        /// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
+        /// The incorrectly deserialized value has to be deleted from the nested column, but only after
+        /// it has been rendered into the error message below: at that point it must still be the last row,
+        /// otherwise `nested_column.size() - 1` would address the previous row (or wrap around for an
+        /// empty column). Every path from here ends in a throw, so the removal is deferred to scope exit.
+        SCOPE_EXIT(nested_column.popBack(1));
+
+        if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
+            throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
+                "containing '\\t' or '\\n' may not work correctly for large input.");
+
+        WriteBufferFromOwnString parsed_value;
+        if constexpr (escaped)
+            nested_serialization->serializeTextEscaped(nested_column, nested_column.size() - 1, parsed_value, settings);
+        else
+            nested_serialization->serializeTextRaw(nested_column, nested_column.size() - 1, parsed_value, settings);
+        throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while parsing \"{}{}\" as Nullable"
+            " at position {}: got \"{}\", which was deserialized as \"{}\". "
+            "It seems that input data is ill-formatted.",
+            std::string(pos, buf.buffer().end()),
+            std::string(istr.position(), std::min(size_t(10), istr.available())),
+            istr.count(), std::string(pos, buf.position() - pos), parsed_value.str());
+    };
+
+    return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
+}
+
+/// Quoted text output: a NULL row is written as the literal NULL,
+/// any other row is delegated to the nested serialization.
+void SerializationNullable::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
+
+    if (!nullable_column.isNullAt(row_num))
+    {
+        nested->serializeTextQuoted(nullable_column.getNestedColumn(), row_num, ostr, settings);
+        return;
+    }
+
+    writeCString("NULL", ostr);
+}
+
+
+/// Read one quoted-text value into a Nullable column.
+/// Forwards to the void instantiation of deserializeTextQuotedImpl with this object's nested serialization.
+void SerializationNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeTextQuotedImpl<void>(column, istr, settings, nested);
+}
+
+/// Quoted text deserialization for Nullable: the literal NULL (compared case-insensitively) means NULL,
+/// everything else is passed to the nested deserializer.
+/// With ReturnType = void the value goes into a ColumnNullable; with ReturnType = bool it goes into a
+/// non-nullable column and the result is false when NULL was read (a default value is inserted instead).
+/// Throws ParsingException (CANNOT_READ_ALL_DATA) when the nested deserializer leaves unread data
+/// in the peekable buffer's own memory.
+template<typename ReturnType>
+ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
+    const SerializationPtr & nested)
+{
+    if (istr.eof() || (*istr.position() != 'N' && *istr.position() != 'n'))
+    {
+        /// This is not null, surely.
+        return safeDeserialize<ReturnType>(column, *nested,
+            [] { return false; },
+            [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); });
+    }
+
+    /// Check if we have enough data in buffer to check if it's a null.
+    if (istr.available() >= 4)
+    {
+        auto check_for_null = [&istr]()
+        {
+            auto * pos = istr.position();
+            if (checkStringCaseInsensitive("NULL", istr))
+                return true;
+            /// Not NULL: rewind so that the nested deserializer sees the value from its start.
+            istr.position() = pos;
+            return false;
+        };
+        auto deserialize_nested = [&nested, &settings, &istr] (IColumn & nested_column)
+        {
+            nested->deserializeTextQuoted(nested_column, istr, settings);
+        };
+        return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
+    }
+
+    /// We don't have enough data in buffer to check if it's a NULL
+    /// and we cannot check it just by one symbol (otherwise we won't be able
+    /// to differentiate for example NULL and NaN for float)
+    /// Use PeekableReadBuffer to make a checkpoint before checking
+    /// null and rollback if the check was failed.
+    PeekableReadBuffer buf(istr, true);
+    auto check_for_null = [&buf]()
+    {
+        buf.setCheckpoint();
+        SCOPE_EXIT(buf.dropCheckpoint());
+        if (checkStringCaseInsensitive("NULL", buf))
+            return true;
+
+        buf.rollbackToCheckpoint();
+        return false;
+    };
+
+    auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column)
+    {
+        nested->deserializeTextQuoted(nested_column, buf, settings);
+        /// Check that we don't have any unread data in PeekableReadBuffer own memory.
+        if (likely(!buf.hasUnreadData()))
+            return;
+
+        /// We have some unread data in PeekableReadBuffer own memory.
+        /// It can happen only if there is an unquoted string instead of a number.
+        /// We also should delete incorrectly deserialized value from nested column.
+        nested_column.popBack(1);
+        /// size_t(10) rather than 10ul: both std::min arguments must have the same type,
+        /// and size_t is not `unsigned long` on every platform.
+        throw DB::ParsingException(
+            ErrorCodes::CANNOT_READ_ALL_DATA,
+            "Error while parsing Nullable: got an unquoted string {} instead of a number",
+            String(buf.position(), std::min(size_t(10), buf.available())));
+    };
+
+    return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
+}
+
+
+/// Read a whole-text value into a Nullable column.
+/// Forwards to the void instantiation of deserializeWholeTextImpl with this object's nested serialization.
+void SerializationNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeWholeTextImpl<void>(column, istr, settings, nested);
+}
+
+/// Whole-text deserialization for Nullable: the input is treated as NULL only when it consists
+/// entirely of "NULL" or of "ᴺᵁᴸᴸ" (both probed with checkStringCaseInsensitive and followed by eof);
+/// anything else is passed to the nested deserializer.
+/// With ReturnType = void the value goes into a ColumnNullable; with ReturnType = bool it goes into a
+/// non-nullable column and the result is false when NULL was read.
+template <typename ReturnType>
+ReturnType SerializationNullable::deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
+ const SerializationPtr & nested)
+{
+ /// The peekable wrapper lets the NULL probes below be undone through checkpoint / rollback.
+ PeekableReadBuffer buf(istr, true);
+ auto check_for_null = [&buf]()
+ {
+ buf.setCheckpoint();
+ SCOPE_EXIT(buf.dropCheckpoint());
+
+ if (checkStringCaseInsensitive("NULL", buf) && buf.eof())
+ return true;
+
+ /// First spelling did not match the whole input: rewind and try the second one.
+ buf.rollbackToCheckpoint();
+ if (checkStringCaseInsensitive("ᴺᵁᴸᴸ", buf) && buf.eof())
+ return true;
+
+ /// Neither spelling matched: rewind so that the nested deserializer reads from the start.
+ buf.rollbackToCheckpoint();
+ return false;
+ };
+
+ auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column)
+ {
+ nested->deserializeWholeText(nested_column, buf, settings);
+ /// Only asserted, not handled at runtime: the nested deserializer is expected to consume the peeked data.
+ assert(!buf.hasUnreadData());
+ };
+
+ return safeDeserialize<ReturnType>(column, *nested, check_for_null, deserialize_nested);
+}
+
+
+/// CSV output: a NULL row is written as settings.csv.null_representation,
+/// any other row is delegated to the nested serialization.
+void SerializationNullable::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
+
+    if (!nullable_column.isNullAt(row_num))
+    {
+        nested->serializeTextCSV(nullable_column.getNestedColumn(), row_num, ostr, settings);
+        return;
+    }
+
+    writeString(settings.csv.null_representation, ostr);
+}
+
+/// Read one CSV value into a Nullable column.
+/// Forwards to the void instantiation of deserializeTextCSVImpl with this object's nested serialization.
+void SerializationNullable::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeTextCSVImpl<void>(column, istr, settings, nested);
+}
+
+/// CSV deserialization for Nullable. The NULL marker is settings.csv.null_representation; it counts as NULL
+/// only when followed by the CSV delimiter (or the custom delimiter, if one is configured), CR, LF or,
+/// on the peekable path without a custom delimiter, end of input.
+/// With ReturnType = void the value goes into a ColumnNullable; with ReturnType = bool it goes into a
+/// non-nullable column and the result is false when NULL was read (a default value is inserted instead).
+/// Throws ParsingException (CANNOT_READ_ALL_DATA) when the nested deserializer leaves unread data
+/// in the peekable buffer's own memory.
+template<typename ReturnType>
+ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
+    const SerializationPtr & nested_serialization)
+{
+    const String & null_representation = settings.csv.null_representation;
+    if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0]))
+    {
+        /// This is not null, surely.
+        return safeDeserialize<ReturnType>(column, *nested_serialization,
+            [] { return false; },
+            [&nested_serialization, &istr, &settings] (IColumn & nested_column) { nested_serialization->deserializeTextCSV(nested_column, istr, settings); });
+    }
+
+    /// Check if we have enough data in buffer to check if it's a null.
+    /// The in-place check is used only without a custom delimiter; strictly more than the marker
+    /// length is required because the byte after the marker is inspected too.
+    if (settings.csv.custom_delimiter.empty() && istr.available() > null_representation.size())
+    {
+        auto check_for_null = [&istr, &null_representation, &settings]()
+        {
+            auto * pos = istr.position();
+            if (checkString(null_representation, istr) && (*istr.position() == settings.csv.delimiter || *istr.position() == '\r' || *istr.position() == '\n'))
+                return true;
+            /// Not a NULL marker: rewind so that the nested deserializer sees the value from its start.
+            istr.position() = pos;
+            return false;
+        };
+        auto deserialize_nested = [&nested_serialization, &settings, &istr] (IColumn & nested_column)
+        {
+            nested_serialization->deserializeTextCSV(nested_column, istr, settings);
+        };
+        return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
+    }
+
+    /// We don't have enough data in buffer to check if it's a null.
+    /// Use PeekableReadBuffer to make a checkpoint before checking null
+    /// representation and rollback if the check was failed.
+    PeekableReadBuffer buf(istr, true);
+    auto check_for_null = [&buf, &null_representation, &settings]()
+    {
+        buf.setCheckpoint();
+        SCOPE_EXIT(buf.dropCheckpoint());
+        if (checkString(null_representation, buf))
+        {
+            if (!settings.csv.custom_delimiter.empty())
+            {
+                if (checkString(settings.csv.custom_delimiter, buf))
+                {
+                    /// Rollback to the beginning of custom delimiter.
+                    buf.rollbackToCheckpoint();
+                    assertString(null_representation, buf);
+                    return true;
+                }
+            }
+            else if (buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\r' || *buf.position() == '\n')
+                return true;
+        }
+
+        buf.rollbackToCheckpoint();
+        return false;
+    };
+
+    auto deserialize_nested = [&nested_serialization, &settings, &buf, &null_representation, &istr] (IColumn & nested_column)
+    {
+        auto * pos = buf.position();
+        nested_serialization->deserializeTextCSV(nested_column, buf, settings);
+        /// Check that we don't have any unread data in PeekableReadBuffer own memory.
+        if (likely(!buf.hasUnreadData()))
+            return;
+
+        /// We have some unread data in PeekableReadBuffer own memory.
+        /// It can happen only if there is an unquoted string instead of a number
+        /// or if someone uses csv delimiter, LF or CR in CSV null representation.
+        /// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
+        /// The incorrectly deserialized value has to be deleted from the nested column, but only after
+        /// it has been rendered into the error message below: at that point it must still be the last row,
+        /// otherwise `nested_column.size() - 1` would address the previous row (or wrap around for an
+        /// empty column). Every path from here ends in a throw, so the removal is deferred to scope exit.
+        SCOPE_EXIT(nested_column.popBack(1));
+
+        if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos
+            || null_representation.find('\n') != std::string::npos)
+            throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "CSV custom null representation containing "
+                "format_csv_delimiter, '\\r' or '\\n' may not work correctly for large input.");
+
+        WriteBufferFromOwnString parsed_value;
+        nested_serialization->serializeTextCSV(nested_column, nested_column.size() - 1, parsed_value, settings);
+        throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while parsing \"{}{}\" as Nullable"
+            " at position {}: got \"{}\", which was deserialized as \"{}\". "
+            "It seems that input data is ill-formatted.",
+            std::string(pos, buf.buffer().end()),
+            std::string(istr.position(), std::min(size_t(10), istr.available())),
+            istr.count(), std::string(pos, buf.position() - pos), parsed_value.str());
+    };
+
+    return safeDeserialize<ReturnType>(column, *nested_serialization, check_for_null, deserialize_nested);
+}
+
+/// Plain text output of a Nullable row: the nested value for non-NULL rows, a NULL marker otherwise.
+void SerializationNullable::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
+
+    if (!nullable_column.isNullAt(row_num))
+    {
+        nested->serializeText(nullable_column.getNestedColumn(), row_num, ostr, settings);
+        return;
+    }
+
+    /// In simple text format (like 'Pretty' format) (these formats are suitable only for output and cannot be parsed back),
+    /// data is printed without escaping.
+    /// It makes theoretically impossible to distinguish between NULL and some string value, regardless on how do we print NULL.
+    /// For this reason, we output NULL in a bit strange way.
+    /// This assumes UTF-8 and proper font support. This is Ok, because Pretty formats are "presentational", not for data exchange.
+    const bool utf8_charset = settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8;
+    if (utf8_charset)
+        writeCString("ᴺᵁᴸᴸ", ostr);
+    else
+        writeCString("NULL", ostr);
+}
+
+/// JSON output: a NULL row is written as the literal null,
+/// any other row is delegated to the nested serialization.
+void SerializationNullable::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
+
+    if (!nullable_column.isNullAt(row_num))
+    {
+        nested->serializeTextJSON(nullable_column.getNestedColumn(), row_num, ostr, settings);
+        return;
+    }
+
+    writeCString("null", ostr);
+}
+
+/// Read one JSON value into a Nullable column.
+/// Forwards to the void instantiation of deserializeTextJSONImpl with this object's nested serialization.
+void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ deserializeTextJSONImpl<void>(column, istr, settings, nested);
+}
+
+/// JSON deserialization for Nullable: NULL is detected with
+/// checkStringByFirstCharacterAndAssertTheRest("null", istr); when that returns false the value is
+/// passed to the nested JSON deserializer.
+/// With ReturnType = void the value goes into a ColumnNullable; with ReturnType = bool it goes into a
+/// non-nullable column and the result is false when NULL was read.
+template<typename ReturnType>
+ReturnType SerializationNullable::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
+ const SerializationPtr & nested)
+{
+ return safeDeserialize<ReturnType>(column, *nested,
+ [&istr] { return checkStringByFirstCharacterAndAssertTheRest("null", istr); },
+ [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextJSON(nested_column, istr, settings); });
+}
+
+/// XML output: a NULL row is written as a backslash followed by N,
+/// any other row is delegated to the nested serialization.
+void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
+
+    if (!nullable_column.isNullAt(row_num))
+    {
+        nested->serializeTextXML(nullable_column.getNestedColumn(), row_num, ostr, settings);
+        return;
+    }
+
+    writeCString("\\N", ostr);
+}
+
+/// Explicit instantiations of the bool-returning variants. The void variants are already instantiated
+/// by the deserialize* member functions of this class, which call them directly.
+/// NOTE(review): presumably these exist for callers in other translation units, since the template
+/// bodies live in this .cpp file - confirm against the users of these functions.
+template bool SerializationNullable::deserializeWholeTextImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+template bool SerializationNullable::deserializeTextEscapedImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+template bool SerializationNullable::deserializeTextQuotedImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
+template bool SerializationNullable::deserializeTextCSVImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+template bool SerializationNullable::deserializeTextJSONImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
+template bool SerializationNullable::deserializeTextRawImpl<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.h
new file mode 100644
index 00000000000..3ec01b46de5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNullable.h
@@ -0,0 +1,108 @@
+#pragma once
+
+#include <DataTypes/Serializations/ISerialization.h>
+
+namespace DB
+{
+
+/// Serialization of Nullable values: wraps the serialization of the nested type
+/// and handles the NULL case around it in every supported format.
+class SerializationNullable : public ISerialization
+{
+private:
+ /// Serialization of the nested type; non-NULL values are delegated to it.
+ SerializationPtr nested;
+
+public:
+ explicit SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {}
+
+ /// Overrides of the ISerialization stream / bulk interface (defined in the .cpp file).
+ void enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const override;
+
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+
+ /// Per-value binary and text (de)serialization, for a single Field or a single column row.
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+ /** It is questionable, how NULL values could be represented in CSV. There are three variants:
+ * 1. \N
+ * 2. empty string (without quotes)
+ * 3. NULL
+ * We support all of them (however, second variant is supported by CSVRowInputFormat, not by deserializeTextCSV).
+ * (see also input_format_defaults_for_omitted_fields and input_format_csv_unquoted_null_literal_as_null settings)
+ * In CSV, non-NULL string value, starting with \N characters, must be placed in quotes, to avoid ambiguity.
+ */
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+ void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+
+ /// Static helpers: they take the nested serialization as an argument instead of using a class instance.
+ /// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false)
+ /// If ReturnType is void, deserialize Nullable(T)
+ template <typename ReturnType = bool>
+ static ReturnType deserializeWholeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+ template <typename ReturnType = bool>
+ static ReturnType deserializeTextEscapedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+ template <typename ReturnType = bool>
+ static ReturnType deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
+ template <typename ReturnType = bool>
+ static ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+ template <typename ReturnType = bool>
+ static ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested);
+ template <typename ReturnType = bool>
+ static ReturnType deserializeTextRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+ /// Common body of the escaped and raw variants; `escaped` has no default, so it must be given explicitly.
+ template <typename ReturnType = bool, bool escaped>
+ static ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested);
+
+private:
+ /// Subcolumn creator that carries a null map column.
+ /// NOTE(review): the create() overloads are defined elsewhere; presumably they wrap the given
+ /// type / serialization / column into its Nullable counterpart using null_map - confirm.
+ struct SubcolumnCreator : public ISubcolumnCreator
+ {
+ const ColumnPtr null_map;
+
+ explicit SubcolumnCreator(const ColumnPtr & null_map_) : null_map(null_map_) {}
+
+ DataTypePtr create(const DataTypePtr & prev) const override;
+ SerializationPtr create(const SerializationPtr & prev) const override;
+ ColumnPtr create(const ColumnPtr & prev) const override;
+ };
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.cpp
new file mode 100644
index 00000000000..94b44d5cc66
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.cpp
@@ -0,0 +1,182 @@
+#include <DataTypes/Serializations/SerializationNumber.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnConst.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Common/NaNUtils.h>
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+#include <Formats/FormatSettings.h>
+#include <Formats/ProtobufReader.h>
+#include <Core/Field.h>
+
+#include <ranges>
+
+namespace DB
+{
+
+/// Write the number stored at `row_num` as text; format settings are not used.
+template <typename T>
+void SerializationNumber<T>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & values = assert_cast<const ColumnVector<T> &>(column).getData();
+    writeText(values[row_num], ostr);
+}
+
+/// Parse one number from text and append it to the column.
+/// When `whole` is set, any input left after the number is reported through throwUnexpectedDataAfterParsedValue.
+template <typename T>
+void SerializationNumber<T>::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
+{
+    T value;
+
+    /// Integer types go through readIntTextUnsafe, everything else through readText.
+    if constexpr (is_integer<T> && is_arithmetic_v<T>)
+        readIntTextUnsafe(value, istr);
+    else
+        readText(value, istr);
+
+    auto & values = assert_cast<ColumnVector<T> &>(column).getData();
+    values.push_back(value);
+
+    /// The value is appended before the trailing-data check, exactly in this order.
+    const bool has_trailing_data = whole && !istr.eof();
+    if (has_trailing_data)
+        throwUnexpectedDataAfterParsedValue(column, istr, settings, "Number");
+}
+
+/// Write the number stored at `row_num` as a JSON number, honouring the format settings.
+template <typename T>
+void SerializationNumber<T>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & values = assert_cast<const ColumnVector<T> &>(column).getData();
+    auto value = values[row_num];
+    writeJSONNumber(value, ostr, settings);
+}
+
+/// Parse one JSON value as a number and append it to the column.
+/// Accepted forms, as handled below: a number with or without surrounding double quotes, the unquoted
+/// literal null (stored as NaNOrZero<T>()), and - for UInt8 / Int8 or when
+/// settings.json.read_bools_as_numbers is set - a boolean word starting with 't' or 'f'.
+template <typename T>
+void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ bool has_quote = false;
+ if (!istr.eof() && *istr.position() == '"') /// We understand the number both in quotes and without.
+ {
+ has_quote = true;
+ ++istr.position();
+ }
+
+ FieldType x;
+
+ /// null
+ if (!has_quote && !istr.eof() && *istr.position() == 'n')
+ {
+ /// The leading 'n' was already matched; the rest of the literal is mandatory.
+ ++istr.position();
+ assertString("ull", istr);
+
+ x = NaNOrZero<T>();
+ }
+ else
+ {
+ static constexpr bool is_uint8 = std::is_same_v<T, UInt8>;
+ static constexpr bool is_int8 = std::is_same_v<T, Int8>;
+
+ if (settings.json.read_bools_as_numbers || is_uint8 || is_int8)
+ {
+ // extra conditions to parse true/false strings into 1/0
+ if (istr.eof())
+ throwReadAfterEOF();
+ if (*istr.position() == 't' || *istr.position() == 'f')
+ {
+ bool tmp = false;
+ readBoolTextWord(tmp, istr);
+ x = tmp;
+ }
+ else
+ readText(x, istr);
+ }
+ else
+ {
+ readText(x, istr);
+ }
+
+ /// An opening quote must be matched by a closing one.
+ if (has_quote)
+ assertChar('"', istr);
+ }
+
+ assert_cast<ColumnVector<T> &>(column).getData().push_back(x);
+}
+
+/// Parse one number with readCSV and append it to the column; format settings are not used.
+template <typename T>
+void SerializationNumber<T>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & /*settings*/) const
+{
+    FieldType value;
+    readCSV(value, istr);
+
+    auto & values = assert_cast<ColumnVector<T> &>(column).getData();
+    values.push_back(value);
+}
+
+/// Write the number held by `field` in little-endian binary form.
+template <typename T>
+void SerializationNumber<T>::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
+{
+    using ValueType = typename ColumnVector<T>::ValueType;
+
+    /// ColumnVector<T>::ValueType is a narrower type. For example, UInt8, when the Field type is UInt64
+    ValueType narrowed = static_cast<ValueType>(field.get<FieldType>());
+    writeBinaryLittleEndian(narrowed, ostr);
+}
+
+/// Read one little-endian binary number and store it in `field` as NearestFieldType<FieldType>.
+template <typename T>
+void SerializationNumber<T>::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const
+{
+    using ValueType = typename ColumnVector<T>::ValueType;
+
+    ValueType value;
+    readBinaryLittleEndian(value, istr);
+    field = NearestFieldType<FieldType>(value);
+}
+
+/// Write the number stored at `row_num` in little-endian binary form.
+template <typename T>
+void SerializationNumber<T>::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & values = assert_cast<const ColumnVector<T> &>(column).getData();
+    writeBinaryLittleEndian(values[row_num], ostr);
+}
+
+/// Read one little-endian binary number and append it to the column.
+template <typename T>
+void SerializationNumber<T>::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    using ValueType = typename ColumnVector<T>::ValueType;
+
+    ValueType value;
+    readBinaryLittleEndian(value, istr);
+
+    auto & values = assert_cast<ColumnVector<T> &>(column).getData();
+    values.push_back(value);
+}
+
+/// Write rows [offset, offset + limit) of the column in little-endian binary form.
+/// A `limit` of 0, or one reaching past the end of the column, means "up to the end of the column".
+template <typename T>
+void SerializationNumber<T>::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+    using ValueType = typename ColumnVector<T>::ValueType;
+
+    const typename ColumnVector<T>::Container & values = typeid_cast<const ColumnVector<T> &>(column).getData();
+    const size_t total_rows = values.size();
+
+    const bool limit_is_open_ended = limit == 0 || offset + limit > total_rows;
+    if (limit_is_open_ended)
+        limit = total_rows - offset;
+
+    if (limit == 0)
+        return;
+
+    if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2)
+    {
+        /// Multi-byte values on a big-endian host are converted one by one.
+        for (size_t row = offset; row < offset + limit; ++row)
+            writeBinaryLittleEndian(values[row], ostr);
+    }
+    else
+    {
+        /// Otherwise the in-memory bytes are written out as a single contiguous range.
+        ostr.write(reinterpret_cast<const char *>(&values[offset]), sizeof(ValueType) * limit);
+    }
+}
+
+/// Read up to `limit` little-endian binary values and append them to the column.
+/// The column grows by the number of complete values that were actually read.
+template <typename T>
+void SerializationNumber<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
+{
+    using ValueType = typename ColumnVector<T>::ValueType;
+
+    typename ColumnVector<T>::Container & values = typeid_cast<ColumnVector<T> &>(column).getData();
+    const size_t old_size = values.size();
+
+    /// Make room for the requested rows, read straight into it, then shrink to what arrived.
+    values.resize(old_size + limit);
+    const size_t bytes_read = istr.readBig(reinterpret_cast<char*>(&values[old_size]), sizeof(ValueType) * limit);
+    values.resize(old_size + bytes_read / sizeof(ValueType));
+
+    if constexpr (std::endian::native == std::endian::big && sizeof(T) >= 2)
+    {
+        /// Multi-byte values on a big-endian host are converted in place after the read.
+        for (size_t row = old_size; row < values.size(); ++row)
+            transformEndianness<std::endian::big, std::endian::little>(values[row]);
+    }
+}
+
+/// Explicit instantiations: the member functions above are defined in this .cpp file,
+/// so each supported numeric type is instantiated here.
+template class SerializationNumber<UInt8>;
+template class SerializationNumber<UInt16>;
+template class SerializationNumber<UInt32>;
+template class SerializationNumber<UInt64>;
+template class SerializationNumber<UInt128>;
+template class SerializationNumber<UInt256>;
+template class SerializationNumber<Int8>;
+template class SerializationNumber<Int16>;
+template class SerializationNumber<Int32>;
+template class SerializationNumber<Int64>;
+template class SerializationNumber<Int128>;
+template class SerializationNumber<Int256>;
+template class SerializationNumber<Float32>;
+template class SerializationNumber<Float64>;
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.h
new file mode 100644
index 00000000000..972c6c9a30f
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationNumber.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+
+namespace DB
+{
+
+/// Forward declaration: this header only names ColumnVector<T> in a type alias.
+template <typename T>
+class ColumnVector;
+
+/// Serialization of numeric values of type T stored in ColumnVector<T>.
+template <typename T>
+class SerializationNumber : public SimpleTextSerialization
+{
+ static_assert(is_arithmetic_v<T>);
+
+public:
+ using FieldType = T;
+ using ColumnType = ColumnVector<T>;
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ /** Binary format: the implementation writes and reads values in little-endian byte order
+ * (writeBinaryLittleEndian / readBinaryLittleEndian, with bulk data converted on big-endian hosts).
+ */
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.cpp
new file mode 100644
index 00000000000..df9489213c8
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.cpp
@@ -0,0 +1,557 @@
+#include <DataTypes/Serializations/SerializationObject.h>
+#include <DataTypes/Serializations/JSONDataParser.h>
+#include <DataTypes/Serializations/SerializationString.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/ObjectUtils.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/NestedUtils.h>
+#include <Common/JSONParsers/SimdJSONParser.h>
+#include <Common/JSONParsers/RapidJSONParser.h>
+#include <Common/HashTable/HashSet.h>
+#include <Columns/ColumnObject.h>
+#include <Columns/ColumnString.h>
+#include <Functions/FunctionsConversion.h>
+
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/VarInt.h>
+#include <magic_enum.hpp>
+#include <memory>
+#include <string>
+
+namespace DB
+{
+
+/// Error codes thrown by the functions in this file; declared extern, so they are defined elsewhere.
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+ extern const int INCORRECT_DATA;
+ extern const int CANNOT_READ_ALL_DATA;
+ extern const int ARGUMENT_OUT_OF_BOUND;
+ extern const int LOGICAL_ERROR;
+}
+
+/// Common implementation of all text deserializers.
+/// @reader fills a String with the textual representation of one object;
+/// the string is parsed into (path, value) pairs and one row is appended to the ColumnObject.
+/// Throws INCORRECT_DATA if the parser fails to produce a result.
+template <typename Parser>
+template <typename Reader>
+void SerializationObject<Parser>::deserializeTextImpl(IColumn & column, Reader && reader) const
+{
+    auto & object_column = assert_cast<ColumnObject &>(column);
+
+    String text;
+    reader(text);
+    std::optional<ParseResult> parsed;
+
+    if (text.empty())
+    {
+        /// An empty string is treated as an empty object,
+        /// which makes CAST from String to Object friendlier.
+        parsed = ParseResult{};
+    }
+    else
+    {
+        auto pooled_parser = parsers_pool.get([] { return new Parser; });
+        parsed = pooled_parser->parse(text.data(), text.size());
+    }
+
+    if (!parsed)
+        throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse object");
+
+    auto & [paths, values] = *parsed;
+    assert(paths.size() == values.size());
+
+    const size_t rows_before = object_column.size();
+    for (size_t i = 0; i < paths.size(); ++i)
+    {
+        const auto & path = paths[i];
+        auto & value = values[i];
+
+        auto info = getFieldInfo(value);
+        if (info.need_fold_dimension)
+            value = applyVisitor(FieldVisitorFoldDimension(info.num_dimensions), std::move(value));
+
+        /// Values whose scalar type is Nothing are not stored.
+        if (isNothing(info.scalar_type))
+            continue;
+
+        if (!object_column.hasSubcolumn(path))
+        {
+            if (path.hasNested())
+                object_column.addNestedSubcolumn(path, info, rows_before);
+            else
+                object_column.addSubcolumn(path, rows_before);
+        }
+
+        auto & target = object_column.getSubcolumn(path);
+        assert(target.size() == rows_before);
+
+        target.insert(std::move(value), std::move(info));
+    }
+
+    /// Subcolumns that did not receive a value in this row get a default one.
+    const auto & all_subcolumns = object_column.getSubcolumns();
+    for (const auto & entry : all_subcolumns)
+    {
+        if (entry->data.size() != rows_before)
+            continue;
+
+        if (!object_column.tryInsertDefaultFromNested(entry))
+            entry->data.insertDefault();
+    }
+
+    object_column.incrementNumRows();
+}
+
+/// Reads the object text with readStringInto and parses it.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_whole = [&istr](String & target) { readStringInto(target, istr); };
+    deserializeTextImpl(column, read_whole);
+}
+
+/// Reads an escaped string from @istr and parses it as an object.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_escaped = [&istr](String & target) { readEscapedString(target, istr); };
+    deserializeTextImpl(column, read_escaped);
+}
+
+/// Reads a quoted string from @istr and parses it as an object.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_quoted = [&istr](String & target) { readQuotedStringInto<true>(target, istr); };
+    deserializeTextImpl(column, read_quoted);
+}
+
+/// Lets the parser read one JSON value from @istr into a string and parses it as an object.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    auto read_json = [&istr](String & target) { Parser::readJSON(target, istr); };
+    deserializeTextImpl(column, read_json);
+}
+
+/// Reads a CSV field from @istr (honouring settings.csv) and parses it as an object.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    auto read_csv = [&istr, &settings](String & target) { readCSVStringInto(target, istr, settings.csv); };
+    deserializeTextImpl(column, read_csv);
+}
+
+/// Throws NOT_IMPLEMENTED if @settings request position independent encoding;
+/// does nothing otherwise.
+template <typename Parser>
+template <typename TSettings>
+void SerializationObject<Parser>::checkSerializationIsSupported(const TSettings & settings) const
+{
+    if (!settings.position_independent_encoding)
+        return;
+
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+        "DataTypeObject doesn't support serialization with position independent encoding");
+}
+
+/// State of binary bulk serialization. Created in serializeBinaryBulkStatePrefix
+/// and read back in serializeBinaryBulkWithMultipleStreams / serializeBinaryBulkStateSuffix.
+template <typename Parser>
+struct SerializationObject<Parser>::SerializeStateObject : public ISerialization::SerializeBinaryBulkState
+{
+ /// Tuple type the object was unflattened to when the prefix was written.
+ DataTypePtr nested_type;
+ /// Default serialization of @nested_type.
+ SerializationPtr nested_serialization;
+ /// State owned by @nested_serialization.
+ SerializeBinaryBulkStatePtr nested_state;
+};
+
+/// State of binary bulk deserialization. Filled in deserializeBinaryBulkStatePrefix.
+template <typename Parser>
+struct SerializationObject<Parser>::DeserializeStateObject : public ISerialization::DeserializeBinaryBulkState
+{
+ /// Kind of the written data (Tuple or String), read from the ObjectStructure stream.
+ BinarySerializationKind kind;
+ /// Type of the internal column: the Tuple type read from the stream, or String.
+ DataTypePtr nested_type;
+ /// Serialization used to read the internal column.
+ SerializationPtr nested_serialization;
+ /// State owned by @nested_serialization.
+ DeserializeBinaryBulkStatePtr nested_state;
+};
+
+/// Writes the header of binary bulk serialization: the serialization kind (always TUPLE here)
+/// and the name of the Tuple type into the ObjectStructure stream, then the prefix of the nested
+/// Tuple serialization under ObjectData. A non-finalized column is finalized on a clone first.
+/// Throws NOT_IMPLEMENTED if @state is already set and LOGICAL_ERROR if the structure stream is missing.
+template <typename Parser>
+void SerializationObject<Parser>::serializeBinaryBulkStatePrefix(
+    const IColumn & column,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    checkSerializationIsSupported(settings);
+    if (state)
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "DataTypeObject doesn't support serialization with non-trivial state");
+
+    const auto & object_column = assert_cast<const ColumnObject &>(column);
+    if (!object_column.isFinalized())
+    {
+        /// Retry on a finalized copy of the column.
+        auto finalized_copy = object_column.cloneFinalized();
+        serializeBinaryBulkStatePrefix(*finalized_copy, settings, state);
+        return;
+    }
+
+    settings.path.push_back(Substream::ObjectStructure);
+    auto * structure_stream = settings.getter(settings.path);
+
+    if (!structure_stream)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for kind of binary serialization");
+
+    auto [tuple_column, tuple_type] = unflattenObjectToTuple(object_column);
+
+    writeIntBinary(static_cast<UInt8>(BinarySerializationKind::TUPLE), *structure_stream);
+    writeStringBinary(tuple_type->getName(), *structure_stream);
+
+    auto object_state = std::make_shared<SerializeStateObject>();
+    object_state->nested_type = tuple_type;
+    object_state->nested_serialization = tuple_type->getDefaultSerialization();
+
+    /// The nested prefix goes under ObjectData instead of ObjectStructure.
+    settings.path.back() = Substream::ObjectData;
+    object_state->nested_serialization->serializeBinaryBulkStatePrefix(*tuple_column, settings, object_state->nested_state);
+
+    state = std::move(object_state);
+    settings.path.pop_back();
+}
+
+/// Forwards the suffix to the nested serialization stored in @state, under the ObjectData substream.
+template <typename Parser>
+void SerializationObject<Parser>::serializeBinaryBulkStateSuffix(
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    checkSerializationIsSupported(settings);
+    auto * object_state = checkAndGetState<SerializeStateObject>(state);
+    const auto & nested = object_state->nested_serialization;
+
+    settings.path.push_back(Substream::ObjectData);
+    nested->serializeBinaryBulkStateSuffix(settings, object_state->nested_state);
+    settings.path.pop_back();
+}
+
+/// Reads the header of binary bulk serialization from the ObjectStructure stream:
+/// the serialization kind and, for TUPLE, the name of the Tuple type.
+/// Then reads the prefix of the nested serialization under ObjectData and stores everything in @state.
+/// Throws NOT_IMPLEMENTED if @state is already set, CANNOT_READ_ALL_DATA if the stream is missing,
+/// INCORRECT_DATA on an unknown kind or a non-Tuple type name.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state) const
+{
+    checkSerializationIsSupported(settings);
+    if (state)
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "DataTypeObject doesn't support serialization with non-trivial state");
+
+    settings.path.push_back(Substream::ObjectStructure);
+    auto * structure_stream = settings.getter(settings.path);
+    settings.path.pop_back();
+
+    if (!structure_stream)
+        throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
+            "Cannot read kind of binary serialization of DataTypeObject, because its stream is missing");
+
+    UInt8 raw_kind;
+    readIntBinary(raw_kind, *structure_stream);
+    auto parsed_kind = magic_enum::enum_cast<BinarySerializationKind>(raw_kind);
+    if (!parsed_kind)
+        throw Exception(ErrorCodes::INCORRECT_DATA,
+            "Unknown binary serialization kind of Object: {}", std::to_string(raw_kind));
+
+    auto object_state = std::make_shared<DeserializeStateObject>();
+    object_state->kind = *parsed_kind;
+
+    if (object_state->kind == BinarySerializationKind::STRING)
+    {
+        /// Raw strings: they are parsed into objects when the data is read.
+        object_state->nested_type = std::make_shared<DataTypeString>();
+        object_state->nested_serialization = std::make_shared<SerializationString>();
+    }
+    else if (object_state->kind == BinarySerializationKind::TUPLE)
+    {
+        String type_name;
+        readStringBinary(type_name, *structure_stream);
+        object_state->nested_type = DataTypeFactory::instance().get(type_name);
+        object_state->nested_serialization = object_state->nested_type->getDefaultSerialization();
+
+        if (!isTuple(object_state->nested_type))
+            throw Exception(ErrorCodes::INCORRECT_DATA,
+                "Data of type Object should be written as Tuple, got: {}", type_name);
+    }
+    else
+    {
+        throw Exception(ErrorCodes::INCORRECT_DATA,
+            "Unknown binary serialization kind of Object: {}", std::to_string(raw_kind));
+    }
+
+    settings.path.push_back(Substream::ObjectData);
+    object_state->nested_serialization->deserializeBinaryBulkStatePrefix(settings, object_state->nested_state);
+    settings.path.pop_back();
+
+    state = std::move(object_state);
+}
+
+/// Serializes rows [offset, offset + limit) of the object column as a Tuple column
+/// through the nested serialization remembered in @state.
+/// A non-finalized column is finalized on a clone first.
+/// Throws LOGICAL_ERROR if the Tuple type differs from the one recorded by the prefix.
+template <typename Parser>
+void SerializationObject<Parser>::serializeBinaryBulkWithMultipleStreams(
+    const IColumn & column,
+    size_t offset,
+    size_t limit,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    checkSerializationIsSupported(settings);
+    const auto & object_column = assert_cast<const ColumnObject &>(column);
+    auto * object_state = checkAndGetState<SerializeStateObject>(state);
+
+    if (!object_column.isFinalized())
+    {
+        /// Retry on a finalized copy of the column.
+        auto finalized_copy = object_column.cloneFinalized();
+        serializeBinaryBulkWithMultipleStreams(*finalized_copy, offset, limit, settings, state);
+        return;
+    }
+
+    auto [tuple_column, tuple_type] = unflattenObjectToTuple(object_column);
+
+    const bool same_type = object_state->nested_type->equals(*tuple_type);
+    if (!same_type)
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+            "Types of internal column of Object mismatched. Expected: {}, Got: {}",
+            object_state->nested_type->getName(), tuple_type->getName());
+    }
+
+    settings.path.push_back(Substream::ObjectData);
+    /// Data is written only if a stream exists for ObjectData.
+    if (auto * data_stream = settings.getter(settings.path))
+    {
+        object_state->nested_serialization->serializeBinaryBulkWithMultipleStreams(
+            *tuple_column, offset, limit, settings, object_state->nested_state);
+    }
+
+    settings.path.pop_back();
+}
+
+/// Reads up to @limit rows into an empty ColumnObject, either from the String
+/// or from the Tuple representation, depending on the kind stored in @state.
+/// The column is checked for consistency and finalized afterwards.
+/// Throws NOT_IMPLEMENTED if @column is not empty.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeBinaryBulkWithMultipleStreams(
+    ColumnPtr & column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsCache * cache) const
+{
+    checkSerializationIsSupported(settings);
+    if (!column->empty())
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+            "DataTypeObject cannot be deserialized to non-empty column");
+
+    auto result_column = column->assumeMutable();
+    auto & object_column = assert_cast<ColumnObject &>(*result_column);
+    auto * object_state = checkAndGetState<DeserializeStateObject>(state);
+
+    settings.path.push_back(Substream::ObjectData);
+    if (object_state->kind != BinarySerializationKind::STRING)
+        deserializeBinaryBulkFromTuple(object_column, limit, settings, *object_state, cache);
+    else
+        deserializeBinaryBulkFromString(object_column, limit, settings, *object_state, cache);
+
+    settings.path.pop_back();
+    object_column.checkConsistency();
+    object_column.finalize();
+    column = std::move(result_column);
+}
+
+/// Reads a column of strings with the nested serialization and converts
+/// every string to an object with this serialization.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeBinaryBulkFromString(
+    ColumnObject & column_object,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeStateObject & state,
+    SubstreamsCache * cache) const
+{
+    ColumnPtr strings = state.nested_type->createColumn();
+    state.nested_serialization->deserializeBinaryBulkWithMultipleStreams(
+        strings, limit, settings, state.nested_state, cache);
+
+    ConvertImplGenericFromString<ColumnString>::executeImpl(*strings, column_object, *this, strings->size());
+}
+
+/// Reads a Tuple column with the nested serialization, flattens it and
+/// adds every flattened element to @column_object as a subcolumn.
+/// Throws INCORRECT_DATA if the flattened type and column have a different number of elements.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeBinaryBulkFromTuple(
+    ColumnObject & column_object,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeStateObject & state,
+    SubstreamsCache * cache) const
+{
+    ColumnPtr tuple = state.nested_type->createColumn();
+    state.nested_serialization->deserializeBinaryBulkWithMultipleStreams(
+        tuple, limit, settings, state.nested_state, cache);
+
+    auto [tuple_paths, tuple_types] = flattenTuple(state.nested_type);
+    auto flat_tuple = flattenTuple(tuple);
+    const auto & flat_columns = assert_cast<const ColumnTuple &>(*flat_tuple).getColumns();
+
+    assert(tuple_paths.size() == tuple_types.size());
+    const size_t subcolumns_count = tuple_paths.size();
+
+    if (flat_columns.size() != subcolumns_count)
+        throw Exception(ErrorCodes::INCORRECT_DATA,
+            "Inconsistent type ({}) and column ({}) while reading column of type Object",
+            state.nested_type->getName(), tuple->getName());
+
+    for (size_t pos = 0; pos != subcolumns_count; ++pos)
+        column_object.addSubcolumn(tuple_paths[pos], flat_columns[pos]->assumeMutable());
+}
+
+/// Binary serialization of a single Field is not supported: always throws NOT_IMPLEMENTED.
+template <typename Parser>
+void SerializationObject<Parser>::serializeBinary(const Field &, WriteBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject");
+}
+
+/// Binary deserialization of a single Field is not supported: always throws NOT_IMPLEMENTED.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeBinary(Field &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject");
+}
+
+/// Binary serialization of a single row is not supported: always throws NOT_IMPLEMENTED.
+template <typename Parser>
+void SerializationObject<Parser>::serializeBinary(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject");
+}
+
+/// Binary deserialization of a single row is not supported: always throws NOT_IMPLEMENTED.
+template <typename Parser>
+void SerializationObject<Parser>::deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for SerializationObject");
+}
+
+/// TODO: use a format other than JSON in text serializations.
+
+/// Writes row @row_num as a single-line JSON object: {"path":value,...}.
+/// Keys are the full paths of the subcolumns, values are written by serializeTextFromSubcolumn.
+template <typename Parser>
+void SerializationObject<Parser>::serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & object_column = assert_cast<const ColumnObject &>(column);
+    const auto & subcolumns = object_column.getSubcolumns();
+
+    writeChar('{', ostr);
+
+    bool is_first = true;
+    for (const auto & entry : subcolumns)
+    {
+        /// Separator goes before every element except the first one.
+        if (!is_first)
+            writeCString(",", ostr);
+        is_first = false;
+
+        writeDoubleQuoted(entry->path.getPath(), ostr);
+        writeChar(':', ostr);
+        serializeTextFromSubcolumn(entry->data, row_num, ostr, settings);
+    }
+
+    writeChar('}', ostr);
+}
+
+/// Writes the value of @subcolumn at row @row_num as JSON (pretty-printed if @pretty_json).
+/// A finalized subcolumn is written from its single finalized column.
+/// Otherwise the row is located among the default prefix and the parts of the subcolumn.
+/// Throws ARGUMENT_OUT_OF_BOUND if @row_num is beyond all parts.
+template <typename Parser>
+template <bool pretty_json>
+void SerializationObject<Parser>::serializeTextFromSubcolumn(
+    const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
+{
+    const auto & least_common_type = subcolumn.getLeastCommonType();
+
+    /// Writes one row of the given column with the given serialization in the requested JSON flavour.
+    const auto write_row = [&](const auto & serialization, const IColumn & source, size_t row)
+    {
+        if constexpr (pretty_json)
+            serialization->serializeTextJSONPretty(source, row, ostr, settings, indent);
+        else
+            serialization->serializeTextJSON(source, row, ostr, settings);
+    };
+
+    if (subcolumn.isFinalized())
+    {
+        const auto & finalized_column = subcolumn.getFinalizedColumn();
+        auto info = least_common_type->getSerializationInfo(finalized_column);
+        auto serialization = least_common_type->getSerialization(*info);
+        write_row(serialization, finalized_column, row_num);
+        return;
+    }
+
+    size_t pos = row_num;
+    if (pos < subcolumn.getNumberOfDefaultsInPrefix())
+    {
+        /// The row is one of the leading defaults: materialize a single default value.
+        /// Suboptimal, but it should happen rarely.
+        auto default_column = subcolumn.getLeastCommonType()->createColumn();
+        default_column->insertDefault();
+
+        auto info = least_common_type->getSerializationInfo(*default_column);
+        auto serialization = least_common_type->getSerialization(*info);
+        write_row(serialization, *default_column, 0);
+        return;
+    }
+
+    /// Skip the default prefix and walk the parts until the one that holds the row.
+    pos -= subcolumn.getNumberOfDefaultsInPrefix();
+    for (const auto & part : subcolumn.getData())
+    {
+        if (pos >= part->size())
+        {
+            pos -= part->size();
+            continue;
+        }
+
+        auto part_type = getDataTypeByColumn(*part);
+        auto info = part_type->getSerializationInfo(*part);
+        auto serialization = part_type->getSerialization(*info);
+        write_row(serialization, *part, pos);
+        return;
+    }
+
+    throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for text serialization is out of range", row_num);
+}
+
+/// Plain text output is the single-line JSON produced by serializeTextImpl, written as is.
+template <typename Parser>
+void SerializationObject<Parser>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeTextImpl(column, row_num, ostr, settings);
+}
+
+/// Renders the row as JSON into a temporary string and writes that string escaped.
+template <typename Parser>
+void SerializationObject<Parser>::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    WriteBufferFromOwnString json_buf;
+    serializeTextImpl(column, row_num, json_buf, settings);
+    writeEscapedString(json_buf.str(), ostr);
+}
+
+/// Renders the row as JSON into a temporary string and writes that string quoted.
+template <typename Parser>
+void SerializationObject<Parser>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    WriteBufferFromOwnString json_buf;
+    serializeTextImpl(column, row_num, json_buf, settings);
+    writeQuotedString(json_buf.str(), ostr);
+}
+
+/// JSON output is the single-line JSON produced by serializeTextImpl, written as is.
+template <typename Parser>
+void SerializationObject<Parser>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeTextImpl(column, row_num, ostr, settings);
+}
+
+/// Renders the row as JSON into a temporary string and writes that string as a CSV field.
+template <typename Parser>
+void SerializationObject<Parser>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    WriteBufferFromOwnString json_buf;
+    serializeTextImpl(column, row_num, json_buf, settings);
+    writeCSVString(json_buf.str(), ostr);
+}
+
+/// Writes row @row_num as a multi-line JSON object.
+/// Every key is placed on its own line, indented by (indent + 1) * 4 spaces;
+/// the closing brace is indented by indent * 4 spaces.
+template <typename Parser>
+void SerializationObject<Parser>::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
+{
+    const auto & object_column = assert_cast<const ColumnObject &>(column);
+    const auto & subcolumns = object_column.getSubcolumns();
+
+    writeCString("{\n", ostr);
+
+    bool is_first = true;
+    for (const auto & entry : subcolumns)
+    {
+        /// Separator goes before every element except the first one.
+        if (!is_first)
+            writeCString(",\n", ostr);
+        is_first = false;
+
+        writeChar(' ', (indent + 1) * 4, ostr);
+        writeDoubleQuoted(entry->path.getPath(), ostr);
+        writeCString(": ", ostr);
+        serializeTextFromSubcolumn<true>(entry->data, row_num, ostr, settings, indent + 1);
+    }
+
+    writeChar('\n', ostr);
+    writeChar(' ', indent * 4, ostr);
+    writeChar('}', ostr);
+}
+
+
+/// Creates the serialization for type Object with the given schema format.
+/// Only "json" is known: it uses simdjson if available, otherwise rapidjson.
+/// Throws NOT_IMPLEMENTED for any other format or if neither JSON library was built in.
+SerializationPtr getObjectSerialization(const String & schema_format)
+{
+    if (schema_format != "json")
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown schema format '{}'", schema_format);
+
+#if USE_SIMDJSON
+    return std::make_shared<SerializationObject<JSONDataParser<SimdJSONParser>>>();
+#elif USE_RAPIDJSON
+    return std::make_shared<SerializationObject<JSONDataParser<RapidJSONParser>>>();
+#else
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+        "To use data type Object with JSON format ClickHouse should be built with Simdjson or Rapidjson");
+#endif
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.h
new file mode 100644
index 00000000000..de54f5739f5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationObject.h
@@ -0,0 +1,119 @@
+#pragma once
+
+#include <Columns/ColumnObject.h>
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+#include <Common/ObjectPool.h>
+
+namespace DB
+{
+
+/** Serialization for data type Object.
+ * Only text serialization/deserialization is supported,
+ * plus binary bulk serialization/deserialization without position independent
+ * encoding, i.e. serialization/deserialization into Native format.
+ * @Parser is the type used to parse the textual representation of an object.
+ */
+template <typename Parser>
+class SerializationObject : public ISerialization
+{
+public:
+ /** In Native format ColumnObject can be serialized
+ * in two formats: as Tuple or as String.
+ * The format is the following:
+ *
+ * <serialization_kind> 1 byte -- 0 if Tuple, 1 if String.
+ * [type_name] -- Only for tuple serialization.
+ * ... data of internal column ...
+ *
+ * ClickHouse client serializes objects as tuples.
+ * String serialization exists for clients, which cannot
+ * do parsing by themselves and they can send raw data as
+ * string. It will be parsed on the server side.
+ */
+
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+
+ /// Per-value binary serialization (of a Field or of a single row).
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /// Text serialization of a single row in the various text formats.
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+
+ /// Text deserialization of a single row in the various text formats.
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+private:
+ /// The <serialization_kind> byte of the Native format described above.
+ enum class BinarySerializationKind : UInt8
+ {
+ TUPLE = 0,
+ STRING = 1,
+ };
+
+ /// States of binary bulk (de)serialization; only declared here.
+ struct SerializeStateObject;
+ struct DeserializeStateObject;
+
+ /// Reads the internal column written with kind STRING.
+ void deserializeBinaryBulkFromString(
+ ColumnObject & column_object,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeStateObject & state,
+ SubstreamsCache * cache) const;
+
+ /// Reads the internal column written with kind TUPLE.
+ void deserializeBinaryBulkFromTuple(
+ ColumnObject & column_object,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeStateObject & state,
+ SubstreamsCache * cache) const;
+
+ /// Works with both serialize and deserialize settings, hence the template.
+ template <typename TSettings>
+ void checkSerializationIsSupported(const TSettings & settings) const;
+
+ /// @reader is a callable that fills a String with the text of one object.
+ template <typename Reader>
+ void deserializeTextImpl(IColumn & column, Reader && reader) const;
+
+ void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const;
+
+ /// Writes one value of a subcolumn as JSON; @indent is used only when @pretty_json is true.
+ template <bool pretty_json = false>
+ void serializeTextFromSubcolumn(const ColumnObject::Subcolumn & subcolumn, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent = 0) const;
+
+ /// Pool of parser objects to make SerializationObject thread safe.
+ mutable SimpleObjectPool<Parser> parsers_pool;
+};
+
+/// Returns the serialization for type Object with the given schema format.
+SerializationPtr getObjectSerialization(const String & schema_format);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.cpp
new file mode 100644
index 00000000000..4d7514271ad
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.cpp
@@ -0,0 +1,387 @@
+#include <DataTypes/Serializations/SerializationSparse.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Columns/IColumn.h>
+#include <Columns/ColumnVector.h>
+#include <Columns/ColumnSparse.h>
+#include <Common/assert_cast.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/VarInt.h>
+
+namespace DB
+{
+
+/// Error codes referenced by this file; declared extern, so they are defined elsewhere.
+namespace ErrorCodes
+{
+ extern const int NOT_IMPLEMENTED;
+ extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+
+/// Bit set in the last group size written by serializeOffsets
+/// and stripped from every group size read by deserializeOffsets.
+/// 2^62, because VarInt supports only values < 2^63.
+constexpr auto END_OF_GRANULE_FLAG = 1ULL << 62;
+
+/// State kept between consecutive reads of a sparse column.
+struct DeserializeStateSparse : public ISerialization::DeserializeBinaryBulkState
+{
+ /// Number of default values, that remain from previous read.
+ size_t num_trailing_defaults = 0;
+ /// Do we have non-default value after @num_trailing_defaults?
+ bool has_value_after_defaults = false;
+ /// State of the nested serialization.
+ ISerialization::DeserializeBinaryBulkStatePtr nested;
+
+ /// Forgets the leftovers of the previous read; @nested is left untouched.
+ void reset()
+ {
+ num_trailing_defaults = 0;
+ has_value_after_defaults = false;
+ }
+};
+
+/// Writes @offsets as a sequence of var-uint group sizes: for every offset, the distance
+/// from the current position (initially @start) to that offset.
+/// The final group, the distance from the last position to @end (or 0 if already past it),
+/// is marked with END_OF_GRANULE_FLAG.
+void serializeOffsets(const IColumn::Offsets & offsets, WriteBuffer & ostr, size_t start, size_t end)
+{
+    size_t position = start;
+    for (const auto offset : offsets)
+    {
+        size_t gap = offset - position;
+        writeVarUInt(gap, ostr);
+        /// Step over the gap and over the row at the offset itself.
+        position += gap + 1;
+    }
+
+    size_t tail = 0;
+    if (position < end)
+        tail = end - position;
+
+    tail |= END_OF_GRANULE_FLAG;
+    writeVarUInt(tail, ostr);
+}
+
+
+/// Reads var-uint group sizes from @istr and appends the offsets of non-default values to @offsets.
+/// Returns number of read rows.
+/// @start is the size of column before reading offsets.
+/// @limit is the maximum number of rows to read; 0 disables the limit checks.
+/// @state carries defaults (and possibly one pending value) that did not fit into the previous read.
+size_t deserializeOffsets(IColumn::Offsets & offsets,
+ ReadBuffer & istr, size_t start, size_t limit, DeserializeStateSparse & state)
+{
+ /// The whole read is covered by defaults left from the previous read: nothing to parse.
+ if (limit && state.num_trailing_defaults >= limit)
+ {
+ state.num_trailing_defaults -= limit;
+ return limit;
+ }
+
+ /// Just try to guess number of offsets.
+ offsets.reserve(offsets.size()
+ + static_cast<size_t>(limit * (1.0 - ColumnSparse::DEFAULT_RATIO_FOR_SPARSE_SERIALIZATION)));
+
+ bool first = true;
+ size_t total_rows = state.num_trailing_defaults;
+ /// A value was pending after the leftover defaults: emit its offset first.
+ if (state.has_value_after_defaults)
+ {
+ offsets.push_back(start + state.num_trailing_defaults);
+ first = false;
+
+ state.has_value_after_defaults = false;
+ state.num_trailing_defaults = 0;
+ ++total_rows;
+ }
+
+ size_t group_size;
+ while (!istr.eof())
+ {
+ readVarUInt(group_size, istr);
+
+ /// Split the flag from the actual number of defaults in the group.
+ bool end_of_granule = group_size & END_OF_GRANULE_FLAG;
+ group_size &= ~END_OF_GRANULE_FLAG;
+
+ size_t next_total_rows = total_rows + group_size;
+ /// Defaults left from previous groups are counted in front of this group.
+ group_size += state.num_trailing_defaults;
+
+ if (limit && next_total_rows >= limit)
+ {
+ /// If it was not last group in granule,
+ /// we have to add current non-default value at further reads.
+ state.num_trailing_defaults = next_total_rows - limit;
+ state.has_value_after_defaults = !end_of_granule;
+ return limit;
+ }
+
+ if (end_of_granule)
+ {
+ /// The group has no value after it: only remember the defaults.
+ state.has_value_after_defaults = false;
+ state.num_trailing_defaults = group_size;
+ }
+ else
+ {
+ /// If we add value to column for first time in current read,
+ /// start from column's current size, because it can have some defaults after last offset,
+ /// otherwise just start from previous offset.
+ size_t start_of_group = start;
+ if (!first && !offsets.empty())
+ start_of_group = offsets.back() + 1;
+ if (first)
+ first = false;
+
+ offsets.push_back(start_of_group + group_size);
+
+ state.num_trailing_defaults = 0;
+ state.has_value_after_defaults = false;
+ /// Count the non-default value itself.
+ ++next_total_rows;
+ }
+
+ total_rows = next_total_rows;
+ }
+
+ return total_rows;
+}
+
+}
+
+/// Wraps @nested_, the serialization that is used for the values of the sparse column.
+SerializationSparse::SerializationSparse(const SerializationPtr & nested_)
+ : nested(nested_)
+{
+}
+
+/// Wraps the serialization @prev into SerializationSparse.
+SerializationPtr SerializationSparse::SubcolumnCreator::create(const SerializationPtr & prev) const
+{
+ return std::make_shared<SerializationSparse>(prev);
+}
+
+/// Builds a ColumnSparse from the values column @prev and the offsets and size stored in the creator.
+ColumnPtr SerializationSparse::SubcolumnCreator::create(const ColumnPtr & prev) const
+{
+ return ColumnSparse::create(prev, offsets, size);
+}
+
+/// Enumerates two groups of streams: first the SparseOffsets stream (reported as UInt64 data),
+/// then, under SparseElements, everything that the nested serialization enumerates for the values.
+/// If @data has no column, the column-related parts of the substream data are null and the size is 0.
+void SerializationSparse::enumerateStreams(
+    EnumerateStreamsSettings & settings,
+    const StreamCallback & callback,
+    const SubstreamData & data) const
+{
+    const ColumnSparse * sparse = nullptr;
+    size_t rows = 0;
+    if (data.column)
+    {
+        sparse = &assert_cast<const ColumnSparse &>(*data.column);
+        rows = sparse->size();
+    }
+
+    settings.path.push_back(Substream::SparseOffsets);
+    auto offsets_data = SubstreamData(std::make_shared<SerializationNumber<UInt64>>())
+        .withType(data.type ? std::make_shared<DataTypeUInt64>() : nullptr)
+        .withColumn(sparse ? sparse->getOffsetsPtr() : nullptr)
+        .withSerializationInfo(data.serialization_info);
+
+    settings.path.back().data = offsets_data;
+    callback(settings.path);
+
+    /// Reuse the same path element for the values.
+    settings.path.back() = Substream::SparseElements;
+    settings.path.back().creator = std::make_shared<SubcolumnCreator>(offsets_data.column, rows);
+    settings.path.back().data = data;
+
+    auto values_data = SubstreamData(nested)
+        .withType(data.type)
+        .withColumn(sparse ? sparse->getValuesPtr() : nullptr)
+        .withSerializationInfo(data.serialization_info);
+
+    nested->enumerateStreams(settings, callback, values_data);
+    settings.path.pop_back();
+}
+
+/// Forwards the prefix to the nested serialization under SparseElements.
+/// For a ColumnSparse the values column is passed, for any other column the column itself.
+void SerializationSparse::serializeBinaryBulkStatePrefix(
+    const IColumn & column,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    settings.path.push_back(Substream::SparseElements);
+
+    const auto * sparse = typeid_cast<const ColumnSparse *>(&column);
+    if (!sparse)
+        nested->serializeBinaryBulkStatePrefix(column, settings, state);
+    else
+        nested->serializeBinaryBulkStatePrefix(sparse->getValuesColumn(), settings, state);
+
+    settings.path.pop_back();
+}
+
+/// Serializes rows [offset, offset + limit) of @column in sparse form:
+/// the positions of non-default rows go to the SparseOffsets stream (if it exists),
+/// the non-default values go to the nested serialization under SparseElements.
+/// @column may be a ColumnSparse or any other column.
+void SerializationSparse::serializeBinaryBulkWithMultipleStreams(
+    const IColumn & column,
+    size_t offset,
+    size_t limit,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    const size_t total_rows = column.size();
+
+    auto indices_column = DataTypeNumber<IColumn::Offset>().createColumn();
+    auto & indices = assert_cast<ColumnVector<IColumn::Offset> &>(*indices_column).getData();
+    column.getIndicesOfNonDefaultRows(indices, offset, limit);
+
+    settings.path.push_back(Substream::SparseOffsets);
+    if (auto * offsets_stream = settings.getter(settings.path))
+    {
+        /// End of the serialized range, clamped to the size of the column.
+        size_t range_end = total_rows;
+        if (limit && offset + limit < total_rows)
+            range_end = offset + limit;
+
+        serializeOffsets(indices, *offsets_stream, offset, range_end);
+    }
+
+    if (!indices.empty())
+    {
+        settings.path.back() = Substream::SparseElements;
+
+        const auto * sparse = typeid_cast<const ColumnSparse *>(&column);
+        if (!sparse)
+        {
+            /// Ordinary column: pick the non-default rows and serialize all of them.
+            auto selected = column.index(*indices_column, 0);
+            nested->serializeBinaryBulkWithMultipleStreams(*selected, 0, selected->size(), settings, state);
+        }
+        else
+        {
+            /// Sparse column: serialize the range of the values column
+            /// between the first and the last non-default row, inclusive.
+            const auto & values = sparse->getValuesColumn();
+            size_t first_value = sparse->getValueIndex(indices[0]);
+            size_t last_value = sparse->getValueIndex(indices.back());
+            nested->serializeBinaryBulkWithMultipleStreams(values, first_value, last_value - first_value + 1, settings, state);
+        }
+    }
+
+    settings.path.pop_back();
+}
+
+void SerializationSparse::serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ settings.path.push_back(Substream::SparseElements); /// The nested suffix is written under the values substream path.
+ nested->serializeBinaryBulkStateSuffix(settings, state);
+ settings.path.pop_back();
+}
+
+void SerializationSparse::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state) const
+{
+    /// `state` is assigned only after the nested prefix has been read under the values substream path.
+    auto sparse_state = std::make_shared<DeserializeStateSparse>();
+    settings.path.push_back(Substream::SparseElements);
+    nested->deserializeBinaryBulkStatePrefix(settings, sparse_state->nested);
+    settings.path.pop_back();
+
+    state = std::move(sparse_state);
+}
+
+void SerializationSparse::deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const
+{
+ auto * state_sparse = checkAndGetState<DeserializeStateSparse>(state);
+
+ if (auto cached_column = getFromSubstreamsCache(cache, settings.path)) /// A column already cached for this path is returned as is.
+ {
+ column = cached_column;
+ return;
+ }
+
+ if (!settings.continuous_reading)
+ state_sparse->reset(); /// Non-continuous reading starts from a reset state.
+
+ auto mutable_column = column->assumeMutable();
+ auto & column_sparse = assert_cast<ColumnSparse &>(*mutable_column);
+ auto & offsets_data = column_sparse.getOffsetsData();
+
+ size_t old_size = offsets_data.size(); /// Remembered to count how many offsets this call appends.
+
+ size_t read_rows = 0; /// Stays 0 when the getter provides no offsets stream.
+ settings.path.push_back(Substream::SparseOffsets);
+ if (auto * stream = settings.getter(settings.path))
+ read_rows = deserializeOffsets(offsets_data, *stream, column_sparse.size(), limit, *state_sparse);
+
+ auto & values_column = column_sparse.getValuesPtr();
+ size_t values_limit = offsets_data.size() - old_size; /// One value is requested per newly appended offset.
+
+ settings.path.back() = Substream::SparseElements;
+ /// Do not use substream cache while reading values column, because ColumnSparse can be cached only in a whole.
+ nested->deserializeBinaryBulkWithMultipleStreams(values_column, values_limit, settings, state_sparse->nested, nullptr);
+ settings.path.pop_back();
+
+ if (offsets_data.size() + 1 != values_column->size()) /// The values column must hold exactly one element more than there are offsets.
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent sizes of values and offsets in SerializationSparse."
+ " Offsets size: {}, values size: {}", offsets_data.size(), values_column->size());
+
+ /// 'insertManyDefaults' just increases size of column.
+ column_sparse.insertManyDefaults(read_rows);
+ column = std::move(mutable_column);
+ addToSubstreamsCache(cache, settings.path, column); /// The path has been popped, so the whole sparse column is cached under the caller's path.
+}
+
+/// All methods below just wrap nested serialization.
+
+void SerializationSparse::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested->serializeBinary(field, ostr, settings); /// A single Field is forwarded to the nested serialization unchanged.
+}
+
+void SerializationSparse::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested->deserializeBinary(field, istr, settings); /// A single Field is read by the nested serialization unchanged.
+}
+
+void SerializationSparse::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & sparse = assert_cast<const ColumnSparse &>(column); /// The nested serialization gets the values column and the value index of `row_num`.
+    nested->serializeBinary(sparse.getValuesColumn(), sparse.getValueIndex(row_num), ostr, settings);
+}
+
+void SerializationSparse::deserializeBinary(IColumn &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeBinary' is not implemented for SerializationSparse"); /// Always throws: reading one binary value into a column is not supported here.
+}
+
+void SerializationSparse::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & sparse = assert_cast<const ColumnSparse &>(column); /// The nested serialization gets the values column and the value index of `row_num`.
+    nested->serializeTextEscaped(sparse.getValuesColumn(), sparse.getValueIndex(row_num), ostr, settings);
+}
+
+void SerializationSparse::deserializeTextEscaped(IColumn &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextEscaped' is not implemented for SerializationSparse"); /// Always throws: escaped-text reading is not supported here.
+}
+
+void SerializationSparse::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & sparse = assert_cast<const ColumnSparse &>(column); /// The nested serialization gets the values column and the value index of `row_num`.
+    nested->serializeTextQuoted(sparse.getValuesColumn(), sparse.getValueIndex(row_num), ostr, settings);
+}
+
+void SerializationSparse::deserializeTextQuoted(IColumn &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextQuoted' is not implemented for SerializationSparse"); /// Always throws: quoted-text reading is not supported here.
+}
+
+void SerializationSparse::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & sparse = assert_cast<const ColumnSparse &>(column); /// The nested serialization gets the values column and the value index of `row_num`.
+    nested->serializeTextCSV(sparse.getValuesColumn(), sparse.getValueIndex(row_num), ostr, settings);
+}
+
+void SerializationSparse::deserializeTextCSV(IColumn &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextCSV' is not implemented for SerializationSparse"); /// Always throws: CSV reading is not supported here.
+}
+
+void SerializationSparse::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & sparse = assert_cast<const ColumnSparse &>(column); /// The nested serialization gets the values column and the value index of `row_num`.
+    nested->serializeText(sparse.getValuesColumn(), sparse.getValueIndex(row_num), ostr, settings);
+}
+
+void SerializationSparse::deserializeWholeText(IColumn &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeWholeText' is not implemented for SerializationSparse"); /// Always throws: whole-text reading is not supported here.
+}
+
+void SerializationSparse::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & sparse = assert_cast<const ColumnSparse &>(column); /// The nested serialization gets the values column and the value index of `row_num`.
+    nested->serializeTextJSON(sparse.getValuesColumn(), sparse.getValueIndex(row_num), ostr, settings);
+}
+
+void SerializationSparse::deserializeTextJSON(IColumn &, ReadBuffer &, const FormatSettings &) const
+{
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextJSON' is not implemented for SerializationSparse"); /// Always throws: JSON reading is not supported here.
+}
+
+void SerializationSparse::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & sparse = assert_cast<const ColumnSparse &>(column); /// The nested serialization gets the values column and the value index of `row_num`.
+    nested->serializeTextXML(sparse.getValuesColumn(), sparse.getValueIndex(row_num), ostr, settings);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.h
new file mode 100644
index 00000000000..2d31fba2509
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationSparse.h
@@ -0,0 +1,104 @@
+#pragma once
+
+#include <DataTypes/Serializations/ISerialization.h>
+
+namespace DB
+{
+
+
+/** Serialization for sparse representation.
+ * Only '{serialize,deserialize}BinaryBulk' make sense.
+ * Format:
+ * Values and offsets are written to separate substreams.
+ * Only non-default values are written.
+ *
+ * Offsets have a position-independent format: the i-th offset is
+ * the number of default values that precede the i-th non-default value.
+ * Offsets are written in VarInt encoding.
+ * Additionally, at the end of every call of 'serializeBinaryBulkWithMultipleStreams',
+ * the number of default values in the suffix of the part of the column
+ * that is currently being written is written. This value is also marked with a flag that means the end of a portion of data.
+ * This value is used, e.g., to allow independent reading of granules in MergeTree.
+ */
+class SerializationSparse final : public ISerialization
+{
+public:
+    explicit SerializationSparse(const SerializationPtr & nested_); /// `nested_` is the serialization that the calls are delegated to.
+
+    Kind getKind() const override { return Kind::SPARSE; }
+
+    void enumerateStreams(
+        EnumerateStreamsSettings & settings,
+        const StreamCallback & callback,
+        const SubstreamData & data) const override;
+
+    void serializeBinaryBulkStatePrefix(
+        const IColumn & column,
+        SerializeBinaryBulkSettings & settings,
+        SerializeBinaryBulkStatePtr & state) const override;
+
+    void serializeBinaryBulkStateSuffix(
+        SerializeBinaryBulkSettings & settings,
+        SerializeBinaryBulkStatePtr & state) const override;
+
+    void deserializeBinaryBulkStatePrefix(
+        DeserializeBinaryBulkSettings & settings,
+        DeserializeBinaryBulkStatePtr & state) const override;
+
+    /// Allows to write ColumnSparse and other columns in sparse serialization.
+    void serializeBinaryBulkWithMultipleStreams(
+        const IColumn & column,
+        size_t offset,
+        size_t limit,
+        SerializeBinaryBulkSettings & settings,
+        SerializeBinaryBulkStatePtr & state) const override;
+
+    /// Allows to read only ColumnSparse.
+    void deserializeBinaryBulkWithMultipleStreams(
+        ColumnPtr & column,
+        size_t limit,
+        DeserializeBinaryBulkSettings & settings,
+        DeserializeBinaryBulkStatePtr & state,
+        SubstreamsCache * cache) const override;
+
+    void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; /// Field overloads forward to the nested serialization.
+    void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+    void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; /// Row-wise serializers require a ColumnSparse.
+    void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; /// Row-wise deserializers into a column throw NOT_IMPLEMENTED.
+
+    void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+    void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+    void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+    void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+    void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+
+private:
+    struct SubcolumnCreator : public ISubcolumnCreator /// Registered on the substream path by enumerateStreams.
+    {
+        const ColumnPtr offsets;
+        const size_t size;
+
+        SubcolumnCreator(const ColumnPtr & offsets_, size_t size_)
+            : offsets(offsets_), size(size_) {}
+
+        DataTypePtr create(const DataTypePtr & prev) const override { return prev; } /// The data type is passed through unchanged.
+        SerializationPtr create(const SerializationPtr & prev) const override;
+        ColumnPtr create(const ColumnPtr & prev) const override;
+    };
+
+    SerializationPtr nested; /// Serialization that the calls are delegated to.
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationString.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationString.cpp
new file mode 100644
index 00000000000..46fd9d5272d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationString.cpp
@@ -0,0 +1,365 @@
+#include <DataTypes/Serializations/SerializationString.h>
+
+#include <Columns/ColumnString.h>
+
+#include <Common/typeid_cast.h>
+#include <Common/assert_cast.h>
+
+#include <Core/Field.h>
+
+#include <Formats/FormatSettings.h>
+
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <IO/VarInt.h>
+#include <IO/ReadBufferFromString.h>
+
+#include <base/unit.h>
+
+#ifdef __SSE2__
+ #include <emmintrin.h>
+#endif
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int INCORRECT_DATA;
+ extern const int TOO_LARGE_STRING_SIZE;
+}
+
+void SerializationString::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const String & value = field.get<const String &>();
+    const auto max_size = settings.max_binary_string_size; /// Zero disables the size check.
+    if (max_size && value.size() > max_size)
+        throw Exception(
+            ErrorCodes::TOO_LARGE_STRING_SIZE,
+            "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting "
+            "format_binary_max_string_size",
+            value.size(), max_size);
+
+    writeVarUInt(value.size(), ostr); /// Wire format: VarUInt length, then the raw bytes.
+    writeString(value, ostr);
+}
+
+
+void SerializationString::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    UInt64 length;
+    readVarUInt(length, istr);
+    const auto max_size = settings.max_binary_string_size; /// Zero disables the size check.
+    if (max_size && length > max_size)
+        throw Exception(
+            ErrorCodes::TOO_LARGE_STRING_SIZE,
+            "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting "
+            "format_binary_max_string_size",
+            length, max_size);
+
+    field = String(); /// Make the field hold a String, then read the payload straight into its storage.
+    String & value = field.get<String &>();
+    value.resize(length);
+    istr.readStrict(value.data(), length);
+}
+
+
+void SerializationString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const StringRef & ref = assert_cast<const ColumnString &>(column).getDataAt(row_num);
+    const auto max_size = settings.max_binary_string_size; /// Zero disables the size check.
+    if (max_size && ref.size > max_size)
+        throw Exception(
+            ErrorCodes::TOO_LARGE_STRING_SIZE,
+            "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting "
+            "format_binary_max_string_size",
+            ref.size, max_size);
+
+    writeVarUInt(ref.size, ostr); /// Wire format: VarUInt length, then the raw bytes.
+    writeString(ref, ostr);
+}
+
+
+void SerializationString::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ ColumnString & column_string = assert_cast<ColumnString &>(column);
+ ColumnString::Chars & data = column_string.getChars();
+ ColumnString::Offsets & offsets = column_string.getOffsets();
+
+ UInt64 size;
+ readVarUInt(size, istr);
+ if (settings.max_binary_string_size && size > settings.max_binary_string_size) /// A zero max_binary_string_size disables the check.
+ throw Exception(
+ ErrorCodes::TOO_LARGE_STRING_SIZE,
+ "Too large string size: {}. The maximum is: {}. To increase the maximum, use setting "
+ "format_binary_max_string_size",
+ size,
+ settings.max_binary_string_size);
+
+ size_t old_chars_size = data.size();
+ size_t offset = old_chars_size + size + 1; /// New end of the chars buffer: the payload plus one terminating zero byte.
+ offsets.push_back(offset); /// Appended before the read; removed again in the catch block on failure.
+
+ try
+ {
+ data.resize(offset);
+ istr.readStrict(reinterpret_cast<char*>(&data[offset - size - 1]), size);
+ data.back() = 0;
+ }
+ catch (...)
+ {
+ offsets.pop_back(); /// Restore both arrays to their sizes before this call, then rethrow.
+ data.resize_assume_reserved(old_chars_size);
+ throw;
+ }
+}
+
+
+void SerializationString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+    const ColumnString & column_string = typeid_cast<const ColumnString &>(column);
+    const ColumnString::Chars & chars = column_string.getChars();
+    const ColumnString::Offsets & row_ends = column_string.getOffsets();
+
+    const size_t rows = column_string.size();
+    if (rows == 0)
+        return;
+
+    /// A zero limit, or a limit reaching past the last row, means "until the end of the column".
+    const size_t end = (limit && offset + limit < rows) ? offset + limit : rows;
+
+    /// A row occupying [row_begin, row_end) is written as a VarUInt length and its bytes without the last one.
+    const auto write_row = [&](size_t row_begin, size_t row_end)
+    {
+        const UInt64 str_size = row_end - row_begin - 1;
+        writeVarUInt(str_size, ostr);
+        ostr.write(reinterpret_cast<const char *>(chars.data() + row_begin), str_size);
+    };
+
+    if (offset == 0)
+    {
+        write_row(0, row_ends[0]); /// Row 0 starts at the beginning of the chars buffer.
+        ++offset;
+    }
+
+    for (size_t i = offset; i < end; ++i)
+        write_row(row_ends[i - 1], row_ends[i]);
+}
+
+
+template <int UNROLL_TIMES>
+static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnString::Offsets & offsets, ReadBuffer & istr, size_t limit)
+{
+ size_t offset = data.size(); /// Running end offset in `data`; new rows are appended after the existing bytes.
+ for (size_t i = 0; i < limit; ++i)
+ {
+ if (istr.eof()) /// Fewer than `limit` rows are read when the stream ends early.
+ break;
+
+ UInt64 size;
+ readVarUInt(size, istr);
+
+ static constexpr size_t max_string_size = 16_GiB; /// Arbitrary value to prevent logical errors and overflows, but large enough.
+ if (size > max_string_size)
+ throw Exception(
+ ErrorCodes::TOO_LARGE_STRING_SIZE,
+ "Too large string size: {}. The maximum is: {}.",
+ size,
+ max_string_size);
+
+ offset += size + 1; /// One extra byte per row for the terminating zero written at the end of the iteration.
+ offsets.push_back(offset);
+
+ data.resize(offset);
+
+ if (size)
+ {
+#ifdef __SSE2__
+ /// An optimistic branch in which more efficient copying is possible.
+ if (offset + 16 * UNROLL_TIMES <= data.capacity() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end()) /// Requires slack past both the destination end and the source end.
+ {
+ const __m128i * sse_src_pos = reinterpret_cast<const __m128i *>(istr.position());
+ const __m128i * sse_src_end = sse_src_pos + (size + (16 * UNROLL_TIMES - 1)) / 16 / UNROLL_TIMES * UNROLL_TIMES; /// `size` rounded up to whole groups of UNROLL_TIMES 16-byte blocks.
+ __m128i * sse_dst_pos = reinterpret_cast<__m128i *>(&data[offset - size - 1]);
+
+ while (sse_src_pos < sse_src_end)
+ {
+ for (size_t j = 0; j < UNROLL_TIMES; ++j)
+ _mm_storeu_si128(sse_dst_pos + j, _mm_loadu_si128(sse_src_pos + j)); /// Unaligned 16-byte load and store.
+
+ sse_src_pos += UNROLL_TIMES;
+ sse_dst_pos += UNROLL_TIMES;
+ }
+
+ istr.position() += size; /// Only `size` bytes are consumed from the stream, although whole blocks were copied.
+ }
+ else
+#endif
+ {
+ istr.readStrict(reinterpret_cast<char*>(&data[offset - size - 1]), size);
+ }
+ }
+
+ data[offset - 1] = 0;
+ }
+}
+
+
+void SerializationString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const
+{
+    ColumnString & column_string = typeid_cast<ColumnString &>(column);
+    ColumnString::Chars & chars = column_string.getChars();
+    ColumnString::Offsets & row_ends = column_string.getOffsets();
+
+    /// Expected number of bytes per row in `chars`. By default reserve only for empty strings.
+    double expected_row_bytes = 1;
+    const bool hint_is_usable = avg_value_size_hint > 0.0 && avg_value_size_hint > sizeof(row_ends[0]);
+    if (hint_is_usable)
+    {
+        /// The size of one offset is subtracted from the hint; the slack multiplier is randomly selected.
+        constexpr auto reserve_multiplier = 1.2;
+        expected_row_bytes = (avg_value_size_hint - sizeof(row_ends[0])) * reserve_multiplier;
+    }
+
+    /// Never reserve for too big size.
+    const size_t chars_to_reserve = chars.size() + static_cast<size_t>(std::ceil(limit * expected_row_bytes));
+    if (chars_to_reserve < 256 * 1024 * 1024)
+    {
+        try
+        {
+            chars.reserve(chars_to_reserve);
+        }
+        catch (Exception & e)
+        {
+            e.addMessage(
+                "(avg_value_size_hint = " + toString(avg_value_size_hint)
+                + ", avg_chars_size = " + toString(expected_row_bytes)
+                + ", limit = " + toString(limit) + ")");
+            throw;
+        }
+    }
+
+    row_ends.reserve(row_ends.size() + limit);
+
+    /// The larger the expected row, the more 16-byte blocks the copy loop handles per iteration.
+    if (expected_row_bytes >= 64)
+        deserializeBinarySSE2<4>(chars, row_ends, istr, limit);
+    else if (expected_row_bytes >= 48)
+        deserializeBinarySSE2<3>(chars, row_ends, istr, limit);
+    else if (expected_row_bytes >= 32)
+        deserializeBinarySSE2<2>(chars, row_ends, istr, limit);
+    else
+        deserializeBinarySSE2<1>(chars, row_ends, istr, limit);
+}
+
+
+void SerializationString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+ writeString(assert_cast<const ColumnString &>(column).getDataAt(row_num), ostr); /// Plain text form: the row's bytes are handed to writeString as is.
+}
+
+
+void SerializationString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+ writeEscapedString(assert_cast<const ColumnString &>(column).getDataAt(row_num).toView(), ostr); /// Escaped text form of the row, produced by writeEscapedString.
+}
+
+
+template <typename Reader>
+static inline void read(IColumn & column, Reader && reader)
+{
+    ColumnString & strings = assert_cast<ColumnString &>(column);
+    ColumnString::Chars & chars = strings.getChars();
+    ColumnString::Offsets & row_ends = strings.getOffsets();
+    const size_t chars_before = chars.size();
+    const size_t rows_before = row_ends.size();
+    try
+    {
+        reader(chars); /// `reader` receives the chars buffer; the terminating zero and the row end are added here.
+        chars.push_back(0);
+        row_ends.push_back(chars.size());
+    }
+    catch (...)
+    {
+        chars.resize_assume_reserved(chars_before); /// Shrink both arrays back to their sizes before the call, then rethrow.
+        row_ends.resize_assume_reserved(rows_before);
+        throw;
+    }
+}
+
+
+void SerializationString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+ read(column, [&](ColumnString::Chars & data) { readStringUntilEOFInto(data, istr); }); /// The value is read with readStringUntilEOFInto; `read` appends the zero byte and the offset.
+}
+
+
+void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+ read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); }); /// The value is read with readEscapedStringInto; `read` appends the zero byte and the offset.
+}
+
+
+void SerializationString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+ writeQuotedString(assert_cast<const ColumnString &>(column).getDataAt(row_num), ostr); /// Quoted text form of the row, produced by writeQuotedString.
+}
+
+
+void SerializationString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+ read(column, [&](ColumnString::Chars & data) { readQuotedStringInto<true>(data, istr); }); /// NOTE(review): the meaning of the `true` template argument is defined in ReadHelpers - confirm there.
+}
+
+
+void SerializationString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ writeJSONString(assert_cast<const ColumnString &>(column).getDataAt(row_num).toView(), ostr, settings); /// JSON string form of the row; the format settings are passed on to writeJSONString.
+}
+
+
+void SerializationString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (settings.json.read_objects_as_strings && !istr.eof() && *istr.position() == '{')
+    {
+        /// With read_objects_as_strings, a value starting with '{' is stored as the text that was read for it.
+        String field;
+        readJSONObjectPossiblyInvalid(field, istr);
+        read(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
+    }
+    else if (settings.json.read_numbers_as_strings && !istr.eof() && *istr.position() != '"')
+    {
+        String field;
+        readJSONField(field, istr);
+        Float64 tmp; /// Used only to check that the field parses as a number; the original text is what gets stored.
+        ReadBufferFromString buf(field);
+        if (tryReadFloatText(tmp, buf) && buf.eof()) /// The whole field must be consumed by the number parser.
+            read(column, [&](ColumnString::Chars & data) { data.insert(field.begin(), field.end()); });
+        else
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON String value here: {}", field);
+    }
+    else
+        read(column, [&](ColumnString::Chars & data) { readJSONStringInto(data, istr); });
+}
+
+
+void SerializationString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+ writeXMLStringForTextElement(assert_cast<const ColumnString &>(column).getDataAt(row_num).toView(), ostr); /// XML form of the row, produced by writeXMLStringForTextElement.
+}
+
+
+void SerializationString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+ writeCSVString<>(assert_cast<const ColumnString &>(column).getDataAt(row_num), ostr); /// CSV form of the row; writeCSVString is used with its default template arguments.
+}
+
+
+void SerializationString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ read(column, [&](ColumnString::Chars & data) { readCSVStringInto(data, istr, settings.csv); }); /// The value is read with readCSVStringInto using the CSV settings; `read` appends the zero byte and the offset.
+}
+
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationString.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationString.h
new file mode 100644
index 00000000000..f27a5116c15
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationString.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <DataTypes/Serializations/ISerialization.h>
+
+namespace DB
+{
+
+class SerializationString final : public ISerialization
+{
+public:
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override; /// Single value: VarUInt length followed by the bytes.
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; /// Writes rows starting at `offset`; a zero `limit` means up to the end of the column.
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; /// Reads at most `limit` rows; the hint is used only to size the reservation.
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; /// Also honours the json.read_objects_as_strings and json.read_numbers_as_strings settings.
+
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.cpp
new file mode 100644
index 00000000000..7f3e7619b0d
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.cpp
@@ -0,0 +1,484 @@
+#include <DataTypes/Serializations/SerializationTuple.h>
+#include <DataTypes/Serializations/SerializationNullable.h>
+#include <DataTypes/Serializations/SerializationInfoTuple.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <Core/Field.h>
+#include <Columns/ColumnTuple.h>
+#include <Common/assert_cast.h>
+#include <IO/WriteHelpers.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteBufferFromString.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
+ extern const int NOT_FOUND_COLUMN_IN_BLOCK;
+ extern const int INCORRECT_DATA;
+}
+
+
+static inline IColumn & extractElementColumn(IColumn & column, size_t idx)
+{
+ return assert_cast<ColumnTuple &>(column).getColumn(idx); /// Mutable overload: `column` must be a ColumnTuple; returns its element column number `idx`.
+}
+
+static inline const IColumn & extractElementColumn(const IColumn & column, size_t idx)
+{
+ return assert_cast<const ColumnTuple &>(column).getColumn(idx); /// Const overload: `column` must be a ColumnTuple; returns its element column number `idx`.
+}
+
+void SerializationTuple::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const auto & tuple = field.get<const Tuple &>();
+    const size_t count = elems.size();
+
+    /// Element i of the tuple is written by the i-th element serialization, one after another.
+    for (size_t i = 0; i < count; ++i)
+        elems[i]->serializeBinary(tuple[i], ostr, settings);
+}
+
+void SerializationTuple::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    field = Tuple();
+    Tuple & tuple = field.get<Tuple &>();
+    tuple.reserve(elems.size());
+
+    /// Each element is deserialized in place into a freshly appended Field.
+    for (const auto & serialization : elems)
+        serialization->deserializeBinary(tuple.emplace_back(), istr, settings);
+}
+
+void SerializationTuple::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    /// The elements of the row are written one after another, each by its own serialization.
+    const size_t count = elems.size();
+
+    for (size_t i = 0; i < count; ++i)
+        elems[i]->serializeBinary(extractElementColumn(column, i), row_num, ostr, settings);
+}
+
+
+template <typename F>
+static void addElementSafe(size_t num_elems, IColumn & column, F && impl)
+{
+ /// We use the assumption that tuples of zero size do not exist.
+ size_t old_size = column.size(); /// Size before `impl` runs; used in the catch block to detect element columns that grew.
+
+ try
+ {
+ impl(); /// `impl` is expected to append the new row to the element columns.
+
+ // Check that all columns now have the same size.
+ size_t new_size = column.size();
+ for (size_t i = 1; i < num_elems; ++i) /// Starts at 1: NOTE(review) column.size() presumably reports the size of element 0 - confirm in ColumnTuple.
+ {
+ const auto & element_column = extractElementColumn(column, i);
+ if (element_column.size() != new_size)
+ {
+ // This is not a logical error because it may work with
+ // user-supplied data.
+ throw Exception(ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH,
+ "Cannot read a tuple because not all elements are present");
+ }
+ }
+ }
+ catch (...)
+ {
+ for (size_t i = 0; i < num_elems; ++i)
+ {
+ auto & element_column = extractElementColumn(column, i);
+ if (element_column.size() > old_size)
+ element_column.popBack(1); /// Remove one row from every element column that grew, then rethrow.
+ }
+
+ throw;
+ }
+}
+
+void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ addElementSafe(elems.size(), column, [&] /// addElementSafe pops the partially added row from the element columns if reading throws.
+ {
+ for (size_t i = 0; i < elems.size(); ++i)
+ elems[i]->deserializeBinary(extractElementColumn(column, i), istr, settings);
+ });
+}
+
+void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ writeChar('(', ostr); /// Output shape: `(e1,e2,...)` with no spaces after the commas.
+ for (size_t i = 0; i < elems.size(); ++i)
+ {
+ if (i != 0)
+ writeChar(',', ostr);
+ elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings); /// Elements are always written in their quoted text form.
+ }
+ writeChar(')', ostr);
+}
+
+void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
+{
+    const size_t num_elems = elems.size();
+    assertChar('(', istr);
+
+    addElementSafe(num_elems, column, [&]
+    {
+        for (size_t pos = 0; pos < num_elems; ++pos)
+        {
+            skipWhitespaceIfAny(istr);
+            if (pos > 0)
+            {
+                assertChar(',', istr);
+                skipWhitespaceIfAny(istr);
+            }
+            elems[pos]->deserializeTextQuoted(extractElementColumn(column, pos), istr, settings);
+        }
+
+        /// A single-element tuple may carry a trailing comma: both (1) and (1,) are accepted.
+        if (num_elems == 1)
+        {
+            skipWhitespaceIfAny(istr);
+            /// The comma is optional here.
+            checkChar(',', istr);
+        }
+
+        skipWhitespaceIfAny(istr);
+        assertChar(')', istr);
+
+        if (whole && !istr.eof()) /// In whole-text mode nothing may follow the closing parenthesis.
+            throwUnexpectedDataAfterParsedValue(column, istr, settings, "Tuple");
+    });
+}
+
+void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    /// Tuples with explicit names are written as a JSON object `{"name":value,...}` when the setting allows it;
+    /// every other tuple is written as a JSON array `[value,...]`.
+    const bool as_object = settings.json.write_named_tuples_as_objects && have_explicit_names;
+
+    if (as_object)
+        writeChar('{', ostr);
+    else
+        writeChar('[', ostr);
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        if (pos > 0)
+            writeChar(',', ostr);
+        if (as_object)
+        {
+            /// Object form: the element name precedes the value.
+            writeJSONString(elems[pos]->getElementName(), ostr, settings);
+            writeChar(':', ostr);
+        }
+
+        elems[pos]->serializeTextJSON(extractElementColumn(column, pos), row_num, ostr, settings);
+    }
+
+    if (as_object)
+        writeChar('}', ostr);
+    else
+        writeChar(']', ostr);
+}
+
+void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
+{
+    /// Same object-versus-array choice as serializeTextJSON, but with one element per line,
+    /// indented by four spaces per nesting level.
+    const bool as_object = settings.json.write_named_tuples_as_objects && have_explicit_names;
+
+    if (as_object)
+        writeCString("{\n", ostr);
+    else
+        writeCString("[\n", ostr);
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        if (pos > 0)
+            writeCString(",\n", ostr);
+
+        writeChar(' ', (indent + 1) * 4, ostr);
+        if (as_object)
+        {
+            writeJSONString(elems[pos]->getElementName(), ostr, settings);
+            writeCString(": ", ostr);
+        }
+
+        elems[pos]->serializeTextJSONPretty(extractElementColumn(column, pos), row_num, ostr, settings, indent + 1);
+    }
+
+    /// The closing bracket goes on its own line at the indentation of the tuple itself.
+    writeChar('\n', ostr);
+    writeChar(' ', indent * 4, ostr);
+    if (as_object)
+        writeChar('}', ostr);
+    else
+        writeChar(']', ostr);
+}
+
+/// Reads one tuple value from JSON and appends it to `column`.
+///
+/// When `settings.json.read_named_tuples_as_objects` is set and the tuple has explicit names,
+/// the value is read as a JSON object whose keys may come in any order:
+///  - unknown keys are skipped if `settings.json.ignore_unknown_keys_in_named_tuple` is set,
+///    otherwise NOT_FOUND_COLUMN_IN_BLOCK is thrown;
+///  - elements absent from the object get a default value if
+///    `settings.json.defaults_for_missing_elements_in_named_tuple` is set, otherwise INCORRECT_DATA is thrown.
+/// In every other case the value is read positionally as a JSON array.
+///
+/// Leading whitespace before the opening bracket is skipped in both forms.
+void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    if (settings.json.read_named_tuples_as_objects
+        && have_explicit_names)
+    {
+        skipWhitespaceIfAny(istr);
+        assertChar('{', istr);
+        skipWhitespaceIfAny(istr);
+
+        addElementSafe(elems.size(), column, [&]
+        {
+            /// seen_elements[i] != 0 once element i has been read from the object.
+            std::vector<UInt8> seen_elements(elems.size(), 0);
+            size_t processed = 0;
+            size_t skipped = 0;
+            while (!istr.eof() && *istr.position() != '}')
+            {
+                if (!settings.json.ignore_unknown_keys_in_named_tuple && processed == elems.size())
+                    throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {} (consider enabling input_format_json_ignore_unknown_keys_in_named_tuple setting)", elems.size());
+
+                /// Every key after the first one (read or skipped) must be preceded by a comma.
+                if (processed + skipped > 0)
+                {
+                    assertChar(',', istr);
+                    skipWhitespaceIfAny(istr);
+                }
+
+                std::string name;
+                readDoubleQuotedString(name, istr);
+                skipWhitespaceIfAny(istr);
+                assertChar(':', istr);
+                skipWhitespaceIfAny(istr);
+
+                /// getPositionByName returns the maximum size_t value when there is no such element.
+                const size_t element_pos = getPositionByName(name);
+                if (element_pos == std::numeric_limits<size_t>::max())
+                {
+                    if (settings.json.ignore_unknown_keys_in_named_tuple)
+                    {
+                        skipJSONField(istr, name);
+                        skipWhitespaceIfAny(istr);
+                        ++skipped;
+                        continue;
+                    }
+                    else
+                        throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}', enable setting input_format_json_ignore_unknown_keys_in_named_tuple", name);
+                }
+
+                seen_elements[element_pos] = 1;
+                auto & element_column = extractElementColumn(column, element_pos);
+
+                try
+                {
+                    if (settings.null_as_default)
+                        SerializationNullable::deserializeTextJSONImpl(element_column, istr, settings, elems[element_pos]);
+                    else
+                        elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
+                }
+                catch (Exception & e)
+                {
+                    /// Attach the key name so that errors in nested values can be located.
+                    e.addMessage("(while reading the value of nested key " + name + ")");
+                    throw;
+                }
+
+                skipWhitespaceIfAny(istr);
+                ++processed;
+            }
+
+            assertChar('}', istr);
+
+            /// Check if we have missing elements.
+            if (processed != elems.size())
+            {
+                for (size_t element_pos = 0; element_pos != seen_elements.size(); ++element_pos)
+                {
+                    if (seen_elements[element_pos])
+                        continue;
+
+                    if (!settings.json.defaults_for_missing_elements_in_named_tuple)
+                        throw Exception(
+                            ErrorCodes::INCORRECT_DATA,
+                            "JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
+                            "enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
+                            elems[element_pos]->getElementName());
+
+                    auto & element_column = extractElementColumn(column, element_pos);
+                    element_column.insertDefault();
+                }
+            }
+        });
+    }
+    else
+    {
+        /// Tolerate leading whitespace, the same way the object form above does before '{'.
+        skipWhitespaceIfAny(istr);
+        assertChar('[', istr);
+
+        addElementSafe(elems.size(), column, [&]
+        {
+            for (size_t i = 0; i < elems.size(); ++i)
+            {
+                skipWhitespaceIfAny(istr);
+                if (i != 0)
+                {
+                    assertChar(',', istr);
+                    skipWhitespaceIfAny(istr);
+                }
+                elems[i]->deserializeTextJSON(extractElementColumn(column, i), istr, settings);
+            }
+
+            skipWhitespaceIfAny(istr);
+            assertChar(']', istr);
+        });
+    }
+}
+
+/// Writes the row as a `<tuple>` node holding one `<elem>` node per tuple element, in element order.
+void SerializationTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    writeCString("<tuple>", ostr);
+
+    size_t pos = 0;
+    for (const auto & elem : elems)
+    {
+        writeCString("<elem>", ostr);
+        elem->serializeTextXML(extractElementColumn(column, pos), row_num, ostr, settings);
+        writeCString("</elem>", ostr);
+        ++pos;
+    }
+
+    writeCString("</tuple>", ostr);
+}
+
+/// Writes the tuple elements one after another, separated by `settings.csv.tuple_delimiter`.
+/// No enclosing brackets are written.
+void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    const size_t count = elems.size();
+    for (size_t pos = 0; pos < count; ++pos)
+    {
+        if (pos > 0)
+        {
+            writeChar(settings.csv.tuple_delimiter, ostr);
+        }
+        elems[pos]->serializeTextCSV(extractElementColumn(column, pos), row_num, ostr, settings);
+    }
+}
+
+/// Reads the tuple elements one after another; consecutive elements must be separated by
+/// `settings.csv.tuple_delimiter`, optionally surrounded by whitespace.
+void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    addElementSafe(elems.size(), column, [&]
+    {
+        for (size_t pos = 0, count = elems.size(); pos < count; ++pos)
+        {
+            if (pos > 0)
+            {
+                /// Delimiter between the previous element and this one.
+                skipWhitespaceIfAny(istr);
+                assertChar(settings.csv.tuple_delimiter, istr);
+                skipWhitespaceIfAny(istr);
+            }
+
+            auto & element_column = extractElementColumn(column, pos);
+            elems[pos]->deserializeTextCSV(element_column, istr, settings);
+        }
+    });
+}
+
+/// Enumerates the substreams of every tuple element, in element order.
+/// Each element receives SubstreamData narrowed to its own type, column and serialization info;
+/// whichever of those is absent in `data` is passed on as nullptr.
+void SerializationTuple::enumerateStreams(
+    EnumerateStreamsSettings & settings,
+    const StreamCallback & callback,
+    const SubstreamData & data) const
+{
+    const DataTypeTuple * type_tuple = nullptr;
+    if (data.type)
+        type_tuple = &assert_cast<const DataTypeTuple &>(*data.type);
+
+    const ColumnTuple * column_tuple = nullptr;
+    if (data.column)
+        column_tuple = &assert_cast<const ColumnTuple &>(*data.column);
+
+    const SerializationInfoTuple * info_tuple = nullptr;
+    if (data.serialization_info)
+        info_tuple = &assert_cast<const SerializationInfoTuple &>(*data.serialization_info);
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        auto element_data = SubstreamData(elems[pos])
+            .withType(type_tuple ? type_tuple->getElement(pos) : nullptr)
+            .withColumn(column_tuple ? column_tuple->getColumnPtr(pos) : nullptr)
+            .withSerializationInfo(info_tuple ? info_tuple->getElementInfo(pos) : nullptr);
+
+        elems[pos]->enumerateStreams(settings, callback, element_data);
+    }
+}
+
+/// Bulk-serialization state of a tuple: one nested state per tuple element,
+/// indexed in the same order as the element serializations.
+struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState
+{
+ std::vector<ISerialization::SerializeBinaryBulkStatePtr> states;
+};
+
+/// Bulk-deserialization state of a tuple: one nested state per tuple element,
+/// indexed in the same order as the element serializations.
+struct DeserializeBinaryBulkStateTuple : public ISerialization::DeserializeBinaryBulkState
+{
+ std::vector<ISerialization::DeserializeBinaryBulkStatePtr> states;
+};
+
+
+/// Creates the tuple-level serialization state and lets every element write its prefix
+/// and fill in its own nested state. The result is stored into `state`.
+void SerializationTuple::serializeBinaryBulkStatePrefix(
+    const IColumn & column,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    auto tuple_state = std::make_shared<SerializeBinaryBulkStateTuple>();
+    auto & nested_states = tuple_state->states;
+    nested_states.resize(elems.size());
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        elems[pos]->serializeBinaryBulkStatePrefix(extractElementColumn(column, pos), settings, nested_states[pos]);
+    }
+
+    state = std::move(tuple_state);
+}
+
+/// Lets every element write its suffix, using the nested states kept in the tuple-level `state`.
+void SerializationTuple::serializeBinaryBulkStateSuffix(
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    auto * tuple_state = checkAndGetState<SerializeBinaryBulkStateTuple>(state);
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        auto & nested_state = tuple_state->states[pos];
+        elems[pos]->serializeBinaryBulkStateSuffix(settings, nested_state);
+    }
+}
+
+/// Creates the tuple-level deserialization state and lets every element read its prefix
+/// and fill in its own nested state. The result is stored into `state`.
+void SerializationTuple::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state) const
+{
+    auto tuple_state = std::make_shared<DeserializeBinaryBulkStateTuple>();
+    auto & nested_states = tuple_state->states;
+    nested_states.resize(elems.size());
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        elems[pos]->deserializeBinaryBulkStatePrefix(settings, nested_states[pos]);
+    }
+
+    state = std::move(tuple_state);
+}
+
+/// Serializes the same `offset`/`limit` row range of every element column,
+/// each with its own nested state taken from the tuple-level `state`.
+void SerializationTuple::serializeBinaryBulkWithMultipleStreams(
+    const IColumn & column,
+    size_t offset,
+    size_t limit,
+    SerializeBinaryBulkSettings & settings,
+    SerializeBinaryBulkStatePtr & state) const
+{
+    auto * tuple_state = checkAndGetState<SerializeBinaryBulkStateTuple>(state);
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        elems[pos]->serializeBinaryBulkWithMultipleStreams(
+            extractElementColumn(column, pos), offset, limit, settings, tuple_state->states[pos]);
+    }
+}
+
+/// Deserializes up to `limit` rows into every element column of the tuple,
+/// each with its own nested state taken from the tuple-level `state`.
+void SerializationTuple::deserializeBinaryBulkWithMultipleStreams(
+    ColumnPtr & column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsCache * cache) const
+{
+    auto * tuple_state = checkAndGetState<DeserializeBinaryBulkStateTuple>(state);
+
+    auto mutable_column = column->assumeMutable();
+    auto & tuple_column = assert_cast<ColumnTuple &>(*mutable_column);
+
+    /// The size hint is reset before delegating to the elements.
+    settings.avg_value_size_hint = 0;
+
+    for (size_t pos = 0; pos < elems.size(); ++pos)
+    {
+        elems[pos]->deserializeBinaryBulkWithMultipleStreams(
+            tuple_column.getColumnPtr(pos), limit, settings, tuple_state->states[pos], cache);
+    }
+}
+
+/// Returns the index of the first element whose name equals `name`,
+/// or std::numeric_limits<size_t>::max() when there is no such element.
+size_t SerializationTuple::getPositionByName(const String & name) const
+{
+    for (size_t pos = 0, count = elems.size(); pos < count; ++pos)
+    {
+        if (elems[pos]->getElementName() == name)
+        {
+            return pos;
+        }
+    }
+
+    return std::numeric_limits<size_t>::max();
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.h
new file mode 100644
index 00000000000..7325259f440
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationTuple.h
@@ -0,0 +1,78 @@
+#pragma once
+
+#include <DataTypes/Serializations/SimpleTextSerialization.h>
+#include <DataTypes/Serializations/SerializationNamed.h>
+
+namespace DB
+{
+
+/// Serialization of a tuple: holds one named serialization per tuple element
+/// and applies it to the corresponding element column.
+class SerializationTuple final : public SimpleTextSerialization
+{
+public:
+ using ElementSerializationPtr = std::shared_ptr<const SerializationNamed>;
+ using ElementSerializations = std::vector<ElementSerializationPtr>;
+
+ /// elems_ - serializations of the tuple elements, in element order.
+ /// have_explicit_names_ - stored as-is; the JSON (de)serialization checks it, together with
+ /// the corresponding format setting, to decide between the object and the array form.
+ SerializationTuple(const ElementSerializations & elems_, bool have_explicit_names_)
+ : elems(elems_), have_explicit_names(have_explicit_names_)
+ {
+ }
+
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+
+ /// Tuples in CSV format will be serialized as separate columns (that is, losing their nesting in the tuple).
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ /** Each sub-column in a tuple is serialized in separate stream.
+ */
+ void enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const override;
+
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+
+ /// Read-only access to the per-element serializations.
+ const ElementSerializations & getElementsSerializations() const { return elems; }
+
+private:
+ /// Serializations of the tuple elements, in element order.
+ ElementSerializations elems;
+ /// Value passed to the constructor; see the constructor comment.
+ bool have_explicit_names;
+
+ /// Index of the element named `name`, or the maximum size_t value if there is none.
+ size_t getPositionByName(const String & name) const;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.cpp
new file mode 100644
index 00000000000..613a16541f5
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.cpp
@@ -0,0 +1,173 @@
+#include <Columns/ColumnsNumber.h>
+#include <DataTypes/Serializations/SerializationUUID.h>
+#include <Formats/ProtobufReader.h>
+#include <Formats/ProtobufWriter.h>
+#include <IO/ReadBufferFromString.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Common/assert_cast.h>
+
+#include <ranges>
+
+namespace DB
+{
+
+/// Writes the text form of the UUID stored at `row_num`.
+void SerializationUUID::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & uuids = assert_cast<const ColumnUUID &>(column).getData();
+    writeText(uuids[row_num], ostr);
+}
+
+/// Parses a UUID from its text form and appends it to the column.
+/// When `whole` is set, any input left after the value is reported as an error.
+void SerializationUUID::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
+{
+    UUID parsed;
+    readText(parsed, istr);
+
+    auto & uuids = assert_cast<ColumnUUID &>(column).getData();
+    uuids.push_back(parsed);
+
+    if (!whole || istr.eof())
+        return;
+
+    throwUnexpectedDataAfterParsedValue(column, istr, settings, "UUID");
+}
+
+/// The escaped form is parsed like plain text, without requiring the input to end after the value.
+void SerializationUUID::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+    constexpr bool whole = false;
+    deserializeText(column, istr, settings, whole);
+}
+
+/// The escaped form is produced by serializeText without any extra processing.
+void SerializationUUID::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ serializeText(column, row_num, ostr, settings);
+}
+
+/// Writes the UUID text form wrapped in single quotes.
+void SerializationUUID::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    constexpr char quote = '\'';
+
+    writeChar(quote, ostr);
+    serializeText(column, row_num, ostr, settings);
+    writeChar(quote, ostr);
+}
+
+/// Parses a single-quoted UUID and appends it to the column.
+/// Fast path: if the whole quoted value is already in the buffer, it is parsed in place.
+/// Slow path: the quoted string is first read into a temporary String and parsed from there.
+void SerializationUUID::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+ UUID uuid;
+ bool fast = false;
+ /// 38 = opening quote + 36 characters (the longer of the two lengths accepted below) + closing quote.
+ if (istr.available() >= 38)
+ {
+ assertChar('\'', istr);
+ /// Find the first backslash or single quote within the bytes currently in the buffer.
+ char * next_pos = find_first_symbols<'\\', '\''>(istr.position(), istr.buffer().end());
+ const size_t len = next_pos - istr.position();
+ /// Only a 32- or 36-character body immediately followed by the closing quote is parsed in place.
+ if ((len == 32 || len == 36) && istr.position()[len] == '\'')
+ {
+ uuid = parseUUID(std::span(reinterpret_cast<const UInt8 *>(istr.position()), len));
+ /// Consume the body and the closing quote.
+ istr.ignore(len + 1);
+ fast = true;
+ }
+ else
+ {
+ // It's ok to go back in the position because we haven't read from the buffer except the first char
+ // and we know there were at least 38 bytes available (so no new read has been triggered)
+ istr.position()--;
+ }
+ }
+
+ if (!fast)
+ {
+ String quoted_chars;
+ readQuotedStringInto<false>(quoted_chars, istr);
+ ReadBufferFromString parsed_quoted_buffer(quoted_chars);
+ readText(uuid, parsed_quoted_buffer);
+ }
+
+ assert_cast<ColumnUUID &>(column).getData().push_back(std::move(uuid)); /// It's important to do this at the end - for exception safety.
+}
+
+/// Writes the UUID text form as a JSON string, i.e. wrapped in double quotes.
+void SerializationUUID::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    constexpr char quote = '"';
+
+    writeChar(quote, ostr);
+    serializeText(column, row_num, ostr, settings);
+    writeChar(quote, ostr);
+}
+
+/// Parses a UUID written as a double-quoted JSON string and appends it to the column.
+void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    constexpr char quote = '"';
+
+    UUID parsed;
+    assertChar(quote, istr);
+    readText(parsed, istr);
+    assertChar(quote, istr);
+
+    /// The column is touched only after the whole value has been read.
+    auto & uuids = assert_cast<ColumnUUID &>(column).getData();
+    uuids.push_back(parsed);
+}
+
+/// Writes the UUID text form wrapped in double quotes.
+void SerializationUUID::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+    constexpr char quote = '"';
+
+    writeChar(quote, ostr);
+    serializeText(column, row_num, ostr, settings);
+    writeChar(quote, ostr);
+}
+
+/// Reads a UUID with readCSV and appends it to the column.
+void SerializationUUID::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    UUID parsed;
+    readCSV(parsed, istr);
+
+    auto & uuids = assert_cast<ColumnUUID &>(column).getData();
+    uuids.push_back(parsed);
+}
+
+
+/// Writes the UUID held by `field` in little-endian binary form.
+void SerializationUUID::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const
+{
+    UUID value = field.get<UUID>();
+    writeBinaryLittleEndian(value, ostr);
+}
+
+/// Reads a UUID in little-endian binary form and stores it into `field`.
+void SerializationUUID::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const
+{
+    UUID value;
+    readBinaryLittleEndian(value, istr);
+
+    field = NearestFieldType<UUID>(value);
+}
+
+/// Writes the UUID stored at `row_num` in little-endian binary form.
+void SerializationUUID::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
+{
+    const auto & uuids = assert_cast<const ColumnVector<UUID> &>(column).getData();
+    writeBinaryLittleEndian(uuids[row_num], ostr);
+}
+
+/// Reads one UUID in little-endian binary form and appends it to the column.
+void SerializationUUID::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
+{
+    UUID value;
+    readBinaryLittleEndian(value, istr);
+
+    auto & uuids = assert_cast<ColumnVector<UUID> &>(column).getData();
+    uuids.push_back(value);
+}
+
+/// Writes `limit` UUIDs starting at row `offset` in little-endian binary form.
+/// `limit == 0`, or a range that runs past the end of the column, means "up to the end of the column".
+void SerializationUUID::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+ const typename ColumnVector<UUID>::Container & x = typeid_cast<const ColumnVector<UUID> &>(column).getData();
+ /// NOTE(review): `size - offset` wraps around if offset > size; presumably callers never pass that - confirm.
+ if (const size_t size = x.size(); limit == 0 || offset + limit > size)
+ limit = size - offset;
+
+ /// Nothing to write.
+ if (limit == 0)
+ return;
+
+ /// On a big-endian host each value is written through writeBinaryLittleEndian;
+ /// otherwise the values are written as one contiguous block of bytes.
+ if constexpr (std::endian::native == std::endian::big)
+ {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunreachable-code"
+ for (size_t i = offset; i < offset + limit; ++i)
+ writeBinaryLittleEndian(x[i], ostr);
+#pragma clang diagnostic pop
+ }
+ else
+ ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(UUID) * limit);
+}
+
+/// Reads up to `limit` UUIDs in binary form and appends them to the column.
+/// The size hint is unused.
+void SerializationUUID::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
+{
+ typename ColumnVector<UUID>::Container & x = typeid_cast<ColumnVector<UUID> &>(column).getData();
+ const size_t initial_size = x.size();
+ /// Reserve room for `limit` values, read directly into it, then shrink to the number of
+ /// whole values implied by the byte count returned by readBig.
+ x.resize(initial_size + limit);
+ const size_t size = istr.readBig(reinterpret_cast<char *>(&x[initial_size]), sizeof(UUID) * limit);
+ x.resize(initial_size + size / sizeof(UUID));
+
+ /// On a big-endian host the freshly read values are converted in place.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunreachable-code"
+ if constexpr (std::endian::native == std::endian::big)
+ for (size_t i = initial_size; i < x.size(); ++i)
+ transformEndianness<std::endian::big, std::endian::little>(x[i]);
+#pragma clang diagnostic pop
+}
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.h
new file mode 100644
index 00000000000..da8c15f7279
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationUUID.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <DataTypes/Serializations/SerializationNumber.h>
+
+namespace DB
+{
+
+/// Serialization of UUID values: text forms (plain, escaped, quoted, JSON, CSV)
+/// and binary forms (single value and bulk).
+class SerializationUUID : public SimpleTextSerialization
+{
+public:
+ /// Text forms.
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ /// Binary forms: a value held in a Field, a single column row, and a bulk range of rows.
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.cpp b/contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.cpp
new file mode 100644
index 00000000000..18e4891ee65
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.cpp
@@ -0,0 +1,149 @@
+#include <DataTypes/Serializations/SerializationWrapper.h>
+#include <Columns/IColumn.h>
+
+namespace DB
+{
+
+/// Every method of SerializationWrapper below forwards its arguments unchanged
+/// to the same method of `nested_serialization`.
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const
+{
+ nested_serialization->enumerateStreams(settings, callback, data);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ nested_serialization->serializeBinaryBulkStatePrefix(column, settings, state);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+ nested_serialization->serializeBinaryBulkStateSuffix(settings, state);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const
+{
+ nested_serialization->deserializeBinaryBulkStatePrefix(settings, state);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const
+{
+
+ nested_serialization->serializeBinaryBulkWithMultipleStreams(column, offset, limit, settings, state);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const
+{
+
+ nested_serialization->deserializeBinaryBulkWithMultipleStreams(column, limit, settings, state, cache);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
+{
+ nested_serialization->serializeBinaryBulk(column, ostr, offset, limit);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const
+{
+ nested_serialization->deserializeBinaryBulk(column, istr, limit, avg_value_size_hint);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeBinary(field, ostr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested_serialization->deserializeBinary(field, istr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeBinary(column, row_num, ostr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested_serialization->deserializeBinary(column, istr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeTextEscaped(column, row_num, ostr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested_serialization->deserializeTextEscaped(column, istr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeTextQuoted(column, row_num, ostr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested_serialization->deserializeTextQuoted(column, istr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeTextCSV(column, row_num, ostr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested_serialization->deserializeTextCSV(column, istr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeText(column, row_num, ostr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested_serialization->deserializeWholeText(column, istr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeTextJSON(column, row_num, ostr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
+{
+ nested_serialization->deserializeTextJSON(column, istr, settings);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const
+{
+ nested_serialization->serializeTextJSONPretty(column, row_num, ostr, settings, indent);
+}
+
+/// Forwards to the nested serialization.
+void SerializationWrapper::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
+{
+ nested_serialization->serializeTextXML(column, row_num, ostr, settings);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.h b/contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.h
new file mode 100644
index 00000000000..31900f93148
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SerializationWrapper.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include <DataTypes/Serializations/ISerialization.h>
+#include <Common/Exception.h>
+
+namespace DB
+{
+
+/// Wrapper around another serialization: every method that a derived class does not override
+/// is forwarded to the nested serialization.
+/// Inherit from this class when only some of the methods need to be overridden, to avoid boilerplate code.
+class SerializationWrapper : public ISerialization
+{
+protected:
+ /// The serialization all calls are forwarded to.
+ SerializationPtr nested_serialization;
+
+public:
+ explicit SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {}
+
+ /// The wrapped serialization.
+ const SerializationPtr & getNested() const { return nested_serialization; }
+
+ /// The kind reported is the kind of the wrapped serialization.
+ Kind getKind() const override { return nested_serialization->getKind(); }
+
+ void enumerateStreams(
+ EnumerateStreamsSettings & settings,
+ const StreamCallback & callback,
+ const SubstreamData & data) const override;
+
+ void serializeBinaryBulkStatePrefix(
+ const IColumn & column,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkStateSuffix(
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkStatePrefix(
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state) const override;
+
+ void serializeBinaryBulkWithMultipleStreams(
+ const IColumn & column,
+ size_t offset,
+ size_t limit,
+ SerializeBinaryBulkSettings & settings,
+ SerializeBinaryBulkStatePtr & state) const override;
+
+ void deserializeBinaryBulkWithMultipleStreams(
+ ColumnPtr & column,
+ size_t limit,
+ DeserializeBinaryBulkSettings & settings,
+ DeserializeBinaryBulkStatePtr & state,
+ SubstreamsCache * cache) const override;
+
+ void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
+ void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
+
+ void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+ void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
+
+ void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+
+ void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
+ void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
+ void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
+
+ void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SimpleTextSerialization.h b/contrib/clickhouse/src/DataTypes/Serializations/SimpleTextSerialization.h
new file mode 100644
index 00000000000..0247f30b30a
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SimpleTextSerialization.h
@@ -0,0 +1,64 @@
+#pragma once
+#include <DataTypes/Serializations/ISerialization.h>
+
+namespace DB
+{
+
+/// Base class for serializations whose text representation is the same in every text format.
+/// All ISerialization text variants (escaped, quoted, JSON, CSV, whole text) are forwarded to one
+/// pair of functions: serializeText() and deserializeText(), which the derived class must define.
+class SimpleTextSerialization : public ISerialization
+{
+protected:
+    SimpleTextSerialization() = default;
+
+    /// whole = true means that buffer contains only one value, so we should read until EOF.
+    /// It's needed to check if there is garbage after parsed field.
+    virtual void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &, bool whole) const = 0;
+
+    /// The buffer holds exactly one value: the only variant that passes whole = true.
+    void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
+    {
+        deserializeText(column, istr, settings, true);
+    }
+
+    /// Escaped variant.
+    void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        serializeText(column, row_num, ostr, settings);
+    }
+
+    void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
+    {
+        deserializeText(column, istr, settings, false);
+    }
+
+    /// Quoted variant.
+    void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        serializeText(column, row_num, ostr, settings);
+    }
+
+    void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
+    {
+        deserializeText(column, istr, settings, false);
+    }
+
+    /// JSON variant.
+    void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        serializeText(column, row_num, ostr, settings);
+    }
+
+    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
+    {
+        deserializeText(column, istr, settings, false);
+    }
+
+    /// CSV variant.
+    void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override
+    {
+        serializeText(column, row_num, ostr, settings);
+    }
+
+    void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override
+    {
+        deserializeText(column, istr, settings, false);
+    }
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/Serializations/SubcolumnsTree.h b/contrib/clickhouse/src/DataTypes/Serializations/SubcolumnsTree.h
new file mode 100644
index 00000000000..fda45e1e9a2
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/Serializations/SubcolumnsTree.h
@@ -0,0 +1,209 @@
+#pragma once
+
+#include <DataTypes/Serializations/PathInData.h>
+#include <DataTypes/IDataType.h>
+#include <Columns/IColumn.h>
+#include <Common/HashTable/HashMap.h>
+
+namespace DB
+{
+
+/// Tree that represents paths in document
+/// with additional data in nodes.
+/// Intermediate nodes have kind TUPLE or NESTED, leaves have kind SCALAR.
+/// Besides the hierarchy, the tree keeps a flat list of its leaves in the order they were added.
+template <typename NodeData>
+class SubcolumnsTree
+{
+public:
+    struct Node
+    {
+        enum Kind
+        {
+            TUPLE,
+            NESTED,
+            SCALAR,
+        };
+
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData & data_) : kind(kind_), data(data_) {}
+        Node(Kind kind_, const NodeData & data_, const PathInData & path_)
+            : kind(kind_), data(data_), path(path_) {}
+
+        Kind kind = TUPLE;
+
+        /// Non-owning pointer to the parent; assigned by addChild() of the parent node.
+        const Node * parent = nullptr;
+
+        /// Owns the bytes of the keys: `children` stores StringRefs that point into this pool.
+        Arena strings_pool;
+        HashMapWithStackMemory<StringRef, std::shared_ptr<Node>, StringRefHash, 4> children;
+
+        NodeData data;
+        PathInData path;
+
+        bool isNested() const { return kind == NESTED; }
+        bool isScalar() const { return kind == SCALAR; }
+
+        /// Attaches next_node under the given key. The key is copied into strings_pool,
+        /// so the caller's buffer does not have to outlive the call.
+        /// An already existing child with the same key is replaced.
+        void addChild(std::string_view key, std::shared_ptr<Node> next_node)
+        {
+            next_node->parent = this;
+            StringRef key_ref{strings_pool.insert(key.data(), key.length()), key.length()};
+            children[key_ref] = std::move(next_node);
+        }
+    };
+
+    using NodeKind = typename Node::Kind;
+    using NodePtr = std::shared_ptr<Node>;
+
+    SubcolumnsTree() : root(std::make_shared<Node>(Node::TUPLE)) {}
+
+    /// Add a leaf without any data in other nodes.
+    /// Returns false if the leaf was not added (see the other overload of add()).
+    bool add(const PathInData & path, const NodeData & leaf_data)
+    {
+        return add(path, [&](NodeKind kind, bool exists) -> NodePtr
+        {
+            if (exists)
+                return nullptr;
+
+            if (kind == Node::SCALAR)
+                return std::make_shared<Node>(kind, leaf_data, path);
+
+            return std::make_shared<Node>(kind);
+        });
+    }
+
+    /// Callback for creation of node. Receives kind of node and
+    /// flag, which is true if node already exists.
+    using NodeCreator = std::function<NodePtr(NodeKind, bool)>;
+
+    /// Adds a leaf at `path`, creating the missing intermediate nodes with node_creator.
+    /// For an intermediate node that already exists, node_creator is still called
+    /// (with exists = true) and its result is ignored.
+    /// Returns false and adds no leaf when:
+    ///  - the path has no parts;
+    ///  - an existing intermediate node disagrees with the path about being nested;
+    ///  - the last node of the path already has a child with the leaf's key.
+    bool add(const PathInData & path, const NodeCreator & node_creator)
+    {
+        const auto & parts = path.getParts();
+        if (parts.empty())
+            return false;
+
+        Node * current_node = root.get();
+        for (size_t i = 0; i < parts.size() - 1; ++i)
+        {
+            assert(current_node->kind != Node::SCALAR);
+
+            auto it = current_node->children.find(StringRef{parts[i].key});
+            if (it != current_node->children.end())
+            {
+                current_node = it->getMapped().get();
+                node_creator(current_node->kind, true);
+
+                if (current_node->isNested() != parts[i].is_nested)
+                    return false;
+            }
+            else
+            {
+                auto next_kind = parts[i].is_nested ? Node::NESTED : Node::TUPLE;
+                auto next_node = node_creator(next_kind, false);
+                /// addChild() copies the key into the node's arena, no temporary String is needed.
+                current_node->addChild(parts[i].key, next_node);
+                current_node = next_node.get();
+            }
+        }
+
+        auto it = current_node->children.find(StringRef{parts.back().key});
+        if (it != current_node->children.end())
+            return false;
+
+        auto next_node = node_creator(Node::SCALAR, false);
+        current_node->addChild(parts.back().key, next_node);
+        leaves.push_back(std::move(next_node));
+
+        return true;
+    }
+
+    /// Find node that matches the path the best:
+    /// the deepest node reached while following the parts of the path (possibly the root).
+    /// Returns nullptr if the tree is empty.
+    const Node * findBestMatch(const PathInData & path) const
+    {
+        return findImpl(path, false);
+    }
+
+    /// Find node that matches the path exactly, nullptr if there is none.
+    const Node * findExact(const PathInData & path) const
+    {
+        return findImpl(path, true);
+    }
+
+    /// Find leaf by path. Returns nullptr if the path is absent or leads to a non-scalar node.
+    const Node * findLeaf(const PathInData & path) const
+    {
+        const auto * candidate = findExact(path);
+        if (!candidate || !candidate->isScalar())
+            return nullptr;
+        return candidate;
+    }
+
+    using NodePredicate = std::function<bool(const Node &)>;
+
+    /// Finds leaf that satisfies the predicate.
+    /// Does not modify the tree, so it is callable on a const tree as well.
+    const Node * findLeaf(const NodePredicate & predicate) const
+    {
+        return findLeaf(root.get(), predicate);
+    }
+
+    /// Depth-first search in the subtree of `node` for a leaf that satisfies the predicate.
+    /// The predicate is evaluated only for scalar nodes.
+    static const Node * findLeaf(const Node * node, const NodePredicate & predicate)
+    {
+        if (!node)
+            return nullptr;
+
+        if (node->isScalar())
+            return predicate(*node) ? node : nullptr;
+
+        for (const auto & [_, child] : node->children)
+            if (const auto * leaf = findLeaf(child.get(), predicate))
+                return leaf;
+
+        return nullptr;
+    }
+
+    /// Find first parent node that satisfies the predicate.
+    /// The search starts from `node` itself and follows the parent pointers.
+    static const Node * findParent(const Node * node, const NodePredicate & predicate)
+    {
+        while (node && !predicate(*node))
+            node = node->parent;
+        return node;
+    }
+
+    bool empty() const { return root->children.empty(); }
+
+    /// Number of leaves.
+    size_t size() const { return leaves.size(); }
+
+    using Nodes = std::vector<NodePtr>;
+
+    const Nodes & getLeaves() const { return leaves; }
+    const Node & getRoot() const { return *root; }
+
+    using iterator = typename Nodes::iterator;
+    using const_iterator = typename Nodes::const_iterator;
+
+    /// Iteration goes over the leaves only.
+    iterator begin() { return leaves.begin(); }
+    iterator end() { return leaves.end(); }
+
+    const_iterator begin() const { return leaves.begin(); }
+    const_iterator end() const { return leaves.end(); }
+
+private:
+    /// Follows the parts of the path from the root. When a part is missing,
+    /// returns nullptr if find_exact is set, otherwise the last node that was reached.
+    const Node * findImpl(const PathInData & path, bool find_exact) const
+    {
+        if (empty())
+            return nullptr;
+
+        const auto & parts = path.getParts();
+        const auto * current_node = root.get();
+
+        for (const auto & part : parts)
+        {
+            auto it = current_node->children.find(StringRef{part.key});
+            if (it == current_node->children.end())
+                return find_exact ? nullptr : current_node;
+
+            current_node = it->getMapped().get();
+        }
+
+        return current_node;
+    }
+
+    NodePtr root;
+    Nodes leaves;
+};
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/TimezoneMixin.h b/contrib/clickhouse/src/DataTypes/TimezoneMixin.h
new file mode 100644
index 00000000000..03ecde5dd0a
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/TimezoneMixin.h
@@ -0,0 +1,32 @@
+#pragma once
+#include <Core/Types.h>
+#include <Common/DateLUT.h>
+
+class DateLUTImpl;
+
+/** Mixin that keeps time zone information for time-zone-aware DateTime implementations.
+  *
+  * Must be used as a (second) base of a class implementing the IDataType/ISerialization interface.
+  */
+class TimezoneMixin
+{
+public:
+    /// A non-empty time_zone_name is treated as an explicitly specified time zone.
+    explicit TimezoneMixin(const String & time_zone_name = "")
+        : has_explicit_time_zone(!time_zone_name.empty())
+        , time_zone(DateLUT::instance(time_zone_name))
+        , utc_time_zone(DateLUT::instance("UTC"))
+    {
+    }
+
+    TimezoneMixin(const TimezoneMixin &) = default;
+
+    bool hasExplicitTimeZone() const { return has_explicit_time_zone; }
+    const DateLUTImpl & getTimeZone() const { return time_zone; }
+
+protected:
+    /// true if time zone name was provided in data type parameters, false if it's using default time zone.
+    bool has_explicit_time_zone;
+
+    /// Lookup tables obtained from DateLUT::instance(): for the requested time zone and for "UTC".
+    const DateLUTImpl & time_zone;
+    const DateLUTImpl & utc_time_zone;
+};
diff --git a/contrib/clickhouse/src/DataTypes/convertMySQLDataType.cpp b/contrib/clickhouse/src/DataTypes/convertMySQLDataType.cpp
new file mode 100644
index 00000000000..bb848bf1526
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/convertMySQLDataType.cpp
@@ -0,0 +1,132 @@
+#include "convertMySQLDataType.h"
+
+#include <Core/Field.h>
+#include <base/types.h>
+#include <Core/MultiEnum.h>
+#include <Core/SettingsEnums.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/IAST.h>
+#include "DataTypeDate.h"
+#include "DataTypeDate32.h"
+#include "DataTypeDateTime.h"
+#include "DataTypeDateTime64.h"
+#include "DataTypeEnum.h"
+#include "DataTypesDecimal.h"
+#include "DataTypeFixedString.h"
+#include "DataTypeNullable.h"
+#include "DataTypeString.h"
+#include "DataTypesNumber.h"
+#include "IDataType.h"
+
+namespace DB
+{
+
+/// Maps a MySQL column type onto a ClickHouse data type.
+/// Only the base type name (the text before the first '(' or ' ') of mysql_data_type is inspected;
+/// signedness, length, precision and scale are taken from the separate arguments.
+/// Types without a dedicated mapping become String; the result is wrapped into Nullable if is_nullable is set.
+DataTypePtr convertMySQLDataType(MultiEnum<MySQLDataTypesSupport> type_support,
+        const std::string & mysql_data_type,
+        bool is_nullable,
+        bool is_unsigned,
+        size_t length,
+        size_t precision,
+        size_t scale)
+{
+    // Mysql returns mysql_data_type as below:
+    // 1. basic_type
+    // 2. basic_type options
+    // 3. type_with_params(param1, param2, ...)
+    // 4. type_with_params(param1, param2, ...) options
+    // The options can be unsigned, zerofill, or some other strings.
+    const std::string_view full_type{mysql_data_type};
+    const auto name_length = full_type.find_first_of(R"(( )"); // FIXME: fix style-check script instead
+    const auto type_name = full_type.substr(0, name_length);
+
+    /// Returns the mapped type, or nullptr when there is no dedicated mapping.
+    const auto resolve_base_type = [&]() -> DataTypePtr
+    {
+        if (type_name == "tinyint")
+        {
+            if (is_unsigned)
+                return std::make_shared<DataTypeUInt8>();
+            return std::make_shared<DataTypeInt8>();
+        }
+
+        if (type_name == "smallint")
+        {
+            if (is_unsigned)
+                return std::make_shared<DataTypeUInt16>();
+            return std::make_shared<DataTypeInt16>();
+        }
+
+        if (type_name == "int" || type_name == "mediumint" || type_name == "integer")
+        {
+            if (is_unsigned)
+                return std::make_shared<DataTypeUInt32>();
+            return std::make_shared<DataTypeInt32>();
+        }
+
+        if (type_name == "bigint")
+        {
+            if (is_unsigned)
+                return std::make_shared<DataTypeUInt64>();
+            return std::make_shared<DataTypeInt64>();
+        }
+
+        if (type_name == "float")
+            return std::make_shared<DataTypeFloat32>();
+
+        if (type_name == "double")
+            return std::make_shared<DataTypeFloat64>();
+
+        if (type_name == "date")
+        {
+            if (type_support.isSet(MySQLDataTypesSupport::DATE2DATE32))
+                return std::make_shared<DataTypeDate32>();
+            if (type_support.isSet(MySQLDataTypesSupport::DATE2STRING))
+                return std::make_shared<DataTypeString>();
+            return std::make_shared<DataTypeDate>();
+        }
+
+        if (type_name == "binary")
+        {
+            //compatible with binary(0) DataType
+            const size_t fixed_length = (length == 0) ? 1 : length;
+            return std::make_shared<DataTypeFixedString>(fixed_length);
+        }
+
+        if (type_name == "datetime" || type_name == "timestamp")
+        {
+            /// DateTime64 is used only when it is enabled, except for a timestamp with zero scale.
+            const bool plain_datetime = !type_support.isSet(MySQLDataTypesSupport::DATETIME64)
+                || (type_name == "timestamp" && scale == 0);
+
+            if (plain_datetime)
+                return std::make_shared<DataTypeDateTime>();
+            return std::make_shared<DataTypeDateTime64>(scale);
+        }
+
+        if (type_name == "bit")
+            return std::make_shared<DataTypeUInt64>();
+
+        if (type_support.isSet(MySQLDataTypesSupport::DECIMAL) && (type_name == "numeric" || type_name == "decimal"))
+        {
+            /// The narrowest Decimal whose maximum precision is sufficient.
+            if (precision <= DecimalUtils::max_precision<Decimal32>)
+                return std::make_shared<DataTypeDecimal<Decimal32>>(precision, scale);
+            if (precision <= DecimalUtils::max_precision<Decimal64>)
+                return std::make_shared<DataTypeDecimal<Decimal64>>(precision, scale);
+            if (precision <= DecimalUtils::max_precision<Decimal128>)
+                return std::make_shared<DataTypeDecimal<Decimal128>>(precision, scale);
+            if (precision <= DecimalUtils::max_precision<Decimal256>)
+                return std::make_shared<DataTypeDecimal<Decimal256>>(precision, scale);
+        }
+
+        return nullptr;
+    };
+
+    DataTypePtr res = resolve_base_type();
+
+    /// Also String is fallback for all unknown types.
+    if (!res)
+        res = std::make_shared<DataTypeString>();
+
+    if (is_nullable)
+        res = std::make_shared<DataTypeNullable>(res);
+
+    return res;
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/convertMySQLDataType.h b/contrib/clickhouse/src/DataTypes/convertMySQLDataType.h
new file mode 100644
index 00000000000..543119bc60e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/convertMySQLDataType.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <string>
+#include <Core/MultiEnum.h>
+#include <Parsers/IAST.h>
+#include "IDataType.h"
+
+namespace DB
+{
+enum class MySQLDataTypesSupport;
+
+/// Convert MySQL type to ClickHouse data type. The type is recognized by the base name at the start of `mysql_data_type`; types without a dedicated mapping are converted to String, and the result is wrapped into Nullable when `is_nullable` is set.
+DataTypePtr convertMySQLDataType(MultiEnum<MySQLDataTypesSupport> type_support, const std::string & mysql_data_type, bool is_nullable, bool is_unsigned, size_t length, size_t precision, size_t scale);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/getLeastSupertype.cpp b/contrib/clickhouse/src/DataTypes/getLeastSupertype.cpp
new file mode 100644
index 00000000000..9d42d82ce91
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/getLeastSupertype.cpp
@@ -0,0 +1,668 @@
+#include <unordered_set>
+
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+#include <Common/typeid_cast.h>
+
+#include <DataTypes/getLeastSupertype.h>
+
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeDateTime64.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/DataTypeFactory.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NO_COMMON_TYPE;
+}
+
+namespace
+{
+
+/// Name of a type given as a data type object.
+String typeToString(const DataTypePtr & type)
+{
+    return type->getName();
+}
+
+/// Name of a type given by its TypeIndex: the name of the enumerator.
+String typeToString(const TypeIndex & type)
+{
+    const auto enumerator_name = magic_enum::enum_name(type);
+    return String(enumerator_name);
+}
+
+/// Builds the list of type names, separated by ", ", that is put into the exception message.
+template <typename DataTypes>
+String getExceptionMessagePrefix(const DataTypes & types)
+{
+    WriteBufferFromOwnString names;
+
+    /// Nothing is written before the first element, ", " before every following one.
+    const char * separator = "";
+    for (const auto & type : types)
+    {
+        names << separator << typeToString(type);
+        separator = ", ";
+    }
+
+    return names.str();
+}
+
+/// Reports that there is no common supertype, in the way selected by on_error:
+/// String - returns the String data type, Null - returns nullptr, otherwise throws an Exception
+/// with error_code and a message that lists the types followed by message_suffix (if not empty).
+template <LeastSupertypeOnError on_error, typename DataTypes>
+DataTypePtr throwOrReturn(const DataTypes & types, std::string_view message_suffix, int error_code)
+{
+    if constexpr (on_error == LeastSupertypeOnError::String)
+        return std::make_shared<DataTypeString>();
+    else if constexpr (on_error == LeastSupertypeOnError::Null)
+        return nullptr;
+
+    if (!message_suffix.empty())
+        throw Exception(error_code, "There is no supertype for types {} {}", getExceptionMessagePrefix(types), message_suffix);
+
+    throw Exception(error_code, "There is no supertype for types {}", getExceptionMessagePrefix(types));
+}
+
+/// Least supertype for a set of numeric type indexes.
+/// Returns an empty pointer if the set contains no numeric types at all.
+/// If numeric types are mixed with anything except Nothing, or no numeric type
+/// can hold all the values, the outcome is decided by throwOrReturn<on_error>.
+template <LeastSupertypeOnError on_error>
+DataTypePtr getNumericType(const TypeIndexSet & types)
+{
+    enum class Family
+    {
+        Signed,
+        Unsigned,
+        Floating,
+    };
+
+    /// For integers `bits` is the width of the type, for floating point types - the size of mantissa.
+    struct NumericTrait
+    {
+        TypeIndex index;
+        Family family;
+        size_t bits;
+    };
+
+    static constexpr NumericTrait numeric_traits[] =
+    {
+        {TypeIndex::UInt8, Family::Unsigned, 8},
+        {TypeIndex::UInt16, Family::Unsigned, 16},
+        {TypeIndex::UInt32, Family::Unsigned, 32},
+        {TypeIndex::IPv4, Family::Unsigned, 32},
+        {TypeIndex::UInt64, Family::Unsigned, 64},
+        {TypeIndex::UInt128, Family::Unsigned, 128},
+        {TypeIndex::UInt256, Family::Unsigned, 256},
+        {TypeIndex::Int8, Family::Signed, 8},
+        {TypeIndex::Enum8, Family::Signed, 8},
+        {TypeIndex::Int16, Family::Signed, 16},
+        {TypeIndex::Enum16, Family::Signed, 16},
+        {TypeIndex::Int32, Family::Signed, 32},
+        {TypeIndex::Int64, Family::Signed, 64},
+        {TypeIndex::Int128, Family::Signed, 128},
+        {TypeIndex::Int256, Family::Signed, 256},
+        {TypeIndex::Float32, Family::Floating, 24},
+        {TypeIndex::Float64, Family::Floating, 53},
+    };
+
+    size_t widest_signed = 0;
+    size_t widest_unsigned = 0;
+    size_t widest_mantissa = 0;
+    bool only_numbers = true;
+
+    for (const auto & type : types)
+    {
+        const NumericTrait * trait = nullptr;
+        for (const auto & candidate : numeric_traits)
+        {
+            if (candidate.index == type)
+            {
+                trait = &candidate;
+                break;
+            }
+        }
+
+        if (!trait)
+        {
+            /// Nothing is compatible with numbers, everything else is not.
+            if (type != TypeIndex::Nothing)
+                only_numbers = false;
+            continue;
+        }
+
+        if (trait->family == Family::Signed)
+            widest_signed = std::max(widest_signed, trait->bits);
+        else if (trait->family == Family::Unsigned)
+            widest_unsigned = std::max(widest_unsigned, trait->bits);
+        else
+            widest_mantissa = std::max(widest_mantissa, trait->bits);
+    }
+
+    /// No numeric types among the arguments: the caller decides what to do.
+    if (!widest_signed && !widest_unsigned && !widest_mantissa)
+        return {};
+
+    if (!only_numbers)
+        return throwOrReturn<on_error>(types, "because some of them are numbers and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+
+    /// If there are signed and unsigned types of same bit-width, the result must be signed number with at least one more bit.
+    /// Example, common of Int32, UInt32 = Int64.
+
+    size_t integer_bits = std::max(widest_signed, widest_unsigned);
+
+    /// If unsigned is not covered by signed.
+    if (widest_signed && widest_unsigned >= widest_signed)
+    {
+        // Because 128 and 256 bit integers are significantly slower, we should not promote to them.
+        // But if we already have wide numbers, promotion is necessary.
+        if (integer_bits == 64)
+            return throwOrReturn<on_error>(types,
+                "because some of them are signed integers and some are unsigned integers,"
+                " but there is no signed integer type, that can exactly represent all required unsigned integer values",
+                ErrorCodes::NO_COMMON_TYPE);
+
+        ++integer_bits;
+    }
+
+    /// If the result must be floating.
+    if (widest_mantissa)
+    {
+        const size_t required_mantissa = std::max(integer_bits, widest_mantissa);
+
+        if (required_mantissa <= 24)
+            return std::make_shared<DataTypeFloat32>();
+        if (required_mantissa <= 53)
+            return std::make_shared<DataTypeFloat64>();
+
+        return throwOrReturn<on_error>(types,
+            " because some of them are integers and some are floating point,"
+            " but there is no floating point type, that can exactly represent all required integers", ErrorCodes::NO_COMMON_TYPE);
+    }
+
+    /// If the result must be signed integer.
+    if (widest_signed)
+    {
+        if (integer_bits <= 8)
+            return std::make_shared<DataTypeInt8>();
+        if (integer_bits <= 16)
+            return std::make_shared<DataTypeInt16>();
+        if (integer_bits <= 32)
+            return std::make_shared<DataTypeInt32>();
+        if (integer_bits <= 64)
+            return std::make_shared<DataTypeInt64>();
+        if (integer_bits <= 128)
+            return std::make_shared<DataTypeInt128>();
+        if (integer_bits <= 256)
+            return std::make_shared<DataTypeInt256>();
+
+        return throwOrReturn<on_error>(types,
+            " because some of them are signed integers and some are unsigned integers,"
+            " but there is no signed integer type, that can exactly represent all required unsigned integer values", ErrorCodes::NO_COMMON_TYPE);
+    }
+
+    /// All unsigned.
+    if (integer_bits <= 8)
+        return std::make_shared<DataTypeUInt8>();
+    if (integer_bits <= 16)
+        return std::make_shared<DataTypeUInt16>();
+    if (integer_bits <= 32)
+        return std::make_shared<DataTypeUInt32>();
+    if (integer_bits <= 64)
+        return std::make_shared<DataTypeUInt64>();
+    if (integer_bits <= 128)
+        return std::make_shared<DataTypeUInt128>();
+    if (integer_bits <= 256)
+        return std::make_shared<DataTypeUInt256>();
+
+    return throwOrReturn<on_error>(types,
+        " but as all data types are unsigned integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE);
+}
+
+}
+
+template <LeastSupertypeOnError on_error>
+DataTypePtr getLeastSupertype(const DataTypes & types) // Failures are reported through throwOrReturn<on_error>.
+{
+ /// Trivial cases: an empty list gives Nothing, a single type is returned as is.
+
+ if (types.empty())
+ return std::make_shared<DataTypeNothing>();
+
+ if (types.size() == 1)
+ return types[0];
+
+ /// All types are equal: return the first one.
+ {
+ bool all_equal = true;
+ for (size_t i = 1, size = types.size(); i < size; ++i)
+ {
+ if (!types[i]->equals(*types[0]))
+ {
+ all_equal = false;
+ break;
+ }
+ }
+
+ if (all_equal)
+ return types[0];
+ }
+
+ /// Recursive rules
+
+ /// If there are Nothing types, skip them and recurse on the remaining types.
+ {
+ DataTypes non_nothing_types;
+ non_nothing_types.reserve(types.size());
+
+ for (const auto & type : types)
+ if (!typeid_cast<const DataTypeNothing *>(type.get()))
+ non_nothing_types.emplace_back(type);
+
+ if (non_nothing_types.size() < types.size())
+ return getLeastSupertype<on_error>(non_nothing_types);
+ }
+
+ /// For Arrays: all types must be arrays; the result is an Array of the least supertype of the nested types.
+ {
+ bool have_array = false;
+ bool all_arrays = true;
+
+ DataTypes nested_types;
+ nested_types.reserve(types.size());
+
+ for (const auto & type : types)
+ {
+ if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(type.get()))
+ {
+ have_array = true;
+ nested_types.emplace_back(type_array->getNestedType());
+ }
+ else
+ all_arrays = false;
+ }
+
+ if (have_array)
+ {
+ if (!all_arrays)
+ return throwOrReturn<on_error>(types, "because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+
+ auto nested_type = getLeastSupertype<on_error>(nested_types);
+ /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype,
+ /// nested_type will be nullptr, we should return nullptr in this case.
+ if (!nested_type)
+ return nullptr;
+
+ return std::make_shared<DataTypeArray>(nested_type);
+ }
+ }
+
+ /// For tuples: all types must be tuples of the same size; elements are combined position by position.
+ {
+ bool have_tuple = false;
+ bool all_tuples = true;
+ size_t tuple_size = 0;
+
+ std::vector<DataTypes> nested_types;
+
+ for (const auto & type : types)
+ {
+ if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
+ {
+ if (!have_tuple)
+ {
+ tuple_size = type_tuple->getElements().size();
+ nested_types.resize(tuple_size);
+ for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+ nested_types[elem_idx].reserve(types.size());
+ }
+ else if (tuple_size != type_tuple->getElements().size())
+ return throwOrReturn<on_error>(types, "because Tuples have different sizes", ErrorCodes::NO_COMMON_TYPE);
+
+ have_tuple = true;
+
+ for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+ nested_types[elem_idx].emplace_back(type_tuple->getElements()[elem_idx]);
+ }
+ else
+ all_tuples = false;
+ }
+
+ if (have_tuple)
+ {
+ if (!all_tuples)
+ return throwOrReturn<on_error>(types, "because some of them are Tuple and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+
+ DataTypes common_tuple_types(tuple_size);
+ for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+ {
+ auto common_type = getLeastSupertype<on_error>(nested_types[elem_idx]);
+ /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype,
+ /// common_type will be nullptr, we should return nullptr in this case.
+ if (!common_type)
+ return nullptr;
+ common_tuple_types[elem_idx] = common_type;
+ }
+
+ return std::make_shared<DataTypeTuple>(common_tuple_types);
+ }
+ }
+
+ /// For maps: all types must be maps; key types and value types are combined separately.
+ {
+ bool have_maps = false;
+ bool all_maps = true;
+ DataTypes key_types;
+ DataTypes value_types;
+ key_types.reserve(types.size());
+ value_types.reserve(types.size());
+
+ for (const auto & type : types)
+ {
+ if (const DataTypeMap * type_map = typeid_cast<const DataTypeMap *>(type.get()))
+ {
+ have_maps = true;
+ key_types.emplace_back(type_map->getKeyType());
+ value_types.emplace_back(type_map->getValueType());
+ }
+ else
+ all_maps = false;
+ }
+
+ if (have_maps)
+ {
+ if (!all_maps)
+ return throwOrReturn<on_error>(types, "because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+
+ auto keys_common_type = getLeastSupertype<on_error>(key_types);
+ auto values_common_type = getLeastSupertype<on_error>(value_types);
+ /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype for keys or values,
+ /// keys_common_type or values_common_type will be nullptr, we should return nullptr in this case.
+ if (!keys_common_type || !values_common_type)
+ return nullptr;
+
+ return std::make_shared<DataTypeMap>(keys_common_type, values_common_type);
+ }
+ }
+
+ /// For LowCardinality. This is above Nullable, because LowCardinality can contain Nullable but cannot be inside Nullable.
+ {
+ bool have_low_cardinality = false;
+ bool have_not_low_cardinality = false;
+
+ DataTypes nested_types;
+ nested_types.reserve(types.size());
+
+ for (const auto & type : types)
+ {
+ if (const DataTypeLowCardinality * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(type.get()))
+ {
+ have_low_cardinality = true;
+ nested_types.emplace_back(type_low_cardinality->getDictionaryType());
+ }
+ else
+ {
+ have_not_low_cardinality = true;
+ nested_types.emplace_back(type);
+ }
+ }
+
+ /// All LowCardinality gives LowCardinality.
+ /// LowCardinality with high cardinality gives high cardinality.
+ if (have_low_cardinality)
+ {
+ if (have_not_low_cardinality)
+ return getLeastSupertype<on_error>(nested_types);
+ else
+ {
+ auto nested_type = getLeastSupertype<on_error>(nested_types);
+ /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype,
+ /// nested_type will be nullptr, we should return nullptr in this case.
+ if (!nested_type)
+ return nullptr;
+ return std::make_shared<DataTypeLowCardinality>(nested_type);
+ }
+ }
+ }
+
+ /// For Nullable: combine the nested types (Nullable types that are only NULL contribute nothing) and wrap the result into Nullable.
+ {
+ bool have_nullable = false;
+
+ DataTypes nested_types;
+ nested_types.reserve(types.size());
+
+ for (const auto & type : types)
+ {
+ if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
+ {
+ have_nullable = true;
+
+ if (!type_nullable->onlyNull())
+ nested_types.emplace_back(type_nullable->getNestedType());
+ }
+ else
+ nested_types.emplace_back(type);
+ }
+
+ if (have_nullable)
+ {
+ auto nested_type = getLeastSupertype<on_error>(nested_types);
+ /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype,
+ /// nested_type will be nullptr, we should return nullptr in this case.
+ if (!nested_type)
+ return nullptr;
+ return std::make_shared<DataTypeNullable>(nested_type);
+ }
+ }
+
+ /// Non-recursive rules: from here on only the set of distinct type ids is examined first.
+
+ TypeIndexSet type_ids;
+ for (const auto & type : types)
+ type_ids.insert(type->getTypeId());
+
+ /// For String and FixedString, or for different FixedStrings, the common type is String.
+ /// No other types are compatible with Strings. TODO Enums?
+ {
+ size_t have_string = type_ids.count(TypeIndex::String);
+ size_t have_fixed_string = type_ids.count(TypeIndex::FixedString);
+
+ if (have_string || have_fixed_string)
+ {
+ bool all_strings = type_ids.size() == (have_string + have_fixed_string);
+ if (!all_strings)
+ return throwOrReturn<on_error>(types, "because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+
+ return std::make_shared<DataTypeString>();
+ }
+ }
+
+ /// For Date and DateTime/DateTime64, the common type is DateTime/DateTime64. No other types are compatible.
+ {
+ size_t have_date = type_ids.count(TypeIndex::Date);
+ size_t have_date32 = type_ids.count(TypeIndex::Date32);
+ size_t have_datetime = type_ids.count(TypeIndex::DateTime);
+ size_t have_datetime64 = type_ids.count(TypeIndex::DateTime64);
+
+ if (have_date || have_date32 || have_datetime || have_datetime64)
+ {
+ bool all_date_or_datetime = type_ids.size() == (have_date + have_date32 + have_datetime + have_datetime64);
+ if (!all_date_or_datetime)
+ return throwOrReturn<on_error>(types,
+ "because some of them are Date/Date32/DateTime/DateTime64 and some of them are not",
+ ErrorCodes::NO_COMMON_TYPE);
+
+ if (have_datetime64 == 0 && have_date32 == 0)
+ {
+ for (const auto & type : types) // The first DateTime argument is returned as is.
+ {
+ if (isDateTime(type))
+ return type;
+ }
+
+ return std::make_shared<DataTypeDateTime>();
+ }
+
+ /// For Date and Date32, the common type is Date32
+ if (have_datetime == 0 && have_datetime64 == 0)
+ {
+ for (const auto & type : types)
+ {
+ if (isDate32(type))
+ return type;
+ }
+ }
+
+ /// For Datetime and Date32, the common type is Datetime64
+ if (have_datetime == 1 && have_date32 == 1 && have_datetime64 == 0)
+ {
+ return std::make_shared<DataTypeDateTime64>(0);
+ }
+
+ UInt8 max_scale = 0; // Select the DateTime64 argument with the largest scale; because of `>=` the last one wins among equals.
+ size_t max_scale_date_time_index = 0;
+
+ for (size_t i = 0; i < types.size(); ++i)
+ {
+ const auto & type = types[i];
+
+ if (const auto * date_time64_type = typeid_cast<const DataTypeDateTime64 *>(type.get()))
+ {
+ const auto scale = date_time64_type->getScale();
+ if (scale >= max_scale)
+ {
+ max_scale_date_time_index = i;
+ max_scale = scale;
+ }
+ }
+ }
+
+ return types[max_scale_date_time_index];
+ }
+ }
+
+ /// Decimals: they can be combined only with each other and with the integer types listed in int_ids.
+ {
+ size_t have_decimal32 = type_ids.count(TypeIndex::Decimal32);
+ size_t have_decimal64 = type_ids.count(TypeIndex::Decimal64);
+ size_t have_decimal128 = type_ids.count(TypeIndex::Decimal128);
+ size_t have_decimal256 = type_ids.count(TypeIndex::Decimal256);
+
+ if (have_decimal32 || have_decimal64 || have_decimal128 || have_decimal256)
+ {
+ size_t num_supported = have_decimal32 + have_decimal64 + have_decimal128 + have_decimal256;
+
+ std::array<TypeIndex, 8> int_ids = {TypeIndex::Int8, TypeIndex::UInt8, TypeIndex::Int16, TypeIndex::UInt16,
+ TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64};
+
+ TypeIndex max_int = TypeIndex::Nothing; // Becomes the last entry of int_ids that is present in type_ids.
+ for (auto int_id : int_ids)
+ {
+ size_t num = type_ids.count(int_id);
+ num_supported += num;
+ if (num)
+ max_int = int_id;
+ }
+
+ if (num_supported != type_ids.size())
+ return throwOrReturn<on_error>(types, "because some of them have no lossless conversion to Decimal", ErrorCodes::NO_COMMON_TYPE);
+
+ UInt32 max_scale = 0; // The largest scale among the Decimal arguments.
+ for (const auto & type : types)
+ {
+ auto type_id = type->getTypeId();
+ if (type_id != TypeIndex::Decimal32
+ && type_id != TypeIndex::Decimal64
+ && type_id != TypeIndex::Decimal128
+ && type_id != TypeIndex::Decimal256)
+ {
+ continue;
+ }
+
+ UInt32 scale = getDecimalScale(*type);
+ if (scale > max_scale)
+ max_scale = scale;
+ }
+
+ UInt32 min_precision = max_scale + leastDecimalPrecisionFor(max_int);
+
+ /// special cases Int32 -> Dec32, Int64 -> Dec64
+ if (max_scale == 0)
+ {
+ if (max_int == TypeIndex::Int32)
+ min_precision = DataTypeDecimal<Decimal32>::maxPrecision();
+ else if (max_int == TypeIndex::Int64)
+ min_precision = DataTypeDecimal<Decimal64>::maxPrecision();
+ }
+
+ if (min_precision > DataTypeDecimal<Decimal256>::maxPrecision())
+ return throwOrReturn<on_error>(types, "because the least supertype is Decimal("
+ + toString(min_precision) + ',' + toString(max_scale) + ')',
+ ErrorCodes::NO_COMMON_TYPE);
+
+ if (have_decimal256 || min_precision > DataTypeDecimal<Decimal128>::maxPrecision())
+ return std::make_shared<DataTypeDecimal<Decimal256>>(DataTypeDecimal<Decimal256>::maxPrecision(), max_scale);
+ if (have_decimal128 || min_precision > DataTypeDecimal<Decimal64>::maxPrecision())
+ return std::make_shared<DataTypeDecimal<Decimal128>>(DataTypeDecimal<Decimal128>::maxPrecision(), max_scale);
+ if (have_decimal64 || min_precision > DataTypeDecimal<Decimal32>::maxPrecision())
+ return std::make_shared<DataTypeDecimal<Decimal64>>(DataTypeDecimal<Decimal64>::maxPrecision(), max_scale);
+ return std::make_shared<DataTypeDecimal<Decimal32>>(DataTypeDecimal<Decimal32>::maxPrecision(), max_scale);
+ }
+ }
+
+ /// For numeric types, the most complicated part.
+ {
+ auto numeric_type = getNumericType<on_error>(type_ids);
+ if (numeric_type)
+ return numeric_type;
+ }
+
+ /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
+ return throwOrReturn<on_error>(types, "", ErrorCodes::NO_COMMON_TYPE);
+}
+
+/// Least supertype of the given data types with the LeastSupertypeOnError::String policy.
+DataTypePtr getLeastSupertypeOrString(const DataTypes & types)
+{
+    constexpr auto policy = LeastSupertypeOnError::String;
+    return getLeastSupertype<policy>(types);
+}
+
+/// Least supertype of the given data types with the LeastSupertypeOnError::Null policy.
+DataTypePtr tryGetLeastSupertype(const DataTypes & types)
+{
+    constexpr auto policy = LeastSupertypeOnError::Null;
+    return getLeastSupertype<policy>(types);
+}
+
+/// Least supertype for a set of type indexes. Without the type objects only
+/// Nothing, the numeric types and String can be handled.
+template <LeastSupertypeOnError on_error>
+DataTypePtr getLeastSupertype(const TypeIndexSet & types)
+{
+    if (types.empty())
+        return std::make_shared<DataTypeNothing>();
+
+    /// A single type index: create the corresponding simple type.
+    if (types.size() == 1)
+    {
+        WhichDataType which(*types.begin());
+        if (which.isNothing())
+            return std::make_shared<DataTypeNothing>();
+
+    #define DISPATCH(TYPE) \
+        if (which.idx == TypeIndex::TYPE) \
+            return std::make_shared<DataTypeNumber<TYPE>>(); /// NOLINT
+
+        FOR_NUMERIC_TYPES(DISPATCH)
+    #undef DISPATCH
+
+        if (which.isString())
+            return std::make_shared<DataTypeString>();
+
+        return throwOrReturn<on_error>(types, "because cannot get common type by type indexes with non-simple types", ErrorCodes::NO_COMMON_TYPE);
+    }
+
+    /// String can be combined only with Nothing.
+    if (types.contains(TypeIndex::String))
+    {
+        const bool string_and_nothing_only = types.size() == 2 && types.contains(TypeIndex::Nothing);
+        if (string_and_nothing_only)
+            return std::make_shared<DataTypeString>();
+
+        return throwOrReturn<on_error>(types, "because some of them are String and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+    }
+
+    if (auto numeric_type = getNumericType<on_error>(types))
+        return numeric_type;
+
+    return throwOrReturn<on_error>(types, "", ErrorCodes::NO_COMMON_TYPE);
+}
+
+/// Least supertype of the given type indexes with the LeastSupertypeOnError::String policy.
+DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types)
+{
+    constexpr auto policy = LeastSupertypeOnError::String;
+    return getLeastSupertype<policy>(types);
+}
+
+/// Least supertype of the given type indexes with the LeastSupertypeOnError::Null policy.
+DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types)
+{
+    constexpr auto policy = LeastSupertypeOnError::Null;
+    return getLeastSupertype<policy>(types);
+}
+
+template DataTypePtr getLeastSupertype<LeastSupertypeOnError::Throw>(const DataTypes & types);
+template DataTypePtr getLeastSupertype<LeastSupertypeOnError::Throw>(const TypeIndexSet & types);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/getLeastSupertype.h b/contrib/clickhouse/src/DataTypes/getLeastSupertype.h
new file mode 100644
index 00000000000..2ef4a0e6850
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/getLeastSupertype.h
@@ -0,0 +1,39 @@
+#pragma once
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+
+/// Selects what getLeastSupertype does when no common supertype exists.
+enum class LeastSupertypeOnError
+{
+ /// Throw an exception (the default template argument of getLeastSupertype).
+ Throw,
+ /// Policy used by getLeastSupertypeOrString.
+ String,
+ /// Policy used by tryGetLeastSupertype.
+ Null,
+};
+
+/** Get data type that covers all possible values of passed data types.
+ * If there is no such data type, throws an exception (with the default on_error = Throw).
+ *
+ * Examples: least common supertype for UInt8, Int8 - Int16.
+ * Examples: there is no least common supertype for Array(UInt8), Int8.
+ */
+template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
+DataTypePtr getLeastSupertype(const DataTypes & types);
+
+/// Same as above but returns String type instead of throwing an exception.
+/// All types can be cast to String, because they can be serialized to String.
+DataTypePtr getLeastSupertypeOrString(const DataTypes & types);
+
+/// Same as above but returns nullptr instead of throwing an exception.
+DataTypePtr tryGetLeastSupertype(const DataTypes & types);
+
+/// Set of type indexes, for callers that have only type ids and not full data types.
+using TypeIndexSet = std::unordered_set<TypeIndex>;
+
+/// Overloads working on type indexes only. on_error selects the behaviour on failure,
+/// in the same way as for the DataTypes overloads above.
+template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
+DataTypePtr getLeastSupertype(const TypeIndexSet & types);
+
+DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types);
+
+DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/getMostSubtype.cpp b/contrib/clickhouse/src/DataTypes/getMostSubtype.cpp
new file mode 100644
index 00000000000..33b5735456e
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/getMostSubtype.cpp
@@ -0,0 +1,398 @@
+#include <IO/WriteBufferFromString.h>
+#include <IO/Operators.h>
+#include <Common/typeid_cast.h>
+
+#include <DataTypes/getMostSubtype.h>
+
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypesDecimal.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int NO_COMMON_TYPE;
+}
+
+namespace
+{
+String getExceptionMessagePrefix(const DataTypes & types)
+{
+ WriteBufferFromOwnString res;
+ res << "There is no subtype for types ";
+
+ bool first = true;
+ for (const auto & type : types)
+ {
+ if (!first)
+ res << ", ";
+ first = false;
+
+ res << type->getName();
+ }
+
+ return res.str();
+}
+
+}
+
+
+/// Computes the "most common subtype" of `types` by applying a fixed sequence of rules; the first rule that matches decides.
+/// Order of rules: empty list, single type, all types equal, any Nothing, Arrays, Tuples, Nullable,
+/// String/FixedString, Date/DateTime/DateTime64, numbers, Decimals.
+///
+/// Whenever the answer would be Nothing, the result depends on `throw_if_result_is_nothing`:
+/// either DataTypeNothing is returned or an exception with code NO_COMMON_TYPE is thrown.
+/// `force_support_conversion` only affects the Nullable rule: with it, a mix of Nullable and
+/// non-Nullable types keeps the Nullable wrapper in the result.
+DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_nothing, bool force_support_conversion)
+{
+ /// Shared failure path: throws with the given reason appended to the list of type names, or returns Nothing.
+ auto get_nothing_or_throw = [throw_if_result_is_nothing, & types](const std::string & reason)
+ {
+ if (throw_if_result_is_nothing)
+ throw Exception::createDeprecated(getExceptionMessagePrefix(types) + reason, ErrorCodes::NO_COMMON_TYPE);
+ return std::make_shared<DataTypeNothing>();
+ };
+
+ /// Trivial cases
+
+ if (types.empty())
+ {
+ if (throw_if_result_is_nothing)
+ throw Exception(ErrorCodes::NO_COMMON_TYPE, "There is no common type for empty type list");
+ return std::make_shared<DataTypeNothing>();
+ }
+
+ if (types.size() == 1)
+ {
+ if (throw_if_result_is_nothing && typeid_cast<const DataTypeNothing *>(types[0].get()))
+ throw Exception(ErrorCodes::NO_COMMON_TYPE, "There is no common type for type Nothing");
+ return types[0];
+ }
+
+ /// All types are equal
+ {
+ bool all_equal = true;
+ for (size_t i = 1, size = types.size(); i < size; ++i)
+ {
+ if (!types[i]->equals(*types[0]))
+ {
+ all_equal = false;
+ break;
+ }
+ }
+
+ if (all_equal)
+ return types[0];
+ }
+
+ /// Recursive rules
+
+ /// If there are Nothing types, result is Nothing
+ {
+ for (const auto & type : types)
+ if (typeid_cast<const DataTypeNothing *>(type.get()))
+ return get_nothing_or_throw(" because some of them are Nothing");
+ }
+
+ /// For Arrays
+ {
+ bool have_array = false;
+ bool all_arrays = true;
+
+ DataTypes nested_types;
+ nested_types.reserve(types.size());
+
+ for (const auto & type : types)
+ {
+ if (const auto * type_array = typeid_cast<const DataTypeArray *>(type.get()))
+ {
+ have_array = true;
+ nested_types.emplace_back(type_array->getNestedType());
+ }
+ else
+ all_arrays = false;
+ }
+
+ if (have_array)
+ {
+ if (!all_arrays)
+ return get_nothing_or_throw(" because some of them are Array and some of them are not");
+
+ /// The nested call never throws on a Nothing result (flag is false): Array(Nothing) is an acceptable answer here.
+ return std::make_shared<DataTypeArray>(getMostSubtype(nested_types, false, force_support_conversion));
+ }
+ }
+
+ /// For tuples
+ {
+ bool have_tuple = false;
+ bool all_tuples = true;
+ size_t tuple_size = 0;
+
+ /// nested_types[k] collects the k-th element type of every tuple.
+ std::vector<DataTypes> nested_types;
+
+ for (const auto & type : types)
+ {
+ if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
+ {
+ if (!have_tuple)
+ {
+ /// The first tuple seen fixes the expected size.
+ tuple_size = type_tuple->getElements().size();
+ nested_types.resize(tuple_size);
+ for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+ nested_types[elem_idx].reserve(types.size());
+ }
+ else if (tuple_size != type_tuple->getElements().size())
+ return get_nothing_or_throw(" because Tuples have different sizes");
+
+ have_tuple = true;
+
+ for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+ nested_types[elem_idx].emplace_back(type_tuple->getElements()[elem_idx]);
+ }
+ else
+ all_tuples = false;
+ }
+
+ if (have_tuple)
+ {
+ if (!all_tuples)
+ return get_nothing_or_throw(" because some of them are Tuple and some of them are not");
+
+ /// Unlike for Arrays, the throw flag is passed through to every element position.
+ DataTypes common_tuple_types(tuple_size);
+ for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+ common_tuple_types[elem_idx] =
+ getMostSubtype(nested_types[elem_idx], throw_if_result_is_nothing, force_support_conversion);
+
+ return std::make_shared<DataTypeTuple>(common_tuple_types);
+ }
+ }
+
+ /// For Nullable
+ {
+ bool all_nullable = true;
+ bool have_nullable = false;
+
+ /// Nullable wrappers are stripped; non-Nullable types are kept as they are.
+ DataTypes nested_types;
+ nested_types.reserve(types.size());
+
+ for (const auto & type : types)
+ {
+ if (const auto * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
+ {
+ have_nullable = true;
+ nested_types.emplace_back(type_nullable->getNestedType());
+ }
+ else
+ {
+ all_nullable = false;
+ nested_types.emplace_back(type);
+ }
+ }
+
+ if (have_nullable)
+ {
+ /// The result stays Nullable only if every input was Nullable or the caller asked for a type every input can be converted to.
+ if (all_nullable || force_support_conversion)
+ return std::make_shared<DataTypeNullable>(getMostSubtype(nested_types, false, force_support_conversion));
+
+ return getMostSubtype(nested_types, throw_if_result_is_nothing, force_support_conversion);
+ }
+ }
+
+ /// Non-recursive rules
+
+ /// For String and FixedString, the common type is FixedString.
+ /// For different FixedStrings, the common type is Nothing.
+ /// No other types are compatible with Strings. TODO Enums?
+ {
+ bool have_string = false;
+ bool all_strings = true;
+
+ DataTypePtr fixed_string_type = nullptr;
+
+ for (const auto & type : types)
+ {
+ if (isFixedString(type))
+ {
+ have_string = true;
+ if (!fixed_string_type)
+ fixed_string_type = type;
+ else if (!type->equals(*fixed_string_type))
+ return get_nothing_or_throw(" because some of them are FixedStrings with different length");
+ }
+ else if (isString(type))
+ have_string = true;
+ else
+ all_strings = false;
+ }
+
+ if (have_string)
+ {
+ if (!all_strings)
+ return get_nothing_or_throw(" because some of them are String/FixedString and some of them are not");
+
+ return fixed_string_type ? fixed_string_type : std::make_shared<DataTypeString>();
+ }
+ }
+
+ /// For Date and DateTime, the common type is Date. No other types are compatible.
+ /// DateTime64 is accepted by this rule as well.
+ {
+ bool have_date_or_datetime = false;
+ bool all_date_or_datetime = true;
+
+ for (const auto & type : types)
+ {
+ if (isDate(type) || isDateTime(type) || isDateTime64(type))
+ have_date_or_datetime = true;
+ else
+ all_date_or_datetime = false;
+ }
+
+ if (have_date_or_datetime)
+ {
+ if (!all_date_or_datetime)
+ return get_nothing_or_throw(" because some of them are Date/DateTime and some of them are not");
+
+ return std::make_shared<DataTypeDate>();
+ }
+ }
+
+ /// For numeric types, the most complicated part.
+ {
+ bool all_numbers = true;
+
+ /// Smallest width seen in each category; 0 means "no type of this category seen yet".
+ size_t min_bits_of_signed_integer = 0;
+ size_t min_bits_of_unsigned_integer = 0;
+ size_t min_mantissa_bits_of_floating = 0;
+
+ auto minimize = [](size_t & what, size_t value)
+ {
+ if (what == 0 || value < what)
+ what = value;
+ };
+
+ for (const auto & type : types)
+ {
+ if (typeid_cast<const DataTypeUInt8 *>(type.get()))
+ minimize(min_bits_of_unsigned_integer, 8);
+ else if (typeid_cast<const DataTypeUInt16 *>(type.get()))
+ minimize(min_bits_of_unsigned_integer, 16);
+ else if (typeid_cast<const DataTypeUInt32 *>(type.get()))
+ minimize(min_bits_of_unsigned_integer, 32);
+ else if (typeid_cast<const DataTypeUInt64 *>(type.get()))
+ minimize(min_bits_of_unsigned_integer, 64);
+ else if (typeid_cast<const DataTypeUInt128 *>(type.get()))
+ minimize(min_bits_of_unsigned_integer, 128);
+ else if (typeid_cast<const DataTypeUInt256 *>(type.get()))
+ minimize(min_bits_of_unsigned_integer, 256);
+ else if (typeid_cast<const DataTypeInt8 *>(type.get()))
+ minimize(min_bits_of_signed_integer, 8);
+ else if (typeid_cast<const DataTypeInt16 *>(type.get()))
+ minimize(min_bits_of_signed_integer, 16);
+ else if (typeid_cast<const DataTypeInt32 *>(type.get()))
+ minimize(min_bits_of_signed_integer, 32);
+ else if (typeid_cast<const DataTypeInt64 *>(type.get()))
+ minimize(min_bits_of_signed_integer, 64);
+ else if (typeid_cast<const DataTypeInt128 *>(type.get()))
+ minimize(min_bits_of_signed_integer, 128);
+ else if (typeid_cast<const DataTypeInt256 *>(type.get()))
+ minimize(min_bits_of_signed_integer, 256);
+ else if (typeid_cast<const DataTypeFloat32 *>(type.get()))
+ minimize(min_mantissa_bits_of_floating, 24);
+ else if (typeid_cast<const DataTypeFloat64 *>(type.get()))
+ minimize(min_mantissa_bits_of_floating, 53);
+ else
+ all_numbers = false;
+ }
+
+ if (min_bits_of_signed_integer || min_bits_of_unsigned_integer || min_mantissa_bits_of_floating)
+ {
+ if (!all_numbers)
+ return get_nothing_or_throw(" because some of them are numbers and some of them are not");
+
+ /// If the result must be floating.
+ /// (Only when no integer type is present; a mix of floats and integers falls through to the integer rules below.)
+ if (!min_bits_of_signed_integer && !min_bits_of_unsigned_integer)
+ {
+ if (min_mantissa_bits_of_floating <= 24)
+ return std::make_shared<DataTypeFloat32>();
+ else if (min_mantissa_bits_of_floating <= 53)
+ return std::make_shared<DataTypeFloat64>();
+ else
+ throw Exception(ErrorCodes::NO_COMMON_TYPE,
+ "Logical error: {} but as all data types are floats, "
+ "we must have found maximum float type", getExceptionMessagePrefix(types));
+ }
+
+ /// If there are signed and unsigned types of same bit-width, the result must be unsigned number.
+ /// The unsigned type is also chosen when it is narrower than every signed type, or when no signed type is present.
+ if (min_bits_of_unsigned_integer &&
+ (min_bits_of_signed_integer == 0 || min_bits_of_unsigned_integer <= min_bits_of_signed_integer))
+ {
+ if (min_bits_of_unsigned_integer <= 8)
+ return std::make_shared<DataTypeUInt8>();
+ else if (min_bits_of_unsigned_integer <= 16)
+ return std::make_shared<DataTypeUInt16>();
+ else if (min_bits_of_unsigned_integer <= 32)
+ return std::make_shared<DataTypeUInt32>();
+ else if (min_bits_of_unsigned_integer <= 64)
+ return std::make_shared<DataTypeUInt64>();
+ else if (min_bits_of_unsigned_integer <= 128)
+ return std::make_shared<DataTypeUInt128>();
+ else if (min_bits_of_unsigned_integer <= 256)
+ return std::make_shared<DataTypeUInt256>();
+ else
+ throw Exception(ErrorCodes::NO_COMMON_TYPE,
+ "Logical error: {} but as all data types are integers, "
+ "we must have found maximum unsigned integer type",
+ getExceptionMessagePrefix(types));
+ }
+
+ /// All signed.
+ /// (More precisely: the narrowest signed type is narrower than any unsigned type present.)
+ {
+ if (min_bits_of_signed_integer <= 8)
+ return std::make_shared<DataTypeInt8>();
+ else if (min_bits_of_signed_integer <= 16)
+ return std::make_shared<DataTypeInt16>();
+ else if (min_bits_of_signed_integer <= 32)
+ return std::make_shared<DataTypeInt32>();
+ else if (min_bits_of_signed_integer <= 64)
+ return std::make_shared<DataTypeInt64>();
+ else if (min_bits_of_signed_integer <= 128)
+ return std::make_shared<DataTypeInt128>();
+ else if (min_bits_of_signed_integer <= 256)
+ return std::make_shared<DataTypeInt256>();
+ else
+ throw Exception(ErrorCodes::NO_COMMON_TYPE,
+ "Logical error: {} but as all data types are integers, "
+ "we must have found maximum signed integer type", getExceptionMessagePrefix(types));
+ }
+ }
+ }
+
+ /// Decimals
+ /// If every type is a Decimal, the result is built from the smallest precision and the smallest scale among them.
+ {
+ bool all_decimals = true;
+ UInt32 min_scale = std::numeric_limits<UInt32>::max();
+ UInt32 min_precision = std::numeric_limits<UInt32>::max();
+ for (const auto & type : types)
+ {
+ if (isDecimal(type))
+ {
+ min_scale = std::min(min_scale, getDecimalScale(*type));
+ min_precision = std::min(min_precision, getDecimalPrecision(*type));
+ }
+ else
+ {
+ all_decimals = false;
+ break;
+ }
+ }
+
+ if (all_decimals)
+ return createDecimal<DataTypeDecimal>(min_precision, min_scale);
+ }
+
+ /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
+ return get_nothing_or_throw("");
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/getMostSubtype.h b/contrib/clickhouse/src/DataTypes/getMostSubtype.h
new file mode 100644
index 00000000000..c46cf4e2054
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/getMostSubtype.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+
+
+namespace DB
+{
+
+/** Get data type that covers intersection of all possible values of passed data types.
+ * DataTypeNothing is the most common subtype for all types.
+ * Examples: most common subtype for UInt16, UInt8 and Int8 - UInt8.
+ * Examples: most common subtype for Array(UInt8), Int8 is Nothing
+ *
+ * If throw_if_result_is_nothing is true, an exception is thrown instead of returning Nothing.
+ *
+ * If force_support_conversion is true, returns type which may be used to convert each argument to.
+ * Example: most common subtype for Array(UInt8) and Array(Nullable(Int32)) is Array(Nullable(UInt8)) if force_support_conversion is true.
+ */
+DataTypePtr getMostSubtype(const DataTypes & types, bool throw_if_result_is_nothing = false, bool force_support_conversion = false);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/hasNullable.cpp b/contrib/clickhouse/src/DataTypes/hasNullable.cpp
new file mode 100644
index 00000000000..908b9880473
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/hasNullable.cpp
@@ -0,0 +1,33 @@
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/hasNullable.h>
+
+namespace DB
+{
+
+/// Tells whether `type` is Nullable / LowCardinality(Nullable) itself or contains such a type
+/// inside Array, Tuple or Map (value side only) nesting.
+bool hasNullable(const DataTypePtr & type)
+{
+    if (isNullableOrLowCardinalityNullable(type))
+        return true;
+
+    const auto * raw_type = type.get();
+
+    /// Array: look at the element type.
+    if (const auto * array_type = typeid_cast<const DataTypeArray *>(raw_type))
+        return hasNullable(array_type->getNestedType());
+
+    /// Tuple: any element may carry a Nullable.
+    if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(raw_type))
+    {
+        for (const auto & element : tuple_type->getElements())
+            if (hasNullable(element))
+                return true;
+
+        return false;
+    }
+
+    // Key type cannot be nullable. We only check value type.
+    if (const auto * map_type = typeid_cast<const DataTypeMap *>(raw_type))
+        return hasNullable(map_type->getValueType());
+
+    return false;
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/hasNullable.h b/contrib/clickhouse/src/DataTypes/hasNullable.h
new file mode 100644
index 00000000000..271803496f1
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/hasNullable.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+
+/// Returns true if the type is Nullable or LowCardinality(Nullable), or contains such a type
+/// as an Array element, a Tuple element or a Map value (checked recursively).
+bool hasNullable(const DataTypePtr & type);
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/registerDataTypeDateTime.cpp b/contrib/clickhouse/src/DataTypes/registerDataTypeDateTime.cpp
new file mode 100644
index 00000000000..2b5c4a0a143
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/registerDataTypeDateTime.cpp
@@ -0,0 +1,118 @@
+
+#include <Core/Field.h>
+#include <Parsers/IAST.h>
+#include <Parsers/ASTLiteral.h>
+#include <DataTypes/IDataType.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeDateTime64.h>
+#include <DataTypes/DataTypeFactory.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+ extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+/// Tells getArgument how to react when the requested literal is absent or has another type.
+enum class ArgumentKind
+{
+ /// getArgument returns an empty std::optional.
+ Optional,
+ /// getArgument throws an exception.
+ Mandatory
+};
+
+/// Builds the message reported when a data type parameter literal is missing or has the wrong type.
+/// `message` is spliced in directly after the data type name, so callers pass it with a leading space.
+PreformattedMessage getExceptionMessage(
+    const String & message, size_t argument_index, const char * argument_name,
+    const std::string & context_data_type_name, Field::Types::Which field_type)
+{
+    auto formatted = PreformattedMessage::create(
+        "Parameter #{} '{}' for {}{}, expected {} literal",
+        argument_index,
+        argument_name,
+        context_data_type_name,
+        message,
+        field_type);
+
+    return formatted;
+}
+
+/// Extracts the literal at position `argument_index` of `arguments` as a value of type T.
+///
+/// The lookup fails when `arguments` is null, the index is out of range, the child is not an
+/// ASTLiteral, or the literal's field type differs from the field type nearest to T.
+/// On failure:
+///  - ArgumentKind::Optional returns an empty std::optional;
+///  - ArgumentKind::Mandatory throws ILLEGAL_TYPE_OF_ARGUMENT if a literal of another type is
+///    present, and NUMBER_OF_ARGUMENTS_DOESNT_MATCH otherwise.
+///
+/// `argument_name` and `context_data_type_name` are only read while building the error message
+/// (hence [[maybe_unused]] for the Optional instantiation); the latter is taken by const
+/// reference so that no string is copied on every call.
+template <typename T, ArgumentKind Kind>
+std::conditional_t<Kind == ArgumentKind::Optional, std::optional<T>, T>
+getArgument(const ASTPtr & arguments, size_t argument_index, const char * argument_name [[maybe_unused]], const std::string & context_data_type_name [[maybe_unused]])
+{
+    using NearestResultType = NearestFieldType<T>;
+    const auto field_type = Field::TypeToEnum<NearestResultType>::value;
+    const ASTLiteral * argument = nullptr;
+
+    /// `argument` is assigned inside the condition: it stays null unless a literal exists at the index.
+    if (!arguments || arguments->children.size() <= argument_index
+        || !(argument = arguments->children[argument_index]->as<ASTLiteral>())
+        || argument->value.getType() != field_type)
+    {
+        if constexpr (Kind == ArgumentKind::Optional)
+            return {};
+        else
+        {
+            /// A non-null `argument` here means the literal exists but has an unexpected type.
+            if (argument && argument->value.getType() != field_type)
+                throw Exception(getExceptionMessage(fmt::format(" has wrong type: {}", argument->value.getTypeName()),
+                    argument_index, argument_name, context_data_type_name, field_type), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            else
+                throw Exception(getExceptionMessage(" is missing", argument_index, argument_name, context_data_type_name, field_type),
+                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+        }
+    }
+
+    return argument->value.get<NearestResultType>();
+}
+
+/// Factory callback for "DateTime".
+/// Accepts an optional numeric scale followed by an optional time zone name. Without arguments
+/// a plain DateTime is created; a non-zero scale produces DateTime64.
+static DataTypePtr create(const ASTPtr & arguments)
+{
+    const bool no_arguments = !arguments || arguments->children.empty();
+    if (no_arguments)
+        return std::make_shared<DataTypeDateTime>();
+
+    const auto scale = getArgument<UInt64, ArgumentKind::Optional>(arguments, 0, "scale", "DateTime");
+
+    /// The time zone follows the scale when a scale is given, otherwise it comes first.
+    const size_t timezone_index = scale ? 1 : 0;
+    const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, timezone_index, "timezone", "DateTime");
+
+    /// Arguments are present but neither of them could be interpreted.
+    if (!(scale || timezone))
+        throw Exception(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64),
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+    /// If scale is defined, the data type is DateTime when scale = 0 otherwise the data type is DateTime64
+    const bool is_datetime64 = scale && scale.value() != 0;
+    if (is_datetime64)
+        return std::make_shared<DataTypeDateTime64>(scale.value(), timezone.value_or(String{}));
+
+    return std::make_shared<DataTypeDateTime>(timezone.value_or(String{}));
+}
+
+/// Factory callback for "DateTime32": a DateTime with at most one argument, the time zone name.
+static DataTypePtr create32(const ASTPtr & arguments)
+{
+    const bool has_arguments = arguments && !arguments->children.empty();
+    if (!has_arguments)
+        return std::make_shared<DataTypeDateTime>();
+
+    if (arguments->children.size() != 1)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "DateTime32 data type can optionally have only one argument - time zone name");
+
+    /// The single argument must be a string literal; getArgument throws otherwise.
+    return std::make_shared<DataTypeDateTime>(
+        getArgument<String, ArgumentKind::Mandatory>(arguments, 0, "timezone", "DateTime32"));
+}
+
+/// Factory callback for "DateTime64": a mandatory scale and an optional time zone name.
+/// Without arguments the default scale is used.
+static DataTypePtr create64(const ASTPtr & arguments)
+{
+    const size_t argument_count = arguments ? arguments->children.size() : 0;
+
+    if (argument_count == 0)
+        return std::make_shared<DataTypeDateTime64>(DataTypeDateTime64::default_scale);
+
+    if (argument_count > 2)
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "DateTime64 data type can optionally have two argument - scale and time zone name");
+
+    const auto scale = getArgument<UInt64, ArgumentKind::Mandatory>(arguments, 0, "scale", "DateTime64");
+    const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, 1, "timezone", "DateTime64");
+
+    return std::make_shared<DataTypeDateTime64>(scale, timezone.value_or(String{}));
+}
+
+/// Registers the DateTime family in the data type factory:
+/// "DateTime", "DateTime32" and "DateTime64" with their creator functions, plus the alias "TIMESTAMP" for "DateTime".
+/// All names are registered as case insensitive.
+void registerDataTypeDateTime(DataTypeFactory & factory)
+{
+ factory.registerDataType("DateTime", create, DataTypeFactory::CaseInsensitive);
+ factory.registerDataType("DateTime32", create32, DataTypeFactory::CaseInsensitive);
+ factory.registerDataType("DateTime64", create64, DataTypeFactory::CaseInsensitive);
+
+ factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::CaseInsensitive);
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/transformTypesRecursively.cpp b/contrib/clickhouse/src/DataTypes/transformTypesRecursively.cpp
new file mode 100644
index 00000000000..cdf221a6b72
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/transformTypesRecursively.cpp
@@ -0,0 +1,172 @@
+#include <DataTypes/transformTypesRecursively.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeNullable.h>
+
+
+namespace DB
+{
+
+/// Collects the type id of every passed type into a set.
+TypeIndexesSet getTypesIndexes(const DataTypes & types)
+{
+    TypeIndexesSet collected;
+
+    for (size_t i = 0; i < types.size(); ++i)
+        collected.insert(types[i]->getTypeId());
+
+    return collected;
+}
+
+/// Applies the given callbacks to `types` in place, descending through complex types.
+///
+/// Exactly one of the following branches is taken, checked in this order:
+///  - at least one type is Nullable: Nullable wrappers are stripped, the function recurses into the
+///    resulting list, and Nullable is restored where the transformed type can be inside Nullable;
+///  - at least one type is Array / Tuple / Map: the function recurses into the nested types only
+///    if all types are of that same kind (and, for Tuple, have the same size);
+///  - otherwise `transform_simple_types` is called on the list.
+/// In the first three branches `transform_complex_types` is called afterwards, if it is set.
+void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types)
+{
+    TypeIndexesSet type_indexes = getTypesIndexes(types);
+
+    /// Nullable
+    if (type_indexes.contains(TypeIndex::Nullable))
+    {
+        /// Remembers which positions were Nullable so the wrapper can be restored afterwards.
+        std::vector<UInt8> is_nullable;
+        is_nullable.reserve(types.size());
+        DataTypes nested_types;
+        nested_types.reserve(types.size());
+        for (const auto & type : types)
+        {
+            if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
+            {
+                is_nullable.push_back(1);
+                nested_types.push_back(type_nullable->getNestedType());
+            }
+            else
+            {
+                is_nullable.push_back(0);
+                nested_types.push_back(type);
+            }
+        }
+
+        transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
+        for (size_t i = 0; i != types.size(); ++i)
+        {
+            /// Type could be changed so it cannot be inside Nullable anymore.
+            if (is_nullable[i] && nested_types[i]->canBeInsideNullable())
+                types[i] = makeNullable(nested_types[i]);
+            else
+                types[i] = nested_types[i];
+        }
+
+        if (transform_complex_types)
+        {
+            /// Some types could be changed.
+            type_indexes = getTypesIndexes(types);
+            transform_complex_types(types, type_indexes);
+        }
+
+        return;
+    }
+
+    /// Arrays
+    if (type_indexes.contains(TypeIndex::Array))
+    {
+        /// All types are Array
+        if (type_indexes.size() == 1)
+        {
+            DataTypes nested_types;
+            /// One nested type per input, as in the Nullable and Map branches.
+            nested_types.reserve(types.size());
+            for (const auto & type : types)
+                nested_types.push_back(typeid_cast<const DataTypeArray *>(type.get())->getNestedType());
+
+            transformTypesRecursively(nested_types, transform_simple_types, transform_complex_types);
+            for (size_t i = 0; i != types.size(); ++i)
+                types[i] = std::make_shared<DataTypeArray>(nested_types[i]);
+        }
+
+        if (transform_complex_types)
+            transform_complex_types(types, type_indexes);
+
+        return;
+    }
+
+    /// Tuples
+    if (type_indexes.contains(TypeIndex::Tuple))
+    {
+        /// All types are Tuple
+        if (type_indexes.size() == 1)
+        {
+            /// nested_types[k] holds the k-th element type of every tuple.
+            std::vector<DataTypes> nested_types;
+            const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(types[0].get());
+            size_t tuple_size = type_tuple->getElements().size();
+            nested_types.resize(tuple_size);
+            for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+                nested_types[elem_idx].reserve(types.size());
+
+            bool sizes_are_equal = true;
+            for (const auto & type : types)
+            {
+                type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
+                if (type_tuple->getElements().size() != tuple_size)
+                {
+                    sizes_are_equal = false;
+                    break;
+                }
+
+                for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+                    nested_types[elem_idx].emplace_back(type_tuple->getElements()[elem_idx]);
+            }
+
+            /// Tuples of different sizes are left untouched.
+            if (sizes_are_equal)
+            {
+                /// transposed_nested_types[i] collects the transformed elements of the i-th tuple.
+                std::vector<DataTypes> transposed_nested_types(types.size());
+                for (auto & tuple_elements : transposed_nested_types)
+                    tuple_elements.reserve(tuple_size);
+
+                for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
+                {
+                    transformTypesRecursively(nested_types[elem_idx], transform_simple_types, transform_complex_types);
+                    for (size_t i = 0; i != types.size(); ++i)
+                        transposed_nested_types[i].push_back(nested_types[elem_idx][i]);
+                }
+
+                for (size_t i = 0; i != types.size(); ++i)
+                    types[i] = std::make_shared<DataTypeTuple>(transposed_nested_types[i]);
+            }
+        }
+
+        if (transform_complex_types)
+            transform_complex_types(types, type_indexes);
+
+        return;
+    }
+
+    /// Maps
+    if (type_indexes.contains(TypeIndex::Map))
+    {
+        /// All types are Map
+        if (type_indexes.size() == 1)
+        {
+            DataTypes key_types;
+            DataTypes value_types;
+            key_types.reserve(types.size());
+            value_types.reserve(types.size());
+            for (const auto & type : types)
+            {
+                const DataTypeMap * type_map = typeid_cast<const DataTypeMap *>(type.get());
+                key_types.emplace_back(type_map->getKeyType());
+                value_types.emplace_back(type_map->getValueType());
+            }
+
+            /// Keys and values are transformed as two independent lists.
+            transformTypesRecursively(key_types, transform_simple_types, transform_complex_types);
+            transformTypesRecursively(value_types, transform_simple_types, transform_complex_types);
+
+            for (size_t i = 0; i != types.size(); ++i)
+                types[i] = std::make_shared<DataTypeMap>(key_types[i], value_types[i]);
+        }
+
+        if (transform_complex_types)
+            transform_complex_types(types, type_indexes);
+
+        return;
+    }
+
+    /// None of Nullable/Array/Tuple/Map is present.
+    transform_simple_types(types, type_indexes);
+}
+
+/// Runs transformTypesRecursively over a one-element list made from `type`, invoking `callback`
+/// on the first element of whatever list reaches the simple-types step. No complex-type transform is supplied.
+/// NOTE(review): the local `types` is not assigned back to `type` after the traversal, so a replacement
+/// made by `callback` does not appear to reach the caller's `type` — confirm whether this is intended.
+void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback)
+{
+ DataTypes types = {type};
+ transformTypesRecursively(types, [callback](auto & data_types, TypeIndexesSet &){ callback(data_types[0]); }, {});
+}
+
+}
diff --git a/contrib/clickhouse/src/DataTypes/transformTypesRecursively.h b/contrib/clickhouse/src/DataTypes/transformTypesRecursively.h
new file mode 100644
index 00000000000..f9c776b4205
--- /dev/null
+++ b/contrib/clickhouse/src/DataTypes/transformTypesRecursively.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <functional>
+
+namespace DB
+{
+
+/// Function that applies custom transformation functions to provided types recursively.
+/// Implementation is similar to function getLeastSupertype:
+/// If at least one type is Nullable, Nullable wrappers are removed, this function is called on the resulting types
+/// and Nullable is restored afterwards where the transformed type can be inside Nullable.
+/// If all types are Array, all are Tuple (of the same size) or all are Map, this function is called on their nested types.
+/// If not all types are the same complex type (Array/Map/Tuple), this function won't be called on nested types.
+/// Function transform_simple_types will be applied to resulting simple types after all recursive calls.
+/// Function transform_complex_types (if set) will be applied to complex types (Array/Map/Tuple/Nullable) after the recursive call on their nested types.
+void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types);
+
+/// Calls `callback` on the simple types nested inside `type`, using transformTypesRecursively without a complex-type transform.
+void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback);
+
+}