diff options
author | hcpp <hcpp@ydb.tech> | 2023-03-08 13:51:38 +0300 |
---|---|---|
committer | hcpp <hcpp@ydb.tech> | 2023-03-08 13:51:38 +0300 |
commit | b83476f25d94210fd0b0e6a50c763238d6193420 (patch) | |
tree | e550824d9a11a95c05fff87436df10181d26e861 | |
parent | 75edce8ac1d79dafcb198fe50d7d97819b2c0ece (diff) | |
download | ydb-b83476f25d94210fd0b0e6a50c763238d6193420.tar.gz |
validation has been added for projection types
3 files changed, 283 insertions, 14 deletions
diff --git a/ydb/core/yq/libs/control_plane_storage/request_validators.cpp b/ydb/core/yq/libs/control_plane_storage/request_validators.cpp index a9ccae0b9a..1ae3107189 100644 --- a/ydb/core/yq/libs/control_plane_storage/request_validators.cpp +++ b/ydb/core/yq/libs/control_plane_storage/request_validators.cpp @@ -2,6 +2,85 @@ namespace NYq { +namespace { + +NYql::TIssues ValidateProjectionType(const NYdb::TType& columnType, const TString& columnName, const std::vector<NYdb::TType>& availableTypes) { + return FindIf(availableTypes, [&columnType](const auto& availableType) { return NYdb::TypesEqual(availableType, columnType); }) == availableTypes.end() + ? NYql::TIssues{MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column \"" << columnName << "\" from projection does not support " << columnType.ToString() << " type")} + : NYql::TIssues{}; +} + +NYql::TIssues ValidateIntegerProjectionType(const NYdb::TType& columnType, const TString& columnName) { + static const std::vector<NYdb::TType> availableTypes { + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::String) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Int64) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Utf8) + .Build() + }; + return ValidateProjectionType(columnType, columnName, availableTypes); +} + +NYql::TIssues ValidateEnumProjectionType(const NYdb::TType& columnType, const TString& columnName) { + static const std::vector<NYdb::TType> availableTypes { + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::String) + .Build() + }; + return ValidateProjectionType(columnType, columnName, availableTypes); +} + +NYql::TIssues ValidateCommonProjectionType(const NYdb::TType& columnType, const TString& columnName) { + static const std::vector<NYdb::TType> availableTypes { + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::String) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Int64) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Utf8) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Int32) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Uint32) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Uint64) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Date) + .Build() + }; + return ValidateProjectionType(columnType, columnName, availableTypes); +} + +NYql::TIssues ValidateDateProjectionType(const NYdb::TType& columnType, const TString& columnName) { + static const std::vector<NYdb::TType> availableTypes { + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::String) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Utf8) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Uint32) + .Build(), + NYdb::TTypeBuilder{} + .Primitive(NYdb::EPrimitiveType::Date) + .Build() + }; + return ValidateProjectionType(columnType, columnName, availableTypes); +} + +} + NYql::TIssues ValidateConnectionSetting(const YandexQuery::ConnectionSetting& setting, const TSet<YandexQuery::ConnectionSetting::ConnectionCase>& availableConnections, bool disableCurrentIam, bool clickHousePasswordRequire) { NYql::TIssues issues; if (!availableConnections.contains(setting.connection_case())) { @@ -196,25 +275,42 @@ NYql::TIssues ValidateProjectionColumns(const YandexQuery::Schema& schema, const for (const auto& column: schema.column()) { types[column.name()] = column.type(); } - static const TSet<Ydb::Type::PrimitiveTypeId> availableProjectionTypes { - Ydb::Type::STRING, - Ydb::Type::UTF8, - Ydb::Type::INT32, - Ydb::Type::INT64, - Ydb::Type::UINT32, - Ydb::Type::UINT64, - Ydb::Type::DATE - }; for (const auto& parititonedColumn: partitionedBy) { auto it = types.find(parititonedColumn); if (it == types.end()) { issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column " << parititonedColumn << " from partitioned_by does not exist in the scheme. Please add such a column to your scheme")); continue; } - const auto& type = it->second; - const auto typeId = type.type_id(); - if (!availableProjectionTypes.contains(typeId)) { - issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column " << parititonedColumn << " from partitioned_by does not support " << Ydb::Type::PrimitiveTypeId_Name(typeId) << " type")); + NYdb::TType columnType{it->second}; + issues.AddIssues(ValidateCommonProjectionType(columnType, parititonedColumn)); + } + return issues; +} + +NYql::TIssues ValidateProjection(const YandexQuery::Schema& schema, const TString& projection, const TVector<TString>& partitionedBy) { + auto generator =NYql::NPathGenerator::CreatePathGenerator(projection, partitionedBy); // an exception is thrown if an error occurs + TMap<TString, NYql::NPathGenerator::IPathGenerator::EType> projectionColumns; + for (const auto& column: generator->GetConfig().Rules) { + projectionColumns[column.Name] = column.Type; + } + NYql::TIssues issues; + for (const auto& column: schema.column()) { + auto it = projectionColumns.find(column.name()); + if (it != projectionColumns.end()) { + switch (it->second) { + case NYql::NPathGenerator::IPathGenerator::EType::INTEGER: + issues.AddIssues(ValidateIntegerProjectionType(NYdb::TType{column.type()}, column.name())); + break; + case NYql::NPathGenerator::IPathGenerator::EType::ENUM: + issues.AddIssues(ValidateEnumProjectionType(NYdb::TType{column.type()}, column.name())); + break; + case NYql::NPathGenerator::IPathGenerator::EType::DATE: + issues.AddIssues(ValidateDateProjectionType(NYdb::TType{column.type()}, column.name())); + break; + case NYql::NPathGenerator::IPathGenerator::EType::UNDEFINED: + issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column \"" << column.name() << "\" from projection has undefined generator type")); + break; + } } } return issues; diff --git a/ydb/core/yq/libs/control_plane_storage/request_validators.h b/ydb/core/yq/libs/control_plane_storage/request_validators.h index 98438c38d1..14804e0499 100644 --- a/ydb/core/yq/libs/control_plane_storage/request_validators.h +++ b/ydb/core/yq/libs/control_plane_storage/request_validators.h @@ -81,6 +81,7 @@ NYql::TIssues ValidateFormatSetting(const TString& format, const google::protobu NYql::TIssues ValidateDateFormatSetting(const google::protobuf::Map<TString, TString>& formatSetting, bool matchAllSettings = false); NYql::TIssues ValidateProjectionColumns(const YandexQuery::Schema& schema, const TVector<TString>& partitionedBy); +NYql::TIssues ValidateProjection(const YandexQuery::Schema& schema, const TString& projection, const TVector<TString>& partitionedBy); template<typename T> NYql::TIssues ValidateBinding(const T& ev, size_t maxSize, const TSet<YandexQuery::BindingSetting::BindingCase>& availableBindings) @@ -137,7 +138,7 @@ NYql::TIssues ValidateBinding(const T& ev, size_t maxSize, const TSet<YandexQuer } projectionStr = projection.ToJsonPretty(); } - NYql::NPathGenerator::CreatePathGenerator(projectionStr, partitionedBy); // an exception is thrown if an error occurs + issues.AddIssues(ValidateProjection(subset.schema(), projectionStr, partitionedBy)); } catch (...) { issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST,CurrentExceptionMessage())); } diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp index 0e1c9a218c..908aebd3bd 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp @@ -2,6 +2,7 @@ #include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h> #include <ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.h> +#include <ydb/library/yql/providers/s3/path_generator/yql_s3_path_generator.h> #include <ydb/library/yql/providers/s3/range_helpers/path_list_reader.h> #include <ydb/library/yql/providers/common/provider/yql_provider.h> @@ -79,6 +80,166 @@ bool ValidateS3Paths(const TExprNode& node, const TStructExprType*& extraColumns return true; } +class TTypeValidator { + using TTypesContainer = std::unordered_set<const TTypeAnnotationNode*, TTypeAnnotationNode::THash, TTypeAnnotationNode::TEqual>; + +public: + TTypeValidator(TExprContext& ctx, const TExprNode::TPtr& input, const TStructExprType* columnsType) + : Ctx(ctx) + , Input(input) + , ColumnsType(columnsType) + , IntegerTypes(CreateIntegerAvailableTypes()) + , CommonTypes(CreateCommonAvailableTypes()) + , EnumTypes(CreateEnumAvailableTypes()) + , DateTypes(CreateDateAvailableTypes()) + {} + + bool ValidatePartitonBy(const std::vector<TString>& partitionedBy) { + TSet<TString> partitionedByColumns{partitionedBy.begin(), partitionedBy.end()}; + for (auto item: ColumnsType->GetItems()) { + if (!partitionedByColumns.contains(item->GetName())) { + continue; + } + if (!ValidateCommonType(item)) { + return false; + } + } + return true; + } + + bool ValidateProjection(const TString& projection, const std::vector<TString>& partitionedBy) { + auto generator = NPathGenerator::CreatePathGenerator(projection, partitionedBy); + TMap<TString, NPathGenerator::IPathGenerator::EType> projectionColumns; + for (const auto& column: generator->GetConfig().Rules) { + projectionColumns[column.Name] = column.Type; + } + for (auto item: ColumnsType->GetItems()) { + auto it = projectionColumns.find(item->GetName()); + if (it == projectionColumns.end()) { + continue; + } + if (!ValidateType(item, it->second)) { + return false; + } + } + return true; + } + +private: + bool ValidateCommonType(const TItemExprType* item) { + return ValidateType(item, CommonTypes); + } + + bool ValidateType(const TItemExprType* item, NYql::NPathGenerator::IPathGenerator::EType type) { + switch (type) { + case NYql::NPathGenerator::IPathGenerator::EType::INTEGER: + return ValidateIntegerType(item); + case NYql::NPathGenerator::IPathGenerator::EType::ENUM: + return ValidateEnumType(item); + case NYql::NPathGenerator::IPathGenerator::EType::DATE: + return ValidateDateType(item); + case NYql::NPathGenerator::IPathGenerator::EType::UNDEFINED: + Ctx.AddError(TIssue(Ctx.GetPosition(Input->Child(TS3ReadObject::idx_RowType)->Pos()), TStringBuilder{} << "Projection column \"" << item->GetName() << "\" has undefined projection type")); + return false; + } + } + + bool ValidateDateType(const TItemExprType* item) { + return ValidateType(item, DateTypes); + } + + bool ValidateIntegerType(const TItemExprType* item) { + return ValidateType(item, IntegerTypes); + } + + bool ValidateEnumType(const TItemExprType* item) { + return ValidateType(item, EnumTypes); + } + + bool ValidateType(const TItemExprType* item, const TTypesContainer& availableTypes) { + auto it = availableTypes.find(item->GetItemType()); + if (it != availableTypes.end()) { + return true; + + } + Ctx.AddError(TIssue(Ctx.GetPosition(Input->Child(TS3ReadObject::idx_RowType)->Pos()), TStringBuilder{} << "Projection column \"" << item->GetName() << "\" has invalid type " << *item->GetItemType())); + return false; + } + + TTypesContainer CreateIntegerAvailableTypes() const { + return { + Ctx.MakeType<TDataExprType>(EDataSlot::String), + Ctx.MakeType<TDataExprType>(EDataSlot::Utf8), + Ctx.MakeType<TDataExprType>(EDataSlot::Int64) + }; + } + + TTypesContainer CreateEnumAvailableTypes() const { + return { + Ctx.MakeType<TDataExprType>(EDataSlot::String) + }; + } + + TTypesContainer CreateCommonAvailableTypes() const { + return { + Ctx.MakeType<TDataExprType>(EDataSlot::String), + Ctx.MakeType<TDataExprType>(EDataSlot::Utf8), + Ctx.MakeType<TDataExprType>(EDataSlot::Int64), + Ctx.MakeType<TDataExprType>(EDataSlot::Uint64), + Ctx.MakeType<TDataExprType>(EDataSlot::Int32), + Ctx.MakeType<TDataExprType>(EDataSlot::Uint32), + Ctx.MakeType<TDataExprType>(EDataSlot::Date) + }; + } + + TTypesContainer CreateDateAvailableTypes() const { + return { + Ctx.MakeType<TDataExprType>(EDataSlot::String), + Ctx.MakeType<TDataExprType>(EDataSlot::Utf8), + Ctx.MakeType<TDataExprType>(EDataSlot::Uint32), + Ctx.MakeType<TDataExprType>(EDataSlot::Date) + }; + } + +private: + TExprContext& Ctx; + const TExprNode::TPtr& Input; + const TStructExprType* ColumnsType; + const TTypesContainer IntegerTypes; + const TTypesContainer CommonTypes; + const TTypesContainer EnumTypes; + const TTypesContainer DateTypes; +}; + + +bool ValidateProjectionTypes(const TStructExprType* columnsType, const TString& projection, const std::vector<TString>& partitionedBy, const TExprNode::TPtr& input, TExprContext& ctx) { + if (!columnsType) { + return true; + } + + TTypeValidator typeValidator(ctx, input, columnsType); + if (!projection && !partitionedBy.empty()) { + if (!typeValidator.ValidatePartitonBy(partitionedBy)) { + return false; + } + } + + if (!projection || partitionedBy.empty()) { + return true; + } + + try { + if (!typeValidator.ValidateProjection(projection, partitionedBy)) { + return false; + } + } catch (...) { + ctx.AddError(TIssue(ctx.GetPosition(input->Child(TS3ReadObject::idx_RowType)->Pos()), CurrentExceptionMessage())); + return false; + } + + return true; +} + bool ExtractSettingValue(const TExprNode& value, TStringBuf settingName, TStringBuf format, TStringBuf expectedFormat, TExprContext& ctx, TStringBuf& settingValue) { if (expectedFormat && format != expectedFormat) { ctx.AddError(TIssue(ctx.GetPosition(value.Pos()), TStringBuilder() << settingName << " can only be used with " << expectedFormat << " format")); @@ -225,6 +386,8 @@ public: return TStatus::Error; } + std::vector<TString> partitionedBy; + TString projection; { THashSet<TStringBuf> columns; const TStructExprType* structRowType = rowType->Cast<TStructExprType>(); @@ -243,16 +406,25 @@ public: return TStatus::Error; } columns.erase(column->Content()); + partitionedBy.push_back(TString{column->Content()}); } if (columns.empty()) { ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), "Table contains no columns except partitioning columns")); return TStatus::Error; } + + } + if (name == "projection"sv) { + projection = settingNode->Tail().Content(); } } } } + if (!ValidateProjectionTypes(rowType->Cast<TStructExprType>(), projection, partitionedBy, input, ctx)) { + return TStatus::Error; + } + input->SetTypeAnn(ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{ input->Child(TS3ReadObject::idx_World)->GetTypeAnn(), ctx.MakeType<TListExprType>(rowType) |