aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhcpp <hcpp@ydb.tech>2023-03-08 13:51:38 +0300
committerhcpp <hcpp@ydb.tech>2023-03-08 13:51:38 +0300
commitb83476f25d94210fd0b0e6a50c763238d6193420 (patch)
treee550824d9a11a95c05fff87436df10181d26e861
parent75edce8ac1d79dafcb198fe50d7d97819b2c0ece (diff)
downloadydb-b83476f25d94210fd0b0e6a50c763238d6193420.tar.gz
validation has been added for projection types
-rw-r--r--ydb/core/yq/libs/control_plane_storage/request_validators.cpp122
-rw-r--r--ydb/core/yq/libs/control_plane_storage/request_validators.h3
-rw-r--r--ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp172
3 files changed, 283 insertions, 14 deletions
diff --git a/ydb/core/yq/libs/control_plane_storage/request_validators.cpp b/ydb/core/yq/libs/control_plane_storage/request_validators.cpp
index a9ccae0b9a..1ae3107189 100644
--- a/ydb/core/yq/libs/control_plane_storage/request_validators.cpp
+++ b/ydb/core/yq/libs/control_plane_storage/request_validators.cpp
@@ -2,6 +2,85 @@
namespace NYq {
+namespace {
+
+// Builds the NYdb::TType that describes a single primitive type.
+NYdb::TType MakePrimitiveProjectionType(NYdb::EPrimitiveType primitiveType) {
+    return NYdb::TTypeBuilder{}
+        .Primitive(primitiveType)
+        .Build();
+}
+
+// Compares columnType with every entry of availableTypes using NYdb::TypesEqual.
+// Returns an empty issue list on the first match; otherwise returns a single
+// BAD_REQUEST issue that names the column and its type.
+NYql::TIssues ValidateProjectionType(const NYdb::TType& columnType, const TString& columnName, const std::vector<NYdb::TType>& availableTypes) {
+    for (const auto& availableType : availableTypes) {
+        if (NYdb::TypesEqual(availableType, columnType)) {
+            return NYql::TIssues{};
+        }
+    }
+    return NYql::TIssues{MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column \"" << columnName << "\" from projection does not support " << columnType.ToString() << " type")};
+}
+
+// Column types accepted here: String, Int64, Utf8.
+NYql::TIssues ValidateIntegerProjectionType(const NYdb::TType& columnType, const TString& columnName) {
+    static const std::vector<NYdb::TType> supportedTypes {
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::String),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Int64),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Utf8)
+    };
+    return ValidateProjectionType(columnType, columnName, supportedTypes);
+}
+
+// Column types accepted here: String only.
+NYql::TIssues ValidateEnumProjectionType(const NYdb::TType& columnType, const TString& columnName) {
+    static const std::vector<NYdb::TType> supportedTypes {
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::String)
+    };
+    return ValidateProjectionType(columnType, columnName, supportedTypes);
+}
+
+// Column types accepted here: String, Int64, Utf8, Int32, Uint32, Uint64, Date.
+NYql::TIssues ValidateCommonProjectionType(const NYdb::TType& columnType, const TString& columnName) {
+    static const std::vector<NYdb::TType> supportedTypes {
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::String),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Int64),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Utf8),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Int32),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Uint32),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Uint64),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Date)
+    };
+    return ValidateProjectionType(columnType, columnName, supportedTypes);
+}
+
+// Column types accepted here: String, Utf8, Uint32, Date.
+NYql::TIssues ValidateDateProjectionType(const NYdb::TType& columnType, const TString& columnName) {
+    static const std::vector<NYdb::TType> supportedTypes {
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::String),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Utf8),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Uint32),
+        MakePrimitiveProjectionType(NYdb::EPrimitiveType::Date)
+    };
+    return ValidateProjectionType(columnType, columnName, supportedTypes);
+}
+
+} // namespace
+
NYql::TIssues ValidateConnectionSetting(const YandexQuery::ConnectionSetting& setting, const TSet<YandexQuery::ConnectionSetting::ConnectionCase>& availableConnections, bool disableCurrentIam, bool clickHousePasswordRequire) {
NYql::TIssues issues;
if (!availableConnections.contains(setting.connection_case())) {
@@ -196,25 +275,42 @@ NYql::TIssues ValidateProjectionColumns(const YandexQuery::Schema& schema, const
for (const auto& column: schema.column()) {
types[column.name()] = column.type();
}
- static const TSet<Ydb::Type::PrimitiveTypeId> availableProjectionTypes {
- Ydb::Type::STRING,
- Ydb::Type::UTF8,
- Ydb::Type::INT32,
- Ydb::Type::INT64,
- Ydb::Type::UINT32,
- Ydb::Type::UINT64,
- Ydb::Type::DATE
- };
for (const auto& parititonedColumn: partitionedBy) {
auto it = types.find(parititonedColumn);
if (it == types.end()) {
issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column " << parititonedColumn << " from partitioned_by does not exist in the scheme. Please add such a column to your scheme"));
continue;
}
- const auto& type = it->second;
- const auto typeId = type.type_id();
- if (!availableProjectionTypes.contains(typeId)) {
- issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column " << parititonedColumn << " from partitioned_by does not support " << Ydb::Type::PrimitiveTypeId_Name(typeId) << " type"));
+ NYdb::TType columnType{it->second};
+ issues.AddIssues(ValidateCommonProjectionType(columnType, parititonedColumn));
+ }
+ return issues;
+}
+
+NYql::TIssues ValidateProjection(const YandexQuery::Schema& schema, const TString& projection, const TVector<TString>& partitionedBy) {
+ auto generator =NYql::NPathGenerator::CreatePathGenerator(projection, partitionedBy); // an exception is thrown if an error occurs
+ TMap<TString, NYql::NPathGenerator::IPathGenerator::EType> projectionColumns;
+ for (const auto& column: generator->GetConfig().Rules) {
+ projectionColumns[column.Name] = column.Type;
+ }
+ NYql::TIssues issues;
+ for (const auto& column: schema.column()) {
+ auto it = projectionColumns.find(column.name());
+ if (it != projectionColumns.end()) {
+ switch (it->second) {
+ case NYql::NPathGenerator::IPathGenerator::EType::INTEGER:
+ issues.AddIssues(ValidateIntegerProjectionType(NYdb::TType{column.type()}, column.name()));
+ break;
+ case NYql::NPathGenerator::IPathGenerator::EType::ENUM:
+ issues.AddIssues(ValidateEnumProjectionType(NYdb::TType{column.type()}, column.name()));
+ break;
+ case NYql::NPathGenerator::IPathGenerator::EType::DATE:
+ issues.AddIssues(ValidateDateProjectionType(NYdb::TType{column.type()}, column.name()));
+ break;
+ case NYql::NPathGenerator::IPathGenerator::EType::UNDEFINED:
+ issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST, TStringBuilder{} << "Column \"" << column.name() << "\" from projection has undefined generator type"));
+ break;
+ }
}
}
return issues;
diff --git a/ydb/core/yq/libs/control_plane_storage/request_validators.h b/ydb/core/yq/libs/control_plane_storage/request_validators.h
index 98438c38d1..14804e0499 100644
--- a/ydb/core/yq/libs/control_plane_storage/request_validators.h
+++ b/ydb/core/yq/libs/control_plane_storage/request_validators.h
@@ -81,6 +81,7 @@ NYql::TIssues ValidateFormatSetting(const TString& format, const google::protobu
NYql::TIssues ValidateDateFormatSetting(const google::protobuf::Map<TString, TString>& formatSetting, bool matchAllSettings = false);
NYql::TIssues ValidateProjectionColumns(const YandexQuery::Schema& schema, const TVector<TString>& partitionedBy);
+NYql::TIssues ValidateProjection(const YandexQuery::Schema& schema, const TString& projection, const TVector<TString>& partitionedBy);
template<typename T>
NYql::TIssues ValidateBinding(const T& ev, size_t maxSize, const TSet<YandexQuery::BindingSetting::BindingCase>& availableBindings)
@@ -137,7 +138,7 @@ NYql::TIssues ValidateBinding(const T& ev, size_t maxSize, const TSet<YandexQuer
}
projectionStr = projection.ToJsonPretty();
}
- NYql::NPathGenerator::CreatePathGenerator(projectionStr, partitionedBy); // an exception is thrown if an error occurs
+ issues.AddIssues(ValidateProjection(subset.schema(), projectionStr, partitionedBy));
} catch (...) {
issues.AddIssue(MakeErrorIssue(TIssuesIds::BAD_REQUEST,CurrentExceptionMessage()));
}
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
index 0e1c9a218c..908aebd3bd 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
@@ -2,6 +2,7 @@
#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
#include <ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.h>
+#include <ydb/library/yql/providers/s3/path_generator/yql_s3_path_generator.h>
#include <ydb/library/yql/providers/s3/range_helpers/path_list_reader.h>
#include <ydb/library/yql/providers/common/provider/yql_provider.h>
@@ -79,6 +80,166 @@ bool ValidateS3Paths(const TExprNode& node, const TStructExprType*& extraColumns
return true;
}
+// Checks the types of partitioning / projection columns of a struct row type against the
+// sets of data types accepted for each projection generator kind. Every failure is reported
+// through the expression context at the position of the input's idx_RowType child.
+//
+// The context and the input node are held by reference, so an instance must not outlive them.
+class TTypeValidator {
+    using TTypesContainer = std::unordered_set<const TTypeAnnotationNode*, TTypeAnnotationNode::THash, TTypeAnnotationNode::TEqual>;
+    using EProjectionType = NPathGenerator::IPathGenerator::EType;
+
+public:
+    // The accepted type sets are built once here from ctx; Ctx is declared (and therefore
+    // initialized) before the sets that are created through it.
+    TTypeValidator(TExprContext& ctx, const TExprNode::TPtr& input, const TStructExprType* columnsType)
+        : Ctx(ctx)
+        , Input(input)
+        , ColumnsType(columnsType)
+        , IntegerTypes(CreateIntegerAvailableTypes())
+        , CommonTypes(CreateCommonAvailableTypes())
+        , EnumTypes(CreateEnumAvailableTypes())
+        , DateTypes(CreateDateAvailableTypes())
+    {}
+
+    // Validates every struct item whose name is listed in partitionedBy against the common
+    // type set. Stops at the first invalid column (an error has been added to the context)
+    // and returns false; returns true when all listed columns are valid.
+    bool ValidatePartitonBy(const std::vector<TString>& partitionedBy) {
+        const TSet<TString> partitionedByColumns{partitionedBy.begin(), partitionedBy.end()};
+        for (const auto* item : ColumnsType->GetItems()) {
+            if (partitionedByColumns.contains(item->GetName()) && !ValidateCommonType(item)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Creates a path generator from the projection config and validates every struct item
+    // that has a rule in it against the type set of that rule's generator kind.
+    // Exceptions thrown by CreatePathGenerator are not caught here and propagate to the caller.
+    bool ValidateProjection(const TString& projection, const std::vector<TString>& partitionedBy) {
+        auto generator = NPathGenerator::CreatePathGenerator(projection, partitionedBy);
+        TMap<TString, EProjectionType> projectionColumns;
+        for (const auto& rule : generator->GetConfig().Rules) {
+            projectionColumns[rule.Name] = rule.Type;
+        }
+        for (const auto* item : ColumnsType->GetItems()) {
+            const auto it = projectionColumns.find(item->GetName());
+            if (it != projectionColumns.end() && !ValidateType(item, it->second)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+private:
+    bool ValidateCommonType(const TItemExprType* item) {
+        return ValidateType(item, CommonTypes);
+    }
+
+    // Dispatches on the generator kind of the projection rule.
+    bool ValidateType(const TItemExprType* item, EProjectionType type) {
+        switch (type) {
+            case EProjectionType::INTEGER:
+                return ValidateType(item, IntegerTypes);
+            case EProjectionType::ENUM:
+                return ValidateType(item, EnumTypes);
+            case EProjectionType::DATE:
+                return ValidateType(item, DateTypes);
+            case EProjectionType::UNDEFINED:
+                break;
+        }
+        // Reached for UNDEFINED and for any value outside the enumerators above, so the
+        // function never flows off its end without returning a value.
+        Ctx.AddError(TIssue(Ctx.GetPosition(Input->Child(TS3ReadObject::idx_RowType)->Pos()), TStringBuilder{} << "Projection column \"" << item->GetName() << "\" has undefined projection type"));
+        return false;
+    }
+
+    // Returns true when the item's type is a member of availableTypes; otherwise adds an
+    // error to the context and returns false.
+    bool ValidateType(const TItemExprType* item, const TTypesContainer& availableTypes) {
+        if (availableTypes.find(item->GetItemType()) != availableTypes.end()) {
+            return true;
+        }
+        Ctx.AddError(TIssue(Ctx.GetPosition(Input->Child(TS3ReadObject::idx_RowType)->Pos()), TStringBuilder{} << "Projection column \"" << item->GetName() << "\" has invalid type " << *item->GetItemType()));
+        return false;
+    }
+
+    // Types accepted for INTEGER projection rules.
+    TTypesContainer CreateIntegerAvailableTypes() const {
+        return {
+            Ctx.MakeType<TDataExprType>(EDataSlot::String),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Utf8),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Int64)
+        };
+    }
+
+    // Types accepted for ENUM projection rules.
+    TTypesContainer CreateEnumAvailableTypes() const {
+        return {
+            Ctx.MakeType<TDataExprType>(EDataSlot::String)
+        };
+    }
+
+    // Types accepted for partitioned_by columns when no projection is given.
+    TTypesContainer CreateCommonAvailableTypes() const {
+        return {
+            Ctx.MakeType<TDataExprType>(EDataSlot::String),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Utf8),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Int64),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Uint64),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Int32),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Uint32),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Date)
+        };
+    }
+
+    // Types accepted for DATE projection rules.
+    TTypesContainer CreateDateAvailableTypes() const {
+        return {
+            Ctx.MakeType<TDataExprType>(EDataSlot::String),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Utf8),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Uint32),
+            Ctx.MakeType<TDataExprType>(EDataSlot::Date)
+        };
+    }
+
+private:
+    TExprContext& Ctx;
+    const TExprNode::TPtr& Input;
+    const TStructExprType* ColumnsType;
+    const TTypesContainer IntegerTypes;
+    const TTypesContainer CommonTypes;
+    const TTypesContainer EnumTypes;
+    const TTypesContainer DateTypes;
+};
+
+
+// Validates partitioning / projection column types of columnsType.
+//   - no columnsType, or no partitioning columns: nothing to check, returns true;
+//   - partitioning columns without a projection: checks them against the common type set;
+//   - partitioning columns with a projection: checks columns against the projection rules,
+//     turning any exception into an error at the position of the idx_RowType child of input.
+bool ValidateProjectionTypes(const TStructExprType* columnsType, const TString& projection, const std::vector<TString>& partitionedBy, const TExprNode::TPtr& input, TExprContext& ctx) {
+    if (!columnsType) {
+        return true;
+    }
+
+    // Constructed before the remaining guards, exactly where the original constructs it.
+    TTypeValidator validator(ctx, input, columnsType);
+
+    if (partitionedBy.empty()) {
+        return true;
+    }
+
+    if (projection.empty()) {
+        return validator.ValidatePartitonBy(partitionedBy);
+    }
+
+    try {
+        return validator.ValidateProjection(projection, partitionedBy);
+    } catch (...) {
+        ctx.AddError(TIssue(ctx.GetPosition(input->Child(TS3ReadObject::idx_RowType)->Pos()), CurrentExceptionMessage()));
+        return false;
+    }
+}
+
bool ExtractSettingValue(const TExprNode& value, TStringBuf settingName, TStringBuf format, TStringBuf expectedFormat, TExprContext& ctx, TStringBuf& settingValue) {
if (expectedFormat && format != expectedFormat) {
ctx.AddError(TIssue(ctx.GetPosition(value.Pos()), TStringBuilder() << settingName << " can only be used with " << expectedFormat << " format"));
@@ -225,6 +386,8 @@ public:
return TStatus::Error;
}
+ std::vector<TString> partitionedBy;
+ TString projection;
{
THashSet<TStringBuf> columns;
const TStructExprType* structRowType = rowType->Cast<TStructExprType>();
@@ -243,16 +406,25 @@ public:
return TStatus::Error;
}
columns.erase(column->Content());
+ partitionedBy.push_back(TString{column->Content()});
}
if (columns.empty()) {
ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), "Table contains no columns except partitioning columns"));
return TStatus::Error;
}
+
+ }
+ if (name == "projection"sv) {
+ projection = settingNode->Tail().Content();
}
}
}
}
+ if (!ValidateProjectionTypes(rowType->Cast<TStructExprType>(), projection, partitionedBy, input, ctx)) {
+ return TStatus::Error;
+ }
+
input->SetTypeAnn(ctx.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{
input->Child(TS3ReadObject::idx_World)->GetTypeAnn(),
ctx.MakeType<TListExprType>(rowType)