diff options
author | Andrey Neporada <neporada@gmail.com> | 2022-07-01 17:17:02 +0300 |
---|---|---|
committer | Andrey Neporada <neporada@gmail.com> | 2022-07-01 17:17:02 +0300 |
commit | ecf3441bd88ede87dd8e1e848b0818c9c80e67e0 (patch) | |
tree | f855dd7953e843b6fbb568c960073409eeaac309 | |
parent | ad7b9a14241e52ad1aa3be31c56198921620296e (diff) | |
download | ydb-ecf3441bd88ede87dd8e1e848b0818c9c80e67e0.tar.gz |
[YQL-15057] Improve S3 callables validation. Support externalColumns setting in S3Path
ref:d0a2028f97684b65eaf15816ea8a87c70baaade4
4 files changed, 117 insertions, 16 deletions
diff --git a/ydb/library/yql/core/yql_expr_type_annotation.cpp b/ydb/library/yql/core/yql_expr_type_annotation.cpp index 7bb2427f2c8..f14b11367a5 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.cpp +++ b/ydb/library/yql/core/yql_expr_type_annotation.cpp @@ -1910,6 +1910,48 @@ bool EnsureTupleOfAtoms(const TExprNode& node, TExprContext& ctx) { return true; } +bool EnsureValidSettings(const TExprNode& node, + const THashSet<TStringBuf>& supportedSettings, + const TSettingNodeValidator& validator, + TExprContext& ctx) +{ + if (!EnsureTuple(node, ctx)) { + return false; + } + + for (auto& settingNode : node.ChildrenList()) { + if (!EnsureTupleMinSize(*settingNode, 1, ctx)) { + return false; + } + + if (!EnsureAtom(settingNode->Head(), ctx)) { + return false; + } + + const TStringBuf name = settingNode->Head().Content(); + if (!supportedSettings.contains(name)) { + ctx.AddError(TIssue(ctx.GetPosition(settingNode->Head().Pos()), TStringBuilder() << "Unknown setting '" << name << "'")); + return false; + } + + if (!validator(name, *settingNode, ctx)) { + return false; + } + } + return true; +} + +TSettingNodeValidator RequireSingleValueSettings(const TSettingNodeValidator& validator) { + return [validator](TStringBuf name, const TExprNode& setting, TExprContext& ctx) { + if (setting.ChildrenSize() != 2) { + ctx.AddError(TIssue(ctx.GetPosition(setting.Pos()), + TStringBuilder() << "Option '" << name << "' requires single argument")); + return false; + } + return validator(name, setting, ctx); + }; +} + bool EnsureTupleSize(const TExprNode& node, ui32 expectedSize, TExprContext& ctx) { if (HasError(node.GetTypeAnn(), ctx) || node.Type() != TExprNode::List) { ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() << "Expected tuple, but got: " << node.Type())); diff --git a/ydb/library/yql/core/yql_expr_type_annotation.h b/ydb/library/yql/core/yql_expr_type_annotation.h index 34610321dda..8d769ce54f3 100644 --- a/ydb/library/yql/core/yql_expr_type_annotation.h +++ b/ydb/library/yql/core/yql_expr_type_annotation.h @@ -66,6 +66,14 @@ bool EnsureAtom(const TExprNode& node, TExprContext& ctx); bool EnsureCallable(const TExprNode& node, TExprContext& ctx); bool EnsureTuple(const TExprNode& node, TExprContext& ctx); bool EnsureTupleOfAtoms(const TExprNode& node, TExprContext& ctx); + +using TSettingNodeValidator = std::function<bool (TStringBuf name, const TExprNode& setting, TExprContext& ctx)>; +bool EnsureValidSettings(const TExprNode& node, + const THashSet<TStringBuf>& supportedSettings, + const TSettingNodeValidator& validator, + TExprContext& ctx); +TSettingNodeValidator RequireSingleValueSettings(const TSettingNodeValidator& validator); + bool EnsureLambda(const TExprNode& node, TExprContext& ctx); IGraphTransformer::TStatus ConvertToLambda(TExprNode::TPtr& node, TExprContext& ctx, ui32 argumentsCount, ui32 maxArgumentsCount = Max<ui32>(), bool withTypes = true); diff --git a/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json b/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json index 4991984cb1e..f1f6e0d8ffa 100644 --- a/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json +++ b/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json @@ -31,7 +31,8 @@ "Match": {"Type": "Tuple"}, "Children": [ {"Index": 0, "Name": "Path", "Type": "TCoAtom"}, - {"Index": 1, "Name": "Size", "Type": "TCoAtom"} + {"Index": 1, "Name": "Size", "Type": "TCoAtom"}, + {"Index": 2, "Name": "Settings", "Type": "TExprBase", "Optional": true} ] }, { diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp index d72c5c916e5..7fa248a5eb8 100644 --- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp +++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp @@ -15,15 +15,6 @@ using namespace NNodes; namespace { -TStringBuf GetCompression(const TExprNode& settings) { - for (auto i = 0U; i < settings.ChildrenSize(); ++i) { - const auto& child = *settings.Child(i); - if (child.Head().IsAtom("compression") && child.Tail().IsCallable({"String", "Utf8"})) - if (const auto& comp = child.Tail().Head().Content(); !comp.empty()) - return comp; - } - return ""sv; -} class TS3DataSourceTypeAnnotationTransformer : public TVisitorTransformerBase { public: @@ -156,17 +147,76 @@ public: return TStatus::Error; } - if (!EnsureAtom(*input->Child(TS3Object::idx_Format), ctx) || !NCommon::ValidateFormat(input->Child(TS3Object::idx_Format)->Content(), ctx)) { - return TStatus::Error; + for (auto& path : input->Child(TS3Object::idx_Paths)->ChildrenList()) { + if (!EnsureTupleMinSize(*path, 2, ctx) || !EnsureTupleMaxSize(*path, 3, ctx)) { + return TStatus::Error; + } + + if (!EnsureAtom(*path->Child(TS3Path::idx_Path), ctx) || + !EnsureAtom(*path->Child(TS3Path::idx_Size), ctx)) + { + return TStatus::Error; + } + + if (path->Child(TS3Path::idx_Path)->Content().empty()) { + ctx.AddError(TIssue(ctx.GetPosition(path->Child(TS3Path::idx_Path)->Pos()), "Expected non-empty path")); + return TStatus::Error; + } + + ui64 size = 0; + auto sizeStr = path->Child(TS3Path::idx_Size)->Content(); + if (!TryFromString(sizeStr, size)) { + ctx.AddError(TIssue(ctx.GetPosition(path->Child(TS3Path::idx_Size)->Pos()), + TStringBuilder() << "Expected number as S3 object size, got: '" << sizeStr << "'")); + return TStatus::Error; + } + + if (path->ChildrenSize() > TS3Path::idx_Settings) { + auto validator = [](TStringBuf name, const TExprNode& setting, TExprContext& ctx) { + Y_UNUSED(name); + auto& value = setting.Tail(); + if (!EnsureStructType(value, ctx) || !EnsurePersistable(value, ctx)) { + return false; + } + return true; + }; + + if (!EnsureValidSettings(*path->Child(TS3Path::idx_Settings), { "externalColumns" }, + RequireSingleValueSettings(validator), ctx)) + { + return TStatus::Error; + } + } } - if (input->ChildrenSize() > TS3Object::idx_Settings && !EnsureTuple(*input->Child(TS3Object::idx_Settings), ctx)) { + if (!EnsureAtom(*input->Child(TS3Object::idx_Format), ctx) || !NCommon::ValidateFormat(input->Child(TS3Object::idx_Format)->Content(), ctx)) { return TStatus::Error; } - const auto compression = GetCompression(*input->Child(TS3Object::idx_Settings)); - if (!NCommon::ValidateCompression(compression, ctx)) { - return TStatus::Error; + if (input->ChildrenSize() > TS3Object::idx_Settings) { + auto validator = [](TStringBuf name, const TExprNode& setting, TExprContext& ctx) { + Y_UNUSED(name); + auto& value = setting.Tail(); + TStringBuf compression; + if (value.IsAtom()) { + compression = value.Content(); + } else { + if (!EnsureStringOrUtf8Type(value, ctx)) { + return false; + } + if (!value.IsCallable({"String", "Utf8"})) { + ctx.AddError(TIssue(ctx.GetPosition(value.Pos()), "Expected literal string as compression value")); + return false; + } + compression = value.Head().Content(); + } + return NCommon::ValidateCompression(compression, ctx); + }; + if (!EnsureValidSettings(*input->Child(TS3Object::idx_Settings), { "compression" }, + RequireSingleValueSettings(validator), ctx)) + { + return TStatus::Error; + } } input->SetTypeAnn(ctx.MakeType<TUnitExprType>()); |