about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Andrey Neporada <neporada@gmail.com> 2022-07-01 17:17:02 +0300
committer Andrey Neporada <neporada@gmail.com> 2022-07-01 17:17:02 +0300
commit ecf3441bd88ede87dd8e1e848b0818c9c80e67e0 (patch)
tree f855dd7953e843b6fbb568c960073409eeaac309
parent ad7b9a14241e52ad1aa3be31c56198921620296e (diff)
download ydb-ecf3441bd88ede87dd8e1e848b0818c9c80e67e0.tar.gz
[YQL-15057] Improve S3 callables validation. Support externalColumns setting in S3Path
ref:d0a2028f97684b65eaf15816ea8a87c70baaade4
-rw-r--r-- ydb/library/yql/core/yql_expr_type_annotation.cpp 42
-rw-r--r-- ydb/library/yql/core/yql_expr_type_annotation.h 8
-rw-r--r-- ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json 3
-rw-r--r-- ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp 80
4 files changed, 117 insertions, 16 deletions
diff --git a/ydb/library/yql/core/yql_expr_type_annotation.cpp b/ydb/library/yql/core/yql_expr_type_annotation.cpp
index 7bb2427f2c8..f14b11367a5 100644
--- a/ydb/library/yql/core/yql_expr_type_annotation.cpp
+++ b/ydb/library/yql/core/yql_expr_type_annotation.cpp
@@ -1910,6 +1910,48 @@ bool EnsureTupleOfAtoms(const TExprNode& node, TExprContext& ctx) {
return true;
}
+bool EnsureValidSettings(const TExprNode& node,
+ const THashSet<TStringBuf>& supportedSettings,
+ const TSettingNodeValidator& validator,
+ TExprContext& ctx)
+{
+ if (!EnsureTuple(node, ctx)) {
+ return false;
+ }
+
+ for (auto& settingNode : node.ChildrenList()) {
+ if (!EnsureTupleMinSize(*settingNode, 1, ctx)) {
+ return false;
+ }
+
+ if (!EnsureAtom(settingNode->Head(), ctx)) {
+ return false;
+ }
+
+ const TStringBuf name = settingNode->Head().Content();
+ if (!supportedSettings.contains(name)) {
+ ctx.AddError(TIssue(ctx.GetPosition(settingNode->Head().Pos()), TStringBuilder() << "Unknown setting '" << name << "'"));
+ return false;
+ }
+
+ if (!validator(name, *settingNode, ctx)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+TSettingNodeValidator RequireSingleValueSettings(const TSettingNodeValidator& validator) {
+ return [validator](TStringBuf name, const TExprNode& setting, TExprContext& ctx) {
+ if (setting.ChildrenSize() != 2) {
+ ctx.AddError(TIssue(ctx.GetPosition(setting.Pos()),
+ TStringBuilder() << "Option '" << name << "' requires single argument"));
+ return false;
+ }
+ return validator(name, setting, ctx);
+ };
+}
+
bool EnsureTupleSize(const TExprNode& node, ui32 expectedSize, TExprContext& ctx) {
if (HasError(node.GetTypeAnn(), ctx) || node.Type() != TExprNode::List) {
ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() << "Expected tuple, but got: " << node.Type()));
diff --git a/ydb/library/yql/core/yql_expr_type_annotation.h b/ydb/library/yql/core/yql_expr_type_annotation.h
index 34610321dda..8d769ce54f3 100644
--- a/ydb/library/yql/core/yql_expr_type_annotation.h
+++ b/ydb/library/yql/core/yql_expr_type_annotation.h
@@ -66,6 +66,14 @@ bool EnsureAtom(const TExprNode& node, TExprContext& ctx);
bool EnsureCallable(const TExprNode& node, TExprContext& ctx);
bool EnsureTuple(const TExprNode& node, TExprContext& ctx);
bool EnsureTupleOfAtoms(const TExprNode& node, TExprContext& ctx);
+
+using TSettingNodeValidator = std::function<bool (TStringBuf name, const TExprNode& setting, TExprContext& ctx)>;
+bool EnsureValidSettings(const TExprNode& node,
+ const THashSet<TStringBuf>& supportedSettings,
+ const TSettingNodeValidator& validator,
+ TExprContext& ctx);
+TSettingNodeValidator RequireSingleValueSettings(const TSettingNodeValidator& validator);
+
bool EnsureLambda(const TExprNode& node, TExprContext& ctx);
IGraphTransformer::TStatus ConvertToLambda(TExprNode::TPtr& node, TExprContext& ctx, ui32 argumentsCount, ui32 maxArgumentsCount = Max<ui32>(),
bool withTypes = true);
diff --git a/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json b/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json
index 4991984cb1e..f1f6e0d8ffa 100644
--- a/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json
+++ b/ydb/library/yql/providers/s3/expr_nodes/yql_s3_expr_nodes.json
@@ -31,7 +31,8 @@
"Match": {"Type": "Tuple"},
"Children": [
{"Index": 0, "Name": "Path", "Type": "TCoAtom"},
- {"Index": 1, "Name": "Size", "Type": "TCoAtom"}
+ {"Index": 1, "Name": "Size", "Type": "TCoAtom"},
+ {"Index": 2, "Name": "Settings", "Type": "TExprBase", "Optional": true}
]
},
{
diff --git a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
index d72c5c916e5..7fa248a5eb8 100644
--- a/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
+++ b/ydb/library/yql/providers/s3/provider/yql_s3_datasource_type_ann.cpp
@@ -15,15 +15,6 @@ using namespace NNodes;
namespace {
-TStringBuf GetCompression(const TExprNode& settings) {
- for (auto i = 0U; i < settings.ChildrenSize(); ++i) {
- const auto& child = *settings.Child(i);
- if (child.Head().IsAtom("compression") && child.Tail().IsCallable({"String", "Utf8"}))
- if (const auto& comp = child.Tail().Head().Content(); !comp.empty())
- return comp;
- }
- return ""sv;
-}
class TS3DataSourceTypeAnnotationTransformer : public TVisitorTransformerBase {
public:
@@ -156,17 +147,76 @@ public:
return TStatus::Error;
}
- if (!EnsureAtom(*input->Child(TS3Object::idx_Format), ctx) || !NCommon::ValidateFormat(input->Child(TS3Object::idx_Format)->Content(), ctx)) {
- return TStatus::Error;
+ for (auto& path : input->Child(TS3Object::idx_Paths)->ChildrenList()) {
+ if (!EnsureTupleMinSize(*path, 2, ctx) || !EnsureTupleMaxSize(*path, 3, ctx)) {
+ return TStatus::Error;
+ }
+
+ if (!EnsureAtom(*path->Child(TS3Path::idx_Path), ctx) ||
+ !EnsureAtom(*path->Child(TS3Path::idx_Size), ctx))
+ {
+ return TStatus::Error;
+ }
+
+ if (path->Child(TS3Path::idx_Path)->Content().empty()) {
+ ctx.AddError(TIssue(ctx.GetPosition(path->Child(TS3Path::idx_Path)->Pos()), "Expected non-empty path"));
+ return TStatus::Error;
+ }
+
+ ui64 size = 0;
+ auto sizeStr = path->Child(TS3Path::idx_Size)->Content();
+ if (!TryFromString(sizeStr, size)) {
+ ctx.AddError(TIssue(ctx.GetPosition(path->Child(TS3Path::idx_Size)->Pos()),
+ TStringBuilder() << "Expected number as S3 object size, got: '" << sizeStr << "'"));
+ return TStatus::Error;
+ }
+
+ if (path->ChildrenSize() > TS3Path::idx_Settings) {
+ auto validator = [](TStringBuf name, const TExprNode& setting, TExprContext& ctx) {
+ Y_UNUSED(name);
+ auto& value = setting.Tail();
+ if (!EnsureStructType(value, ctx) || !EnsurePersistable(value, ctx)) {
+ return false;
+ }
+ return true;
+ };
+
+ if (!EnsureValidSettings(*path->Child(TS3Path::idx_Settings), { "externalColumns" },
+ RequireSingleValueSettings(validator), ctx))
+ {
+ return TStatus::Error;
+ }
+ }
}
- if (input->ChildrenSize() > TS3Object::idx_Settings && !EnsureTuple(*input->Child(TS3Object::idx_Settings), ctx)) {
+ if (!EnsureAtom(*input->Child(TS3Object::idx_Format), ctx) || !NCommon::ValidateFormat(input->Child(TS3Object::idx_Format)->Content(), ctx)) {
return TStatus::Error;
}
- const auto compression = GetCompression(*input->Child(TS3Object::idx_Settings));
- if (!NCommon::ValidateCompression(compression, ctx)) {
- return TStatus::Error;
+ if (input->ChildrenSize() > TS3Object::idx_Settings) {
+ auto validator = [](TStringBuf name, const TExprNode& setting, TExprContext& ctx) {
+ Y_UNUSED(name);
+ auto& value = setting.Tail();
+ TStringBuf compression;
+ if (value.IsAtom()) {
+ compression = value.Content();
+ } else {
+ if (!EnsureStringOrUtf8Type(value, ctx)) {
+ return false;
+ }
+ if (!value.IsCallable({"String", "Utf8"})) {
+ ctx.AddError(TIssue(ctx.GetPosition(value.Pos()), "Expected literal string as compression value"));
+ return false;
+ }
+ compression = value.Head().Content();
+ }
+ return NCommon::ValidateCompression(compression, ctx);
+ };
+ if (!EnsureValidSettings(*input->Child(TS3Object::idx_Settings), { "compression" },
+ RequireSingleValueSettings(validator), ctx))
+ {
+ return TStatus::Error;
+ }
}
input->SetTypeAnn(ctx.MakeType<TUnitExprType>());