diff options
author | mrlolthe1st <mrlolthe1st@yandex-team.com> | 2022-12-21 17:08:53 +0300 |
---|---|---|
committer | mrlolthe1st <mrlolthe1st@yandex-team.com> | 2022-12-21 17:08:53 +0300 |
commit | 34fcd78a2d18d9846fd68339c8e240702857e1e5 (patch) | |
tree | 7e978fd3cccc993203a49cf3cf8e36b86caab6e4 | |
parent | 3944a91c66d9c840653678977c58bd5cc0fecfa8 (diff) | |
download | ydb-34fcd78a2d18d9846fd68339c8e240702857e1e5.tar.gz |
feature: UDF calls syntax sugar
initial
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_core.cpp | 27 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/builtin.cpp | 37 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/node.cpp | 105 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/node.h | 21 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql.cpp | 21 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/sql_ut.cpp | 89 |
6 files changed, 282 insertions, 18 deletions
diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index 91c59447cda..d831f7d3453 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -7701,7 +7701,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } IGraphTransformer::TStatus SqlCallWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { - if (!EnsureMinMaxArgsCount(*input, 2, 4, ctx.Expr)) { + if (!EnsureMinMaxArgsCount(*input, 2, 5, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -7746,17 +7746,36 @@ template <NKikimr::NUdf::EDataSlot DataSlot> } TExprNode::TPtr typeConfig; - if (input->ChildrenSize() == 4) { - typeConfig = input->TailPtr(); + if (input->ChildrenSize() > 3) { + typeConfig = input->Child(3); if (!EnsureAtom(*typeConfig, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } } + + TExprNode::TPtr runConfig; + if (input->ChildrenSize() > 4) { + runConfig = input->Child(4); + if (!EnsureComputable(*runConfig, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + } TExprNode::TPtr udf = ctx.Expr.Builder(input->Pos()) .Callable("Udf") .Add(0, udfName) - .Callable(1, "Void").Seal() + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { + if (runConfig) { + parent + .Add(1, runConfig) + .Seal(); + } else { + parent + .Callable(1, "Void") + .Seal(); + } + return parent; + }) .Callable(2, "TupleType") .Callable(0, "TupleType") .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { diff --git a/ydb/library/yql/sql/v1/builtin.cpp b/ydb/library/yql/sql/v1/builtin.cpp index d2de5e0d242..eb5f0e39ecd 100644 --- a/ydb/library/yql/sql/v1/builtin.cpp +++ b/ydb/library/yql/sql/v1/builtin.cpp @@ -33,7 +33,7 @@ extern const char SubqueryUnionMergeFor[] = "SubqueryUnionMergeFor"; extern const char SubqueryOrderBy[] = "SubqueryOrderBy"; extern const char SubqueryAssumeOrderBy[] = "SubqueryAssumeOrderBy"; -TMaybe<TString> MakeTypeConfig(const TString& ns, const TVector<TNodePtr>& udfArgs) { +TNodePtr MakeTypeConfig(const TPosition& pos, const TString& ns, const TVector<TNodePtr>& udfArgs) { if (ns == "clickhouse") { auto settings = NYT::TNode::CreateMap(); auto args = NYT::TNode::CreateMap(); @@ -46,10 +46,10 @@ TMaybe<TString> MakeTypeConfig(const TString& ns, const TVector<TNodePtr>& udfAr } settings["args"] = args; - return NYT::NodeToYsonString(settings); + return (TDeferredAtom(pos, NYT::NodeToYsonString(settings))).Build(); } - return Nothing(); + return nullptr; } void AdjustCheckedAggFuncName(TString& aggNormalizedName, TContext& ctx) { @@ -2192,7 +2192,7 @@ TNodePtr BuildUdfUserTypeArg(TPosition pos, TNodePtr positionalArgs, TNodePtr na } TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector<TNodePtr>& args, - TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, TMaybe<TString> typeConfig) { + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, TNodePtr typeConfig) { if (!ctx.Settings.EnableGenericUdfs) { return {}; } @@ -2206,14 +2206,14 @@ TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector } if (typeConfig) { - udfArgs.push_back(BuildQuotedAtom(pos, *typeConfig)); + udfArgs.push_back(typeConfig); } return udfArgs; } TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, - TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, TMaybe<TString> typeConfig) + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig) { const TString fullName = module + "." + name; TNodePtr callable; @@ -2245,12 +2245,18 @@ TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const // optional arguments if (customUserType) { sqlCallArgs.push_back(customUserType); - } else if (typeConfig) { + } else if (!typeConfig.Empty()) { sqlCallArgs.push_back(new TCallNodeImpl(pos, "TupleType", {})); } - if (typeConfig) { - sqlCallArgs.push_back(BuildQuotedAtom(pos, *typeConfig)); + if (!typeConfig.Empty()) { + sqlCallArgs.push_back(typeConfig.Build()); + } else if (runConfig) { + sqlCallArgs.push_back(BuildQuotedAtom(pos, "")); + } + + if (runConfig) { + sqlCallArgs.push_back(runConfig); } return new TCallNodeImpl(pos, "SqlCall", sqlCallArgs); @@ -2312,13 +2318,13 @@ public: if ("Datetime" == Module || ("Yson" == Module && ctx.PragmaYsonFast)) Module.append('2'); - TMaybe<TString> typeConfig = MakeTypeConfig(to_lower(Module), Args); + TNodePtr typeConfig = MakeTypeConfig(Pos, to_lower(Module), Args); if (ForReduce) { TVector<TNodePtr> udfArgs; udfArgs.push_back(BuildQuotedAtom(Pos, TString(Module) + "." + Name)); udfArgs.push_back(customUserType ? customUserType : new TCallNodeImpl(Pos, "TupleType", {})); if (typeConfig) { - udfArgs.push_back(BuildQuotedAtom(Pos, *typeConfig)); + udfArgs.push_back(typeConfig); } Node = new TCallNodeImpl(Pos, "SqlReduceUdf", udfArgs); } else { @@ -3514,6 +3520,11 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec auto builtinCallback = builtinFuncs.find(normalizedName); if (builtinCallback != builtinFuncs.end()) { return (*builtinCallback).second(pos, args); + } else if (normalizedName == "udf") { + if (mustUseNamed && *mustUseNamed) { + *mustUseNamed = false; + } + return new TUdfNode(pos, args); } else if (normalizedName == "asstruct" || normalizedName == "structtype") { if (args.empty()) { return new TCallNodeImpl(pos, normalizedName == "asstruct" ? "AsStruct" : "StructType", 0, 0, args); @@ -3648,8 +3659,8 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec }; } - TMaybe<TString> typeConfig = MakeTypeConfig(ns, usedArgs); - return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, typeConfig); + TNodePtr typeConfig = MakeTypeConfig(pos, ns, usedArgs); + return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, TDeferredAtom(typeConfig, ctx), nullptr); } } // namespace NSQLTranslationV1 diff --git a/ydb/library/yql/sql/v1/node.cpp b/ydb/library/yql/sql/v1/node.cpp index 79256249a2e..9a340e1dd4b 100644 --- a/ydb/library/yql/sql/v1/node.cpp +++ b/ydb/library/yql/sql/v1/node.cpp @@ -2980,6 +2980,10 @@ public: return copy; } + const TVector<TIdPart>& GetParts() const { + return Ids; + } + protected: void DoUpdateState() const override { YQL_ENSURE(Node); @@ -3323,6 +3327,107 @@ TNodePtr BuildIsNullOp(TPosition pos, TNodePtr a) { return new TCallNodeImpl(pos, "Not", {new TCallNodeImpl(pos, "Exists", {a})}); } + + +TUdfNode::TUdfNode(TPosition pos, const TVector<TNodePtr>& args) + : INode(pos) + , Args(args) +{ + if (Args.size()) { + // If there aren't any named args, args are passed as vector of positional args, + // else Args has length 2: tuple for positional args and struct for named args, + // so let's construct tuple of args there. Other type checks will within DoInit call. + if (TTupleNode* as_tuple = dynamic_cast<TTupleNode*>(Args[0].Get()); !as_tuple) { + Args = {BuildTuple(pos, args)}; + } + } +} + +bool TUdfNode::DoInit(TContext& ctx, ISource* src) { + Y_UNUSED(src); + if (Args.size() < 1) { + ctx.Error(Pos) << "Udf: expected at least one argument"; + return false; + } + + TTupleNode* as_tuple = dynamic_cast<TTupleNode*>(Args[0].Get()); + + if (!as_tuple || as_tuple->GetTupleSize() < 1) { + ctx.Error(Pos) << "Udf: first argument must be a callable, like Foo::Bar"; + return false; + } + + TNodePtr function = as_tuple->GetTupleElement(0); + + if (!function || !function->FuncName()) { + ctx.Error(Pos) << "Udf: first argument must be a callable, like Foo::Bar"; + return false; + } + + FunctionName = function->FuncName(); + ModuleName = function->ModuleName(); + TVector<TNodePtr> external; + external.reserve(as_tuple->GetTupleSize() - 1); + + for (size_t i = 1; i < as_tuple->GetTupleSize(); ++i) { + // TODO(): support named args in GetFunctionArgColumnStatus + TNodePtr current = as_tuple->GetTupleElement(i); + if (TAccessNode* as_access = dynamic_cast<TAccessNode*>(current.Get()); as_access) { + external.push_back(Y("DataType", Q(as_access->GetParts()[1].Name))); + continue; + } + external.push_back(current); + } + + ExternalTypesTuple = new TCallNodeImpl(Pos, "TupleType", external); + + if (Args.size() == 1) { + return true; + } + + if (TStructNode* named_args = dynamic_cast<TStructNode*>(Args[1].Get()); named_args) { + for (const auto &arg: named_args->GetExprs()) { + if (arg->GetLabel() == "TypeConfig") { + TypeConfig = MakeAtomFromExpression(ctx, arg); + } else if (arg->GetLabel() == "RunConfig") { + RunConfig = arg; + } + } + } + + return true; +} + +const TNodePtr TUdfNode::GetExternalTypes() const { + return ExternalTypesTuple; +} + +const TString& TUdfNode::GetFunction() const { + return *FunctionName; +} + +const TString& TUdfNode::GetModule() const { + return *ModuleName; +} + +TNodePtr TUdfNode::GetRunConfig() const { + return RunConfig; +} + +const TDeferredAtom& TUdfNode::GetTypeConfig() const { + return TypeConfig; +} + +TAstNode* TUdfNode::Translate(TContext& ctx) const { + ctx.Error(Pos) << "Abstract Udf Node can't be used as a part of expression."; + return nullptr; +} + +TNodePtr TUdfNode::DoClone() const { + return new TUdfNode(Pos, CloneContainer(Args)); +} + + class TBinaryOpNode final: public TCallNode { public: TBinaryOpNode(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b); diff --git a/ydb/library/yql/sql/v1/node.h b/ydb/library/yql/sql/v1/node.h index 538202854fe..9d0b152c88c 100644 --- a/ydb/library/yql/sql/v1/node.h +++ b/ydb/library/yql/sql/v1/node.h @@ -750,6 +750,27 @@ namespace NSQLTranslationV1 { const bool Ordered; }; + + class TUdfNode: public INode { + public: + TUdfNode(TPosition pos, const TVector<TNodePtr>& args); + bool DoInit(TContext& ctx, ISource* src) override final; + TNodePtr DoClone() const override final; + TAstNode* Translate(TContext& ctx) const override; + const TNodePtr GetExternalTypes() const; + const TString& GetFunction() const; + const TString& GetModule() const; + TNodePtr GetRunConfig() const; + const TDeferredAtom& GetTypeConfig() const; + private: + TVector<TNodePtr> Args; + const TString* FunctionName; + const TString* ModuleName; + TNodePtr ExternalTypesTuple = nullptr; + TNodePtr RunConfig; + TDeferredAtom TypeConfig; + }; + class IAggregation: public INode { public: bool IsDistinct() const; diff --git a/ydb/library/yql/sql/v1/sql.cpp b/ydb/library/yql/sql/v1/sql.cpp index 2569d59b7d1..4a62213ef2f 100644 --- a/ydb/library/yql/sql/v1/sql.cpp +++ b/ydb/library/yql/sql/v1/sql.cpp @@ -51,6 +51,9 @@ using namespace NYql; namespace NSQLTranslationV1 { +TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig); + using NALPDefault::SQLv1LexerTokens; using namespace NSQLv1Generated; @@ -1107,6 +1110,24 @@ public: TNodePtr BuildCall() { TVector<TNodePtr> args; bool warnOnYqlNameSpace = true; + + TUdfNode* udf_node = Node ? dynamic_cast<TUdfNode*>(Node.Get()) : nullptr; + if (udf_node) { + if (!udf_node->DoInit(Ctx, nullptr)) { + return nullptr; + } + TNodePtr positional_args = BuildTuple(Pos, PositionalArgs); + TNodePtr positional = positional_args->Y("TypeOf", positional_args); + TNodePtr named_args = BuildStructure(Pos, NamedArgs); + TNodePtr named = named_args->Y("TypeOf", named_args); + + TNodePtr custom_user_type = new TCallNodeImpl(Pos, "TupleType", {positional, named, udf_node->GetExternalTypes()}); + + return BuildSqlCall(Ctx, Pos, udf_node->GetModule(), udf_node->GetFunction(), + args, positional_args, named_args, custom_user_type, + udf_node->GetTypeConfig(), udf_node->GetRunConfig()); + } + if (Node && !Node->FuncName()) { Module = "YQL"; Func = NamedArgs.empty() ? "Apply" : "NamedApply"; diff --git a/ydb/library/yql/sql/v1/sql_ut.cpp b/ydb/library/yql/sql/v1/sql_ut.cpp index 6e993d79a43..853147d3241 100644 --- a/ydb/library/yql/sql/v1/sql_ut.cpp +++ b/ydb/library/yql/sql/v1/sql_ut.cpp @@ -1533,6 +1533,69 @@ Y_UNIT_TEST_SUITE(SqlParsingOnly) { UNIT_ASSERT(programm.find(expected) != TString::npos); } + Y_UNIT_TEST(UdfSyntaxSugarOnlyCallable) { + auto req = "SELECT Udf(DateTime::FromString)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType)))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarTypeNoRun) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\")"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarRunNoType) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, Void() as RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarFullTest) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, Void() As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, '55' As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (String '\"55\"))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOtherRunConfigs2) { + auto req = "SELECT Udf(DateTime::FromString, String, Tuple<Int32, Float>, 'foo' as TypeConfig, AsTuple(32, 'no', AsStruct(1e-9 As SomeFloat)) As RunConfig)('2022-01-01');"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (DataType 'String) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" '((Int32 '\"32\") (String '\"no\") (AsStruct '('\"SomeFloat\" (Double '\"1e-9\")))))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + + Y_UNIT_TEST(UdfSyntaxSugarOptional) { + auto req = "SELECT Udf(DateTime::FromString, String?, Int32??, Tuple<Int32, Float>, \"foo\" as TypeConfig, Void() As RunConfig)(\"2022-01-01\");"; + auto res = SqlToYql(req); + UNIT_ASSERT(res.Root); + const auto programm = GetPrettyPrint(res); + auto expected = "(SqlCall '\"DateTime.FromString\" '((PositionalArgs (String '\"2022-01-01\")) (AsStruct)) (TupleType (TypeOf '((String '\"2022-01-01\"))) (TypeOf (AsStruct)) (TupleType (OptionalType (DataType 'String)) (OptionalType (OptionalType (DataType 'Int32))) (TupleType (DataType 'Int32) (DataType 'Float)))) '\"foo\" (Void))"; + UNIT_ASSERT(programm.find(expected) != TString::npos); + } + Y_UNIT_TEST(CompactionPolicyParseCorrect) { NYql::TAstParseResult res = SqlToYql( R"( USE plato; @@ -1941,7 +2004,31 @@ Y_UNIT_TEST_SUITE(ExternalFunction) { } } -Y_UNIT_TEST_SUITE(SqlToYQLErrors) { +Y_UNIT_TEST_SUITE(SqlToYQLErrors) { + Y_UNIT_TEST(UdfSyntaxSugarMissingCall) { + auto req = "SELECT Udf(DateTime::FromString, \"foo\" as RunConfig);"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Abstract Udf Node can't be used as a part of expression.\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(UdfSyntaxSugarIsNotCallable) { + auto req = "SELECT Udf(123, \"foo\" as RunConfig);"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Udf: first argument must be a callable, like Foo::Bar\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + + Y_UNIT_TEST(UdfSyntaxSugarNoArgs) { + auto req = "SELECT Udf()();"; + auto res = SqlToYql(req); + TString a1 = Err2Str(res); + TString a2("<main>:1:8: Error: Udf: expected at least one argument\n"); + UNIT_ASSERT_NO_DIFF(a1, a2); + } + Y_UNIT_TEST(StrayUTF8) { /// 'c' in plato is russian here NYql::TAstParseResult res = SqlToYql("select * from сedar.Input"); |