diff options
| author | vvvv <[email protected]> | 2025-02-03 11:29:11 +0300 |
|---|---|---|
| committer | vvvv <[email protected]> | 2025-02-03 11:53:49 +0300 |
| commit | 3a1d187623efee16d2825e37ee54294c8fb0f6e7 (patch) | |
| tree | 8ea179d4cfca44bb72bbbc7644feb298ab369f75 /yql/essentials/sql | |
| parent | 10cd04107980396de90d116fd67f75c50cd5beeb (diff) | |
YQL-19530 pass cpu & mem settings to Udf/ScriptUdf
commit_hash:21577e2038f1d80c16e671d74e0c9e1e00b6c448
Diffstat (limited to 'yql/essentials/sql')
| -rw-r--r-- | yql/essentials/sql/v1/builtin.cpp | 101 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/node.cpp | 62 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/node.h | 15 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_call_expr.cpp | 29 | ||||
| -rw-r--r-- | yql/essentials/sql/v1/sql_call_expr.h | 3 |
5 files changed, 169 insertions, 41 deletions
diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp index 8e49ba39f22..2cd2aeaee06 100644 --- a/yql/essentials/sql/v1/builtin.cpp +++ b/yql/essentials/sql/v1/builtin.cpp @@ -2286,7 +2286,8 @@ TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector } TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, - TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig) + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig, + TNodePtr options) { const TString fullName = module + "." + name; TNodePtr callable; @@ -2318,18 +2319,24 @@ TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const // optional arguments if (customUserType) { sqlCallArgs.push_back(customUserType); - } else if (!typeConfig.Empty()) { + } else if (!typeConfig.Empty() || runConfig || options) { sqlCallArgs.push_back(new TCallNodeImpl(pos, "TupleType", {})); } if (!typeConfig.Empty()) { sqlCallArgs.push_back(typeConfig.Build()); - } else if (runConfig) { + } else if (runConfig || options) { sqlCallArgs.push_back(BuildQuotedAtom(pos, "")); } if (runConfig) { sqlCallArgs.push_back(runConfig); + } else if (options) { + sqlCallArgs.push_back(new TCallNodeImpl(pos, "Void", {})); + } + + if (options) { + sqlCallArgs.push_back(options); } return new TCallNodeImpl(pos, "SqlCall", sqlCallArgs); @@ -2463,45 +2470,47 @@ TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TSt class TScriptUdf final: public INode { public: - TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args) + TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args, + TNodePtr options) : INode(pos) - , ModuleName(moduleName) - , FuncName(funcName) - , Args(args) + , ModuleName_(moduleName) + , FuncName_(funcName) + , Args_(args) + , Options_(options) {} bool DoInit(TContext& ctx, ISource* src) override { - const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos; + const bool isPython = ModuleName_.find(TStringBuf("Python")) != TString::npos; if (!isPython) { - if (Args.size() != 2) { - ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters"; + if (Args_.size() != 2) { + ctx.Error(Pos) << ModuleName_ << " script declaration requires exactly two parameters"; return false; } } else { - if (Args.size() < 1 || Args.size() > 2) { - ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters"; + if (Args_.size() < 1 || Args_.size() > 2) { + ctx.Error(Pos) << ModuleName_ << " script declaration requires one or two parameters"; return false; } } - auto nameAtom = BuildQuotedAtom(Pos, FuncName); - auto scriptNode = Args.back(); + auto nameAtom = BuildQuotedAtom(Pos, FuncName_); + auto scriptNode = Args_.back(); if (!scriptNode->Init(ctx, src)) { return false; } - auto scriptStrPtr = Args.back()->GetLiteral("String"); + auto scriptStrPtr = Args_.back()->GetLiteral("String"); if (!ctx.CompactNamedExprs && scriptStrPtr && scriptStrPtr->size() > SQL_MAX_INLINE_SCRIPT_LEN) { scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode)); } INode::TPtr type; - if (Args.size() == 2) { - type = Args[0]; + if (Args_.size() == 2) { + type = Args_[0]; } else { // Python supports getting functions signatures right from docstrings type = Y("EvaluateType", Y("ParseTypeHandle", Y("Apply", Y("bind", "core_module", Q("PythonFuncSignature")), - Q(ModuleName), + Q(ModuleName_), scriptNode, Y("String", nameAtom) ))); @@ -2511,14 +2520,18 @@ public: return false; } - Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode); + Node_ = Y("ScriptUdf", Q(ModuleName_), nameAtom, type, scriptNode); + if (Options_) { + Node_ = L(Node_, Options_); + } + return true; } TAstNode* Translate(TContext& ctx) const override { Y_UNUSED(ctx); - Y_DEBUG_ABORT_UNLESS(Node); - return Node->Translate(ctx); + Y_DEBUG_ABORT_UNLESS(Node_); + return Node_->Translate(ctx); } void DoUpdateState() const override { @@ -2526,20 +2539,47 @@ public: } TNodePtr DoClone() const final { - return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args)); + return new TScriptUdf(GetPos(), ModuleName_, FuncName_, CloneContainer(Args_), Options_); } void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final { - Y_DEBUG_ABORT_UNLESS(Node); - Node->VisitTree(func, visited); + Y_DEBUG_ABORT_UNLESS(Node_); + Node_->VisitTree(func, visited); + } + + const TString* FuncName() const final { + return &FuncName_; + } + + const TString* ModuleName() const final { + return &ModuleName_; + } + + bool IsScript() const final { + return true; } + + size_t GetTupleSize() const final { + return Args_.size(); + } + + TPtr GetTupleElement(size_t index) const final { + return Args_[index]; + } + private: - TString ModuleName; - TString FuncName; - TVector<TNodePtr> Args; - TNodePtr Node; + TString ModuleName_; + TString FuncName_; + TVector<TNodePtr> Args_; + TNodePtr Node_; + TNodePtr Options_; }; +TNodePtr BuildScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args, + TNodePtr options) { + return new TScriptUdf(pos, moduleName, funcName, args, options); +} + template <bool Sorted, bool Hashed> class TYqlToDict final: public TCallNode { public: @@ -3549,7 +3589,7 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs()); } else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) { auto scriptName = NKikimr::NMiniKQL::IsCustomPython(scriptType) ? nameSpace : TString(NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType)); - return new TScriptUdf(pos, scriptName, name, args); + return BuildScriptUdf(pos, scriptName, name, args, nullptr); } else if (ns.empty()) { if (auto simpleType = LookupSimpleType(normalizedName, ctx.FlexibleTypes, /* isPgType = */ false)) { const auto type = *simpleType; @@ -3871,7 +3911,8 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec } TNodePtr typeConfig = MakeTypeConfig(pos, ns, usedArgs); - return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, TDeferredAtom(typeConfig, ctx), nullptr); + return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, + TDeferredAtom(typeConfig, ctx), nullptr, nullptr); } } // namespace NSQLTranslationV1 diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp index eea9069abf3..e5c7d353795 100644 --- a/yql/essentials/sql/v1/node.cpp +++ b/yql/essentials/sql/v1/node.cpp @@ -334,6 +334,10 @@ const TString* INode::ModuleName() const { return nullptr; } +bool INode::IsScript() const { + return false; +} + bool INode::HasSkip() const { return false; } @@ -566,6 +570,10 @@ const TString* IProxyNode::ModuleName() const { return Inner->ModuleName(); } +bool IProxyNode::IsScript() const { + return Inner->IsScript(); +} + bool IProxyNode::HasSkip() const { return Inner->HasSkip(); } @@ -3027,6 +3035,18 @@ bool TUdfNode::DoInit(TContext& ctx, ISource* src) { FunctionName = function->FuncName(); ModuleName = function->ModuleName(); + ScriptUdf = function->IsScript(); + if (ScriptUdf && as_tuple->GetTupleSize() > 1) { + ctx.Error(Pos) << "Udf: user type is not supported for script udfs"; + return false; + } + + if (ScriptUdf) { + for (size_t i = 0; i < function->GetTupleSize(); ++i) { + ScriptArgs.push_back(function->GetTupleElement(i)); + } + } + TVector<TNodePtr> external; external.reserve(as_tuple->GetTupleSize() - 1); @@ -3049,9 +3069,26 @@ bool TUdfNode::DoInit(TContext& ctx, ISource* src) { if (TStructNode* named_args = Args[1]->GetStructNode(); named_args) { for (const auto &arg: named_args->GetExprs()) { if (arg->GetLabel() == "TypeConfig") { + if (function->IsScript()) { + ctx.Error() << "Udf: TypeConfig is not supported for script udfs"; + return false; + } + TypeConfig = MakeAtomFromExpression(Pos, ctx, arg); } else if (arg->GetLabel() == "RunConfig") { + if (function->IsScript()) { + ctx.Error() << "Udf: RunConfig is not supported for script udfs"; + return false; + } + RunConfig = arg; + } else if (arg->GetLabel() == "Cpu") { + Cpu = MakeAtomFromExpression(Pos, ctx, arg); + } else if (arg->GetLabel() == "ExtraMem") { + ExtraMem = MakeAtomFromExpression(Pos, ctx, arg); + } else { + ctx.Error() << "Udf: unexpected named argument: " << arg->GetLabel(); + return false; } } } @@ -3079,6 +3116,31 @@ const TDeferredAtom& TUdfNode::GetTypeConfig() const { return TypeConfig; } +TNodePtr TUdfNode::BuildOptions() const { + if (Cpu.Empty() && ExtraMem.Empty()) { + return nullptr; + } + + auto options = Y(); + if (!Cpu.Empty()) { + options = L(options, Q(Y(Q("cpu"), Cpu.Build()))); + } + + if (!ExtraMem.Empty()) { + options = L(options, Q(Y(Q("extraMem"), ExtraMem.Build()))); + } + + return Q(options); +} + +bool TUdfNode::IsScript() const { + return ScriptUdf; +} + +const TVector<TNodePtr>& TUdfNode::GetScriptArgs() const { + return ScriptArgs; +} + TUdfNode* TUdfNode::GetUdfNode() { return this; } diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h index c197f7ac900..f3e3f5f5808 100644 --- a/yql/essentials/sql/v1/node.h +++ b/yql/essentials/sql/v1/node.h @@ -194,6 +194,7 @@ namespace NSQLTranslationV1 { virtual bool HasSelectResult() const; virtual const TString* FuncName() const; virtual const TString* ModuleName() const; + virtual bool IsScript() const; virtual bool HasSkip() const; virtual TColumnNode* GetColumnNode(); @@ -325,6 +326,7 @@ namespace NSQLTranslationV1 { virtual bool HasSelectResult() const override; virtual const TString* FuncName() const override; virtual const TString* ModuleName() const override; + virtual bool IsScript() const override; virtual bool HasSkip() const override; virtual TColumnNode* GetColumnNode() override; @@ -950,6 +952,9 @@ namespace NSQLTranslationV1 { const TDeferredAtom& GetTypeConfig() const; TUdfNode* GetUdfNode() override; const TUdfNode* GetUdfNode() const override; + bool IsScript() const override; + const TVector<TNodePtr>& GetScriptArgs() const; + TNodePtr BuildOptions() const; private: TVector<TNodePtr> Args; const TString* FunctionName; @@ -957,6 +962,10 @@ namespace NSQLTranslationV1 { TNodePtr ExternalTypesTuple = nullptr; TNodePtr RunConfig; TDeferredAtom TypeConfig; + TDeferredAtom Cpu; + TDeferredAtom ExtraMem; + bool ScriptUdf = false; + TVector<TNodePtr> ScriptArgs; }; class IAggregation: public INode { @@ -1511,6 +1520,12 @@ namespace NSQLTranslationV1 { // Implemented in builtin.cpp + TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, + TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig, + TNodePtr options); + TNodePtr BuildScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args, + TNodePtr options); + TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce = false); TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args); TNodePtr BuildBuiltinFunc( diff --git a/yql/essentials/sql/v1/sql_call_expr.cpp b/yql/essentials/sql/v1/sql_call_expr.cpp index 1871c1bbc0b..f279e7db779 100644 --- a/yql/essentials/sql/v1/sql_call_expr.cpp +++ b/yql/essentials/sql/v1/sql_call_expr.cpp @@ -7,9 +7,6 @@ namespace NSQLTranslationV1 { -TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, - TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig); - using namespace NSQLv1Generated; static bool ValidateForCounters(const TString& input) { @@ -49,26 +46,42 @@ TNodePtr TSqlCallExpr::BuildCall() { TNodePtr named = named_args->Y("TypeOf", named_args); TNodePtr custom_user_type = new TCallNodeImpl(Pos, "TupleType", {positional, named, udf_node->GetExternalTypes()}); + TNodePtr options = udf_node->BuildOptions(); + + if (udf_node->IsScript()) { + auto udf = BuildScriptUdf(Pos, udf_node->GetModule(), udf_node->GetFunction(), udf_node->GetScriptArgs(), options); + TVector<TNodePtr> applyArgs; + applyArgs.push_back(new TAstAtomNodeImpl(Pos, !NamedArgs.empty() ? "NamedApply" : "Apply", TNodeFlags::Default)); + applyArgs.push_back(udf); + if (!NamedArgs.empty()) { + applyArgs.push_back(BuildTuple(Pos, PositionalArgs)); + applyArgs.push_back(BuildStructure(Pos, NamedArgs)); + } else { + applyArgs.insert(applyArgs.end(), PositionalArgs.begin(), PositionalArgs.end()); + } + + return new TAstListNodeImpl(Pos, applyArgs); + } return BuildSqlCall(Ctx, Pos, udf_node->GetModule(), udf_node->GetFunction(), args, positional_args, named_args, custom_user_type, - udf_node->GetTypeConfig(), udf_node->GetRunConfig()); + udf_node->GetTypeConfig(), udf_node->GetRunConfig(), options); } - if (Node && !Node->FuncName()) { + if (Node && (!Node->FuncName() || Node->IsScript())) { Module = "YQL"; Func = NamedArgs.empty() ? "Apply" : "NamedApply"; warnOnYqlNameSpace = false; args.push_back(Node); } - if (Node && Node->FuncName()) { + if (Node && Node->FuncName() && !Node->IsScript()) { Module = Node->ModuleName() ? *Node->ModuleName() : "YQL"; Func = *Node->FuncName(); } bool mustUseNamed = !NamedArgs.empty(); if (mustUseNamed) { - if (Node && !Node->FuncName()) { + if (Node && (!Node->FuncName() || Node->IsScript())) { mustUseNamed = false; } args.emplace_back(BuildTuple(Pos, PositionalArgs)); @@ -276,7 +289,7 @@ bool TSqlCallExpr::FillArg(const TString& module, const TString& func, size_t& i bool TSqlCallExpr::FillArgs(const TRule_named_expr_list& node) { TString module = Module; TString func = Func; - if (Node && Node->FuncName()) { + if (Node && Node->FuncName() && !Node->IsScript()) { module = Node->ModuleName() ? *Node->ModuleName() : "YQL"; func = *Node->FuncName(); } diff --git a/yql/essentials/sql/v1/sql_call_expr.h b/yql/essentials/sql/v1/sql_call_expr.h index 9b9d39b81a8..f01ceabadfa 100644 --- a/yql/essentials/sql/v1/sql_call_expr.h +++ b/yql/essentials/sql/v1/sql_call_expr.h @@ -4,9 +4,6 @@ namespace NSQLTranslationV1 { -TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, - TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig); - using namespace NSQLv1Generated; class TSqlCallExpr: public TSqlTranslation { |
