summary refs log tree commit diff stats
path: root/yql/essentials/sql
diff options
context:
space:
mode:
author vvvv <[email protected]> 2025-02-03 11:29:11 +0300
committer vvvv <[email protected]> 2025-02-03 11:53:49 +0300
commit 3a1d187623efee16d2825e37ee54294c8fb0f6e7 (patch)
tree 8ea179d4cfca44bb72bbbc7644feb298ab369f75 /yql/essentials/sql
parent 10cd04107980396de90d116fd67f75c50cd5beeb (diff)
YQL-19530 pass cpu & mem settings to Udf/ScriptUdf
commit_hash:21577e2038f1d80c16e671d74e0c9e1e00b6c448
Diffstat (limited to 'yql/essentials/sql')
-rw-r--r-- yql/essentials/sql/v1/builtin.cpp 101
-rw-r--r-- yql/essentials/sql/v1/node.cpp 62
-rw-r--r-- yql/essentials/sql/v1/node.h 15
-rw-r--r-- yql/essentials/sql/v1/sql_call_expr.cpp 29
-rw-r--r-- yql/essentials/sql/v1/sql_call_expr.h 3
5 files changed, 169 insertions, 41 deletions
diff --git a/yql/essentials/sql/v1/builtin.cpp b/yql/essentials/sql/v1/builtin.cpp
index 8e49ba39f22..2cd2aeaee06 100644
--- a/yql/essentials/sql/v1/builtin.cpp
+++ b/yql/essentials/sql/v1/builtin.cpp
@@ -2286,7 +2286,8 @@ TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector
}
TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
- TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig)
+ TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig,
+ TNodePtr options)
{
const TString fullName = module + "." + name;
TNodePtr callable;
@@ -2318,18 +2319,24 @@ TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const
// optional arguments
if (customUserType) {
sqlCallArgs.push_back(customUserType);
- } else if (!typeConfig.Empty()) {
+ } else if (!typeConfig.Empty() || runConfig || options) {
sqlCallArgs.push_back(new TCallNodeImpl(pos, "TupleType", {}));
}
if (!typeConfig.Empty()) {
sqlCallArgs.push_back(typeConfig.Build());
- } else if (runConfig) {
+ } else if (runConfig || options) {
sqlCallArgs.push_back(BuildQuotedAtom(pos, ""));
}
if (runConfig) {
sqlCallArgs.push_back(runConfig);
+ } else if (options) {
+ sqlCallArgs.push_back(new TCallNodeImpl(pos, "Void", {}));
+ }
+
+ if (options) {
+ sqlCallArgs.push_back(options);
}
return new TCallNodeImpl(pos, "SqlCall", sqlCallArgs);
@@ -2463,45 +2470,47 @@ TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TSt
class TScriptUdf final: public INode {
public:
- TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args)
+ TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args,
+ TNodePtr options)
: INode(pos)
- , ModuleName(moduleName)
- , FuncName(funcName)
- , Args(args)
+ , ModuleName_(moduleName)
+ , FuncName_(funcName)
+ , Args_(args)
+ , Options_(options)
{}
bool DoInit(TContext& ctx, ISource* src) override {
- const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos;
+ const bool isPython = ModuleName_.find(TStringBuf("Python")) != TString::npos;
if (!isPython) {
- if (Args.size() != 2) {
- ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters";
+ if (Args_.size() != 2) {
+ ctx.Error(Pos) << ModuleName_ << " script declaration requires exactly two parameters";
return false;
}
} else {
- if (Args.size() < 1 || Args.size() > 2) {
- ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters";
+ if (Args_.size() < 1 || Args_.size() > 2) {
+ ctx.Error(Pos) << ModuleName_ << " script declaration requires one or two parameters";
return false;
}
}
- auto nameAtom = BuildQuotedAtom(Pos, FuncName);
- auto scriptNode = Args.back();
+ auto nameAtom = BuildQuotedAtom(Pos, FuncName_);
+ auto scriptNode = Args_.back();
if (!scriptNode->Init(ctx, src)) {
return false;
}
- auto scriptStrPtr = Args.back()->GetLiteral("String");
+ auto scriptStrPtr = Args_.back()->GetLiteral("String");
if (!ctx.CompactNamedExprs && scriptStrPtr && scriptStrPtr->size() > SQL_MAX_INLINE_SCRIPT_LEN) {
scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode));
}
INode::TPtr type;
- if (Args.size() == 2) {
- type = Args[0];
+ if (Args_.size() == 2) {
+ type = Args_[0];
} else {
// Python supports getting functions signatures right from docstrings
type = Y("EvaluateType", Y("ParseTypeHandle", Y("Apply",
Y("bind", "core_module", Q("PythonFuncSignature")),
- Q(ModuleName),
+ Q(ModuleName_),
scriptNode,
Y("String", nameAtom)
)));
@@ -2511,14 +2520,18 @@ public:
return false;
}
- Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode);
+ Node_ = Y("ScriptUdf", Q(ModuleName_), nameAtom, type, scriptNode);
+ if (Options_) {
+ Node_ = L(Node_, Options_);
+ }
+
return true;
}
TAstNode* Translate(TContext& ctx) const override {
Y_UNUSED(ctx);
- Y_DEBUG_ABORT_UNLESS(Node);
- return Node->Translate(ctx);
+ Y_DEBUG_ABORT_UNLESS(Node_);
+ return Node_->Translate(ctx);
}
void DoUpdateState() const override {
@@ -2526,20 +2539,47 @@ public:
}
TNodePtr DoClone() const final {
- return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args));
+ return new TScriptUdf(GetPos(), ModuleName_, FuncName_, CloneContainer(Args_), Options_);
}
void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
- Y_DEBUG_ABORT_UNLESS(Node);
- Node->VisitTree(func, visited);
+ Y_DEBUG_ABORT_UNLESS(Node_);
+ Node_->VisitTree(func, visited);
+ }
+
+ const TString* FuncName() const final {
+ return &FuncName_;
+ }
+
+ const TString* ModuleName() const final {
+ return &ModuleName_;
+ }
+
+ bool IsScript() const final {
+ return true;
}
+
+ size_t GetTupleSize() const final {
+ return Args_.size();
+ }
+
+ TPtr GetTupleElement(size_t index) const final {
+ return Args_[index];
+ }
+
private:
- TString ModuleName;
- TString FuncName;
- TVector<TNodePtr> Args;
- TNodePtr Node;
+ TString ModuleName_;
+ TString FuncName_;
+ TVector<TNodePtr> Args_;
+ TNodePtr Node_;
+ TNodePtr Options_;
};
+TNodePtr BuildScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args,
+ TNodePtr options) {
+ return new TScriptUdf(pos, moduleName, funcName, args, options);
+}
+
template <bool Sorted, bool Hashed>
class TYqlToDict final: public TCallNode {
public:
@@ -3549,7 +3589,7 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec
return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs());
} else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) {
auto scriptName = NKikimr::NMiniKQL::IsCustomPython(scriptType) ? nameSpace : TString(NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType));
- return new TScriptUdf(pos, scriptName, name, args);
+ return BuildScriptUdf(pos, scriptName, name, args, nullptr);
} else if (ns.empty()) {
if (auto simpleType = LookupSimpleType(normalizedName, ctx.FlexibleTypes, /* isPgType = */ false)) {
const auto type = *simpleType;
@@ -3871,7 +3911,8 @@ TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVec
}
TNodePtr typeConfig = MakeTypeConfig(pos, ns, usedArgs);
- return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, TDeferredAtom(typeConfig, ctx), nullptr);
+ return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType,
+ TDeferredAtom(typeConfig, ctx), nullptr, nullptr);
}
} // namespace NSQLTranslationV1
diff --git a/yql/essentials/sql/v1/node.cpp b/yql/essentials/sql/v1/node.cpp
index eea9069abf3..e5c7d353795 100644
--- a/yql/essentials/sql/v1/node.cpp
+++ b/yql/essentials/sql/v1/node.cpp
@@ -334,6 +334,10 @@ const TString* INode::ModuleName() const {
return nullptr;
}
+bool INode::IsScript() const {
+ return false;
+}
+
bool INode::HasSkip() const {
return false;
}
@@ -566,6 +570,10 @@ const TString* IProxyNode::ModuleName() const {
return Inner->ModuleName();
}
+bool IProxyNode::IsScript() const {
+ return Inner->IsScript();
+}
+
bool IProxyNode::HasSkip() const {
return Inner->HasSkip();
}
@@ -3027,6 +3035,18 @@ bool TUdfNode::DoInit(TContext& ctx, ISource* src) {
FunctionName = function->FuncName();
ModuleName = function->ModuleName();
+ ScriptUdf = function->IsScript();
+ if (ScriptUdf && as_tuple->GetTupleSize() > 1) {
+ ctx.Error(Pos) << "Udf: user type is not supported for script udfs";
+ return false;
+ }
+
+ if (ScriptUdf) {
+ for (size_t i = 0; i < function->GetTupleSize(); ++i) {
+ ScriptArgs.push_back(function->GetTupleElement(i));
+ }
+ }
+
TVector<TNodePtr> external;
external.reserve(as_tuple->GetTupleSize() - 1);
@@ -3049,9 +3069,26 @@ bool TUdfNode::DoInit(TContext& ctx, ISource* src) {
if (TStructNode* named_args = Args[1]->GetStructNode(); named_args) {
for (const auto &arg: named_args->GetExprs()) {
if (arg->GetLabel() == "TypeConfig") {
+ if (function->IsScript()) {
+ ctx.Error() << "Udf: TypeConfig is not supported for script udfs";
+ return false;
+ }
+
TypeConfig = MakeAtomFromExpression(Pos, ctx, arg);
} else if (arg->GetLabel() == "RunConfig") {
+ if (function->IsScript()) {
+ ctx.Error() << "Udf: RunConfig is not supported for script udfs";
+ return false;
+ }
+
RunConfig = arg;
+ } else if (arg->GetLabel() == "Cpu") {
+ Cpu = MakeAtomFromExpression(Pos, ctx, arg);
+ } else if (arg->GetLabel() == "ExtraMem") {
+ ExtraMem = MakeAtomFromExpression(Pos, ctx, arg);
+ } else {
+ ctx.Error() << "Udf: unexpected named argument: " << arg->GetLabel();
+ return false;
}
}
}
@@ -3079,6 +3116,31 @@ const TDeferredAtom& TUdfNode::GetTypeConfig() const {
return TypeConfig;
}
+TNodePtr TUdfNode::BuildOptions() const {
+ if (Cpu.Empty() && ExtraMem.Empty()) {
+ return nullptr;
+ }
+
+ auto options = Y();
+ if (!Cpu.Empty()) {
+ options = L(options, Q(Y(Q("cpu"), Cpu.Build())));
+ }
+
+ if (!ExtraMem.Empty()) {
+ options = L(options, Q(Y(Q("extraMem"), ExtraMem.Build())));
+ }
+
+ return Q(options);
+}
+
+bool TUdfNode::IsScript() const {
+ return ScriptUdf;
+}
+
+const TVector<TNodePtr>& TUdfNode::GetScriptArgs() const {
+ return ScriptArgs;
+}
+
TUdfNode* TUdfNode::GetUdfNode() {
return this;
}
diff --git a/yql/essentials/sql/v1/node.h b/yql/essentials/sql/v1/node.h
index c197f7ac900..f3e3f5f5808 100644
--- a/yql/essentials/sql/v1/node.h
+++ b/yql/essentials/sql/v1/node.h
@@ -194,6 +194,7 @@ namespace NSQLTranslationV1 {
virtual bool HasSelectResult() const;
virtual const TString* FuncName() const;
virtual const TString* ModuleName() const;
+ virtual bool IsScript() const;
virtual bool HasSkip() const;
virtual TColumnNode* GetColumnNode();
@@ -325,6 +326,7 @@ namespace NSQLTranslationV1 {
virtual bool HasSelectResult() const override;
virtual const TString* FuncName() const override;
virtual const TString* ModuleName() const override;
+ virtual bool IsScript() const override;
virtual bool HasSkip() const override;
virtual TColumnNode* GetColumnNode() override;
@@ -950,6 +952,9 @@ namespace NSQLTranslationV1 {
const TDeferredAtom& GetTypeConfig() const;
TUdfNode* GetUdfNode() override;
const TUdfNode* GetUdfNode() const override;
+ bool IsScript() const override;
+ const TVector<TNodePtr>& GetScriptArgs() const;
+ TNodePtr BuildOptions() const;
private:
TVector<TNodePtr> Args;
const TString* FunctionName;
@@ -957,6 +962,10 @@ namespace NSQLTranslationV1 {
TNodePtr ExternalTypesTuple = nullptr;
TNodePtr RunConfig;
TDeferredAtom TypeConfig;
+ TDeferredAtom Cpu;
+ TDeferredAtom ExtraMem;
+ bool ScriptUdf = false;
+ TVector<TNodePtr> ScriptArgs;
};
class IAggregation: public INode {
@@ -1511,6 +1520,12 @@ namespace NSQLTranslationV1 {
// Implemented in builtin.cpp
+ TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
+ TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig,
+ TNodePtr options);
+ TNodePtr BuildScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args,
+ TNodePtr options);
+
TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce = false);
TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args);
TNodePtr BuildBuiltinFunc(
diff --git a/yql/essentials/sql/v1/sql_call_expr.cpp b/yql/essentials/sql/v1/sql_call_expr.cpp
index 1871c1bbc0b..f279e7db779 100644
--- a/yql/essentials/sql/v1/sql_call_expr.cpp
+++ b/yql/essentials/sql/v1/sql_call_expr.cpp
@@ -7,9 +7,6 @@
namespace NSQLTranslationV1 {
-TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
- TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig);
-
using namespace NSQLv1Generated;
static bool ValidateForCounters(const TString& input) {
@@ -49,26 +46,42 @@ TNodePtr TSqlCallExpr::BuildCall() {
TNodePtr named = named_args->Y("TypeOf", named_args);
TNodePtr custom_user_type = new TCallNodeImpl(Pos, "TupleType", {positional, named, udf_node->GetExternalTypes()});
+ TNodePtr options = udf_node->BuildOptions();
+
+ if (udf_node->IsScript()) {
+ auto udf = BuildScriptUdf(Pos, udf_node->GetModule(), udf_node->GetFunction(), udf_node->GetScriptArgs(), options);
+ TVector<TNodePtr> applyArgs;
+ applyArgs.push_back(new TAstAtomNodeImpl(Pos, !NamedArgs.empty() ? "NamedApply" : "Apply", TNodeFlags::Default));
+ applyArgs.push_back(udf);
+ if (!NamedArgs.empty()) {
+ applyArgs.push_back(BuildTuple(Pos, PositionalArgs));
+ applyArgs.push_back(BuildStructure(Pos, NamedArgs));
+ } else {
+ applyArgs.insert(applyArgs.end(), PositionalArgs.begin(), PositionalArgs.end());
+ }
+
+ return new TAstListNodeImpl(Pos, applyArgs);
+ }
return BuildSqlCall(Ctx, Pos, udf_node->GetModule(), udf_node->GetFunction(),
args, positional_args, named_args, custom_user_type,
- udf_node->GetTypeConfig(), udf_node->GetRunConfig());
+ udf_node->GetTypeConfig(), udf_node->GetRunConfig(), options);
}
- if (Node && !Node->FuncName()) {
+ if (Node && (!Node->FuncName() || Node->IsScript())) {
Module = "YQL";
Func = NamedArgs.empty() ? "Apply" : "NamedApply";
warnOnYqlNameSpace = false;
args.push_back(Node);
}
- if (Node && Node->FuncName()) {
+ if (Node && Node->FuncName() && !Node->IsScript()) {
Module = Node->ModuleName() ? *Node->ModuleName() : "YQL";
Func = *Node->FuncName();
}
bool mustUseNamed = !NamedArgs.empty();
if (mustUseNamed) {
- if (Node && !Node->FuncName()) {
+ if (Node && (!Node->FuncName() || Node->IsScript())) {
mustUseNamed = false;
}
args.emplace_back(BuildTuple(Pos, PositionalArgs));
@@ -276,7 +289,7 @@ bool TSqlCallExpr::FillArg(const TString& module, const TString& func, size_t& i
bool TSqlCallExpr::FillArgs(const TRule_named_expr_list& node) {
TString module = Module;
TString func = Func;
- if (Node && Node->FuncName()) {
+ if (Node && Node->FuncName() && !Node->IsScript()) {
module = Node->ModuleName() ? *Node->ModuleName() : "YQL";
func = *Node->FuncName();
}
diff --git a/yql/essentials/sql/v1/sql_call_expr.h b/yql/essentials/sql/v1/sql_call_expr.h
index 9b9d39b81a8..f01ceabadfa 100644
--- a/yql/essentials/sql/v1/sql_call_expr.h
+++ b/yql/essentials/sql/v1/sql_call_expr.h
@@ -4,9 +4,6 @@
namespace NSQLTranslationV1 {
-TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
- TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig);
-
using namespace NSQLv1Generated;
class TSqlCallExpr: public TSqlTranslation {