summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v0/builtin.cpp
diff options
context:
space:
mode:
authorvvvv <[email protected]>2024-11-07 12:29:36 +0300
committervvvv <[email protected]>2024-11-07 13:49:47 +0300
commitd4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
treeb5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/sql/v0/builtin.cpp
parent13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
Moved other yql/essentials libs YQL-19206
init commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/sql/v0/builtin.cpp')
-rw-r--r--yql/essentials/sql/v0/builtin.cpp2495
1 files changed, 2495 insertions, 0 deletions
diff --git a/yql/essentials/sql/v0/builtin.cpp b/yql/essentials/sql/v0/builtin.cpp
new file mode 100644
index 00000000000..4c872fba7a9
--- /dev/null
+++ b/yql/essentials/sql/v0/builtin.cpp
@@ -0,0 +1,2495 @@
+#include "node.h"
+#include "context.h"
+
+#include "list_builtin.h"
+
+#include <yql/essentials/ast/yql_type_string.h>
+#include <yql/essentials/public/udf/udf_data_type.h>
+#include <yql/essentials/minikql/mkql_program_builder.h>
+#include <yql/essentials/minikql/mkql_type_ops.h>
+#include <yql/essentials/public/issue/yql_issue_id.h>
+
+#include <library/cpp/charset/ci_string.h>
+#include <util/string/builder.h>
+#include <util/string/cast.h>
+#include <util/string/util.h>
+#include <util/string/join.h>
+
+#include <unordered_map>
+
+using namespace NYql;
+
+namespace NSQLTranslationV0 {
+
+class TGroupingNode final: public TAstListNode {
+public:
+ TGroupingNode(TPosition pos, const TVector<TNodePtr>& args)
+ : TAstListNode(pos)
+ , Args(args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) final {
+ if (!src) {
+ ctx.Error(Pos) << "Grouping function should have source";
+ return false;
+ }
+ TVector<TString> columns;
+ columns.reserve(Args.size());
+ for (const auto& node: Args) {
+ auto namePtr = node->GetColumnName();
+ if (!namePtr || !*namePtr) {
+ ctx.Error(Pos) << "Grouping function should use columns as arguments";
+ return false;
+ }
+ const auto column = *namePtr;
+ ISource* composite = src->GetCompositeSource();
+ if (!src->IsGroupByColumn(column) && !src->IsAlias(EExprSeat::GroupBy, column) && (!composite || !composite->IsGroupByColumn(column))) {
+ ctx.Error(node->GetPos()) << "Column '" << column << "' not used as grouping column";
+ return false;
+ }
+ columns.emplace_back(column);
+ }
+ ui64 hint;
+ if (!src->CalculateGroupingHint(ctx, columns, hint)) {
+ return false;
+ }
+ Nodes.push_back(BuildAtom(Pos, "Uint64"));
+ Nodes.push_back(BuildQuotedAtom(Pos, IntToString<10>(hint)));
+ return TAstListNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TGroupingNode(Pos, Args);
+ }
+
+private:
+ const TVector<TNodePtr> Args;
+};
+
+class TBasicAggrFunc final: public TAstListNode {
+public:
+ TBasicAggrFunc(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
+ : TAstListNode(pos)
+ , Name(name)
+ , Aggr(aggr)
+ , Args(args)
+ {}
+
+ TCiString GetName() const {
+ return Name;
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) final {
+ if (!src) {
+ ctx.Error(Pos) << "Unable to use aggregation function '" << Name << "' without data source";
+ return false;
+ }
+ if (!DoInitAggregation(ctx, src)) {
+ return false;
+ }
+
+ return TAstListNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
+ return new TBasicAggrFunc(Pos, Name, aggrClone, CloneContainer(Args));
+ }
+
+ TAggregationPtr GetAggregation() const override {
+ return Aggr;
+ }
+
+private:
+ bool DoInitAggregation(TContext& ctx, ISource* src) {
+ if (!Aggr->InitAggr(ctx, false, src, *this, Args)) {
+ return false;
+ }
+ return src->AddAggregation(ctx, Aggr);
+ }
+
+ void DoUpdateState() const final {
+ State.Set(ENodeState::Const, Args.front()->IsConstant());
+ State.Set(ENodeState::Aggregated);
+ }
+
+protected:
+ const TString Name;
+ TAggregationPtr Aggr;
+ TVector<TNodePtr> Args;
+};
+
+class TBasicAggrFactory final : public TAstListNode {
+public:
+ TBasicAggrFactory(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
+ : TAstListNode(pos)
+ , Name(name)
+ , Aggr(aggr)
+ , Args(args)
+ {}
+
+ TCiString GetName() const {
+ return Name;
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) final {
+ if (!DoInitAggregation(ctx)) {
+ return false;
+ }
+
+ auto factory = Aggr->AggregationTraitsFactory();
+ auto apply = Y("Apply", factory, Y("ListType", "type"));
+
+ auto columnIndices = Aggr->GetFactoryColumnIndices();
+ if (columnIndices.size() == 1) {
+ apply = L(apply, "extractor");
+ } else {
+ // make several extractors from main that returns a tuple
+ for (ui32 arg = 0; arg < columnIndices.size(); ++arg) {
+ auto partial = BuildLambda(Pos, Y("row"), Y("Nth", Y("Apply", "extractor", "row"), Q(ToString(columnIndices[arg]))));
+ apply = L(apply, partial);
+ }
+ }
+
+ Aggr->AddFactoryArguments(apply);
+ Lambda = BuildLambda(Pos, Y("type", "extractor"), apply);
+ return TAstListNode::DoInit(ctx, src);
+ }
+
+ TAstNode* Translate(TContext& ctx) const override {
+ return Lambda->Translate(ctx);
+ }
+
+ TNodePtr DoClone() const final {
+ TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
+ return new TBasicAggrFactory(Pos, Name, aggrClone, CloneContainer(Args));
+ }
+
+ TAggregationPtr GetAggregation() const override {
+ return Aggr;
+ }
+
+private:
+ bool DoInitAggregation(TContext& ctx) {
+ return Aggr->InitAggr(ctx, true, nullptr, *this, Args);
+ }
+
+protected:
+ const TString Name;
+ TAggregationPtr Aggr;
+ TVector<TNodePtr> Args;
+ TNodePtr Lambda;
+};
+
+typedef THolder<TBasicAggrFunc> TAggrFuncPtr;
+
+class TLiteralStringAtom: public INode {
+public:
+ TLiteralStringAtom(TPosition pos, TNodePtr node, const TString& info)
+ : INode(pos)
+ , Node(node)
+ , Info(info)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ Y_UNUSED(src);
+ if (!Node) {
+ ctx.Error(Pos) << Info;
+ return false;
+ }
+
+ if (!Node->Init(ctx, src)) {
+ return false;
+ }
+
+ Atom = MakeAtomFromExpression(ctx, Node).Build();
+ return true;
+ }
+
+ TAstNode* Translate(TContext& ctx) const override {
+ return Atom->Translate(ctx);
+ }
+
+ TPtr DoClone() const final {
+ return {};
+ }
+
+private:
+ TNodePtr Node;
+ TNodePtr Atom;
+ TString Info;
+};
+
+class TYqlAsAtom: public TLiteralStringAtom {
+public:
+ TYqlAsAtom(TPosition pos, const TVector<TNodePtr>& args)
+ : TLiteralStringAtom(pos, args.size() == 1 ? args[0] : nullptr, "Literal string is required as argument")
+ {
+ }
+};
+
+class TYqlData: public TCallNode {
+public:
+ TYqlData(TPosition pos, const TString& type, const TVector<TNodePtr>& args)
+ : TCallNode(pos, type, 1, 1, args)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ auto slot = NUdf::FindDataSlot(GetOpName());
+ if (!slot) {
+ ctx.Error(Pos) << "Unexpected type " << GetOpName();
+ return false;
+ }
+
+ if (*slot == NUdf::EDataSlot::Decimal) {
+ MinArgs = MaxArgs = 3;
+ }
+
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ auto stringNode = Args[0];
+ auto atom = stringNode->GetLiteral("String");
+ if (!atom) {
+ ctx.Error(Pos) << "Expected literal string as argument in " << GetOpName() << " function";
+ return false;
+ }
+
+ TString value;
+ if (*slot == NUdf::EDataSlot::Decimal) {
+ const auto precision = Args[1]->GetLiteral("Int32");
+ const auto scale = Args[2]->GetLiteral("Int32");
+
+ if (!NKikimr::NMiniKQL::IsValidDecimal(*atom)) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ ui8 stub;
+ if (!(precision && TryFromString<ui8>(*precision, stub))) {
+ ctx.Error(Pos) << "Invalid precision " << (precision ? precision->Quote() : "") << " for type " << GetOpName();
+ return false;
+ }
+
+ if (!(scale && TryFromString<ui8>(*scale, stub))) {
+ ctx.Error(Pos) << "Invalid scale " << (scale ? scale->Quote() : "") << " for type " << GetOpName();
+ return false;
+ }
+
+ Args[0] = BuildQuotedAtom(GetPos(), *atom);
+ Args[1] = BuildQuotedAtom(GetPos(), *precision);
+ Args[2] = BuildQuotedAtom(GetPos(), *scale);
+ return TCallNode::DoInit(ctx, src);
+ } else if (NUdf::GetDataTypeInfo(*slot).Features & (NUdf::DateType | NUdf::TzDateType | NUdf::TimeIntervalType)) {
+ const auto out = NKikimr::NMiniKQL::ValueFromString(*slot, *atom);
+ if (!out) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ switch (*slot) {
+ case NUdf::EDataSlot::Date:
+ case NUdf::EDataSlot::TzDate:
+ value = ToString(out.Get<ui16>());
+ break;
+ case NUdf::EDataSlot::Datetime:
+ case NUdf::EDataSlot::TzDatetime:
+ value = ToString(out.Get<ui32>());
+ break;
+ case NUdf::EDataSlot::Timestamp:
+ case NUdf::EDataSlot::TzTimestamp:
+ value = ToString(out.Get<ui64>());
+ break;
+ case NUdf::EDataSlot::Interval:
+ value = ToString(out.Get<i64>());
+ if ('T' == atom->back()) {
+ ctx.Warning(Pos, TIssuesIds::YQL_DEPRECATED_INTERVAL_CONSTANT) << "Time prefix 'T' at end of interval constant";
+ }
+ break;
+ default:
+ Y_ABORT("Unexpected data slot");
+ }
+
+ if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) {
+ value += ",";
+ value += NKikimr::NMiniKQL::GetTimezoneIANAName(out.GetTimezoneId());
+ }
+ } else if (NUdf::EDataSlot::Uuid == *slot) {
+ char out[0x10];
+ if (!NKikimr::NMiniKQL::ParseUuid(*atom, out)) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ value.assign(out, sizeof(out));
+ } else {
+ if (!NKikimr::NMiniKQL::IsValidStringValue(*slot, *atom)) {
+ ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
+ return false;
+ }
+
+ value = *atom;
+ }
+
+ Args[0] = BuildQuotedAtom(GetPos(), value);
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TPtr DoClone() const final {
+ return new TYqlData(GetPos(), OpName, CloneContainer(Args));
+ }
+};
+
+class TTableName : public TCallNode {
+public:
+ TTableName(TPosition pos, const TVector<TNodePtr>& args, const TString& cluster)
+ : TCallNode(pos, "TableName", 0, 2, args)
+ , Cluster(cluster)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (Args.empty()) {
+ Args.push_back(Y("TablePath", Y("DependsOn", "row")));
+ }
+
+ if (Args.size() == 2) {
+ auto literal = Args[1]->GetLiteral("String");
+ if (!literal) {
+ ctx.Error(Args[1]->GetPos()) << "Expected literal string as second argument in TableName function";
+ return false;
+ }
+
+ Args[1] = BuildQuotedAtom(Args[1]->GetPos(), to_lower(*literal));
+ } else {
+ if (Cluster.empty()) {
+ ctx.Error(GetPos()) << GetOpName() << " requires either one of \"yt\"/\"kikimr\"/\"rtmr\" as second argument or current cluster name";
+ return false;
+ }
+
+ auto service = ctx.GetClusterProvider(Cluster);
+ if (!service) {
+ ctx.Error() << "Unknown cluster name: " << Cluster;
+ return false;
+ }
+
+ Args.push_back(BuildQuotedAtom(GetPos(), to_lower(*service)));
+ }
+
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TPtr DoClone() const final {
+ return new TTableName(GetPos(), CloneContainer(Args), Cluster);
+ }
+
+ void DoUpdateState() const override {
+ State.Set(ENodeState::Const, false);
+ }
+
+private:
+ TString Cluster;
+};
+
+class TYqlParseType final : public INode {
+public:
+ TYqlParseType(TPosition pos, const TVector<TNodePtr>& args)
+ : INode(pos)
+ , Args(args)
+ {}
+
+ TAstNode* Translate(TContext& ctx) const override {
+ if (Args.size() != 1) {
+ ctx.Error(Pos) << "Expected 1 argument in ParseType function";
+ return nullptr;
+ }
+
+ auto literal = Args[0]->GetLiteral("String");
+ if (!literal) {
+ ctx.Error(Args[0]->GetPos()) << "Expected literal string as argument in ParseType function";
+ return nullptr;
+ }
+
+ auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
+ if (!parsed) {
+ ctx.Error(Args[0]->GetPos()) << "Failed to parse type";
+ return nullptr;
+ }
+
+ return parsed;
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlParseType(Pos, Args);
+ }
+
+private:
+ TVector<TNodePtr> Args;
+};
+
+class TYqlAddTimezone: public TCallNode {
+public:
+ TYqlAddTimezone(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "AddTimezone", 2, 2, args)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ Args[1] = Y("TimezoneId", Args[1]);
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlAddTimezone(Pos, CloneContainer(Args));
+ }
+};
+
+template <bool Strict>
+class TYqlTypeAssert : public TCallNode {
+public:
+ TYqlTypeAssert(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, Strict ? "EnsureType" : "EnsureConvertibleTo", 2, 3, args)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ auto literal = Args[1]->GetLiteral("String");
+ INode::TPtr type;
+ if (literal) {
+ auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
+ if (!parsed) {
+ ctx.Error(Args[1]->GetPos()) << "Failed to parse type";
+ return false;
+ }
+
+ type = AstNode(parsed);
+ } else {
+ type = Args[1];
+ }
+
+ if (!type->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[1] = type;
+ if (Args.size() == 3) {
+ if (!Args[2]->Init(ctx, src)) {
+ return false;
+ }
+
+ auto message = MakeAtomFromExpression(ctx, Args[2]).Build();
+ Args[2] = message;
+ }
+
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlTypeAssert<Strict>(Pos, Args);
+ }
+};
+
+class TFromBytes final : public TCallNode {
+public:
+ TFromBytes(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "FromBytes", 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[1]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TFromBytes(Pos, Args);
+ }
+};
+
+class TYqlTaggedBase : public TCallNode {
+public:
+ TYqlTaggedBase(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[1]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+};
+
+class TYqlAsTagged final : public TYqlTaggedBase {
+public:
+ TYqlAsTagged(TPosition pos, const TVector<TNodePtr>& args)
+ : TYqlTaggedBase(pos, "AsTagged", args)
+ {}
+
+ TNodePtr DoClone() const final {
+ return new TYqlAsTagged(Pos, Args);
+ }
+};
+
+class TYqlUntag final : public TYqlTaggedBase {
+public:
+ TYqlUntag(TPosition pos, const TVector<TNodePtr>& args)
+ : TYqlTaggedBase(pos, "Untag", args)
+ {}
+
+ TNodePtr DoClone() const final {
+ return new TYqlUntag(Pos, Args);
+ }
+};
+
+class TYqlVariant final : public TCallNode {
+public:
+ TYqlVariant(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "Variant", 3, 3, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[1]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlVariant(Pos, Args);
+ }
+};
+
+TNodePtr BuildFileNameArgument(TPosition pos, const TNodePtr& argument) {
+ return new TLiteralStringAtom(pos, argument, "FilePath requires string literal as parameter");
+}
+
+class TYqlAtom final: public TCallNode {
+public:
+ TYqlAtom(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, 1, 1, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!Args.empty()) {
+ Args[0] = BuildFileNameArgument(ctx.Pos(), Args[0]);
+ }
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlAtom(Pos, OpName, Args);
+ }
+};
+
+class TAddMember final: public TCallNode {
+public:
+ TAddMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, 3, 3, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (Args.size() != 3) {
+ ctx.Error(Pos) << OpName << " requires exactly three arguments";
+ return false;
+ }
+ for (const auto& arg : Args) {
+ if (!arg->Init(ctx, src)) {
+ return false;
+ }
+ }
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TAddMember(Pos, OpName, Args);
+ }
+};
+class TRemoveMember final: public TCallNode {
+public:
+ TRemoveMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (Args.size() != 2) {
+ ctx.Error(Pos) << OpName << " requires exactly two arguments";
+ return false;
+ }
+ for (const auto& arg : Args) {
+ if (!arg->Init(ctx, src)) {
+ return false;
+ }
+ }
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TRemoveMember(Pos, OpName, Args);
+ }
+};
+
+class TCombineMembers final: public TCallNode {
+public:
+ TCombineMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, 1, -1, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (Args.empty()) {
+ ctx.Error(Pos) << "CombineMembers requires at least one argument";
+ return false;
+ }
+ for (size_t i = 0; i < Args.size(); ++i) {
+ Args[i] = Q(Y(Q(""), Args[i])); // flatten without prefix
+ }
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TCombineMembers(Pos, OpName, Args);
+ }
+};
+
+class TFlattenMembers final: public TCallNode {
+public:
+ TFlattenMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, 1, -1, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (Args.empty()) {
+ ctx.Error(Pos) << OpName << " requires at least one argument";
+ return false;
+ }
+ for (size_t i = 0; i < Args.size(); ++i) {
+ if (!Args[i]->Init(ctx, src)) {
+ return false;
+ }
+ if (Args[i]->GetTupleSize() == 2) {
+ // flatten with prefix
+ Args[i] = Q(Y(
+ MakeAtomFromExpression(ctx, Args[i]->GetTupleElement(0)).Build(),
+ Args[i]->GetTupleElement(1)
+ ));
+ } else {
+ ctx.Error(Pos) << OpName << " requires arguments to be tuples of size 2: prefix and struct";
+ return false;
+ }
+ }
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TFlattenMembers(Pos, OpName, Args);
+ }
+};
+
+TString NormalizeTypeString(const TString& str) {
+ auto ret = to_title(str);
+ if (ret.StartsWith("Tz")) {
+ ret = "Tz" + to_title(ret.substr(2));
+ }
+
+ return ret;
+}
+
+static const TSet<TString> AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json",
+ "Date", "Datetime", "Timestamp", "Interval", "Uint8", "Int8", "Uint16", "Int16", "TzDate", "TzDatetime", "TzTimestamp", "Uuid", "Decimal"};
+TNodePtr GetDataTypeStringNode(TContext& ctx, TCallNode& node, unsigned argNum, TString* outTypeStrPtr = nullptr) {
+ auto errMsgFunc = [&node, argNum]() {
+ static std::array<TString, 2> numToName = {{"first", "second"}};
+ TStringBuilder sb;
+ sb << "At " << numToName.at(argNum) << " argument of " << node.GetOpName() << " expected type string, available one of: "
+ << JoinRange(", ", AvailableDataTypes.begin(), AvailableDataTypes.end()) << ";";
+ return TString(sb);
+ };
+ auto typeStringNode = node.GetArgs().at(argNum);
+ auto typeStringPtr = typeStringNode->GetLiteral("String");
+ TNodePtr dataTypeNode;
+ if (typeStringPtr) {
+ TString typeString = NormalizeTypeString(*typeStringPtr);
+ if (!AvailableDataTypes.contains(typeString)) {
+ ctx.Error(typeStringNode->GetPos()) << "Bad type string: '" << typeString << "'. " << errMsgFunc();
+ return {};
+ }
+ if (outTypeStrPtr) {
+ *outTypeStrPtr = typeString;
+ }
+ dataTypeNode = typeStringNode->Q(typeString);
+ } else {
+ ctx.Error(typeStringNode->GetPos()) << errMsgFunc();
+ return {};
+ }
+ return dataTypeNode;
+}
+
+class TYqlParseFileOp final: public TCallNode {
+public:
+ TYqlParseFileOp(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "ParseFile", 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0);
+ if (!dataTypeStringNode) {
+ return false;
+ }
+ auto aliasNode = BuildFileNameArgument(Args[1]->GetPos(), Args[1]);
+ OpName = "Apply";
+ Args[0] = Y("Udf", Q("File.ByLines"), Y("Void"),
+ Y("TupleType",
+ Y("TupleType", Y("DataType", dataTypeStringNode)),
+ Y("StructType"),
+ Y("TupleType")));
+
+ Args[1] = Y("FilePath", aliasNode);
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TString GetOpName() const override {
+ return "ParseFile";
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlParseFileOp(Pos, Args);
+ }
+};
+
+class TYqlDataType final : public TCallNode {
+public:
+ TYqlDataType(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "DataType", 1, 1, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0);
+ if (!dataTypeStringNode) {
+ return false;
+ }
+ Args[0] = dataTypeStringNode;
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlDataType(Pos, Args);
+ }
+};
+
+TNodePtr TryBuildDataType(TPosition pos, const TString& stringType) {
+ auto normStringType = NormalizeTypeString(stringType);
+ if (!AvailableDataTypes.contains(normStringType)) {
+ return {};
+ }
+ return new TYqlDataType(pos, {BuildLiteralRawString(pos, normStringType)});
+}
+
+class TYqlResourceType final : public TCallNode {
+public:
+ TYqlResourceType(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "ResourceType", 1, 1, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[0]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[0] = MakeAtomFromExpression(ctx, Args[0]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlResourceType(Pos, Args);
+ }
+};
+
+class TYqlTaggedType final : public TCallNode {
+public:
+ TYqlTaggedType(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "TaggedType", 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[1]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlTaggedType(Pos, Args);
+ }
+};
+
+class TYqlCallableType final : public TCallNode {
+public:
+ TYqlCallableType(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "CallableType", 2, -1, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!dynamic_cast<TTupleNode*>(Args[0].Get())) {
+ ui32 numOptArgs;
+ if (!Parseui32(Args[0], numOptArgs)) {
+ ctx.Error(Args[0]->GetPos()) << "Expected either tuple or number of optional arguments";
+ return false;
+ }
+
+ Args[0] = Q(Y(BuildQuotedAtom(Args[0]->GetPos(), ToString(numOptArgs))));
+ }
+
+ if (!dynamic_cast<TTupleNode*>(Args[1].Get())) {
+ Args[1] = Q(Y(Args[1]));
+ }
+
+ for (ui32 index = 2; index < Args.size(); ++index) {
+ if (!dynamic_cast<TTupleNode*>(Args[index].Get())) {
+ Args[index] = Q(Y(Args[index]));
+ }
+ }
+
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlCallableType(Pos, Args);
+ }
+};
+
+class TYqlTupleElementType final : public TCallNode {
+public:
+ TYqlTupleElementType(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "TupleElementType", 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[1]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlTupleElementType(Pos, Args);
+ }
+};
+
+class TYqlStructMemberType final : public TCallNode {
+public:
+ TYqlStructMemberType(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "StructMemberType", 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[1]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlStructMemberType(Pos, Args);
+ }
+};
+
+class TYqlCallableArgumentType final : public TCallNode {
+public:
+ TYqlCallableArgumentType(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "CallableArgumentType", 2, 2, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ ui32 index;
+ if (!Parseui32(Args[1], index)) {
+ ctx.Error(Args[1]->GetPos()) << "Expected index of the callable argument";
+ return false;
+ }
+
+ Args[1] = BuildQuotedAtom(Args[1]->GetPos(), ToString(index));
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlCallableArgumentType(Pos, Args);
+ }
+};
+
+class TStructTypeNode : public TAstListNode {
+public:
+ TStructTypeNode(TPosition pos, const TVector<TNodePtr>& exprs)
+ : TAstListNode(pos)
+ , Exprs(exprs)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ Nodes.push_back(BuildAtom(Pos, "StructType", TNodeFlags::Default));
+ for (const auto& expr : Exprs) {
+ const auto& label = expr->GetLabel();
+ if (!label) {
+ ctx.Error(expr->GetPos()) << "Structure does not allow anonymous members";
+ return false;
+ }
+ Nodes.push_back(Q(Y(Q(label), expr)));
+ }
+ return TAstListNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TStructTypeNode(Pos, CloneContainer(Exprs));
+ }
+
+private:
+ const TVector<TNodePtr> Exprs;
+};
+
+template <bool IsStrict>
+class TYqlIf final: public TCallNode {
+public:
+ TYqlIf(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, IsStrict ? "IfStrict" : "If", 2, 3, args)
+ {}
+
+private:
+ TCallNode::TPtr DoClone() const override {
+ return new TYqlIf(GetPos(), CloneContainer(Args));
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ Args[0] = Y("Coalesce", Args[0], Y("Bool", Q("false")));
+ if (Args.size() == 2) {
+ Args.push_back(Y("Null"));
+ }
+ return TCallNode::DoInit(ctx, src);
+ }
+};
+
+class TYqlSubstring final: public TCallNode {
+public:
+ TYqlSubstring(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "Substring", 2, 3, args)
+ {}
+
+private:
+ TCallNode::TPtr DoClone() const override {
+ return new TYqlSubstring(GetPos(), CloneContainer(Args));
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (Args.size() == 2) {
+ Args.push_back(Y("Uint32", Q(ToString(Max<ui32>()))));
+ }
+ return TCallNode::DoInit(ctx, src);
+ }
+};
+
+class TYqlIn final: public TCallNode {
+public:
+ TYqlIn(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "IN", 3, 3, args)
+ {}
+
+private:
+ TNodePtr DoClone() const final {
+ return new TYqlIn(Pos, CloneContainer(Args));
+ }
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ auto key = Args[0];
+ auto inNode = Args[1];
+ auto hints = Args[2];
+
+ if (!key->Init(ctx, src)) {
+ return false;
+ }
+
+ if (!inNode->Init(ctx, inNode->GetSource() ? nullptr : src)) {
+ return false;
+ }
+
+ if (inNode->GetLiteral("String")) {
+ ctx.Error(inNode->GetPos()) << "Unable to use IN predicate with string argument, it won't search substring - "
+ "expecting tuple, list, dict or single column table source";
+ return false;
+ }
+
+ if (inNode->GetTupleSize() == 1) {
+ auto singleElement = inNode->GetTupleElement(0);
+ // TODO: 'IN ((select ...))' is parsed exactly like 'IN (select ...)' instead of a single element tuple
+ if (singleElement->GetSource() || singleElement->IsSelect()) {
+ TStringBuf parenKind = singleElement->GetSource() ? "" : "external ";
+ ctx.Warning(inNode->GetPos(),
+ TIssuesIds::YQL_CONST_SUBREQUEST_IN_LIST) << "Using subrequest in scalar context after IN, "
+ << "perhaps you should remove "
+ << parenKind << "parenthesis here";
+ }
+ }
+
+ if (inNode->GetSource() || inNode->IsSelect()) {
+ TVector<TNodePtr> hintElements;
+ for (size_t i = 0; i < hints->GetTupleSize(); ++i) {
+ hintElements.push_back(hints->GetTupleElement(i));
+ }
+ auto pos = inNode->GetPos();
+ auto tableSourceHint = BuildTuple(pos, { BuildQuotedAtom(pos, "tableSource", NYql::TNodeFlags::Default) });
+ hintElements.push_back(tableSourceHint);
+ hints = BuildTuple(pos, hintElements);
+ }
+
+ OpName = "SqlIn";
+ MinArgs = MaxArgs = 3;
+ Args = {
+ inNode->GetSource() ? inNode->GetSource() : inNode,
+ key,
+ hints
+ };
+
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TString GetOpName() const override {
+ return "IN predicate";
+ }
+};
+
+class TYqlUdfBase : public TCallNode {
+public:
+ TYqlUdfBase(TPosition pos, const TString& name)
+ : TCallNode(pos, "Udf", 1, 1, UdfArgs(pos, name))
+ {}
+
+ TYqlUdfBase(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
+ : TCallNode(pos, "Udf", argsCount, argsCount, UdfArgs(pos, name, &args))
+ {}
+
+protected:
+ TYqlUdfBase(TPosition pos, const TString& opName, ui32 minArgs, ui32 maxArgs, const TVector<TNodePtr>& args)
+ : TCallNode(pos, opName, minArgs, maxArgs, args)
+ {}
+
+private:
+ static TVector<TNodePtr> UdfArgs(TPosition pos, const TString& name, const TVector<TNodePtr>* args = nullptr) {
+ TVector<TNodePtr> res = { BuildQuotedAtom(pos, name) };
+ if (args) {
+ res.insert(res.end(), args->begin(), args->end());
+ }
+ return res;
+ }
+
+ void DoUpdateState() const override {
+ TCallNode::DoUpdateState();
+ State.Set(ENodeState::Aggregated, false/*!RunConfig || RunConfig->IsAggregated()*/);
+ State.Set(ENodeState::Const, true /* FIXME: To avoid CheckAggregationLevel issue for non-const TypeOf. */);
+ }
+
+private:
+ TNodePtr RunConfig;
+};
+
+class TYqlUdf final : public TYqlUdfBase {
+public:
+ TYqlUdf(TPosition pos, const TString& name)
+ : TYqlUdfBase(pos, name)
+ {}
+
+ TYqlUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
+ : TYqlUdfBase(pos, name, args, argsCount)
+ {}
+
+private:
+ TYqlUdf(const TYqlUdf& other)
+ : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
+ {}
+
+ TNodePtr DoClone() const final {
+ return new TYqlUdf(*this);
+ }
+};
+
+class TYqlTypeConfigUdf final : public TYqlUdfBase {
+public:
+ TYqlTypeConfigUdf(TPosition pos, const TString& name)
+ : TYqlUdfBase(pos, name)
+ {}
+
+ TYqlTypeConfigUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
+ : TYqlUdfBase(pos, name, args, argsCount)
+ {}
+
+private:
+ TYqlTypeConfigUdf(const TYqlTypeConfigUdf& other)
+ : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+
+ if (!Args[3]->Init(ctx, src)) {
+ return false;
+ }
+
+ Args[3] = MakeAtomFromExpression(ctx, Args[3]).Build();
+ return TYqlUdfBase::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TYqlTypeConfigUdf(*this);
+ }
+};
+
+class TWeakFieldOp final: public TCallNode {
+public:
+ TWeakFieldOp(TPosition pos, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "WeakField", 2, 3, args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!src) {
+ ctx.Error(Pos) << GetCallExplain() << " unable use without source";
+ return false;
+ }
+
+ src->AllColumns();
+
+ if (!ValidateArguments(ctx)) {
+ return false;
+ }
+ PrecacheState();
+
+ const auto memberPos = Args[0]->GetPos();
+ TVector<TNodePtr> repackArgs = {BuildAtom(memberPos, "row", NYql::TNodeFlags::Default)};
+ if (auto literal = Args[1]->GetLiteral("String")) {
+ TString targetType;
+ if (!GetDataTypeStringNode(ctx, *this, 1, &targetType)) {
+ return false;
+ }
+
+ repackArgs.push_back(Args[1]->Q(targetType));
+ } else {
+ repackArgs.push_back(Args[1]);
+ }
+
+ TVector<TNodePtr> column;
+ auto namePtr = Args[0]->GetColumnName();
+ if (!namePtr || !*namePtr) {
+ ctx.Error(Pos) << GetCallExplain() << " expect as first argument column name";
+ return false;
+ }
+ auto memberName = *namePtr;
+ column.push_back(Args[0]->Q(*namePtr));
+
+ if (src->GetJoin() && !src->IsJoinKeysInitializing()) {
+ const auto sourcePtr = Args[0]->GetSourceName();
+ if (!sourcePtr || !*sourcePtr) {
+ ctx.Error(Pos) << GetOpName() << " required to have correlation name in case of JOIN for column at first parameter";
+ return false;
+ }
+ column.push_back(Args[0]->Q(*sourcePtr));
+ memberName = DotJoin(*sourcePtr, memberName);
+ }
+ if (!GetLabel()) {
+ SetLabel(memberName);
+ }
+ repackArgs.push_back(BuildTuple(memberPos, column));
+ if (Args.size() == 3) {
+ repackArgs.push_back(Args[2]);
+ }
+ ++MinArgs;
+ ++MaxArgs;
+ Args.swap(repackArgs);
+
+ return TCallNode::DoInit(ctx, src);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TWeakFieldOp(Pos, Args);
+ }
+};
+
+class TTableRow final : public TAstAtomNode {
+public:
+ TTableRow(TPosition pos, const TVector<TNodePtr>& args)
+ : TTableRow(pos, args.size())
+ {}
+
+ TTableRow(TPosition pos, ui32 argsCount)
+ : TAstAtomNode(pos, "row", 0)
+ , ArgsCount(argsCount)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (!src || src->IsFake()) {
+ ctx.Error(Pos) << "TableRow requires FROM section";
+ return false;
+ }
+
+ if (ArgsCount > 0) {
+ ctx.Error(Pos) << "TableRow requires exactly 0 arguments";
+ return false;
+ }
+
+ src->AllColumns();
+ return true;
+ }
+
+ void DoUpdateState() const final {
+ State.Set(ENodeState::Const, false);
+ }
+
+ TNodePtr DoClone() const final {
+ return MakeIntrusive<TTableRow>(Pos, ArgsCount);
+ }
+
+private:
+ ui32 ArgsCount;
+};
+
+TNodePtr BuildUdfUserTypeArg(TPosition pos, const TVector<TNodePtr>& args, TNodePtr customUserType) {
+ TVector<TNodePtr> argsTypeItems;
+ for (auto& arg : args) {
+ argsTypeItems.push_back(new TCallNodeImpl(pos, "TypeOf", TVector<TNodePtr>(1, arg)));
+ }
+
+ TVector<TNodePtr> userTypeItems;
+ userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", argsTypeItems));
+ userTypeItems.push_back(new TCallNodeImpl(pos, "StructType", {}));
+ if (customUserType) {
+ userTypeItems.push_back(customUserType);
+ } else {
+ userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {}));
+ }
+
+ return new TCallNodeImpl(pos, "TupleType", userTypeItems);
+}
+
+TNodePtr BuildUdfUserTypeArg(TPosition pos, TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) {
+ TVector<TNodePtr> userTypeItems;
+ userTypeItems.reserve(3);
+ userTypeItems.push_back(positionalArgs->Y("TypeOf", positionalArgs));
+ userTypeItems.push_back(positionalArgs->Y("TypeOf", namedArgs));
+ if (customUserType) {
+ userTypeItems.push_back(customUserType);
+ } else {
+ userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {}));
+ }
+
+ return new TCallNodeImpl(pos, "TupleType", userTypeItems);
+}
+
+TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector<TNodePtr>& args,
+ TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) {
+ if (!ctx.Settings.EnableGenericUdfs) {
+ return {};
+ }
+ TVector<TNodePtr> udfArgs;
+ udfArgs.push_back(new TAstListNodeImpl(pos));
+ udfArgs[0]->Add(new TAstAtomNodeImpl(pos, "Void", 0));
+ if (namedArgs) {
+ udfArgs.push_back(BuildUdfUserTypeArg(pos, positionalArgs, namedArgs, customUserType));
+ } else {
+ udfArgs.push_back(BuildUdfUserTypeArg(pos, args, customUserType));
+ }
+ return udfArgs;
+}
+
+class TCallableNode final: public INode {
+public:
+ TCallableNode(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args)
+ : INode(pos)
+ , Module(module)
+ , Name(name)
+ , Args(args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (Module == "yql") {
+ ui32 flags;
+ TString nameParseError;
+ TPosition pos = Pos;
+ TString parsedName;
+ if (!TryStringContent(Name, parsedName, flags, nameParseError, pos)) {
+ ctx.Error(pos) << "Failed to parse YQL: " << nameParseError;
+ return false;
+ }
+
+ const TString yql("(" + parsedName + ")");
+ TAstParseResult ast = ParseAst(yql, ctx.Pool.get());
+ /// TODO: do not drop warnings
+ if (ast.IsOk()) {
+ Node = AstNode(ast.Root->GetChild(0));
+ } else {
+ ctx.Error(Pos) << "Failed to parse YQL: " << ast.Issues.ToString();
+ return false;
+ }
+
+ if (src) {
+ src->AllColumns();
+ }
+ } else if (ctx.Settings.ModuleMapping.contains(Module)) {
+ Node = Y("bind", Module + "_module", Q(Name));
+ if (src) {
+ src->AllColumns();
+ }
+ } else {
+ TNodePtr customUserType = nullptr;
+ if (Module == "Tensorflow" && Name == "RunBatch") {
+ if (Args.size() > 2) {
+ auto passThroughAtom = Q("PassThrough");
+ auto passThroughType = Y("StructMemberType", Y("ListItemType", Y("TypeOf", Args[1])), passThroughAtom);
+ customUserType = Y("AddMemberType", Args[2], passThroughAtom, passThroughType);
+ Args.erase(Args.begin() + 2);
+ }
+ }
+ auto udfArgs = BuildUdfArgs(ctx, Pos, Args, nullptr, nullptr, customUserType);
+ Node = BuildUdf(ctx, Pos, Module, Name, udfArgs);
+ }
+ return Node->Init(ctx, src);
+ }
+
+ TAstNode* Translate(TContext& ctx) const override {
+ Y_DEBUG_ABORT_UNLESS(Node);
+ return Node->Translate(ctx);
+ }
+
+ void DoUpdateState() const override {
+ YQL_ENSURE(Node);
+ State.Set(ENodeState::Const, Node->IsConstant());
+ State.Set(ENodeState::Aggregated, Node->IsAggregated());
+ }
+
+ TNodePtr DoClone() const override {
+ return new TCallableNode(Pos, Module, Name, Args);
+ }
+
+private:
+ TCiString Module;
+ TString Name;
+ TVector<TNodePtr> Args;
+ TNodePtr Node;
+};
+
+TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args) {
+ return new TCallableNode(pos, module, name, args);
+}
+
+TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args) {
+ auto fullName = module + "." + name;
+ if (!args.empty()) {
+ return new TYqlUdf(pos, fullName, args, args.size() + 1);
+
+ } else {
+ auto varName = ctx.AddSimpleUdf(fullName);
+ return new TAstAtomNodeImpl(pos, varName, TNodeFlags::ArbitraryContent);
+ }
+}
+
+class TScriptUdf final: public INode {
+public:
+ TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args)
+ : INode(pos)
+ , ModuleName(moduleName)
+ , FuncName(funcName)
+ , Args(args)
+ {}
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos;
+ if (!isPython) {
+ if (Args.size() != 2) {
+ ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters";
+ return false;
+ }
+ } else {
+ if (Args.size() < 1 || Args.size() > 2) {
+ ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters";
+ return false;
+ }
+ }
+
+ auto nameAtom = BuildQuotedAtom(Pos, FuncName);
+ auto scriptNode = Args.back();
+ if (!scriptNode->Init(ctx, src)) {
+ return false;
+ }
+ auto scriptStrPtr = Args.back()->GetLiteral("String");
+ if (scriptStrPtr && scriptStrPtr->size() > SQL_MAX_INLINE_SCRIPT_LEN) {
+ scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode));
+ }
+
+ INode::TPtr type;
+ if (Args.size() == 2) {
+ auto literal = Args[0]->GetLiteral("String");
+ if (literal) {
+ auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
+ if (!parsed) {
+ ctx.Error(Args[0]->GetPos()) << "Failed to parse script signature";
+ return false;
+ }
+
+ type = AstNode(parsed);
+ } else {
+ type = Args[0];
+ }
+ } else {
+ // Python supports getting functions signatures right from docstrings
+ type = Y("EvaluateType", Y("ParseTypeHandle", Y("Apply",
+ Y("bind", "core_module", Q("PythonFuncSignature")),
+ Q(ModuleName),
+ scriptNode,
+ Y("String", nameAtom)
+ )));
+ }
+
+ if (!type->Init(ctx, src)) {
+ return false;
+ }
+
+ Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode);
+ return true;
+ }
+
+ TAstNode* Translate(TContext& ctx) const override {
+ Y_UNUSED(ctx);
+ Y_DEBUG_ABORT_UNLESS(Node);
+ return Node->Translate(ctx);
+ }
+
+ void DoUpdateState() const override {
+ State.Set(ENodeState::Const, true);
+ }
+
+ TNodePtr DoClone() const final {
+ return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args));
+ }
+private:
+ TString ModuleName;
+ TString FuncName;
+ TVector<TNodePtr> Args;
+ TNodePtr Node;
+};
+
+template <bool Sorted>
+class TYqlToDict final: public TCallNode {
+public:
+ TYqlToDict(TPosition pos, const TString& mode, const TVector<TNodePtr>& args)
+ : TCallNode(pos, "ToDict", 4, 4, args)
+ , Mode(mode)
+ {}
+
+private:
+ TCallNode::TPtr DoClone() const override {
+ return new TYqlToDict<Sorted>(GetPos(), Mode, CloneContainer(Args));
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ if (Args.size() != 1) {
+ ctx.Error(Pos) << "ToDict required exactly one argument";
+ return false;
+ }
+ Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("0"))));
+ Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("1"))));
+ Args.push_back(Q(Y(Q(Sorted ? "Sorted" : "Hashed"), Q(Mode))));
+ return TCallNode::DoInit(ctx, src);
+ }
+private:
+ TString Mode;
+};
+
+template <bool IsStart>
+class THoppingTime final: public TAstListNode {
+public:
+ THoppingTime(TPosition pos, const TVector<TNodePtr>& args = {})
+ : TAstListNode(pos)
+ {
+ Y_UNUSED(args);
+ }
+
+private:
+ TNodePtr DoClone() const override {
+ return new THoppingTime(GetPos());
+ }
+
+ bool DoInit(TContext& ctx, ISource* src) override {
+ Y_UNUSED(ctx);
+
+ auto window = src->GetHoppingWindowSpec();
+ if (!window) {
+ ctx.Error(Pos) << "No hopping window parameters in aggregation";
+ return false;
+ }
+
+ Nodes.clear();
+
+ if (!IsStart) {
+ Add("Member", "row", Q("_yql_time"));
+ return true;
+ }
+
+ Add("Sub",
+ Y("Member", "row", Q("_yql_time")),
+ window->Interval);
+ return true;
+ }
+
+ void DoUpdateState() const override {
+ State.Set(ENodeState::Aggregated, true);
+ }
+};
+
+class TInvalidBuiltin final: public INode {
+public:
+ TInvalidBuiltin(TPosition pos, const TString& info)
+ : INode(pos)
+ , Info(info)
+ {
+ }
+
+ bool DoInit(TContext& ctx, ISource*) override {
+ ctx.Error(Pos) << Info;
+ return false;
+ }
+
+ TAstNode* Translate(TContext&) const override {
+ return nullptr;
+ }
+
+ TPtr DoClone() const override {
+ return {};
+ }
+private:
+ TString Info;
+};
+
+enum EAggrFuncTypeCallback {
+ NORMAL,
+ WINDOW_AUTOARGS,
+ KEY_PAYLOAD,
+ PAYLOAD_PREDICATE,
+ TWO_ARGS,
+ COUNT,
+ HISTOGRAM,
+ LINEAR_HISTOGRAM,
+ PERCENTILE,
+ TOPFREQ,
+ TOP,
+ TOP_BY,
+ COUNT_DISTINCT_ESTIMATE,
+ LIST,
+ UDAF
+};
+
+struct TCoreFuncInfo {
+ TString Name;
+ ui32 MinArgs;
+ ui32 MaxArgs;
+};
+
+using TAggrFuncFactoryCallback = std::function<INode::TPtr(TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory)>;
+using TAggrFuncFactoryCallbackMap = std::unordered_map<TString, TAggrFuncFactoryCallback, THash<TString>>;
+using TBuiltinFactoryCallback = std::function<TNodePtr(TPosition pos, const TVector<TNodePtr>& args)>;
+using TBuiltinFactoryCallbackMap = std::unordered_map<TString, TBuiltinFactoryCallback, THash<TString>>;
+using TCoreFuncMap = std::unordered_map<TString, TCoreFuncInfo, THash<TString>>;
+
+TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback(
+ const TString& functionName,
+ const TString& factoryName,
+ EAggrFuncTypeCallback type = NORMAL,
+ const TString& functionNameOverride = TString(),
+ const TVector<EAggregateMode>& validModes = {}) {
+
+ const TString realFunctionName = functionNameOverride.empty() ? functionName : functionNameOverride;
+ return [functionName, realFunctionName, factoryName, type, validModes] (TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory) -> INode::TPtr {
+ if (!validModes.empty()) {
+ if (!IsIn(validModes, aggMode)) {
+ const TString errorText = TStringBuilder()
+ << "Can't use " << functionName << " in " << ToString(aggMode) << " aggregation mode";
+ return INode::TPtr(new TInvalidBuiltin(pos, errorText));
+ }
+ }
+ TAggregationPtr factory = nullptr;
+ switch (type) {
+ case NORMAL:
+ factory = BuildFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case WINDOW_AUTOARGS:
+ factory = BuildFactoryAggregationWinAutoarg(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case KEY_PAYLOAD:
+ factory = BuildKeyPayloadFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case PAYLOAD_PREDICATE:
+ factory = BuildPayloadPredicateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case TWO_ARGS:
+ factory = BuildTwoArgsFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case COUNT:
+ factory = BuildCountAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case HISTOGRAM:
+ factory = BuildHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case LINEAR_HISTOGRAM:
+ factory = BuildLinearHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case PERCENTILE:
+ factory = BuildPercentileFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case TOPFREQ:
+ factory = BuildTopFreqFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case TOP:
+ factory = BuildTopFactoryAggregation<false>(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case TOP_BY:
+ factory = BuildTopFactoryAggregation<true>(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case COUNT_DISTINCT_ESTIMATE:
+ factory = BuildCountDistinctEstimateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case LIST:
+ factory = BuildListFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ case UDAF:
+ factory = BuildUserDefinedFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
+ break;
+ }
+ if (isFactory) {
+ auto realArgs = args;
+ realArgs.erase(realArgs.begin()); // skip function name
+ return new TBasicAggrFactory(pos, functionName, factory, realArgs);
+ } else {
+ return new TBasicAggrFunc(pos, functionName, factory, args);
+ }
+ };
+}
+
+TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback(
+ const TString& functionName,
+ const TString& factoryName,
+ const TVector<EAggregateMode>& validModes,
+ EAggrFuncTypeCallback type = NORMAL,
+ const TString& functionNameOverride = TString()) {
+ return BuildAggrFuncFactoryCallback(functionName, factoryName, type, functionNameOverride, validModes);
+}
+
+template<typename TType>
+TBuiltinFactoryCallback BuildSimpleBuiltinFactoryCallback() {
+ return [] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, args);
+ };
+}
+
+template<typename TType>
+TBuiltinFactoryCallback BuildNamedBuiltinFactoryCallback(const TString& name) {
+ return [name] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, name, args);
+ };
+}
+
+template<typename TType>
+TBuiltinFactoryCallback BuildArgcBuiltinFactoryCallback(i32 minArgs, i32 maxArgs) {
+ return [minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, minArgs, maxArgs, args);
+ };
+}
+
+template<typename TType>
+TBuiltinFactoryCallback BuildNamedArgcBuiltinFactoryCallback(const TString& name, i32 minArgs, i32 maxArgs) {
+ return [name, minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, name, minArgs, maxArgs, args);
+ };
+}
+
+template<typename TType>
+TBuiltinFactoryCallback BuildNamedDepsArgcBuiltinFactoryCallback(ui32 reqArgsCount, const TString& name, i32 minArgs, i32 maxArgs) {
+ return [reqArgsCount, name, minArgs, maxArgs](TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(reqArgsCount, pos, name, minArgs, maxArgs, args);
+ };
+}
+
+template<typename TType>
+TBuiltinFactoryCallback BuildBoolBuiltinFactoryCallback(bool arg) {
+ return [arg] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, args, arg);
+ };
+}
+
+template<typename TType>
+TBuiltinFactoryCallback BuildFoldBuiltinFactoryCallback(const TString& name, const TString& defaultValue) {
+ return [name, defaultValue] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
+ return new TType(pos, name, "Bool", defaultValue, 1, args);
+ };
+}
+
+TNodePtr MakePair(TPosition pos, const TVector<TNodePtr>& args) {
+ TNodePtr list = new TAstListNodeImpl(pos, {
+ args[0],
+ args.size() > 1 ? args[1] : new TAstListNodeImpl(pos,{ new TAstAtomNodeImpl(pos, "Null", TNodeFlags::Default) })
+ });
+
+ return new TAstListNodeImpl(pos, {
+ new TAstAtomNodeImpl(pos, "quote", TNodeFlags::Default),
+ list
+ });
+}
+
+struct TBuiltinFuncData {
+ const TBuiltinFactoryCallbackMap BuiltinFuncs;
+ const TAggrFuncFactoryCallbackMap AggrFuncs;
+ const TCoreFuncMap CoreFuncs;
+
+ TBuiltinFuncData():
+ BuiltinFuncs(MakeBuiltinFuncs()),
+ AggrFuncs(MakeAggrFuncs()),
+ CoreFuncs(MakeCoreFuncs())
+ {
+ }
+
+ TBuiltinFactoryCallbackMap MakeBuiltinFuncs() {
+ TBuiltinFactoryCallbackMap builtinFuncs = {
+ // Branching
+ {"if", BuildSimpleBuiltinFactoryCallback<TYqlIf<false>>()},
+ {"ifstrict", BuildSimpleBuiltinFactoryCallback<TYqlIf<true>>() },
+
+ // String builtins
+ {"len", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"length", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"charlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"characterlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
+ {"substring", BuildSimpleBuiltinFactoryCallback<TYqlSubstring>()},
+ {"byteat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ByteAt", 2, 2) },
+
+ // Numeric builtins
+ {"abs", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Abs", 1, 1) },
+ {"tobytes", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ToBytes", 1, 1) },
+ {"frombytes", BuildSimpleBuiltinFactoryCallback<TFromBytes>() },
+
+ // Compare builtins
+ {"minof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
+ {"maxof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
+ {"greatest", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
+ {"least", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
+ {"in", BuildSimpleBuiltinFactoryCallback<TYqlIn>()},
+
+ // List builtins
+ {"aslist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsList", 1, -1)},
+ {"asliststrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsListStrict", 1, -1) },
+ {"listlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)},
+ {"listhasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)},
+ {"listcount", BuildSimpleBuiltinFactoryCallback<TListCountBuiltin>()},
+ {"listextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Extend", 1, -1)},
+ {"listunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("UnionAll", 1, -1) },
+ {"listzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Zip", -1, -1)},
+ {"listzipall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ZipAll", -1, -1)},
+ {"listenumerate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Enumerate", 1, 3)},
+ {"listreverse", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Reverse", 1, 1)},
+ {"listskip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Skip", 2, 2)},
+ {"listtake", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Take", 2, 2)},
+ {"listsort", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
+ {"listsortasc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
+ {"listsortdesc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(false)},
+ {"listmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(false)},
+ {"listflatmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(true)},
+ {"listfilter", BuildSimpleBuiltinFactoryCallback<TListFilterBuiltin>()},
+ {"listany", BuildFoldBuiltinFactoryCallback<TListFoldBuiltinImpl>("Or", "false")},
+ {"listall", BuildFoldBuiltinFactoryCallback<TListFoldBuiltinImpl>("And", "true")},
+ {"listhas", BuildSimpleBuiltinFactoryCallback<TListHasBuiltin>()},
+ {"listmax", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("AggrMax")},
+ {"listmin", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("AggrMin")},
+ {"listsum", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("AggrAdd")},
+ {"listavg", BuildSimpleBuiltinFactoryCallback<TListAvgBuiltin>()},
+ {"listconcat", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("Concat")},
+ {"listextract", BuildSimpleBuiltinFactoryCallback<TListExtractBuiltin>()},
+ {"listuniq", BuildSimpleBuiltinFactoryCallback<TListUniqBuiltin>()},
+ {"listcreate", BuildSimpleBuiltinFactoryCallback<TListCreateBuiltin>()},
+ {"listfromrange", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFromRange", 2, 3) },
+ {"listreplicate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Replicate", 2, 2) },
+ {"listtakewhile", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TakeWhile", 2, 2) },
+ {"listskipwhile", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SkipWhile", 2, 2) },
+ {"listcollect", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Collect", 1, 1) },
+
+ // Dict builtins
+ {"dictcreate", BuildSimpleBuiltinFactoryCallback<TDictCreateBuiltin>()},
+ {"asdict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDict", 1, -1)},
+ {"asdictstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictStrict", 1, -1)},
+ {"todict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("One")},
+ {"tomultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("Many")},
+ {"tosorteddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("One")},
+ {"tosortedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("Many")},
+ {"dictkeys", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeys", 1, 1) },
+ {"dictpayloads", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloads", 1, 1) },
+ {"dictitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictItems", 1, 1) },
+ {"dictlookup", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Lookup", 2, 2) },
+ {"dictcontains", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Contains", 2, 2) },
+
+ // Atom builtins
+ {"asatom", BuildSimpleBuiltinFactoryCallback<TYqlAsAtom>()},
+ {"secureparam", BuildNamedBuiltinFactoryCallback<TYqlAtom>("SecureParam")},
+
+ {"void", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Void", 0, 0)},
+ {"callable", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Callable", 2, 2)},
+ {"way", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Way", 1, 1) },
+ {"variant", BuildSimpleBuiltinFactoryCallback<TYqlVariant>() },
+ {"astagged", BuildSimpleBuiltinFactoryCallback<TYqlAsTagged>() },
+ {"untag", BuildSimpleBuiltinFactoryCallback<TYqlUntag>() },
+ {"parsetype", BuildSimpleBuiltinFactoryCallback<TYqlParseType>() },
+ {"ensuretype", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<true>>() },
+ {"ensureconvertibleto", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<false>>() },
+ {"ensure", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Ensure", 2, 3) },
+ {"evaluateexpr", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateExpr", 1, 1) },
+ {"evaluateatom", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateAtom", 1, 1) },
+ {"evaluatetype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateType", 1, 1) },
+ {"unwrap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unwrap", 1, 2) },
+ {"just", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Just", 1, 1) },
+ {"nothing", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nothing", 1, 1) },
+ {"formattype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatType", 1, 1) },
+ {"typeof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeOf", 1, 1) },
+ {"instanceof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("InstanceOf", 1, 1) },
+ {"datatype", BuildSimpleBuiltinFactoryCallback<TYqlDataType>() },
+ {"optionaltype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalType", 1, 1) },
+ {"listtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListType", 1, 1) },
+ {"streamtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamType", 1, 1) },
+ {"dicttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictType", 2, 2) },
+ {"tupletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleType", 0, -1) },
+ {"generictype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("GenericType", 0, 0) },
+ {"unittype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("UnitType", 0, 0) },
+ {"voidtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidType", 0, 0) },
+ {"resourcetype", BuildSimpleBuiltinFactoryCallback<TYqlResourceType>() },
+ {"taggedtype", BuildSimpleBuiltinFactoryCallback<TYqlTaggedType>() },
+ {"varianttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantType", 1, 1) },
+ {"callabletype", BuildSimpleBuiltinFactoryCallback<TYqlCallableType>() },
+ {"optionalitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalItemType", 1, 1) },
+ {"listitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListItemType", 1, 1) },
+ {"streamitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamItemType", 1, 1) },
+ {"dictkeytype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeyType", 1, 1) },
+ {"dictpayloadtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloadType", 1, 1) },
+ {"tupleelementtype", BuildSimpleBuiltinFactoryCallback<TYqlTupleElementType>() },
+ {"structmembertype", BuildSimpleBuiltinFactoryCallback<TYqlStructMemberType>() },
+ {"callableresulttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableResultType", 1, 1) },
+ {"callableargumenttype", BuildSimpleBuiltinFactoryCallback<TYqlCallableArgumentType>() },
+ {"variantunderlyingtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantUnderlyingType", 1, 1) },
+ {"fromysonsimpletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FromYsonSimpleType", 2, 2) },
+ {"currentutcdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDate", 0, -1) },
+ {"currentutcdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDatetime", 0, -1) },
+ {"currentutctimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcTimestamp", 0, -1) },
+ {"currentoperationid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationId", 0, 0) },
+ {"currentoperationsharedid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationSharedId", 0, 0) },
+ {"currentauthenticateduser", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentAuthenticatedUser", 0, 0) },
+ {"addtimezone", BuildSimpleBuiltinFactoryCallback<TYqlAddTimezone>() },
+ {"removetimezone", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("RemoveTimezone", 1, 1) },
+ {"typehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeHandle", 1, 1) },
+ {"parsetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ParseTypeHandle", 1, 1) },
+ {"typekind", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeKind", 1, 1) },
+ {"datatypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeComponents", 1, 1) },
+ {"datatypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeHandle", 1, 1) },
+ {"optionaltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalTypeHandle", 1, 1) },
+ {"listtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTypeHandle", 1, 1) },
+ {"streamtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamTypeHandle", 1, 1) },
+ {"tupletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeComponents", 1, 1) },
+ {"tupletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeHandle", 1, 1) },
+ {"structtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeComponents", 1, 1) },
+ {"structtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeHandle", 1, 1) },
+ {"dicttypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeComponents", 1, 1) },
+ {"dicttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeHandle", 2, 2) },
+ {"resourcetypetag", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeTag", 1, 1) },
+ {"resourcetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeHandle", 1, 1) },
+ {"taggedtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeComponents", 1, 1) },
+ {"taggedtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeHandle", 2, 2) },
+ {"varianttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantTypeHandle", 1, 1) },
+ {"voidtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidTypeHandle", 0, 0) },
+ {"nulltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("NullTypeHandle", 0, 0) },
+ {"callabletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeComponents", 1, 1) },
+ {"callableargument", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableArgument", 1, 3) },
+ {"callabletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeHandle", 2, 4) },
+ {"formatcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatCode", 1, 1) },
+ {"worldcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("WorldCode", 0, 0) },
+ {"atomcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AtomCode", 1, 1) },
+ {"listcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCode", 0, -1) },
+ {"funccode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FuncCode", 1, -1) },
+ {"lambdacode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaCode", 1, 2) },
+ {"evaluatecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateCode", 1, 1) },
+ {"reprcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ReprCode", 1, 1) },
+ {"quotecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("QuoteCode", 1, 1) },
+
+ // Tuple builtins
+ {"astuple", BuildSimpleBuiltinFactoryCallback<TTupleNode>()},
+
+ // Struct builtins
+ {"addmember", BuildNamedBuiltinFactoryCallback<TAddMember>("AddMember")},
+ {"removemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("RemoveMember")},
+ {"forceremovemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("ForceRemoveMember")},
+ {"combinemembers", BuildNamedBuiltinFactoryCallback<TCombineMembers>("FlattenMembers")},
+ {"flattenmembers", BuildNamedBuiltinFactoryCallback<TFlattenMembers>("FlattenMembers")},
+
+ // File builtins
+ {"filepath", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FilePath")},
+ {"filecontent", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FileContent")},
+ {"folderpath", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FolderPath") },
+ {"files", BuildNamedBuiltinFactoryCallback<TYqlAtom>("Files")},
+ {"parsefile", BuildSimpleBuiltinFactoryCallback<TYqlParseFileOp>()},
+
+ // Misc builtins
+ {"coalesce", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1)},
+ {"nvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1) },
+ {"nanvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nanvl", 2, 2) },
+ {"likely", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Likely", 1, -1)},
+ {"random", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "Random", 1, -1)},
+ {"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomNumber", 1, -1)},
+ {"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomUuid", 1, -1) },
+ {"tablepath", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TablePath") },
+ {"tablerecord", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TableRecord") },
+ {"tablerecordindex", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TableRecord") },
+ {"weakfield", BuildSimpleBuiltinFactoryCallback<TWeakFieldOp>()},
+ {"tablerow", BuildSimpleBuiltinFactoryCallback<TTableRow>() },
+
+ // Hint builtins
+ {"grouping", BuildSimpleBuiltinFactoryCallback<TGroupingNode>()},
+
+ // Window funcitons
+ {"rownumber", BuildNamedArgcBuiltinFactoryCallback<TWinRowNumber>("RowNumber", 0, 0)},
+ /// by SQL2011 should be with sort
+ {"lead", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lead", 1, 2)},
+ {"lag", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lag", 1, 2)},
+
+ // Hopping intervals time functions
+ {"hopstart", BuildSimpleBuiltinFactoryCallback<THoppingTime<true>>()},
+ {"hopend", BuildSimpleBuiltinFactoryCallback<THoppingTime<false>>()},
+ };
+ return builtinFuncs;
+ }
+
+ TAggrFuncFactoryCallbackMap MakeAggrFuncs() {
+ constexpr auto OverWindow = EAggregateMode::OverWindow;
+
+ TAggrFuncFactoryCallbackMap aggrFuncs = {
+ {"min", BuildAggrFuncFactoryCallback("Min", "min_traits_factory")},
+ {"max", BuildAggrFuncFactoryCallback("Max", "max_traits_factory")},
+
+ {"minby", BuildAggrFuncFactoryCallback("MinBy", "min_by_traits_factory", KEY_PAYLOAD)},
+ {"maxby", BuildAggrFuncFactoryCallback("MaxBy", "max_by_traits_factory", KEY_PAYLOAD)},
+
+ {"sum", BuildAggrFuncFactoryCallback("Sum", "sum_traits_factory")},
+ {"sumif", BuildAggrFuncFactoryCallback("SumIf", "sum_if_traits_factory", PAYLOAD_PREDICATE) },
+
+ {"some", BuildAggrFuncFactoryCallback("Some", "some_traits_factory")},
+ {"somevalue", BuildAggrFuncFactoryCallback("SomeValue", "some_traits_factory")},
+
+ {"count", BuildAggrFuncFactoryCallback("Count", "count_traits_factory", COUNT)},
+ {"countif", BuildAggrFuncFactoryCallback("CountIf", "count_if_traits_factory")},
+
+ {"every", BuildAggrFuncFactoryCallback("Every", "and_traits_factory")},
+ {"booland", BuildAggrFuncFactoryCallback("BoolAnd", "and_traits_factory")},
+ {"boolor", BuildAggrFuncFactoryCallback("BoolOr", "or_traits_factory")},
+
+ {"bitand", BuildAggrFuncFactoryCallback("BitAnd", "bit_and_traits_factory")},
+ {"bitor", BuildAggrFuncFactoryCallback("BitOr", "bit_or_traits_factory")},
+ {"bitxor", BuildAggrFuncFactoryCallback("BitXor", "bit_xor_traits_factory")},
+
+ {"avg", BuildAggrFuncFactoryCallback("Avg", "avg_traits_factory")},
+ {"avgif", BuildAggrFuncFactoryCallback("AvgIf", "avg_if_traits_factory", PAYLOAD_PREDICATE) },
+
+ {"list", BuildAggrFuncFactoryCallback("List", "list_traits_factory", LIST)},
+ {"agglist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
+ {"aggrlist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
+ {"aggregatelist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
+ {"listdistinct", BuildAggrFuncFactoryCallback("ListDistinct", "set_traits_factory", LIST)},
+ {"agglistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
+ {"aggrlistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
+ {"aggregatelistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
+
+ {"median", BuildAggrFuncFactoryCallback("Median", "percentile_traits_factory", PERCENTILE)},
+ {"percentile", BuildAggrFuncFactoryCallback("Percentile", "percentile_traits_factory", PERCENTILE)},
+
+ {"mode", BuildAggrFuncFactoryCallback("Mode", "topfreq_traits_factory", TOPFREQ) },
+ {"topfreq", BuildAggrFuncFactoryCallback("TopFreq", "topfreq_traits_factory", TOPFREQ) },
+
+ {"top", BuildAggrFuncFactoryCallback("Top", "top_traits_factory", TOP)},
+ {"bottom", BuildAggrFuncFactoryCallback("Bottom", "bottom_traits_factory", TOP)},
+ {"topby", BuildAggrFuncFactoryCallback("TopBy", "top_by_traits_factory", TOP_BY)},
+ {"bottomby", BuildAggrFuncFactoryCallback("BottomBy", "bottom_by_traits_factory", TOP_BY)},
+
+ {"histogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM, "Histogram")},
+ {"adaptivewardhistogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM)},
+ {"adaptiveweighthistogram", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogram", "histogram_adaptive_weight_traits_factory", HISTOGRAM)},
+ {"adaptivedistancehistogram", BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogram", "histogram_adaptive_distance_traits_factory", HISTOGRAM)},
+ {"blockwardhistogram", BuildAggrFuncFactoryCallback("BlockWardHistogram", "histogram_block_ward_traits_factory", HISTOGRAM)},
+ {"blockweighthistogram", BuildAggrFuncFactoryCallback("BlockWeightHistogram", "histogram_block_weight_traits_factory", HISTOGRAM)},
+ {"linearhistogram", BuildAggrFuncFactoryCallback("LinearHistogram", "histogram_linear_traits_factory", LINEAR_HISTOGRAM)},
+ {"logarithmichistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM)},
+ {"loghistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogram")},
+
+ {"hyperloglog", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE)},
+ {"hll", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "HLL")},
+ {"countdistinctestimate", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "CountDistinctEstimate")},
+
+ {"variance", BuildAggrFuncFactoryCallback("Variance", "variance_0_1_traits_factory")},
+ {"stddev", BuildAggrFuncFactoryCallback("StdDev", "variance_1_1_traits_factory")},
+ {"populationvariance", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"variancepopulation", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"populationstddev", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"stddevpopulation", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"varpop", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"stddevpop", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"varp", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
+ {"stddevp", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
+ {"variancesample", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
+ {"stddevsample", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
+ {"varsamp", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
+ {"stddevsamp", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
+ {"vars", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
+ {"stddevs", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
+
+ {"correlation", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS)},
+ {"corr", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS, "Corr")},
+ {"covariance", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covariance")},
+ {"covariancesample", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS)},
+ {"covarsamp", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarSamp")},
+ {"covar", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covar")},
+ {"covars", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarS")},
+ {"covariancepopulation", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS)},
+ {"covarpop", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarPop")},
+ {"covarp", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarP")},
+
+ {"udaf", BuildAggrFuncFactoryCallback("UDAF", "udaf_traits_factory", UDAF)},
+
+ // Window functions
+ /// by SQL2011 should be with sort
+ {"rank", BuildAggrFuncFactoryCallback("Rank", "rank_traits_factory", WINDOW_AUTOARGS)},
+ {"denserank", BuildAggrFuncFactoryCallback("DenseRank", "dense_rank_traits_factory", WINDOW_AUTOARGS)},
+ // \todo unsupported now, required count element in window
+ //{"ntile", BuildAggrFuncFactoryCallback("Ntile", "ntile_traits_factory")},
+ //{"percentrank", BuildAggrFuncFactoryCallback("PercentRank", "percent_rank_traits_factory")},
+ //{"cumedist", BuildAggrFuncFactoryCallback("CumeDist", "cume_dist_traits_factory")},
+
+ {"firstvalue", BuildAggrFuncFactoryCallback("FirstValue", "first_value_traits_factory", {OverWindow})},
+ {"lastvalue", BuildAggrFuncFactoryCallback("LastValue", "last_value_traits_factory", {OverWindow})},
+ {"firstvalueignorenulls", BuildAggrFuncFactoryCallback("FirstValueIgnoreNulls", "first_value_ignore_nulls_traits_factory", {OverWindow})},
+ {"lastvalueignorenulls", BuildAggrFuncFactoryCallback("LastValueIgnoreNulls", "last_value_ignore_nulls_traits_factory", {OverWindow})},
+ };
+ return aggrFuncs;
+ }
+
+ TCoreFuncMap MakeCoreFuncs() {
+ TCoreFuncMap coreFuncs = {
+ {"listindexof", { "IndexOf", 2, 2}},
+ {"testbit", { "TestBit", 2, 2}},
+ {"setbit", { "SetBit", 2, 2}},
+ {"clearbit", { "ClearBit", 2, 2}},
+ {"flipbit", { "FlipBit", 2, 2 }},
+ {"toset", { "ToSet", 1, 1 }},
+ {"setisdisjoint", { "SetIsDisjoint", 2, 2}},
+ {"setintersection", { "SetIntersection", 2, 3}},
+ {"setincludes", { "SetIncludes", 2, 2}},
+ {"setunion", { "SetUnion", 2, 3}},
+ {"setdifference", { "SetDifference", 2, 2}},
+ {"setsymmetricdifference", { "SetSymmetricDifference", 2, 3}},
+ {"listaggregate", { "ListAggregate", 2, 2}},
+ {"dictaggregate", { "DictAggregate", 2, 2}},
+ };
+ return coreFuncs;
+ }
+};
+
+TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVector<TNodePtr>& args,
+ const TString& nameSpace, EAggregateMode aggMode, bool* mustUseNamed, TFuncPrepareNameNode funcPrepareNameNode) {
+
+ const TBuiltinFuncData* funcData = Singleton<TBuiltinFuncData>();
+ const TBuiltinFactoryCallbackMap& builtinFuncs = funcData->BuiltinFuncs;
+ const TAggrFuncFactoryCallbackMap& aggrFuncs = funcData->AggrFuncs;
+ const TCoreFuncMap& coreFuncs = funcData->CoreFuncs;
+
+ for (auto& arg: args) {
+ if (!arg) {
+ return nullptr;
+ }
+ }
+
+ TString normalizedName(name);
+ TString ns = to_lower(nameSpace);
+ if (ns.empty()) {
+ TMaybe<TIssue> error = NormalizeName(pos, normalizedName);
+ if (!error.Empty()) {
+ return new TInvalidBuiltin(pos, error->GetMessage());
+ }
+
+ auto coreFunc = coreFuncs.find(normalizedName);
+ if (coreFunc != coreFuncs.end()) {
+ ns = "core";
+ name = coreFunc->second.Name;
+ if (args.size() < coreFunc->second.MinArgs || args.size() > coreFunc->second.MaxArgs) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << name << " expected from "
+ << coreFunc->second.MinArgs << " to " << coreFunc->second.MaxArgs << " arguments, but got: " << args.size());
+ }
+
+ if (coreFunc->second.MinArgs != coreFunc->second.MaxArgs) {
+ name += ToString(args.size());
+ }
+ }
+ }
+
+ TString moduleResource;
+ if (ctx.Settings.ModuleMapping.contains(ns)) {
+ moduleResource = ctx.Settings.ModuleMapping.at(ns);
+ }
+
+ if (ns == "js") {
+ ns = "javascript";
+ }
+
+ auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(ns);
+ if (scriptType == NKikimr::NMiniKQL::EScriptType::SystemPython2) {
+ scriptType = NKikimr::NMiniKQL::EScriptType::Python2;
+ }
+
+ if (ns == "yql") {
+ return new TCallNodeImpl(pos, name, -1, -1, args);
+ } else if (ns == "string" && name == "SplitToList") {
+ TNodePtr positionalArgs;
+ TNodePtr namedArgs;
+ if (mustUseNamed && *mustUseNamed) {
+ YQL_ENSURE(args.size() == 2);
+ positionalArgs = args[0];
+ namedArgs = args[1];
+ *mustUseNamed = false;
+ }
+
+ TVector<TNodePtr> reuseArgs;
+ if (!namedArgs && args && funcPrepareNameNode) {
+ TString reusedBaseName = TStringBuilder() << "Arg" << to_title(nameSpace) << to_title(name);
+ reuseArgs.reserve(args.size());
+ for (const auto& arg: args) {
+ reuseArgs.push_back(funcPrepareNameNode(reusedBaseName, arg));
+ }
+ }
+
+ auto usedArgs = reuseArgs ? reuseArgs : args;
+
+ TVector<TNodePtr> positionalArgsElements;
+
+ if (namedArgs) {
+ auto positionalArgsTuple = dynamic_cast<TTupleNode*>(positionalArgs.Get());
+ Y_DEBUG_ABORT_UNLESS(positionalArgsTuple, "unexpected value at String::SplitToList positional args");
+ positionalArgsElements = positionalArgsTuple->Elements();
+ } else {
+ positionalArgsElements = usedArgs;
+ }
+
+ auto positionalArgsTupleSize = positionalArgsElements.size();
+ auto argsSize = positionalArgsTupleSize;
+
+ TNodePtr trueLiteral = BuildLiteralBool(pos, "true");
+ TNodePtr falseLiteral = BuildLiteralBool(pos, "false");
+ TNodePtr namedDelimeterStringArg;
+ TNodePtr namedSkipEmptyArg;
+
+ bool hasDelimeterString = false;
+ if (auto namedArgsStruct = dynamic_cast<TStructNode*>(namedArgs.Get())) {
+ auto exprs = namedArgsStruct->GetExprs();
+ for (auto& expr : exprs) {
+ if (expr->GetLabel() == "DelimeterString") {
+ hasDelimeterString = true;
+ break;
+ }
+ }
+ argsSize += namedArgsStruct->GetExprs().size();
+ }
+
+ if (argsSize < 3) {
+ positionalArgsElements.push_back(falseLiteral);
+ }
+ if (argsSize < 4 && !hasDelimeterString) {
+ positionalArgsElements.push_back(trueLiteral);
+ }
+
+ if (namedArgs) {
+ positionalArgs = BuildTuple(pos, positionalArgsElements);
+ } else {
+ usedArgs = positionalArgsElements;
+ }
+
+ TNodePtr customUserType = nullptr;
+ const auto& udfArgs = BuildUdfArgs(ctx, pos, usedArgs, positionalArgs, namedArgs, customUserType);
+ TNodePtr udfNode = BuildUdf(ctx, pos, nameSpace, name, udfArgs);
+ TVector<TNodePtr> applyArgs = { udfNode };
+ applyArgs.insert(applyArgs.end(), usedArgs.begin(), usedArgs.end());
+ return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs);
+ } else if (moduleResource) {
+ auto exportName = ns == "core" ? name : "$" + name;
+ TVector<TNodePtr> applyArgs = {
+ new TCallNodeImpl(pos, "bind", {
+ BuildAtom(pos, ns + "_module", 0), BuildQuotedAtom(pos, exportName)
+ })
+ };
+ applyArgs.insert(applyArgs.end(), args.begin(), args.end());
+ return new TCallNodeImpl(pos, "Apply", applyArgs);
+ } else if (ns == "hyperscan" || ns == "pcre" || ns == "pire" || ns.StartsWith("re2")) {
+ TString moduleName(nameSpace);
+ moduleName.to_title();
+ if ((args.size() == 1 || args.size() == 2) && (name.StartsWith("Multi") || (ns.StartsWith("re2") && name == "Capture"))) {
+ TVector<TNodePtr> multiArgs{
+ ns.StartsWith("re2") && name == "Capture" ? MakePair(pos, args) : args[0],
+ new TCallNodeImpl(pos, "Void", 0, 0, {}),
+ args[0]
+ };
+ auto fullName = moduleName + "." + name;
+ return new TYqlTypeConfigUdf(pos, fullName, multiArgs, multiArgs.size() + 1);
+ } else if (!(ns.StartsWith("re2") && name == "Options")) {
+ auto newArgs = args;
+ if (ns.StartsWith("re2")) {
+ // convert run config is tuple of string and optional options
+ if (args.size() == 1 || args.size() == 2) {
+ newArgs[0] = MakePair(pos, args);
+ if (args.size() == 2) {
+ newArgs.pop_back();
+ }
+ } else {
+ return new TInvalidBuiltin(pos, TStringBuilder() << ns << "." << name << " expected one or two arguments.");
+ }
+ }
+
+ return BuildUdf(ctx, pos, moduleName, name, newArgs);
+ }
+ } else if (ns == "datetime2" && (name == "Format" || name == "Parse")) {
+ return BuildUdf(ctx, pos, nameSpace, name, args);
+
+ } else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) {
+ auto scriptName = NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType);
+ return new TScriptUdf(pos, TString(scriptName), name, args);
+ } else if (ns.empty()) {
+ auto type = NormalizeTypeString(normalizedName);
+ if (AvailableDataTypes.contains(type)) {
+ return new TYqlData(pos, type, args);
+ }
+
+ if (normalizedName == "tablename") {
+ return new TTableName(pos, args, ctx.CurrCluster);
+ }
+
+ if (normalizedName == "aggregationfactory") {
+ if (args.size() < 1 || !args[0]->GetLiteral("String")) {
+ return new TInvalidBuiltin(pos, "AGGREGATION_FACTORY requries a function name");
+ }
+
+ auto aggNormalizedName = *args[0]->GetLiteral("String");
+ auto error = NormalizeName(pos, aggNormalizedName);
+ if (!error.Empty()) {
+ return new TInvalidBuiltin(pos, error->GetMessage());
+ }
+
+ if (aggNormalizedName == "aggregateby") {
+ return new TInvalidBuiltin(pos, "AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
+ }
+
+ if (aggNormalizedName == "multiaggregateby") {
+ return new TInvalidBuiltin(pos, "MULTI_AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
+ }
+
+ auto aggrCallback = aggrFuncs.find(aggNormalizedName);
+ if (aggrCallback == aggrFuncs.end()) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String"));
+ }
+
+ if (aggMode == EAggregateMode::Distinct) {
+ return new TInvalidBuiltin(pos, "Only aggregation functions allow DISTINCT set specification");
+ }
+
+ return (*aggrCallback).second(pos, args, aggMode, true).Release();
+ }
+
+ if (normalizedName == "aggregateby" || normalizedName == "multiaggregateby") {
+ const bool multi = (normalizedName == "multiaggregateby");
+ if (args.size() != 2) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << (multi ? "MULTI_AGGREGATE_BY" : "AGGREGATE_BY") << " requries two arguments");
+ }
+
+ auto name = multi ? "MultiAggregateBy" : "AggregateBy";
+ auto aggr = BuildFactoryAggregation(pos, name, "", aggMode, multi);
+ return new TBasicAggrFunc(pos, name, aggr, args);
+ }
+
+ auto aggrCallback = aggrFuncs.find(normalizedName);
+ if (aggrCallback != aggrFuncs.end()) {
+ return (*aggrCallback).second(pos, args, aggMode, false).Release();
+ }
+ if (aggMode == EAggregateMode::Distinct) {
+ return new TInvalidBuiltin(pos, "Only aggregation functions allow DISTINCT set specification");
+ }
+
+ auto builtinCallback = builtinFuncs.find(normalizedName);
+ if (builtinCallback != builtinFuncs.end()) {
+ return (*builtinCallback).second(pos, args);
+ } else if (normalizedName == "asstruct" || normalizedName == "structtype") {
+ if (args.empty()) {
+ return new TCallNodeImpl(pos, normalizedName == "asstruct" ? "AsStruct" : "StructType", 0, 0, args);
+ }
+
+ if (mustUseNamed && *mustUseNamed) {
+ *mustUseNamed = false;
+ YQL_ENSURE(args.size() == 2);
+ Y_DEBUG_ABORT_UNLESS(dynamic_cast<TTupleNode*>(args[0].Get()));
+ auto posArgs = static_cast<TTupleNode*>(args[0].Get());
+ if (posArgs->IsEmpty()) {
+ if (normalizedName == "asstruct") {
+ return args[1];
+ } else {
+ Y_DEBUG_ABORT_UNLESS(dynamic_cast<TStructNode*>(args[1].Get()));
+ auto namedArgs = static_cast<TStructNode*>(args[1].Get());
+ return new TStructTypeNode(pos, namedArgs->GetExprs());
+ }
+ }
+ }
+ return new TInvalidBuiltin(pos, TStringBuilder() <<
+ (normalizedName == "asstruct" ? "AsStruct" : "StructType") <<
+ " requires all argument to be named");
+ } else if (normalizedName == "expandstruct") {
+ if (mustUseNamed) {
+ if (!*mustUseNamed) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires at least one named argument");
+ }
+ *mustUseNamed = false;
+ }
+ YQL_ENSURE(args.size() == 2);
+ auto posArgs = static_cast<TTupleNode*>(args[0].Get());
+ Y_DEBUG_ABORT_UNLESS(dynamic_cast<TTupleNode*>(args[0].Get()));
+ Y_DEBUG_ABORT_UNLESS(dynamic_cast<TStructNode*>(args[1].Get()));
+ if (posArgs->GetTupleSize() != 1) {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named");
+ }
+
+ TVector<TNodePtr> flattenMembersArgs = {
+ BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}),
+ BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}),
+ };
+ return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs);
+ } else {
+ return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name << ", to use YQL functions, try YQL::" << name);
+ }
+ }
+
+ TNodePtr positionalArgs;
+ TNodePtr namedArgs;
+ if (mustUseNamed && *mustUseNamed) {
+ YQL_ENSURE(args.size() == 2);
+ positionalArgs = args[0];
+ namedArgs = args[1];
+ *mustUseNamed = false;
+ }
+
+ TVector<TNodePtr> reuseArgs;
+ if (!namedArgs && args && funcPrepareNameNode) {
+ TString reusedBaseName = TStringBuilder() << "Arg" << to_title(nameSpace) << to_title(name);
+ reuseArgs.reserve(args.size());
+ for (const auto& arg: args) {
+ reuseArgs.push_back(funcPrepareNameNode(reusedBaseName, arg));
+ }
+ }
+
+ auto usedArgs = reuseArgs ? reuseArgs : args;
+
+ TNodePtr customUserType = nullptr;
+ if (ns == "yson") {
+ if (name == "ConvertTo" && usedArgs.size() > 1) {
+ customUserType = usedArgs[1];
+ usedArgs.erase(usedArgs.begin() + 1);
+ }
+ ui32 optionsIndex = name.Contains("Lookup") ? 2 : 1;
+ if (usedArgs.size() <= optionsIndex && (ctx.PragmaYsonAutoConvert || ctx.PragmaYsonStrict)) {
+ usedArgs.push_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict));
+ }
+ } else if (ns == "json") {
+ ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF)
+ << "Json UDF is deprecated and is going to be removed, please switch to Yson UDF that also supports Json input: https://yql.yandex-team.ru/docs/yt/udf/list/yson/";
+ }
+
+ const auto& udfArgs = BuildUdfArgs(ctx, pos, usedArgs, positionalArgs, namedArgs, customUserType);
+ TNodePtr udfNode = BuildUdf(ctx, pos, nameSpace, name, udfArgs);
+ TVector<TNodePtr> applyArgs = { udfNode };
+ applyArgs.insert(applyArgs.end(), usedArgs.begin(), usedArgs.end());
+ return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs);
+}
+
+} // namespace NSQLTranslationV0