aboutsummaryrefslogtreecommitdiffstats
path: root/yql/essentials/core/yql_opt_utils.cpp
diff options
context:
space:
mode:
authorvvvv <vvvv@yandex-team.com>2024-11-07 12:29:36 +0300
committervvvv <vvvv@yandex-team.com>2024-11-07 13:49:47 +0300
commitd4c258e9431675bab6745c8638df6e3dfd4dca6b (patch)
treeb5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/core/yql_opt_utils.cpp
parent13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff)
downloadydb-d4c258e9431675bab6745c8638df6e3dfd4dca6b.tar.gz
Moved other yql/essentials libs YQL-19206
init commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/core/yql_opt_utils.cpp')
-rw-r--r--yql/essentials/core/yql_opt_utils.cpp2220
1 files changed, 2220 insertions, 0 deletions
diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp
new file mode 100644
index 0000000000..9b39558176
--- /dev/null
+++ b/yql/essentials/core/yql_opt_utils.cpp
@@ -0,0 +1,2220 @@
+#include "yql_opt_utils.h"
+#include "yql_expr_optimize.h"
+#include "yql_expr_type_annotation.h"
+#include "yql_type_annotation.h"
+#include "yql_type_helpers.h"
+
+#include <yql/essentials/ast/yql_constraint.h>
+#include <yql/essentials/parser/pg_catalog/catalog.h>
+#include <yql/essentials/utils/log/log.h>
+
+#include <util/generic/set.h>
+#include <util/string/type.h>
+
+#include <limits>
+
+namespace NYql {
+
+using namespace NNodes;
+
+namespace {
+
+template<bool Distinct>
+TExprNode::TPtr KeepUniqueConstraint(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) {
+ if (const auto constraint = src.GetConstraint<TUniqueConstraintNodeBase<Distinct>>()) {
+ const auto pos = node->Pos();
+ TExprNode::TListType children(1U, std::move(node));
+ for (const auto& sets : constraint->GetContent()) {
+ TExprNode::TListType lists;
+ lists.reserve(sets.size());
+ for (const auto& set : sets) {
+ TExprNode::TListType columns;
+ columns.reserve(set.size());
+ for (const auto& path : set) {
+ if (1U == path.size())
+ columns.emplace_back(ctx.NewAtom(pos, path.front()));
+ else {
+ TExprNode::TListType atoms(path.size());
+ std::transform(path.cbegin(), path.cend(), atoms.begin(), [&](const std::string_view& name) { return ctx.NewAtom(pos, name); });
+ columns.emplace_back(ctx.NewList(pos, std::move(atoms)));
+ }
+ }
+ lists.emplace_back(ctx.NewList(pos, std::move(columns)));
+ }
+ children.emplace_back(ctx.NewList(pos, std::move(lists)));
+ }
+ return ctx.NewCallable(pos, TString("Assume") += TUniqueConstraintNodeBase<Distinct>::Name(), std::move(children));
+ }
+ return node;
+}
+
+TExprNode::TPtr KeepChoppedConstraint(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) {
+ if (const auto constraint = src.GetConstraint<TChoppedConstraintNode>()) {
+ const auto pos = node->Pos();
+ TExprNode::TListType children(1U, std::move(node));
+ for (const auto& set : constraint->GetContent()) {
+ TExprNode::TListType columns;
+ columns.reserve(set.size());
+ for (const auto& path : set) {
+ if (1U == path.size())
+ columns.emplace_back(ctx.NewAtom(pos, path.front()));
+ else {
+ TExprNode::TListType atoms(path.size());
+ std::transform(path.cbegin(), path.cend(), atoms.begin(), [&](const std::string_view& name) { return ctx.NewAtom(pos, name); });
+ columns.emplace_back(ctx.NewList(pos, std::move(atoms)));
+ }
+ }
+ children.emplace_back(ctx.NewList(pos, std::move(columns)));
+ }
+ return ctx.NewCallable(pos, TString("Assume") += TChoppedConstraintNode::Name(), std::move(children));
+ }
+ return node;
+}
+
+TExprNodeBuilder GetterBuilder(TExprNodeBuilder parent, ui32 index, const TTypeAnnotationNode& type, TPartOfConstraintBase::TPathType path) {
+ if (path.empty())
+ return parent.Arg(index, "item");
+
+ const auto& name = path.back();
+ path.pop_back();
+ const auto parentType = TPartOfConstraintBase::GetSubTypeByPath(path, type);
+ YQL_ENSURE(parentType, "Wrong path '" << path << "' to component of: " << type);
+ return GetterBuilder(parent.Callable(index, ETypeAnnotationKind::Struct == parentType->GetKind() ? "Member" : "Nth"), 0U, type, path).Atom(1, name).Seal();
+}
+
+const TExprNode& GetLiteralStructMember(const TExprNode& literal, const TExprNode& member) {
+ for (const auto& child : literal.Children())
+ if (&child->Head() == &member || child->Head().Content() == member.Content())
+ return child->Tail();
+ ythrow yexception() << "Member '" << member.Content() << "' not found in literal struct.";
+}
+
+}
+
+TExprNode::TPtr MakeBoolNothing(TPositionHandle position, TExprContext& ctx) {
+ return ctx.NewCallable(position, "Nothing", {
+ ctx.NewCallable(position, "OptionalType", {
+ ctx.NewCallable(position, "DataType", {
+ ctx.NewAtom(position, "Bool", TNodeFlags::Default) }) }) });
+}
+
+TExprNode::TPtr MakeNull(TPositionHandle position, TExprContext& ctx) {
+ return ctx.NewCallable(position, "Null", {});
+}
+
+TExprNode::TPtr MakeConstMap(TPositionHandle position, const TExprNode::TPtr& input,
+ const TExprNode::TPtr& value, TExprContext& ctx) {
+ return ctx.Builder(position)
+ .Callable("Map")
+ .Add(0, input)
+ .Lambda(1)
+ .Param("x")
+ .Set(value)
+ .Seal()
+ .Seal()
+ .Build();
+}
+
+template <bool Bool>
+TExprNode::TPtr MakeBool(TPositionHandle position, TExprContext& ctx) {
+ return ctx.NewCallable(position, "Bool", { ctx.NewAtom(position, Bool ? "true" : "false", TNodeFlags::Default) });
+}
+
+TExprNode::TPtr MakeBool(TPositionHandle position, bool value, TExprContext& ctx) {
+ return value ? MakeBool<true>(position, ctx) : MakeBool<false>(position, ctx);
+}
+
+TExprNode::TPtr MakeOptionalBool(TPositionHandle position, bool value, TExprContext& ctx) {
+ return ctx.NewCallable(position, "Just", { MakeBool(position, value, ctx)});
+}
+
+TExprNode::TPtr MakePgBool(TPositionHandle position, bool value, TExprContext& ctx) {
+ return ctx.NewCallable(position, "PgConst", {
+ ctx.NewAtom(position, value ? "t" : "f", TNodeFlags::Default),
+ ctx.NewCallable(position, "PgType", { ctx.NewAtom(position, "bool")})
+ });
+}
+
+TExprNode::TPtr MakeIdentityLambda(TPositionHandle position, TExprContext& ctx) {
+ return ctx.Builder(position)
+ .Lambda()
+ .Param("arg")
+ .Arg("arg")
+ .Seal()
+ .Build();
+}
+
+bool IsJustOrSingleAsList(const TExprNode& node) {
+ return node.ChildrenSize() == 1U && node.IsCallable({"Just", "AsList"});
+}
+
+bool IsTransparentIfPresent(const TExprNode& node) {
+ return (node.IsCallable("FlatMap") || (3U == node.ChildrenSize() && node.IsCallable("IfPresent") && node.Tail().IsCallable({"Nothing", "EmptyFrom"})))
+ && node.Child(1U)->Tail().IsCallable("Just");
+}
+
+bool IsPredicateFlatMap(const TExprNode& node) {
+ return node.IsCallable({"FlatListIf", "ListIf", "OptionalIf", "FlatOptionalIf"});
+}
+
+bool IsFilterFlatMap(const NNodes::TCoLambda& lambda) {
+ const auto& arg = lambda.Args().Arg(0);
+ const auto& body = lambda.Body();
+ if (body.Maybe<TCoOptionalIf>() || body.Maybe<TCoListIf>()) {
+ if (body.Ref().Child(1) == arg.Raw()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool IsListReorder(const TExprNode& node) {
+ return node.IsCallable({"Sort", "Reverse"});
+}
+
+// Check if the flat map is a simple rename flat map
+bool IsRenameFlatMap(const NNodes::TCoFlatMapBase& node, TExprNode::TPtr& structNode) {
+
+ auto lambda = node.Lambda();
+ if (!IsJustOrSingleAsList(lambda.Body().Ref())) {
+ return false;
+ }
+
+ structNode = lambda.Body().Ref().Child(0);
+
+ auto asStruct = TExprBase(structNode);
+ if (!asStruct.Maybe<TCoAsStruct>()) {
+ return false;
+ }
+
+ for (auto child : asStruct.Cast<TCoAsStruct>()) {
+
+ if (!child.Item(1).Maybe<TCoMember>()) {
+ return false;
+ }
+
+ auto member = child.Item(1).Cast<TCoMember>();
+ if(member.Struct().Raw() != lambda.Args().Arg(0).Raw()) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check if the flat map is a simple rename flat map and compute the mapping from new names to original ones
+bool IsRenameFlatMapWithMapping(const NNodes::TCoFlatMapBase& node, TExprNode::TPtr& structNode,
+ THashMap<TString, TString> & mapping) {
+
+ auto lambda = node.Lambda();
+ if (!IsJustOrSingleAsList(lambda.Body().Ref())) {
+ return false;
+ }
+
+ structNode = lambda.Body().Ref().Child(0);
+
+ auto asStruct = TExprBase(structNode);
+ if (!asStruct.Maybe<TCoAsStruct>()) {
+ return false;
+ }
+
+ for (auto child : asStruct.Cast<TCoAsStruct>()) {
+
+ if (!child.Item(1).Maybe<TCoMember>()) {
+ return false;
+ }
+
+ auto member = child.Item(1).Cast<TCoMember>();
+ if(member.Struct().Raw() != lambda.Args().Arg(0).Raw()) {
+ return false;
+ }
+
+ auto to = child.Item(0).Cast<TCoAtom>();
+ auto from = member.Name();
+
+ if (to != from){
+ mapping[to.StringValue()] = from.StringValue();
+ }
+ }
+
+ return true;
+}
+
+// Check if the flat map is a simple rename flat map or a flatmap that also computes some
+// values in 1-1 fashion
+bool IsRenameOrApplyFlatMapWithMapping(const NNodes::TCoFlatMapBase& node, TExprNode::TPtr& structNode,
+ THashMap<TString, TString> & mapping, TSet<TString> & apply) {
+
+ auto lambda = node.Lambda();
+ if (!IsJustOrSingleAsList(lambda.Body().Ref())) {
+ return false;
+ }
+
+ structNode = lambda.Body().Ref().Child(0);
+
+ auto asStruct = TExprBase(structNode);
+ if (!asStruct.Maybe<TCoAsStruct>()) {
+ return false;
+ }
+
+ for (auto child : asStruct.Cast<TCoAsStruct>()) {
+
+ if (!child.Item(1).Maybe<TCoMember>()) {
+ apply.insert(child.Item(0).Cast<TCoAtom>().StringValue());
+ continue;
+ }
+
+ auto member = child.Item(1).Cast<TCoMember>();
+ if(member.Struct().Raw() != lambda.Args().Arg(0).Raw()) {
+ return false;
+ }
+
+ auto to = child.Item(0).Cast<TCoAtom>();
+ auto from = member.Name();
+
+ if (to != from){
+ mapping[to.StringValue()] = from.StringValue();
+ }
+ }
+
+ return true;
+}
+
+bool IsPassthroughFlatMap(const TCoFlatMapBase& flatmap, TMaybe<THashSet<TStringBuf>>* passthroughFields, bool analyzeJustMember) {
+ return IsPassthroughLambda(flatmap.Lambda(), passthroughFields, analyzeJustMember);
+}
+
+bool IsPassthroughLambda(const TCoLambda& lambda, TMaybe<THashSet<TStringBuf>>* passthroughFields, bool analyzeJustMember) {
+ auto body = lambda.Body();
+ auto arg = lambda.Args().Arg(0);
+
+ TMaybeNode<TExprBase> outItem;
+ if (IsJustOrSingleAsList(body.Ref())) {
+ outItem = body.Ref().Child(0);
+ }
+
+ if (body.Maybe<TCoOptionalIf>() || body.Maybe<TCoListIf>()) {
+ outItem = body.Cast<TCoConditionalValueBase>().Value();
+ }
+
+ if (!outItem) {
+ return false;
+ }
+
+ if (outItem.Cast().Raw() == arg.Raw()) {
+ return true;
+ }
+
+ if (auto maybeStruct = outItem.Maybe<TCoAsStruct>()) {
+ if (passthroughFields) {
+ passthroughFields->ConstructInPlace();
+ }
+
+ for (auto child : maybeStruct.Cast()) {
+ auto tuple = child.Cast<TCoNameValueTuple>();
+ auto value = tuple.Value();
+ if (analyzeJustMember && value.Maybe<TCoJust>()) {
+ value = value.Cast<TCoJust>().Input();
+ }
+
+ if (!value.Maybe<TCoMember>()) {
+ if (!passthroughFields) {
+ return false;
+ }
+
+ continue;
+ }
+
+ auto member = value.Cast<TCoMember>();
+ if (member.Struct().Raw() != arg.Raw() || member.Name().Value() != tuple.Name().Value()) {
+ if (!passthroughFields) {
+ return false;
+ }
+ } else {
+ if (passthroughFields) {
+ (*passthroughFields)->insert(member.Name().Value());
+ }
+ }
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+bool IsTablePropsDependent(const TExprNode& node) {
+ bool found = false;
+ VisitExpr(node, [&found](const TExprNode& n) {
+ found = found || TCoTablePropBase::Match(&n) || (TCoMember::Match(&n) && TCoMember(&n).Name().Value().StartsWith("_yql_sys_"));
+ return !found;
+ });
+ return found;
+}
+
+TExprNode::TPtr KeepColumnOrder(const TExprNode::TPtr& node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) {
+ auto columnOrder = typeCtx.LookupColumnOrder(src);
+ if (!columnOrder) {
+ return node;
+ }
+
+ return KeepColumnOrder(*columnOrder, node, ctx);
+}
+
+TExprNode::TPtr KeepColumnOrder(const TColumnOrder& order, const TExprNode::TPtr& node, TExprContext& ctx) {
+ return ctx.Builder(node->Pos())
+ .Callable("AssumeColumnOrder")
+ .Add(0, node)
+ .List(1)
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ size_t index = 0;
+ for (auto& [col, gen_col] : order) {
+ parent
+ .Atom(index++, gen_col);
+ }
+ return parent;
+ })
+ .Seal()
+ .Seal()
+ .Build();
+}
+
+template<class TFieldsSet>
+bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& arg, TFieldsSet& usedFields, const TParentsMap& parentsMap, bool allowDependsOn) {
+ const TTypeAnnotationNode* argType = RemoveOptionalType(arg.GetTypeAnn());
+ if (argType->GetKind() != ETypeAnnotationKind::Struct) {
+ return false;
+ }
+
+ if (&arg == start.Get()) {
+ return false;
+ }
+
+ const auto inputStructType = argType->Cast<TStructExprType>();
+ if (!IsDepended(*start, arg)) {
+ return inputStructType->GetSize() > 0;
+ }
+
+ TNodeSet nodes;
+ VisitExpr(start, [&](const TExprNode::TPtr& node) {
+ nodes.insert(node.Get());
+ return true;
+ });
+
+ const auto parents = parentsMap.find(&arg);
+ YQL_ENSURE(parents != parentsMap.cend());
+ for (const auto& parent : parents->second) {
+ if (nodes.cend() == nodes.find(parent)) {
+ continue;
+ }
+
+ if (parent->IsCallable("Member")) {
+ if constexpr (std::is_same_v<typename TFieldsSet::key_type, typename TFieldsSet::value_type>)
+ usedFields.emplace(parent->Tail().Content());
+ else
+ usedFields.emplace(parent->Tail().Content(), parent->TailPtr());
+ } else if (allowDependsOn && parent->IsCallable("DependsOn")) {
+ continue;
+ } else {
+ // unknown node
+ usedFields.clear();
+ return false;
+ }
+ }
+
+ return usedFields.size() < inputStructType->GetSize();
+}
+
+template bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& arg, TSet<TStringBuf>& usedFields, const TParentsMap& parentsMap,
+ bool allowDependsOn);
+template bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& arg, TSet<TString>& usedFields, const TParentsMap& parentsMap,
+ bool allowDependsOn);
+template bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& arg, std::map<std::string_view, TExprNode::TPtr>& usedFields,
+ const TParentsMap& parentsMap, bool allowDependsOn);
+
+TExprNode::TPtr AddMembersUsedInside(const TExprNode::TPtr& start, const TExprNode& arg, TExprNode::TPtr&& members, const TParentsMap& parentsMap, TExprContext& ctx) {
+ if (!members || !start || &arg == start.Get()) {
+ return {};
+ }
+
+ if (RemoveOptionalType(arg.GetTypeAnn())->GetKind() != ETypeAnnotationKind::Struct) {
+ return {};
+ }
+
+ if (!IsDepended(*start, arg)) {
+ return std::move(members);
+ }
+
+ TNodeSet nodes;
+ VisitExpr(start, [&](const TExprNode::TPtr& node) {
+ if (!node->IsCallable("DependsOn"))
+ nodes.emplace(node.Get());
+ return true;
+ });
+
+ std::unordered_set<std::string_view> names(members->ChildrenSize());
+ members->ForEachChild([&names](const TExprNode& name){ names.emplace(name.Content()); });
+
+
+ const auto parents = parentsMap.find(&arg);
+ YQL_ENSURE(parents != parentsMap.cend());
+ TExprNode::TListType extra;
+ for (const auto& parent : parents->second) {
+ if (nodes.cend() == nodes.find(parent))
+ continue;
+ if (!parent->IsCallable("Member"))
+ return {};
+ if (names.emplace(parent->Tail().Content()).second)
+ extra.emplace_back(parent->TailPtr());
+ }
+
+
+ if (!extra.empty()) {
+ auto children = members->ChildrenList();
+ std::move(extra.begin(), extra.end(), std::back_inserter(children));
+ members = ctx.ChangeChildren(*members, std::move(children));
+ }
+
+ return std::move(members);
+}
+
+template<class TFieldsSet>
+TExprNode::TPtr FilterByFields(TPositionHandle position, const TExprNode::TPtr& input, const TFieldsSet& subsetFields,
+ TExprContext& ctx, bool singleValue) {
+ TExprNode::TListType fields;
+ fields.reserve(subsetFields.size());
+ for (const auto& x : subsetFields) {
+ fields.emplace_back(ctx.NewAtom(position, x));
+ }
+
+ return ctx.NewCallable(position, singleValue ? "FilterMembers" : "ExtractMembers", { input, ctx.NewList(position, std::move(fields)) });
+}
+
+template TExprNode::TPtr FilterByFields(TPositionHandle position, const TExprNode::TPtr& input, const TSet<TStringBuf>& subsetFields, TExprContext& ctx, bool singleValue);
+template TExprNode::TPtr FilterByFields(TPositionHandle position, const TExprNode::TPtr& input, const TSet<TString>& subsetFields, TExprContext& ctx, bool singleValue);
+
+
+bool IsDependedImpl(const TExprNode* from, const TExprNode* to, TNodeMap<bool>& deps) {
+ if (from == to)
+ return true;
+
+ auto [it, inserted] = deps.emplace(from, false);
+ if (!inserted) {
+ return it->second;
+ }
+
+ for (const auto& child : from->Children()) {
+ if (IsDependedImpl(child.Get(), to, deps)) {
+ return it->second = true;
+ }
+ }
+
+ return false;
+}
+
+bool IsDepended(const TExprNode& from, const TExprNode& to) {
+ TNodeMap<bool> deps;
+ return IsDependedImpl(&from, &to, deps);
+}
+
+bool MarkDepended(const TExprNode& from, const TExprNode& to, TNodeMap<bool>& deps) {
+ return IsDependedImpl(&from, &to, deps);
+}
+
+bool IsEmpty(const TExprNode& node, const TTypeAnnotationContext& typeCtx) {
+ return typeCtx.IsConstraintCheckEnabled<TEmptyConstraintNode>() && node.Type() != TExprNode::Argument && node.GetConstraint<TEmptyConstraintNode>() != nullptr;
+}
+
+bool IsEmptyContainer(const TExprNode& node) {
+ return node.IsCallable({"EmptyList", "EmptyDict"})
+ || (1U == node.ChildrenSize() && node.IsCallable({"List", "Nothing", "EmptyIterator", "Dict", "EmptyFrom"}));
+}
+
+const TTypeAnnotationNode* RemoveOptionalType(const TTypeAnnotationNode* type) {
+ if (!type || type->GetKind() != ETypeAnnotationKind::Optional) {
+ return type;
+ }
+
+ return type->Cast<TOptionalExprType>()->GetItemType();
+}
+
+const TTypeAnnotationNode* RemoveAllOptionals(const TTypeAnnotationNode* type) {
+ while (type && type->GetKind() == ETypeAnnotationKind::Optional) {
+ type = type->Cast<TOptionalExprType>()->GetItemType();
+ }
+ return type;
+}
+
+TExprNode::TPtr GetSetting(const TExprNode& settings, const TStringBuf& name) {
+ for (auto& setting : settings.Children()) {
+ if (setting->ChildrenSize() != 0 && setting->Child(0)->Content() == name) {
+ return setting;
+ }
+ }
+ return nullptr;
+}
+
+TExprNode::TPtr FilterSettings(const TExprNode& settings, const THashSet<TStringBuf>& names, TExprContext& ctx) {
+ TExprNode::TListType children;
+ for (auto setting : settings.Children()) {
+ if (setting->ChildrenSize() != 0 && names.contains(setting->Head().Content())) {
+ children.push_back(setting);
+ }
+ }
+
+ return ctx.NewList(settings.Pos(), std::move(children));
+}
+
+bool HasSetting(const TExprNode& settings, const TStringBuf& name) {
+ return GetSetting(settings, name) != nullptr;
+}
+
+bool HasAnySetting(const TExprNode& settings, const THashSet<TString>& names) {
+ for (auto& setting : settings.Children()) {
+ if (setting->ChildrenSize() != 0 && names.contains(setting->Head().Content())) {
+ return true;
+ }
+ }
+ return false;
+}
+
+TExprNode::TPtr RemoveSetting(const TExprNode& settings, const TStringBuf& name, TExprContext& ctx) {
+ TExprNode::TListType children;
+ for (auto setting : settings.Children()) {
+ if (setting->ChildrenSize() != 0 && setting->Head().Content() == name) {
+ continue;
+ }
+
+ children.push_back(setting);
+ }
+
+ return ctx.NewList(settings.Pos(), std::move(children));
+}
+
+TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx) {
+ auto newSetting = value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) });
+ return ReplaceSetting(settings, newSetting, ctx);
+}
+
+TExprNode::TPtr ReplaceSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx) {
+ TExprNode::TListType newChildren;
+ const TStringBuf name = newSetting->Head().Content();
+ for (auto setting : settings.Children()) {
+ if (setting->ChildrenSize() != 0 && setting->Head().Content() == name) {
+ continue;
+ }
+
+ newChildren.push_back(setting);
+ }
+
+ newChildren.push_back(newSetting);
+
+ auto ret = ctx.NewList(settings.Pos(), std::move(newChildren));
+ return ret;
+}
+
+TExprNode::TPtr AddSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx) {
+ auto newSetting = value ? ctx.NewList(pos, { ctx.NewAtom(pos, name), value }) : ctx.NewList(pos, { ctx.NewAtom(pos, name) });
+ return AddSetting(settings, newSetting, ctx);
+}
+
+TExprNode::TPtr AddSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx) {
+ auto newChildren = settings.ChildrenList();
+ newChildren.push_back(newSetting);
+
+ auto ret = ctx.NewList(settings.Pos(), std::move(newChildren));
+ return ret;
+}
+
+TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& settings2, TExprContext& ctx) {
+ auto newChildren = settings1.ChildrenList();
+ newChildren.insert(
+ newChildren.cend(),
+ settings2.Children().begin(),
+ settings2.Children().end());
+ auto ret = ctx.NewList(settings1.Pos(), std::move(newChildren));
+ return ret;
+}
+
+TMaybe<TIssue> ParseToDictSettings(const TExprNode& input, TExprContext& ctx, TMaybe<EDictType>& type, TMaybe<bool>& isMany, TMaybe<ui64>& itemsCount, bool& isCompact) {
+ isCompact = false;
+ auto settings = input.Child(3);
+ if (settings->Type() != TExprNode::List) {
+ return TIssue(ctx.GetPosition(settings->Pos()), TStringBuilder() << "Expected tuple, but got: " << input.Type());
+ }
+
+ for (auto& child : settings->Children()) {
+ if (child->Type() == TExprNode::Atom) {
+ if (child->Content() == "One") {
+ isMany = false;
+ }
+ else if (child->Content() == "Many") {
+ isMany = true;
+ }
+ else if (child->Content() == "Sorted") {
+ type = EDictType::Sorted;
+ }
+ else if (child->Content() == "Hashed") {
+ type = EDictType::Hashed;
+ }
+ else if (child->Content() == "Auto") {
+ type = EDictType::Auto;
+ }
+ else if (child->Content() == "Compact") {
+ isCompact = true;
+ }
+ else {
+ return TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Unsupported option: " << child->Content());
+ }
+ } else if (child->Type() == TExprNode::List) {
+ if (child->ChildrenSize() != 2) {
+ return TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Expected list with 2 elemenst");
+ }
+ if (child->Child(0)->Content() == "ItemsCount") {
+ ui64 count = 0;
+ if (TryFromString(child->Child(1)->Content(), count)) {
+ itemsCount = count;
+ } else {
+ return TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Bad 'ItemsCount' value: " << child->Child(1)->Content());
+ }
+ }
+ else {
+ return TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Bad option: " << child->Child(0)->Content());
+ }
+ } else {
+ return TIssue(ctx.GetPosition(child->Pos()), TStringBuilder() << "Expected atom or list, but got: " << input.Type());
+ }
+
+ }
+
+ if (!type || !isMany) {
+ return TIssue(ctx.GetPosition(input.Pos()), TStringBuilder() << "Both options must be specified: Sorted/Hashed/Auto and Many/One");
+ }
+
+ return TMaybe<TIssue>();
+}
+
+EDictType SelectDictType(EDictType type, const TTypeAnnotationNode* keyType) {
+ if (type != EDictType::Auto) {
+ return type;
+ }
+
+ if (keyType->IsHashable() && keyType->IsEquatable()) {
+ return EDictType::Hashed;
+ }
+
+ YQL_ENSURE(keyType->IsComparableInternal());
+ return EDictType::Sorted;
+}
+
+TExprNode::TPtr MakeSingleGroupRow(const TExprNode& aggregateNode, TExprNode::TPtr reduced, TExprContext& ctx) {
+ auto pos = aggregateNode.Pos();
+ auto aggregatedColumns = aggregateNode.Child(2);
+ auto opt = ctx.NewCallable(pos, "ToOptional", { reduced });
+ TExprNode::TListType finalRowNodes;
+ for (ui32 index = 0; index < aggregatedColumns->ChildrenSize(); ++index) {
+ auto column = aggregatedColumns->Child(index);
+ auto trait = column->Child(1);
+ auto defVal = trait->Child(7);
+
+ if (column->Child(0)->IsAtom()) {
+ finalRowNodes.push_back(ctx.Builder(pos)
+ .List()
+ .Atom(0, column->Child(0)->Content())
+ .Callable(1, "Coalesce")
+ .Callable(0, "Member")
+ .Add(0, opt)
+ .Atom(1, column->Child(0)->Content())
+ .Seal()
+ .Add(1, defVal)
+ .Seal()
+ .Seal()
+ .Build());
+ } else {
+ const auto& multiFields = column->Child(0)->Children();
+ for (ui32 field = 0; field < multiFields.size(); ++field) {
+ finalRowNodes.push_back(ctx.Builder(pos)
+ .List()
+ .Atom(0, multiFields[field]->Content())
+ .Callable(1, "Member")
+ .Add(0, opt)
+ .Atom(1, multiFields[field]->Content())
+ .Seal()
+ .Seal()
+ .Build());
+ }
+ }
+ }
+
+ return ctx.Builder(pos)
+ .Callable("AsList")
+ .Add(0, ctx.NewCallable(pos, "AsStruct", std::move(finalRowNodes)))
+ .Seal()
+ .Build();
+}
+
+bool UpdateStructMembers(TExprContext& ctx, const TExprNode::TPtr& node, const TStringBuf& goal, TExprNode::TListType& members, MemberUpdaterFunc updaterFunc, const TTypeAnnotationNode* nodeType) {
+ if (!nodeType) {
+ nodeType = node->GetTypeAnn();
+ Y_DEBUG_ABORT_UNLESS(nodeType || !"Unset node type for UpdateStructMembers");
+ }
+ bool filtered = false;
+ if (node->IsCallable("AsStruct")) {
+ YQL_CLOG(DEBUG, Core) << "Enumerate struct literal for " << goal;
+ for (ui32 i = 0; i < node->ChildrenSize(); ++i) {
+ const auto memberNode = node->ChildPtr(i);
+ const auto& memberName = memberNode->Child(0)->Content();
+ TString useName = TString(memberName);
+ if (updaterFunc && !updaterFunc(useName, memberNode->GetTypeAnn())) {
+ filtered = true;
+ continue;
+ }
+ if (useName == memberName) {
+ members.push_back(memberNode);
+ } else {
+ auto useNameNode = ctx.NewAtom(node->Pos(), useName);
+ members.push_back(ctx.NewList(node->Pos(), { useNameNode, memberNode->ChildPtr(1) }));
+ }
+ }
+ } else if (nodeType->GetKind() == ETypeAnnotationKind::Optional) {
+ YQL_CLOG(DEBUG, Core) << "Enumerate optional struct for " << goal;
+ auto itemType = nodeType->Cast<TOptionalExprType>()->GetItemType();
+ return UpdateStructMembers(ctx, node, goal, members, updaterFunc, itemType);
+ } else {
+ YQL_CLOG(DEBUG, Core) << "Enumerate struct object for " << goal;
+ auto structType = nodeType->Cast<TStructExprType>();
+ for (auto item : structType->GetItems()) {
+ TString useName = TString(item->GetName());
+ if (updaterFunc && !updaterFunc(useName, item->GetItemType())) {
+ filtered = true;
+ continue;
+ }
+ members.push_back(ctx.Builder(node->Pos())
+ .List()
+ .Atom(0, useName)
+ .Callable(1, "Member")
+ .Add(0, node)
+ .Atom(1, item->GetName())
+ .Seal()
+ .Seal()
+ .Build());
+ }
+ }
+ return filtered;
+}
+
+TExprNode::TPtr ExpandRemoveMember(const TExprNode::TPtr& node, TExprContext& ctx) {
+ const bool force = node->Content() == "ForceRemoveMember";
+ const auto& removeMemberName = node->Child(1)->Content();
+ MemberUpdaterFunc removeFunc = [&removeMemberName](TString& memberName, const TTypeAnnotationNode*) {
+ return memberName != removeMemberName;
+ };
+ TExprNode::TListType members;
+ if (!UpdateStructMembers(ctx, node->ChildPtr(0), node->Content(), members, removeFunc)) {
+ if (!force) {
+ YQL_ENSURE(false, "Unexpected member name: " << removeMemberName);
+ }
+ return node->ChildPtr(0);
+ }
+ return ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+}
+
+TExprNode::TPtr ExpandRemoveMembers(const TExprNode::TPtr& node, TExprContext& ctx) {
+ const auto& membersToRemove = node->Child(1);
+ MemberUpdaterFunc removeFunc = [&membersToRemove](TString& memberName, const TTypeAnnotationNode*) {
+ for (const auto& x : membersToRemove->Children()) {
+ if (memberName == x->Content()) {
+ return false;
+ }
+ }
+
+ return true;
+ };
+
+ TExprNode::TListType members;
+ if (!UpdateStructMembers(ctx, node->ChildPtr(0), node->Content(), members, removeFunc)) {
+ return node->ChildPtr(0);
+ }
+ return ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+}
+
+TExprNode::TPtr ExpandRemovePrefixMembers(const TExprNode::TPtr& node, TExprContext& ctx) {
+ YQL_CLOG(DEBUG, Core) << "Expand " << node->Content();
+
+ const TTypeAnnotationNode* targetItemType = GetSeqItemType(node->GetTypeAnn());
+ bool isSequence = true;
+ if (!targetItemType) {
+ targetItemType = node->GetTypeAnn();
+ isSequence = false;
+ }
+
+ auto rebuildStruct = [&](const TExprNode::TPtr& srcStruct, const TStructExprType* targetType) -> TExprNode::TPtr {
+ TExprNode::TListType nonSystemMembers;
+ for (auto item : targetType->GetItems()) {
+ nonSystemMembers.push_back(
+ Build<TCoNameValueTuple>(ctx, srcStruct->Pos())
+ .Name()
+ .Value(item->GetName())
+ .Build()
+ .Value<TCoMember>()
+ .Struct(srcStruct)
+ .Name()
+ .Value(item->GetName())
+ .Build()
+ .Build()
+ .Done().Ptr()
+ );
+ }
+ return Build<TCoAsStruct>(ctx, srcStruct->Pos())
+ .Add(std::move(nonSystemMembers))
+ .Done()
+ .Ptr();
+ };
+
+ if (targetItemType->GetKind() == ETypeAnnotationKind::Struct) {
+ if (isSequence) {
+ TExprNode::TListType nonSystemMembers;
+ for (auto item : targetItemType->Cast<TStructExprType>()->GetItems()) {
+ nonSystemMembers.push_back(ctx.NewAtom(node->Pos(), item->GetName()));
+ }
+
+ return Build<TCoExtractMembers>(ctx, node->Pos())
+ .Input(node->HeadPtr())
+ .Members()
+ .Add(nonSystemMembers)
+ .Build()
+ .Done().Ptr();
+ }
+
+ return rebuildStruct(node->HeadPtr(), targetItemType->Cast<TStructExprType>());
+ }
+
+ YQL_ENSURE(targetItemType->GetKind() == ETypeAnnotationKind::Variant);
+
+ const auto targetUnderType = targetItemType->Cast<TVariantExprType>()->GetUnderlyingType();
+ TExprNode::TListType variants;
+ TTypeAnnotationNode::TListType types;
+ switch (targetUnderType->GetKind()) {
+ case ETypeAnnotationKind::Tuple: {
+ types = targetUnderType->Cast<TTupleExprType>()->GetItems();
+ variants.resize(types.size());
+ for (ui32 i = 0U; i < variants.size(); ++i) {
+ variants[i] = ctx.NewAtom(node->Pos(), ToString(i), TNodeFlags::Default);
+ }
+ break;
+ }
+ case ETypeAnnotationKind::Struct: {
+ const auto& items = targetUnderType->Cast<TStructExprType>()->GetItems();
+ types.resize(items.size());
+ variants.resize(items.size());
+ for (ui32 i = 0U; i < items.size(); ++i) {
+ types[i] = items[i]->GetItemType();
+ variants[i] = ctx.NewAtom(node->Pos(), items[i]->GetName());
+ }
+ break;
+ }
+ default: break;
+ }
+ const auto type = ExpandType(node->Pos(), *targetItemType, ctx);
+
+ if (isSequence) {
+ return ctx.Builder(node->Pos())
+ .Callable("Map")
+ .Add(0, node->HeadPtr())
+ .Lambda(1)
+ .Param("varItem")
+ .Callable("Visit")
+ .Arg(0, "varItem")
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ for (ui32 i = 0U; i < variants.size(); ++i) {
+ parent.Add(1 + 2 * i, variants[i]);
+ parent.Lambda(2 + 2 * i)
+ .Param("item")
+ .Callable("Variant")
+ .Callable(0, "AsStruct")
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ auto targetStructType = types[i]->Cast<TStructExprType>();
+ for (size_t j = 0; j < targetStructType->GetItems().size(); ++j) {
+ auto name = targetStructType->GetItems()[j]->GetName();
+ parent.List(j)
+ .Atom(0, name)
+ .Callable(1, "Member")
+ .Arg(0, "item")
+ .Atom(1, name)
+ .Seal()
+ .Seal();
+ }
+ return parent;
+ })
+ .Seal()
+ .Add(1, std::move(variants[i]))
+ .Add(2, type)
+ .Seal()
+ .Seal();
+ }
+ return parent;
+ })
+ .Seal()
+ .Seal()
+ .Seal()
+ .Build();
+ }
+
+ return ctx.Builder(node->Pos())
+ .Callable("Visit")
+ .Add(0, node->HeadPtr())
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ for (ui32 i = 0U; i < variants.size(); ++i) {
+ parent.Add(1 + 2 * i, variants[i]);
+ parent.Lambda(2 + 2 * i)
+ .Param("item")
+ .Callable("Variant")
+ .Callable(0, "AsStruct")
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ auto targetStructType = types[i]->Cast<TStructExprType>();
+ for (size_t j = 0; j < targetStructType->GetItems().size(); ++j) {
+ auto name = targetStructType->GetItems()[j]->GetName();
+ parent.List(j)
+ .Atom(0, name)
+ .Callable(1, "Member")
+ .Arg(0, "item")
+ .Atom(1, name)
+ .Seal()
+ .Seal();
+ }
+ return parent;
+ })
+ .Seal()
+ .Add(1, std::move(variants[i]))
+ .Add(2, type)
+ .Seal()
+ .Seal();
+ }
+ return parent;
+ })
+ .Seal()
+ .Build();
+}
+
+TExprNode::TPtr ExpandFlattenMembers(const TExprNode::TPtr& node, TExprContext& ctx) {
+ TExprNode::TListType members;
+ for (auto& child : node->Children()) {
+ auto prefix = child->Child(0)->Content();
+ MemberUpdaterFunc addPrefixFunc = [&prefix](TString& memberName, const TTypeAnnotationNode*) {
+ memberName = prefix + memberName;
+ return true;
+ };
+ UpdateStructMembers(ctx, child->ChildPtr(1), "FlattenMembers", members, addPrefixFunc);
+ }
+ return ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+}
+
+TExprNode::TPtr ExpandFlattenStructs(const TExprNode::TPtr& node, TExprContext& ctx) {
+ TExprNode::TListType members;
+ auto structObj = node->Child(0);
+ for (auto& x : structObj->GetTypeAnn()->Cast<TStructExprType>()->GetItems()) {
+ auto subMember = ctx.Builder(node->Pos())
+ .Callable("Member")
+ .Add(0, structObj)
+ .Atom(1, x->GetName())
+ .Seal()
+ .Build();
+
+ auto itemType = x->GetItemType();
+ if (itemType->GetKind() == ETypeAnnotationKind::Optional) {
+ itemType = itemType->Cast<TOptionalExprType>()->GetItemType();
+ }
+
+ if (itemType->GetKind() != ETypeAnnotationKind::Struct) {
+ members.push_back(ctx.Builder(node->Pos())
+ .List()
+ .Atom(0, x->GetName())
+ .Add(1, subMember)
+ .Seal()
+ .Build());
+ continue;
+ }
+
+ UpdateStructMembers(ctx, subMember, "FlattenStructs", members, {}, x->GetItemType());
+ }
+ return ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+}
+
+TExprNode::TPtr ExpandDivePrefixMembers(const TExprNode::TPtr& node, TExprContext& ctx) {
+ auto prefixes = node->Child(1)->Children();
+ MemberUpdaterFunc filterAndCutByPrefixFunc = [&prefixes](TString& memberName, const TTypeAnnotationNode*) {
+ for (const auto& p : prefixes) {
+ auto prefix = p->Content();
+ if (memberName.StartsWith(prefix)) {
+ memberName = memberName.substr(prefix.length());
+ return true;
+ }
+ }
+ return false;
+ };
+ TExprNode::TListType members;
+ UpdateStructMembers(ctx, node->ChildPtr(0), "DivePrefixMembers", members, filterAndCutByPrefixFunc);
+ auto ret = ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+ return ret;
+}
+
+TExprNode::TPtr ExpandAddMember(const TExprNode::TPtr& node, TExprContext& ctx) {
+ TExprNode::TListType members;
+ UpdateStructMembers(ctx, node->ChildPtr(0), "AddMember", members);
+ members.push_back(ctx.NewList(node->Pos(), { node->ChildPtr(1), node->ChildPtr(2) }));
+ return ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+}
+
+TExprNode::TPtr ExpandReplaceMember(const TExprNode::TPtr& node, TExprContext& ctx) {
+ const auto& removeMemberName = node->Child(1)->Content();
+ MemberUpdaterFunc cloneFunc = [&removeMemberName](TString& memberName, const TTypeAnnotationNode*) {
+ return memberName != removeMemberName;
+ };
+ TExprNode::TListType members;
+ UpdateStructMembers(ctx, node->ChildPtr(0), "ReplaceMember", members, cloneFunc);
+ members.push_back(ctx.NewList(node->Pos(), { node->ChildPtr(1), node->ChildPtr(2) }));
+ auto ret = ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+ return ret;
+}
+
+TExprNode::TPtr ExpandFlattenByColumns(const TExprNode::TPtr& node, TExprContext& ctx) {
+ ui32 shift = 0;
+ TStringBuf mode = "auto";
+ if (node->Child(0)->IsAtom()) {
+ mode = node->Child(0)->Content();
+ shift = 1;
+ }
+
+ auto structObj = node->ChildPtr(shift);
+ TExprNode::TListType members;
+ struct TFlattenInfo {
+ const TTypeAnnotationNode* Type = nullptr;
+ TExprNode::TPtr ArgNode = nullptr;
+ TExprNode::TPtr ListMember = nullptr;
+ TExprNode::TPtr ColumnNode = nullptr;
+ bool ReplaceName = false;
+ };
+ TVector<TFlattenInfo> flattens;
+ TMap<TString, size_t> name2Info;
+ size_t flattenIndex = 0;
+ for (auto iter = node->Children().begin() + 1 + shift; iter != node->Children().end(); ++iter) {
+ auto flattenColumnNode = *iter;
+ const bool haveAlias = flattenColumnNode->ChildrenSize() == 2;
+ const TString flattenItemName = (haveAlias) ? TString(flattenColumnNode->Child(1)->Content()) : TString(flattenColumnNode->Content());
+ TFlattenInfo flattenInfo;
+ if (haveAlias) {
+ flattenColumnNode = flattenColumnNode->Child(0);
+ }
+ else {
+ flattenInfo.ReplaceName = true;
+ }
+ const auto useNameNode = ctx.NewAtom(node->Pos(), flattenItemName);
+ flattenInfo.ArgNode = ctx.NewArgument(node->Pos(), flattenItemName);
+ flattenInfo.ColumnNode = flattenColumnNode;
+ members.push_back(ctx.NewList(node->Pos(), { useNameNode, flattenInfo.ArgNode }));
+ flattens.emplace_back(std::move(flattenInfo));
+ name2Info.insert({ TString(flattenColumnNode->Content()), flattenIndex++ });
+ }
+ MemberUpdaterFunc removeAndInfosUpdateFunc = [&flattens, &name2Info](TString& memberName, const TTypeAnnotationNode* type) {
+ const auto iter = name2Info.find(memberName);
+ if (iter == name2Info.end()) {
+ return true;
+ }
+ TFlattenInfo& flattenInfo = flattens[iter->second];
+ flattenInfo.Type = type;
+ return !flattenInfo.ReplaceName;
+ };
+ UpdateStructMembers(ctx, structObj, "FlattenByColumns", members, removeAndInfosUpdateFunc);
+ auto internalResult = ctx.NewCallable(node->Pos(), "AsStruct", std::move(members));
+ TDeque<const TFlattenInfo*> flattenPriority;
+ for (auto& flattenInfo : flattens) {
+ bool isDict = false;
+ bool isList = false;
+ flattenInfo.ListMember = ctx.Builder(structObj->Pos())
+ .Callable("Member")
+ .Add(0, structObj)
+ .Add(1, flattenInfo.ColumnNode)
+ .Seal().Build();
+ if (mode == "list" && flattenInfo.Type->GetKind() == ETypeAnnotationKind::Optional) {
+ isList = true;
+ flattenInfo.ListMember = ctx.Builder(structObj->Pos())
+ .Callable("Coalesce")
+ .Add(0, flattenInfo.ListMember)
+ .Callable(1, "List")
+ .Add(0, ExpandType(structObj->Pos(), *flattenInfo.Type->Cast<TOptionalExprType>()->GetItemType(), ctx))
+ .Seal()
+ .Seal()
+ .Build();
+ } else if (mode == "dict" && flattenInfo.Type->GetKind() == ETypeAnnotationKind::Optional) {
+ isDict = true;
+ flattenInfo.ListMember = ctx.Builder(structObj->Pos())
+ .Callable("Coalesce")
+ .Add(0, flattenInfo.ListMember)
+ .Callable(1, "Dict")
+ .Add(0, ExpandType(structObj->Pos(), *flattenInfo.Type->Cast<TOptionalExprType>()->GetItemType(), ctx))
+ .Seal()
+ .Seal()
+ .Build();
+ } else {
+ isList = flattenInfo.Type->GetKind() == ETypeAnnotationKind::List;
+ isDict = flattenInfo.Type->GetKind() == ETypeAnnotationKind::Dict;
+ }
+
+ if (isDict) {
+ flattenInfo.ListMember = ctx.Builder(structObj->Pos())
+ .Callable("DictItems")
+ .Add(0, flattenInfo.ListMember)
+ .Seal().Build();
+ }
+
+ if (!isDict && !isList) {
+ flattenPriority.push_back(&flattenInfo);
+ } else {
+ flattenPriority.push_front(&flattenInfo);
+ }
+ }
+
+ TString operation = "OrderedMap";
+ for (const auto& infoPtr : flattenPriority) {
+ const TFlattenInfo& flattenInfo = *infoPtr;
+ auto mapLambda = ctx.NewLambda(node->Pos(), ctx.NewArguments(node->Pos(), { flattenInfo.ArgNode }), std::move(internalResult));
+ internalResult = ctx.Builder(node->Pos())
+ .Callable(operation)
+ .Add(0, flattenInfo.ListMember)
+ .Add(1, mapLambda)
+ .Seal().Build();
+ operation = "OrderedFlatMap";
+ }
+ return internalResult;
+}
+
+TExprNode::TPtr ExpandCastStruct(const TExprNode::TPtr& node, TExprContext& ctx) {
+ YQL_CLOG(DEBUG, Core) << "Expand " << node->Content();
+ auto targetType = node->Tail().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
+ TExprNode::TListType items;
+ for (auto item : targetType->GetItems()) {
+ auto nameAtom = ctx.NewAtom(node->Pos(), item->GetName());
+ auto tuple = ctx.NewList(node->Pos(), {
+ nameAtom, ctx.NewCallable(node->Pos(), "Member", { node->HeadPtr(), nameAtom }) });
+ items.push_back(std::move(tuple));
+ }
+
+ return ctx.NewCallable(node->Pos(), "AsStruct", std::move(items));
+}
+
+TExprNode::TListType GetOptionals(const TPositionHandle& pos, const TStructExprType& type, TExprContext& ctx) {
+ TExprNode::TListType result;
+ for (const auto& item : type.GetItems())
+ if (ETypeAnnotationKind::Optional == item->GetItemType()->GetKind())
+ result.emplace_back(ctx.NewAtom(pos, item->GetName()));
+ return result;
+}
+
+TExprNode::TListType GetOptionals(const TPositionHandle& pos, const TTupleExprType& type, TExprContext& ctx) {
+ TExprNode::TListType result;
+ if (const auto& items = type.GetItems(); !items.empty())
+ for (ui32 i = 0U; i < items.size(); ++i)
+ if (ETypeAnnotationKind::Optional == items[i]->GetKind())
+ result.emplace_back(ctx.NewAtom(pos, i));
+ return result;
+}
+
+TExprNode::TPtr ExpandSkipNullFields(const TExprNode::TPtr& node, TExprContext& ctx) {
+ YQL_ENSURE(node->IsCallable({"SkipNullMembers", "SkipNullElements"}));
+ YQL_CLOG(DEBUG, Core) << "Expand " << node->Content();
+ const bool isTuple = node->IsCallable("SkipNullElements");
+ TExprNode::TListType fields;
+ if (node->ChildrenSize() > 1) {
+ fields = node->Child(1)->ChildrenList();
+ } else if (isTuple) {
+ fields = GetOptionals(node->Pos(), *GetSeqItemType(node->Head().GetTypeAnn())->Cast<TTupleExprType>(), ctx);
+ } else {
+ fields = GetOptionals(node->Pos(), *GetSeqItemType(node->Head().GetTypeAnn())->Cast<TStructExprType>(), ctx);
+ }
+ if (fields.empty()) {
+ return node->HeadPtr();
+ }
+ return ctx.Builder(node->Pos())
+ .Callable("OrderedFilter")
+ .Add(0, node->HeadPtr())
+ .Lambda(1)
+ .Param("item")
+ .Callable("And")
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ for (ui32 i = 0U; i < fields.size(); ++i) {
+ parent
+ .Callable(i, "Exists")
+ .Callable(0, isTuple ? "Nth" : "Member")
+ .Arg(0, "item")
+ .Add(1, std::move(fields[i]))
+ .Seal()
+ .Seal();
+ }
+ return parent;
+ })
+ .Seal()
+ .Seal()
+ .Seal().Build();
+}
+
+void ExtractSimpleKeys(const TExprNode* keySelectorBody, const TExprNode* keySelectorArg, TVector<TStringBuf>& columns) {
+ if (keySelectorBody->IsList()) {
+ for (auto& child: keySelectorBody->Children()) {
+ if (child->IsCallable("Member") && child->Child(0) == keySelectorArg) {
+ columns.push_back(child->Child(1)->Content());
+ } else {
+ break;
+ }
+ }
+ } else if (keySelectorBody->IsCallable("Member") && keySelectorBody->Child(0) == keySelectorArg) {
+ columns.push_back(keySelectorBody->Child(1)->Content());
+ }
+}
+
+const TExprNode& SkipCallables(const TExprNode& node, const std::initializer_list<std::string_view>& skipCallables) {
+ const TExprNode* p = &node;
+ while (p->IsCallable(skipCallables)) {
+ p = &p->Head();
+ }
+ return *p;
+}
+
+namespace {
+TExprNode::TPtr ApplyWithCastStructForFirstArg(const TExprNode::TPtr& node, const TTypeAnnotationNode& targetType, TExprContext& ctx) {
+ YQL_ENSURE(node->IsLambda());
+ TCoLambda lambda(node);
+ YQL_ENSURE(lambda.Args().Size() >= 1);
+
+ TPositionHandle pos = lambda.Pos();
+
+ TExprNodeList args = lambda.Args().Ref().ChildrenList();
+ TExprNode::TPtr body = lambda.Body().Ptr();
+
+ auto newArg = ctx.NewArgument(pos, "row");
+ auto cast = ctx.NewCallable(pos, "CastStruct", { newArg, ExpandType(pos, targetType, ctx) });
+
+ body = ctx.ReplaceNodes(std::move(body), {{ args.front().Get(), cast }});
+ args.front() = newArg;
+
+ auto result = ctx.NewLambda(pos, ctx.NewArguments(pos, std::move(args)), std::move(body));
+ return ctx.DeepCopyLambda(*result);
+}
+
+}
+
+void ExtractSortKeyAndOrder(TPositionHandle pos, const TExprNode::TPtr& sortTraitsNode, TSortParams& sortParams, TExprContext& ctx)
+{
+ ExtractSortKeyAndOrder(pos, sortTraitsNode, sortParams.Key, sortParams.Order, ctx);
+}
+
+void ExtractSortKeyAndOrder(TPositionHandle pos, const TExprNode::TPtr& sortTraitsNode, TExprNode::TPtr& sortKey, TExprNode::TPtr& sortOrder, TExprContext& ctx) {
+ if (sortTraitsNode->IsCallable("SortTraits")) {
+ TCoSortTraits sortTraits(sortTraitsNode);
+ auto lambdaInputType =
+ sortTraits.ListType().Ref().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TListExprType>()->GetItemType();
+ sortOrder = sortTraits.SortDirections().Ptr();
+ sortKey = ApplyWithCastStructForFirstArg(sortTraits.SortKeySelectorLambda().Ptr(), *lambdaInputType, ctx);
+ } else {
+ YQL_ENSURE(sortTraitsNode->IsCallable("Void"));
+ sortOrder = sortKey = ctx.NewCallable(pos, "Void", {});
+ }
+}
+
+void ExtractSessionWindowParams(TPositionHandle pos, TSessionWindowParams& sessionParams, TExprContext& ctx)
+{
+ ExtractSessionWindowParams(pos, sessionParams.Traits, sessionParams.Key,
+ sessionParams.KeyType, sessionParams.ParamsType, sessionParams.SortTraits, sessionParams.Init,
+ sessionParams.Update, ctx);
+}
+
+void ExtractSessionWindowParams(TPositionHandle pos, const TExprNode::TPtr& sessionTraits, TExprNode::TPtr& sessionKey,
+ const TTypeAnnotationNode*& sessionKeyType, const TTypeAnnotationNode*& sessionParamsType, TExprNode::TPtr& sessionSortTraits, TExprNode::TPtr& sessionInit,
+ TExprNode::TPtr& sessionUpdate, TExprContext& ctx)
+{
+ sessionKey = sessionSortTraits = sessionInit = sessionUpdate = {};
+ sessionKeyType = nullptr;
+ sessionParamsType = nullptr;
+
+ if (sessionTraits && sessionTraits->IsCallable("SessionWindowTraits")) {
+ TCoSessionWindowTraits swt(sessionTraits);
+ auto lambdaInputType =
+ swt.ListType().Ref().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TListExprType>()->GetItemType();
+
+ sessionKeyType = swt.Calculate().Ref().GetTypeAnn();
+
+ TVector<const TItemExprType*> sessionParamItems;
+ sessionParamItems.push_back(ctx.MakeType<TItemExprType>("start", sessionKeyType));
+ sessionParamItems.push_back(ctx.MakeType<TItemExprType>("state", swt.InitState().Ref().GetTypeAnn()));
+ sessionParamsType = ctx.MakeType<TStructExprType>(sessionParamItems);
+
+ sessionSortTraits = swt.SortSpec().Ptr();
+ sessionKey = ApplyWithCastStructForFirstArg(swt.Calculate().Ptr(), *lambdaInputType, ctx);
+ sessionInit = ApplyWithCastStructForFirstArg(swt.InitState().Ptr(), *lambdaInputType, ctx);
+ sessionUpdate = ApplyWithCastStructForFirstArg(swt.UpdateState().Ptr(), *lambdaInputType, ctx);
+ } else {
+ YQL_ENSURE(!sessionTraits || sessionTraits->IsCallable("Void"));
+ sessionSortTraits = ctx.NewCallable(pos, "Void", {});
+ }
+}
+
+TExprNode::TPtr BuildKeySelector(TPositionHandle pos, const TStructExprType& rowType, const TExprNode::TPtr& keyColumns, TExprContext& ctx) {
+ auto keyExtractorArg = ctx.NewArgument(pos, "item");
+ TExprNode::TListType tupleItems;
+ for (auto column : keyColumns->Children()) {
+ auto itemType = rowType.GetItems()[*rowType.FindItem(column->Content())]->GetItemType();
+ auto keyValue = ctx.NewCallable(pos, "Member", { keyExtractorArg, column });
+ if (RemoveOptionalType(itemType)->GetKind() != ETypeAnnotationKind::Data) {
+ keyValue = ctx.NewCallable(pos, "StablePickle", { keyValue });
+ }
+
+ tupleItems.push_back(keyValue);
+ }
+
+ TExprNode::TPtr tuple;
+ if (tupleItems.size() == 0) {
+ tuple = ctx.Builder(pos).Callable("Uint32").Atom(0, 0U).Seal().Build();
+ } else if (tupleItems.size() == 1) {
+ tuple = tupleItems[0];
+ } else {
+ tuple = ctx.NewList(pos, std::move(tupleItems));
+ }
+ return ctx.NewLambda(pos, ctx.NewArguments(pos, {keyExtractorArg}), std::move(tuple));
+}
+
+template <bool Cannonize, bool EnableNewOptimizers>
+TExprNode::TPtr OptimizeIfPresent(const TExprNode::TPtr& node, TExprContext& ctx) {
+ auto optionals = node->ChildrenList();
+ optionals.resize(optionals.size() - 2U);
+ const auto& lambda = *node->Child(optionals.size());
+ if (lambda.Tail().GetDependencyScope()->second != &lambda) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " where then branch isn't depended on optional";
+ if (1U == optionals.size()) {
+ return ctx.Builder(node->Pos())
+ .Callable("If")
+ .Callable(0, "Exists")
+ .Add(0, std::move(optionals.front()))
+ .Seal()
+ .Add(1, lambda.TailPtr())
+ .Add(2, node->TailPtr())
+ .Seal().Build();
+ }
+
+ std::for_each(optionals.begin(), optionals.end(), [&ctx](TExprNode::TPtr& node) {
+ const auto p = node->Pos();
+ node = ctx.NewCallable(p, "Exists", {std::move(node)});
+ });
+ return ctx.Builder(node->Pos())
+ .Callable("If")
+ .Callable(0, "And")
+ .Add(std::move(optionals))
+ .Seal()
+ .Add(1, lambda.TailPtr())
+ .Add(2, node->TailPtr())
+ .Seal().Build();
+ }
+
+ if (std::any_of(optionals.cbegin(), optionals.cend(), [](const TExprNode::TPtr& node) { return node->IsCallable({"Nothing","EmptyFrom"}); })) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over Nothing.";
+ return node->TailPtr();
+ }
+
+ if (std::any_of(optionals.cbegin(), optionals.cend(), [](const TExprNode::TPtr& node) { return node->IsCallable("Just"); })) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over Just.";
+ TExprNode::TListType args;
+ args.reserve(optionals.size());
+ std::for_each(optionals.begin(), optionals.end(), [&args](TExprNode::TPtr& arg) {
+ if (arg->IsCallable("Just"))
+ arg = arg->HeadPtr();
+ else
+ args.emplace_back(std::move(arg));
+ });
+
+ if (args.empty()) {
+ TNodeOnNodeOwnedMap replaces(optionals.size());
+ for (ui32 i = 0U; i < optionals.size(); ++i)
+ replaces.emplace(lambda.Head().Child(i), std::move(optionals[i]));
+ return ctx.ReplaceNodes(lambda.TailPtr(), replaces);
+ }
+
+ auto simplify = ctx.Builder(node->Pos())
+ .Lambda()
+ .Params("items", args.size())
+ .Apply(lambda)
+ .Do([&](TExprNodeReplaceBuilder& parent) -> TExprNodeReplaceBuilder& {
+ for (auto i = 0U, j = 0U; i < optionals.size(); ++i) {
+ if (auto opt = std::move(optionals[i]))
+ parent.With(i, std::move(opt));
+ else
+ parent.With(i, "items", j++);
+ }
+ return parent;
+ })
+ .Seal()
+ .Seal().Build();
+
+ args.emplace_back(std::move(simplify));
+ args.emplace_back(node->TailPtr());
+ return ctx.ChangeChildren(*node, std::move(args));
+ }
+
+ if (3U == node->ChildrenSize()) {
+ if (&lambda.Tail() == &lambda.Head().Head()) {
+ YQL_CLOG(DEBUG, Core) << "Simplify " << node->Content() << " as Coalesce";
+ return ctx.NewCallable(node->Pos(), "Coalesce", {node->HeadPtr(), node->TailPtr()});
+ }
+
+ if (const auto& input = node->Head(); IsTransparentIfPresent(input)) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over transparent " << input.Content();
+ return ctx.Builder(node->Pos())
+ .Callable(node->Content())
+ .Add(0, input.HeadPtr())
+ .Lambda(1)
+ .Param("item")
+ .Apply(*node->Child(1))
+ .With(0)
+ .ApplyPartial(input.Child(1)->HeadPtr(), input.Child(1)->Tail().HeadPtr())
+ .With(0, "item")
+ .Seal()
+ .Done()
+ .Seal()
+ .Seal()
+ .Add(2, node->TailPtr())
+ .Seal().Build();
+ }
+
+ if (lambda.Tail().IsCallable({"SafeCast", "StrictCast"}) && node->Tail().IsCallable({"Nothing","EmptyFrom"}) && &lambda.Tail().Head() == &lambda.Head().Head() &&
+ ETypeAnnotationKind::Optional != node->Head().GetTypeAnn()->Cast<TOptionalExprType>()->GetItemType()->GetKind()) {
+ YQL_CLOG(DEBUG, Core) << "Drop " << node->Content() << " with " << lambda.Tail().Content() << " and " << node->Tail().Content();
+ return ctx.ChangeChild(lambda.Tail(), 0U, node->HeadPtr());
+ }
+
+ if constexpr (Cannonize) {
+ if (node->Tail().IsCallable({"Nothing","EmptyFrom"}) && node->Tail().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Pg) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " with else " << node->Tail().Content();
+ return ctx.NewCallable(node->Pos(), "FlatMap", { node->HeadPtr(), node->ChildPtr(1) });
+ }
+ }
+ }
+
+ if constexpr (!Cannonize && EnableNewOptimizers) {
+ if (lambda.Tail().IsCallable("IfPresent") && &node->Tail() == &lambda.Tail().Tail() && lambda.Tail().Head().GetDependencyScope()->second != &lambda) {
+ auto innerOptionals = lambda.Tail().ChildrenList();
+ innerOptionals.resize(innerOptionals.size() - 2U);
+ const auto find = std::find_if(innerOptionals.cbegin(), innerOptionals.cend(), [l = &lambda] (const TExprNode::TPtr& child) {
+ return child->GetDependencyScope()->second == l;
+ });
+ const auto count = std::distance(innerOptionals.cbegin(), find);
+ YQL_CLOG(DEBUG, Core) << node->Content() << " pull " << count << " arg(s) from inner " << lambda.Tail().Content();
+
+ auto& innerLambda = *lambda.Tail().Child(innerOptionals.size());
+
+ auto args = lambda.Head().ChildrenList();
+ auto innerArgs = innerLambda.Head().ChildrenList();
+
+ const auto splitter = [count](TExprNode::TListType& src, TExprNode::TListType& dst) {
+ dst.reserve(dst.size() + count);
+ auto it = src.begin();
+ std::advance(it, count);
+ std::move(src.begin(), it, std::back_inserter(dst));
+ src.erase(src.cbegin(), it);
+ };
+
+ splitter(innerArgs, args);
+ splitter(innerOptionals, optionals);
+
+ auto body = innerLambda.TailPtr();
+ if (!(innerArgs.empty() || innerOptionals.empty())) {
+ innerOptionals.emplace_back(ctx.DeepCopyLambda(*ctx.NewLambda(innerLambda.Pos(), ctx.NewArguments(innerLambda.Head().Pos(), std::move(innerArgs)), std::move(body))));
+ innerOptionals.emplace_back(lambda.Tail().TailPtr());
+ body = ctx.ChangeChildren(lambda.Tail(), std::move(innerOptionals));
+ }
+
+ optionals.emplace_back(ctx.DeepCopyLambda(*ctx.NewLambda(lambda.Pos(), ctx.NewArguments(lambda.Head().Pos(), std::move(args)), std::move(body))));
+ optionals.emplace_back(node->TailPtr());
+ return ctx.ChangeChildren(*node, std::move(optionals));
+ }
+ }
+
+ return node;
+}
+
+template TExprNode::TPtr OptimizeIfPresent<true, true>(const TExprNode::TPtr& node, TExprContext& ctx);
+template TExprNode::TPtr OptimizeIfPresent<false, true>(const TExprNode::TPtr& node, TExprContext& ctx);
+template TExprNode::TPtr OptimizeIfPresent<false, false>(const TExprNode::TPtr& node, TExprContext& ctx);
+
+TExprNode::TPtr OptimizeExists(const TExprNode::TPtr& node, TExprContext& ctx) {
+ if (HasError(node->Head().GetTypeAnn(), ctx)) {
+ return TExprNode::TPtr();
+ }
+
+ if (node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Void) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return MakeBool<false>(node->Pos(), ctx);
+ }
+
+ if (node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Null) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return MakeBool<false>(node->Pos(), ctx);
+ }
+
+ if (node->Head().IsCallable({"Just", "PgConst"})) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return MakeBool<true>(node->Pos(), ctx);
+ }
+
+ if (node->Head().IsCallable({"Nothing","EmptyFrom"})) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over " << node->Head().Content();
+ return MakeBool<false>(node->Pos(), ctx);
+ }
+
+ if (node->Head().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Optional &&
+ node->Head().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Pg) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over non-optional";
+ return MakeBool<true>(node->Pos(), ctx);
+ }
+
+ if (const auto& input = node->Head(); IsTransparentIfPresent(input)) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " over transparent " << input.Content();
+ return ctx.ChangeChildren(*node, {input.HeadPtr()});
+ }
+
+ return node;
+}
+
+bool WarnUnroderedSubquery(const TExprNode& unourderedSubquery, TExprContext& ctx) {
+ YQL_ENSURE(unourderedSubquery.IsCallable("UnorderedSubquery"));
+
+ auto issueCode = EYqlIssueCode::TIssuesIds_EIssueCode_YQL_ORDER_BY_WITHOUT_LIMIT_IN_SUBQUERY;
+ auto issue = TIssue(ctx.GetPosition(unourderedSubquery.Head().Pos()), "ORDER BY in subquery will be ignored");
+ auto subIssue = TIssue(ctx.GetPosition(unourderedSubquery.Pos()), "When used here");
+ SetIssueCode(issueCode, issue);
+ SetIssueCode(issueCode, subIssue);
+ issue.AddSubIssue(MakeIntrusive<TIssue>(subIssue));
+
+ return ctx.AddWarning(issue);
+}
+
+std::pair<TExprNode::TPtr, TExprNode::TPtr> ReplaceDependsOn(TExprNode::TPtr lambda, TExprContext& ctx, TTypeAnnotationContext* typeCtx) {
+ auto placeHolder = ctx.NewArgument(lambda->Pos(), "placeholder");
+
+ auto status = OptimizeExpr(lambda, lambda, [&placeHolder, arg = &lambda->Head().Head()](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr {
+ if (TCoDependsOn::Match(node.Get()) && &node->Head() == arg) {
+ return ctx.ChangeChild(*node, 0, TExprNode::TPtr(placeHolder));
+ }
+ return node;
+ }, ctx, TOptimizeExprSettings{typeCtx});
+
+ if (status.Level == IGraphTransformer::TStatus::Error) {
+ return std::pair<TExprNode::TPtr, TExprNode::TPtr>{};
+ }
+
+ return {placeHolder, lambda};
+}
+
+TStringBuf GetEmptyCollectionName(ETypeAnnotationKind kind) {
+ switch (kind) {
+ case ETypeAnnotationKind::Flow:
+ case ETypeAnnotationKind::Stream: return "EmptyIterator";
+ case ETypeAnnotationKind::List: return "List";
+ case ETypeAnnotationKind::Optional: return "Nothing";
+ case ETypeAnnotationKind::Dict: return "Dict";
+ case ETypeAnnotationKind::Pg: return "Nothing";
+ default: break;
+ }
+ return {};
+}
+
+namespace {
+
+constexpr ui64 MaxWeight = std::numeric_limits<ui64>::max();
+constexpr ui64 UnknownWeight = std::numeric_limits<ui32>::max();
+
+ui64 GetTypeWeight(const TTypeAnnotationNode& type) {
+ switch (type.GetKind()) {
+ case ETypeAnnotationKind::Data:
+ switch (type.Cast<TDataExprType>()->GetSlot()) {
+ case NUdf::EDataSlot::Bool:
+ case NUdf::EDataSlot::Int8:
+ case NUdf::EDataSlot::Uint8: return 1;
+
+ case NUdf::EDataSlot::Int16:
+ case NUdf::EDataSlot::Uint16:
+ case NUdf::EDataSlot::Date: return 2;
+
+ case NUdf::EDataSlot::TzDate: return 3;
+
+ case NUdf::EDataSlot::Int32:
+ case NUdf::EDataSlot::Uint32:
+ case NUdf::EDataSlot::Float:
+ case NUdf::EDataSlot::Date32:
+ case NUdf::EDataSlot::Datetime: return 4;
+
+ case NUdf::EDataSlot::TzDatetime: return 5;
+
+ case NUdf::EDataSlot::Int64:
+ case NUdf::EDataSlot::Uint64:
+ case NUdf::EDataSlot::Double:
+ case NUdf::EDataSlot::Datetime64:
+ case NUdf::EDataSlot::Timestamp64:
+ case NUdf::EDataSlot::Interval64:
+ case NUdf::EDataSlot::Timestamp:
+ case NUdf::EDataSlot::Interval: return 8;
+
+ case NUdf::EDataSlot::TzTimestamp: return 9;
+
+ case NUdf::EDataSlot::Decimal: return 15;
+ case NUdf::EDataSlot::Uuid: return 16;
+
+ default: return 32;
+ }
+ case ETypeAnnotationKind::Optional:
+ return 1 + GetTypeWeight(*type.Cast<TOptionalExprType>()->GetItemType());
+ case ETypeAnnotationKind::Pg: {
+ const auto& typeDesc = NPg::LookupType(type.Cast<TPgExprType>()->GetId());
+ if (typeDesc.PassByValue) {
+ return typeDesc.TypeLen;
+ }
+ return UnknownWeight;
+ }
+ default:
+ return UnknownWeight;
+ }
+}
+
+} // namespace
+
+const TItemExprType* GetLightColumn(const TStructExprType& type) {
+ ui64 weight = MaxWeight;
+ const TItemExprType* field = nullptr;
+ for (const auto& item : type.GetItems()) {
+ if (const auto w = GetTypeWeight(*item->GetItemType()); w < weight) {
+ weight = w;
+ field = item;
+ }
+ }
+ return field;
+}
+
+TVector<TStringBuf> GetCommonKeysFromVariantSelector(const NNodes::TCoLambda& lambda) {
+ if (auto maybeVisit = lambda.Body().Maybe<TCoVisit>()) {
+ if (maybeVisit.Input().Raw() == lambda.Args().Arg(0).Raw()) {
+ TVector<TStringBuf> members;
+ for (ui32 index = 1; index < maybeVisit.Raw()->ChildrenSize(); ++index) {
+ if (maybeVisit.Raw()->Child(index)->IsAtom()) {
+ ++index;
+ auto visitLambda = maybeVisit.Raw()->Child(index);
+ auto arg = visitLambda->Child(0)->Child(0);
+
+ TVector<TStringBuf> visitMembers;
+ if (TMaybeNode<TCoMember>(visitLambda->Child(1)).Struct().Raw() == arg) {
+ visitMembers.push_back(TCoMember(visitLambda->Child(1)).Name().Value());
+ }
+ else if (auto maybeList = TMaybeNode<TExprList>(visitLambda->Child(1))) {
+ for (auto item: maybeList.Cast()) {
+ if (item.Maybe<TCoMember>().Struct().Raw() == arg) {
+ visitMembers.push_back(item.Cast<TCoMember>().Name().Value());
+ } else {
+ return {};
+ }
+ }
+ }
+ if (visitMembers.empty()) {
+ return {};
+ } else if (members.empty()) {
+ members = visitMembers;
+ } else if (members != visitMembers) {
+ return {};
+ }
+ } else {
+ return {};
+ }
+ }
+ return members;
+ }
+ return {};
+ }
+ return {};
+}
+
+bool IsIdentityLambda(const TExprNode& lambda) {
+ return lambda.IsLambda() && lambda.Head().ChildrenSize() == 1 && &lambda.Head().Head() == &lambda.Tail();
+}
+
+TExprNode::TPtr MakeExpandMap(TPositionHandle pos, const TVector<TString>& columns, const TExprNode::TPtr& input, TExprContext& ctx) {
+ return ctx.Builder(pos)
+ .Callable("ExpandMap")
+ .Add(0, input)
+ .Lambda(1)
+ .Param("item")
+ .Do([&](TExprNodeBuilder& lambda) -> TExprNodeBuilder& {
+ ui32 i = 0U;
+ for (const auto& col : columns) {
+ lambda.Callable(i++, "Member")
+ .Arg(0, "item")
+ .Atom(1, col)
+ .Seal();
+ }
+ return lambda;
+ })
+ .Seal()
+ .Seal()
+ .Build();
+}
+
+TExprNode::TPtr MakeNarrowMap(TPositionHandle pos, const TVector<TString>& columns, const TExprNode::TPtr& input, TExprContext& ctx) {
+ return ctx.Builder(pos)
+ .Callable("NarrowMap")
+ .Add(0, input)
+ .Lambda(1)
+ .Params("fields", columns.size())
+ .Callable("AsStruct")
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ ui32 i = 0U;
+ for (const auto& col : columns) {
+ parent.List(i)
+ .Atom(0, col)
+ .Arg(1, "fields", i)
+ .Seal();
+ ++i;
+ }
+ return parent;
+ })
+ .Seal()
+ .Seal()
+ .Seal()
+ .Build();
+}
+
+TExprNode::TPtr FindNonYieldTransparentNodeImpl(const TExprNode::TPtr& root, const bool udfSupportsYield, const TNodeSet& flowSources) {
+ auto depensOnFlow = [&flowSources](const TExprNode::TPtr& node) {
+ return !!FindNode(node,
+ [](const TExprNode::TPtr& n) {
+ return !TCoDependsOn::Match(n.Get());
+ },
+ [&flowSources](const TExprNode::TPtr& n) {
+ return flowSources.contains(n.Get());
+ }
+ );
+ };
+
+ auto candidates = FindNodes(root,
+ [&flowSources](const TExprNode::TPtr& node) {
+ if (flowSources.contains(node.Get()) || TCoDependsOn::Match(node.Get())) {
+ return false;
+ }
+ if (node->ChildrenSize() > 0 && node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::World) {
+ return false;
+ }
+ return true;
+ },
+ [](const TExprNode::TPtr& node) {
+ return TCoCollect::Match(node.Get())
+ || TCoForwardList::Match(node.Get())
+ || TCoApply::Match(node.Get())
+ || TCoSwitch::Match(node.Get())
+ || node->IsCallable("DqReplicate")
+ || TCoPartitionsByKeys::Match(node.Get());
+ }
+ );
+
+ for (auto candidate: candidates) {
+ if (TCoCollect::Match(candidate.Get()) || TCoForwardList::Match(candidate.Get())) {
+ if (depensOnFlow(candidate->HeadPtr())) {
+ return candidate;
+ }
+ } else if (TCoApply::Match(candidate.Get())) {
+ if (AnyOf(candidate->Children().begin() + 1, candidate->Children().end(), depensOnFlow)) {
+ if (!IsFlowOrStream(*candidate)) {
+ while (TCoApply::Match(candidate.Get())) {
+ candidate = candidate->HeadPtr();
+ }
+ return candidate;
+ }
+ if (!udfSupportsYield) {
+ while (TCoApply::Match(candidate.Get())) {
+ candidate = candidate->HeadPtr();
+ }
+ if (TCoScriptUdf::Match(candidate.Get())) {
+ return candidate;
+ }
+ }
+ }
+ } else if (TCoSwitch::Match(candidate.Get())) {
+ for (size_t i = 3; i < candidate->ChildrenSize(); i += 2) {
+ if (auto node = FindNonYieldTransparentNodeImpl(candidate->Child(i)->TailPtr(), udfSupportsYield, TNodeSet{&candidate->Child(i)->Head().Head()})) {
+ return node;
+ }
+ }
+ } else if (candidate->IsCallable("DqReplicate")) {
+ for (size_t i = 1; i < candidate->ChildrenSize(); ++i) {
+ if (auto node = FindNonYieldTransparentNodeImpl(candidate->Child(i)->TailPtr(), udfSupportsYield, TNodeSet{&candidate->Child(i)->Head().Head()})) {
+ return node;
+ }
+ }
+ } else if (TCoPartitionsByKeys::Match(candidate.Get())) {
+ const auto handlerChild = candidate->Child(TCoPartitionsByKeys::idx_ListHandlerLambda);
+ if (auto node = FindNonYieldTransparentNodeImpl(handlerChild->TailPtr(), udfSupportsYield, TNodeSet{&handlerChild->Head().Head()})) {
+ return node;
+ }
+ }
+ }
+ return {};
+}
+
+TExprNode::TPtr FindNonYieldTransparentNode(const TExprNode::TPtr& root, const TTypeAnnotationContext& typeCtx, TNodeSet flowSources) {
+ TExprNode::TPtr from = root;
+ if (root->IsLambda()) {
+ if (IsIdentityLambda(*root)) {
+ return {};
+ }
+ from = root->TailPtr();
+
+ // Add all flow lambda args
+ root->Head().ForEachChild([&flowSources](const TExprNode& arg) {
+ if (IsFlowOrStream(arg)) {
+ flowSources.insert(&arg);
+ }
+ });
+ }
+
+ static const THashSet<TStringBuf> WHITE_LIST = {"EmptyIterator"sv, TCoToStream::CallableName(), TCoIterator::CallableName(),
+ TCoToFlow::CallableName(), TCoApply::CallableName(), TCoNth::CallableName(), TCoMux::CallableName()};
+ // Find all other flow sources (readers)
+ auto sources = FindNodes(from,
+ [](const TExprNode::TPtr& node) {
+ return !node->IsCallable(WHITE_LIST)
+ && node->IsCallable()
+ && IsFlowOrStream(*node)
+ && (node->ChildrenSize() == 0 || !IsFlowOrStream(node->Head()));
+ }
+ );
+ std::for_each(sources.cbegin(), sources.cend(), [&flowSources](const TExprNode::TPtr& node) { flowSources.insert(node.Get()); });
+
+ if (flowSources.empty()) {
+ return {};
+ }
+ return FindNonYieldTransparentNodeImpl(from, typeCtx.UdfSupportsYield, flowSources);
+}
+
+bool IsYieldTransparent(const TExprNode::TPtr& root, const TTypeAnnotationContext& typeCtx) {
+ return !FindNonYieldTransparentNode(root, typeCtx);
+}
+
+TMaybe<bool> IsStrictNoRecurse(const TExprNode& node) {
+ if (node.IsCallable({"Unwrap", "Ensure", "ScriptUdf", "Error", "ErrorType"})) {
+ return false;
+ }
+ if (node.IsCallable("Udf")) {
+ return HasSetting(*node.Child(TCoUdf::idx_Settings), "strict");
+ }
+ return {};
+}
+
+bool IsStrict(const TExprNode::TPtr& root) {
+ // TODO: add TExprNode::IsStrict() method (with corresponding flag). Fill it as part of type annotation pass
+ bool isStrict = true;
+ VisitExpr(root, [&](const TExprNode::TPtr& node) {
+ if (node->IsCallable("AssumeStrict")) {
+ return false;
+ }
+
+ if (node->IsCallable("AssumeNonStrict")) {
+ isStrict = false;
+ return false;
+ }
+
+ auto maybeStrict = IsStrictNoRecurse(*node);
+ if (maybeStrict.Defined() && !*maybeStrict) {
+ isStrict = false;
+ }
+
+ return isStrict;
+ });
+
+ return isStrict;
+}
+
+bool HasDependsOn(const TExprNode::TPtr& root, const TExprNode::TPtr& arg) {
+ bool withDependsOn = false;
+ size_t insideDependsOn = 0;
+
+ VisitExpr(root, [&](const TExprNode::TPtr& node) {
+ if (node->IsCallable("DependsOn")) {
+ ++insideDependsOn;
+ } else if (insideDependsOn && node == arg) {
+ withDependsOn = true;
+ }
+ return !withDependsOn;
+ }, [&](const TExprNode::TPtr& node) {
+ if (node->IsCallable("DependsOn")) {
+ YQL_ENSURE(insideDependsOn > 0);
+ --insideDependsOn;
+ }
+ return true;
+ });
+
+ return withDependsOn;
+}
+
+template<bool Assume>
+TExprNode::TPtr MakeSortConstraintImpl(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx) {
+ if (!(sorted && rowType))
+ return node;
+
+ const auto& constent = sorted->GetContent();
+ return ctx.Builder(node->Pos())
+ .Callable(Assume ? "AssumeSorted" : "Sort")
+ .Add(0, std::move(node))
+ .List(1)
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ size_t index = 0;
+ for (const auto& c : constent) {
+ parent.Callable(index++, "Bool")
+ .Atom(0, ToString(c.second), TNodeFlags::Default)
+ .Seal();
+ }
+ return parent;
+ })
+ .Seal()
+ .Lambda(2)
+ .Param("item")
+ .List()
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
+ size_t index = 0;
+ for (const auto& c : constent)
+ GetterBuilder(parent, index++, *rowType, c.first.front());
+ return parent;
+ })
+ .Seal()
+ .Seal()
+ .Seal()
+ .Build();
+}
+
+TExprNode::TPtr KeepSortedConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx) {
+ return MakeSortConstraintImpl<true>(std::move(node), sorted, rowType, ctx);
+}
+
+TExprNode::TPtr MakeSortByConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx) {
+ return MakeSortConstraintImpl<false>(std::move(node), sorted, rowType, ctx);
+}
+
+TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx) {
+ auto res = KeepSortedConstraint(node, src.GetConstraint<TSortedConstraintNode>(), GetSeqItemType(src.GetTypeAnn()), ctx);
+ res = KeepChoppedConstraint(std::move(res), src, ctx);
+ res = KeepUniqueConstraint<true>(std::move(res), src, ctx);
+ res = KeepUniqueConstraint<false>(std::move(res), src, ctx);
+ return res;
+}
+
+bool HasOnlyOneJoinType(const TExprNode& joinTree, TStringBuf joinType) {
+ if (joinTree.IsAtom()) {
+ return true;
+ }
+
+ YQL_ENSURE(joinTree.Child(0)->IsAtom());
+ if (joinTree.Child(0)->Content() != joinType) {
+ return false;
+ }
+
+ return HasOnlyOneJoinType(*joinTree.Child(1), joinType) && HasOnlyOneJoinType(*joinTree.Child(2), joinType);
+}
+
+void OptimizeSubsetFieldsForNodeWithMultiUsage(const TExprNode::TPtr& node, const TParentsMap& parentsMap,
+ TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx,
+ std::function<TExprNode::TPtr(const TExprNode::TPtr&, const TExprNode::TPtr&, const TParentsMap&, TExprContext&)> handler)
+{
+
+ // Ignore stream input, because it cannot be used multiple times
+ if (node->GetTypeAnn()->GetKind() != ETypeAnnotationKind::List) {
+ return;
+ }
+ auto itemType = node->GetTypeAnn()->Cast<TListExprType>()->GetItemType();
+ if (itemType->GetKind() != ETypeAnnotationKind::Struct) {
+ return;
+ }
+ auto structType = itemType->Cast<TStructExprType>();
+
+ auto it = parentsMap.find(node.Get());
+ if (it == parentsMap.cend() || it->second.size() <= 1) {
+ return;
+ }
+
+ TSet<TStringBuf> usedFields;
+ for (auto parent: it->second) {
+ if (auto maybeFlatMap = TMaybeNode<TCoFlatMapBase>(parent)) {
+ auto flatMap = maybeFlatMap.Cast();
+ TSet<TStringBuf> lambdaSubset;
+ if (!HaveFieldsSubset(flatMap.Lambda().Body().Ptr(), flatMap.Lambda().Args().Arg(0).Ref(), lambdaSubset, parentsMap)) {
+ return;
+ }
+ usedFields.insert(lambdaSubset.cbegin(), lambdaSubset.cend());
+ }
+ else if (auto maybeExtractMembers = TMaybeNode<TCoExtractMembers>(parent)) {
+ auto extractMembers = maybeExtractMembers.Cast();
+ for (auto member: extractMembers.Members()) {
+ usedFields.insert(member.Value());
+ }
+ }
+ else {
+ return;
+ }
+ if (usedFields.size() == structType->GetSize()) {
+ return;
+ }
+ }
+
+ TExprNode::TListType members;
+ for (auto column : usedFields) {
+ members.push_back(ctx.NewAtom(node->Pos(), column));
+ }
+
+ auto newInput = handler(node, ctx.NewList(node->Pos(), std::move(members)), parentsMap, ctx);
+ if (!newInput || newInput == node) {
+ return;
+ }
+
+ for (auto parent: it->second) {
+ if (TCoExtractMembers::Match(parent)) {
+ if (parent->GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>()->GetSize() == usedFields.size()) {
+ toOptimize[parent] = newInput;
+ } else {
+ toOptimize[parent] = ctx.ChangeChild(*parent, 0, TExprNode::TPtr(newInput));
+ }
+ } else {
+ toOptimize[parent] = ctx.Builder(parent->Pos())
+ .Callable(parent->Content())
+ .Add(0, newInput)
+ .Lambda(1)
+ .Param("item")
+ .Apply(parent->ChildPtr(1)).With(0, "item").Seal()
+ .Seal()
+ .Seal()
+ .Build();
+ }
+ }
+}
+
+
+template<bool Ordered>
+std::optional<std::pair<TPartOfConstraintBase::TPathType, ui32>> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args) {
+ if (body.IsArgument()) {
+ for (auto i = 0U; i < args.size(); ++i)
+ if (&body == args[i].Get())
+ return std::make_pair(TPartOfConstraintBase::TPathType(), i);
+ } else if (body.IsCallable({"Member","Nth"})) {
+ if (auto path = GetPathToKey<Ordered>(body.Head(), args)) {
+ path->first.emplace_back(body.Tail().Content());
+ return path;
+ } else if (const auto& head = SkipCallables(body.Head(), {"CastStruct","FilterMembers"}); head.IsCallable("AsStruct") && body.IsCallable("Member")) {
+ return GetPathToKey<Ordered>(GetLiteralStructMember(head, body.Tail()), args);
+ } else if (body.IsCallable("Nth") && body.Head().IsList()) {
+ return GetPathToKey<Ordered>(*body.Head().Child(FromString<ui32>(body.Tail().Content())), args);
+ } else if (body.IsCallable({"CastStruct","FilterMembers"})) {
+ return GetPathToKey<Ordered>(body.Head(), args);
+ }
+ } else if constexpr (!Ordered) {
+ if (body.IsCallable("StablePickle")) {
+ return GetPathToKey<Ordered>(body.Head(), args);
+ }
+ }
+
+ return std::nullopt;
+}
+
+template std::optional<std::pair<TPartOfConstraintBase::TPathType, ui32>> GetPathToKey<true>(const TExprNode& body, const TExprNode::TChildrenType& args);
+template std::optional<std::pair<TPartOfConstraintBase::TPathType, ui32>> GetPathToKey<false>(const TExprNode& body, const TExprNode::TChildrenType& args);
+
+template<bool Ordered>
+std::optional<TPartOfConstraintBase::TPathType> GetPathToKey(const TExprNode& body, const TExprNode& arg) {
+ if (&body == &arg)
+ return TPartOfConstraintBase::TPathType();
+
+ if (body.IsCallable({"Member","Nth"})) {
+ if (auto path = GetPathToKey(body.Head(), arg)) {
+ path->emplace_back(body.Tail().Content());
+ return path;
+ }
+ }
+
+ if (body.IsCallable({"CastStruct","FilterMembers","Just","Unwrap"}))
+ return GetPathToKey<Ordered>(body.Head(), arg);
+ if (body.IsCallable("Member") && body.Head().IsCallable("AsStruct"))
+ return GetPathToKey<Ordered>(GetLiteralStructMember(body.Head(), body.Tail()), arg);
+ if (body.IsCallable("Nth") && body.Head().IsList())
+ return GetPathToKey<Ordered>(*body.Head().Child(FromString<ui32>(body.Tail().Content())), arg);
+ if (body.IsList() && 1U == body.ChildrenSize() && body.Head().IsCallable("Nth") && body.Head().Tail().IsAtom("0") &&
+ 1U == RemoveOptionality(*body.Head().Head().GetTypeAnn()).Cast<TTupleExprType>()->GetSize())
+ // Especialy for "Extract single item tuple from Condense1" optimizer.
+ return GetPathToKey(body.Head().Head(), arg);
+ if (body.IsCallable("AsStruct") && 1U == body.ChildrenSize() && body.Head().Tail().IsCallable("Member") &&
+ body.Head().Head().Content() == body.Head().Tail().Tail().Content() &&
+ 1U == RemoveOptionality(*body.Head().Tail().Head().GetTypeAnn()).Cast<TStructExprType>()->GetSize())
+ // Especialy for "Extract single item struct from Condense1" optimizer.
+ return GetPathToKey<Ordered>(body.Head().Tail().Head(), arg);
+ if (IsTransparentIfPresent(body) && &body.Head() == &arg)
+ return GetPathToKey<Ordered>(body.Child(1)->Tail().Head(), body.Child(1)->Head().Head());
+ if constexpr (!Ordered)
+ if (body.IsCallable("StablePickle"))
+ return GetPathToKey<Ordered>(body.Head(), arg);
+
+ return std::nullopt;
+}
+
+template<bool Ordered>
+TPartOfConstraintBase::TSetType GetPathsToKeys(const TExprNode& body, const TExprNode& arg) {
+ TPartOfConstraintBase::TSetType keys;
+ if (body.IsList()) {
+ if (const auto size = body.ChildrenSize()) {
+ keys.reserve(size);
+ for (auto i = 0U; i < size; ++i)
+ if (auto path = GetPathToKey<Ordered>(*body.Child(i), arg))
+ keys.insert_unique(std::move(*path));
+ }
+ } else if constexpr (!Ordered) {
+ if (body.IsCallable("StablePickle")) {
+ return GetPathsToKeys<Ordered>(body.Head(), arg);
+ }
+ }
+ if (auto path = GetPathToKey<Ordered>(body, arg)) {
+ keys.insert_unique(std::move(*path));
+ }
+
+ return keys;
+}
+
+template TPartOfConstraintBase::TSetType GetPathsToKeys<true>(const TExprNode& body, const TExprNode& arg);
+template TPartOfConstraintBase::TSetType GetPathsToKeys<false>(const TExprNode& body, const TExprNode& arg);
+
+TVector<TString> GenNoClashColumns(const TStructExprType& source, TStringBuf prefix, size_t count) {
+ YQL_ENSURE(prefix.StartsWith("_yql"));
+ TSet<size_t> existing;
+ for (auto& item : source.GetItems()) {
+ TStringBuf column = item->GetName();
+ if (column.SkipPrefix(prefix)) {
+ size_t idx;
+ if (TryFromString(column, idx)) {
+ existing.insert(idx);
+ }
+ }
+ }
+
+ size_t current = 0;
+ TVector<TString> result;
+ auto it = existing.cbegin();
+ while (count) {
+ if (it == existing.cend() || current < *it) {
+ result.push_back(TStringBuilder() << prefix << current);
+ --count;
+ } else {
+ ++it;
+ }
+ YQL_ENSURE(!count || (current + 1 > current));
+ ++current;
+ }
+ return result;
+}
+
+
+}