diff options
author | mpereskokova <[email protected]> | 2025-03-19 15:28:46 +0300 |
---|---|---|
committer | mpereskokova <[email protected]> | 2025-03-19 15:47:24 +0300 |
commit | 95040611fee3b60e9fb35829f51822b8970d7d70 (patch) | |
tree | 26f0a5f502795661fa5ecf9849219c0e448c39e3 | |
parent | 58b29f8c8934d54a748f4638ce45afcd376cf250 (diff) |
Introduce PruneKeys Callables
commit_hash:ca25a6ece5ae902c4b93846f69bc33b6336a669c
16 files changed, 490 insertions, 27 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_extr_members.cpp b/yql/essentials/core/common_opt/yql_co_extr_members.cpp index c0706815dc0..55923cde936 100644 --- a/yql/essentials/core/common_opt/yql_co_extr_members.cpp +++ b/yql/essentials/core/common_opt/yql_co_extr_members.cpp @@ -176,42 +176,55 @@ TExprNode::TPtr ApplyExtractMembersToFilterNullMembers(const TExprNode::TPtr& no .Done().Ptr(); } -TExprNode::TPtr ApplyExtractMembersToSort(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix) { - TCoSortBase sort(node); +TExprNode::TPtr ApplyExtractMembersToSortOrPruneKeys(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix) { + auto nodeIsPruneKeys = node->IsCallable("PruneKeys") || node->IsCallable("PruneAdjacentKeys"); + auto nodeIsSort = !nodeIsPruneKeys; + auto keyExtractorLambdaIndex = nodeIsSort ? 2 : 1; + + TCoLambda keyExtractorLambda(node->ChildPtr(keyExtractorLambdaIndex)); + TSet<TStringBuf> extractFields; for (const auto& x : members->ChildrenList()) { extractFields.emplace(x->Content()); } - TSet<TStringBuf> sortKeys; - bool fieldSubset = HaveFieldsSubset(sort.KeySelectorLambda().Body().Ptr(), sort.KeySelectorLambda().Args().Arg(0).Ref(), sortKeys, parentsMap); + TSet<TStringBuf> usedKeys; + bool fieldSubset = HaveFieldsSubset(keyExtractorLambda.Body().Ptr(), keyExtractorLambda.Args().Arg(0).Ref(), usedKeys, parentsMap); bool allExist = true; - if (!sortKeys.empty()) { - for (const auto& key : sortKeys) { + if (!usedKeys.empty()) { + for (const auto& key : usedKeys) { auto ret = extractFields.emplace(key); if (ret.second) { allExist = false; } } } - if (allExist && sortKeys.size() == extractFields.size()) { + if (allExist && usedKeys.size() == extractFields.size()) { YQL_CLOG(DEBUG, Core) << "Force `fieldSubset` for ExtractMembers over " << node->Content(); fieldSubset = true; } if (fieldSubset && allExist) { YQL_CLOG(DEBUG, Core) << "Move ExtractMembers over " << node->Content() << logSuffix; - return ctx.Builder(sort.Pos()) + auto result = ctx.Builder(node->Pos()) .Callable(node->Content()) - .Callable(0, TCoExtractMembers::CallableName()) - .Add(0, sort.Input().Ptr()) - .Add(1, members) - .Seal() - .Add(1, sort.SortDirections().Ptr()) - .Add(2, ctx.DeepCopyLambda(sort.KeySelectorLambda().Ref())) .Seal() .Build(); + + TExprNode::TListType children; + children.push_back(ctx.Builder(node->Pos()) + .Callable(TCoExtractMembers::CallableName()) + .Add(0, node->HeadPtr()) + .Add(1, members) + .Seal() + .Build()); + if (nodeIsSort) { + children.push_back(node->ChildPtr(1)); + } + children.push_back(ctx.DeepCopyLambda(keyExtractorLambda.Ref())); + + return ctx.ChangeChildren(*result, std::move(children)); } else if (fieldSubset) { - const auto structType = GetSeqItemType(*sort.Ref().GetTypeAnn()).Cast<TStructExprType>(); + const auto structType = GetSeqItemType(node->GetTypeAnn())->Cast<TStructExprType>(); if (structType->GetSize() <= extractFields.size()) { return {}; } @@ -221,16 +234,26 @@ TExprNode::TPtr ApplyExtractMembersToSort(const TExprNode::TPtr& node, const TEx totalExtracted.emplace_back(ctx.NewAtom(members->Pos(), field)); } - return ctx.Builder(sort.Pos()) + TExprNode::TListType children; + children.push_back(ctx.Builder(node->Pos()) + .Callable(TCoExtractMembers::CallableName()) + .Add(0, node->HeadPtr()) + .Add(1, ctx.NewList(members->Pos(), std::move(totalExtracted))) + .Seal() + .Build()); + if (nodeIsSort) { + children.push_back(node->ChildPtr(1)); + } + children.push_back(ctx.DeepCopyLambda(keyExtractorLambda.Ref())); + + auto internalPartOfExtractMembers = ctx.Builder(node->Pos()) + .Callable(node->Content()) + .Seal() + .Build(); + + return ctx.Builder(node->Pos()) .Callable(TCoExtractMembers::CallableName()) - .Callable(0, node->Content()) - .Callable(0, TCoExtractMembers::CallableName()) - .Add(0, sort.Input().Ptr()) - .Add(1, ctx.NewList(members->Pos(), std::move(totalExtracted))) - .Seal() - .Add(1, sort.SortDirections().Ptr()) - .Add(2, ctx.DeepCopyLambda(sort.KeySelectorLambda().Ref())) - .Seal() + .Add(0, ctx.ChangeChildren(*internalPartOfExtractMembers, std::move(children))) .Add(1, members) .Seal() .Build(); diff --git a/yql/essentials/core/common_opt/yql_co_extr_members.h b/yql/essentials/core/common_opt/yql_co_extr_members.h index 6157cd7a284..467a7a9fff9 100644 --- a/yql/essentials/core/common_opt/yql_co_extr_members.h +++ b/yql/essentials/core/common_opt/yql_co_extr_members.h @@ -14,7 +14,7 @@ TExprNode::TPtr ApplyExtractMembersToSkip(const TExprNode::TPtr& node, const TEx TExprNode::TPtr ApplyExtractMembersToExtend(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix); TExprNode::TPtr ApplyExtractMembersToSkipNullMembers(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix); TExprNode::TPtr ApplyExtractMembersToFilterNullMembers(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix); -TExprNode::TPtr ApplyExtractMembersToSort(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix); +TExprNode::TPtr ApplyExtractMembersToSortOrPruneKeys(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix); TExprNode::TPtr ApplyExtractMembersToAssumeUnique(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix); TExprNode::TPtr ApplyExtractMembersToTop(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix); TExprNode::TPtr ApplyExtractMembersToEquiJoin(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix); diff --git a/yql/essentials/core/common_opt/yql_co_finalizers.cpp b/yql/essentials/core/common_opt/yql_co_finalizers.cpp index 6c9bc8036ff..c1fce215cf2 100644 --- a/yql/essentials/core/common_opt/yql_co_finalizers.cpp +++ b/yql/essentials/core/common_opt/yql_co_finalizers.cpp @@ -395,7 +395,17 @@ void RegisterCoFinalizers(TFinalizingOptimizerMap& map) { } OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx, [] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) { - return ApplyExtractMembersToSort(input, members, parentsMap, ctx, " with multi-usage"); + return ApplyExtractMembersToSortOrPruneKeys(input, members, parentsMap, ctx, " with multi-usage"); + } + ); + + return true; + }; + + map[TCoPruneKeys::CallableName()] = map[TCoPruneAdjacentKeys::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) { + OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx, + [] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) { + return ApplyExtractMembersToSortOrPruneKeys(input, members, parentsMap, ctx, " with multi-usage"); } ); diff --git a/yql/essentials/core/common_opt/yql_co_flow1.cpp b/yql/essentials/core/common_opt/yql_co_flow1.cpp index 0bd3cc68dba..7bcfc55dbcd 100644 --- a/yql/essentials/core/common_opt/yql_co_flow1.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow1.cpp @@ -1413,6 +1413,52 @@ TExprNode::TPtr OptimizeFlatMap(const TExprNode::TPtr& node, TExprContext& ctx, void RegisterCoFlowCallables1(TCallableOptimizerMap& map) { using namespace std::placeholders; + map["PruneKeys"] = map["PruneAdjacentKeys"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + if (!optCtx.IsSingleUsage(node->Head())) { + return node; + } + + TCoPruneKeysBase pruneKeys(node); + TCoLambda keyExtractorLambda = pruneKeys.Extractor(); + TSet<TStringBuf> columns; + + if (!HaveFieldsSubset(keyExtractorLambda.Ref().Child(1), *keyExtractorLambda.Ref().Child(0)->Child(0), columns, *optCtx.ParentsMap)) { + return node; + } + + if (auto maybeFlatmap = TExprBase(node->HeadPtr()).Maybe<TCoFlatMapBase>()) { + auto flatmap = maybeFlatmap.Cast(); + + auto checkAllPruneExtractorPassthroughLambda = [&columns](const TCoLambda& lambda) { + TMaybe<THashSet<TStringBuf>> passthroughFields; + if (IsPassthroughLambda(lambda, &passthroughFields) && passthroughFields) { + for (const auto& column : columns) { + if (!passthroughFields->contains(column)) { + return false; + } + } + return true; + } + return false; + }; + + if (checkAllPruneExtractorPassthroughLambda(flatmap.Lambda())) { + YQL_CLOG(DEBUG, Core) << node->Content() << " Over Flatmap"; + return ctx.Builder(flatmap.Pos()) + .Callable(flatmap.CallableName()) + .Callable(0, pruneKeys.CallableName()) + .Add(0, flatmap.Input().Ptr()) + .Add(1, ctx.DeepCopyLambda(keyExtractorLambda.Ref())) + .Seal() + .Add(1, flatmap.Lambda().Ptr()) + .Seal() + .Build(); + } + } + + return node; + }; + map["FlatMap"] = std::bind(&OptimizeFlatMap<false>, _1, _2, _3); map["OrderedFlatMap"] = std::bind(&OptimizeFlatMap<true>, _1, _2, _3); diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp index 4435aea5dfe..610bde15c60 100644 --- a/yql/essentials/core/common_opt/yql_co_flow2.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp @@ -1881,7 +1881,14 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { } if (self.Input().Maybe<TCoSortBase>()) { - if (auto res = ApplyExtractMembersToSort(self.Input().Ptr(), self.Members().Ptr(), *optCtx.ParentsMap, ctx, {})) { + if (auto res = ApplyExtractMembersToSortOrPruneKeys(self.Input().Ptr(), self.Members().Ptr(), *optCtx.ParentsMap, ctx, {})) { + return res; + } + return node; + } + + if (self.Input().Ptr()->IsCallable("PruneKeys") || self.Input().Ptr()->IsCallable("PruneAdjacentKeys")) { + if (auto res = ApplyExtractMembersToSortOrPruneKeys(self.Input().Ptr(), self.Members().Ptr(), *optCtx.ParentsMap, ctx, {})) { return res; } return node; diff --git a/yql/essentials/core/common_opt/yql_co_simple2.cpp b/yql/essentials/core/common_opt/yql_co_simple2.cpp index 79b12c55dfc..2adcdb6f22e 100644 --- a/yql/essentials/core/common_opt/yql_co_simple2.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple2.cpp @@ -948,6 +948,20 @@ void RegisterCoSimpleCallables2(TCallableOptimizerMap& map) { }; map["PgGrouping"] = ExpandPgGrouping; + + map["PruneKeys"] = map["PruneAdjacentKeys"] = [](const TExprNode::TPtr& node, TExprContext& /*ctx*/, TOptimizeContext&) { + TCoPruneKeysBase pruneKeys(node); + + if (node->Content() == pruneKeys.Input().Ref().Content()) { + auto pruneKeysInput = pruneKeys.Input().Cast<TCoPruneKeysBase>(); + if (&pruneKeys.Extractor().Ref() == &pruneKeysInput.Extractor().Ref()) { + YQL_CLOG(DEBUG, Core) << node->Content() << " Over " << pruneKeys.Input().Ref().Content(); + return node->HeadPtr(); + } + } + + return node; + }; } } diff --git a/yql/essentials/core/expr_nodes/yql_expr_nodes.json b/yql/essentials/core/expr_nodes/yql_expr_nodes.json index 77b74f1390f..7eb01b6114c 100644 --- a/yql/essentials/core/expr_nodes/yql_expr_nodes.json +++ b/yql/essentials/core/expr_nodes/yql_expr_nodes.json @@ -331,6 +331,24 @@ ] }, { + "Name": "TCoPruneKeysBase", + "Base": "TCoInputBase", + "Match": {"Type": "CallableBase"}, + "Children": [ + {"Index": 1, "Name": "Extractor", "Type": "TCoLambda"} + ] + }, + { + "Name": "TCoPruneKeys", + "Base": "TCoPruneKeysBase", + "Match": {"Type": "Callable", "Name": "PruneKeys"} + }, + { + "Name": "TCoPruneAdjacentKeys", + "Base": "TCoPruneKeysBase", + "Match": {"Type": "Callable", "Name": "PruneAdjacentKeys"} + }, + { "Name": "TCoFlatMapBase", "Base": "TCoInputBase", "Match": {"Type": "CallableBase"}, diff --git a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp index 91b8945a83c..9cf52f84e79 100644 --- a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -2689,6 +2689,123 @@ TExprNode::TPtr ExpandListHas(const TExprNode::TPtr& input, TExprContext& ctx) { return RewriteSearchByKeyForTypesMismatch<true, true>(input, ctx); } +TExprNode::TPtr ExpandPruneAdjacentKeys(const TExprNode::TPtr& input, TExprContext& ctx) { + const auto type = input->Head().GetTypeAnn(); + const auto& keyExtractorLambda = input->ChildRef(1); + + YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::List || type->GetKind() == ETypeAnnotationKind::Stream); + + const auto elemType = type->GetKind() == ETypeAnnotationKind::List + ? type->Cast<TListExprType>()->GetItemType() + : type->Cast<TStreamExprType>()->GetItemType(); + const auto optionalElemType = *ctx.MakeType<TOptionalExprType>(elemType); + + YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << input->Content(); + return ctx.Builder(input->Pos()) + .Callable("OrderedFlatMap") + .Callable(0, "Fold1Map") + .Add(0, input->HeadPtr()) + .Lambda(1) + .Param("item") + .List(0) + .Callable(0, "Just") + .Arg(0, "item") + .Seal() + .Arg(1, "item") + .Seal() + .Seal() + .Lambda(2) + .Param("item") + .Param("state") + .List(0) + .Callable(0, "If") + .Callable(0, "AggrEquals") + .Apply(0, keyExtractorLambda) + .With(0, "item") + .Seal() + .Apply(1, keyExtractorLambda) + .With(0, "state") + .Seal() + .Seal() + .Callable(1, "Nothing") + .Add(0, ExpandType(input->Pos(), optionalElemType, ctx)) + .Seal() + .Callable(2, "Just") + .Arg(0, "item") + .Seal() + .Seal() + .Arg(1, "item") + .Seal() + .Seal() + .Seal() + .Lambda(1) + .Param("item") + .Arg(0, "item") + .Seal() + .Seal() + .Build(); +} + +TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx) { + const auto type = input->Head().GetTypeAnn(); + const auto& keyExtractorLambda = input->ChildRef(1); + YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::List || type->GetKind() == ETypeAnnotationKind::Stream); + + auto initHandler = ctx.Builder(input->Pos()) + .Lambda() + .Param("key") + .Param("item") + .Arg(0, "item") + .Seal() + .Build(); + auto updateHandler = ctx.Builder(input->Pos()) + .Lambda() + .Param("key") + .Param("item") + .Param("state") + .Arg(0, "state") + .Seal() + .Build(); + auto finishHandler = ctx.Builder(input->Pos()) + .Lambda() + .Param("key") + .Param("state") + .Callable(0, "Just") + .Arg(0, "state") + .Seal() + .Seal() + .Build(); + + YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << input->Content(); + if (type->GetKind() == ETypeAnnotationKind::List) { + return ctx.Builder(input->Pos()) + .Callable("CombineByKey") + .Add(0, input->HeadPtr()) + .Lambda(1) // preMap + .Param("item") + .Callable(0, "Just") + .Arg(0, "item") + .Seal() + .Seal() + .Add(2, keyExtractorLambda) + .Add(3, initHandler) + .Add(4, updateHandler) + .Add(5, finishHandler) + .Seal() + .Build(); + } else { + return ctx.Builder(input->Pos()) + .Callable("CombineCore") + .Add(0, input->HeadPtr()) + .Add(1, keyExtractorLambda) + .Add(2, initHandler) + .Add(3, updateHandler) + .Add(4, finishHandler) + .Seal() + .Build(); + } +} + TExprNode::TPtr ExpandPgArrayOp(const TExprNode::TPtr& input, TExprContext& ctx) { const bool all = input->Content() == "PgAllResolvedOp"; auto list = ctx.Builder(input->Pos()) @@ -8675,6 +8792,8 @@ struct TPeepHoleRules { {"CheckedDiv", &ExpandCheckedDiv}, {"CheckedMod", &ExpandCheckedMod}, {"CheckedMinus", &ExpandCheckedMinus}, + {"PruneAdjacentKeys", &ExpandPruneAdjacentKeys}, + {"PruneKeys", &ExpandPruneKeys}, {"JsonValue", &ExpandJsonValue}, {"JsonExists", &ExpandJsonExists}, {"EmptyIterator", &DropDependsOnFromEmptyIterator}, diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp index 665378e19f0..75e07ab3a72 100644 --- a/yql/essentials/core/type_ann/type_ann_core.cpp +++ b/yql/essentials/core/type_ann/type_ann_core.cpp @@ -12915,6 +12915,8 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["WithOptionalArgs"] = &WithOptionalArgsWrapper; Functions["WithContext"] = &WithContextWrapper; Functions["EmptyFrom"] = &EmptyFromWrapper; + Functions["PruneAdjacentKeys"] = &PruneKeysWrapper; + Functions["PruneKeys"] = &PruneKeysWrapper; Functions["DecimalDiv"] = &DecimalBinaryWrapper; Functions["DecimalMod"] = &DecimalBinaryWrapper; diff --git a/yql/essentials/core/type_ann/type_ann_list.cpp b/yql/essentials/core/type_ann/type_ann_list.cpp index cb225d30d72..8601c606501 100644 --- a/yql/essentials/core/type_ann/type_ann_list.cpp +++ b/yql/essentials/core/type_ann/type_ann_list.cpp @@ -3081,6 +3081,53 @@ namespace { return ListAutomapArgs(input, output, ctx, "ZipAll"); } + IGraphTransformer::TStatus PruneKeysWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + if (!EnsureArgsCount(*input, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (IsNull(input->Head())) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + if (IsEmptyList(input->Head())) { + output = input->HeadPtr(); + return IGraphTransformer::TStatus::Repeat; + } + + const TTypeAnnotationNode* itemType = nullptr; + if (!EnsureNewSeqType<false, true, true>(input->Head(), ctx.Expr, &itemType)) { + return IGraphTransformer::TStatus::Error; + } + + auto& keyExtractorLambda = input->ChildRef(1); + const auto status = ConvertToLambda(keyExtractorLambda, ctx.Expr, 1); + if (status.Level != IGraphTransformer::TStatus::Ok) { + return status; + } + + if (!UpdateLambdaAllArgumentsTypes(keyExtractorLambda, {itemType}, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!keyExtractorLambda->GetTypeAnn()) { + return IGraphTransformer::TStatus::Repeat; + } + + if (input->IsCallable("PruneKeys") && + !EnsureHashableKey(keyExtractorLambda->Pos(), keyExtractorLambda->GetTypeAnn(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureEquatableKey(keyExtractorLambda->Pos(), keyExtractorLambda->GetTypeAnn(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Head().GetTypeAnn()); + return IGraphTransformer::TStatus::Ok; + } + bool ValidateSortDirections(TExprNode& direction, TExprContext& ctx, bool& isTuple) { bool isOkAscending = false; diff --git a/yql/essentials/core/type_ann/type_ann_list.h b/yql/essentials/core/type_ann/type_ann_list.h index 7071b67461c..abcc3b5ae1b 100644 --- a/yql/essentials/core/type_ann/type_ann_list.h +++ b/yql/essentials/core/type_ann/type_ann_list.h @@ -70,6 +70,7 @@ namespace NTypeAnnImpl { IGraphTransformer::TStatus ListUnionAllWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus ListZipWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus ListZipAllWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus PruneKeysWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus SortWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus TopWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus KeepTopWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); diff --git a/yql/essentials/core/yql_expr_constraint.cpp b/yql/essentials/core/yql_expr_constraint.cpp index 7921e31f97e..32cba8f9d28 100644 --- a/yql/essentials/core/yql_expr_constraint.cpp +++ b/yql/essentials/core/yql_expr_constraint.cpp @@ -112,6 +112,8 @@ public: Functions["ToStream"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["ToSequence"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["Collect"] = &TCallableConstraintTransformer::CopyAllFrom<0>; + Functions["PruneAdjacentKeys"] = &TCallableConstraintTransformer::PruneKeysWrap<true>; + Functions["PruneKeys"] = &TCallableConstraintTransformer::PruneKeysWrap<false>; Functions["FilterNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TChoppedConstraintNode, TPartOfChoppedConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>; Functions["SkipNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TChoppedConstraintNode, TPartOfChoppedConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>; Functions["FilterNullElements"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TChoppedConstraintNode, TPartOfChoppedConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>; @@ -805,6 +807,18 @@ private: return FromFirst<TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx); } + template <bool Adjacent> + TStatus PruneKeysWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { + if (const auto status = UpdateLambdaConstraints(*input->Child(1)); status != TStatus::Ok) { + return status; + } + + if constexpr (Adjacent) { + return CopyAllFrom<0>(input, output, ctx); + } + return FromFirst<TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode>(input, output, ctx); + } + template<class TConstraint> static const TConstraint* GetConstraintFromWideResultLambda(const TExprNode& lambda, TExprContext& ctx); diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json index 587366b57a3..1ed6dbeb712 100644 --- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json @@ -1216,6 +1216,20 @@ "uri": "https://{canondata_backend}/1946324/427ab5a66851b154f8b0c33004a9e5ce6c7d6dbb/resource.tar.gz#test.test_select-autogen_columns_conflict-default.txt-Results_/results.txt" } ], + "test.test[select-prune_keys-default.txt-Debug]": [ + { + "checksum": "95e58e469ce10fce0d0d5e55c0cf3baf", + "size": 3292, + "uri": "https://{canondata_backend}/1931696/04008bc01ad4f562f8e03ad2bc296f7a64a78489/resource.tar.gz#test.test_select-prune_keys-default.txt-Debug_/opt.yql" + } + ], + "test.test[select-prune_keys-default.txt-Results]": [ + { + "checksum": "f53b6976b6a5e1ee5e1d331c25963930", + "size": 27670, + "uri": "https://{canondata_backend}/1931696/04008bc01ad4f562f8e03ad2bc296f7a64a78489/resource.tar.gz#test.test_select-prune_keys-default.txt-Results_/results.txt" + } + ], "test.test[union-union_positional_mix-default.txt-Debug]": [ { "checksum": "35839a396fdaff3806b0a6117135e8b7", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 635dbdb17fb..be9dfbf6929 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -6978,6 +6978,13 @@ "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_select-null_check_/sql.yql" } ], + "test_sql2yql.test[select-prune_keys]": [ + { + "checksum": "55346f77548ef19f9a09d2f1d3f6f466", + "size": 17765, + "uri": "https://{canondata_backend}/1871182/906a4c4e540bb8746f8d7595500d4d1c9f664846/resource.tar.gz#test_sql2yql.test_select-prune_keys_/sql.yql" + } + ], "test_sql2yql.test[select-result_label]": [ { "checksum": "77393e1875cdffd6504971730f13b85c", @@ -10893,6 +10900,11 @@ "uri": "file://test_sql_format.test_select-null_check_/formatted.sql" } ], + "test_sql_format.test[select-prune_keys]": [ + { + "uri": "file://test_sql_format.test_select-prune_keys_/formatted.sql" + } + ], "test_sql_format.test[select-result_label]": [ { "uri": "file://test_sql_format.test_select-result_label_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_select-prune_keys_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_select-prune_keys_/formatted.sql new file mode 100644 index 00000000000..65a0e7c3a79 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_select-prune_keys_/formatted.sql @@ -0,0 +1,97 @@ +/* postgres can not */ +PRAGMA warning('disable', '4510'); + +$id = ($x) -> { + RETURN $x; +}; + +SELECT + Yql::PruneAdjacentKeys(AsList(1, 1, 1, 2, 3, 3, 4, 5), $id) +; + +SELECT + Yql::PruneKeys(AsList(1, 1, 1, 1, 1, 1, 1), $id) +; + +SELECT + Yql::PruneAdjacentKeys([], $id) +; + +SELECT + Yql::PruneKeys([], $id) +; + +$mod2 = ($x) -> { + RETURN $x % 2; +}; + +SELECT + ListLength(Yql::PruneKeys(AsList(1, 1, 1, 3, 3, 3, 3), $mod2)) +; + +-- optimize tests +$get_a = ($x) -> { + RETURN <|a: $x.a|>; +}; + +SELECT + Yql::ExtractMembers(Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), AsTuple(EvaluateAtom('a'))) +; + +SELECT + Yql::ExtractMembers(Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), AsTuple(EvaluateAtom('a'))) +; + +$get_a_b = ($x) -> { + RETURN <|a: $x.a, b: $x.b|>; +}; + +$prune_keys_result = Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_b); + +SELECT + Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('a'))), + Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('b'))) +; + +$prune_adjacent_keys_result = Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_b); + +SELECT + Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('a'))), + Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('b'))) +; + +$get_a_bp1_list = ($x) -> { + RETURN AsList(<|a: $x.a, b: $x.b + 1|>); +}; + +SELECT + Yql::PruneKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a) +; + +SELECT + Yql::PruneKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a_b) +; + +SELECT + Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a) +; + +SELECT + Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a_b) +; + +SELECT + Yql::PruneKeys(Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), $get_a) +; + +SELECT + Yql::PruneKeys(Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_b), $get_a) +; + +SELECT + Yql::PruneKeys(Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), $get_a) +; + +SELECT + Yql::PruneAdjacentKeys(Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), $get_a) +; diff --git a/yql/essentials/tests/sql/suites/select/prune_keys.sql b/yql/essentials/tests/sql/suites/select/prune_keys.sql new file mode 100644 index 00000000000..9cf5cb3ced7 --- /dev/null +++ b/yql/essentials/tests/sql/suites/select/prune_keys.sql @@ -0,0 +1,39 @@ +/* postgres can not */ +pragma warning("disable", "4510"); + +$id = ($x) -> { RETURN $x; }; +SELECT Yql::PruneAdjacentKeys(AsList(1,1,1,2,3,3,4,5), $id); +SELECT Yql::PruneKeys(AsList(1,1,1,1,1,1,1), $id); + +SELECT Yql::PruneAdjacentKeys([], $id); +SELECT Yql::PruneKeys([], $id); + +$mod2 = ($x) -> { RETURN $x % 2; }; +SELECT ListLength(Yql::PruneKeys(AsList(1,1,1,3,3,3,3), $mod2)); + +-- optimize tests + +$get_a = ($x) -> { RETURN <|a:$x.a|>; }; +select Yql::ExtractMembers(Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), AsTuple(EvaluateAtom('a'))); +select Yql::ExtractMembers(Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), AsTuple(EvaluateAtom('a'))); + + +$get_a_b = ($x) -> { RETURN <|a:$x.a, b:$x.b|>; }; +$prune_keys_result = Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_b); +select Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('a'))), Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('b'))); + +$prune_adjacent_keys_result = Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_b); +select Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('a'))), Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('b'))); + + +$get_a_bp1_list = ($x) -> { RETURN AsList(<|a:$x.a, b:$x.b+1|>); }; +select Yql::PruneKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a); +select Yql::PruneKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a_b); +select Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a); +select Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a_b); + + +select Yql::PruneKeys(Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), $get_a); +select Yql::PruneKeys(Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_b), $get_a); +select Yql::PruneKeys(Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), $get_a); +select Yql::PruneAdjacentKeys(Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), $get_a); |