summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author mpereskokova <[email protected]> 2025-03-19 15:28:46 +0300
committer mpereskokova <[email protected]> 2025-03-19 15:47:24 +0300
commit 95040611fee3b60e9fb35829f51822b8970d7d70 (patch)
tree 26f0a5f502795661fa5ecf9849219c0e448c39e3
parent 58b29f8c8934d54a748f4638ce45afcd376cf250 (diff)
Introduce PruneKeys Callables
commit_hash:ca25a6ece5ae902c4b93846f69bc33b6336a669c
-rw-r--r--yql/essentials/core/common_opt/yql_co_extr_members.cpp71
-rw-r--r--yql/essentials/core/common_opt/yql_co_extr_members.h2
-rw-r--r--yql/essentials/core/common_opt/yql_co_finalizers.cpp12
-rw-r--r--yql/essentials/core/common_opt/yql_co_flow1.cpp46
-rw-r--r--yql/essentials/core/common_opt/yql_co_flow2.cpp9
-rw-r--r--yql/essentials/core/common_opt/yql_co_simple2.cpp14
-rw-r--r--yql/essentials/core/expr_nodes/yql_expr_nodes.json18
-rw-r--r--yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp119
-rw-r--r--yql/essentials/core/type_ann/type_ann_core.cpp2
-rw-r--r--yql/essentials/core/type_ann/type_ann_list.cpp47
-rw-r--r--yql/essentials/core/type_ann/type_ann_list.h1
-rw-r--r--yql/essentials/core/yql_expr_constraint.cpp14
-rw-r--r--yql/essentials/tests/sql/minirun/part7/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json12
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_select-prune_keys_/formatted.sql97
-rw-r--r--yql/essentials/tests/sql/suites/select/prune_keys.sql39
16 files changed, 490 insertions, 27 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_extr_members.cpp b/yql/essentials/core/common_opt/yql_co_extr_members.cpp
index c0706815dc0..55923cde936 100644
--- a/yql/essentials/core/common_opt/yql_co_extr_members.cpp
+++ b/yql/essentials/core/common_opt/yql_co_extr_members.cpp
@@ -176,42 +176,55 @@ TExprNode::TPtr ApplyExtractMembersToFilterNullMembers(const TExprNode::TPtr& no
.Done().Ptr();
}
-TExprNode::TPtr ApplyExtractMembersToSort(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix) {
- TCoSortBase sort(node);
+TExprNode::TPtr ApplyExtractMembersToSortOrPruneKeys(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix) {
+ auto nodeIsPruneKeys = node->IsCallable("PruneKeys") || node->IsCallable("PruneAdjacentKeys");
+ auto nodeIsSort = !nodeIsPruneKeys;
+ auto keyExtractorLambdaIndex = nodeIsSort ? 2 : 1;
+
+ TCoLambda keyExtractorLambda(node->ChildPtr(keyExtractorLambdaIndex));
+
TSet<TStringBuf> extractFields;
for (const auto& x : members->ChildrenList()) {
extractFields.emplace(x->Content());
}
- TSet<TStringBuf> sortKeys;
- bool fieldSubset = HaveFieldsSubset(sort.KeySelectorLambda().Body().Ptr(), sort.KeySelectorLambda().Args().Arg(0).Ref(), sortKeys, parentsMap);
+ TSet<TStringBuf> usedKeys;
+ bool fieldSubset = HaveFieldsSubset(keyExtractorLambda.Body().Ptr(), keyExtractorLambda.Args().Arg(0).Ref(), usedKeys, parentsMap);
bool allExist = true;
- if (!sortKeys.empty()) {
- for (const auto& key : sortKeys) {
+ if (!usedKeys.empty()) {
+ for (const auto& key : usedKeys) {
auto ret = extractFields.emplace(key);
if (ret.second) {
allExist = false;
}
}
}
- if (allExist && sortKeys.size() == extractFields.size()) {
+ if (allExist && usedKeys.size() == extractFields.size()) {
YQL_CLOG(DEBUG, Core) << "Force `fieldSubset` for ExtractMembers over " << node->Content();
fieldSubset = true;
}
if (fieldSubset && allExist) {
YQL_CLOG(DEBUG, Core) << "Move ExtractMembers over " << node->Content() << logSuffix;
- return ctx.Builder(sort.Pos())
+ auto result = ctx.Builder(node->Pos())
.Callable(node->Content())
- .Callable(0, TCoExtractMembers::CallableName())
- .Add(0, sort.Input().Ptr())
- .Add(1, members)
- .Seal()
- .Add(1, sort.SortDirections().Ptr())
- .Add(2, ctx.DeepCopyLambda(sort.KeySelectorLambda().Ref()))
.Seal()
.Build();
+
+ TExprNode::TListType children;
+ children.push_back(ctx.Builder(node->Pos())
+ .Callable(TCoExtractMembers::CallableName())
+ .Add(0, node->HeadPtr())
+ .Add(1, members)
+ .Seal()
+ .Build());
+ if (nodeIsSort) {
+ children.push_back(node->ChildPtr(1));
+ }
+ children.push_back(ctx.DeepCopyLambda(keyExtractorLambda.Ref()));
+
+ return ctx.ChangeChildren(*result, std::move(children));
}
else if (fieldSubset) {
- const auto structType = GetSeqItemType(*sort.Ref().GetTypeAnn()).Cast<TStructExprType>();
+ const auto structType = GetSeqItemType(node->GetTypeAnn())->Cast<TStructExprType>();
if (structType->GetSize() <= extractFields.size()) {
return {};
}
@@ -221,16 +234,26 @@ TExprNode::TPtr ApplyExtractMembersToSort(const TExprNode::TPtr& node, const TEx
totalExtracted.emplace_back(ctx.NewAtom(members->Pos(), field));
}
- return ctx.Builder(sort.Pos())
+ TExprNode::TListType children;
+ children.push_back(ctx.Builder(node->Pos())
+ .Callable(TCoExtractMembers::CallableName())
+ .Add(0, node->HeadPtr())
+ .Add(1, ctx.NewList(members->Pos(), std::move(totalExtracted)))
+ .Seal()
+ .Build());
+ if (nodeIsSort) {
+ children.push_back(node->ChildPtr(1));
+ }
+ children.push_back(ctx.DeepCopyLambda(keyExtractorLambda.Ref()));
+
+ auto internalPartOfExtractMembers = ctx.Builder(node->Pos())
+ .Callable(node->Content())
+ .Seal()
+ .Build();
+
+ return ctx.Builder(node->Pos())
.Callable(TCoExtractMembers::CallableName())
- .Callable(0, node->Content())
- .Callable(0, TCoExtractMembers::CallableName())
- .Add(0, sort.Input().Ptr())
- .Add(1, ctx.NewList(members->Pos(), std::move(totalExtracted)))
- .Seal()
- .Add(1, sort.SortDirections().Ptr())
- .Add(2, ctx.DeepCopyLambda(sort.KeySelectorLambda().Ref()))
- .Seal()
+ .Add(0, ctx.ChangeChildren(*internalPartOfExtractMembers, std::move(children)))
.Add(1, members)
.Seal()
.Build();
diff --git a/yql/essentials/core/common_opt/yql_co_extr_members.h b/yql/essentials/core/common_opt/yql_co_extr_members.h
index 6157cd7a284..467a7a9fff9 100644
--- a/yql/essentials/core/common_opt/yql_co_extr_members.h
+++ b/yql/essentials/core/common_opt/yql_co_extr_members.h
@@ -14,7 +14,7 @@ TExprNode::TPtr ApplyExtractMembersToSkip(const TExprNode::TPtr& node, const TEx
TExprNode::TPtr ApplyExtractMembersToExtend(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix);
TExprNode::TPtr ApplyExtractMembersToSkipNullMembers(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix);
TExprNode::TPtr ApplyExtractMembersToFilterNullMembers(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix);
-TExprNode::TPtr ApplyExtractMembersToSort(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix);
+TExprNode::TPtr ApplyExtractMembersToSortOrPruneKeys(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix);
TExprNode::TPtr ApplyExtractMembersToAssumeUnique(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix);
TExprNode::TPtr ApplyExtractMembersToTop(const TExprNode::TPtr& node, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx, TStringBuf logSuffix);
TExprNode::TPtr ApplyExtractMembersToEquiJoin(const TExprNode::TPtr& node, const TExprNode::TPtr& members, TExprContext& ctx, TStringBuf logSuffix);
diff --git a/yql/essentials/core/common_opt/yql_co_finalizers.cpp b/yql/essentials/core/common_opt/yql_co_finalizers.cpp
index 6c9bc8036ff..c1fce215cf2 100644
--- a/yql/essentials/core/common_opt/yql_co_finalizers.cpp
+++ b/yql/essentials/core/common_opt/yql_co_finalizers.cpp
@@ -395,7 +395,17 @@ void RegisterCoFinalizers(TFinalizingOptimizerMap& map) {
}
OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx,
[] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) {
- return ApplyExtractMembersToSort(input, members, parentsMap, ctx, " with multi-usage");
+ return ApplyExtractMembersToSortOrPruneKeys(input, members, parentsMap, ctx, " with multi-usage");
+ }
+ );
+
+ return true;
+ };
+
+ map[TCoPruneKeys::CallableName()] = map[TCoPruneAdjacentKeys::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) {
+ OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx,
+ [] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) {
+ return ApplyExtractMembersToSortOrPruneKeys(input, members, parentsMap, ctx, " with multi-usage");
}
);
diff --git a/yql/essentials/core/common_opt/yql_co_flow1.cpp b/yql/essentials/core/common_opt/yql_co_flow1.cpp
index 0bd3cc68dba..7bcfc55dbcd 100644
--- a/yql/essentials/core/common_opt/yql_co_flow1.cpp
+++ b/yql/essentials/core/common_opt/yql_co_flow1.cpp
@@ -1413,6 +1413,52 @@ TExprNode::TPtr OptimizeFlatMap(const TExprNode::TPtr& node, TExprContext& ctx,
void RegisterCoFlowCallables1(TCallableOptimizerMap& map) {
using namespace std::placeholders;
+ map["PruneKeys"] = map["PruneAdjacentKeys"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { // One optimizer shared by both callables: tries to move the prune node below a FlatMap that feeds it; returns `node` unchanged when no rewrite applies.
+ if (!optCtx.IsSingleUsage(node->Head())) { // Only rewrite when IsSingleUsage() holds for the input node.
+ return node;
+ }
+
+ TCoPruneKeysBase pruneKeys(node);
+ TCoLambda keyExtractorLambda = pruneKeys.Extractor();
+ TSet<TStringBuf> columns; // Filled by HaveFieldsSubset below; later checked against the FlatMap's passthrough fields.
+
+ if (!HaveFieldsSubset(keyExtractorLambda.Ref().Child(1), *keyExtractorLambda.Ref().Child(0)->Child(0), columns, *optCtx.ParentsMap)) { // Child(1) / Child(0)->Child(0) are presumably the lambda body and its first argument - TODO confirm; give up if the used fields cannot be determined.
+ return node;
+ }
+
+ if (auto maybeFlatmap = TExprBase(node->HeadPtr()).Maybe<TCoFlatMapBase>()) { // The rewrite applies only when the input is a FlatMap-family node.
+ auto flatmap = maybeFlatmap.Cast();
+
+ auto checkAllPruneExtractorPassthroughLambda = [&columns](const TCoLambda& lambda) { // True only if IsPassthroughLambda succeeds, yields a field set, and that set contains every entry of `columns`.
+ TMaybe<THashSet<TStringBuf>> passthroughFields;
+ if (IsPassthroughLambda(lambda, &passthroughFields) && passthroughFields) {
+ for (const auto& column : columns) {
+ if (!passthroughFields->contains(column)) { // A key column that is not passed through blocks the rewrite.
+ return false;
+ }
+ }
+ return true;
+ }
+ return false; // Not a passthrough lambda, or no field set was reported.
+ };
+
+ if (checkAllPruneExtractorPassthroughLambda(flatmap.Lambda())) {
+ YQL_CLOG(DEBUG, Core) << node->Content() << " Over Flatmap";
+ return ctx.Builder(flatmap.Pos()) // Builds FlatMap(Prune(flatmapInput, extractorCopy), flatmapLambda).
+ .Callable(flatmap.CallableName())
+ .Callable(0, pruneKeys.CallableName())
+ .Add(0, flatmap.Input().Ptr())
+ .Add(1, ctx.DeepCopyLambda(keyExtractorLambda.Ref())) // The extractor is deep-copied for the new prune node.
+ .Seal()
+ .Add(1, flatmap.Lambda().Ptr()) // The FlatMap lambda node is reused as is.
+ .Seal()
+ .Build();
+ }
+ }
+
+ return node; // No applicable rewrite.
+ };
+
map["FlatMap"] = std::bind(&OptimizeFlatMap<false>, _1, _2, _3);
map["OrderedFlatMap"] = std::bind(&OptimizeFlatMap<true>, _1, _2, _3);
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp
index 4435aea5dfe..610bde15c60 100644
--- a/yql/essentials/core/common_opt/yql_co_flow2.cpp
+++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp
@@ -1881,7 +1881,14 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) {
}
if (self.Input().Maybe<TCoSortBase>()) {
- if (auto res = ApplyExtractMembersToSort(self.Input().Ptr(), self.Members().Ptr(), *optCtx.ParentsMap, ctx, {})) {
+ if (auto res = ApplyExtractMembersToSortOrPruneKeys(self.Input().Ptr(), self.Members().Ptr(), *optCtx.ParentsMap, ctx, {})) {
+ return res;
+ }
+ return node;
+ }
+
+ if (self.Input().Ptr()->IsCallable("PruneKeys") || self.Input().Ptr()->IsCallable("PruneAdjacentKeys")) {
+ if (auto res = ApplyExtractMembersToSortOrPruneKeys(self.Input().Ptr(), self.Members().Ptr(), *optCtx.ParentsMap, ctx, {})) {
return res;
}
return node;
diff --git a/yql/essentials/core/common_opt/yql_co_simple2.cpp b/yql/essentials/core/common_opt/yql_co_simple2.cpp
index 79b12c55dfc..2adcdb6f22e 100644
--- a/yql/essentials/core/common_opt/yql_co_simple2.cpp
+++ b/yql/essentials/core/common_opt/yql_co_simple2.cpp
@@ -948,6 +948,20 @@ void RegisterCoSimpleCallables2(TCallableOptimizerMap& map) {
};
map["PgGrouping"] = ExpandPgGrouping;
+
+ map["PruneKeys"] = map["PruneAdjacentKeys"] = [](const TExprNode::TPtr& node, TExprContext& /*ctx*/, TOptimizeContext&) { // Drops the outer prune node when it sits directly over a node with the same content and the very same extractor lambda.
+ TCoPruneKeysBase pruneKeys(node);
+
+ if (node->Content() == pruneKeys.Input().Ref().Content()) { // Outer node and its input must carry the same name, so PruneKeys over PruneAdjacentKeys does not match.
+ auto pruneKeysInput = pruneKeys.Input().Cast<TCoPruneKeysBase>();
+ if (&pruneKeys.Extractor().Ref() == &pruneKeysInput.Extractor().Ref()) { // Identity comparison of the lambda nodes (addresses), not a structural comparison.
+ YQL_CLOG(DEBUG, Core) << node->Content() << " Over " << pruneKeys.Input().Ref().Content();
+ return node->HeadPtr(); // Keep only the inner node.
+ }
+ }
+
+ return node; // Nothing to simplify.
+ };
}
}
diff --git a/yql/essentials/core/expr_nodes/yql_expr_nodes.json b/yql/essentials/core/expr_nodes/yql_expr_nodes.json
index 77b74f1390f..7eb01b6114c 100644
--- a/yql/essentials/core/expr_nodes/yql_expr_nodes.json
+++ b/yql/essentials/core/expr_nodes/yql_expr_nodes.json
@@ -331,6 +331,24 @@
]
},
{
+ "Name": "TCoPruneKeysBase",
+ "Base": "TCoInputBase",
+ "Match": {"Type": "CallableBase"},
+ "Children": [
+ {"Index": 1, "Name": "Extractor", "Type": "TCoLambda"}
+ ]
+ },
+ {
+ "Name": "TCoPruneKeys",
+ "Base": "TCoPruneKeysBase",
+ "Match": {"Type": "Callable", "Name": "PruneKeys"}
+ },
+ {
+ "Name": "TCoPruneAdjacentKeys",
+ "Base": "TCoPruneKeysBase",
+ "Match": {"Type": "Callable", "Name": "PruneAdjacentKeys"}
+ },
+ {
"Name": "TCoFlatMapBase",
"Base": "TCoInputBase",
"Match": {"Type": "CallableBase"},
diff --git a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
index 91b8945a83c..9cf52f84e79 100644
--- a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
+++ b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
@@ -2689,6 +2689,123 @@ TExprNode::TPtr ExpandListHas(const TExprNode::TPtr& input, TExprContext& ctx) {
return RewriteSearchByKeyForTypesMismatch<true, true>(input, ctx);
}
+TExprNode::TPtr ExpandPruneAdjacentKeys(const TExprNode::TPtr& input, TExprContext& ctx) { // Peephole expansion: replaces the node with OrderedFlatMap(Fold1Map(input, init, update), identity), comparing each item's key with the previous item's key.
+ const auto type = input->Head().GetTypeAnn();
+ const auto& keyExtractorLambda = input->ChildRef(1); // Child 1 is the key extractor lambda.
+
+ YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::List || type->GetKind() == ETypeAnnotationKind::Stream); // Only List and Stream inputs are handled here.
+
+ const auto elemType = type->GetKind() == ETypeAnnotationKind::List
+ ? type->Cast<TListExprType>()->GetItemType()
+ : type->Cast<TStreamExprType>()->GetItemType();
+ const auto optionalElemType = *ctx.MakeType<TOptionalExprType>(elemType); // Optional<item>; used as the type of the Nothing built below.
+
+ YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << input->Content();
+ return ctx.Builder(input->Pos())
+ .Callable("OrderedFlatMap")
+ .Callable(0, "Fold1Map")
+ .Add(0, input->HeadPtr())
+ .Lambda(1) // Init lambda (first item): yields the pair (Just(item), item).
+ .Param("item")
+ .List(0)
+ .Callable(0, "Just")
+ .Arg(0, "item")
+ .Seal()
+ .Arg(1, "item") // Presumably the state carried to the next step - TODO confirm against the Fold1Map contract.
+ .Seal()
+ .Seal()
+ .Lambda(2) // Update lambda (item, state): yields (If(keys equal, Nothing, Just(item)), item).
+ .Param("item")
+ .Param("state")
+ .List(0)
+ .Callable(0, "If")
+ .Callable(0, "AggrEquals") // Compares extractor(item) with extractor(state).
+ .Apply(0, keyExtractorLambda)
+ .With(0, "item")
+ .Seal()
+ .Apply(1, keyExtractorLambda)
+ .With(0, "state")
+ .Seal()
+ .Seal()
+ .Callable(1, "Nothing") // Equal keys: emit an empty optional for this item.
+ .Add(0, ExpandType(input->Pos(), optionalElemType, ctx))
+ .Seal()
+ .Callable(2, "Just") // Different keys: emit the item.
+ .Arg(0, "item")
+ .Seal()
+ .Seal()
+ .Arg(1, "item") // The current item becomes the second element of the pair in either case.
+ .Seal()
+ .Seal()
+ .Seal()
+ .Lambda(1) // Identity lambda over the optional values; presumably OrderedFlatMap drops the empty ones - TODO confirm.
+ .Param("item")
+ .Arg(0, "item")
+ .Seal()
+ .Seal()
+ .Build();
+}
+
+TExprNode::TPtr ExpandPruneKeys(const TExprNode::TPtr& input, TExprContext& ctx) { // Peephole expansion: replaces the node with CombineByKey for a List input and CombineCore otherwise, using handlers that keep one state per key.
+ const auto type = input->Head().GetTypeAnn();
+ const auto& keyExtractorLambda = input->ChildRef(1); // Child 1 is the key extractor lambda; it is passed on as the key lambda of the combiner.
+ YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::List || type->GetKind() == ETypeAnnotationKind::Stream); // Only List and Stream inputs are handled here.
+
+ auto initHandler = ctx.Builder(input->Pos()) // (key, item) -> item: the item itself becomes the state.
+ .Lambda()
+ .Param("key")
+ .Param("item")
+ .Arg(0, "item")
+ .Seal()
+ .Build();
+ auto updateHandler = ctx.Builder(input->Pos()) // (key, item, state) -> state: the new item is ignored and the existing state is kept.
+ .Lambda()
+ .Param("key")
+ .Param("item")
+ .Param("state")
+ .Arg(0, "state")
+ .Seal()
+ .Build();
+ auto finishHandler = ctx.Builder(input->Pos()) // (key, state) -> Just(state).
+ .Lambda()
+ .Param("key")
+ .Param("state")
+ .Callable(0, "Just")
+ .Arg(0, "state")
+ .Seal()
+ .Seal()
+ .Build();
+
+ YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << input->Content();
+ if (type->GetKind() == ETypeAnnotationKind::List) {
+ return ctx.Builder(input->Pos())
+ .Callable("CombineByKey") // List input: six children, including an extra pre-map lambda.
+ .Add(0, input->HeadPtr())
+ .Lambda(1) // preMap
+ .Param("item")
+ .Callable(0, "Just") // Wraps every item in Just.
+ .Arg(0, "item")
+ .Seal()
+ .Seal()
+ .Add(2, keyExtractorLambda)
+ .Add(3, initHandler)
+ .Add(4, updateHandler)
+ .Add(5, finishHandler)
+ .Seal()
+ .Build();
+ } else { // Stream input, the only other kind allowed by the YQL_ENSURE above.
+ return ctx.Builder(input->Pos())
+ .Callable("CombineCore") // Same handlers, but no pre-map lambda.
+ .Add(0, input->HeadPtr())
+ .Add(1, keyExtractorLambda)
+ .Add(2, initHandler)
+ .Add(3, updateHandler)
+ .Add(4, finishHandler)
+ .Seal()
+ .Build();
+ }
+}
+
TExprNode::TPtr ExpandPgArrayOp(const TExprNode::TPtr& input, TExprContext& ctx) {
const bool all = input->Content() == "PgAllResolvedOp";
auto list = ctx.Builder(input->Pos())
@@ -8675,6 +8792,8 @@ struct TPeepHoleRules {
{"CheckedDiv", &ExpandCheckedDiv},
{"CheckedMod", &ExpandCheckedMod},
{"CheckedMinus", &ExpandCheckedMinus},
+ {"PruneAdjacentKeys", &ExpandPruneAdjacentKeys},
+ {"PruneKeys", &ExpandPruneKeys},
{"JsonValue", &ExpandJsonValue},
{"JsonExists", &ExpandJsonExists},
{"EmptyIterator", &DropDependsOnFromEmptyIterator},
diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp
index 665378e19f0..75e07ab3a72 100644
--- a/yql/essentials/core/type_ann/type_ann_core.cpp
+++ b/yql/essentials/core/type_ann/type_ann_core.cpp
@@ -12915,6 +12915,8 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
Functions["WithOptionalArgs"] = &WithOptionalArgsWrapper;
Functions["WithContext"] = &WithContextWrapper;
Functions["EmptyFrom"] = &EmptyFromWrapper;
+ Functions["PruneAdjacentKeys"] = &PruneKeysWrapper;
+ Functions["PruneKeys"] = &PruneKeysWrapper;
Functions["DecimalDiv"] = &DecimalBinaryWrapper;
Functions["DecimalMod"] = &DecimalBinaryWrapper;
diff --git a/yql/essentials/core/type_ann/type_ann_list.cpp b/yql/essentials/core/type_ann/type_ann_list.cpp
index cb225d30d72..8601c606501 100644
--- a/yql/essentials/core/type_ann/type_ann_list.cpp
+++ b/yql/essentials/core/type_ann/type_ann_list.cpp
@@ -3081,6 +3081,53 @@ namespace {
return ListAutomapArgs(input, output, ctx, "ZipAll");
}
+ IGraphTransformer::TStatus PruneKeysWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { // Type annotation for a two-argument node (sequence, key extractor); the result type equals the type of the first argument. PruneKeys gets one extra key check below.
+ if (!EnsureArgsCount(*input, 2, ctx.Expr)) { // Exactly two children are required.
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (IsNull(input->Head())) { // Null input: replace the whole node with its input and ask for another pass.
+ output = input->HeadPtr();
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ if (IsEmptyList(input->Head())) { // Empty-list input: same replacement.
+ output = input->HeadPtr();
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ const TTypeAnnotationNode* itemType = nullptr;
+ if (!EnsureNewSeqType<false, true, true>(input->Head(), ctx.Expr, &itemType)) { // Validates the input's sequence type and receives its item type; which kinds the template flags admit is not visible here - TODO confirm.
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ auto& keyExtractorLambda = input->ChildRef(1);
+ const auto status = ConvertToLambda(keyExtractorLambda, ctx.Expr, 1); // The extractor must be convertible to a one-argument lambda.
+ if (status.Level != IGraphTransformer::TStatus::Ok) {
+ return status;
+ }
+
+ if (!UpdateLambdaAllArgumentsTypes(keyExtractorLambda, {itemType}, ctx.Expr)) { // The lambda argument is typed with the sequence item type.
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!keyExtractorLambda->GetTypeAnn()) { // The lambda has no type annotation yet: request another pass.
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ if (input->IsCallable("PruneKeys") && // Hashable key is required only for PruneKeys.
+ !EnsureHashableKey(keyExtractorLambda->Pos(), keyExtractorLambda->GetTypeAnn(), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!EnsureEquatableKey(keyExtractorLambda->Pos(), keyExtractorLambda->GetTypeAnn(), ctx.Expr)) { // Equatable key is required in every case.
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ input->SetTypeAnn(input->Head().GetTypeAnn()); // Output type is the input's type, unchanged.
+ return IGraphTransformer::TStatus::Ok;
+ }
+
bool ValidateSortDirections(TExprNode& direction, TExprContext& ctx, bool& isTuple) {
bool isOkAscending = false;
diff --git a/yql/essentials/core/type_ann/type_ann_list.h b/yql/essentials/core/type_ann/type_ann_list.h
index 7071b67461c..abcc3b5ae1b 100644
--- a/yql/essentials/core/type_ann/type_ann_list.h
+++ b/yql/essentials/core/type_ann/type_ann_list.h
@@ -70,6 +70,7 @@ namespace NTypeAnnImpl {
IGraphTransformer::TStatus ListUnionAllWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
IGraphTransformer::TStatus ListZipWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
IGraphTransformer::TStatus ListZipAllWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+ IGraphTransformer::TStatus PruneKeysWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
IGraphTransformer::TStatus SortWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
IGraphTransformer::TStatus TopWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
IGraphTransformer::TStatus KeepTopWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
diff --git a/yql/essentials/core/yql_expr_constraint.cpp b/yql/essentials/core/yql_expr_constraint.cpp
index 7921e31f97e..32cba8f9d28 100644
--- a/yql/essentials/core/yql_expr_constraint.cpp
+++ b/yql/essentials/core/yql_expr_constraint.cpp
@@ -112,6 +112,8 @@ public:
Functions["ToStream"] = &TCallableConstraintTransformer::CopyAllFrom<0>;
Functions["ToSequence"] = &TCallableConstraintTransformer::CopyAllFrom<0>;
Functions["Collect"] = &TCallableConstraintTransformer::CopyAllFrom<0>;
+ Functions["PruneAdjacentKeys"] = &TCallableConstraintTransformer::PruneKeysWrap<true>;
+ Functions["PruneKeys"] = &TCallableConstraintTransformer::PruneKeysWrap<false>;
Functions["FilterNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TChoppedConstraintNode, TPartOfChoppedConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>;
Functions["SkipNullMembers"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TChoppedConstraintNode, TPartOfChoppedConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>;
Functions["FilterNullElements"] = &TCallableConstraintTransformer::FromFirst<TSortedConstraintNode, TPartOfSortedConstraintNode, TChoppedConstraintNode, TPartOfChoppedConstraintNode, TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode>;
@@ -805,6 +807,18 @@ private:
return FromFirst<TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode, TVarIndexConstraintNode, TMultiConstraintNode>(input, output, ctx);
}
+ template <bool Adjacent> // Adjacent == true: every constraint is copied from child 0; false: only the kinds listed in the FromFirst call below.
+ TStatus PruneKeysWrap(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) const { // Constraint handler for a node whose child 1 is a lambda and whose child 0 is the data input.
+ if (const auto status = UpdateLambdaConstraints(*input->Child(1)); status != TStatus::Ok) { // Process the lambda (child 1) first; any non-Ok status is returned to the caller.
+ return status;
+ }
+
+ if constexpr (Adjacent) {
+ return CopyAllFrom<0>(input, output, ctx);
+ }
+ return FromFirst<TEmptyConstraintNode, TUniqueConstraintNode, TPartOfUniqueConstraintNode, TDistinctConstraintNode, TPartOfDistinctConstraintNode>(input, output, ctx); // Sorted and Chopped constraint kinds are not in this list.
+ }
+
template<class TConstraint>
static const TConstraint* GetConstraintFromWideResultLambda(const TExprNode& lambda, TExprContext& ctx);
diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
index 587366b57a3..1ed6dbeb712 100644
--- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
@@ -1216,6 +1216,20 @@
"uri": "https://{canondata_backend}/1946324/427ab5a66851b154f8b0c33004a9e5ce6c7d6dbb/resource.tar.gz#test.test_select-autogen_columns_conflict-default.txt-Results_/results.txt"
}
],
+ "test.test[select-prune_keys-default.txt-Debug]": [
+ {
+ "checksum": "95e58e469ce10fce0d0d5e55c0cf3baf",
+ "size": 3292,
+ "uri": "https://{canondata_backend}/1931696/04008bc01ad4f562f8e03ad2bc296f7a64a78489/resource.tar.gz#test.test_select-prune_keys-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[select-prune_keys-default.txt-Results]": [
+ {
+ "checksum": "f53b6976b6a5e1ee5e1d331c25963930",
+ "size": 27670,
+ "uri": "https://{canondata_backend}/1931696/04008bc01ad4f562f8e03ad2bc296f7a64a78489/resource.tar.gz#test.test_select-prune_keys-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[union-union_positional_mix-default.txt-Debug]": [
{
"checksum": "35839a396fdaff3806b0a6117135e8b7",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 635dbdb17fb..be9dfbf6929 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -6978,6 +6978,13 @@
"uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_select-null_check_/sql.yql"
}
],
+ "test_sql2yql.test[select-prune_keys]": [
+ {
+ "checksum": "55346f77548ef19f9a09d2f1d3f6f466",
+ "size": 17765,
+ "uri": "https://{canondata_backend}/1871182/906a4c4e540bb8746f8d7595500d4d1c9f664846/resource.tar.gz#test_sql2yql.test_select-prune_keys_/sql.yql"
+ }
+ ],
"test_sql2yql.test[select-result_label]": [
{
"checksum": "77393e1875cdffd6504971730f13b85c",
@@ -10893,6 +10900,11 @@
"uri": "file://test_sql_format.test_select-null_check_/formatted.sql"
}
],
+ "test_sql_format.test[select-prune_keys]": [
+ {
+ "uri": "file://test_sql_format.test_select-prune_keys_/formatted.sql"
+ }
+ ],
"test_sql_format.test[select-result_label]": [
{
"uri": "file://test_sql_format.test_select-result_label_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_select-prune_keys_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_select-prune_keys_/formatted.sql
new file mode 100644
index 00000000000..65a0e7c3a79
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_select-prune_keys_/formatted.sql
@@ -0,0 +1,97 @@
+/* postgres can not */
+PRAGMA warning('disable', '4510');
+
+$id = ($x) -> {
+ RETURN $x;
+};
+
+SELECT
+ Yql::PruneAdjacentKeys(AsList(1, 1, 1, 2, 3, 3, 4, 5), $id)
+;
+
+SELECT
+ Yql::PruneKeys(AsList(1, 1, 1, 1, 1, 1, 1), $id)
+;
+
+SELECT
+ Yql::PruneAdjacentKeys([], $id)
+;
+
+SELECT
+ Yql::PruneKeys([], $id)
+;
+
+$mod2 = ($x) -> {
+ RETURN $x % 2;
+};
+
+SELECT
+ ListLength(Yql::PruneKeys(AsList(1, 1, 1, 3, 3, 3, 3), $mod2))
+;
+
+-- optimize tests
+$get_a = ($x) -> {
+ RETURN <|a: $x.a|>;
+};
+
+SELECT
+ Yql::ExtractMembers(Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), AsTuple(EvaluateAtom('a')))
+;
+
+SELECT
+ Yql::ExtractMembers(Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), AsTuple(EvaluateAtom('a')))
+;
+
+$get_a_b = ($x) -> {
+ RETURN <|a: $x.a, b: $x.b|>;
+};
+
+$prune_keys_result = Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_b);
+
+SELECT
+ Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('a'))),
+ Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('b')))
+;
+
+$prune_adjacent_keys_result = Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_b);
+
+SELECT
+ Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('a'))),
+ Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('b')))
+;
+
+$get_a_bp1_list = ($x) -> {
+ RETURN AsList(<|a: $x.a, b: $x.b + 1|>);
+};
+
+SELECT
+ Yql::PruneKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a)
+;
+
+SELECT
+ Yql::PruneKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a_b)
+;
+
+SELECT
+ Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a)
+;
+
+SELECT
+ Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_bp1_list), $get_a_b)
+;
+
+SELECT
+ Yql::PruneKeys(Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), $get_a)
+;
+
+SELECT
+ Yql::PruneKeys(Yql::PruneKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a_b), $get_a)
+;
+
+SELECT
+ Yql::PruneKeys(Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), $get_a)
+;
+
+SELECT
+ Yql::PruneAdjacentKeys(Yql::PruneAdjacentKeys(AsList(<|a: 1, b: 2, c: 3|>, <|a: 1, b: 3, c: 4|>), $get_a), $get_a)
+;
diff --git a/yql/essentials/tests/sql/suites/select/prune_keys.sql b/yql/essentials/tests/sql/suites/select/prune_keys.sql
new file mode 100644
index 00000000000..9cf5cb3ced7
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/select/prune_keys.sql
@@ -0,0 +1,39 @@
+/* postgres can not */
+pragma warning("disable", "4510");
+
+$id = ($x) -> { RETURN $x; };
+SELECT Yql::PruneAdjacentKeys(AsList(1,1,1,2,3,3,4,5), $id);
+SELECT Yql::PruneKeys(AsList(1,1,1,1,1,1,1), $id);
+
+SELECT Yql::PruneAdjacentKeys([], $id);
+SELECT Yql::PruneKeys([], $id);
+
+$mod2 = ($x) -> { RETURN $x % 2; };
+SELECT ListLength(Yql::PruneKeys(AsList(1,1,1,3,3,3,3), $mod2));
+
+-- optimize tests
+
+$get_a = ($x) -> { RETURN <|a:$x.a|>; };
+select Yql::ExtractMembers(Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), AsTuple(EvaluateAtom('a')));
+select Yql::ExtractMembers(Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), AsTuple(EvaluateAtom('a')));
+
+
+$get_a_b = ($x) -> { RETURN <|a:$x.a, b:$x.b|>; };
+$prune_keys_result = Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_b);
+select Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('a'))), Yql::ExtractMembers($prune_keys_result, AsTuple(EvaluateAtom('b')));
+
+$prune_adjacent_keys_result = Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_b);
+select Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('a'))), Yql::ExtractMembers($prune_adjacent_keys_result, AsTuple(EvaluateAtom('b')));
+
+
+$get_a_bp1_list = ($x) -> { RETURN AsList(<|a:$x.a, b:$x.b+1|>); };
+select Yql::PruneKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a);
+select Yql::PruneKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a_b);
+select Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a);
+select Yql::PruneAdjacentKeys(Yql::FlatMap(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_bp1_list), $get_a_b);
+
+
+select Yql::PruneKeys(Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), $get_a);
+select Yql::PruneKeys(Yql::PruneKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a_b), $get_a);
+select Yql::PruneKeys(Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), $get_a);
+select Yql::PruneAdjacentKeys(Yql::PruneAdjacentKeys(AsList(<|a:1, b:2, c:3|>, <|a:1, b:3, c:4|>), $get_a), $get_a);