summaryrefslogtreecommitdiffstats
path: root/yql/essentials
diff options
context:
space:
mode:
author mpereskokova <[email protected]> 2025-06-18 13:04:12 +0300
committer mpereskokova <[email protected]> 2025-06-18 14:53:20 +0300
commit 1264f781e9428ca95c3daf0b69b43f1ce956ab34 (patch)
tree 3162d283606c412ab3ee5e63e5e9d2835eb256a8 /yql/essentials
parent 61f8ee251577b9e79f673dc7bacc1deb5b23048d (diff)
Add PruneKeys in YT opt
commit_hash:b12d341458bb39ffb6b4a4d7a99c3ef25a417ca5
Diffstat (limited to 'yql/essentials')
-rw-r--r-- yql/essentials/core/common_opt/yql_co_flow2.cpp 45
-rw-r--r-- yql/essentials/core/common_opt/yql_co_simple1.cpp 2
-rw-r--r-- yql/essentials/core/yql_opt_utils.cpp 55
-rw-r--r-- yql/essentials/core/yql_opt_utils.h 4
-rw-r--r-- yql/essentials/tests/sql/sql2yql/canondata/result.json 7
5 files changed, 75 insertions, 38 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp
index c0ce99b8df3..16ba67da460 100644
--- a/yql/essentials/core/common_opt/yql_co_flow2.cpp
+++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp
@@ -2097,56 +2097,27 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) {
continue;
}
+ THashSet<TString> columns;
auto itemNames = columnsForPruneKeysExtractor.find(scope.Ref().Content());
if (itemNames == columnsForPruneKeysExtractor.end() || itemNames->second.empty()) {
children.push_back(equiJoin.Arg(i).Ptr());
continue;
}
-
- if (auto distinct = list.Ref().GetConstraint<TDistinctConstraintNode>()) {
- if (distinct->ContainsCompleteSet(std::vector<std::string_view>(itemNames->second.cbegin(), itemNames->second.cend()))) {
- children.push_back(equiJoin.Arg(i).Ptr());
- continue;
- }
+ for (const auto& elem : itemNames->second) {
+ columns.insert(TString(elem));
}
- bool isOrdered = false;
- if (auto sorted = list.Ref().GetConstraint<TSortedConstraintNode>()) {
- for (const auto& item : sorted->GetContent()) {
- size_t foundItemNamesCount = 0;
- for (const auto& path : item.first) {
- if (itemNames->second.contains(path.front())) {
- foundItemNamesCount++;
- }
- }
- if (foundItemNamesCount == itemNames->second.size()) {
- isOrdered = true;
- break;
- }
- }
+ if (IsAlreadyDistinct(list.Ref(), columns)) {
+ children.push_back(equiJoin.Arg(i).Ptr());
+ continue;
}
-
- auto pruneKeysCallable = isOrdered ? "PruneAdjacentKeys" : "PruneKeys";
+ auto pruneKeysCallable = IsOrdered(list.Ref(), columns) ? "PruneAdjacentKeys" : "PruneKeys";
YQL_CLOG(DEBUG, Core) << "Add " << pruneKeysCallable << " to EquiJoin input #" << i << ", label " << scope.Ref().Content();
children.push_back(ctx.Builder(child.Pos())
.List()
.Callable(0, pruneKeysCallable)
.Add(0, list.Ptr())
- .Lambda(1)
- .Param("item")
- .List(0)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & {
- ui32 i = 0;
- for (const auto& column : itemNames->second) {
- parent.Callable(i++, "Member")
- .Arg(0, "item")
- .Atom(1, column)
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Seal()
+ .Add(1, MakePruneKeysExtractorLambda(child.Ref(), columns, ctx))
.Seal()
.Add(1, scope.Ptr())
.Seal()
diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp
index 0df3c0746ef..f2573982d40 100644
--- a/yql/essentials/core/common_opt/yql_co_simple1.cpp
+++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp
@@ -6225,7 +6225,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
};
map["Unordered"] = map["UnorderedSubquery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
- if (node->Head().IsCallable({"AsList","EquiJoin","Filter","Map","FlatMap","MultiMap","Extend", "Apply","PartitionByKey","PartitionsByKeys"})) {
+ if (node->Head().IsCallable({"AsList","EquiJoin","Filter","Map","FlatMap","MultiMap","Extend", "Apply","PartitionByKey","PartitionsByKeys","PruneKeys"})) {
YQL_CLOG(DEBUG, Core) << "Drop " << node->Content() << " over " << node->Head().Content();
return node->HeadPtr();
}
diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp
index b07cc6d41e5..76cc1a51500 100644
--- a/yql/essentials/core/yql_opt_utils.cpp
+++ b/yql/essentials/core/yql_opt_utils.cpp
@@ -359,6 +359,61 @@ bool IsNoPush(const TExprNode& node) {
return node.IsCallable({"NoPush", "Likely"});
}
+bool IsAlreadyDistinct(const TExprNode& node, const THashSet<TString>& columns) {
+    // Without a Distinct constraint on the node nothing can be concluded.
+    const auto distinctConstraint = node.GetConstraint<TDistinctConstraintNode>();
+    if (!distinctConstraint) {
+        return false;
+    }
+    return distinctConstraint->ContainsCompleteSet(std::vector<std::string_view>(columns.cbegin(), columns.cend()));
+}
+
+bool IsOrdered(const TExprNode& node, const THashSet<TString>& columns) { // true when a prefix of node's Sorted constraint matches exactly columns.size() positions by `columns`
+    if (auto sorted = node.GetConstraint<TSortedConstraintNode>()) {
+        size_t foundItemNamesCount = 0; // matched sort positions so far; kept across positions so multi-column keys can match
+        for (const auto& item : sorted->GetContent()) {
+            bool found = false;
+            for (const auto& path : item.first) { // item.first holds the paths attached to this sort position
+                if (path.size() == 1 && columns.contains(path.front())) { // only top-level (single-element) paths name a column
+                    foundItemNamesCount++;
+                    found = true;
+                    break; // count each sort position at most once
+                }
+            }
+            if (foundItemNamesCount == columns.size()) {
+                return true;
+            }
+
+            // Required columns are not sorted by prefix.
+            if (!found) {
+                break;
+            }
+        }
+    }
+
+    return false;
+}
+
+TExprNode::TPtr MakePruneKeysExtractorLambda(const TExprNode& node, const THashSet<TString>& columns, TExprContext& ctx) {
+    // Emits one Member(item, <column>) per requested column, in the set's iteration order.
+    const auto addMembers = [&columns](TExprNodeBuilder& tuple) -> TExprNodeBuilder& {
+        ui32 index = 0;
+        for (const auto& name : columns) {
+            tuple.Callable(index++, "Member")
+                .Arg(0, "item")
+                .Atom(1, name)
+            .Seal();
+        }
+        return tuple;
+    };
+    return ctx.Builder(node.Pos())
+        .Lambda()
+            .Param("item")
+            .List(0).Do(addMembers).Seal()
+        .Seal()
+        .Build();
+}
+
TExprNode::TPtr KeepColumnOrder(const TExprNode::TPtr& node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) {
auto columnOrder = typeCtx.LookupColumnOrder(src);
if (!columnOrder) {
diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h
index 196847b63d1..3acfe0f9739 100644
--- a/yql/essentials/core/yql_opt_utils.h
+++ b/yql/essentials/core/yql_opt_utils.h
@@ -28,6 +28,10 @@ bool IsPassthroughLambda(const NNodes::TCoLambda& lambda, TMaybe<THashSet<TStrin
bool IsTablePropsDependent(const TExprNode& node);
bool IsNoPush(const TExprNode& node);
+bool IsAlreadyDistinct(const TExprNode& node, const THashSet<TString>& columns); // true if node's Distinct constraint contains `columns` as a complete set
+bool IsOrdered(const TExprNode& node, const THashSet<TString>& columns); // checks node's Sorted constraint against `columns`
+TExprNode::TPtr MakePruneKeysExtractorLambda(const TExprNode& node, const THashSet<TString>& columns, TExprContext& ctx); // builds a lambda item -> list of Member(item, column), positioned at node.Pos()
+
bool HasOnlyOneJoinType(const TExprNode& joinTree, TStringBuf joinType);
TExprNode::TPtr KeepColumnOrder(const TExprNode::TPtr& node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& typeCtx);
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 029d804ed93..8eb7676357f 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -4003,6 +4003,13 @@
"uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql"
}
],
+ "test_sql2yql.test[join-prune_keys_YQL-19979]": [
+ {
+ "checksum": "0dad5d395f90148805e893a30f0b4963",
+ "size": 3845,
+ "uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql"
+ }
+ ],
"test_sql2yql.test[join-yql-19192]": [
{
"checksum": "fffdf1cbb40643da9daf9bdf3edec121",