diff options
author | mpereskokova <[email protected]> | 2025-06-18 13:04:12 +0300 |
---|---|---|
committer | mpereskokova <[email protected]> | 2025-06-18 14:53:20 +0300 |
commit | 1264f781e9428ca95c3daf0b69b43f1ce956ab34 (patch) | |
tree | 3162d283606c412ab3ee5e63e5e9d2835eb256a8 /yql/essentials | |
parent | 61f8ee251577b9e79f673dc7bacc1deb5b23048d (diff) |
Add PruneKeys in YT opt
commit_hash:b12d341458bb39ffb6b4a4d7a99c3ef25a417ca5
Diffstat (limited to 'yql/essentials')
-rw-r--r-- | yql/essentials/core/common_opt/yql_co_flow2.cpp | 45 | ||||
-rw-r--r-- | yql/essentials/core/common_opt/yql_co_simple1.cpp | 2 | ||||
-rw-r--r-- | yql/essentials/core/yql_opt_utils.cpp | 55 | ||||
-rw-r--r-- | yql/essentials/core/yql_opt_utils.h | 4 | ||||
-rw-r--r-- | yql/essentials/tests/sql/sql2yql/canondata/result.json | 7 |
5 files changed, 75 insertions, 38 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp index c0ce99b8df3..16ba67da460 100644 --- a/yql/essentials/core/common_opt/yql_co_flow2.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp @@ -2097,56 +2097,27 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { continue; } + THashSet<TString> columns; auto itemNames = columnsForPruneKeysExtractor.find(scope.Ref().Content()); if (itemNames == columnsForPruneKeysExtractor.end() || itemNames->second.empty()) { children.push_back(equiJoin.Arg(i).Ptr()); continue; } - - if (auto distinct = list.Ref().GetConstraint<TDistinctConstraintNode>()) { - if (distinct->ContainsCompleteSet(std::vector<std::string_view>(itemNames->second.cbegin(), itemNames->second.cend()))) { - children.push_back(equiJoin.Arg(i).Ptr()); - continue; - } + for (const auto& elem : itemNames->second) { + columns.insert(TString(elem)); } - bool isOrdered = false; - if (auto sorted = list.Ref().GetConstraint<TSortedConstraintNode>()) { - for (const auto& item : sorted->GetContent()) { - size_t foundItemNamesCount = 0; - for (const auto& path : item.first) { - if (itemNames->second.contains(path.front())) { - foundItemNamesCount++; - } - } - if (foundItemNamesCount == itemNames->second.size()) { - isOrdered = true; - break; - } - } + if (IsAlreadyDistinct(list.Ref(), columns)) { + children.push_back(equiJoin.Arg(i).Ptr()); + continue; } - - auto pruneKeysCallable = isOrdered ? "PruneAdjacentKeys" : "PruneKeys"; + auto pruneKeysCallable = IsOrdered(list.Ref(), columns) ? "PruneAdjacentKeys" : "PruneKeys"; YQL_CLOG(DEBUG, Core) << "Add " << pruneKeysCallable << " to EquiJoin input #" << i << ", label " << scope.Ref().Content(); children.push_back(ctx.Builder(child.Pos()) .List() .Callable(0, pruneKeysCallable) .Add(0, list.Ptr()) - .Lambda(1) - .Param("item") - .List(0) - .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { - ui32 i = 0; - for (const auto& column : itemNames->second) { - parent.Callable(i++, "Member") - .Arg(0, "item") - .Atom(1, column) - .Seal(); - } - return parent; - }) - .Seal() - .Seal() + .Add(1, MakePruneKeysExtractorLambda(child.Ref(), columns, ctx)) .Seal() .Add(1, scope.Ptr()) .Seal() diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp index 0df3c0746ef..f2573982d40 100644 --- a/yql/essentials/core/common_opt/yql_co_simple1.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp @@ -6225,7 +6225,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { }; map["Unordered"] = map["UnorderedSubquery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { - if (node->Head().IsCallable({"AsList","EquiJoin","Filter","Map","FlatMap","MultiMap","Extend", "Apply","PartitionByKey","PartitionsByKeys"})) { + if (node->Head().IsCallable({"AsList","EquiJoin","Filter","Map","FlatMap","MultiMap","Extend", "Apply","PartitionByKey","PartitionsByKeys","PruneKeys"})) { YQL_CLOG(DEBUG, Core) << "Drop " << node->Content() << " over " << node->Head().Content(); return node->HeadPtr(); } diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp index b07cc6d41e5..76cc1a51500 100644 --- a/yql/essentials/core/yql_opt_utils.cpp +++ b/yql/essentials/core/yql_opt_utils.cpp @@ -359,6 +359,61 @@ bool IsNoPush(const TExprNode& node) { return node.IsCallable({"NoPush", "Likely"}); } +bool IsAlreadyDistinct(const TExprNode& node, const THashSet<TString>& columns) { + if (auto distinct = node.GetConstraint<TDistinctConstraintNode>()) { + if (distinct->ContainsCompleteSet(std::vector<std::string_view>(columns.cbegin(), columns.cend()))) { + return true; + } + } + return false; +} + +bool IsOrdered(const TExprNode& node, const THashSet<TString>& columns) { + if (auto sorted = node.GetConstraint<TSortedConstraintNode>()) { + for (const auto& item : sorted->GetContent()) { + size_t foundItemNamesCount = 0; + bool found = false; + for (const auto& path : item.first) { + if (path.size() == 1 && columns.contains(path.front())) { + foundItemNamesCount++; + found = true; + break; + } + } + if (foundItemNamesCount == columns.size()) { + return true; + } + + // Required columns are not sorted by prefix. + if (!found) { + break; + } + } + } + + return false; +} + +TExprNode::TPtr MakePruneKeysExtractorLambda(const TExprNode& node, const THashSet<TString>& columns, TExprContext& ctx) { + return ctx.Builder(node.Pos()) + .Lambda() + .Param("item") + .List(0) + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { + ui32 i = 0; + for (const auto& column : columns) { + parent.Callable(i++, "Member") + .Arg(0, "item") + .Atom(1, column) + .Seal(); + } + return parent; + }) + .Seal() + .Seal() + .Build(); +} + TExprNode::TPtr KeepColumnOrder(const TExprNode::TPtr& node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { auto columnOrder = typeCtx.LookupColumnOrder(src); if (!columnOrder) { diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h index 196847b63d1..3acfe0f9739 100644 --- a/yql/essentials/core/yql_opt_utils.h +++ b/yql/essentials/core/yql_opt_utils.h @@ -28,6 +28,10 @@ bool IsPassthroughLambda(const NNodes::TCoLambda& lambda, TMaybe<THashSet<TStrin bool IsTablePropsDependent(const TExprNode& node); bool IsNoPush(const TExprNode& node); +bool IsAlreadyDistinct(const TExprNode& node, const THashSet<TString>& columns); +bool IsOrdered(const TExprNode& node, const THashSet<TString>& columns); +TExprNode::TPtr MakePruneKeysExtractorLambda(const TExprNode& node, const THashSet<TString>& columns, TExprContext& ctx); + bool HasOnlyOneJoinType(const TExprNode& joinTree, TStringBuf joinType); TExprNode::TPtr KeepColumnOrder(const TExprNode::TPtr& node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& typeCtx); diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 029d804ed93..8eb7676357f 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -4003,6 +4003,13 @@ "uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql" } ], + "test_sql2yql.test[join-prune_keys_YQL-19979]": [ + { + "checksum": "0dad5d395f90148805e893a30f0b4963", + "size": 3845, + "uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql" + } + ], "test_sql2yql.test[join-yql-19192]": [ { "checksum": "fffdf1cbb40643da9daf9bdf3edec121", |