diff options
author | aneporada <aneporada@yandex-team.com> | 2025-02-10 19:30:53 +0300 |
---|---|---|
committer | aneporada <aneporada@yandex-team.com> | 2025-02-10 20:53:55 +0300 |
commit | 4b2c8aacf05f44a2ed5edf26890d2d0d1e8b7e0b (patch) | |
tree | fecad494bf656cefa7379d394a5c59a20895c23e | |
parent | d2adf92b05da14a10e83d24b20e3fb18402bc149 (diff) | |
download | ydb-4b2c8aacf05f44a2ed5edf26890d2d0d1e8b7e0b.tar.gz |
Improve Unordered over Sort optimizer
commit_hash:943290d931b71f917670ac7a420f8eb736a3f5f5
12 files changed, 186 insertions, 2 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_finalizers.cpp b/yql/essentials/core/common_opt/yql_co_finalizers.cpp index 9211621358..6c9bc8036f 100644 --- a/yql/essentials/core/common_opt/yql_co_finalizers.cpp +++ b/yql/essentials/core/common_opt/yql_co_finalizers.cpp @@ -264,6 +264,68 @@ void FilterPushdownWithMultiusage(const TExprNode::TPtr& node, TNodeOnNodeOwnedM } } +bool AllConsumersAreUnordered(const TExprNode::TPtr& node, const TParentsMap& parents, TNodeSet& unorderedConsumers) { + static const THashSet<TStringBuf> traverseCallables = { + TCoExtractMembers::CallableName(), + TCoAssumeDistinct::CallableName(), + TCoAssumeUnique::CallableName(), + TCoAssumeColumnOrder::CallableName(), + }; + TNodeSet current{node.Get()}; + while (!current.empty()) { + TNodeSet next; + for (auto curr : current) { + auto it = parents.find(curr); + if (it == parents.cend() || it->second.empty()) { + return false; + } + for (auto parent : it->second) { + if (TCoUnordered::Match(parent)) { + // TODO: should probably add more callables here + unorderedConsumers.insert(parent); + continue; + } + if (!parent->IsCallable(traverseCallables)) { + return false; + } + YQL_ENSURE(&parent->Head() == curr); + next.insert(parent); + } + } + current = std::move(next); + } + return true; +} + +bool OptimizeForUnorderedConsumers(const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) { + static const char optName[] = "UnorderedOverSortImproved"; + YQL_ENSURE(optCtx.Types); + const bool optEnabled = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types); + if (!optEnabled) { + return false; + } + + if (!node->IsCallable({"Sort", "AssumeSorted", "TopSort"})) { + return false; + } + + YQL_ENSURE(optCtx.ParentsMap); + const auto& parentsMap = *optCtx.ParentsMap; + TNodeSet unorderedConsumers; + if (!AllConsumersAreUnordered(node, parentsMap, unorderedConsumers)) { + return false; + } + + YQL_ENSURE(!unorderedConsumers.empty()); + const TExprNode::TPtr newNode = node->IsCallable("TopSort") ? ctx.RenameNode(*node, "Top") : node->HeadPtr(); + for (auto consumer : unorderedConsumers) { + toOptimize[consumer] = ctx.ReplaceNode(ctx.ShallowCopy(*consumer), *node, newNode); + } + + YQL_CLOG(DEBUG, Core) << "Droping sort due to unordered consumers for " << node->Content(); + return true; +} + } void RegisterCoFinalizers(TFinalizingOptimizerMap& map) { @@ -328,6 +390,9 @@ void RegisterCoFinalizers(TFinalizingOptimizerMap& map) { }; map[TCoSort::CallableName()] = map[TCoAssumeSorted::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) { + if (OptimizeForUnorderedConsumers(node, toOptimize, ctx, optCtx)) { + return true; + } OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx, [] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) { return ApplyExtractMembersToSort(input, members, parentsMap, ctx, " with multi-usage"); @@ -348,6 +413,9 @@ void RegisterCoFinalizers(TFinalizingOptimizerMap& map) { }; map[TCoTop::CallableName()] = map[TCoTopSort::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) { + if (OptimizeForUnorderedConsumers(node, toOptimize, ctx, optCtx)) { + return true; + } OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx, [] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) { return ApplyExtractMembersToTop(input, members, parentsMap, ctx, " with multi-usage"); diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp index 0a39c3a266..dac8bd02e7 100644 --- a/yql/essentials/core/common_opt/yql_co_flow2.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp @@ -2653,7 +2653,12 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { return ctx.RenameNode(node->Head(), "Top"); } - if (node->Head().IsCallable({"Sort", "AssumeSorted"})) { + static const char optName[] = "UnorderedOverSortImproved"; + YQL_ENSURE(optCtx.Types); + const bool optEnabled = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types); + + if (!optEnabled && node->Head().IsCallable({"Sort", "AssumeSorted"})) { + // if optEnabled this action is performed in yql_co_simple1.cpp (without multiusage check) YQL_CLOG(DEBUG, Core) << node->Content() << " absorbs " << node->Head().Content(); return ctx.ChangeChild(*node, 0U, node->Head().HeadPtr()); } diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp index 9abfa313b6..be0a1f4644 100644 --- a/yql/essentials/core/common_opt/yql_co_simple1.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp @@ -6136,7 +6136,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { Y_UNREACHABLE(); }; - map["Unordered"] = map["UnorderedSubquery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) { + map["Unordered"] = map["UnorderedSubquery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { if (node->Head().IsCallable({"AsList","EquiJoin","Filter","Map","FlatMap","MultiMap","Extend", "Apply","PartitionByKey","PartitionsByKeys"})) { YQL_CLOG(DEBUG, Core) << "Drop " << node->Content() << " over " << node->Head().Content(); return node->HeadPtr(); @@ -6147,6 +6147,18 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { return ctx.ChangeChild(*node, 0, node->Head().HeadPtr()); } + static const char optName[] = "UnorderedOverSortImproved"; + YQL_ENSURE(optCtx.Types); + const bool optEnabled = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types); + if (optEnabled) { + if (node->Head().IsCallable(node->Content()) || + node->Head().IsCallable("Sort") && node->IsCallable("Unordered")) + { + YQL_CLOG(DEBUG, Core) << "Drop " << node->Head().Content() << " under " << node->Content(); + return ctx.ChangeChild(*node, 0, node->Head().HeadPtr()); + } + } + if (node->Head().IsCallable("AssumeConstraints") && node->Head().GetConstraint<TSortedConstraintNode>()) { TConstraintSet constrSet = node->Head().GetConstraintSet(); constrSet.RemoveConstraint<TSortedConstraintNode>(); diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json index f7aa9bc69d..6f56c572bd 100644 --- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json @@ -771,6 +771,20 @@ "uri": "https://{canondata_backend}/1942100/8a8d7c2be621747f507193c25049fe75678399a1/resource.tar.gz#test.test_optimizers-or_absorption--Results_/results.txt" } ], + "test.test[optimizers-unordered_over_extract_members_topsort-default.txt-Debug]": [ + { + "checksum": "85d24ad7144a359ef44f4eab9704c29a", + "size": 755, + "uri": "https://{canondata_backend}/1597364/388cf5920d17fb9690dc3f4d18ce7c82ae3bce7f/resource.tar.gz#test.test_optimizers-unordered_over_extract_members_topsort-default.txt-Debug_/opt.yql" + } + ], + "test.test[optimizers-unordered_over_extract_members_topsort-default.txt-Results]": [ + { + "checksum": "7602716960a8860c90c9f9715c95386e", + "size": 888, + "uri": "https://{canondata_backend}/1597364/388cf5920d17fb9690dc3f4d18ce7c82ae3bce7f/resource.tar.gz#test.test_optimizers-unordered_over_extract_members_topsort-default.txt-Results_/results.txt" + } + ], "test.test[params-struct--Debug]": [ { "checksum": "f9dc205de4d3652a1c45c5c40e79d4ca", diff --git a/yql/essentials/tests/sql/minirun/part1/canondata/result.json b/yql/essentials/tests/sql/minirun/part1/canondata/result.json index 9338973cc7..b9ac523223 100644 --- a/yql/essentials/tests/sql/minirun/part1/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part1/canondata/result.json @@ -762,6 +762,20 @@ "uri": "https://{canondata_backend}/1920236/97f248ad438e8879d0fc75f582d39c0a52739159/resource.tar.gz#test.test_match_recognize-greedy_quantifiers-default.txt-Results_/results.txt" } ], + "test.test[optimizers-unordered_over_sort_multiusage-default.txt-Debug]": [ + { + "checksum": "2c91f3dc50584c24136bd3f942a53f3c", + "size": 704, + "uri": "https://{canondata_backend}/1689644/141978755e651ef0b0128e7c121f7780bbd1f1a7/resource.tar.gz#test.test_optimizers-unordered_over_sort_multiusage-default.txt-Debug_/opt.yql" + } + ], + "test.test[optimizers-unordered_over_sort_multiusage-default.txt-Results]": [ + { + "checksum": "de43e3e98247429e9dcd7be570960749", + "size": 1835, + "uri": "https://{canondata_backend}/1689644/141978755e651ef0b0128e7c121f7780bbd1f1a7/resource.tar.gz#test.test_optimizers-unordered_over_sort_multiusage-default.txt-Results_/results.txt" + } + ], "test.test[optimizers-yql-10074_dont_inline_lists_depends_on-default.txt-Debug]": [ { "checksum": "c5a0f848ea09e08bb847f96d22ba3862", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index d5199c3224..680a5b87fa 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -4325,6 +4325,20 @@ "uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_optimizers-total_order_/sql.yql" } ], + "test_sql2yql.test[optimizers-unordered_over_extract_members_topsort]": [ + { + "checksum": "9cb1b2793d6d65a3bfcebcdc3f3aa420", + "size": 1742, + "uri": "https://{canondata_backend}/1925821/4a7a165d661ae378f2025a97c95475bb115e6d39/resource.tar.gz#test_sql2yql.test_optimizers-unordered_over_extract_members_topsort_/sql.yql" + } + ], + "test_sql2yql.test[optimizers-unordered_over_sort_multiusage]": [ + { + "checksum": "f1c95bf6035b258a482866f55a179e19", + "size": 2131, + "uri": "https://{canondata_backend}/1899731/164076531bc4516ecd4b3deb7194d9251f373494/resource.tar.gz#test_sql2yql.test_optimizers-unordered_over_sort_multiusage_/sql.yql" + } + ], "test_sql2yql.test[optimizers-wide_if_present_over_double_just]": [ { "checksum": "ed6c179f04ea4ee166d0aad0f781b5a3", @@ -10341,6 +10355,16 @@ "uri": "file://test_sql_format.test_optimizers-total_order_/formatted.sql" } ], + "test_sql_format.test[optimizers-unordered_over_extract_members_topsort]": [ + { + "uri": "file://test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql" + } + ], + "test_sql_format.test[optimizers-unordered_over_sort_multiusage]": [ + { + "uri": "file://test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql" + } + ], "test_sql_format.test[optimizers-wide_if_present_over_double_just]": [ { "uri": "file://test_sql_format.test_optimizers-wide_if_present_over_double_just_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql new file mode 100644 index 0000000000..6c57bbe3a5 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql @@ -0,0 +1,10 @@ +PRAGMA warning('disable', '4510'); +PRAGMA config.flags('OptimizerFlags', 'UnorderedOverSortImproved'); + +$l = ListMap(ListFromRange(1, 10), ($x) -> (AsStruct($x + 1 AS x, $x AS y))); +$top_by_neg_x = ListTake(ListSort($l, ($row) -> (-$row.x)), 3); +$y = ListMap($top_by_neg_x, ($row) -> ($row.y)); + +SELECT + ListSum(YQL::Unordered($y)) +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql new file mode 100644 index 0000000000..f19b554e4f --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql @@ -0,0 +1,12 @@ +PRAGMA warning('disable', '4510'); +PRAGMA config.flags('OptimizerFlags', 'UnorderedOverSortImproved'); + +$l = ListSort(ListFromRange(1, 5), ($x) -> (-$x)); + +SELECT + ListSum(YQL::Unordered($l)) +; + +SELECT + ListTake($l, 3) +; diff --git a/yql/essentials/tests/sql/suites/optimizers/unordered_over_extract_members_topsort.sql b/yql/essentials/tests/sql/suites/optimizers/unordered_over_extract_members_topsort.sql new file mode 100644 index 0000000000..229183ed6b --- /dev/null +++ b/yql/essentials/tests/sql/suites/optimizers/unordered_over_extract_members_topsort.sql @@ -0,0 +1,8 @@ +pragma warning("disable", "4510"); +pragma config.flags('OptimizerFlags', 'UnorderedOverSortImproved'); + +$l = ListMap(ListFromRange(1, 10), ($x) -> (AsStruct($x + 1 as x, $x as y))); +$top_by_neg_x = ListTake(ListSort($l, ($row)->(-$row.x)), 3); +$y = ListMap($top_by_neg_x, ($row) -> ($row.y)); + +select ListSum(YQL::Unordered($y)); diff --git a/yql/essentials/tests/sql/suites/optimizers/unordered_over_sort_multiusage.sql b/yql/essentials/tests/sql/suites/optimizers/unordered_over_sort_multiusage.sql new file mode 100644 index 0000000000..61369ae3ae --- /dev/null +++ b/yql/essentials/tests/sql/suites/optimizers/unordered_over_sort_multiusage.sql @@ -0,0 +1,6 @@ +pragma warning("disable", "4510"); +pragma config.flags('OptimizerFlags', 'UnorderedOverSortImproved'); + +$l = ListSort(ListFromRange(1, 5), ($x)->(-$x)); +select ListSum(YQL::Unordered($l)); +select ListTake($l, 3); diff --git a/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.cfg b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.cfg new file mode 100644 index 0000000000..654ff6ddb9 --- /dev/null +++ b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.cfg @@ -0,0 +1,3 @@ +in Input input5.txt +in Dict input3.txt +providers yt diff --git a/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.sql b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.sql new file mode 100644 index 0000000000..f5df9f1518 --- /dev/null +++ b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.sql @@ -0,0 +1,8 @@ +use plato; + +pragma config.flags('OptimizerFlags', 'UnorderedOverSortImproved'); + +select * without key +from Input +where subkey in (select subkey from Dict where subkey > "0") + |