aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoraneporada <aneporada@yandex-team.com>2025-02-10 19:30:53 +0300
committeraneporada <aneporada@yandex-team.com>2025-02-10 20:53:55 +0300
commit4b2c8aacf05f44a2ed5edf26890d2d0d1e8b7e0b (patch)
treefecad494bf656cefa7379d394a5c59a20895c23e
parentd2adf92b05da14a10e83d24b20e3fb18402bc149 (diff)
downloadydb-4b2c8aacf05f44a2ed5edf26890d2d0d1e8b7e0b.tar.gz
Improve Unordered over Sort optimizer
commit_hash:943290d931b71f917670ac7a420f8eb736a3f5f5
-rw-r--r--yql/essentials/core/common_opt/yql_co_finalizers.cpp68
-rw-r--r--yql/essentials/core/common_opt/yql_co_flow2.cpp7
-rw-r--r--yql/essentials/core/common_opt/yql_co_simple1.cpp14
-rw-r--r--yql/essentials/tests/sql/minirun/part0/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part1/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json24
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql10
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql12
-rw-r--r--yql/essentials/tests/sql/suites/optimizers/unordered_over_extract_members_topsort.sql8
-rw-r--r--yql/essentials/tests/sql/suites/optimizers/unordered_over_sort_multiusage.sql6
-rw-r--r--yt/yql/tests/sql/suites/optimizers/unordered_over_sort.cfg3
-rw-r--r--yt/yql/tests/sql/suites/optimizers/unordered_over_sort.sql8
12 files changed, 186 insertions, 2 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_finalizers.cpp b/yql/essentials/core/common_opt/yql_co_finalizers.cpp
index 9211621358..6c9bc8036f 100644
--- a/yql/essentials/core/common_opt/yql_co_finalizers.cpp
+++ b/yql/essentials/core/common_opt/yql_co_finalizers.cpp
@@ -264,6 +264,68 @@ void FilterPushdownWithMultiusage(const TExprNode::TPtr& node, TNodeOnNodeOwnedM
}
}
+bool AllConsumersAreUnordered(const TExprNode::TPtr& node, const TParentsMap& parents, TNodeSet& unorderedConsumers) {
+ static const THashSet<TStringBuf> traverseCallables = {
+ TCoExtractMembers::CallableName(),
+ TCoAssumeDistinct::CallableName(),
+ TCoAssumeUnique::CallableName(),
+ TCoAssumeColumnOrder::CallableName(),
+ };
+ TNodeSet current{node.Get()};
+ while (!current.empty()) {
+ TNodeSet next;
+ for (auto curr : current) {
+ auto it = parents.find(curr);
+ if (it == parents.cend() || it->second.empty()) {
+ return false;
+ }
+ for (auto parent : it->second) {
+ if (TCoUnordered::Match(parent)) {
+ // TODO: should probably add more callables here
+ unorderedConsumers.insert(parent);
+ continue;
+ }
+ if (!parent->IsCallable(traverseCallables)) {
+ return false;
+ }
+ YQL_ENSURE(&parent->Head() == curr);
+ next.insert(parent);
+ }
+ }
+ current = std::move(next);
+ }
+ return true;
+}
+
+bool OptimizeForUnorderedConsumers(const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) {
+ static const char optName[] = "UnorderedOverSortImproved";
+ YQL_ENSURE(optCtx.Types);
+ const bool optEnabled = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types);
+ if (!optEnabled) {
+ return false;
+ }
+
+ if (!node->IsCallable({"Sort", "AssumeSorted", "TopSort"})) {
+ return false;
+ }
+
+ YQL_ENSURE(optCtx.ParentsMap);
+ const auto& parentsMap = *optCtx.ParentsMap;
+ TNodeSet unorderedConsumers;
+ if (!AllConsumersAreUnordered(node, parentsMap, unorderedConsumers)) {
+ return false;
+ }
+
+ YQL_ENSURE(!unorderedConsumers.empty());
+ const TExprNode::TPtr newNode = node->IsCallable("TopSort") ? ctx.RenameNode(*node, "Top") : node->HeadPtr();
+ for (auto consumer : unorderedConsumers) {
+ toOptimize[consumer] = ctx.ReplaceNode(ctx.ShallowCopy(*consumer), *node, newNode);
+ }
+
+ YQL_CLOG(DEBUG, Core) << "Droping sort due to unordered consumers for " << node->Content();
+ return true;
+}
+
}
void RegisterCoFinalizers(TFinalizingOptimizerMap& map) {
@@ -328,6 +390,9 @@ void RegisterCoFinalizers(TFinalizingOptimizerMap& map) {
};
map[TCoSort::CallableName()] = map[TCoAssumeSorted::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) {
+ if (OptimizeForUnorderedConsumers(node, toOptimize, ctx, optCtx)) {
+ return true;
+ }
OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx,
[] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) {
return ApplyExtractMembersToSort(input, members, parentsMap, ctx, " with multi-usage");
@@ -348,6 +413,9 @@ void RegisterCoFinalizers(TFinalizingOptimizerMap& map) {
};
map[TCoTop::CallableName()] = map[TCoTopSort::CallableName()] = [](const TExprNode::TPtr& node, TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx, TOptimizeContext& optCtx) {
+ if (OptimizeForUnorderedConsumers(node, toOptimize, ctx, optCtx)) {
+ return true;
+ }
OptimizeSubsetFieldsForNodeWithMultiUsage(node, *optCtx.ParentsMap, toOptimize, ctx,
[] (const TExprNode::TPtr& input, const TExprNode::TPtr& members, const TParentsMap& parentsMap, TExprContext& ctx) {
return ApplyExtractMembersToTop(input, members, parentsMap, ctx, " with multi-usage");
diff --git a/yql/essentials/core/common_opt/yql_co_flow2.cpp b/yql/essentials/core/common_opt/yql_co_flow2.cpp
index 0a39c3a266..dac8bd02e7 100644
--- a/yql/essentials/core/common_opt/yql_co_flow2.cpp
+++ b/yql/essentials/core/common_opt/yql_co_flow2.cpp
@@ -2653,7 +2653,12 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) {
return ctx.RenameNode(node->Head(), "Top");
}
- if (node->Head().IsCallable({"Sort", "AssumeSorted"})) {
+ static const char optName[] = "UnorderedOverSortImproved";
+ YQL_ENSURE(optCtx.Types);
+ const bool optEnabled = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types);
+
+ if (!optEnabled && node->Head().IsCallable({"Sort", "AssumeSorted"})) {
+ // if optEnabled this action is performed in yql_co_simple1.cpp (without multiusage check)
YQL_CLOG(DEBUG, Core) << node->Content() << " absorbs " << node->Head().Content();
return ctx.ChangeChild(*node, 0U, node->Head().HeadPtr());
}
diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp
index 9abfa313b6..be0a1f4644 100644
--- a/yql/essentials/core/common_opt/yql_co_simple1.cpp
+++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp
@@ -6136,7 +6136,7 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
Y_UNREACHABLE();
};
- map["Unordered"] = map["UnorderedSubquery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
+ map["Unordered"] = map["UnorderedSubquery"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
if (node->Head().IsCallable({"AsList","EquiJoin","Filter","Map","FlatMap","MultiMap","Extend", "Apply","PartitionByKey","PartitionsByKeys"})) {
YQL_CLOG(DEBUG, Core) << "Drop " << node->Content() << " over " << node->Head().Content();
return node->HeadPtr();
@@ -6147,6 +6147,18 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
return ctx.ChangeChild(*node, 0, node->Head().HeadPtr());
}
+ static const char optName[] = "UnorderedOverSortImproved";
+ YQL_ENSURE(optCtx.Types);
+ const bool optEnabled = IsOptimizerEnabled<optName>(*optCtx.Types) && !IsOptimizerDisabled<optName>(*optCtx.Types);
+ if (optEnabled) {
+ if (node->Head().IsCallable(node->Content()) ||
+ node->Head().IsCallable("Sort") && node->IsCallable("Unordered"))
+ {
+ YQL_CLOG(DEBUG, Core) << "Drop " << node->Head().Content() << " under " << node->Content();
+ return ctx.ChangeChild(*node, 0, node->Head().HeadPtr());
+ }
+ }
+
if (node->Head().IsCallable("AssumeConstraints") && node->Head().GetConstraint<TSortedConstraintNode>()) {
TConstraintSet constrSet = node->Head().GetConstraintSet();
constrSet.RemoveConstraint<TSortedConstraintNode>();
diff --git a/yql/essentials/tests/sql/minirun/part0/canondata/result.json b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
index f7aa9bc69d..6f56c572bd 100644
--- a/yql/essentials/tests/sql/minirun/part0/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part0/canondata/result.json
@@ -771,6 +771,20 @@
"uri": "https://{canondata_backend}/1942100/8a8d7c2be621747f507193c25049fe75678399a1/resource.tar.gz#test.test_optimizers-or_absorption--Results_/results.txt"
}
],
+ "test.test[optimizers-unordered_over_extract_members_topsort-default.txt-Debug]": [
+ {
+ "checksum": "85d24ad7144a359ef44f4eab9704c29a",
+ "size": 755,
+ "uri": "https://{canondata_backend}/1597364/388cf5920d17fb9690dc3f4d18ce7c82ae3bce7f/resource.tar.gz#test.test_optimizers-unordered_over_extract_members_topsort-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[optimizers-unordered_over_extract_members_topsort-default.txt-Results]": [
+ {
+ "checksum": "7602716960a8860c90c9f9715c95386e",
+ "size": 888,
+ "uri": "https://{canondata_backend}/1597364/388cf5920d17fb9690dc3f4d18ce7c82ae3bce7f/resource.tar.gz#test.test_optimizers-unordered_over_extract_members_topsort-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[params-struct--Debug]": [
{
"checksum": "f9dc205de4d3652a1c45c5c40e79d4ca",
diff --git a/yql/essentials/tests/sql/minirun/part1/canondata/result.json b/yql/essentials/tests/sql/minirun/part1/canondata/result.json
index 9338973cc7..b9ac523223 100644
--- a/yql/essentials/tests/sql/minirun/part1/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part1/canondata/result.json
@@ -762,6 +762,20 @@
"uri": "https://{canondata_backend}/1920236/97f248ad438e8879d0fc75f582d39c0a52739159/resource.tar.gz#test.test_match_recognize-greedy_quantifiers-default.txt-Results_/results.txt"
}
],
+ "test.test[optimizers-unordered_over_sort_multiusage-default.txt-Debug]": [
+ {
+ "checksum": "2c91f3dc50584c24136bd3f942a53f3c",
+ "size": 704,
+ "uri": "https://{canondata_backend}/1689644/141978755e651ef0b0128e7c121f7780bbd1f1a7/resource.tar.gz#test.test_optimizers-unordered_over_sort_multiusage-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[optimizers-unordered_over_sort_multiusage-default.txt-Results]": [
+ {
+ "checksum": "de43e3e98247429e9dcd7be570960749",
+ "size": 1835,
+ "uri": "https://{canondata_backend}/1689644/141978755e651ef0b0128e7c121f7780bbd1f1a7/resource.tar.gz#test.test_optimizers-unordered_over_sort_multiusage-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[optimizers-yql-10074_dont_inline_lists_depends_on-default.txt-Debug]": [
{
"checksum": "c5a0f848ea09e08bb847f96d22ba3862",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index d5199c3224..680a5b87fa 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -4325,6 +4325,20 @@
"uri": "https://{canondata_backend}/1942173/99e88108149e222741552e7e6cddef041d6a2846/resource.tar.gz#test_sql2yql.test_optimizers-total_order_/sql.yql"
}
],
+ "test_sql2yql.test[optimizers-unordered_over_extract_members_topsort]": [
+ {
+ "checksum": "9cb1b2793d6d65a3bfcebcdc3f3aa420",
+ "size": 1742,
+ "uri": "https://{canondata_backend}/1925821/4a7a165d661ae378f2025a97c95475bb115e6d39/resource.tar.gz#test_sql2yql.test_optimizers-unordered_over_extract_members_topsort_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[optimizers-unordered_over_sort_multiusage]": [
+ {
+ "checksum": "f1c95bf6035b258a482866f55a179e19",
+ "size": 2131,
+ "uri": "https://{canondata_backend}/1899731/164076531bc4516ecd4b3deb7194d9251f373494/resource.tar.gz#test_sql2yql.test_optimizers-unordered_over_sort_multiusage_/sql.yql"
+ }
+ ],
"test_sql2yql.test[optimizers-wide_if_present_over_double_just]": [
{
"checksum": "ed6c179f04ea4ee166d0aad0f781b5a3",
@@ -10341,6 +10355,16 @@
"uri": "file://test_sql_format.test_optimizers-total_order_/formatted.sql"
}
],
+ "test_sql_format.test[optimizers-unordered_over_extract_members_topsort]": [
+ {
+ "uri": "file://test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[optimizers-unordered_over_sort_multiusage]": [
+ {
+ "uri": "file://test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql"
+ }
+ ],
"test_sql_format.test[optimizers-wide_if_present_over_double_just]": [
{
"uri": "file://test_sql_format.test_optimizers-wide_if_present_over_double_just_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql
new file mode 100644
index 0000000000..6c57bbe3a5
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_extract_members_topsort_/formatted.sql
@@ -0,0 +1,10 @@
+PRAGMA warning('disable', '4510');
+PRAGMA config.flags('OptimizerFlags', 'UnorderedOverSortImproved');
+
+$l = ListMap(ListFromRange(1, 10), ($x) -> (AsStruct($x + 1 AS x, $x AS y)));
+$top_by_neg_x = ListTake(ListSort($l, ($row) -> (-$row.x)), 3);
+$y = ListMap($top_by_neg_x, ($row) -> ($row.y));
+
+SELECT
+ ListSum(YQL::Unordered($y))
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql
new file mode 100644
index 0000000000..f19b554e4f
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_optimizers-unordered_over_sort_multiusage_/formatted.sql
@@ -0,0 +1,12 @@
+PRAGMA warning('disable', '4510');
+PRAGMA config.flags('OptimizerFlags', 'UnorderedOverSortImproved');
+
+$l = ListSort(ListFromRange(1, 5), ($x) -> (-$x));
+
+SELECT
+ ListSum(YQL::Unordered($l))
+;
+
+SELECT
+ ListTake($l, 3)
+;
diff --git a/yql/essentials/tests/sql/suites/optimizers/unordered_over_extract_members_topsort.sql b/yql/essentials/tests/sql/suites/optimizers/unordered_over_extract_members_topsort.sql
new file mode 100644
index 0000000000..229183ed6b
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/optimizers/unordered_over_extract_members_topsort.sql
@@ -0,0 +1,8 @@
+pragma warning("disable", "4510");
+pragma config.flags('OptimizerFlags', 'UnorderedOverSortImproved');
+
+$l = ListMap(ListFromRange(1, 10), ($x) -> (AsStruct($x + 1 as x, $x as y)));
+$top_by_neg_x = ListTake(ListSort($l, ($row)->(-$row.x)), 3);
+$y = ListMap($top_by_neg_x, ($row) -> ($row.y));
+
+select ListSum(YQL::Unordered($y));
diff --git a/yql/essentials/tests/sql/suites/optimizers/unordered_over_sort_multiusage.sql b/yql/essentials/tests/sql/suites/optimizers/unordered_over_sort_multiusage.sql
new file mode 100644
index 0000000000..61369ae3ae
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/optimizers/unordered_over_sort_multiusage.sql
@@ -0,0 +1,6 @@
+pragma warning("disable", "4510");
+pragma config.flags('OptimizerFlags', 'UnorderedOverSortImproved');
+
+$l = ListSort(ListFromRange(1, 5), ($x)->(-$x));
+select ListSum(YQL::Unordered($l));
+select ListTake($l, 3);
diff --git a/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.cfg b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.cfg
new file mode 100644
index 0000000000..654ff6ddb9
--- /dev/null
+++ b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.cfg
@@ -0,0 +1,3 @@
+in Input input5.txt
+in Dict input3.txt
+providers yt
diff --git a/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.sql b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.sql
new file mode 100644
index 0000000000..f5df9f1518
--- /dev/null
+++ b/yt/yql/tests/sql/suites/optimizers/unordered_over_sort.sql
@@ -0,0 +1,8 @@
+use plato;
+
+pragma config.flags('OptimizerFlags', 'UnorderedOverSortImproved');
+
+select * without key
+from Input
+where subkey in (select subkey from Dict where subkey > "0")
+