summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvvvv <[email protected]>2025-09-26 15:33:07 +0300
committervvvv <[email protected]>2025-09-26 15:57:42 +0300
commitee38d4a04d0b4226b36bd9d548c35739f11e88f9 (patch)
tree7c2a6daec4b590f4b78d6cf60d36d3e9da569112
parente664ea1f749f613aa313379c775c2e7a7a268e64 (diff)
YQL-20339 merge blocks optimizer
init commit_hash:fee9a86a1f95e9271bf49205dad4b6eb0b934dec
-rw-r--r--yql/essentials/core/common_opt/ya.make1
-rw-r--r--yql/essentials/core/common_opt/yql_co_blocks.cpp111
-rw-r--r--yql/essentials/core/common_opt/yql_co_blocks.h13
-rw-r--r--yql/essentials/core/common_opt/yql_co_flow1.cpp14
-rw-r--r--yql/essentials/core/common_opt/yql_co_transformer.cpp8
-rw-r--r--yql/essentials/tests/sql/minirun/part2/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part4/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part5/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part9/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json48
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_/formatted.sql3
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql3
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block2_/formatted.sql3
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block_multi_/formatted.sql6
-rw-r--r--yql/essentials/tests/sql/suites/linear/opt_block1.yql2
-rw-r--r--yql/essentials/tests/sql/suites/linear/opt_block1_diff_lambda.yql2
-rw-r--r--yql/essentials/tests/sql/suites/linear/opt_block2.yql1
-rw-r--r--yql/essentials/tests/sql/suites/linear/opt_block_multi.yql4
18 files changed, 275 insertions, 0 deletions
diff --git a/yql/essentials/core/common_opt/ya.make b/yql/essentials/core/common_opt/ya.make
index 31ae6c7d324..e301b885994 100644
--- a/yql/essentials/core/common_opt/ya.make
+++ b/yql/essentials/core/common_opt/ya.make
@@ -2,6 +2,7 @@ LIBRARY()
SRCS(
yql_co.h
+ yql_co_blocks.cpp
yql_co_extr_members.cpp
yql_flatmap_over_join.cpp
yql_co_finalizers.cpp
diff --git a/yql/essentials/core/common_opt/yql_co_blocks.cpp b/yql/essentials/core/common_opt/yql_co_blocks.cpp
new file mode 100644
index 00000000000..94feed3727d
--- /dev/null
+++ b/yql/essentials/core/common_opt/yql_co_blocks.cpp
@@ -0,0 +1,111 @@
+#include "yql_co_blocks.h"
+
+#include <yql/essentials/core/yql_expr_optimize.h>
+#include <yql/essentials/core/yql_type_annotation.h>
+#include <yql/essentials/core/expr_nodes/yql_expr_nodes.h>
+
+#include <yql/essentials/utils/log/log.h>
+
+namespace NYql {
+
+namespace {
+
+class TBlockVisitor { // Walks an expression graph and records, for each "Block" callable, the Block it was reached from.
+public:
+ void Visit(const TExprNode& node) { // Recursive traversal; a node's children are expanded only on its first visit.
+ auto [it, inserted] = Visited_.emplace(&node, nullptr);
+ if (!inserted) {
+ it->second = nullptr; // multiple paths: the node was reached before, so any recorded parent Block is dropped
+ return;
+ }
+ // NOTE(review): a repeat visit does not descend again, so Blocks below a shared node keep the parent from the first visit - confirm this is safe.
+ const bool isBlock = node.IsCallable("Block");
+ if (isBlock) {
+ Blocks_.emplace(&node);
+ if (!BlockStack_.empty()) {
+ it->second = BlockStack_.back(); // most recently entered Block that is still on the stack
+ YQL_ENSURE(it->second);
+ }
+
+ BlockStack_.push_back(&node);
+ Visit(node.Head().Tail()); // visited while this Block is on the stack; presumably the body of the Block's lambda
+ BlockStack_.pop_back();
+ }
+ // NOTE(review): for a Block the child loop below appears to reach Head().Tail() again through the lambda, resetting its entry to nullptr - confirm intended.
+ if (node.Type() == TExprNode::Lambda) {
+ TVector<const TExprNode*> savedBlocks = std::move(BlockStack_); // Blocks outside a lambda are not used as parents inside it
+ BlockStack_.clear();
+ for (ui32 i = 1; i < node.ChildrenSize(); ++i) { // child 0 of a lambda is skipped
+ Visit(*node.Child(i));
+ }
+
+ BlockStack_ = std::move(savedBlocks); // restore the stack of the surrounding scope
+ } else {
+ for (const auto& child : node.Children()) {
+ Visit(*child);
+ }
+ }
+ }
+
+ TNodeMap<const TExprNode*> GetBlockParents() const { // Returns a map: nested Block -> topmost recorded ancestor Block.
+ TNodeMap<const TExprNode*> ret;
+ for (const auto b : Blocks_) {
+ auto it = Visited_.find(b);
+ YQL_ENSURE(it != Visited_.cend());
+ if (it->second != nullptr) { // only Blocks that still have a recorded parent are returned
+ ret.emplace(b, it->second);
+ }
+ }
+ // Follow parent links until the parent is not itself a key of ret, so every entry points at the top of its chain.
+ for (auto& [node,parent] : ret) {
+ for (;;) {
+ auto parentIt = ret.find(parent);
+ if (parentIt == ret.cend()) {
+ break;
+ }
+
+ if (parentIt->second == nullptr) { // defensive: ret is filled with non-null parents only (see the insertion loop)
+ break;
+ }
+
+ parent = parentIt->second;
+ }
+ }
+
+ return ret;
+ }
+
+private:
+ TNodeMap<const TExprNode*> Visited_; // every visited node; for a Block the value is its recorded parent, nullptr if none or reached again
+ TVector<const TExprNode*> BlockStack_; // Blocks currently being traversed; emptied while inside a lambda
+ TNodeSet Blocks_; // every "Block" callable encountered
+};
+
+}
+
+IGraphTransformer::TStatus OptimizeBlocks(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx,
+ TTypeAnnotationContext& typeCtx) { // Replaces nested "Block" callables by their lambda bodies, rebound to the enclosing Block's argument.
+ output = input; // result when there is nothing to rewrite
+ TBlockVisitor visitor;
+ visitor.Visit(*input);
+ auto blockParents = visitor.GetBlockParents(); // nested Block -> topmost recorded ancestor Block
+ if (blockParents.empty()) {
+ return IGraphTransformer::TStatus::Ok;
+ }
+
+ YQL_CLOG(DEBUG, Core) << "Found " << blockParents.size() << " nested blocks";
+ TNodeOnNodeOwnedMap toOptimize; // replacement table handed to RemapExpr
+ for (const auto [node, parent]: blockParents) {
+ auto lambda = NNodes::TCoLambda(node->HeadPtr()); // first child of the nested Block, wrapped as a lambda
+ auto parentLambda = NNodes::TCoLambda(parent->HeadPtr()); // first child of the ancestor Block, wrapped as a lambda
+ toOptimize[lambda.Args().Arg(0).Raw()] = parentLambda.Args().Arg(0).Ptr(); // nested lambda's first argument -> ancestor lambda's first argument
+ toOptimize[node] = lambda.Body().Ptr(); // nested Block node -> its own lambda body
+ }
+
+ TOptimizeExprSettings settings(&typeCtx);
+ settings.CustomInstantTypeTransformer = typeCtx.CustomInstantTypeTransformer.Get();
+ return RemapExpr(input, output, toOptimize, ctx, settings); // status of the remapping is returned to the caller as is
+}
+
+}
+
diff --git a/yql/essentials/core/common_opt/yql_co_blocks.h b/yql/essentials/core/common_opt/yql_co_blocks.h
new file mode 100644
index 00000000000..bbc55d32c3f
--- /dev/null
+++ b/yql/essentials/core/common_opt/yql_co_blocks.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <yql/essentials/ast/yql_expr.h>
+#include <yql/essentials/core/yql_graph_transformer.h>
+
+namespace NYql {
+
+struct TTypeAnnotationContext;
+
+IGraphTransformer::TStatus OptimizeBlocks(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx,
+ TTypeAnnotationContext& typeCtx); // Rewrites nested "Block" callables of input into output; defined in yql_co_blocks.cpp.
+
+} // NYql
diff --git a/yql/essentials/core/common_opt/yql_co_flow1.cpp b/yql/essentials/core/common_opt/yql_co_flow1.cpp
index 4542c707072..82a1dede75d 100644
--- a/yql/essentials/core/common_opt/yql_co_flow1.cpp
+++ b/yql/essentials/core/common_opt/yql_co_flow1.cpp
@@ -2104,6 +2104,20 @@ void RegisterCoFlowCallables1(TCallableOptimizerMap& map) {
}
return node;
};
+
+ map["ToMutDict"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { // Rule: ToMutDict over FromMutDict collapses to FromMutDict's first child.
+ Y_UNUSED(ctx);
+ if (!optCtx.IsSingleUsage(node->Head())) { // leave the node alone unless optCtx reports a single usage of its first child
+ return node;
+ }
+
+ if (node->Head().IsCallable("FromMutDict")) {
+ YQL_CLOG(DEBUG, Core) << "Skip " << node->Content() << " over " << node->Head().Content();
+ return node->Head().HeadPtr(); // drop both wrappers and return the inner node
+ }
+
+ return node; // any other input is returned unchanged
+ };
}
}
diff --git a/yql/essentials/core/common_opt/yql_co_transformer.cpp b/yql/essentials/core/common_opt/yql_co_transformer.cpp
index 1a807f76858..3287e20f146 100644
--- a/yql/essentials/core/common_opt/yql_co_transformer.cpp
+++ b/yql/essentials/core/common_opt/yql_co_transformer.cpp
@@ -1,5 +1,6 @@
#include "yql_co_transformer.h"
#include "yql_co.h"
+#include "yql_co_blocks.h"
#include <yql/essentials/core/yql_expr_optimize.h>
#include <yql/essentials/core/yql_expr_type_annotation.h>
@@ -93,6 +94,13 @@ IGraphTransformer::TStatus TCommonOptTransformer::DoTransform(TExprNode::TPtr in
return status;
}
+ if (TypeCtx_->LangVer >= MakeLangVersion(2025, 4)) { // the Block rewrite runs only from this language version on
+ status = OptimizeBlocks(input = std::move(output), output, ctx, *TypeCtx_); // the previous output becomes this pass's input
+ if (status.Level != IGraphTransformer::TStatus::Ok) { // any non-Ok status is handed back to the caller immediately
+ return status;
+ }
+ }
+
if (!ScanErrors(*output, ctx)) {
return IGraphTransformer::TStatus::Error;
}
diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/result.json b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
index e81150d55a4..43ad8e8356b 100644
--- a/yql/essentials/tests/sql/minirun/part2/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
@@ -1009,6 +1009,20 @@
"uri": "https://{canondata_backend}/1871102/308650ad858d99ea42592b0d0a17f7fec7d5e32b/resource.tar.gz#test.test_lambda-list_aggregate-default.txt-Results_/results.txt"
}
],
+ "test.test[linear-opt_block2-default.txt-Debug]": [
+ {
+ "checksum": "243c4f2ce4a38e0f2bf984d99bdae7b9",
+ "size": 572,
+ "uri": "https://{canondata_backend}/1881367/0f09c21c85af11e14e6b688adaae98a82df1ac4f/resource.tar.gz#test.test_linear-opt_block2-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[linear-opt_block2-default.txt-Results]": [
+ {
+ "checksum": "2e39fa9da5ba2f7918cb4108f648bf06",
+ "size": 1707,
+ "uri": "https://{canondata_backend}/1881367/0f09c21c85af11e14e6b688adaae98a82df1ac4f/resource.tar.gz#test.test_linear-opt_block2-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[match_recognize-simple_paritioning-default.txt-Debug]": [
{
"checksum": "d9ccfb5fa5b16b57294abe924c9c942e",
diff --git a/yql/essentials/tests/sql/minirun/part4/canondata/result.json b/yql/essentials/tests/sql/minirun/part4/canondata/result.json
index d94780bf11c..994d4c20629 100644
--- a/yql/essentials/tests/sql/minirun/part4/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part4/canondata/result.json
@@ -1118,6 +1118,20 @@
"uri": "https://{canondata_backend}/1597364/e3cc9ef8f734283bf9fd496c14df715b3fd4bb26/resource.tar.gz#test.test_linear-mutdict_update-default.txt-Results_/results.txt"
}
],
+ "test.test[linear-opt_block1-default.txt-Debug]": [
+ {
+ "checksum": "51a43ad4718740e27a3362a64dec2ec5",
+ "size": 515,
+ "uri": "https://{canondata_backend}/1925821/da98b4c164e400e2df4da147a3e67f730e6ed331/resource.tar.gz#test.test_linear-opt_block1-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[linear-opt_block1-default.txt-Results]": [
+ {
+ "checksum": "56d2e17bb8e4a449febe10601f3dd3d0",
+ "size": 1573,
+ "uri": "https://{canondata_backend}/1925821/da98b4c164e400e2df4da147a3e67f730e6ed331/resource.tar.gz#test.test_linear-opt_block1-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[match_recognize-alerts_many_order-default.txt-Debug]": [
{
"checksum": "3c4a225c79eb3c1505473aa9a652e73c",
diff --git a/yql/essentials/tests/sql/minirun/part5/canondata/result.json b/yql/essentials/tests/sql/minirun/part5/canondata/result.json
index 3f4380b7893..67996d579cf 100644
--- a/yql/essentials/tests/sql/minirun/part5/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part5/canondata/result.json
@@ -1313,6 +1313,20 @@
"uri": "https://{canondata_backend}/1597364/17d565008807ba0a579161791745dfc03f89d322/resource.tar.gz#test.test_linear-mutdict_upsert-default.txt-Results_/results.txt"
}
],
+ "test.test[linear-opt_block1_diff_lambda-default.txt-Debug]": [
+ {
+ "checksum": "32f7b31cbfdc24f08dacdd9e10898a04",
+ "size": 638,
+ "uri": "https://{canondata_backend}/1942278/0ea567f80fe7e4c0c0b21d0b6972889e27e755f7/resource.tar.gz#test.test_linear-opt_block1_diff_lambda-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[linear-opt_block1_diff_lambda-default.txt-Results]": [
+ {
+ "checksum": "56d2e17bb8e4a449febe10601f3dd3d0",
+ "size": 1573,
+ "uri": "https://{canondata_backend}/1942278/0ea567f80fe7e4c0c0b21d0b6972889e27e755f7/resource.tar.gz#test.test_linear-opt_block1_diff_lambda-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[match_recognize-alerts-default.txt-Debug]": [
{
"checksum": "a6f1a68396bb11dc68f8885a093611b2",
diff --git a/yql/essentials/tests/sql/minirun/part9/canondata/result.json b/yql/essentials/tests/sql/minirun/part9/canondata/result.json
index 0e69445d855..30c20fc7032 100644
--- a/yql/essentials/tests/sql/minirun/part9/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part9/canondata/result.json
@@ -950,6 +950,20 @@
"uri": "https://{canondata_backend}/1871182/89f1a6e76b3afde4b0b8530c8d3a4aad5fad5bd4/resource.tar.gz#test.test_linear-mutdict_remove-default.txt-Results_/results.txt"
}
],
+ "test.test[linear-opt_block_multi-default.txt-Debug]": [
+ {
+ "checksum": "fb1becddb98b10bbf9c8186dd2b53058",
+ "size": 755,
+ "uri": "https://{canondata_backend}/1600758/ca70ab59d7712ea59ab25ae2925d48699ea90b3e/resource.tar.gz#test.test_linear-opt_block_multi-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[linear-opt_block_multi-default.txt-Results]": [
+ {
+ "checksum": "756ed740ec4dd8890334ab2c2f5799e4",
+ "size": 2744,
+ "uri": "https://{canondata_backend}/1600758/ca70ab59d7712ea59ab25ae2925d48699ea90b3e/resource.tar.gz#test.test_linear-opt_block_multi-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[linear-types_reflection-default.txt-Debug]": [
{
"checksum": "f560663decd46b1e0aa9ffa6b4cb4abe",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index c2a2dc41ebf..668950f5c77 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -5123,6 +5123,34 @@
"uri": "https://{canondata_backend}/1597364/0dfdd9be36ea9ae7f51895a91916fc4ec61ed0d8/resource.tar.gz#test_sql2yql.test_linear-mutdict_upsert_/sql.yql"
}
],
+ "test_sql2yql.test[linear-opt_block1]": [
+ {
+ "checksum": "295de2b8e5b7ab78ae0c27ab40c3e8e6",
+ "size": 1218,
+ "uri": "https://{canondata_backend}/1600758/347bcbaaa0880e8fce734e45f36d8bcfa59ac5d5/resource.tar.gz#test_sql2yql.test_linear-opt_block1_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[linear-opt_block1_diff_lambda]": [
+ {
+ "checksum": "9e035a30eeeb6508997c433e568d005d",
+ "size": 1255,
+ "uri": "https://{canondata_backend}/1784826/0ef7974e374145886c095b465087fd7e6961c7bb/resource.tar.gz#test_sql2yql.test_linear-opt_block1_diff_lambda_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[linear-opt_block2]": [
+ {
+ "checksum": "7568b521d86809804ad567f194f08ef6",
+ "size": 1258,
+ "uri": "https://{canondata_backend}/1937429/af85a2a3ced2244b377c50a373a56945f25a377c/resource.tar.gz#test_sql2yql.test_linear-opt_block2_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[linear-opt_block_multi]": [
+ {
+ "checksum": "9eb82256a4af3df5fe811fc856bf4021",
+ "size": 1551,
+ "uri": "https://{canondata_backend}/1600758/347bcbaaa0880e8fce734e45f36d8bcfa59ac5d5/resource.tar.gz#test_sql2yql.test_linear-opt_block_multi_/sql.yql"
+ }
+ ],
"test_sql2yql.test[linear-types]": [
{
"checksum": "f756d489eb05467c7a6414636f47f37e",
@@ -12255,6 +12283,26 @@
"uri": "file://test_sql_format.test_linear-mutdict_upsert_/formatted.sql"
}
],
+ "test_sql_format.test[linear-opt_block1]": [
+ {
+ "uri": "file://test_sql_format.test_linear-opt_block1_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[linear-opt_block1_diff_lambda]": [
+ {
+ "uri": "file://test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[linear-opt_block2]": [
+ {
+ "uri": "file://test_sql_format.test_linear-opt_block2_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[linear-opt_block_multi]": [
+ {
+ "uri": "file://test_sql_format.test_linear-opt_block_multi_/formatted.sql"
+ }
+ ],
"test_sql_format.test[linear-types]": [
{
"uri": "file://test_sql_format.test_linear-types_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_/formatted.sql
new file mode 100644
index 00000000000..b4e86f12729
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_/formatted.sql
@@ -0,0 +1,3 @@
+SELECT
+ ListSort(DictItems(DictInsert(DictInsert(DictCreate(String, Int32), 'a', 1), 'b', 2)))
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql
new file mode 100644
index 00000000000..0a13b6186a0
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql
@@ -0,0 +1,3 @@
+SELECT
+ ListSort(DictItems(DictInsert(Opaque(DictInsert(DictCreate(String, Int32), 'a', 1)), 'b', 2)))
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block2_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block2_/formatted.sql
new file mode 100644
index 00000000000..111e5674a7f
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block2_/formatted.sql
@@ -0,0 +1,3 @@
+SELECT
+ ListSort(DictItems(DictInsert(DictInsert(DictInsert(DictCreate(String, Int32), 'a', 1), 'b', 2), 'c', 3)))
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block_multi_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block_multi_/formatted.sql
new file mode 100644
index 00000000000..fc66d8c98a8
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block_multi_/formatted.sql
@@ -0,0 +1,6 @@
+$x = DictInsert(DictCreate(String, Int32), 'a', 1);
+
+SELECT
+ ListSort(DictItems(DictInsert($x, 'b', 2))),
+ ListSort(DictItems(DictInsert($x, 'c', 3)))
+;
diff --git a/yql/essentials/tests/sql/suites/linear/opt_block1.yql b/yql/essentials/tests/sql/suites/linear/opt_block1.yql
new file mode 100644
index 00000000000..86a82bfde95
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/linear/opt_block1.yql
@@ -0,0 +1,2 @@
+select ListSort(DictItems(DictInsert(DictInsert(DictCreate(String,Int32),"a",1),"b",2)));
+
diff --git a/yql/essentials/tests/sql/suites/linear/opt_block1_diff_lambda.yql b/yql/essentials/tests/sql/suites/linear/opt_block1_diff_lambda.yql
new file mode 100644
index 00000000000..5eb5ecb2232
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/linear/opt_block1_diff_lambda.yql
@@ -0,0 +1,2 @@
+select ListSort(DictItems(DictInsert(Opaque(DictInsert(DictCreate(String,Int32),"a",1)),"b",2)));
+
diff --git a/yql/essentials/tests/sql/suites/linear/opt_block2.yql b/yql/essentials/tests/sql/suites/linear/opt_block2.yql
new file mode 100644
index 00000000000..839038bdd84
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/linear/opt_block2.yql
@@ -0,0 +1 @@
+select ListSort(DictItems(DictInsert(DictInsert(DictInsert(DictCreate(String,Int32),"a",1),"b",2),"c",3)));
diff --git a/yql/essentials/tests/sql/suites/linear/opt_block_multi.yql b/yql/essentials/tests/sql/suites/linear/opt_block_multi.yql
new file mode 100644
index 00000000000..35d7094117b
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/linear/opt_block_multi.yql
@@ -0,0 +1,4 @@
+$x = DictInsert(DictCreate(String,Int32),"a",1);
+select ListSort(DictItems(DictInsert($x,"b",2))),
+ ListSort(DictItems(DictInsert($x,"c",3)));
+