diff options
author | vvvv <[email protected]> | 2025-09-26 15:33:07 +0300 |
---|---|---|
committer | vvvv <[email protected]> | 2025-09-26 15:57:42 +0300 |
commit | ee38d4a04d0b4226b36bd9d548c35739f11e88f9 (patch) | |
tree | 7c2a6daec4b590f4b78d6cf60d36d3e9da569112 | |
parent | e664ea1f749f613aa313379c775c2e7a7a268e64 (diff) |
YQL-20339 merge blocks optimizer
init
commit_hash:fee9a86a1f95e9271bf49205dad4b6eb0b934dec
18 files changed, 275 insertions, 0 deletions
diff --git a/yql/essentials/core/common_opt/ya.make b/yql/essentials/core/common_opt/ya.make index 31ae6c7d324..e301b885994 100644 --- a/yql/essentials/core/common_opt/ya.make +++ b/yql/essentials/core/common_opt/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( yql_co.h + yql_co_blocks.cpp yql_co_extr_members.cpp yql_flatmap_over_join.cpp yql_co_finalizers.cpp diff --git a/yql/essentials/core/common_opt/yql_co_blocks.cpp b/yql/essentials/core/common_opt/yql_co_blocks.cpp new file mode 100644 index 00000000000..94feed3727d --- /dev/null +++ b/yql/essentials/core/common_opt/yql_co_blocks.cpp @@ -0,0 +1,111 @@ +#include "yql_co_blocks.h" + +#include <yql/essentials/core/yql_expr_optimize.h> +#include <yql/essentials/core/yql_type_annotation.h> +#include <yql/essentials/core/expr_nodes/yql_expr_nodes.h> + +#include <yql/essentials/utils/log/log.h> + +namespace NYql { + +namespace { + +class TBlockVisitor { +public: + void Visit(const TExprNode& node) { + auto [it, inserted] = Visited_.emplace(&node, nullptr); + if (!inserted) { + it->second = nullptr; // multiple paths + return; + } + + const bool isBlock = node.IsCallable("Block"); + if (isBlock) { + Blocks_.emplace(&node); + if (!BlockStack_.empty()) { + it->second = BlockStack_.back(); + YQL_ENSURE(it->second); + } + + BlockStack_.push_back(&node); + Visit(node.Head().Tail()); + BlockStack_.pop_back(); + } + + if (node.Type() == TExprNode::Lambda) { + TVector<const TExprNode*> savedBlocks = std::move(BlockStack_); + BlockStack_.clear(); + for (ui32 i = 1; i < node.ChildrenSize(); ++i) { + Visit(*node.Child(i)); + } + + BlockStack_ = std::move(savedBlocks); + } else { + for (const auto& child : node.Children()) { + Visit(*child); + } + } + } + + TNodeMap<const TExprNode*> GetBlockParents() const { + TNodeMap<const TExprNode*> ret; + for (const auto b : Blocks_) { + auto it = Visited_.find(b); + YQL_ENSURE(it != Visited_.cend()); + if (it->second != nullptr) { + ret.emplace(b, it->second); + } + } + + for (auto& [node,parent] : ret) { + for (;;) { + auto parentIt = ret.find(parent); + if (parentIt == ret.cend()) { + break; + } + + if (parentIt->second == nullptr) { + break; + } + + parent = parentIt->second; + } + } + + return ret; + } + +private: + TNodeMap<const TExprNode*> Visited_; + TVector<const TExprNode*> BlockStack_; + TNodeSet Blocks_; +}; + +} + +IGraphTransformer::TStatus OptimizeBlocks(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx, + TTypeAnnotationContext& typeCtx) { + output = input; + TBlockVisitor visitor; + visitor.Visit(*input); + auto blockParents = visitor.GetBlockParents(); + if (blockParents.empty()) { + return IGraphTransformer::TStatus::Ok; + } + + YQL_CLOG(DEBUG, Core) << "Found " << blockParents.size() << " nested blocks"; + TNodeOnNodeOwnedMap toOptimize; + for (const auto [node, parent]: blockParents) { + auto lambda = NNodes::TCoLambda(node->HeadPtr()); + auto parentLambda = NNodes::TCoLambda(parent->HeadPtr()); + toOptimize[lambda.Args().Arg(0).Raw()] = parentLambda.Args().Arg(0).Ptr(); + toOptimize[node] = lambda.Body().Ptr(); + } + + TOptimizeExprSettings settings(&typeCtx); + settings.CustomInstantTypeTransformer = typeCtx.CustomInstantTypeTransformer.Get(); + return RemapExpr(input, output, toOptimize, ctx, settings); +} + +} + diff --git a/yql/essentials/core/common_opt/yql_co_blocks.h b/yql/essentials/core/common_opt/yql_co_blocks.h new file mode 100644 index 00000000000..bbc55d32c3f --- /dev/null +++ b/yql/essentials/core/common_opt/yql_co_blocks.h @@ -0,0 +1,13 @@ +#pragma once + +#include <yql/essentials/ast/yql_expr.h> +#include <yql/essentials/core/yql_graph_transformer.h> + +namespace NYql { + +struct TTypeAnnotationContext; + +IGraphTransformer::TStatus OptimizeBlocks(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx, + TTypeAnnotationContext& typeCtx); + +} // NYql diff --git a/yql/essentials/core/common_opt/yql_co_flow1.cpp b/yql/essentials/core/common_opt/yql_co_flow1.cpp index 4542c707072..82a1dede75d 100644 --- a/yql/essentials/core/common_opt/yql_co_flow1.cpp +++ b/yql/essentials/core/common_opt/yql_co_flow1.cpp @@ -2104,6 +2104,20 @@ void RegisterCoFlowCallables1(TCallableOptimizerMap& map) { } return node; }; + + map["ToMutDict"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + Y_UNUSED(ctx); + if (!optCtx.IsSingleUsage(node->Head())) { + return node; + } + + if (node->Head().IsCallable("FromMutDict")) { + YQL_CLOG(DEBUG, Core) << "Skip " << node->Content() << " over " << node->Head().Content(); + return node->Head().HeadPtr(); + } + + return node; + }; } } diff --git a/yql/essentials/core/common_opt/yql_co_transformer.cpp b/yql/essentials/core/common_opt/yql_co_transformer.cpp index 1a807f76858..3287e20f146 100644 --- a/yql/essentials/core/common_opt/yql_co_transformer.cpp +++ b/yql/essentials/core/common_opt/yql_co_transformer.cpp @@ -1,5 +1,6 @@ #include "yql_co_transformer.h" #include "yql_co.h" +#include "yql_co_blocks.h" #include <yql/essentials/core/yql_expr_optimize.h> #include <yql/essentials/core/yql_expr_type_annotation.h> @@ -93,6 +94,13 @@ IGraphTransformer::TStatus TCommonOptTransformer::DoTransform(TExprNode::TPtr in return status; } + if (TypeCtx_->LangVer >= MakeLangVersion(2025, 4)) { + status = OptimizeBlocks(input = std::move(output), output, ctx, *TypeCtx_); + if (status.Level != IGraphTransformer::TStatus::Ok) { + return status; + } + } + if (!ScanErrors(*output, ctx)) { return IGraphTransformer::TStatus::Error; } diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/result.json b/yql/essentials/tests/sql/minirun/part2/canondata/result.json index e81150d55a4..43ad8e8356b 100644 --- a/yql/essentials/tests/sql/minirun/part2/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part2/canondata/result.json @@ -1009,6 +1009,20 @@ "uri": "https://{canondata_backend}/1871102/308650ad858d99ea42592b0d0a17f7fec7d5e32b/resource.tar.gz#test.test_lambda-list_aggregate-default.txt-Results_/results.txt" } ], + "test.test[linear-opt_block2-default.txt-Debug]": [ + { + "checksum": "243c4f2ce4a38e0f2bf984d99bdae7b9", + "size": 572, + "uri": "https://{canondata_backend}/1881367/0f09c21c85af11e14e6b688adaae98a82df1ac4f/resource.tar.gz#test.test_linear-opt_block2-default.txt-Debug_/opt.yql" + } + ], + "test.test[linear-opt_block2-default.txt-Results]": [ + { + "checksum": "2e39fa9da5ba2f7918cb4108f648bf06", + "size": 1707, + "uri": "https://{canondata_backend}/1881367/0f09c21c85af11e14e6b688adaae98a82df1ac4f/resource.tar.gz#test.test_linear-opt_block2-default.txt-Results_/results.txt" + } + ], "test.test[match_recognize-simple_paritioning-default.txt-Debug]": [ { "checksum": "d9ccfb5fa5b16b57294abe924c9c942e", diff --git a/yql/essentials/tests/sql/minirun/part4/canondata/result.json b/yql/essentials/tests/sql/minirun/part4/canondata/result.json index d94780bf11c..994d4c20629 100644 --- a/yql/essentials/tests/sql/minirun/part4/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part4/canondata/result.json @@ -1118,6 +1118,20 @@ "uri": "https://{canondata_backend}/1597364/e3cc9ef8f734283bf9fd496c14df715b3fd4bb26/resource.tar.gz#test.test_linear-mutdict_update-default.txt-Results_/results.txt" } ], + "test.test[linear-opt_block1-default.txt-Debug]": [ + { + "checksum": "51a43ad4718740e27a3362a64dec2ec5", + "size": 515, + "uri": "https://{canondata_backend}/1925821/da98b4c164e400e2df4da147a3e67f730e6ed331/resource.tar.gz#test.test_linear-opt_block1-default.txt-Debug_/opt.yql" + } + ], + "test.test[linear-opt_block1-default.txt-Results]": [ + { + "checksum": "56d2e17bb8e4a449febe10601f3dd3d0", + "size": 1573, + "uri": "https://{canondata_backend}/1925821/da98b4c164e400e2df4da147a3e67f730e6ed331/resource.tar.gz#test.test_linear-opt_block1-default.txt-Results_/results.txt" + } + ], "test.test[match_recognize-alerts_many_order-default.txt-Debug]": [ { "checksum": "3c4a225c79eb3c1505473aa9a652e73c", diff --git a/yql/essentials/tests/sql/minirun/part5/canondata/result.json b/yql/essentials/tests/sql/minirun/part5/canondata/result.json index 3f4380b7893..67996d579cf 100644 --- a/yql/essentials/tests/sql/minirun/part5/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part5/canondata/result.json @@ -1313,6 +1313,20 @@ "uri": "https://{canondata_backend}/1597364/17d565008807ba0a579161791745dfc03f89d322/resource.tar.gz#test.test_linear-mutdict_upsert-default.txt-Results_/results.txt" } ], + "test.test[linear-opt_block1_diff_lambda-default.txt-Debug]": [ + { + "checksum": "32f7b31cbfdc24f08dacdd9e10898a04", + "size": 638, + "uri": "https://{canondata_backend}/1942278/0ea567f80fe7e4c0c0b21d0b6972889e27e755f7/resource.tar.gz#test.test_linear-opt_block1_diff_lambda-default.txt-Debug_/opt.yql" + } + ], + "test.test[linear-opt_block1_diff_lambda-default.txt-Results]": [ + { + "checksum": "56d2e17bb8e4a449febe10601f3dd3d0", + "size": 1573, + "uri": "https://{canondata_backend}/1942278/0ea567f80fe7e4c0c0b21d0b6972889e27e755f7/resource.tar.gz#test.test_linear-opt_block1_diff_lambda-default.txt-Results_/results.txt" + } + ], "test.test[match_recognize-alerts-default.txt-Debug]": [ { "checksum": "a6f1a68396bb11dc68f8885a093611b2", diff --git a/yql/essentials/tests/sql/minirun/part9/canondata/result.json b/yql/essentials/tests/sql/minirun/part9/canondata/result.json index 0e69445d855..30c20fc7032 100644 --- a/yql/essentials/tests/sql/minirun/part9/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part9/canondata/result.json @@ -950,6 +950,20 @@ "uri": "https://{canondata_backend}/1871182/89f1a6e76b3afde4b0b8530c8d3a4aad5fad5bd4/resource.tar.gz#test.test_linear-mutdict_remove-default.txt-Results_/results.txt" } ], + "test.test[linear-opt_block_multi-default.txt-Debug]": [ + { + "checksum": "fb1becddb98b10bbf9c8186dd2b53058", + "size": 755, + "uri": "https://{canondata_backend}/1600758/ca70ab59d7712ea59ab25ae2925d48699ea90b3e/resource.tar.gz#test.test_linear-opt_block_multi-default.txt-Debug_/opt.yql" + } + ], + "test.test[linear-opt_block_multi-default.txt-Results]": [ + { + "checksum": "756ed740ec4dd8890334ab2c2f5799e4", + "size": 2744, + "uri": "https://{canondata_backend}/1600758/ca70ab59d7712ea59ab25ae2925d48699ea90b3e/resource.tar.gz#test.test_linear-opt_block_multi-default.txt-Results_/results.txt" + } + ], "test.test[linear-types_reflection-default.txt-Debug]": [ { "checksum": "f560663decd46b1e0aa9ffa6b4cb4abe", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index c2a2dc41ebf..668950f5c77 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -5123,6 +5123,34 @@ "uri": "https://{canondata_backend}/1597364/0dfdd9be36ea9ae7f51895a91916fc4ec61ed0d8/resource.tar.gz#test_sql2yql.test_linear-mutdict_upsert_/sql.yql" } ], + "test_sql2yql.test[linear-opt_block1]": [ + { + "checksum": "295de2b8e5b7ab78ae0c27ab40c3e8e6", + "size": 1218, + "uri": "https://{canondata_backend}/1600758/347bcbaaa0880e8fce734e45f36d8bcfa59ac5d5/resource.tar.gz#test_sql2yql.test_linear-opt_block1_/sql.yql" + } + ], + "test_sql2yql.test[linear-opt_block1_diff_lambda]": [ + { + "checksum": "9e035a30eeeb6508997c433e568d005d", + "size": 1255, + "uri": "https://{canondata_backend}/1784826/0ef7974e374145886c095b465087fd7e6961c7bb/resource.tar.gz#test_sql2yql.test_linear-opt_block1_diff_lambda_/sql.yql" + } + ], + "test_sql2yql.test[linear-opt_block2]": [ + { + "checksum": "7568b521d86809804ad567f194f08ef6", + "size": 1258, + "uri": "https://{canondata_backend}/1937429/af85a2a3ced2244b377c50a373a56945f25a377c/resource.tar.gz#test_sql2yql.test_linear-opt_block2_/sql.yql" + } + ], + "test_sql2yql.test[linear-opt_block_multi]": [ + { + "checksum": "9eb82256a4af3df5fe811fc856bf4021", + "size": 1551, + "uri": "https://{canondata_backend}/1600758/347bcbaaa0880e8fce734e45f36d8bcfa59ac5d5/resource.tar.gz#test_sql2yql.test_linear-opt_block_multi_/sql.yql" + } + ], "test_sql2yql.test[linear-types]": [ { "checksum": "f756d489eb05467c7a6414636f47f37e", @@ -12255,6 +12283,26 @@ "uri": "file://test_sql_format.test_linear-mutdict_upsert_/formatted.sql" } ], + "test_sql_format.test[linear-opt_block1]": [ + { + "uri": "file://test_sql_format.test_linear-opt_block1_/formatted.sql" + } + ], + "test_sql_format.test[linear-opt_block1_diff_lambda]": [ + { + "uri": "file://test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql" + } + ], + "test_sql_format.test[linear-opt_block2]": [ + { + "uri": "file://test_sql_format.test_linear-opt_block2_/formatted.sql" + } + ], + "test_sql_format.test[linear-opt_block_multi]": [ + { + "uri": "file://test_sql_format.test_linear-opt_block_multi_/formatted.sql" + } + ], "test_sql_format.test[linear-types]": [ { "uri": "file://test_sql_format.test_linear-types_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_/formatted.sql new file mode 100644 index 00000000000..b4e86f12729 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_/formatted.sql @@ -0,0 +1,3 @@ +SELECT + ListSort(DictItems(DictInsert(DictInsert(DictCreate(String, Int32), 'a', 1), 'b', 2))) +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql new file mode 100644 index 00000000000..0a13b6186a0 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block1_diff_lambda_/formatted.sql @@ -0,0 +1,3 @@ +SELECT + ListSort(DictItems(DictInsert(Opaque(DictInsert(DictCreate(String, Int32), 'a', 1)), 'b', 2))) +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block2_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block2_/formatted.sql new file mode 100644 index 00000000000..111e5674a7f --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block2_/formatted.sql @@ -0,0 +1,3 @@ +SELECT + ListSort(DictItems(DictInsert(DictInsert(DictInsert(DictCreate(String, Int32), 'a', 1), 'b', 2), 'c', 3))) +; diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block_multi_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block_multi_/formatted.sql new file mode 100644 index 00000000000..fc66d8c98a8 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_linear-opt_block_multi_/formatted.sql @@ -0,0 +1,6 @@ +$x = DictInsert(DictCreate(String, Int32), 'a', 1); + +SELECT + ListSort(DictItems(DictInsert($x, 'b', 2))), + ListSort(DictItems(DictInsert($x, 'c', 3))) +; diff --git a/yql/essentials/tests/sql/suites/linear/opt_block1.yql b/yql/essentials/tests/sql/suites/linear/opt_block1.yql new file mode 100644 index 00000000000..86a82bfde95 --- /dev/null +++ b/yql/essentials/tests/sql/suites/linear/opt_block1.yql @@ -0,0 +1,2 @@ +select ListSort(DictItems(DictInsert(DictInsert(DictCreate(String,Int32),"a",1),"b",2))); + diff --git a/yql/essentials/tests/sql/suites/linear/opt_block1_diff_lambda.yql b/yql/essentials/tests/sql/suites/linear/opt_block1_diff_lambda.yql new file mode 100644 index 00000000000..5eb5ecb2232 --- /dev/null +++ b/yql/essentials/tests/sql/suites/linear/opt_block1_diff_lambda.yql @@ -0,0 +1,2 @@ +select ListSort(DictItems(DictInsert(Opaque(DictInsert(DictCreate(String,Int32),"a",1)),"b",2))); + diff --git a/yql/essentials/tests/sql/suites/linear/opt_block2.yql b/yql/essentials/tests/sql/suites/linear/opt_block2.yql new file mode 100644 index 00000000000..839038bdd84 --- /dev/null +++ b/yql/essentials/tests/sql/suites/linear/opt_block2.yql @@ -0,0 +1 @@ +select ListSort(DictItems(DictInsert(DictInsert(DictInsert(DictCreate(String,Int32),"a",1),"b",2),"c",3))); diff --git a/yql/essentials/tests/sql/suites/linear/opt_block_multi.yql b/yql/essentials/tests/sql/suites/linear/opt_block_multi.yql new file mode 100644 index 00000000000..35d7094117b --- /dev/null +++ b/yql/essentials/tests/sql/suites/linear/opt_block_multi.yql @@ -0,0 +1,4 @@ +$x = DictInsert(DictCreate(String,Int32),"a",1); +select ListSort(DictItems(DictInsert($x,"b",2))), + ListSort(DictItems(DictInsert($x,"c",3))); + |