summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ziganshinmr <[email protected]> 2025-07-16 20:28:52 +0300
committer ziganshinmr <[email protected]> 2025-07-16 20:55:11 +0300
commit edd33f92e99348fe240f3b558d4f24eabf6fc657 (patch)
tree beeba727c00ed31edb550a29ec51ab43078f15ac
parent f1da33166a28d1ed1595afcd8e80f9a9a001b901 (diff)
Unessential callable
commit_hash:b193bd01ffc8693293f6aea7bc6460033e52654a
-rw-r--r-- yql/essentials/core/common_opt/yql_co_simple1.cpp 5
-rw-r--r-- yql/essentials/core/expr_nodes/yql_expr_nodes.json 9
-rw-r--r-- yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp 1
-rw-r--r-- yql/essentials/core/type_ann/type_ann_core.cpp 19
-rw-r--r-- yql/essentials/core/yql_opt_utils.cpp 34
-rw-r--r-- yql/essentials/core/yql_opt_utils.h 2
-rw-r--r-- yql/essentials/providers/common/mkql/yql_provider_mkql.cpp 2
-rw-r--r-- yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp 1
-rw-r--r-- yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h 2
-rw-r--r-- yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp 54
-rw-r--r-- yt/yql/tests/sql/suites/optimizers/drop_unessential_map.sql 13
11 files changed, 141 insertions, 1 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp
index 5ea6c89e5b8..65ace106cd5 100644
--- a/yql/essentials/core/common_opt/yql_co_simple1.cpp
+++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp
@@ -7187,6 +7187,11 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
throw yexception() << "Unknown failure kind: " << failureKind;
};
+ map["Unessential"] = [](const TExprNode::TPtr& node, TExprContext& /*ctx*/, TOptimizeContext& /*optCtx*/) { // sanity check only, no rewrite is performed
+ YQL_ENSURE(node->Child(TCoUnessential::idx_AssumeAs)->IsComplete(), "AssumeAs argument of Unessential is expected to be complete expression");
+ return node; // the node is returned unchanged
+ };
+
// will be applied to any callable after all above
map[""] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
YQL_ENSURE(node->IsCallable());
diff --git a/yql/essentials/core/expr_nodes/yql_expr_nodes.json b/yql/essentials/core/expr_nodes/yql_expr_nodes.json
index c62b447217e..03443f2cef2 100644
--- a/yql/essentials/core/expr_nodes/yql_expr_nodes.json
+++ b/yql/essentials/core/expr_nodes/yql_expr_nodes.json
@@ -2697,6 +2697,15 @@
"Name" : "TCoLikely",
"Base" : "TCoNoPushBase",
"Match": {"Type": "Callable", "Name": "Likely"}
+ },
+ {
+ "Name" : "TCoUnessential",
+ "Base" : "TCallable",
+ "Match": {"Type": "Callable", "Name": "Unessential"},
+ "Children": [
+ {"Index": 0, "Name": "Predicate", "Type": "TExprBase"},
+ {"Index": 1, "Name": "AssumeAs", "Type": "TExprBase"}
+ ]
}
]
}
diff --git a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
index b11db6717b5..1feebdc488b 100644
--- a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
+++ b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp
@@ -9343,6 +9343,7 @@ struct TPeepHoleRules {
{"AssumeDistinct", &DropAssume},
{"AssumeChopped", &DropAssume},
{"AssumeConstraints", &DropAssume},
+ {"Unessential", &DropAssume},
{"EmptyFrom", &DropEmptyFrom},
{"Top", &OptimizeTopOrSort<false, true>},
{"TopSort", &OptimizeTopOrSort<true, true>},
diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp
index 69fb4ed3a26..63b5322e01f 100644
--- a/yql/essentials/core/type_ann/type_ann_core.cpp
+++ b/yql/essentials/core/type_ann/type_ann_core.cpp
@@ -3775,6 +3775,24 @@ namespace NTypeAnnImpl {
return IGraphTransformer::TStatus::Ok;
}
+ IGraphTransformer::TStatus UnessentialWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
+ Y_UNUSED(output); // the node is only annotated here, never rewritten
+ // Type annotation for Unessential: exactly two arguments are required and both must pass the Bool data type check
+ if (!EnsureArgsCount(*input, 2, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!EnsureSpecificDataType(input->Head(), EDataSlot::Bool, ctx.Expr)) { // first argument
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (!EnsureSpecificDataType(*input->Child(1), EDataSlot::Bool, ctx.Expr)) { // second argument
+ return IGraphTransformer::TStatus::Error;
+ }
+ // The whole node gets the type annotation of its first argument
+ input->SetTypeAnn(input->Head().GetTypeAnn());
+ return IGraphTransformer::TStatus::Ok;
+ }
+
template <bool Xor>
IGraphTransformer::TStatus LogicalWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
if (!EnsureMinArgsCount(*input, 1, ctx.Expr)) {
@@ -12619,6 +12637,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
Functions["Not"] = &BoolOpt1Wrapper;
Functions["NoPush"] = &NoPushWrapper;
Functions["Likely"] = &NoPushWrapper;
+ Functions["Unessential"] = &UnessentialWrapper;
Functions["Map"] = &MapWrapper;
Functions["OrderedMap"] = &MapWrapper;
Functions["MapNext"] = &MapNextWrapper;
diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp
index d416ba2ce83..46703828c13 100644
--- a/yql/essentials/core/yql_opt_utils.cpp
+++ b/yql/essentials/core/yql_opt_utils.cpp
@@ -2572,4 +2572,38 @@ TOperationProgress::EOpBlockStatus DetermineProgramBlockStatus(const TExprNode&
return status;
}
+TExprNode::TPtr ReplaceUnessentials(TExprNode::TPtr predicate, TExprNode::TPtr row, const TNodeSet& banned, TExprContext& ctx) {
+ YQL_ENSURE(row->IsArgument());
+ // Walk the predicate: collect Unessential callables and detect any usage of row that is reached outside of them
+ std::vector<TExprNode::TPtr> unessentials;
+ bool hasEssentialRowUsage = false;
+ VisitExpr(predicate, [&](const TExprNode::TPtr& node) {
+ if (banned.contains(node.Get())) { // banned nodes are neither inspected nor descended into
+ return false;
+ } else if (node == row) {
+ hasEssentialRowUsage = true;
+ return false;
+ } else if (node->IsCallable(TCoUnessential::CallableName())) {
+ // Record the node without descending into it; AssumeAs is guaranteed to be complete
+ unessentials.push_back(node);
+ return false;
+ }
+ // Any other node: keep traversing its children
+ return true;
+ });
+ if (unessentials.empty()) { // nothing to replace, the input predicate is returned as is
+ return predicate;
+ }
+
+ // The predicate is considered unessential if every usage of row reached by the traversal above is wrapped in Unessential.
+ // In that case drop all Unessential conditions (= replace with AssumeAs); otherwise keep them (= replace with Predicate).
+
+ TNodeOnNodeOwnedMap replaces;
+ for (const auto& unessential : unessentials) {
+ replaces.emplace(unessential.Get(), hasEssentialRowUsage ? unessential->ChildPtr(TCoUnessential::idx_Predicate) : unessential->ChildPtr(TCoUnessential::idx_AssumeAs));
+ }
+ // Either way, every collected Unessential node is substituted by one of its children
+ return ctx.ReplaceNodes(std::move(predicate), replaces);
+}
+
}
diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h
index 91fc58905bd..666b7a5ce4c 100644
--- a/yql/essentials/core/yql_opt_utils.h
+++ b/yql/essentials/core/yql_opt_utils.h
@@ -213,4 +213,6 @@ TExprNode::TPtr MakeAtomList(TPositionHandle pos, const C& container, TExprConte
return ctx.NewList(pos, std::move(atoms));
}
+TExprNode::TPtr ReplaceUnessentials(TExprNode::TPtr predicate, TExprNode::TPtr row, const TNodeSet& banned, TExprContext& ctx);
+
}
diff --git a/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp b/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp
index 24505ba8d3b..344aac2475e 100644
--- a/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp
+++ b/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp
@@ -3016,7 +3016,7 @@ TMkqlCommonCallableCompiler::TShared::TShared() {
return MkqlBuildExpr(node.Head(), ctx);
});
- AddCallable({ "AssumeStrict", "AssumeNonStrict", "NoPush", "Likely" }, [](const TExprNode& node, TMkqlBuildContext& ctx) {
+ AddCallable({ "AssumeStrict", "AssumeNonStrict", "NoPush", "Likely", "Unessential" }, [](const TExprNode& node, TMkqlBuildContext& ctx) {
return MkqlBuildExpr(node.Head(), ctx);
});
diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp
index 05e73a11707..eec754a0087 100644
--- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp
+++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp
@@ -102,6 +102,7 @@ TYtPhysicalOptProposalTransformer::TYtPhysicalOptProposalTransformer(TYtState::T
AddHandler(2, &TYtMap::Match, HNDL(PushDownYtMapOverSortedMerge));
AddHandler(2, &TYtMerge::Match, HNDL(ForceTransform));
AddHandler(2, &TYtMerge::Match, HNDL(MergeToCopy));
+ AddHandler(2, &TYtMap::Match, HNDL(UnessentialFilter));
#undef HNDL
}
diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h
index b14cfe8a5d5..56849058ec3 100644
--- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h
+++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h
@@ -156,6 +156,8 @@ private:
template <typename TLMapType>
NNodes::TMaybeNode<NNodes::TExprBase> LMap(NNodes::TExprBase node, TExprContext& ctx) const;
+ NNodes::TMaybeNode<NNodes::TExprBase> UnessentialFilter(NNodes::TExprBase node, TExprContext& ctx) const;
+
template<bool WithList>
NNodes::TCoLambda MakeJobLambda(NNodes::TCoLambda lambda, bool useFlow, TExprContext& ctx) const;
diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp
index efc9ce9916c..5c2b696ba89 100644
--- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp
+++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp
@@ -409,4 +409,58 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::CombineByKey(TExprBase
.Done();
}
+TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::UnessentialFilter(TExprBase node, TExprContext& ctx) const {
+ const auto ytMap = node.Cast<TYtMap>();
+ const auto flatMap = ytMap.Mapper().Body().Maybe<TCoFlatMapBase>();
+ if (!flatMap) { // the mapper body is not a FlatMap: leave the node as is
+ return node;
+ }
+ if (flatMap.Cast().Input().Ptr() != ytMap.Mapper().Args().Arg(0).Ptr()) { // the FlatMap must consume the mapper argument directly
+ return node;
+ }
+ // Only FlatMap lambdas accepted by IsFilterFlatMap are handled
+ auto flatMapLambda = flatMap.Cast().Lambda();
+ if (!IsFilterFlatMap(flatMapLambda)) {
+ return node;
+ }
+ // row is the FlatMap lambda argument; predicate is the Predicate child of the lambda body
+ auto row = flatMapLambda.Args().Arg(0).Ptr();
+ auto predicate = flatMapLambda.Body().Ref().ChildPtr(TCoConditionalValueBase::idx_Predicate);
+ // Collect the TYtOutput nodes reachable from the predicate
+ TNodeSet banned;
+ VisitExpr(predicate, [&](const TExprNode::TPtr& node) {
+ if (TYtOutput::Match(node.Get())) {
+ // Prevent ReplaceUnessentials from going deeper than the current operation
+ banned.insert(node.Get());
+ return false;
+ }
+ return true;
+ });
+
+ auto newPredicate = ReplaceUnessentials(predicate, row, banned, ctx);
+ if (newPredicate == predicate) { // same pointer back: nothing was replaced
+ return node;
+ }
+ // Rebuild the filter with the new predicate and wrap it into a fresh mapper lambda with the same FlatMap callable name
+ auto newFilter = ctx.ChangeChild(flatMapLambda.Body().Ref(), TCoConditionalValueBase::idx_Predicate, std::move(newPredicate));
+ auto newFlatMapLambda = ctx.ChangeChild(flatMapLambda.Ref(), TCoLambda::idx_Body, std::move(newFilter));
+ return Build<TYtMap>(ctx, node.Pos())
+ .InitFrom(ytMap)
+ .Mapper<TCoLambda>()
+ .Args({"stream"})
+ .Body<TCoFlatMapBase>()
+ .CallableName(flatMap.Ref().Content())
+ .Input("stream")
+ .Lambda<TCoLambda>()
+ .Args({"item"})
+ .Body<TExprApplier>()
+ .Apply(TCoLambda(newFlatMapLambda))
+ .With(0, "item")
+ .Build()
+ .Build()
+ .Build()
+ .Build()
+ .Done();
+}
+
} // namespace NYql
diff --git a/yt/yql/tests/sql/suites/optimizers/drop_unessential_map.sql b/yt/yql/tests/sql/suites/optimizers/drop_unessential_map.sql
new file mode 100644
index 00000000000..667f6cb480b
--- /dev/null
+++ b/yt/yql/tests/sql/suites/optimizers/drop_unessential_map.sql
@@ -0,0 +1,13 @@
+USE plato;
+
+PRAGMA yt.EnableFuseMapToMapReduce;
+PRAGMA warning("disable", "4510");
+
+SELECT * FROM Input0 WHERE YQL::Unessential(value != "not present", true);
+SELECT * FROM Input0 WHERE YQL::Unessential(value != "not present 1", true) AND YQL::Unessential(value != "not present 2", true);
+SELECT * FROM Input0 WHERE YQL::Unessential(value != "not present 3", true) OR NOT (value != "not present 3");
+SELECT * FROM Input0 WHERE YQL::Unessential(YQL::Unessential(value != "other not present 4", true) OR NOT (value != "other not present 4") OR value != "not present 4", true) OR NOT (value != "not present 4");
+
+$mapreduce_output = SELECT key FROM Input0 WHERE YQL::Unessential(key != "not present 5", true) GROUP BY key LIMIT 1;
+-- optimizer must not go beyond operation bounds
+SELECT * FROM Input0 WHERE key || Unwrap($mapreduce_output) != "not present 5";