diff options
author | ziganshinmr <[email protected]> | 2025-07-16 20:28:52 +0300 |
---|---|---|
committer | ziganshinmr <[email protected]> | 2025-07-16 20:55:11 +0300 |
commit | edd33f92e99348fe240f3b558d4f24eabf6fc657 (patch) | |
tree | beeba727c00ed31edb550a29ec51ab43078f15ac | |
parent | f1da33166a28d1ed1595afcd8e80f9a9a001b901 (diff) |
Unessential callable
commit_hash:b193bd01ffc8693293f6aea7bc6460033e52654a
11 files changed, 141 insertions, 1 deletions
diff --git a/yql/essentials/core/common_opt/yql_co_simple1.cpp b/yql/essentials/core/common_opt/yql_co_simple1.cpp index 5ea6c89e5b8..65ace106cd5 100644 --- a/yql/essentials/core/common_opt/yql_co_simple1.cpp +++ b/yql/essentials/core/common_opt/yql_co_simple1.cpp @@ -7187,6 +7187,11 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { throw yexception() << "Unknown failure kind: " << failureKind; }; + map["Unessential"] = [](const TExprNode::TPtr& node, TExprContext& /*ctx*/, TOptimizeContext& /*optCtx*/) { + YQL_ENSURE(node->Child(TCoUnessential::idx_AssumeAs)->IsComplete(), "AssumeAs argument of Unessential is expected to be complete expression"); + return node; + }; + // will be applied to any callable after all above map[""] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { YQL_ENSURE(node->IsCallable()); diff --git a/yql/essentials/core/expr_nodes/yql_expr_nodes.json b/yql/essentials/core/expr_nodes/yql_expr_nodes.json index c62b447217e..03443f2cef2 100644 --- a/yql/essentials/core/expr_nodes/yql_expr_nodes.json +++ b/yql/essentials/core/expr_nodes/yql_expr_nodes.json @@ -2697,6 +2697,15 @@ "Name" : "TCoLikely", "Base" : "TCoNoPushBase", "Match": {"Type": "Callable", "Name": "Likely"} + }, + { + "Name" : "TCoUnessential", + "Base" : "TCallable", + "Match": {"Type": "Callable", "Name": "Unessential"}, + "Children": [ + {"Index": 0, "Name": "Predicate", "Type": "TExprBase"}, + {"Index": 1, "Name": "AssumeAs", "Type": "TExprBase"} + ] } ] } diff --git a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp index b11db6717b5..1feebdc488b 100644 --- a/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/yql/essentials/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -9343,6 +9343,7 @@ struct TPeepHoleRules { {"AssumeDistinct", &DropAssume}, {"AssumeChopped", &DropAssume}, {"AssumeConstraints", &DropAssume}, + {"Unessential", &DropAssume}, {"EmptyFrom", &DropEmptyFrom}, {"Top", &OptimizeTopOrSort<false, true>}, {"TopSort", &OptimizeTopOrSort<true, true>}, diff --git a/yql/essentials/core/type_ann/type_ann_core.cpp b/yql/essentials/core/type_ann/type_ann_core.cpp index 69fb4ed3a26..63b5322e01f 100644 --- a/yql/essentials/core/type_ann/type_ann_core.cpp +++ b/yql/essentials/core/type_ann/type_ann_core.cpp @@ -3775,6 +3775,24 @@ namespace NTypeAnnImpl { return IGraphTransformer::TStatus::Ok; } + IGraphTransformer::TStatus UnessentialWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + + if (!EnsureArgsCount(*input, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureSpecificDataType(input->Head(), EDataSlot::Bool, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + if (!EnsureSpecificDataType(*input->Child(1), EDataSlot::Bool, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Head().GetTypeAnn()); + return IGraphTransformer::TStatus::Ok; + } + template <bool Xor> IGraphTransformer::TStatus LogicalWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { if (!EnsureMinArgsCount(*input, 1, ctx.Expr)) { @@ -12619,6 +12637,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["Not"] = &BoolOpt1Wrapper; Functions["NoPush"] = &NoPushWrapper; Functions["Likely"] = &NoPushWrapper; + Functions["Unessential"] = &UnessentialWrapper; Functions["Map"] = &MapWrapper; Functions["OrderedMap"] = &MapWrapper; Functions["MapNext"] = &MapNextWrapper; diff --git a/yql/essentials/core/yql_opt_utils.cpp b/yql/essentials/core/yql_opt_utils.cpp index d416ba2ce83..46703828c13 100644 --- a/yql/essentials/core/yql_opt_utils.cpp +++ b/yql/essentials/core/yql_opt_utils.cpp @@ -2572,4 +2572,38 @@ TOperationProgress::EOpBlockStatus DetermineProgramBlockStatus(const TExprNode& return status; } +TExprNode::TPtr ReplaceUnessentials(TExprNode::TPtr predicate, TExprNode::TPtr row, const TNodeSet& banned, TExprContext& ctx) { + YQL_ENSURE(row->IsArgument()); + + std::vector<TExprNode::TPtr> unessentials; + bool hasEssentialRowUsage = false; + VisitExpr(predicate, [&](const TExprNode::TPtr& node) { + if (banned.contains(node.Get())) { + return false; + } else if (node == row) { + hasEssentialRowUsage = true; + return false; + } else if (node->IsCallable(TCoUnessential::CallableName())) { + // AssumeAs is guaranteed to be complete + unessentials.push_back(node); + return false; + } + + return true; + }); + if (unessentials.empty()) { + return predicate; + } + + // Consider predicate as unessential if all row usages across predicate are Unessential + // Drop all Unessential conditions (= replace with AssumeAs) in that case, or keep them (= replace with Predicate) otherwise + + TNodeOnNodeOwnedMap replaces; + for (const auto& unessential : unessentials) { + replaces.emplace(unessential.Get(), hasEssentialRowUsage ? unessential->ChildPtr(TCoUnessential::idx_Predicate) : unessential->ChildPtr(TCoUnessential::idx_AssumeAs)); + } + + return ctx.ReplaceNodes(std::move(predicate), replaces); +} + } diff --git a/yql/essentials/core/yql_opt_utils.h b/yql/essentials/core/yql_opt_utils.h index 91fc58905bd..666b7a5ce4c 100644 --- a/yql/essentials/core/yql_opt_utils.h +++ b/yql/essentials/core/yql_opt_utils.h @@ -213,4 +213,6 @@ TExprNode::TPtr MakeAtomList(TPositionHandle pos, const C& container, TExprConte return ctx.NewList(pos, std::move(atoms)); } +TExprNode::TPtr ReplaceUnessentials(TExprNode::TPtr predicate, TExprNode::TPtr row, const TNodeSet& banned, TExprContext& ctx); + } diff --git a/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp b/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp index 24505ba8d3b..344aac2475e 100644 --- a/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp +++ b/yql/essentials/providers/common/mkql/yql_provider_mkql.cpp @@ -3016,7 +3016,7 @@ TMkqlCommonCallableCompiler::TShared::TShared() { return MkqlBuildExpr(node.Head(), ctx); }); - AddCallable({ "AssumeStrict", "AssumeNonStrict", "NoPush", "Likely" }, [](const TExprNode& node, TMkqlBuildContext& ctx) { + AddCallable({ "AssumeStrict", "AssumeNonStrict", "NoPush", "Likely", "Unessential" }, [](const TExprNode& node, TMkqlBuildContext& ctx) { return MkqlBuildExpr(node.Head(), ctx); }); diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp index 05e73a11707..eec754a0087 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.cpp @@ -102,6 +102,7 @@ TYtPhysicalOptProposalTransformer::TYtPhysicalOptProposalTransformer(TYtState::T AddHandler(2, &TYtMap::Match, HNDL(PushDownYtMapOverSortedMerge)); AddHandler(2, &TYtMerge::Match, HNDL(ForceTransform)); AddHandler(2, &TYtMerge::Match, HNDL(MergeToCopy)); + AddHandler(2, &TYtMap::Match, HNDL(UnessentialFilter)); #undef HNDL } diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h index b14cfe8a5d5..56849058ec3 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt.h @@ -156,6 +156,8 @@ private: template <typename TLMapType> NNodes::TMaybeNode<NNodes::TExprBase> LMap(NNodes::TExprBase node, TExprContext& ctx) const; + NNodes::TMaybeNode<NNodes::TExprBase> UnessentialFilter(NNodes::TExprBase node, TExprContext& ctx) const; + template<bool WithList> NNodes::TCoLambda MakeJobLambda(NNodes::TCoLambda lambda, bool useFlow, TExprContext& ctx) const; diff --git a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp index efc9ce9916c..5c2b696ba89 100644 --- a/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp +++ b/yt/yql/providers/yt/provider/phy_opt/yql_yt_phy_opt_map.cpp @@ -409,4 +409,58 @@ TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::CombineByKey(TExprBase .Done(); } +TMaybeNode<TExprBase> TYtPhysicalOptProposalTransformer::UnessentialFilter(TExprBase node, TExprContext& ctx) const { + const auto ytMap = node.Cast<TYtMap>(); + const auto flatMap = ytMap.Mapper().Body().Maybe<TCoFlatMapBase>(); + if (!flatMap) { + return node; + } + if (flatMap.Cast().Input().Ptr() != ytMap.Mapper().Args().Arg(0).Ptr()) { + return node; + } + + auto flatMapLambda = flatMap.Cast().Lambda(); + if (!IsFilterFlatMap(flatMapLambda)) { + return node; + } + + auto row = flatMapLambda.Args().Arg(0).Ptr(); + auto predicate = flatMapLambda.Body().Ref().ChildPtr(TCoConditionalValueBase::idx_Predicate); + + TNodeSet banned; + VisitExpr(predicate, [&](const TExprNode::TPtr& node) { + if (TYtOutput::Match(node.Get())) { + // Prevent ReplaceUnessentials to go deeper than current operation + banned.insert(node.Get()); + return false; + } + return true; + }); + + auto newPredicate = ReplaceUnessentials(predicate, row, banned, ctx); + if (newPredicate == predicate) { + return node; + } + + auto newFilter = ctx.ChangeChild(flatMapLambda.Body().Ref(), TCoConditionalValueBase::idx_Predicate, std::move(newPredicate)); + auto newFlatMapLambda = ctx.ChangeChild(flatMapLambda.Ref(), TCoLambda::idx_Body, std::move(newFilter)); + return Build<TYtMap>(ctx, node.Pos()) + .InitFrom(ytMap) + .Mapper<TCoLambda>() + .Args({"stream"}) + .Body<TCoFlatMapBase>() + .CallableName(flatMap.Ref().Content()) + .Input("stream") + .Lambda<TCoLambda>() + .Args({"item"}) + .Body<TExprApplier>() + .Apply(TCoLambda(newFlatMapLambda)) + .With(0, "item") + .Build() + .Build() + .Build() + .Build() + .Done(); +} + } // namespace NYql diff --git a/yt/yql/tests/sql/suites/optimizers/drop_unessential_map.sql b/yt/yql/tests/sql/suites/optimizers/drop_unessential_map.sql new file mode 100644 index 00000000000..667f6cb480b --- /dev/null +++ b/yt/yql/tests/sql/suites/optimizers/drop_unessential_map.sql @@ -0,0 +1,13 @@ +USE plato; + +PRAGMA yt.EnableFuseMapToMapReduce; +PRAGMA warning("disable", "4510"); + +SELECT * FROM Input0 WHERE YQL::Unessential(value != "not present", true); +SELECT * FROM Input0 WHERE YQL::Unessential(value != "not present 1", true) AND YQL::Unessential(value != "not present 2", true); +SELECT * FROM Input0 WHERE YQL::Unessential(value != "not present 3", true) OR NOT (value != "not present 3"); +SELECT * FROM Input0 WHERE YQL::Unessential(YQL::Unessential(value != "other not present 4", true) OR NOT (value != "other not present 4") OR value != "not present 4", true) OR NOT (value != "not present 4"); + +$mapreduce_output = SELECT key FROM Input0 WHERE YQL::Unessential(key != "not present 5", true) GROUP BY key LIMIT 1; +-- optimizer must not go beyond operation bounds +SELECT * FROM Input0 WHERE key || Unwrap($mapreduce_output) != "not present 5"; |