diff options
author | aneporada <aneporada@ydb.tech> | 2022-08-26 19:19:27 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2022-08-26 19:19:27 +0300 |
commit | e30ada84abf7af2b7eaa1e97aede8ec386d25bb8 (patch) | |
tree | b16baf33ed96da7f382e721e2afad935e0a9263f | |
parent | 5ed0649ded71642e99fbfad64f95ccf0b8dd18fb (diff) | |
download | ydb-e30ada84abf7af2b7eaa1e97aede8ec386d25bb8.tar.gz |
[] Do not push down a filter over a filtering join unless AssumeStrict is present in the predicate
Otherwise the pushdown can change the filtering order, which matters when evaluating the predicate can produce a runtime error
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_simple1.cpp | 4 | ||||
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp | 41 | ||||
-rw-r--r-- | ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp | 5 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_core.cpp | 15 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.cpp | 42 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.h | 2 | ||||
-rw-r--r-- | ydb/library/yql/sql/v1/builtin.cpp | 1 |
7 files changed, 100 insertions, 10 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 96319ecc28b..ef1f301d95d 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -807,12 +807,12 @@ TExprNode::TPtr RemoveOptionalReduceOverData(const TExprNode::TPtr& node, TExprC } TExprNode::TPtr PropagateCoalesceWithConstIntoLogicalOps(const TExprNode::TPtr& node, TExprContext& ctx) { - if (node->Head().IsCallable("Likely")) { + if (node->Head().IsCallable({"Likely", "AssumeStrict"})) { const auto value = FromString<bool>(node->Child(1)->Head().Content()); if (!value) { YQL_CLOG(DEBUG, Core) << "PropagateCoalesceWithConst over " << node->Head().Content() << " (false)"; auto ret = ctx.Builder(node->Pos()) - .Callable("Likely") + .Callable(node->Head().Content()) .Callable(0, "Coalesce") .Add(0, node->Head().HeadPtr()) .Add(1, node->ChildPtr(1)) diff --git a/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp b/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp index ea5442a2fd2..61089d9c024 100644 --- a/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp +++ b/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp @@ -1,6 +1,7 @@ #include "yql_flatmap_over_join.h" #include "yql_co.h" +#include <ydb/library/yql/core/yql_expr_optimize.h> #include <ydb/library/yql/core/yql_join.h> #include <ydb/library/yql/core/yql_opt_utils.h> @@ -117,11 +118,39 @@ void GatherOptionalKeyColumns(TExprNode::TPtr joinTree, const TJoinLabels& label } } +bool IsRequiredAndFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex) { + TMaybe<bool> isFiltered = IsFilteredSide(joinTree, labels, inputIndex); + return isFiltered.Defined() && *isFiltered; +} + TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoin, TExprNode::TPtr predicate, const TSet<TStringBuf>& usedFields, TExprNode::TPtr args, const TJoinLabels& labels, 
- ui32 firstCandidate, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, TExprContext& ctx) { + ui32 firstCandidate, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, TExprContext& ctx) +{ auto inputsCount = equiJoin->ChildrenSize() - 2; auto joinTree = equiJoin->Child(inputsCount); + + if (!IsRequiredSide(joinTree, labels, firstCandidate).first) { + return equiJoin; + } + + // TODO: derive from constraints + bool isStrict = true; + if (IsRequiredAndFilteredSide(joinTree, labels, firstCandidate)) { + VisitExpr(*predicate, [&](const TExprNode& node) { + if (node.IsCallable("AssumeStrict")) { + return false; + } + if (node.IsCallable({"Udf", "ScriptUdf", "Unwrap", "Ensure"})) { + isStrict = false; + } + return isStrict; + }); + if (!isStrict) { + return equiJoin; + } + } + TMap<TString, TSet<TString>> aliases; GatherKeyAliases(joinTree, aliases, labels); MakeTransitiveClosure(aliases); @@ -162,10 +191,6 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi } } - if (!IsRequiredSide(joinTree, labels, firstCandidate).first) { - return equiJoin; - } - auto ret = ctx.ShallowCopy(*equiJoin); for (auto& inputIndex : candidates) { auto x = IsRequiredSide(joinTree, labels, inputIndex); @@ -173,6 +198,10 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi continue; } + if (!isStrict && IsRequiredAndFilteredSide(joinTree, labels, inputIndex)) { + continue; + } + auto prevInput = equiJoin->Child(inputIndex)->ChildPtr(0); auto newInput = prevInput; if (x.second) { @@ -556,7 +585,7 @@ TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, const TExprNod return ctx.ChangeChild(*equiJoin, inputsCount, std::move(newJoinTree)); } -} +} // namespace TExprBase FlatMapOverEquiJoin(const TCoFlatMapBase& node, TExprContext& ctx, const TParentsMap& parentsMap, bool multiUsage) { auto equiJoin = node.Input(); diff --git 
a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index 57b58419aed..d6ecc2d551f 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -2070,7 +2070,7 @@ TExprNode::TPtr OptimizeMultiMap(const TExprNode::TPtr& node, TExprContext& ctx) return node; } -TExprNode::TPtr LikelyExclude(const TExprNode::TPtr& node, TExprContext&) { +TExprNode::TPtr ReplaceWithFirstArg(const TExprNode::TPtr& node, TExprContext&) { YQL_CLOG(DEBUG, CorePeepHole) << "Exclude " << node->Content(); return node->HeadPtr(); } @@ -5872,7 +5872,8 @@ struct TPeepHoleRules { static constexpr std::initializer_list<TPeepHoleOptimizerMap::value_type> FinalStageRulesInit = { {"Take", &OptimizeTake<EnableNewOptimizers>}, {"Skip", &OptimizeSkip}, - {"Likely", &LikelyExclude}, + {"Likely", &ReplaceWithFirstArg}, + {"AssumeStrict", &ReplaceWithFirstArg}, {"GroupByKey", &PeepHoleConvertGroupBySingleKey}, {"PartitionByKey", &PeepHolePlainKeyForPartitionByKey}, {"ExtractMembers", &PeepHoleExpandExtractItems}, diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index 9a0d9671702..f586675b6fd 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -10999,6 +10999,20 @@ template <NKikimr::NUdf::EDataSlot DataSlot> return IGraphTransformer::TStatus::Ok; } + IGraphTransformer::TStatus AssumeStrictWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureArgsCount(*input, 1, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureComputable(input->Head(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Head().GetTypeAnn()); + return IGraphTransformer::TStatus::Ok; + } + TSyncFunctionsMap::TSyncFunctionsMap() 
{ Functions["Data"] = &DataWrapper; Functions["DataOrOptionalData"] = &DataWrapper; @@ -11162,6 +11176,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["AssumeSorted"] = &SortWrapper; Functions["AssumeUnique"] = &AssumeUniqueWrapper; Functions["AssumeAllMembersNullableAtOnce"] = &AssumeAllMembersNullableAtOnceWrapper; + Functions["AssumeStrict"] = &AssumeStrictWrapper; Functions["Top"] = &TopWrapper; Functions["TopSort"] = &TopWrapper; Functions["KeepTop"] = &KeepTopWrapper; diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp index 67fdb01931b..7ec3afeba06 100644 --- a/ydb/library/yql/core/yql_join.cpp +++ b/ydb/library/yql/core/yql_join.cpp @@ -1040,6 +1040,48 @@ std::pair<bool, bool> IsRequiredSide(const TExprNode::TPtr& joinTree, const TJoi return{ false, false }; } +TMaybe<bool> IsFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex) { + auto joinType = joinTree->Child(0)->Content(); + auto left = joinTree->ChildPtr(1); + auto right = joinTree->ChildPtr(2); + + TMaybe<bool> isLeftFiltered; + if (!left->IsAtom()) { + isLeftFiltered = IsFilteredSide(left, labels, inputIndex); + } else { + auto table = left->Content(); + if (*labels.FindInputIndex(table) == inputIndex) { + if (joinType == "Inner" || joinType == "LeftOnly" || joinType == "LeftSemi") { + isLeftFiltered = true; + } else if (joinType != "RightOnly" && joinType != "RightSemi") { + isLeftFiltered = false; + } + } + } + + TMaybe<bool> isRightFiltered; + if (!right->IsAtom()) { + isRightFiltered = IsFilteredSide(right, labels, inputIndex); + } else { + auto table = right->Content(); + if (*labels.FindInputIndex(table) == inputIndex) { + if (joinType == "Inner" || joinType == "RightOnly" || joinType == "RightSemi") { + isRightFiltered = true; + } else if (joinType != "LeftOnly" && joinType != "LeftSemi") { + isRightFiltered = false; + } + } + } + + YQL_ENSURE(!(isLeftFiltered.Defined() && isRightFiltered.Defined())); + + if 
(!isLeftFiltered.Defined() && !isRightFiltered.Defined()) { + return {}; + } + + return isLeftFiltered.Defined() ? isLeftFiltered : isRightFiltered; +} + void AppendEquiJoinRenameMap(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& newRenameMap, TExprNode::TListType& joinSettingNodes, TExprContext& ctx) { for (auto& x : newRenameMap) { diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h index 872e6a7f162..672645bf120 100644 --- a/ydb/library/yql/core/yql_join.h +++ b/ydb/library/yql/core/yql_join.h @@ -96,6 +96,8 @@ bool AreSameJoinKeys(const TExprNode& joins, const TStringBuf& table1, const TSt // returns (is required side + allow skip nulls); std::pair<bool, bool> IsRequiredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex); +TMaybe<bool> IsFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex); + void AppendEquiJoinRenameMap(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& newRenameMap, TExprNode::TListType& joinSettingNodes, TExprContext& ctx); diff --git a/ydb/library/yql/sql/v1/builtin.cpp b/ydb/library/yql/sql/v1/builtin.cpp index 27c8e76cb81..94ce993624a 100644 --- a/ydb/library/yql/sql/v1/builtin.cpp +++ b/ydb/library/yql/sql/v1/builtin.cpp @@ -2901,6 +2901,7 @@ struct TBuiltinFuncData { {"nvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1) }, {"nanvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nanvl", 2, 2) }, {"likely", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Likely", 1, -1)}, + {"assumestrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AssumeStrict", 1, 1)}, {"random", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "Random", 1, -1)}, {"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomNumber", 1, -1)}, {"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomUuid", 1, -1) }, |