aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author aneporada <aneporada@ydb.tech> 2022-08-26 19:19:27 +0300
committer aneporada <aneporada@ydb.tech> 2022-08-26 19:19:27 +0300
commit e30ada84abf7af2b7eaa1e97aede8ec386d25bb8 (patch)
tree b16baf33ed96da7f382e721e2afad935e0a9263f
parent 5ed0649ded71642e99fbfad64f95ccf0b8dd18fb (diff)
download ydb-e30ada84abf7af2b7eaa1e97aede8ec386d25bb8.tar.gz
[] Do not push down a filter over a filtering join unless AssumeStrict is present in the predicate
Otherwise we can change the filtering order, which matters if evaluating the predicate can produce a runtime error
-rw-r--r-- ydb/library/yql/core/common_opt/yql_co_simple1.cpp 4
-rw-r--r-- ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp 41
-rw-r--r-- ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp 5
-rw-r--r-- ydb/library/yql/core/type_ann/type_ann_core.cpp 15
-rw-r--r-- ydb/library/yql/core/yql_join.cpp 42
-rw-r--r-- ydb/library/yql/core/yql_join.h 2
-rw-r--r-- ydb/library/yql/sql/v1/builtin.cpp 1
7 files changed, 100 insertions, 10 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
index 96319ecc28b..ef1f301d95d 100644
--- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp
@@ -807,12 +807,12 @@ TExprNode::TPtr RemoveOptionalReduceOverData(const TExprNode::TPtr& node, TExprC
}
TExprNode::TPtr PropagateCoalesceWithConstIntoLogicalOps(const TExprNode::TPtr& node, TExprContext& ctx) {
- if (node->Head().IsCallable("Likely")) {
+ if (node->Head().IsCallable({"Likely", "AssumeStrict"})) {
const auto value = FromString<bool>(node->Child(1)->Head().Content());
if (!value) {
YQL_CLOG(DEBUG, Core) << "PropagateCoalesceWithConst over " << node->Head().Content() << " (false)";
auto ret = ctx.Builder(node->Pos())
- .Callable("Likely")
+ .Callable(node->Head().Content())
.Callable(0, "Coalesce")
.Add(0, node->Head().HeadPtr())
.Add(1, node->ChildPtr(1))
diff --git a/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp b/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp
index ea5442a2fd2..61089d9c024 100644
--- a/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp
+++ b/ydb/library/yql/core/common_opt/yql_flatmap_over_join.cpp
@@ -1,6 +1,7 @@
#include "yql_flatmap_over_join.h"
#include "yql_co.h"
+#include <ydb/library/yql/core/yql_expr_optimize.h>
#include <ydb/library/yql/core/yql_join.h>
#include <ydb/library/yql/core/yql_opt_utils.h>
@@ -117,11 +118,39 @@ void GatherOptionalKeyColumns(TExprNode::TPtr joinTree, const TJoinLabels& label
}
}
+bool IsRequiredAndFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex) { // Collapses IsFilteredSide()'s tri-state result into a plain bool for input `inputIndex`.
+ TMaybe<bool> isFiltered = IsFilteredSide(joinTree, labels, inputIndex);
+ return isFiltered.Defined() && *isFiltered; // an undefined (empty) result is reported as false
+}
+
TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoin, TExprNode::TPtr predicate,
const TSet<TStringBuf>& usedFields, TExprNode::TPtr args, const TJoinLabels& labels,
- ui32 firstCandidate, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, TExprContext& ctx) {
+ ui32 firstCandidate, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, TExprContext& ctx)
+{
auto inputsCount = equiJoin->ChildrenSize() - 2;
auto joinTree = equiJoin->Child(inputsCount);
+
+ if (!IsRequiredSide(joinTree, labels, firstCandidate).first) {
+ return equiJoin;
+ }
+
+ // TODO: derive from constraints
+ bool isStrict = true;
+ if (IsRequiredAndFilteredSide(joinTree, labels, firstCandidate)) {
+ VisitExpr(*predicate, [&](const TExprNode& node) {
+ if (node.IsCallable("AssumeStrict")) {
+ return false;
+ }
+ if (node.IsCallable({"Udf", "ScriptUdf", "Unwrap", "Ensure"})) {
+ isStrict = false;
+ }
+ return isStrict;
+ });
+ if (!isStrict) {
+ return equiJoin;
+ }
+ }
+
TMap<TString, TSet<TString>> aliases;
GatherKeyAliases(joinTree, aliases, labels);
MakeTransitiveClosure(aliases);
@@ -162,10 +191,6 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi
}
}
- if (!IsRequiredSide(joinTree, labels, firstCandidate).first) {
- return equiJoin;
- }
-
auto ret = ctx.ShallowCopy(*equiJoin);
for (auto& inputIndex : candidates) {
auto x = IsRequiredSide(joinTree, labels, inputIndex);
@@ -173,6 +198,10 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi
continue;
}
+ if (!isStrict && IsRequiredAndFilteredSide(joinTree, labels, inputIndex)) {
+ continue;
+ }
+
auto prevInput = equiJoin->Child(inputIndex)->ChildPtr(0);
auto newInput = prevInput;
if (x.second) {
@@ -556,7 +585,7 @@ TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, const TExprNod
return ctx.ChangeChild(*equiJoin, inputsCount, std::move(newJoinTree));
}
-}
+} // namespace
TExprBase FlatMapOverEquiJoin(const TCoFlatMapBase& node, TExprContext& ctx, const TParentsMap& parentsMap, bool multiUsage) {
auto equiJoin = node.Input();
diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
index 57b58419aed..d6ecc2d551f 100644
--- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
+++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
@@ -2070,7 +2070,7 @@ TExprNode::TPtr OptimizeMultiMap(const TExprNode::TPtr& node, TExprContext& ctx)
return node;
}
-TExprNode::TPtr LikelyExclude(const TExprNode::TPtr& node, TExprContext&) {
+TExprNode::TPtr ReplaceWithFirstArg(const TExprNode::TPtr& node, TExprContext&) { // Peephole handler (registered for "Likely" and "AssumeStrict" in this patch): logs the callable name and returns node->HeadPtr(), dropping the wrapper.
YQL_CLOG(DEBUG, CorePeepHole) << "Exclude " << node->Content();
return node->HeadPtr();
}
@@ -5872,7 +5872,8 @@ struct TPeepHoleRules {
static constexpr std::initializer_list<TPeepHoleOptimizerMap::value_type> FinalStageRulesInit = {
{"Take", &OptimizeTake<EnableNewOptimizers>},
{"Skip", &OptimizeSkip},
- {"Likely", &LikelyExclude},
+ {"Likely", &ReplaceWithFirstArg},
+ {"AssumeStrict", &ReplaceWithFirstArg},
{"GroupByKey", &PeepHoleConvertGroupBySingleKey},
{"PartitionByKey", &PeepHolePlainKeyForPartitionByKey},
{"ExtractMembers", &PeepHoleExpandExtractItems},
diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp
index 9a0d9671702..f586675b6fd 100644
--- a/ydb/library/yql/core/type_ann/type_ann_core.cpp
+++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp
@@ -10999,6 +10999,20 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
return IGraphTransformer::TStatus::Ok;
}
+ IGraphTransformer::TStatus AssumeStrictWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { // Type annotation for the "AssumeStrict" callable: the node takes the type of its head argument. Returns Error if a check fails, Ok otherwise.
+ Y_UNUSED(output); // this wrapper never assigns `output`
+ if (!EnsureArgsCount(*input, 1, ctx.Expr)) { // argument-count check with an expected count of 1
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!EnsureComputable(input->Head(), ctx.Expr)) { // NOTE(review): presumably rejects a non-computable argument; helper is not visible here
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ input->SetTypeAnn(input->Head().GetTypeAnn()); // pass the argument's type annotation through unchanged
+ return IGraphTransformer::TStatus::Ok;
+ }
+
TSyncFunctionsMap::TSyncFunctionsMap() {
Functions["Data"] = &DataWrapper;
Functions["DataOrOptionalData"] = &DataWrapper;
@@ -11162,6 +11176,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
Functions["AssumeSorted"] = &SortWrapper;
Functions["AssumeUnique"] = &AssumeUniqueWrapper;
Functions["AssumeAllMembersNullableAtOnce"] = &AssumeAllMembersNullableAtOnceWrapper;
+ Functions["AssumeStrict"] = &AssumeStrictWrapper;
Functions["Top"] = &TopWrapper;
Functions["TopSort"] = &TopWrapper;
Functions["KeepTop"] = &KeepTopWrapper;
diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp
index 67fdb01931b..7ec3afeba06 100644
--- a/ydb/library/yql/core/yql_join.cpp
+++ b/ydb/library/yql/core/yql_join.cpp
@@ -1040,6 +1040,48 @@ std::pair<bool, bool> IsRequiredSide(const TExprNode::TPtr& joinTree, const TJoi
return{ false, false };
}
+TMaybe<bool> IsFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex) { // Walks the join tree looking for input `inputIndex`; returns true/false according to the join type at the leaf where it is found, or an empty TMaybe when nothing was decided.
+ auto joinType = joinTree->Child(0)->Content(); // child 0: join type name, compared below against "Inner", "LeftOnly", ...
+ auto left = joinTree->ChildPtr(1); // child 1: left operand, either a table-label atom or a nested join tree
+ auto right = joinTree->ChildPtr(2); // child 2: right operand, same two possibilities
+
+ TMaybe<bool> isLeftFiltered;
+ if (!left->IsAtom()) { // nested join: the answer comes from the subtree, this level's join type is not consulted
+ isLeftFiltered = IsFilteredSide(left, labels, inputIndex);
+ } else {
+ auto table = left->Content();
+ if (*labels.FindInputIndex(table) == inputIndex) { // NOTE(review): FindInputIndex() result is dereferenced unchecked; assumes the label is always known
+ if (joinType == "Inner" || joinType == "LeftOnly" || joinType == "LeftSemi") {
+ isLeftFiltered = true;
+ } else if (joinType != "RightOnly" && joinType != "RightSemi") { // for RightOnly/RightSemi the left result deliberately stays undefined
+ isLeftFiltered = false;
+ }
+ }
+ }
+
+ TMaybe<bool> isRightFiltered; // mirror image of the left-hand logic above
+ if (!right->IsAtom()) {
+ isRightFiltered = IsFilteredSide(right, labels, inputIndex);
+ } else {
+ auto table = right->Content();
+ if (*labels.FindInputIndex(table) == inputIndex) {
+ if (joinType == "Inner" || joinType == "RightOnly" || joinType == "RightSemi") {
+ isRightFiltered = true;
+ } else if (joinType != "LeftOnly" && joinType != "LeftSemi") { // for LeftOnly/LeftSemi the right result stays undefined
+ isRightFiltered = false;
+ }
+ }
+ }
+
+ YQL_ENSURE(!(isLeftFiltered.Defined() && isRightFiltered.Defined())); // the input must not be resolved on both sides at once
+
+ if (!isLeftFiltered.Defined() && !isRightFiltered.Defined()) {
+ return {}; // not found in this subtree, or found only on a side whose result was left undefined
+ }
+
+ return isLeftFiltered.Defined() ? isLeftFiltered : isRightFiltered;
+}
+
void AppendEquiJoinRenameMap(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& newRenameMap,
TExprNode::TListType& joinSettingNodes, TExprContext& ctx) {
for (auto& x : newRenameMap) {
diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h
index 872e6a7f162..672645bf120 100644
--- a/ydb/library/yql/core/yql_join.h
+++ b/ydb/library/yql/core/yql_join.h
@@ -96,6 +96,8 @@ bool AreSameJoinKeys(const TExprNode& joins, const TStringBuf& table1, const TSt
// returns (is required side + allow skip nulls);
std::pair<bool, bool> IsRequiredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex);
+TMaybe<bool> IsFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex);
+
void AppendEquiJoinRenameMap(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& newRenameMap,
TExprNode::TListType& joinSettingNodes, TExprContext& ctx);
diff --git a/ydb/library/yql/sql/v1/builtin.cpp b/ydb/library/yql/sql/v1/builtin.cpp
index 27c8e76cb81..94ce993624a 100644
--- a/ydb/library/yql/sql/v1/builtin.cpp
+++ b/ydb/library/yql/sql/v1/builtin.cpp
@@ -2901,6 +2901,7 @@ struct TBuiltinFuncData {
{"nvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1) },
{"nanvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nanvl", 2, 2) },
{"likely", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Likely", 1, -1)},
+ {"assumestrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AssumeStrict", 1, 1)},
{"random", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "Random", 1, -1)},
{"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomNumber", 1, -1)},
{"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomUuid", 1, -1) },