diff options
author | lucius <[email protected]> | 2025-07-21 13:57:54 +0300 |
---|---|---|
committer | lucius <[email protected]> | 2025-07-21 14:13:37 +0300 |
commit | fea05ea76fdac14746236e14bda98ce8cc29fb00 (patch) | |
tree | 8c6fe4bf0602c2b77b9270bce0816e9ed3b19ba8 | |
parent | caefbb016b70b186bad0fa7f07a1524b3211fec4 (diff) |
YQL-19896: allow optional side in SingleInputPredicatePushdownOverEquiJoin in some cases
Разрешаем пушить на необязательную сторону предикат, который использует только ключевые колонки с обязательной стороны джоина.
1) рассматриваемый предикат зависит только 1 стороны и это обязательная сторона джоина
2) предикат зависит только от ключевых колонок джоина
3) все соответствующие по ключу джоина колонки в обязательном и необязательном инпутах должны полностью совпадать по типу
В этих случаях мы пушим предикат не только на обязательную, но и на необязательную сторону
commit_hash:1404e877fdd271dbe8342478a20b99b6f99a4fda
5 files changed, 404 insertions, 39 deletions
diff --git a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp index 279b6fdeb0e..4953b4c4ee6 100644 --- a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp +++ b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp @@ -211,10 +211,30 @@ TExprNode::TPtr ApplyJoinPredicate(const TExprNode::TPtr& predicate, const TExpr .Build(); } -TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoin, TExprNode::TPtr predicate, - const TSet<TStringBuf>& usedFields, TExprNode::TPtr args, const TJoinLabels& labels, - ui32 firstCandidate, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, bool skipNulls, TExprContext& ctx) -{ +bool NeedEmitSkipNullMembers(const TTypeAnnotationContext* types) { + YQL_ENSURE(types); + static const char flag[] = "EmitSkipNullOnPushdown"; + return IsOptimizerEnabled<flag>(*types) || !IsOptimizerDisabled<flag>(*types); +} + +bool IsPredicatePushdownOverEquiJoinBothSides(const TTypeAnnotationContext* types) { + YQL_ENSURE(types); + static const char flag[] = "PredicatePushdownOverEquiJoinBothSides"; + return IsOptimizerEnabled<flag>(*types) && !IsOptimizerDisabled<flag>(*types); +} + +TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin( + TExprNode::TPtr equiJoin, + TExprNode::TPtr predicate, + const TSet<TStringBuf>& usedFields, + TExprNode::TPtr args, + const TJoinLabels& labels, + ui32 firstCandidate, + const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, + bool ordered, + TExprContext& ctx, + const TTypeAnnotationContext* types +) { auto inputsCount = equiJoin->ChildrenSize() - 2; auto joinTree = equiJoin->Child(inputsCount); @@ -273,12 +293,17 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi } } + + const bool skipNullsEnabled = NeedEmitSkipNullMembers(types); + const bool pushdownBothSides = IsPredicatePushdownOverEquiJoinBothSides(types); + auto ret = ctx.ShallowCopy(*equiJoin); for (auto& inputIndex : candidates) { - auto x = IsRequiredSide(joinTree, labels, inputIndex); - if (!x.first) { + const auto [required, skipNullsPossible] = IsRequiredSide(joinTree, labels, inputIndex); + if (!pushdownBothSides && !required) { continue; } + YQL_ENSURE(required || onlyKeys); if (!isStrict && IsRequiredAndFilteredSide(joinTree, labels, inputIndex)) { continue; @@ -286,7 +311,7 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi auto prevInput = equiJoin->Child(inputIndex)->ChildPtr(0); auto newInput = prevInput; - if (x.second && skipNulls) { + if (skipNullsPossible && skipNullsEnabled) { // skip null key columns TSet<TString> optionalKeyColumns; GatherOptionalKeyColumns(joinTree, labels, inputIndex, optionalKeyColumns); @@ -406,6 +431,12 @@ TExprNode::TPtr CreateLabelList(const THashSet<TString>& labels, TExprContext& c return ctx.NewList(position, std::move(newKeys)); } +bool FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(const TTypeAnnotationContext* types) { + YQL_ENSURE(types); + static const char flag[] = "FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys"; + return IsOptimizerEnabled<flag>(*types) && !IsOptimizerDisabled<flag>(*types); +} + TExprNode::TPtr FilterPushdownOverJoinOptionalSide( TExprNode::TPtr equiJoin, TExprNode::TPtr predicate, @@ -415,11 +446,10 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide( ui32 inputIndex, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, - bool skipNulls, - bool ignoreOnlyKeys, TExprContext& ctx, - const TPositionHandle& pos) -{ + const TTypeAnnotationContext* types, + const TPositionHandle& pos +) { auto inputsCount = equiJoin->ChildrenSize() - 2; auto joinTree = equiJoin->Child(inputsCount); @@ -445,7 +475,7 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide( bool onlyKeys = false; // ignoreOnlyKeys (aka FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys) was added to canonize ydb tests without breaking them - if (!ignoreOnlyKeys) { + if (!FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(types)) { // TODO: Remove this after all YDB tests are properly canonized. See YQL-19896 for details. // check whether some used fields are not aliased @@ -506,7 +536,7 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide( YQL_ENSURE(leftJoinTree->Child(2)->IsAtom()); auto rightSideInput = equiJoinLabels.at(leftJoinTree->Child(2)->Content()); - if (skipNulls) { + if (NeedEmitSkipNullMembers(types)) { // skip null key columns TSet<TString> optionalKeyColumns; GatherOptionalKeyColumns(joinTree, labels, inputIndex, optionalKeyColumns); @@ -985,19 +1015,6 @@ TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, const TExprNod return ctx.ChangeChild(*equiJoin, inputsCount, std::move(newJoinTree)); } -bool NeedEmitSkipNullMembers(const TTypeAnnotationContext* types) { - YQL_ENSURE(types); - static const TString emitFlag = to_lower(TString("EmitSkipNullOnPushdown")); - static const TString noEmitFlag = to_lower(TString("DisableEmitSkipNullOnPushdown")); - if (types->OptimizerFlags.contains(emitFlag)) { - return true; - } - if (types->OptimizerFlags.contains(noEmitFlag)) { - return false; - } - return true; -} - bool IsEqualityFilterOverJoinEnabled(const TTypeAnnotationContext* types) { YQL_ENSURE(types); static const char flag[] = "EqualityFilterOverJoin"; @@ -1365,12 +1382,6 @@ TExprBase HandleEqualityFilterOverJoin(const TCoFlatMapBase& node, const TJoinLa .Build()); } -bool FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(const TTypeAnnotationContext* types) { - YQL_ENSURE(types); - static const char flag[] = "FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys"; - return IsOptimizerEnabled<flag>(*types) && !IsOptimizerDisabled<flag>(*types); -} - } // namespace TExprBase FlatMapOverEquiJoin( @@ -1520,7 +1531,6 @@ TExprBase FlatMapOverEquiJoin( TExprNode::TPtr extraPredicate; const bool ordered = node.Maybe<TCoOrderedFlatMap>().IsValid(); - const bool skipNulls = NeedEmitSkipNullMembers(types); for (const auto& andTerm : andTerms) { if (IsNoPush(*andTerm)) { @@ -1538,15 +1548,24 @@ TExprBase FlatMapOverEquiJoin( } if (!multiUsage && inputs.size() == 1) { - auto newJoin = SingleInputPredicatePushdownOverEquiJoin(equiJoin.Ptr(), andTerm, usedFields, - node.Lambda().Args().Ptr(), labels, *inputs.begin(), renameMap, ordered, skipNulls, ctx); + auto newJoin = SingleInputPredicatePushdownOverEquiJoin( + equiJoin.Ptr(), + andTerm, + usedFields, + node.Lambda().Args().Ptr(), + labels, + *inputs.begin(), + renameMap, + ordered, + ctx, + types + ); if (newJoin != equiJoin.Ptr()) { YQL_CLOG(DEBUG, Core) << "SingleInputPredicatePushdownOverEquiJoin"; ret = newJoin; extraPredicate = FuseAndTerms(node.Pos(), andTerms, andTerm, isPg, ctx); break; } else if (types->FilterPushdownOverJoinOptionalSide) { - bool ignoreOnlyKeys = FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(types); auto twoJoins = FilterPushdownOverJoinOptionalSide( equiJoin.Ptr(), andTerm, @@ -1556,10 +1575,10 @@ TExprBase FlatMapOverEquiJoin( *inputs.begin(), renameMap, ordered, - skipNulls, - ignoreOnlyKeys, ctx, - node.Pos()); + types, + node.Pos() + ); if (twoJoins != equiJoin.Ptr()) { YQL_CLOG(DEBUG, Core) << "RightSidePredicatePushdownOverLeftJoin"; ret = twoJoins; diff --git a/yql/essentials/tests/sql/minirun/part4/canondata/result.json b/yql/essentials/tests/sql/minirun/part4/canondata/result.json index e6d2cfd6553..c1628656b33 100644 --- a/yql/essentials/tests/sql/minirun/part4/canondata/result.json +++ b/yql/essentials/tests/sql/minirun/part4/canondata/result.json @@ -877,6 +877,20 @@ "uri": "https://{canondata_backend}/1784117/dbcd3ba4a0d5f6082e54161256fb75ae88cbff44/resource.tar.gz#test.test_join-join_comp_inmem-default.txt-Results_/results.txt" } ], + "test.test[join-pushdown_all_sides-default.txt-Debug]": [ + { + "checksum": "c6ad5b6c8904b81eda5617d3f90e0f03", + "size": 1927, + "uri": "https://{canondata_backend}/1599023/db1fc424c33f1e3d7379b8d279c7b3ecfd28ced1/resource.tar.gz#test.test_join-pushdown_all_sides-default.txt-Debug_/opt.yql" + } + ], + "test.test[join-pushdown_all_sides-default.txt-Results]": [ + { + "checksum": "71f164814672fba7d6bb48c998ecbd0d", + "size": 6703, + "uri": "https://{canondata_backend}/1597364/a08acb9a39de513be38f4870d1fa364d442a056c/resource.tar.gz#test.test_join-pushdown_all_sides-default.txt-Results_/results.txt" + } + ], "test.test[json-json_query/on_error-default.txt-Debug]": [ { "checksum": "e88d18b7b03f5445093c7ffb4f15d241", diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json index 3f6e7f09e9a..b4351ac41d7 100644 --- a/yql/essentials/tests/sql/sql2yql/canondata/result.json +++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json @@ -4234,6 +4234,13 @@ "uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql" } ], + "test_sql2yql.test[join-pushdown_all_sides]": [ + { + "checksum": "238c8b175f8fc2f12799c91433faf584", + "size": 10078, + "uri": "https://{canondata_backend}/1599023/96e9363c53a78c2884ae76b39bd982d37627eefa/resource.tar.gz#test_sql2yql.test_join-pushdown_all_sides_/sql.yql" + } + ], "test_sql2yql.test[join-yql-19192]": [ { "checksum": "fffdf1cbb40643da9daf9bdf3edec121", @@ -10871,6 +10878,11 @@ "uri": "file://test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql" } ], + "test_sql_format.test[join-pushdown_all_sides]": [ + { + "uri": "file://test_sql_format.test_join-pushdown_all_sides_/formatted.sql" + } + ], "test_sql_format.test[join-yql-19192]": [ { "uri": "file://test_sql_format.test_join-yql-19192_/formatted.sql" diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-pushdown_all_sides_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-pushdown_all_sides_/formatted.sql new file mode 100644 index 00000000000..e634a7de154 --- /dev/null +++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-pushdown_all_sides_/formatted.sql @@ -0,0 +1,175 @@ +PRAGMA config.flags('OptimizerFlags', 'PredicatePushdownOverEquiJoinBothSides'); + +SELECT + * +FROM + AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, + ]) AS t1 +LEFT JOIN + AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, + ]) AS t2 +ON + t1.k1 == t2.k2 +WHERE + t1.k1 > 0 +; + +SELECT + * +FROM + AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, + ]) AS t1 +RIGHT JOIN + AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, + ]) AS t2 +ON + t1.k1 == t2.k2 +WHERE + t2.k2 > 0 +; + +SELECT + * +FROM + AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, + ]) AS t1 +LEFT SEMI JOIN + AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, + ]) AS t2 +ON + t1.k1 == t2.k2 +WHERE + t1.k1 > 0 +; + +SELECT + * +FROM + AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, + ]) AS t1 +RIGHT SEMI JOIN + AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, + ]) AS t2 +ON + t1.k1 == t2.k2 +WHERE + t2.k2 > 0 +; + +SELECT + * +FROM + AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, + ]) AS t1 +LEFT ONLY JOIN + AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, + ]) AS t2 +ON + t1.k1 == t2.k2 +WHERE + t1.k1 > 0 +; + +SELECT + * +FROM + AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, + ]) AS t1 +RIGHT ONLY JOIN + AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, + ]) AS t2 +ON + t1.k1 == t2.k2 +WHERE + t2.k2 > 0 +; diff --git a/yql/essentials/tests/sql/suites/join/pushdown_all_sides.sql b/yql/essentials/tests/sql/suites/join/pushdown_all_sides.sql new file mode 100644 index 00000000000..6abbd0890a9 --- /dev/null +++ b/yql/essentials/tests/sql/suites/join/pushdown_all_sides.sql @@ -0,0 +1,145 @@ +PRAGMA config.flags("OptimizerFlags", "PredicatePushdownOverEquiJoinBothSides"); + +SELECT * +FROM AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, +]) AS t1 +LEFT JOIN AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, +]) AS t2 +ON t1.k1 == t2.k2 +WHERE t1.k1 > 0 +; + +SELECT * +FROM AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, +]) AS t1 +RIGHT JOIN AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, +]) AS t2 +ON t1.k1 == t2.k2 +WHERE t2.k2 > 0 +; + +SELECT * +FROM AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, +]) AS t1 +LEFT SEMI JOIN AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, +]) AS t2 +ON t1.k1 == t2.k2 +WHERE t1.k1 > 0 +; + +SELECT * +FROM AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, +]) AS t1 +RIGHT SEMI JOIN AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, +]) AS t2 +ON t1.k1 == t2.k2 +WHERE t2.k2 > 0 +; + +SELECT * +FROM AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, +]) AS t1 +LEFT ONLY JOIN AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, +]) AS t2 +ON t1.k1 == t2.k2 +WHERE t1.k1 > 0 +; + +SELECT * +FROM AS_TABLE([ + <|k1: -2|>, + <|k1: -1|>, + <|k1: 0|>, + <|k1: 0|>, + <|k1: 1|>, + <|k1: 2|>, + <|k1: 3|>, + <|k1: 4|>, +]) AS t1 +RIGHT ONLY JOIN AS_TABLE([ + <|k2: -2|>, + <|k2: 0|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 2|>, + <|k2: 4|>, + <|k2: 6|>, +]) AS t2 +ON t1.k1 == t2.k2 +WHERE t2.k2 > 0 +; |