summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: lucius <[email protected]> 2025-07-21 13:57:54 +0300
committer: lucius <[email protected]> 2025-07-21 14:13:37 +0300
commit: fea05ea76fdac14746236e14bda98ce8cc29fb00 (patch)
tree: 8c6fe4bf0602c2b77b9270bce0816e9ed3b19ba8
parent: caefbb016b70b186bad0fa7f07a1524b3211fec4 (diff)
YQL-19896: allow optional side in SingleInputPredicatePushdownOverEquiJoin in some cases
Разрешаем пушить на необязательную сторону предикат, который использует только ключевые колонки с обязательной стороны джоина. Условия: 1) рассматриваемый предикат зависит только от одной стороны, и это обязательная сторона джоина; 2) предикат зависит только от ключевых колонок джоина; 3) все соответствующие по ключу джоина колонки в обязательном и необязательном инпутах должны полностью совпадать по типу. В этих случаях мы пушим предикат не только на обязательную, но и на необязательную сторону. commit_hash:1404e877fdd271dbe8342478a20b99b6f99a4fda
-rw-r--r-- yql/essentials/core/common_opt/yql_flatmap_over_join.cpp | 97
-rw-r--r-- yql/essentials/tests/sql/minirun/part4/canondata/result.json | 14
-rw-r--r-- yql/essentials/tests/sql/sql2yql/canondata/result.json | 12
-rw-r--r-- yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-pushdown_all_sides_/formatted.sql | 175
-rw-r--r-- yql/essentials/tests/sql/suites/join/pushdown_all_sides.sql | 145
5 files changed, 404 insertions, 39 deletions
diff --git a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
index 279b6fdeb0e..4953b4c4ee6 100644
--- a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
+++ b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
@@ -211,10 +211,30 @@ TExprNode::TPtr ApplyJoinPredicate(const TExprNode::TPtr& predicate, const TExpr
.Build();
}
-TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoin, TExprNode::TPtr predicate,
- const TSet<TStringBuf>& usedFields, TExprNode::TPtr args, const TJoinLabels& labels,
- ui32 firstCandidate, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, bool skipNulls, TExprContext& ctx)
-{
+bool NeedEmitSkipNullMembers(const TTypeAnnotationContext* types) {
+ YQL_ENSURE(types);
+ static const char flag[] = "EmitSkipNullOnPushdown";
+ return IsOptimizerEnabled<flag>(*types) || !IsOptimizerDisabled<flag>(*types);
+}
+
+bool IsPredicatePushdownOverEquiJoinBothSides(const TTypeAnnotationContext* types) {
+ YQL_ENSURE(types);
+ static const char flag[] = "PredicatePushdownOverEquiJoinBothSides";
+ return IsOptimizerEnabled<flag>(*types) && !IsOptimizerDisabled<flag>(*types);
+}
+
+TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(
+ TExprNode::TPtr equiJoin,
+ TExprNode::TPtr predicate,
+ const TSet<TStringBuf>& usedFields,
+ TExprNode::TPtr args,
+ const TJoinLabels& labels,
+ ui32 firstCandidate,
+ const TMap<TStringBuf, TVector<TStringBuf>>& renameMap,
+ bool ordered,
+ TExprContext& ctx,
+ const TTypeAnnotationContext* types
+) {
auto inputsCount = equiJoin->ChildrenSize() - 2;
auto joinTree = equiJoin->Child(inputsCount);
@@ -273,12 +293,17 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi
}
}
+
+ const bool skipNullsEnabled = NeedEmitSkipNullMembers(types);
+ const bool pushdownBothSides = IsPredicatePushdownOverEquiJoinBothSides(types);
+
auto ret = ctx.ShallowCopy(*equiJoin);
for (auto& inputIndex : candidates) {
- auto x = IsRequiredSide(joinTree, labels, inputIndex);
- if (!x.first) {
+ const auto [required, skipNullsPossible] = IsRequiredSide(joinTree, labels, inputIndex);
+ if (!pushdownBothSides && !required) {
continue;
}
+ YQL_ENSURE(required || onlyKeys);
if (!isStrict && IsRequiredAndFilteredSide(joinTree, labels, inputIndex)) {
continue;
@@ -286,7 +311,7 @@ TExprNode::TPtr SingleInputPredicatePushdownOverEquiJoin(TExprNode::TPtr equiJoi
auto prevInput = equiJoin->Child(inputIndex)->ChildPtr(0);
auto newInput = prevInput;
- if (x.second && skipNulls) {
+ if (skipNullsPossible && skipNullsEnabled) {
// skip null key columns
TSet<TString> optionalKeyColumns;
GatherOptionalKeyColumns(joinTree, labels, inputIndex, optionalKeyColumns);
@@ -406,6 +431,12 @@ TExprNode::TPtr CreateLabelList(const THashSet<TString>& labels, TExprContext& c
return ctx.NewList(position, std::move(newKeys));
}
+bool FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(const TTypeAnnotationContext* types) {
+ YQL_ENSURE(types);
+ static const char flag[] = "FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys";
+ return IsOptimizerEnabled<flag>(*types) && !IsOptimizerDisabled<flag>(*types);
+}
+
TExprNode::TPtr FilterPushdownOverJoinOptionalSide(
TExprNode::TPtr equiJoin,
TExprNode::TPtr predicate,
@@ -415,11 +446,10 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(
ui32 inputIndex,
const TMap<TStringBuf, TVector<TStringBuf>>& renameMap,
bool ordered,
- bool skipNulls,
- bool ignoreOnlyKeys,
TExprContext& ctx,
- const TPositionHandle& pos)
-{
+ const TTypeAnnotationContext* types,
+ const TPositionHandle& pos
+) {
auto inputsCount = equiJoin->ChildrenSize() - 2;
auto joinTree = equiJoin->Child(inputsCount);
@@ -445,7 +475,7 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(
bool onlyKeys = false;
// ignoreOnlyKeys (aka FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys) was added to canonize ydb tests without breaking them
- if (!ignoreOnlyKeys) {
+ if (!FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(types)) {
// TODO: Remove this after all YDB tests are properly canonized. See YQL-19896 for details.
// check whether some used fields are not aliased
@@ -506,7 +536,7 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(
YQL_ENSURE(leftJoinTree->Child(2)->IsAtom());
auto rightSideInput = equiJoinLabels.at(leftJoinTree->Child(2)->Content());
- if (skipNulls) {
+ if (NeedEmitSkipNullMembers(types)) {
// skip null key columns
TSet<TString> optionalKeyColumns;
GatherOptionalKeyColumns(joinTree, labels, inputIndex, optionalKeyColumns);
@@ -985,19 +1015,6 @@ TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, const TExprNod
return ctx.ChangeChild(*equiJoin, inputsCount, std::move(newJoinTree));
}
-bool NeedEmitSkipNullMembers(const TTypeAnnotationContext* types) {
- YQL_ENSURE(types);
- static const TString emitFlag = to_lower(TString("EmitSkipNullOnPushdown"));
- static const TString noEmitFlag = to_lower(TString("DisableEmitSkipNullOnPushdown"));
- if (types->OptimizerFlags.contains(emitFlag)) {
- return true;
- }
- if (types->OptimizerFlags.contains(noEmitFlag)) {
- return false;
- }
- return true;
-}
-
bool IsEqualityFilterOverJoinEnabled(const TTypeAnnotationContext* types) {
YQL_ENSURE(types);
static const char flag[] = "EqualityFilterOverJoin";
@@ -1365,12 +1382,6 @@ TExprBase HandleEqualityFilterOverJoin(const TCoFlatMapBase& node, const TJoinLa
.Build());
}
-bool FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(const TTypeAnnotationContext* types) {
- YQL_ENSURE(types);
- static const char flag[] = "FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys";
- return IsOptimizerEnabled<flag>(*types) && !IsOptimizerDisabled<flag>(*types);
-}
-
} // namespace
TExprBase FlatMapOverEquiJoin(
@@ -1520,7 +1531,6 @@ TExprBase FlatMapOverEquiJoin(
TExprNode::TPtr extraPredicate;
const bool ordered = node.Maybe<TCoOrderedFlatMap>().IsValid();
- const bool skipNulls = NeedEmitSkipNullMembers(types);
for (const auto& andTerm : andTerms) {
if (IsNoPush(*andTerm)) {
@@ -1538,15 +1548,24 @@ TExprBase FlatMapOverEquiJoin(
}
if (!multiUsage && inputs.size() == 1) {
- auto newJoin = SingleInputPredicatePushdownOverEquiJoin(equiJoin.Ptr(), andTerm, usedFields,
- node.Lambda().Args().Ptr(), labels, *inputs.begin(), renameMap, ordered, skipNulls, ctx);
+ auto newJoin = SingleInputPredicatePushdownOverEquiJoin(
+ equiJoin.Ptr(),
+ andTerm,
+ usedFields,
+ node.Lambda().Args().Ptr(),
+ labels,
+ *inputs.begin(),
+ renameMap,
+ ordered,
+ ctx,
+ types
+ );
if (newJoin != equiJoin.Ptr()) {
YQL_CLOG(DEBUG, Core) << "SingleInputPredicatePushdownOverEquiJoin";
ret = newJoin;
extraPredicate = FuseAndTerms(node.Pos(), andTerms, andTerm, isPg, ctx);
break;
} else if (types->FilterPushdownOverJoinOptionalSide) {
- bool ignoreOnlyKeys = FilterPushdownOverJoinOptionalSideIgnoreOnlyKeys(types);
auto twoJoins = FilterPushdownOverJoinOptionalSide(
equiJoin.Ptr(),
andTerm,
@@ -1556,10 +1575,10 @@ TExprBase FlatMapOverEquiJoin(
*inputs.begin(),
renameMap,
ordered,
- skipNulls,
- ignoreOnlyKeys,
ctx,
- node.Pos());
+ types,
+ node.Pos()
+ );
if (twoJoins != equiJoin.Ptr()) {
YQL_CLOG(DEBUG, Core) << "RightSidePredicatePushdownOverLeftJoin";
ret = twoJoins;
diff --git a/yql/essentials/tests/sql/minirun/part4/canondata/result.json b/yql/essentials/tests/sql/minirun/part4/canondata/result.json
index e6d2cfd6553..c1628656b33 100644
--- a/yql/essentials/tests/sql/minirun/part4/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part4/canondata/result.json
@@ -877,6 +877,20 @@
"uri": "https://{canondata_backend}/1784117/dbcd3ba4a0d5f6082e54161256fb75ae88cbff44/resource.tar.gz#test.test_join-join_comp_inmem-default.txt-Results_/results.txt"
}
],
+ "test.test[join-pushdown_all_sides-default.txt-Debug]": [
+ {
+ "checksum": "c6ad5b6c8904b81eda5617d3f90e0f03",
+ "size": 1927,
+ "uri": "https://{canondata_backend}/1599023/db1fc424c33f1e3d7379b8d279c7b3ecfd28ced1/resource.tar.gz#test.test_join-pushdown_all_sides-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[join-pushdown_all_sides-default.txt-Results]": [
+ {
+ "checksum": "71f164814672fba7d6bb48c998ecbd0d",
+ "size": 6703,
+ "uri": "https://{canondata_backend}/1597364/a08acb9a39de513be38f4870d1fa364d442a056c/resource.tar.gz#test.test_join-pushdown_all_sides-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[json-json_query/on_error-default.txt-Debug]": [
{
"checksum": "e88d18b7b03f5445093c7ffb4f15d241",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 3f6e7f09e9a..b4351ac41d7 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -4234,6 +4234,13 @@
"uri": "https://{canondata_backend}/1942525/94a477066ea16f69d4848bbe524485fc029978b8/resource.tar.gz#test_sql2yql.test_join-prune_keys_YQL-19979_/sql.yql"
}
],
+ "test_sql2yql.test[join-pushdown_all_sides]": [
+ {
+ "checksum": "238c8b175f8fc2f12799c91433faf584",
+ "size": 10078,
+ "uri": "https://{canondata_backend}/1599023/96e9363c53a78c2884ae76b39bd982d37627eefa/resource.tar.gz#test_sql2yql.test_join-pushdown_all_sides_/sql.yql"
+ }
+ ],
"test_sql2yql.test[join-yql-19192]": [
{
"checksum": "fffdf1cbb40643da9daf9bdf3edec121",
@@ -10871,6 +10878,11 @@
"uri": "file://test_sql_format.test_join-prune_keys_YQL-19979_/formatted.sql"
}
],
+ "test_sql_format.test[join-pushdown_all_sides]": [
+ {
+ "uri": "file://test_sql_format.test_join-pushdown_all_sides_/formatted.sql"
+ }
+ ],
"test_sql_format.test[join-yql-19192]": [
{
"uri": "file://test_sql_format.test_join-yql-19192_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-pushdown_all_sides_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-pushdown_all_sides_/formatted.sql
new file mode 100644
index 00000000000..e634a7de154
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-pushdown_all_sides_/formatted.sql
@@ -0,0 +1,175 @@
+PRAGMA config.flags('OptimizerFlags', 'PredicatePushdownOverEquiJoinBothSides');
+
+SELECT
+ *
+FROM
+ AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+ ]) AS t1
+LEFT JOIN
+ AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+ ]) AS t2
+ON
+ t1.k1 == t2.k2
+WHERE
+ t1.k1 > 0
+;
+
+SELECT
+ *
+FROM
+ AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+ ]) AS t1
+RIGHT JOIN
+ AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+ ]) AS t2
+ON
+ t1.k1 == t2.k2
+WHERE
+ t2.k2 > 0
+;
+
+SELECT
+ *
+FROM
+ AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+ ]) AS t1
+LEFT SEMI JOIN
+ AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+ ]) AS t2
+ON
+ t1.k1 == t2.k2
+WHERE
+ t1.k1 > 0
+;
+
+SELECT
+ *
+FROM
+ AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+ ]) AS t1
+RIGHT SEMI JOIN
+ AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+ ]) AS t2
+ON
+ t1.k1 == t2.k2
+WHERE
+ t2.k2 > 0
+;
+
+SELECT
+ *
+FROM
+ AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+ ]) AS t1
+LEFT ONLY JOIN
+ AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+ ]) AS t2
+ON
+ t1.k1 == t2.k2
+WHERE
+ t1.k1 > 0
+;
+
+SELECT
+ *
+FROM
+ AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+ ]) AS t1
+RIGHT ONLY JOIN
+ AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+ ]) AS t2
+ON
+ t1.k1 == t2.k2
+WHERE
+ t2.k2 > 0
+;
diff --git a/yql/essentials/tests/sql/suites/join/pushdown_all_sides.sql b/yql/essentials/tests/sql/suites/join/pushdown_all_sides.sql
new file mode 100644
index 00000000000..6abbd0890a9
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/join/pushdown_all_sides.sql
@@ -0,0 +1,145 @@
+PRAGMA config.flags("OptimizerFlags", "PredicatePushdownOverEquiJoinBothSides");
+
+SELECT *
+FROM AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+]) AS t1
+LEFT JOIN AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+]) AS t2
+ON t1.k1 == t2.k2
+WHERE t1.k1 > 0
+;
+
+SELECT *
+FROM AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+]) AS t1
+RIGHT JOIN AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+]) AS t2
+ON t1.k1 == t2.k2
+WHERE t2.k2 > 0
+;
+
+SELECT *
+FROM AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+]) AS t1
+LEFT SEMI JOIN AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+]) AS t2
+ON t1.k1 == t2.k2
+WHERE t1.k1 > 0
+;
+
+SELECT *
+FROM AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+]) AS t1
+RIGHT SEMI JOIN AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+]) AS t2
+ON t1.k1 == t2.k2
+WHERE t2.k2 > 0
+;
+
+SELECT *
+FROM AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+]) AS t1
+LEFT ONLY JOIN AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+]) AS t2
+ON t1.k1 == t2.k2
+WHERE t1.k1 > 0
+;
+
+SELECT *
+FROM AS_TABLE([
+ <|k1: -2|>,
+ <|k1: -1|>,
+ <|k1: 0|>,
+ <|k1: 0|>,
+ <|k1: 1|>,
+ <|k1: 2|>,
+ <|k1: 3|>,
+ <|k1: 4|>,
+]) AS t1
+RIGHT ONLY JOIN AS_TABLE([
+ <|k2: -2|>,
+ <|k2: 0|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 2|>,
+ <|k2: 4|>,
+ <|k2: 6|>,
+]) AS t2
+ON t1.k1 == t2.k2
+WHERE t2.k2 > 0
+;