aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlucius <lucius@yandex-team.com>2025-07-04 10:22:12 +0300
committerlucius <lucius@yandex-team.com>2025-07-04 10:47:46 +0300
commit94d6d80a1937f3cf2817a2a1d7e04b28eaa7565f (patch)
treee7a84dc5da80dfde88a9b6c5be9eaca293753667
parentdba0afc5ce7049668d44c6f5cb1bd0fd5a7fbc63 (diff)
downloadydb-94d6d80a1937f3cf2817a2a1d7e04b28eaa7565f.tar.gz
YQL-20085: fix FilterPushdownOverJoinOptionalSide
Тут чинятся 2 проблемы: 1) Rename settings во вложенных джоинах после преобразования не позволяли применить пропушенный предикат, что-нибудь типа "Unknown table name:… Failed to parse labels of struct…" 2) Если до пушдауна типизатор вывел distinct у джоина, а мы его заменили на union all, то типизатор не мог доказать что distinct сохранился и падал. Типизатор править не стали, решили через KeepUniqueDistinct. Обе проблемы немного по-разному проявляются и чинятся в двух случаях: 1) Делаем пушдаун в инпут из последнего джоина (верхний/корневой в дереве) — исходный джоин заменяется на union all из двух джоинов 2) Пушдаун в инпут НЕ из последнего джоина (НЕ верхний/корневой в дереве) — union all появляется внутри "исходного" джоина commit_hash:cc7cfbb973bc7dcabba202ab9a637c3b9f1cb5dd
-rw-r--r--yql/essentials/core/common_opt/yql_flatmap_over_join.cpp42
-rw-r--r--yql/essentials/tests/sql/minirun/part2/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/minirun/part7/canondata/result.json14
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/result.json24
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-distinct_/formatted.sql66
-rw-r--r--yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-rename_/formatted.sql45
-rw-r--r--yql/essentials/tests/sql/suites/join/yql-20085-distinct.sql66
-rw-r--r--yql/essentials/tests/sql/suites/join/yql-20085-rename.sql38
8 files changed, 277 insertions, 32 deletions
diff --git a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
index 1026e8f1b66..b85b0a8178e 100644
--- a/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
+++ b/yql/essentials/core/common_opt/yql_flatmap_over_join.cpp
@@ -406,17 +406,6 @@ TExprNode::TPtr CreateLabelList(const THashSet<TString>& labels, TExprContext& c
return ctx.NewList(position, std::move(newKeys));
}
-TExprNode::TPtr RemoveJoinKeysFromElimination(const TExprNode& settings, TExprContext& ctx) {
- TExprNode::TListType updated;
- for (auto setting : settings.Children()) {
- if (setting->ChildrenSize() == 3 && setting->Child(0)->Content() == "rename" && setting->Child(2)->Content() == "") {
- continue;
- }
- updated.push_back(setting);
- }
- return ctx.NewList(settings.Pos(), std::move(updated));
-}
-
TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TExprNode::TPtr predicate,
const TSet<TStringBuf>& usedFields, TExprNode::TPtr args, const TJoinLabels& labels,
ui32 inputIndex, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap, bool ordered, bool skipNulls, TExprContext& ctx,
@@ -425,7 +414,8 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TEx
auto inputsCount = equiJoin->ChildrenSize() - 2;
auto joinTree = equiJoin->Child(inputsCount);
- if (!IsRightSideForLeftJoin(joinTree, labels, inputIndex).first) {
+ auto [leftJoinTree, parentJoinPtr] = IsRightSideForLeftJoin(joinTree, labels, inputIndex);
+ if (!leftJoinTree) {
return equiJoin;
}
@@ -478,13 +468,12 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TEx
THashMap<TString, int> joinLabelCounters;
CountLabelsInputUsage(joinTree, joinLabelCounters);
- auto [leftJoinTree, parentJoinPtr] = IsRightSideForLeftJoin(joinTree, labels, inputIndex);
YQL_ENSURE(leftJoinTree);
// Left child of the `leftJoinTree` could be a tree, need to walk and decrement them all, the do not need be at fina EquiJoin.
DecrementCountLabelsInputUsage(leftJoinTree, joinLabelCounters);
- auto leftJoinSettings = equiJoin->ChildPtr(equiJoin->ChildrenSize() - 1);
- auto newLeftJoinSettings = RemoveJoinKeysFromElimination(*leftJoinSettings, ctx);
+ const auto joinSettings = equiJoin->TailPtr();
+ const auto innerSettings = parentJoinPtr ? RemoveSetting(*joinSettings, "rename", ctx) : joinSettings;
auto innerJoinTree = ctx.ChangeChild(*leftJoinTree, 0, ctx.NewAtom(leftJoinTree->Pos(), "Inner"));
auto leftOnlyJoinTree = ctx.ChangeChild(*leftJoinTree, 0, ctx.NewAtom(leftJoinTree->Pos(), "LeftOnly"));
@@ -545,7 +534,7 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TEx
.Atom(1, innerJoinTree->ChildRef(2)->Content())
.Seal()
.Add(i++, innerJoinTree)
- .Add(i++, newLeftJoinSettings)
+ .Add(i++, innerSettings)
.Seal()
.Build();
@@ -577,7 +566,7 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TEx
.Atom(1, leftOnlyJoinTree->ChildRef(2)->Content())
.Seal()
.Add(i++, leftOnlyJoinTree)
- .Add(i++, newLeftJoinSettings)
+ .Add(i++, innerSettings)
.Seal()
.Build();
@@ -597,7 +586,8 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TEx
.Build();
if (!parentJoinPtr) {
- return unionAll;
+ // TODO: Evaluate constraints in UnionAll automatically. See https://st.yandex-team.ru/YQL-20085#685bb01e8a10e760cdd58750.
+ return KeepUniqueDistinct(unionAll, *equiJoin, ctx);
}
TExprNode::TPtr remJoinKeys;
@@ -636,18 +626,6 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TEx
auto newJoinTree = ctx.ReplaceNode(std::move(joinTree), *parentJoinPtr, newParentJoin);
- i = 0;
- auto newJoinSettings = ctx.Builder(pos)
- .List()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (const auto& child : equiJoin->TailPtr()->ChildrenList()) {
- parent.Add(i++, child);
- }
- return parent;
- })
- .Seal()
- .Build();
-
// Combine join labels from left tree and associate them with result of `EquiJoin` from above.
auto combinedLabelList = CombineLabels(leftJoinLabelsFull);
auto combinedJoinLabels = CreateLabelList(combinedLabelList, ctx, pos);
@@ -672,11 +650,11 @@ TExprNode::TPtr FilterPushdownOverJoinOptionalSide(TExprNode::TPtr equiJoin, TEx
.Add(1, combinedJoinLabels)
.Seal()
.Add(i++, newJoinTree)
- .Add(i++, newJoinSettings)
+ .Add(i++, joinSettings)
.Seal()
.Build();
- return newEquiJoin;
+ return KeepUniqueDistinct(newEquiJoin, *equiJoin, ctx);
}
class TJoinTreeRebuilder {
diff --git a/yql/essentials/tests/sql/minirun/part2/canondata/result.json b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
index 28b1ecec99c..5c5de71f88e 100644
--- a/yql/essentials/tests/sql/minirun/part2/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part2/canondata/result.json
@@ -814,6 +814,20 @@
"uri": "https://{canondata_backend}/995452/fe026cd31998bf6072028e6429713a6b1158be3d/resource.tar.gz#test.test_join-inmem_with_set_key_any-default.txt-Results_/results.txt"
}
],
+ "test.test[join-yql-20085-distinct-default.txt-Debug]": [
+ {
+ "checksum": "2226cab144ed961ea53e2e9e6f1d881e",
+ "size": 2555,
+ "uri": "https://{canondata_backend}/1936947/6e2695e9df81b811b7a80094733bbd1c0188ffdf/resource.tar.gz#test.test_join-yql-20085-distinct-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[join-yql-20085-distinct-default.txt-Results]": [
+ {
+ "checksum": "fc8289bfacb089191cf58d9e789043f8",
+ "size": 3023,
+ "uri": "https://{canondata_backend}/1936947/6e2695e9df81b811b7a80094733bbd1c0188ffdf/resource.tar.gz#test.test_join-yql-20085-distinct-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[json-json_query/on_empty-default.txt-Debug]": [
{
"checksum": "8a7975441c14dc194368455e9b6b828a",
diff --git a/yql/essentials/tests/sql/minirun/part7/canondata/result.json b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
index 8083b15dac8..c6cd82da8cf 100644
--- a/yql/essentials/tests/sql/minirun/part7/canondata/result.json
+++ b/yql/essentials/tests/sql/minirun/part7/canondata/result.json
@@ -597,6 +597,20 @@
"uri": "https://{canondata_backend}/1784117/2ea0b35b312aa96a64af8f612c875f1585849c84/resource.tar.gz#test.test_join-inmem_with_set_key-default.txt-Results_/results.txt"
}
],
+ "test.test[join-yql-20085-rename-default.txt-Debug]": [
+ {
+ "checksum": "b42cb95f29a207e98f21a3d590dd53e0",
+ "size": 1768,
+ "uri": "https://{canondata_backend}/1924537/0b69b467f37bc6c3cbfab22057a8d8e7bc979c15/resource.tar.gz#test.test_join-yql-20085-rename-default.txt-Debug_/opt.yql"
+ }
+ ],
+ "test.test[join-yql-20085-rename-default.txt-Results]": [
+ {
+ "checksum": "9b46576e462043da56d489c4ad432580",
+ "size": 2130,
+ "uri": "https://{canondata_backend}/1924537/0b69b467f37bc6c3cbfab22057a8d8e7bc979c15/resource.tar.gz#test.test_join-yql-20085-rename-default.txt-Results_/results.txt"
+ }
+ ],
"test.test[join-yql-6199-default.txt-Debug]": [
{
"checksum": "8917f654d003e330ee7c8346505596cc",
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/result.json b/yql/essentials/tests/sql/sql2yql/canondata/result.json
index 8562333d3e8..665f0364892 100644
--- a/yql/essentials/tests/sql/sql2yql/canondata/result.json
+++ b/yql/essentials/tests/sql/sql2yql/canondata/result.json
@@ -4108,6 +4108,20 @@
"uri": "https://{canondata_backend}/212715/e102f70d9232d825c746de10ca1c287eb99f76ad/resource.tar.gz#test_sql2yql.test_join-yql-19731_/sql.yql"
}
],
+ "test_sql2yql.test[join-yql-20085-distinct]": [
+ {
+ "checksum": "d63befb2bb098cc289dd5f13b4c6f034",
+ "size": 5007,
+ "uri": "https://{canondata_backend}/1916746/a9aae45c5a3c55697c94425d0d63b059f9129cca/resource.tar.gz#test_sql2yql.test_join-yql-20085-distinct_/sql.yql"
+ }
+ ],
+ "test_sql2yql.test[join-yql-20085-rename]": [
+ {
+ "checksum": "b5b17a49b19430c9c7f65fb6580734d2",
+ "size": 3143,
+ "uri": "https://{canondata_backend}/1924537/ff990ab7def5cabc141ccc68eee760fe4f19ab8b/resource.tar.gz#test_sql2yql.test_join-yql-20085-rename_/sql.yql"
+ }
+ ],
"test_sql2yql.test[join-yql-6199]": [
{
"checksum": "5421f92ec269e5d76133e1ca54b6d0d0",
@@ -10578,6 +10592,16 @@
"uri": "file://test_sql_format.test_join-yql-19731_/formatted.sql"
}
],
+ "test_sql_format.test[join-yql-20085-distinct]": [
+ {
+ "uri": "file://test_sql_format.test_join-yql-20085-distinct_/formatted.sql"
+ }
+ ],
+ "test_sql_format.test[join-yql-20085-rename]": [
+ {
+ "uri": "file://test_sql_format.test_join-yql-20085-rename_/formatted.sql"
+ }
+ ],
"test_sql_format.test[join-yql-6199]": [
{
"uri": "file://test_sql_format.test_join-yql-6199_/formatted.sql"
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-distinct_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-distinct_/formatted.sql
new file mode 100644
index 00000000000..0ca355a0148
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-distinct_/formatted.sql
@@ -0,0 +1,66 @@
+PRAGMA FilterPushdownOverJoinOptionalSide;
+
+$t1 = (
+ SELECT DISTINCT
+ k1
+ FROM
+ AS_TABLE(AsList(
+ AsStruct(0 AS k1),
+ AsStruct(1 AS k1),
+ AsStruct(2 AS k1),
+ AsStruct(3 AS k1),
+ AsStruct(4 AS k1),
+ AsStruct(5 AS k1),
+ ))
+);
+
+$t2 = (
+ SELECT
+ k2,
+ SUM(v2) AS v2
+ FROM
+ AS_TABLE(AsList(
+ AsStruct(2 AS k2, 1 AS v2),
+ AsStruct(3 AS k2, 1 AS v2),
+ AsStruct(4 AS k2, 1 AS v2),
+ AsStruct(5 AS k2, 1 AS v2),
+ AsStruct(6 AS k2, 1 AS v2),
+ AsStruct(7 AS k2, 1 AS v2),
+ ))
+ GROUP BY
+ k2
+);
+
+$t3 = (
+ SELECT
+ k3,
+ SUM(v3) AS v3
+ FROM
+ AS_TABLE(AsList(
+ AsStruct(4 AS k3, 1 AS v3),
+ AsStruct(5 AS k3, 1 AS v3),
+ AsStruct(6 AS k3, 1 AS v3),
+ AsStruct(7 AS k3, 1 AS v3),
+ AsStruct(8 AS k3, 1 AS v3),
+ AsStruct(9 AS k3, 1 AS v3),
+ ))
+ GROUP BY
+ k3
+);
+
+SELECT
+ *
+FROM
+ $t1 AS t1
+LEFT JOIN
+ $t2 AS t2
+ON
+ t1.k1 == t2.k2
+LEFT JOIN
+ $t3 AS t3
+ON
+ t1.k1 == t3.k3
+WHERE
+ t2.v2 < 10
+ AND t3.v3 < 10
+;
diff --git a/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-rename_/formatted.sql b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-rename_/formatted.sql
new file mode 100644
index 00000000000..d8c53e4b505
--- /dev/null
+++ b/yql/essentials/tests/sql/sql2yql/canondata/test_sql_format.test_join-yql-20085-rename_/formatted.sql
@@ -0,0 +1,45 @@
+PRAGMA FilterPushdownOverJoinOptionalSide;
+
+$t1_data = AsList(
+ AsStruct(0 AS m1),
+ AsStruct(1 AS m1),
+ AsStruct(2 AS m1),
+ AsStruct(3 AS m1),
+ AsStruct(4 AS m1),
+ AsStruct(5 AS m1),
+);
+
+$t2_data = AsList(
+ AsStruct(2 AS m2, 2 AS m3, 2 AS m4),
+ AsStruct(3 AS m2, 3 AS m3, 3 AS m4),
+ AsStruct(4 AS m2, 4 AS m3, 4 AS m4),
+ AsStruct(5 AS m2, 5 AS m3, 5 AS m4),
+ AsStruct(6 AS m2, 6 AS m3, 6 AS m4),
+ AsStruct(7 AS m2, 7 AS m3, 7 AS m4),
+);
+
+$t3_data = AsList(
+ AsStruct(4 AS m5),
+ AsStruct(5 AS m5),
+ AsStruct(6 AS m5),
+);
+
+SELECT
+ *
+FROM (
+ SELECT
+ *
+ FROM
+ AS_TABLE($t1_data) AS t1
+ LEFT JOIN
+ AS_TABLE($t2_data) AS t2
+ ON
+ t1.m1 == t2.m2
+) AS t_joined
+LEFT SEMI JOIN
+ AS_TABLE($t3_data) AS t3
+ON
+ t_joined.m3 == t3.m5
+WHERE
+ t_joined.m4 < 5
+;
diff --git a/yql/essentials/tests/sql/suites/join/yql-20085-distinct.sql b/yql/essentials/tests/sql/suites/join/yql-20085-distinct.sql
new file mode 100644
index 00000000000..0ca355a0148
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/join/yql-20085-distinct.sql
@@ -0,0 +1,66 @@
+PRAGMA FilterPushdownOverJoinOptionalSide;
+
+$t1 = (
+ SELECT DISTINCT
+ k1
+ FROM
+ AS_TABLE(AsList(
+ AsStruct(0 AS k1),
+ AsStruct(1 AS k1),
+ AsStruct(2 AS k1),
+ AsStruct(3 AS k1),
+ AsStruct(4 AS k1),
+ AsStruct(5 AS k1),
+ ))
+);
+
+$t2 = (
+ SELECT
+ k2,
+ SUM(v2) AS v2
+ FROM
+ AS_TABLE(AsList(
+ AsStruct(2 AS k2, 1 AS v2),
+ AsStruct(3 AS k2, 1 AS v2),
+ AsStruct(4 AS k2, 1 AS v2),
+ AsStruct(5 AS k2, 1 AS v2),
+ AsStruct(6 AS k2, 1 AS v2),
+ AsStruct(7 AS k2, 1 AS v2),
+ ))
+ GROUP BY
+ k2
+);
+
+$t3 = (
+ SELECT
+ k3,
+ SUM(v3) AS v3
+ FROM
+ AS_TABLE(AsList(
+ AsStruct(4 AS k3, 1 AS v3),
+ AsStruct(5 AS k3, 1 AS v3),
+ AsStruct(6 AS k3, 1 AS v3),
+ AsStruct(7 AS k3, 1 AS v3),
+ AsStruct(8 AS k3, 1 AS v3),
+ AsStruct(9 AS k3, 1 AS v3),
+ ))
+ GROUP BY
+ k3
+);
+
+SELECT
+ *
+FROM
+ $t1 AS t1
+LEFT JOIN
+ $t2 AS t2
+ON
+ t1.k1 == t2.k2
+LEFT JOIN
+ $t3 AS t3
+ON
+ t1.k1 == t3.k3
+WHERE
+ t2.v2 < 10
+ AND t3.v3 < 10
+;
diff --git a/yql/essentials/tests/sql/suites/join/yql-20085-rename.sql b/yql/essentials/tests/sql/suites/join/yql-20085-rename.sql
new file mode 100644
index 00000000000..51700e18278
--- /dev/null
+++ b/yql/essentials/tests/sql/suites/join/yql-20085-rename.sql
@@ -0,0 +1,38 @@
+PRAGMA FilterPushdownOverJoinOptionalSide;
+
+$t1_data = AsList(
+ AsStruct(0 AS m1),
+ AsStruct(1 AS m1),
+ AsStruct(2 AS m1),
+ AsStruct(3 AS m1),
+ AsStruct(4 AS m1),
+ AsStruct(5 AS m1),
+);
+
+$t2_data = AsList(
+ AsStruct(2 AS m2, 2 AS m3, 2 AS m4),
+ AsStruct(3 AS m2, 3 AS m3, 3 AS m4),
+ AsStruct(4 AS m2, 4 AS m3, 4 AS m4),
+ AsStruct(5 AS m2, 5 AS m3, 5 AS m4),
+ AsStruct(6 AS m2, 6 AS m3, 6 AS m4),
+ AsStruct(7 AS m2, 7 AS m3, 7 AS m4),
+);
+
+$t3_data = AsList(
+ AsStruct(4 AS m5),
+ AsStruct(5 AS m5),
+ AsStruct(6 AS m5),
+);
+
+
+SELECT *
+FROM (
+ SELECT *
+ FROM AS_TABLE($t1_data) AS t1
+ LEFT JOIN AS_TABLE($t2_data) AS t2
+ ON t1.m1 == t2.m2
+) AS t_joined
+LEFT SEMI JOIN AS_TABLE($t3_data) AS t3
+ON t_joined.m3 == t3.m5
+WHERE t_joined.m4 < 5
+;