aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVitaly Stoyan <vitstn@gmail.com>2022-06-23 18:01:48 +0300
committerVitaly Stoyan <vitstn@gmail.com>2022-06-23 18:01:48 +0300
commit768b582b9ff12cb740c8428e9ca1fa8f57b0d1a6 (patch)
tree970232e83c2b5d112e6fb90df1c5be9532178ebc
parent302f2d67c12b563349b579a092494c715dc787f3 (diff)
downloadydb-768b582b9ff12cb740c8428e9ca1fa8f57b0d1a6.tar.gz
YQL-14728 rotate cross joins to make inner links as close to tables as possible
ref:7263d51e95e32260cf5bc41b3f42acf2312019ad
-rw-r--r--ydb/library/yql/core/common_opt/yql_co_flow2.cpp243
-rw-r--r--ydb/library/yql/core/common_opt/yql_co_pgselect.cpp48
-rw-r--r--ydb/library/yql/core/yql_join.cpp16
-rw-r--r--ydb/library/yql/core/yql_join.h2
4 files changed, 222 insertions, 87 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp
index 1b7f73ca8d2..ff2ec5e947e 100644
--- a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp
@@ -412,78 +412,221 @@ void GatherJoinInputs(const TExprNode::TPtr& expr, const TExprNode& row,
}
}
-TExprNode::TPtr AddLinkToJoinTree(TExprNode::TPtr joinTree, TStringBuf label1, TStringBuf column1, TStringBuf label2, TStringBuf column2,
- TExprContext& ctx, TMaybe<ui32>& found1, TMaybe<ui32>& found2, bool& updated) {
- YQL_ENSURE(joinTree->Child(0)->Content() == "Cross" || joinTree->Child(0)->Content() == "Inner");
- auto children = joinTree->ChildrenList();
+class TJoinTreeRebuilder {
+public:
+ TJoinTreeRebuilder(TExprNode::TPtr joinTree, TStringBuf label1, TStringBuf column1, TStringBuf label2, TStringBuf column2,
+ TExprContext& ctx)
+ : JoinTree(joinTree)
+ , Labels{ label1, label2 }
+ , Columns{ column1, column2 }
+ , Ctx(ctx)
+ {}
+
+ TExprNode::TPtr Run() {
+ auto pos = JoinTree->Pos();
+ GatherCross(JoinTree);
+ auto inCross1 = FindPtr(CrossJoins, Labels[0]);
+ auto inCross2 = FindPtr(CrossJoins, Labels[1]);
+ auto joinTree = JoinTree;
+ if (inCross1 || inCross2) {
+ if (inCross1 && inCross2) {
+ // make them a leaf
+ joinTree = MakeCrossJoin(pos, Ctx.NewAtom(pos, Labels[0]), Ctx.NewAtom(pos, Labels[1]), Ctx);
+ for (auto label : CrossJoins) {
+ if (label != Labels[0] && label != Labels[1]) {
+ joinTree = MakeCrossJoin(pos, joinTree, Ctx.NewAtom(pos, label), Ctx);
+ }
+ }
- auto& left = children[1];
- if (!left->IsAtom()) {
- left = AddLinkToJoinTree(left, label1, column1, label2, column2, ctx, found1, found2, updated);
- if (found1) {
- found1 = 1u;
- }
+ joinTree = AddRestJoins(pos, joinTree, nullptr);
+ } else if (inCross1) {
+ // leaf with table1 and subtree with table2
+ auto rest = FindRestJoin(Labels[1]);
+ YQL_ENSURE(rest);
+ joinTree = MakeCrossJoin(pos, Ctx.NewAtom(pos, Labels[0]), rest, Ctx);
+ for (auto label : CrossJoins) {
+ if (label != Labels[0]) {
+ joinTree = MakeCrossJoin(pos, joinTree, Ctx.NewAtom(pos, label), Ctx);
+ }
+ }
+
+ joinTree = AddRestJoins(pos, joinTree, rest);
+ } else {
+ // leaf with table2 and subtree with table1
+ auto rest = FindRestJoin(Labels[0]);
+ YQL_ENSURE(rest);
+ joinTree = MakeCrossJoin(pos, Ctx.NewAtom(pos, Labels[1]), rest, Ctx);
+ for (auto label : CrossJoins) {
+ if (label != Labels[1]) {
+ joinTree = MakeCrossJoin(pos, joinTree, Ctx.NewAtom(pos, label), Ctx);
+ }
+ }
- if (found2) {
- found2 = 1u;
+ joinTree = AddRestJoins(pos, joinTree, rest);
+ }
}
- } else {
- if (left->Content() == label1) {
- found1 = 1u;
+
+ auto newJoinTree = std::get<0>(AddLink(joinTree));
+ YQL_ENSURE(Updated);
+ return newJoinTree;
+ }
+
+private:
+ TExprNode::TPtr AddRestJoins(TPositionHandle pos, TExprNode::TPtr joinTree, TExprNode::TPtr exclude) {
+ for (auto join : RestJoins) {
+ if (join == exclude) {
+ continue;
+ }
+
+ joinTree = MakeCrossJoin(pos, joinTree, join, Ctx);
}
- if (left->Content() == label2) {
- found2 = 1u;
+ return joinTree;
+ }
+
+ TExprNode::TPtr FindRestJoin(TStringBuf label) {
+ for (auto join : RestJoins) {
+ if (HasTable(join, label)) {
+ return join;
+ }
}
+
+ return nullptr;
}
- auto& right = children[2];
- if (!right->IsAtom()) {
- right = AddLinkToJoinTree(right, label1, column1, label2, column2, ctx, found1, found2, updated);
- if (found1) {
- found1 = 2u;
+ bool HasTable(TExprNode::TPtr joinTree, TStringBuf label) {
+ auto left = joinTree->ChildPtr(1);
+ if (left->IsAtom()) {
+ if (left->Content() == label) {
+ return true;
+ }
+ } else {
+ if (HasTable(left, label)) {
+ return true;
+ }
}
- if (found2) {
- found2 = 2u;
+ auto right = joinTree->ChildPtr(2);
+ if (right->IsAtom()) {
+ if (right->Content() == label) {
+ return true;
+ }
+ } else {
+ if (HasTable(right, label)) {
+ return true;
+ }
}
- } else {
- if (right->Content() == label1) {
- found1 = 2u;
+
+ return false;
+ }
+
+ void GatherCross(TExprNode::TPtr joinTree) {
+ auto type = joinTree->Child(0)->Content();
+ if (type != "Cross") {
+ RestJoins.push_back(joinTree);
+ return;
+ }
+
+ auto left = joinTree->ChildPtr(1);
+ if (left->IsAtom()) {
+ CrossJoins.push_back(left->Content());
+ } else {
+ GatherCross(left);
}
- if (right->Content() == label2) {
- found2 = 2u;
+ auto right = joinTree->ChildPtr(2);
+ if (right->IsAtom()) {
+ CrossJoins.push_back(right->Content());
+ } else {
+ GatherCross(right);
}
}
- if (found1 && found2) {
- if (!updated) {
- if (joinTree->Child(0)->Content() == "Cross") {
- children[0] = ctx.NewAtom(joinTree->Pos(), "Inner");
+ std::tuple<TExprNode::TPtr, TMaybe<ui32>, TMaybe<ui32>> AddLink(TExprNode::TPtr joinTree) {
+ YQL_ENSURE(joinTree->Child(0)->Content() == "Cross" || joinTree->Child(0)->Content() == "Inner");
+ auto children = joinTree->ChildrenList();
+
+ TMaybe<ui32> found1;
+ TMaybe<ui32> found2;
+ auto& left = children[1];
+ if (!left->IsAtom()) {
+ TMaybe<ui32> leftFound1, leftFound2;
+ std::tie(left, leftFound1, leftFound2) = AddLink(left);
+ if (leftFound1) {
+ found1 = 1u;
+ }
+
+ if (leftFound2) {
+ found2 = 1u;
+ }
+ } else {
+ if (left->Content() == Labels[0]) {
+ found1 = 1u;
+ }
+
+ if (left->Content() == Labels[1]) {
+ found2 = 1u;
+ }
+ }
+
+ auto& right = children[2];
+ if (!right->IsAtom()) {
+ TMaybe<ui32> rightFound1, rightFound2;
+ std::tie(right, rightFound1, rightFound2) = AddLink(right);
+ if (rightFound1) {
+ found1 = 2u;
+ }
+
+ if (rightFound2) {
+ found2 = 2u;
+ }
+ } else {
+ if (right->Content() == Labels[0]) {
+ found1 = 2u;
}
- if (*found1 == 2u) {
- std::swap(label1, label2);
- std::swap(column1, column2);
+ if (right->Content() == Labels[1]) {
+ found2 = 2u;
}
+ }
- auto link1 = children[3]->ChildrenList();
- link1.push_back(ctx.NewAtom(joinTree->Pos(), label1));
- link1.push_back(ctx.NewAtom(joinTree->Pos(), column1));
- children[3] = ctx.ChangeChildren(*children[3], std::move(link1));
+ if (found1 && found2) {
+ if (!Updated) {
+ if (joinTree->Child(0)->Content() == "Cross") {
+ children[0] = Ctx.NewAtom(joinTree->Pos(), "Inner");
+ }
+
+ ui32 index1 = *found1 - 1; // 0/1
+ ui32 index2 = 1 - index1;
- auto link2 = children[4]->ChildrenList();
- link2.push_back(ctx.NewAtom(joinTree->Pos(), label2));
- link2.push_back(ctx.NewAtom(joinTree->Pos(), column2));
- children[4] = ctx.ChangeChildren(*children[4], std::move(link2));
+ auto link1 = children[3]->ChildrenList();
+ link1.push_back(Ctx.NewAtom(joinTree->Pos(), Labels[index1]));
+ link1.push_back(Ctx.NewAtom(joinTree->Pos(), Columns[index1]));
+ children[3] = Ctx.ChangeChildren(*children[3], std::move(link1));
- updated = true;
+ auto link2 = children[4]->ChildrenList();
+ link2.push_back(Ctx.NewAtom(joinTree->Pos(), Labels[index2]));
+ link2.push_back(Ctx.NewAtom(joinTree->Pos(), Columns[index2]));
+ children[4] = Ctx.ChangeChildren(*children[4], std::move(link2));
+
+ Updated = true;
+ }
}
+
+ return { Ctx.ChangeChildren(*joinTree, std::move(children)), found1, found2 };
}
- return ctx.ChangeChildren(*joinTree, std::move(children));
-}
+private:
+ TVector<TStringBuf> CrossJoins;
+ TVector<TExprNode::TPtr> RestJoins;
+
+ bool Updated = false;
+
+ TExprNode::TPtr JoinTree;
+ TStringBuf Labels[2];
+ TStringBuf Columns[2];
+ TExprContext& Ctx;
+};
TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, TExprNode::TPtr predicate,
const TJoinLabels& labels, ui32 index1, ui32 index2, const TExprNode& row, const THashMap<TString, TString>& backRenameMap,
@@ -559,10 +702,8 @@ TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, TExprNode::TPt
return equiJoin;
}
- TMaybe<ui32> found1, found2;
- bool updated = false;
- auto newJoinTree = AddLinkToJoinTree(joinTree, label1, column1, label2, column2, ctx, found1, found2, updated);
- YQL_ENSURE(updated);
+ TJoinTreeRebuilder rebuilder(joinTree, label1, column1, label2, column2, ctx);
+ auto newJoinTree = rebuilder.Run();
return ctx.ChangeChild(*equiJoin, inputsCount, std::move(newJoinTree));
}
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
index b89673508a9..89afb22e2ff 100644
--- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
@@ -3,6 +3,7 @@
#include <ydb/library/yql/core/type_ann/type_ann_pg.h>
#include <ydb/library/yql/core/yql_expr_optimize.h>
+#include <ydb/library/yql/core/yql_join.h>
#include <ydb/library/yql/core/yql_opt_utils.h>
namespace NYql {
@@ -1020,34 +1021,9 @@ TExprNode::TPtr BuildCrossJoinsBetweenGroups(TPositionHandle pos, const TExprNod
.Build());
}
- auto tree = ctx.Builder(pos)
- .List()
- .Atom(0, "Cross")
- .Atom(1, "0")
- .Atom(2, "1")
- .List(3)
- .Seal()
- .List(4)
- .Seal()
- .List(5)
- .Seal()
- .Seal()
- .Build();
-
+ auto tree = MakeCrossJoin(pos, ctx.NewAtom(pos, "0"), ctx.NewAtom(pos, "1"), ctx);
for (ui32 i = 2; i < joinGroups.size(); ++i) {
- tree = ctx.Builder(pos)
- .List()
- .Atom(0, "Cross")
- .Add(1, tree)
- .Atom(2, ToString(i))
- .List(3)
- .Seal()
- .List(4)
- .Seal()
- .List(5)
- .Seal()
- .Seal()
- .Build();
+ tree = MakeCrossJoin(pos, tree, ctx.NewAtom(pos, ToString(i)), ctx);
}
args.push_back(tree);
@@ -1278,15 +1254,15 @@ TExprNode::TPtr BuildGroupByAndHaving(TPositionHandle pos, TExprNode::TPtr list,
}
auto payloadsNode = ctx.NewList(pos, std::move(payloadItems));
- TExprNode::TListType keysItems;
- if (finalExtTypes) {
- for (const auto& x : finalExtTypes->Tail().Children()) {
- auto type = x->Tail().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
- for (const auto& i : type->GetItems()) {
- keysItems.push_back(ctx.NewAtom(pos, NTypeAnnImpl::MakeAliasedColumn(x->Head().Content(), i->GetName())));
- }
- }
- }
+ TExprNode::TListType keysItems;
+ if (finalExtTypes) {
+ for (const auto& x : finalExtTypes->Tail().Children()) {
+ auto type = x->Tail().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
+ for (const auto& i : type->GetItems()) {
+ keysItems.push_back(ctx.NewAtom(pos, NTypeAnnImpl::MakeAliasedColumn(x->Head().Content(), i->GetName())));
+ }
+ }
+ }
if (groupBy) {
for (const auto& group : groupBy->Tail().Children()) {
diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp
index 9a9ded7a327..c9324e5a511 100644
--- a/ydb/library/yql/core/yql_join.cpp
+++ b/ydb/library/yql/core/yql_join.cpp
@@ -1539,4 +1539,20 @@ TExprNode::TPtr MakeDictForJoin(TExprNode::TPtr&& list, bool payload, bool multi
template TExprNode::TPtr MakeDictForJoin<true>(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx);
template TExprNode::TPtr MakeDictForJoin<false>(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx);
+TExprNode::TPtr MakeCrossJoin(TPositionHandle pos, TExprNode::TPtr left, TExprNode::TPtr right, TExprContext& ctx) {
+ return ctx.Builder(pos)
+ .List()
+ .Atom(0, "Cross")
+ .Add(1, left)
+ .Add(2, right)
+ .List(3)
+ .Seal()
+ .List(4)
+ .Seal()
+ .List(5)
+ .Seal()
+ .Seal()
+ .Build();
+}
+
} // namespace NYql
diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h
index b01b950c2d6..69d5f2b2d21 100644
--- a/ydb/library/yql/core/yql_join.h
+++ b/ydb/library/yql/core/yql_join.h
@@ -142,4 +142,6 @@ TExprNode::TPtr PrepareListForJoin(TExprNode::TPtr list, const TTypeAnnotationNo
template<bool Squeeze = false>
TExprNode::TPtr MakeDictForJoin(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx);
+TExprNode::TPtr MakeCrossJoin(TPositionHandle pos, TExprNode::TPtr left, TExprNode::TPtr right, TExprContext& ctx);
+
}