diff options
author | Vitaly Stoyan <vitstn@gmail.com> | 2022-06-23 18:01:48 +0300 |
---|---|---|
committer | Vitaly Stoyan <vitstn@gmail.com> | 2022-06-23 18:01:48 +0300 |
commit | 768b582b9ff12cb740c8428e9ca1fa8f57b0d1a6 (patch) | |
tree | 970232e83c2b5d112e6fb90df1c5be9532178ebc | |
parent | 302f2d67c12b563349b579a092494c715dc787f3 (diff) | |
download | ydb-768b582b9ff12cb740c8428e9ca1fa8f57b0d1a6.tar.gz |
YQL-14728 rotate cross joins to make inner links as close to tables as possible
ref:7263d51e95e32260cf5bc41b3f42acf2312019ad
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_flow2.cpp | 243 | ||||
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_pgselect.cpp | 48 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.cpp | 16 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_join.h | 2 |
4 files changed, 222 insertions, 87 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp index 1b7f73ca8d2..ff2ec5e947e 100644 --- a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp @@ -412,78 +412,221 @@ void GatherJoinInputs(const TExprNode::TPtr& expr, const TExprNode& row, } } -TExprNode::TPtr AddLinkToJoinTree(TExprNode::TPtr joinTree, TStringBuf label1, TStringBuf column1, TStringBuf label2, TStringBuf column2, - TExprContext& ctx, TMaybe<ui32>& found1, TMaybe<ui32>& found2, bool& updated) { - YQL_ENSURE(joinTree->Child(0)->Content() == "Cross" || joinTree->Child(0)->Content() == "Inner"); - auto children = joinTree->ChildrenList(); +class TJoinTreeRebuilder { +public: + TJoinTreeRebuilder(TExprNode::TPtr joinTree, TStringBuf label1, TStringBuf column1, TStringBuf label2, TStringBuf column2, + TExprContext& ctx) + : JoinTree(joinTree) + , Labels{ label1, label2 } + , Columns{ column1, column2 } + , Ctx(ctx) + {} + + TExprNode::TPtr Run() { + auto pos = JoinTree->Pos(); + GatherCross(JoinTree); + auto inCross1 = FindPtr(CrossJoins, Labels[0]); + auto inCross2 = FindPtr(CrossJoins, Labels[1]); + auto joinTree = JoinTree; + if (inCross1 || inCross2) { + if (inCross1 && inCross2) { + // make them a leaf + joinTree = MakeCrossJoin(pos, Ctx.NewAtom(pos, Labels[0]), Ctx.NewAtom(pos, Labels[1]), Ctx); + for (auto label : CrossJoins) { + if (label != Labels[0] && label != Labels[1]) { + joinTree = MakeCrossJoin(pos, joinTree, Ctx.NewAtom(pos, label), Ctx); + } + } - auto& left = children[1]; - if (!left->IsAtom()) { - left = AddLinkToJoinTree(left, label1, column1, label2, column2, ctx, found1, found2, updated); - if (found1) { - found1 = 1u; - } + joinTree = AddRestJoins(pos, joinTree, nullptr); + } else if (inCross1) { + // leaf with table1 and subtree with table2 + auto rest = FindRestJoin(Labels[1]); + YQL_ENSURE(rest); + joinTree = MakeCrossJoin(pos, Ctx.NewAtom(pos, Labels[0]), rest, Ctx); + for (auto label : CrossJoins) { + if (label != Labels[0]) { + joinTree = MakeCrossJoin(pos, joinTree, Ctx.NewAtom(pos, label), Ctx); + } + } + + joinTree = AddRestJoins(pos, joinTree, rest); + } else { + // leaf with table2 and subtree with table1 + auto rest = FindRestJoin(Labels[0]); + YQL_ENSURE(rest); + joinTree = MakeCrossJoin(pos, Ctx.NewAtom(pos, Labels[1]), rest, Ctx); + for (auto label : CrossJoins) { + if (label != Labels[1]) { + joinTree = MakeCrossJoin(pos, joinTree, Ctx.NewAtom(pos, label), Ctx); + } + } - if (found2) { - found2 = 1u; + joinTree = AddRestJoins(pos, joinTree, rest); + } } - } else { - if (left->Content() == label1) { - found1 = 1u; + + auto newJoinTree = std::get<0>(AddLink(joinTree)); + YQL_ENSURE(Updated); + return newJoinTree; + } + +private: + TExprNode::TPtr AddRestJoins(TPositionHandle pos, TExprNode::TPtr joinTree, TExprNode::TPtr exclude) { + for (auto join : RestJoins) { + if (join == exclude) { + continue; + } + + joinTree = MakeCrossJoin(pos, joinTree, join, Ctx); } - if (left->Content() == label2) { - found2 = 1u; + return joinTree; + } + + TExprNode::TPtr FindRestJoin(TStringBuf label) { + for (auto join : RestJoins) { + if (HasTable(join, label)) { + return join; + } } + + return nullptr; } - auto& right = children[2]; - if (!right->IsAtom()) { - right = AddLinkToJoinTree(right, label1, column1, label2, column2, ctx, found1, found2, updated); - if (found1) { - found1 = 2u; + bool HasTable(TExprNode::TPtr joinTree, TStringBuf label) { + auto left = joinTree->ChildPtr(1); + if (left->IsAtom()) { + if (left->Content() == label) { + return true; + } + } else { + if (HasTable(left, label)) { + return true; + } } - if (found2) { - found2 = 2u; + auto right = joinTree->ChildPtr(2); + if (right->IsAtom()) { + if (right->Content() == label) { + return true; + } + } else { + if (HasTable(right, label)) { + return true; + } } - } else { - if (right->Content() == label1) { - found1 = 2u; + + return false; + } + + void GatherCross(TExprNode::TPtr joinTree) { + auto type = joinTree->Child(0)->Content(); + if (type != "Cross") { + RestJoins.push_back(joinTree); + return; + } + + auto left = joinTree->ChildPtr(1); + if (left->IsAtom()) { + CrossJoins.push_back(left->Content()); + } else { + GatherCross(left); } - if (right->Content() == label2) { - found2 = 2u; + auto right = joinTree->ChildPtr(2); + if (right->IsAtom()) { + CrossJoins.push_back(right->Content()); + } else { + GatherCross(right); } } - if (found1 && found2) { - if (!updated) { - if (joinTree->Child(0)->Content() == "Cross") { - children[0] = ctx.NewAtom(joinTree->Pos(), "Inner"); + std::tuple<TExprNode::TPtr, TMaybe<ui32>, TMaybe<ui32>> AddLink(TExprNode::TPtr joinTree) { + YQL_ENSURE(joinTree->Child(0)->Content() == "Cross" || joinTree->Child(0)->Content() == "Inner"); + auto children = joinTree->ChildrenList(); + + TMaybe<ui32> found1; + TMaybe<ui32> found2; + auto& left = children[1]; + if (!left->IsAtom()) { + TMaybe<ui32> leftFound1, leftFound2; + std::tie(left, leftFound1, leftFound2) = AddLink(left); + if (leftFound1) { + found1 = 1u; + } + + if (leftFound2) { + found2 = 1u; + } + } else { + if (left->Content() == Labels[0]) { + found1 = 1u; + } + + if (left->Content() == Labels[1]) { + found2 = 1u; + } + } + + auto& right = children[2]; + if (!right->IsAtom()) { + TMaybe<ui32> rightFound1, rightFound2; + std::tie(right, rightFound1, rightFound2) = AddLink(right); + if (rightFound1) { + found1 = 2u; + } + + if (rightFound2) { + found2 = 2u; + } + } else { + if (right->Content() == Labels[0]) { + found1 = 2u; } - if (*found1 == 2u) { - std::swap(label1, label2); - std::swap(column1, column2); + if (right->Content() == Labels[1]) { + found2 = 2u; } + } - auto link1 = children[3]->ChildrenList(); - link1.push_back(ctx.NewAtom(joinTree->Pos(), label1)); - link1.push_back(ctx.NewAtom(joinTree->Pos(), column1)); - children[3] = ctx.ChangeChildren(*children[3], std::move(link1)); + if (found1 && found2) { + if (!Updated) { + if (joinTree->Child(0)->Content() == "Cross") { + children[0] = Ctx.NewAtom(joinTree->Pos(), "Inner"); + } + + ui32 index1 = *found1 - 1; // 0/1 + ui32 index2 = 1 - index1; - auto link2 = children[4]->ChildrenList(); - link2.push_back(ctx.NewAtom(joinTree->Pos(), label2)); - link2.push_back(ctx.NewAtom(joinTree->Pos(), column2)); - children[4] = ctx.ChangeChildren(*children[4], std::move(link2)); + auto link1 = children[3]->ChildrenList(); + link1.push_back(Ctx.NewAtom(joinTree->Pos(), Labels[index1])); + link1.push_back(Ctx.NewAtom(joinTree->Pos(), Columns[index1])); + children[3] = Ctx.ChangeChildren(*children[3], std::move(link1)); - updated = true; + auto link2 = children[4]->ChildrenList(); + link2.push_back(Ctx.NewAtom(joinTree->Pos(), Labels[index2])); + link2.push_back(Ctx.NewAtom(joinTree->Pos(), Columns[index2])); + children[4] = Ctx.ChangeChildren(*children[4], std::move(link2)); + + Updated = true; + } } + + return { Ctx.ChangeChildren(*joinTree, std::move(children)), found1, found2 }; } - return ctx.ChangeChildren(*joinTree, std::move(children)); -} +private: + TVector<TStringBuf> CrossJoins; + TVector<TExprNode::TPtr> RestJoins; + + bool Updated = false; + + TExprNode::TPtr JoinTree; + TStringBuf Labels[2]; + TStringBuf Columns[2]; + TExprContext& Ctx; +}; TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, TExprNode::TPtr predicate, const TJoinLabels& labels, ui32 index1, ui32 index2, const TExprNode& row, const THashMap<TString, TString>& backRenameMap, @@ -559,10 +702,8 @@ TExprNode::TPtr DecayCrossJoinIntoInner(TExprNode::TPtr equiJoin, TExprNode::TPt return equiJoin; } - TMaybe<ui32> found1, found2; - bool updated = false; - auto newJoinTree = AddLinkToJoinTree(joinTree, label1, column1, label2, column2, ctx, found1, found2, updated); - YQL_ENSURE(updated); + TJoinTreeRebuilder rebuilder(joinTree, label1, column1, label2, column2, ctx); + auto newJoinTree = rebuilder.Run(); return ctx.ChangeChild(*equiJoin, inputsCount, std::move(newJoinTree)); } diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp index b89673508a9..89afb22e2ff 100644 --- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp @@ -3,6 +3,7 @@ #include <ydb/library/yql/core/type_ann/type_ann_pg.h> #include <ydb/library/yql/core/yql_expr_optimize.h> +#include <ydb/library/yql/core/yql_join.h> #include <ydb/library/yql/core/yql_opt_utils.h> namespace NYql { @@ -1020,34 +1021,9 @@ TExprNode::TPtr BuildCrossJoinsBetweenGroups(TPositionHandle pos, const TExprNod .Build()); } - auto tree = ctx.Builder(pos) - .List() - .Atom(0, "Cross") - .Atom(1, "0") - .Atom(2, "1") - .List(3) - .Seal() - .List(4) - .Seal() - .List(5) - .Seal() - .Seal() - .Build(); - + auto tree = MakeCrossJoin(pos, ctx.NewAtom(pos, "0"), ctx.NewAtom(pos, "1"), ctx); for (ui32 i = 2; i < joinGroups.size(); ++i) { - tree = ctx.Builder(pos) - .List() - .Atom(0, "Cross") - .Add(1, tree) - .Atom(2, ToString(i)) - .List(3) - .Seal() - .List(4) - .Seal() - .List(5) - .Seal() - .Seal() - .Build(); + tree = MakeCrossJoin(pos, tree, ctx.NewAtom(pos, ToString(i)), ctx); } args.push_back(tree); @@ -1278,15 +1254,15 @@ TExprNode::TPtr BuildGroupByAndHaving(TPositionHandle pos, TExprNode::TPtr list, } auto payloadsNode = ctx.NewList(pos, std::move(payloadItems)); - TExprNode::TListType keysItems;
- if (finalExtTypes) {
- for (const auto& x : finalExtTypes->Tail().Children()) {
- auto type = x->Tail().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>();
- for (const auto& i : type->GetItems()) {
- keysItems.push_back(ctx.NewAtom(pos, NTypeAnnImpl::MakeAliasedColumn(x->Head().Content(), i->GetName())));
- }
- }
- }
+ TExprNode::TListType keysItems; + if (finalExtTypes) { + for (const auto& x : finalExtTypes->Tail().Children()) { + auto type = x->Tail().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>(); + for (const auto& i : type->GetItems()) { + keysItems.push_back(ctx.NewAtom(pos, NTypeAnnImpl::MakeAliasedColumn(x->Head().Content(), i->GetName()))); + } + } + } if (groupBy) { for (const auto& group : groupBy->Tail().Children()) { diff --git a/ydb/library/yql/core/yql_join.cpp b/ydb/library/yql/core/yql_join.cpp index 9a9ded7a327..c9324e5a511 100644 --- a/ydb/library/yql/core/yql_join.cpp +++ b/ydb/library/yql/core/yql_join.cpp @@ -1539,4 +1539,20 @@ TExprNode::TPtr MakeDictForJoin(TExprNode::TPtr&& list, bool payload, bool multi template TExprNode::TPtr MakeDictForJoin<true>(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx); template TExprNode::TPtr MakeDictForJoin<false>(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx); +TExprNode::TPtr MakeCrossJoin(TPositionHandle pos, TExprNode::TPtr left, TExprNode::TPtr right, TExprContext& ctx) { + return ctx.Builder(pos) + .List() + .Atom(0, "Cross") + .Add(1, left) + .Add(2, right) + .List(3) + .Seal() + .List(4) + .Seal() + .List(5) + .Seal() + .Seal() + .Build(); +} + } // namespace NYql diff --git a/ydb/library/yql/core/yql_join.h b/ydb/library/yql/core/yql_join.h index b01b950c2d6..69d5f2b2d21 100644 --- a/ydb/library/yql/core/yql_join.h +++ b/ydb/library/yql/core/yql_join.h @@ -142,4 +142,6 @@ TExprNode::TPtr PrepareListForJoin(TExprNode::TPtr list, const TTypeAnnotationNo template<bool Squeeze = false> TExprNode::TPtr MakeDictForJoin(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx); +TExprNode::TPtr MakeCrossJoin(TPositionHandle pos, TExprNode::TPtr left, TExprNode::TPtr right, TExprContext& ctx); + } |