diff options
author | aozeritsky <aozeritsky@ydb.tech> | 2023-09-05 15:35:23 +0300 |
---|---|---|
committer | aozeritsky <aozeritsky@ydb.tech> | 2023-09-05 16:28:47 +0300 |
commit | 0ab82618948bf9a478770431b9eab1dbc38e98f6 (patch) | |
tree | f93c58422fc46e7c247dd3ce66cb73afee59703c | |
parent | 6565b8f5094b692e78fc5c08098a894bdd714d6a (diff) | |
download | ydb-0ab82618948bf9a478770431b9eab1dbc38e98f6.tar.gz |
Add self join optimizer
-rw-r--r-- | ydb/library/yql/dq/common/dq_common.h | 1 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join.cpp | 49 |
2 files changed, 38 insertions, 12 deletions
diff --git a/ydb/library/yql/dq/common/dq_common.h b/ydb/library/yql/dq/common/dq_common.h index 17fb53c3494..72f2a038c07 100644 --- a/ydb/library/yql/dq/common/dq_common.h +++ b/ydb/library/yql/dq/common/dq_common.h @@ -86,6 +86,7 @@ enum class EHashJoinMode { Map /* "map" */, Dict /* "dict" */, Grace /* "grace" */, + GraceAndSelf /* "graceandself" */, }; } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_join.cpp b/ydb/library/yql/dq/opt/dq_opt_join.cpp index 993829b6dcf..049b3e52fd0 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join.cpp @@ -1206,6 +1206,15 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& const auto rightShuffle = buildShuffle(rightIn, rightJoinKeys); const auto leftShuffle = buildShuffle(leftIn, leftJoinKeys); + TString callableName = "GraceJoinCore"; + int shift = 2; + bool selfJoin = false; + if (mode == EHashJoinMode::GraceAndSelf && leftIn.Stage().Ptr() == rightIn.Stage().Ptr()) { + callableName = "SelfJoinCore"; + shift = 1; + selfJoin = true; + } + TCoArgument leftInputArg{ctx.NewArgument(join.LeftInput().Pos(), "_dq_join_left")}; TCoArgument rightInputArg{ctx.NewArgument(join.RightInput().Pos(), "_dq_join_right")}; @@ -1229,13 +1238,19 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& TExprNode::TPtr hashJoin; switch (mode) { + case EHashJoinMode::GraceAndSelf: case EHashJoinMode::Grace: hashJoin = ctx.Builder(join.Pos()) - .Callable("GraceJoinCore") - .Add(0, std::move(leftWideFlow)) - .Add(1, std::move(rightWideFlow)) - .Add(2, join.JoinType().Ptr()) - .List(3) + .Callable(callableName) + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { + parent.Add(0, std::move(leftWideFlow)); + if (selfJoin == false) { + parent.Add(1, std::move(rightWideFlow)); + } + return parent; + }) + .Add(shift, join.JoinType().Ptr()) + .List(shift + 1) .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { for (ui32 i = 0U; i < leftKeys.size(); ++i) { parent.Atom(i, ctx.GetIndexAsString(leftKeys[i]), TNodeFlags::Default); @@ -1243,7 +1258,7 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& return parent; }) .Seal() - .List(4) + .List(shift + 2) .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { for (ui32 i = 0U; i < rightKeys.size(); ++i) { parent.Atom(i, ctx.GetIndexAsString(rightKeys[i]), TNodeFlags::Default); @@ -1251,7 +1266,7 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& return parent; }) .Seal() - .List(5) + .List(shift + 3) .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { for (ui32 i = 0U; i < leftNames.size(); ++i) { parent.Atom(2*i, ctx.GetIndexAsString(i), TNodeFlags::Default); @@ -1260,7 +1275,7 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& return parent; }) .Seal() - .List(6) + .List(shift + 4) .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { for (ui32 i = 0U; i < rightNames.size(); ++i) { parent.Atom(2*i, ctx.GetIndexAsString(i), TNodeFlags::Default); @@ -1269,7 +1284,7 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& return parent; }) .Seal() - .List(7).Add(std::move(flags)).Seal() + .List(shift + 5).Add(std::move(flags)).Seal() .Seal() .Build(); break; @@ -1472,15 +1487,25 @@ TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& .Seal() .Build(); + TVector<TExprBase> stageInputs; stageInputs.reserve(2); + stageInputs.emplace_back(leftShuffle); + if (selfJoin == false) { + stageInputs.emplace_back(rightShuffle); + } + TVector<TCoArgument> inputArgs; inputArgs.reserve(2); + inputArgs.emplace_back(leftInputArg); + if (selfJoin == false) { + inputArgs.emplace_back(rightInputArg); + } + return Build<TDqCnUnionAll>(ctx, join.Pos()) .Output() .Stage<TDqStage>() .Inputs() - .Add(leftShuffle) - .Add(rightShuffle) + .Add(stageInputs) .Build() .Program() - .Args({leftInputArg, rightInputArg}) + .Args(inputArgs) .Body(std::move(hashJoin)) .Build() .Settings(TDqStageSettings().BuildNode(ctx, join.Pos())) |