diff options
author | vvvv <vvvv@ydb.tech> | 2022-08-24 17:56:18 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2022-08-24 17:56:18 +0300 |
commit | 3c2751611185a482c1ac79784578f10197c9dfc4 (patch) | |
tree | a0ed09ec0fc203906d989ade22be589f674e57cd | |
parent | 210dea83ea4c2f191c7201d3266153090d7d1915 (diff) | |
download | ydb-3c2751611185a482c1ac79784578f10197c9dfc4.tar.gz |
extract columns from used windows
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_pgselect.cpp | 364 |
1 files changed, 193 insertions, 171 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp index 907b816fb32..9415bdf58ee 100644 --- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp @@ -787,8 +787,46 @@ void AddColumnsFromSublinks(const TNodeMap<ui32>& subLinks, TUsedColumns& column } } +struct TWindowsCtx { + TVector<std::pair<TExprNode::TPtr, TExprNode::TPtr>> Funcs; + TMap<ui32, TVector<ui32>> Window2funcs; + TNodeMap<ui32> FuncsId; +}; + +void GatherUsedWindows(const TExprNode::TPtr& window, TExprNode::TPtr& projectionLambda, TWindowsCtx& winCtx) { + VisitExpr(projectionLambda->TailPtr(), [&](const TExprNode::TPtr& node) { + if (node->IsCallable("PgWindowCall") || node->IsCallable("PgAggWindowCall")) { + YQL_ENSURE(window); + ui32 windowIndex; + if (node->Child(1)->IsCallable("PgAnonWindow")) { + windowIndex = FromString<ui32>(node->Child(1)->Head().Content()); + YQL_ENSURE(windowIndex < window->Tail().ChildrenSize()); + } else { + auto name = node->Child(1)->Content(); + bool found = false; + for (ui32 index = 0; index < window->Tail().ChildrenSize(); ++index) { + if (window->Tail().Child(index)->Head().Content() == name) { + windowIndex = index; + found = true; + break; + } + } + + YQL_ENSURE(found); + } + + winCtx.Window2funcs[windowIndex].push_back(winCtx.Funcs.size()); + winCtx.FuncsId[node.Get()] = winCtx.Funcs.size(); + winCtx.Funcs.push_back({ node, projectionLambda->Head().HeadPtr() }); + } + + return true; + }); +} + TUsedColumns GatherUsedColumns(const TExprNode::TPtr& result, const TExprNode::TPtr& joinOps, - const TExprNode::TPtr& filter, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& having, const TExprNode::TPtr& extraSortColumns) { + const TExprNode::TPtr& filter, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& having, const TExprNode::TPtr& extraSortColumns, + const TExprNode::TPtr& window, const TWindowsCtx& winCtx) { TUsedColumns usedColumns; for (const auto& x : result->Tail().Children()) { AddColumnsFromType(x->Child(1)->GetTypeAnn(), usedColumns); @@ -835,6 +873,19 @@ TUsedColumns GatherUsedColumns(const TExprNode::TPtr& result, const TExprNode::T } } + if (window) { + for (const auto& x : winCtx.Window2funcs) { + auto winDef = window->Tail().Child(x.first); + for (auto group : winDef->Child(2)->Children()) { + AddColumnsFromType(group->Head().GetTypeAnn(), usedColumns); + } + + for (auto sort : winDef->Child(3)->Children()) { + AddColumnsFromType(sort->Head().GetTypeAnn(), usedColumns); + } + } + } + return usedColumns; } @@ -1881,206 +1932,170 @@ TExprNode::TPtr BuildSortTraits(TPositionHandle pos, const TExprNode& sortColumn } } -TExprNode::TPtr BuildWindows(TPositionHandle pos, const TExprNode::TPtr& list, const TExprNode::TPtr& window, +TExprNode::TPtr BuildWindows(TPositionHandle pos, const TExprNode::TPtr& list, const TExprNode::TPtr& window, const TWindowsCtx& winCtx, TExprNode::TPtr& projectionLambda, TExprContext& ctx, TOptimizeContext& optCtx) { - TVector<std::pair<TExprNode::TPtr, TExprNode::TPtr>> winFuncs; - TMap<ui32, TVector<ui32>> window2funcs; - TNodeMap<ui32> winFuncsId; auto ret = list; - VisitExpr(projectionLambda->TailPtr(), [&](const TExprNode::TPtr& node) { - if (node->IsCallable("PgWindowCall") || node->IsCallable("PgAggWindowCall")) { - YQL_ENSURE(window); - ui32 windowIndex; - if (node->Child(1)->IsCallable("PgAnonWindow")) { - windowIndex = FromString<ui32>(node->Child(1)->Head().Content()); - } else { - auto name = node->Child(1)->Content(); - bool found = false; - for (ui32 index = 0; index < window->Tail().ChildrenSize(); ++index) { - if (window->Tail().Child(index)->Head().Content() == name) { - windowIndex = index; - found = true; - break; - } - } + auto listTypeNode = ctx.Builder(pos) + .Callable("TypeOf") + .Add(0, list) + .Seal() + .Build(); - YQL_ENSURE(found); - } + for (const auto& x : winCtx.Window2funcs) { + auto winDef = window->Tail().Child(x.first); + const auto& frameSettings = winDef->Tail(); - window2funcs[windowIndex].push_back(winFuncs.size()); - winFuncsId[node.Get()] = winFuncs.size(); - winFuncs.push_back({ node, projectionLambda->Head().HeadPtr() }); + TExprNode::TListType keys; + for (auto p : winDef->Child(2)->Children()) { + YQL_ENSURE(p->IsCallable("PgGroup")); + const auto& member = p->Tail().Tail(); + YQL_ENSURE(member.IsCallable("Member")); + keys.push_back(member.TailPtr()); } - return true; - }); - - if (!winFuncs.empty()) { - auto listTypeNode = ctx.Builder(pos) - .Callable("TypeOf") - .Add(0, list) - .Seal() - .Build(); - - for (const auto& x : window2funcs) { - auto win = window->Tail().Child(x.first); - const auto& frameSettings = win->Tail(); - - TExprNode::TListType keys; - for (auto p : win->Child(2)->Children()) { - YQL_ENSURE(p->IsCallable("PgGroup")); - const auto& member = p->Tail().Tail(); - YQL_ENSURE(member.IsCallable("Member")); - keys.push_back(member.TailPtr()); - } - - auto keysNode = ctx.NewList(pos, std::move(keys)); - auto sortNode = ctx.NewCallable(pos, "Void", {}); - TExprNode::TPtr keyLambda; - if (win->Child(3)->ChildrenSize() > 0) { - sortNode = BuildSortTraits(pos, *win->Child(3), ret, ctx); - keyLambda = sortNode->TailPtr(); - } else { - keyLambda = ctx.Builder(pos) - .Lambda() - .Param("row") - .Callable("Void") - .Seal() + auto keysNode = ctx.NewList(pos, std::move(keys)); + auto sortNode = ctx.NewCallable(pos, "Void", {}); + TExprNode::TPtr keyLambda; + if (winDef->Child(3)->ChildrenSize() > 0) { + sortNode = BuildSortTraits(pos, *winDef->Child(3), ret, ctx); + keyLambda = sortNode->TailPtr(); + } else { + keyLambda = ctx.Builder(pos) + .Lambda() + .Param("row") + .Callable("Void") .Seal() - .Build(); - } + .Seal() + .Build(); + } - TExprNode::TListType args; - // default frame - auto begin = ctx.NewCallable(pos, "Void", {}); - auto end = win->Child(3)->ChildrenSize() > 0 ? - ctx.NewCallable(pos, "Int32", { ctx.NewAtom(pos, "0") }) : - ctx.NewCallable(pos, "Void", {}); - if (HasSetting(frameSettings, "type")) { - std::tie(begin, end) = BuildFrame(pos, frameSettings, ctx); - } + TExprNode::TListType args; + // default frame + auto begin = ctx.NewCallable(pos, "Void", {}); + auto end = winDef->Child(3)->ChildrenSize() > 0 ? + ctx.NewCallable(pos, "Int32", { ctx.NewAtom(pos, "0") }) : + ctx.NewCallable(pos, "Void", {}); + if (HasSetting(frameSettings, "type")) { + std::tie(begin, end) = BuildFrame(pos, frameSettings, ctx); + } - args.push_back(ctx.Builder(pos) - .List() - .List(0) - .Atom(0, "begin") - .Add(1, begin) - .Seal() - .List(1) - .Atom(0, "end") - .Add(1, end) - .Seal() + args.push_back(ctx.Builder(pos) + .List() + .List(0) + .Atom(0, "begin") + .Add(1, begin) .Seal() - .Build()); + .List(1) + .Atom(0, "end") + .Add(1, end) + .Seal() + .Seal() + .Build()); - for (const auto& index : x.second) { - auto p = winFuncs[index]; - auto name = p.first->Head().Content(); - bool isAgg = p.first->IsCallable("PgAggWindowCall"); - TExprNode::TPtr value; - if (isAgg) { - value = BuildAggregationTraits(pos, true, "", p, listTypeNode, ctx); - } else { - if (name == "row_number") { - value = ctx.Builder(pos) - .Callable("RowNumber") - .Callable(0, "TypeOf") + for (const auto& index : x.second) { + auto p = winCtx.Funcs[index]; + auto name = p.first->Head().Content(); + bool isAgg = p.first->IsCallable("PgAggWindowCall"); + TExprNode::TPtr value; + if (isAgg) { + value = BuildAggregationTraits(pos, true, "", p, listTypeNode, ctx); + } else { + if (name == "row_number") { + value = ctx.Builder(pos) + .Callable("RowNumber") + .Callable(0, "TypeOf") + .Add(0, list) + .Seal() + .Seal() + .Build(); + } else if (name == "rank" || name == "dense_rank") { + value = ctx.Builder(pos) + .Callable((name == "rank") ? "Rank" : "DenseRank") + .Callable(0, "TypeOf") .Add(0, list) - .Seal() .Seal() - .Build(); - } else if (name == "rank" || name == "dense_rank") { - value = ctx.Builder(pos) - .Callable((name == "rank") ? "Rank" : "DenseRank") - .Callable(0, "TypeOf") - .Add(0, list) - .Seal() - .Add(1, keyLambda) - .List(2) - .List(0) - .Atom(0, "ansi") - .Seal() + .Add(1, keyLambda) + .List(2) + .List(0) + .Atom(0, "ansi") .Seal() .Seal() - .Build(); - } else if (name == "lead" || name == "lag") { - auto arg = ctx.NewArgument(pos, "row"); - auto arguments = ctx.NewArguments(pos, { arg }); - auto extractor = ctx.NewLambda(pos, std::move(arguments), - ctx.ReplaceNode(p.first->TailPtr(), *p.second, arg)); - - value = ctx.Builder(pos) - .Callable(name == "lead" ? "Lead" : "Lag") - .Callable(0, "TypeOf") - .Add(0, list) - .Seal() - .Add(1, extractor) + .Seal() + .Build(); + } else if (name == "lead" || name == "lag") { + auto arg = ctx.NewArgument(pos, "row"); + auto arguments = ctx.NewArguments(pos, { arg }); + auto extractor = ctx.NewLambda(pos, std::move(arguments), + ctx.ReplaceNode(p.first->TailPtr(), *p.second, arg)); + + value = ctx.Builder(pos) + .Callable(name == "lead" ? "Lead" : "Lag") + .Callable(0, "TypeOf") + .Add(0, list) .Seal() - .Build(); - } else { - ythrow yexception() << "Not supported function: " << name; - } + .Add(1, extractor) + .Seal() + .Build(); + } else { + ythrow yexception() << "Not supported function: " << name; } - - args.push_back(ctx.Builder(pos) - .List() - .Atom(0, "_yql_win_" + ToString(index)) - .Add(1, value) - .Seal() - .Build()); } - auto winOnRows = ctx.NewCallable(pos, "WinOnRows", std::move(args)); - - auto frames = ctx.Builder(pos) + args.push_back(ctx.Builder(pos) .List() - .Add(0, winOnRows) + .Atom(0, "_yql_win_" + ToString(index)) + .Add(1, value) .Seal() - .Build(); + .Build()); + } - ret = ctx.Builder(pos) - .Callable("CalcOverWindow") - .Add(0, ret) - .Add(1, keysNode) - .Add(2, sortNode) - .Add(3, frames) + auto winOnRows = ctx.NewCallable(pos, "WinOnRows", std::move(args)); + + auto frames = ctx.Builder(pos) + .List() + .Add(0, winOnRows) + .Seal() + .Build(); + + ret = ctx.Builder(pos) + .Callable("CalcOverWindow") + .Add(0, ret) + .Add(1, keysNode) + .Add(2, sortNode) + .Add(3, frames) + .Seal() + .Build(); + } + + auto status = OptimizeExpr(projectionLambda, projectionLambda, [&](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr { + auto it = winCtx.FuncsId.find(node.Get()); + if (it != winCtx.FuncsId.end()) { + auto ret = ctx.Builder(pos) + .Callable("Member") + .Add(0, projectionLambda->Head().HeadPtr()) + .Atom(1, "_yql_win_" + ToString(it->second)) .Seal() .Build(); - } - auto status = OptimizeExpr(projectionLambda, projectionLambda, [&](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr { - auto it = winFuncsId.find(node.Get()); - if (it != winFuncsId.end()) { - auto ret = ctx.Builder(pos) - .Callable("Member") - .Add(0, projectionLambda->Head().HeadPtr()) - .Atom(1, "_yql_win_" + ToString(it->second)) + if (node->Head().Content() == "row_number" || node->Head().Content() == "rank" || node->Head().Content() == "dense_rank") { + ret = ctx.Builder(node->Pos()) + .Callable("ToPg") + .Callable(0, "SafeCast") + .Add(0, ret) + .Atom(1, "Int64") + .Seal() .Seal() .Build(); - - if (node->Head().Content() == "row_number" || node->Head().Content() == "rank" || node->Head().Content() == "dense_rank") { - ret = ctx.Builder(node->Pos()) - .Callable("ToPg") - .Callable(0, "SafeCast") - .Add(0, ret) - .Atom(1, "Int64") - .Seal() - .Seal() - .Build(); - } - - return ret; } - return node; - }, ctx, TOptimizeExprSettings(optCtx.Types)); - - if (status.Level == IGraphTransformer::TStatus::Error) { - return nullptr; + return ret; } - } + return node; + }, ctx, TOptimizeExprSettings(optCtx.Types)); + + YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Error); return ret; } @@ -2781,13 +2796,18 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct TExprNode::TPtr projectionLambda = BuildProjectionLambda(node->Pos(), result, subLinkId.Defined(), ctx); TVector<TString> inputAliases; TExprNode::TListType cleanedInputs; + TWindowsCtx winCtx; + if (window) { + GatherUsedWindows(window, projectionLambda, winCtx); + } + if (oneRow) { list = BuildOneRow(node->Pos(), ctx); inputAliases.push_back(""); cleanedInputs.push_back(list); } else { // extract all used columns - auto usedColumns = GatherUsedColumns(result, joinOps, filter, groupBy, having, extraSortColumns); + auto usedColumns = GatherUsedColumns(result, joinOps, filter, groupBy, having, extraSortColumns, window, winCtx); // fill index of input for each column FillInputIndices(from, finalExtTypes, usedColumns, optCtx); @@ -2847,7 +2867,9 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct list = BuildHaving(node->Pos(), list, having->TailPtr(), aggId, inputAliases, cleanedInputs, ctx, optCtx); } - list = BuildWindows(node->Pos(), list, window, projectionLambda, ctx, optCtx); + if (!winCtx.Funcs.empty()) { + list = BuildWindows(node->Pos(), list, window, winCtx, projectionLambda, ctx, optCtx); + } if (finalExtTypes) { projectionLambda = AddExtColumns(projectionLambda, finalExtTypes->TailPtr(), columnsItems, *subLinkId, ctx); |