diff options
author | vvvv <vvvv@ydb.tech> | 2022-08-09 09:35:14 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2022-08-09 09:35:14 +0300 |
commit | 5c535c9f77d8716dbd3ddeaada8e8a571fba79cc (patch) | |
tree | 1b57cdd40807c343cccd64f3042ad35fcc114763 | |
parent | aaf121f19f34bb7ed026b627f75a09e68723f89d (diff) | |
download | ydb-5c535c9f77d8716dbd3ddeaada8e8a571fba79cc.tar.gz |
support of input columns in ORDER BY
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_pgselect.cpp | 123 | ||||
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_simple1.cpp | 2 | ||||
-rw-r--r-- | ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp | 2 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_core.cpp | 41 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_pg.cpp | 206 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_opt_utils.cpp | 19 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_opt_utils.h | 1 |
7 files changed, 316 insertions, 78 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp index c518c59995..110a988127 100644 --- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp @@ -59,8 +59,8 @@ std::pair<TExprNode::TPtr, TExprNode::TPtr> SplitByPredicate(TPositionHandle pos .Build(); return { - ctx.NewCallable(pos, "Filter", { input, lambda }), - ctx.NewCallable(pos, "Filter", { input, inverseLambda }) + ctx.NewCallable(pos, "OrderedFilter", { input, lambda }), + ctx.NewCallable(pos, "OrderedFilter", { input, inverseLambda }) }; } @@ -148,7 +148,7 @@ TExprNode::TPtr JoinColumns(TPositionHandle pos, const TExprNode::TPtr& list1, c .Build(); return ctx.Builder(pos) - .Callable("Map") + .Callable("OrderedMap") .Add(0, join) .Lambda(1) .Param("row") @@ -660,7 +660,7 @@ TExprNode::TPtr BuildFilter(TPositionHandle pos, const TExprNode::TPtr& list, co } return ctx.Builder(pos) - .Callable("Filter") + .Callable("OrderedFilter") .Add(0, actualList) .Lambda(1) .Param("row") @@ -693,7 +693,7 @@ TExprNode::TPtr ExpandPositionalUnionAll(const TExprNode& node, const TVector<TC YQL_ENSURE(childColumnOrder.size() == targetColumnOrder->size()); child = ctx.Builder(child->Pos()) - .Callable("Map") + .Callable("OrderedMap") .Add(0, child) .Lambda(1) .Param("row") @@ -723,7 +723,7 @@ TExprNode::TPtr ExpandPositionalUnionAll(const TExprNode& node, const TVector<TC TExprNode::TPtr BuildValues(TPositionHandle pos, const TExprNode::TPtr& values, TExprContext& ctx) { return ctx.Builder(pos) - .Callable("Map") + .Callable("OrderedMap") .Add(0, values->ChildPtr(2)) .Lambda(1) .Param("row") @@ -780,7 +780,7 @@ void AddColumnsFromSublinks(const TNodeMap<ui32>& subLinks, TUsedColumns& column } TUsedColumns GatherUsedColumns(const TExprNode::TPtr& result, const TExprNode::TPtr& joinOps, - const TExprNode::TPtr& filter, const TExprNode::TPtr& having) { + const TExprNode::TPtr& filter, const 
TExprNode::TPtr& having, const TExprNode::TPtr& extraSortColumns) { TUsedColumns usedColumns; for (const auto& x : result->Tail().Children()) { AddColumnsFromType(x->Child(1)->GetTypeAnn(), usedColumns); @@ -811,6 +811,14 @@ TUsedColumns GatherUsedColumns(const TExprNode::TPtr& result, const TExprNode::T AddColumnsFromSublinks(subLinks, usedColumns); } + if (extraSortColumns) { + for (ui32 inputIndex = 0; inputIndex < extraSortColumns->Tail().ChildrenSize(); ++inputIndex) { + for (const auto& x : extraSortColumns->Tail().Child(inputIndex)->Children()) { + usedColumns.insert(std::make_pair(TString(x->Content()), std::make_pair(inputIndex, TString()))); + } + } + } + return usedColumns; } @@ -826,6 +834,10 @@ void FillInputIndices(const TExprNode::TPtr& from, const TExprNode::TPtr& finalE const auto& inputAlias = from->Tail().Child(inputIndex)->Child(1)->Content(); const auto& read = from->Tail().Child(inputIndex)->Head(); const auto& columns = from->Tail().Child(inputIndex)->Tail(); + if (x.second.first != Max<ui32>() && x.second.first != inputIndex) { + continue; + } + if (read.IsCallable("PgResolvedCall")) { Y_ENSURE(!inputAlias.empty()); Y_ENSURE(columns.ChildrenSize() == 0 || columns.ChildrenSize() == 1); @@ -855,6 +867,11 @@ void FillInputIndices(const TExprNode::TPtr& from, const TExprNode::TPtr& finalE } } + if (x.second.first != Max<ui32>()) { + foundColumn = true; + break; + } + if (foundColumn) { x.second.first = inputIndex; break; @@ -927,7 +944,7 @@ TExprNode::TListType BuildCleanedColumns(TPositionHandle pos, const TExprNode::T } auto cleaned = ctx.Builder(pos) - .Callable("Map") + .Callable("OrderedMap") .Add(0, list) .Lambda(1) .Param("row") @@ -964,7 +981,7 @@ TExprNode::TListType BuildCleanedColumns(TPositionHandle pos, const TExprNode::T TExprNode::TPtr BuildMinus(TPositionHandle pos, const TExprNode::TPtr& left, const TExprNode::TPtr& right, const TExprNode::TPtr& predicate, TExprContext& ctx) { return ctx.Builder(pos) - .Callable("Filter") + 
.Callable("OrderedFilter") .Add(0, left) .Lambda(1) .Param("x") @@ -1072,7 +1089,7 @@ TExprNode::TPtr BuildConstPredicateJoin(TPositionHandle pos, TStringBuf joinType TExprNode::TPtr BuildSingleInputPredicateJoin(TPositionHandle pos, TStringBuf joinType, const TExprNode::TPtr& predicate, const TExprNode::TPtr& left, const TExprNode::TPtr& right, TExprContext& ctx) { auto filteredLeft = ctx.Builder(pos) - .Callable("Filter") + .Callable("OrderedFilter") .Add(0, left) .Lambda(1) .Param("row") @@ -1094,7 +1111,7 @@ TExprNode::TPtr BuildSingleInputPredicateJoin(TPositionHandle pos, TStringBuf jo auto extraLeft = [&]() { return ctx.Builder(pos) - .Callable("Filter") + .Callable("OrderedFilter") .Add(0, left) .Lambda(1) .Param("row") @@ -1232,7 +1249,7 @@ TExprNode::TPtr BuildEquiJoin(TPositionHandle pos, TStringBuf joinType, const TE .Build(); return ctx.Builder(pos) - .Callable("Map") + .Callable("OrderedMap") .Add(0, join) .Lambda(1) .Param("row") @@ -1674,7 +1691,7 @@ TExprNode::TPtr BuildGroupByAndHaving(TPositionHandle pos, TExprNode::TPtr list, auto distinctLambda = ctx.NewLambda(pos, std::move(arguments), std::move(root)); list = ctx.Builder(pos) - .Callable("Map") + .Callable("OrderedMap") .Add(0, list) .Add(1, distinctLambda) .Seal() @@ -1758,7 +1775,7 @@ TExprNode::TPtr BuildGroupByAndHaving(TPositionHandle pos, TExprNode::TPtr list, } list = ctx.Builder(pos) - .Callable("Filter") + .Callable("OrderedFilter") .Add(0, list) .Lambda(1) .Param("row") @@ -2302,7 +2319,7 @@ TExprNode::TPtr AddExtColumns(const TExprNode::TPtr& lambda, const TExprNode::TP .Seal() .List(1) .Atom(0, "_yql_join_sublink_" + ToString(subLinkId) + "_") - .Callable(1, "SelectMembers") + .Callable(1, "FilterMembers") .Arg(0, "row") .List(1) .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { @@ -2328,6 +2345,66 @@ TExprNode::TPtr AddExtColumns(const TExprNode::TPtr& lambda, const TExprNode::TP .Build(); } +TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, const 
TExprNode::TPtr& extraSortColumns, TExprContext& ctx) { + return ctx.Builder(lambda->Pos()) + .Lambda() + .Param("row") + .Callable("FlattenMembers") + .List(0) + .Atom(0, "") + .Apply(1, lambda) + .With(0, "row") + .Seal() + .Seal() + .List(1) + .Atom(0, "") + .Callable(1, "FilterMembers") + .Arg(0, "row") + .List(1) + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { + ui32 i = 0; + for (const auto& x : extraSortColumns->Children()) { + for (const auto& y : x->Children()) { + parent.Atom(i++, y->Content()); + } + } + + return parent; + }) + .Seal() + .Seal() + .Seal() + .Seal() + .Seal() + .Build(); +} + +TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, const TExprNode::TPtr& extraSortColumns, TExprContext& ctx) { + return ctx.Builder(list->Pos()) + .Callable("OrderedMap") + .Add(0, list) + .Lambda(1) + .Param("row") + .Callable("RemoveMembers") + .Arg(0, "row") + .List(1) + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { + ui32 i = 0; + for (const auto& x : extraSortColumns->Children()) { + for (const auto& y : x->Children()) { + parent.Atom(i++, y->Content()); + } + } + + return parent; + }) + .Seal() + .Seal() + .Seal() + .Seal() + .Build(); +} + TExprNode::TPtr JoinOuter(TPositionHandle pos, TExprNode::TPtr list, const TExprNode::TPtr& finalExtTypes, const TExprNode::TListType& outerInputs, const TVector<TString>& outerInputAliases, @@ -2415,6 +2492,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct auto distinctAll = GetSetting(setItem->Tail(), "distinct_all"); auto distinctOn = GetSetting(setItem->Tail(), "distinct_on"); auto sort = GetSetting(setItem->Tail(), "sort"); + auto extraSortColumns = GetSetting(setItem->Tail(), "extra_sort_columns"); bool oneRow = !from; TExprNode::TPtr list; if (values) { @@ -2431,7 +2509,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct cleanedInputs.push_back(list); } else { // extract all used columns - auto usedColumns 
= GatherUsedColumns(result, joinOps, filter, having); + auto usedColumns = GatherUsedColumns(result, joinOps, filter, having, extraSortColumns); // fill index of input for each column FillInputIndices(from, finalExtTypes, usedColumns, optCtx); @@ -2469,22 +2547,33 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct projectionLambda = AddExtColumns(projectionLambda, finalExtTypes->TailPtr(), columnsItems, *subLinkId, ctx); } + if (extraSortColumns) { + YQL_ENSURE(!distinctAll && !distinctOn); + projectionLambda = AddExtraSortColumns(projectionLambda, extraSortColumns->TailPtr(), ctx); + } + list = ctx.Builder(node->Pos()) - .Callable("Map") + .Callable("OrderedMap") .Add(0, list) .Add(1, projectionLambda) .Seal() .Build(); if (distinctAll) { + YQL_ENSURE(!extraSortColumns); list = ctx.NewCallable(node->Pos(), "SqlAggregateAll", { list }); } else if (distinctOn) { + YQL_ENSURE(!extraSortColumns); list = BuildDistinctOn(node->Pos(), list, distinctOn->TailPtr(), sort, ctx); } if (sort) { list = BuildSort(node->Pos(), sort, list, ctx); } + + if (extraSortColumns) { + list = RemoveExtraSortColumns(list, extraSortColumns->TailPtr(), ctx); + } } setItemNodes.push_back(list); diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index fcb0b17d8e..e421d2ebcd 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -4108,6 +4108,8 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { map["RemoveMember"] = std::bind(&ExpandRemoveMember, _1, _2); map["ForceRemoveMember"] = std::bind(&ExpandRemoveMember, _1, _2); + map["RemoveMembers"] = std::bind(&ExpandRemoveMembers, _1, _2); + map["ForceRemoveMembers"] = std::bind(&ExpandRemoveMembers, _1, _2); map["FlattenMembers"] = std::bind(&ExpandFlattenMembers, _1, _2); map["FlattenStructs"] = std::bind(&ExpandFlattenStructs, _1, _2); map["SelectMembers"] = 
std::bind(&ExpandSelectMembers<true>, _1, _2); diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index 4f67f210c3..57b58419ae 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -5880,6 +5880,8 @@ struct TPeepHoleRules { {"AddMember", &ExpandAddMember}, {"ReplaceMember", &ExpandReplaceMember}, {"RemoveMember", &ExpandRemoveMember}, + {"RemoveMembers", &ExpandRemoveMembers}, + {"ForceRemoveMembers", &ExpandRemoveMembers}, {"RemovePrefixMembers", &ExpandRemovePrefixMembers}, {"AsSet", &ExpandAsSet}, {"ForceRemoveMember", &ExpandRemoveMember}, diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index 37163ef819..80a77f5f82 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -1717,6 +1717,45 @@ namespace NTypeAnnImpl { return IGraphTransformer::TStatus::Ok; } + template <bool Forced> + IGraphTransformer::TStatus RemoveMembersWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureArgsCount(*input, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureStructType(input->Head(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureTuple(input->Tail(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + auto structType = input->Head().GetTypeAnn()->Cast<TStructExprType>(); + TVector<const TItemExprType*> newItems = structType->GetItems(); + + for (const auto& child : input->Tail().Children()) { + if (!EnsureAtom(*child, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + auto memberName = child->Content(); + EraseIf(newItems, [&](const auto& item) { return item->GetName() == memberName; }); + + if (!Forced && 
!FindOrReportMissingMember(memberName, input->Pos(), *structType, ctx)) { + return IGraphTransformer::TStatus::Error; + } + } + + input->SetTypeAnn(ctx.Expr.MakeType<TStructExprType>(newItems)); + if (!input->GetTypeAnn()->Cast<TStructExprType>()->Validate(input->Pos(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + return IGraphTransformer::TStatus::Ok; + } + IGraphTransformer::TStatus RemovePrefixMembersWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { if (!EnsureArgsCount(*input, 2, ctx.Expr)) { return IGraphTransformer::TStatus::Error; @@ -10984,6 +11023,8 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["FlattenMembers"] = &FlattenMembersWrapper; Functions["SelectMembers"] = &SelectMembersWrapper<true>; Functions["FilterMembers"] = &SelectMembersWrapper<false>; + Functions["RemoveMembers"] = &RemoveMembersWrapper<false>; + Functions["ForceRemoveMembers"] = &RemoveMembersWrapper<true>; Functions["DivePrefixMembers"] = &DivePrefixMembersWrapper; Functions["FlattenByColumns"] = &FlattenByColumns; Functions["ExtractMembers"] = &ExtractMembersWrapper; diff --git a/ydb/library/yql/core/type_ann/type_ann_pg.cpp b/ydb/library/yql/core/type_ann/type_ann_pg.cpp index 214a2f9d6a..37e18d5550 100644 --- a/ydb/library/yql/core/type_ann/type_ann_pg.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_pg.cpp @@ -1360,10 +1360,16 @@ IGraphTransformer::TStatus PgTypeWrapper(const TExprNode::TPtr& input, TExprNode } struct TInput { + enum EInputPriority { + External, + Current, + Projection + }; + TString Alias; const TStructExprType* Type = nullptr; TMaybe<TColumnOrder> Order; - bool External = false; + EInputPriority Priority = External; TSet<TString> UsedExternalColumns; }; @@ -1382,7 +1388,7 @@ void ScanSublinks(TExprNode::TPtr root, TNodeSet& sublinks) { bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& possibleAliases, bool* hasStar, bool& hasColumnRef, THashSet<TString>& refs, 
THashMap<TString, THashSet<TString>>* qualifiedRefs, - TExtContext& ctx) { + TExtContext& ctx, bool scanColumnsOnly, THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) { bool isError = false; VisitExpr(root, [&](const TExprNode::TPtr& node) { if (node->IsCallable("PgSubLink")) { @@ -1439,9 +1445,10 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& return false; } - for (bool external : { false, true }) { - for (auto& x : inputs) { - if (external != x.External) { + for (ui32 priority : {TInput::Projection, TInput::Current, TInput::External}) { + for (ui32 inputIndex = 0; inputIndex < inputs.size(); ++inputIndex) { + auto& x = inputs[inputIndex]; + if (priority != x.Priority) { continue; } @@ -1452,9 +1459,13 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& for (const auto& item : x.Type->GetItems()) { if (!item->GetName().StartsWith("_yql_")) { (*qualifiedRefs)[alias].insert(TString(item->GetName())); - if (x.External) { + if (x.Priority == TInput::External) { x.UsedExternalColumns.insert(TString(item->GetName())); } + + if (x.Priority != TInput::Projection && nonProjectionColumns) { + (*nonProjectionColumns)[inputIndex].insert(MakeAliasedColumn(x.Alias, item->GetName())); + } } } @@ -1482,10 +1493,11 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& } TString foundAlias; - for (bool external : { false, true }) { + for (ui32 priority : {TInput::Projection, TInput::Current, TInput::External}) { ui32 matches = 0; - for (auto& x : inputs) { - if (external != x.External) { + for (ui32 inputIndex = 0; inputIndex < inputs.size(); ++inputIndex) { + auto& x = inputs[inputIndex]; + if (priority != x.Priority) { continue; } @@ -1506,9 +1518,13 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& return false; } - if (x.External) { + if (x.Priority == TInput::External) { x.UsedExternalColumns.insert(TString(node->Tail().Content())); 
} + + if (x.Priority != TInput::Projection && nonProjectionColumns) { + (*nonProjectionColumns)[inputIndex].insert(MakeAliasedColumn(x.Alias, node->Tail().Content())); + } } } @@ -1516,7 +1532,12 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& break; } - if (!matches && external) { + if (!matches && priority == TInput::External) { + if (scanColumnsOnly) { + // projection columns aren't available yet + return true; + } + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(node->Pos()), TStringBuilder() << "No such column: " << node->Tail().Content())); isError = true; @@ -1539,7 +1560,7 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& bool ScanColumnsForSublinks(bool& needRebuildSubLinks, const TNodeSet& sublinks, TInputs& inputs, const THashSet<TString>& possibleAliases, bool& hasColumnRef, THashSet<TString>& refs, - THashMap<TString, THashSet<TString>>* qualifiedRefs, TExtContext& ctx) { + THashMap<TString, THashSet<TString>>* qualifiedRefs, TExtContext& ctx, bool scanColumnsOnly) { needRebuildSubLinks = false; for (const auto& s : sublinks) { if (s->Child(1)->IsCallable("Void")) { @@ -1550,7 +1571,7 @@ bool ScanColumnsForSublinks(bool& needRebuildSubLinks, const TNodeSet& sublinks, if (!testRowLambda.IsCallable("Void")) { YQL_ENSURE(testRowLambda.IsLambda()); if (!ScanColumns(testRowLambda.TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef, - refs, qualifiedRefs, ctx)) { + refs, qualifiedRefs, ctx, scanColumnsOnly)) { return false; } } @@ -1661,14 +1682,14 @@ void AddColumns(const TInputs& inputs, const bool* hasStar, const THashSet<TStri TVector<const TItemExprType*>& items, TExprContext& ctx) { THashSet<TString> usedRefs; THashSet<TString> usedAliases; - for (bool external : { false, true }) { + for (ui32 priority : { TInput::Projection, TInput::Current, TInput::External }) { for (const auto& x : inputs) { - if (external != x.External) { + if (priority != x.Priority) { continue; } if 
(hasStar && *hasStar) { - if (x.External) { + if (x.Priority == TInput::External) { continue; } @@ -1721,7 +1742,7 @@ IGraphTransformer::TStatus RebuildLambdaColumns(const TExprNode::TPtr& root, con TExprNode::TPtr& newRoot, const TInputs& inputs, TExprNode::TPtr* expandedColumns, TExtContext& ctx) { bool hasExternalInput = false; for (const auto& i : inputs) { - if (i.External) { + if (i.Priority == TInput::External) { hasExternalInput = true; break; } @@ -1739,13 +1760,13 @@ IGraphTransformer::TStatus RebuildLambdaColumns(const TExprNode::TPtr& root, con return OptimizeExpr(root, newRoot, [&](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { if (node->IsCallable("PgStar")) { TExprNode::TListType orderAtoms; - for (bool external : { false, true }) { + for (ui32 priority : { TInput::Projection, TInput::Current, TInput::External }) { for (const auto& x : inputs) { - if (external != x.External) { + if (priority != x.Priority) { continue; } - if (x.External) { + if (x.Priority == TInput::External) { continue; } @@ -1778,9 +1799,9 @@ IGraphTransformer::TStatus RebuildLambdaColumns(const TExprNode::TPtr& root, con } if (node->IsCallable("PgColumnRef")) { - for (bool external : { false, true }) { + for (ui32 priority : { TInput::Projection, TInput::Current, TInput::External }) { for (const auto& x : inputs) { - if (external != x.External) { + if (priority != x.Priority) { continue; } @@ -1807,9 +1828,9 @@ IGraphTransformer::TStatus RebuildLambdaColumns(const TExprNode::TPtr& root, con if (node->IsCallable("PgQualifiedStar")) { TExprNode::TListType members; - for (bool external : { false, true }) { + for (ui32 priority : { TInput::Projection, TInput::Current, TInput::External }) { for (const auto& x : inputs) { - if (external != x.External) { + if (priority != x.Priority) { continue; } @@ -1828,11 +1849,11 @@ IGraphTransformer::TStatus RebuildLambdaColumns(const TExprNode::TPtr& root, con members.push_back(ctx.Expr.Builder(node->Pos()) .List() - .Atom(0, 
NTypeAnnImpl::MakeAliasedColumn(hasExternalInput ? x.Alias : "", item->GetName())) - .Callable(1, "Member") - .Add(0, argNode) - .Atom(1, MakeAliasedColumn(x.Alias, item->GetName())) - .Seal() + .Atom(0, NTypeAnnImpl::MakeAliasedColumn(hasExternalInput ? x.Alias : "", item->GetName())) + .Callable(1, "Member") + .Add(0, argNode) + .Atom(1, MakeAliasedColumn(x.Alias, item->GetName())) + .Seal() .Seal() .Build()); } @@ -1932,7 +1953,7 @@ void MakeOptionalColumns(const TStructExprType*& structType, TExprContext& ctx) } bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases, - const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newGroups) { + const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newGroups, bool scanColumnsOnly) { newGroups.clear(); bool hasColumnRef = false; for (const auto& group : data.Children()) { @@ -1947,10 +1968,14 @@ bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases, if (group->Child(0)->IsCallable("Void")) { // no effective type yet, scan lambda body if (!ScanColumns(group->Tail().TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef, - refs, &qualifiedRefs, ctx)) { + refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return false; } + if (scanColumnsOnly) { + continue; + } + TVector<const TItemExprType*> items; AddColumns(inputs, nullptr, refs, &qualifiedRefs, items, ctx.Expr); auto effectiveType = ctx.Expr.MakeType<TStructExprType>(items); @@ -1982,17 +2007,22 @@ bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases, } bool ValidateSort(TInputs& inputs, const THashSet<TString>& possibleAliases, - const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newSorts) { + const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newSorts, bool scanColumnsOnly, + THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) { newSorts.clear(); for (auto oneSort : data.Children()) { bool hasColumnRef; THashSet<TString> refs; THashMap<TString, 
THashSet<TString>> qualifiedRefs; if (!ScanColumns(oneSort->Child(1)->TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef, - refs, &qualifiedRefs, ctx)) { + refs, &qualifiedRefs, ctx, scanColumnsOnly, nonProjectionColumns)) { return false; } + if (scanColumnsOnly) { + continue; + } + TVector<const TItemExprType*> items; AddColumns(inputs, nullptr, refs, &qualifiedRefs, items, ctx.Expr); auto effectiveType = ctx.Expr.MakeType<TStructExprType>(items); @@ -2044,10 +2074,11 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN bool hasExtTypes = false; bool hasDistinctAll = false; bool hasDistinctOn = false; + bool hasExtraSortColumns = false; // pass 0 - from/values // pass 1 - join - // pass 2 - ext_types/final_ext_types + // pass 2 - ext_types/final_ext_types, extra_sort_columns // pass 3 - where, group_by // pass 4 - window // pass 5 - result @@ -2092,7 +2123,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN for (const auto& x : data.Children()) { auto alias = x->Head().Content(); auto type = x->Tail().GetTypeAnn()->Cast<TTypeExprType>()->GetType()->Cast<TStructExprType>(); - joinInputs.push_back(TInput{ TString(alias), type, Nothing(), true, {} }); + joinInputs.push_back(TInput{ TString(alias), type, Nothing(), TInput::External, {} }); if (!alias.empty()) { possibleAliases.insert(TString(alias)); } @@ -2173,13 +2204,13 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN ScanSublinks(column->Tail().TailPtr(), sublinks); if (!ScanColumns(column->Tail().TailPtr(), joinInputs, possibleAliases, &hasStar, hasColumnRef, - refs, &qualifiedRefs, ctx)) { + refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } bool needRebuildSubLinks; if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, joinInputs, possibleAliases, - hasColumnRef, refs, &qualifiedRefs, ctx)) { + hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return
IGraphTransformer::TStatus::Error; } @@ -2377,10 +2408,10 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Error; } - inputs.push_back(TInput{ alias, newStructType, newOrder, false, {} }); + inputs.push_back(TInput{ alias, newStructType, newOrder, TInput::Current, {} }); } else { - inputs.push_back(TInput{ alias, inputStructType, columnOrder, false, {} }); + inputs.push_back(TInput{ alias, inputStructType, columnOrder, TInput::Current, {} }); } } } @@ -2408,13 +2439,13 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN THashSet<TString> refs; THashMap<TString, THashSet<TString>> qualifiedRefs; if (!ScanColumns(data.Child(1)->TailPtr(), joinInputs, possibleAliases, nullptr, hasColumnRef, - refs, &qualifiedRefs, ctx)) { + refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } bool needRebuildSubLinks; if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, joinInputs, possibleAliases, hasColumnRef, - refs, &qualifiedRefs, ctx)) { + refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2620,7 +2651,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN THashSet<TString> refs; THashMap<TString, THashSet<TString>> qualifiedRefs; if (!ScanColumns(quals.Child(1)->TailPtr(), groupInputs, groupPossibleAliases, nullptr, hasColumnRef, - refs, &qualifiedRefs, ctx)) { + refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2693,7 +2724,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } TExprNode::TListType newGroups; - if (!ValidateGroups(joinInputs, possibleAliases, data, ctx, newGroups)) { + if (!ValidateGroups(joinInputs, possibleAliases, data, ctx, newGroups, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2761,7 +2792,7 @@ IGraphTransformer::TStatus 
PgSetItemWrapper(const TExprNode::TPtr& input, TExprN auto newChildren = x->ChildrenList(); if (needRebuildPartition) { TExprNode::TListType newGroups; - if (!ValidateGroups(joinInputs, possibleAliases, *partitions, ctx, newGroups)) { + if (!ValidateGroups(joinInputs, possibleAliases, *partitions, ctx, newGroups, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2770,7 +2801,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN if (needRebuildSort) { TExprNode::TListType newSorts; - if (!ValidateSort(joinInputs, possibleAliases, *sort, ctx, newSorts)) { + if (!ValidateSort(joinInputs, possibleAliases, *sort, ctx, newSorts, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2817,8 +2848,8 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN TExprNode::TListType newGroups; TInputs projectionInputs; - projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), false, {} }); - if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups)) { + projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), TInput::Projection, {} }); + if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2828,12 +2859,14 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); return IGraphTransformer::TStatus::Repeat; } - } else if (optionName == "sort") { + } + else if (optionName == "sort") { if (pass != 7) { continue; } - if (scanColumnsOnly) { + if ((hasDistinctAll || hasDistinctOn) && scanColumnsOnly) { + // for SELECT DISTINCT, ORDER BY expressions must appear in select list continue; } @@ -2852,19 +2885,65 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } } - TInputs projectionInputs; - projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), false, {} }); + 
TInputs projectionInputs = joinInputs; + // all row columns are visible too, but projection's columns have more priority + if (!scanColumnsOnly) { + projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), TInput::Projection, {} }); + } + if (data.ChildrenSize() > 0 && data.Child(0)->Child(0)->IsCallable("Void")) { TExprNode::TListType newSortTupleItems; - // no effective types yet, scan lambda bodies
- if (!ValidateSort(projectionInputs, {}, data, ctx, newSortTupleItems)) {
+ // no effective types yet, scan lambda bodies + THashMap<ui32, TSet<TString>> extraSortColumns;
+ if (!ValidateSort(projectionInputs, possibleAliases, data, ctx, newSortTupleItems, scanColumnsOnly, &extraSortColumns)) {
return IGraphTransformer::TStatus::Error;
}
+ + if (!scanColumnsOnly) { + auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems));
+ auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr); + if (!extraSortColumns.empty()) { + TExprNode::TListType groups; + for (ui32 i = 0; i < joinInputs.size(); ++i) { + TExprNode::TListType columns; + auto it = extraSortColumns.find(i); + if (it != extraSortColumns.end()) { + for (const auto& x : it->second) { + columns.push_back(ctx.Expr.NewAtom(data.Pos(), x)); + } + } + + auto columnsList = ctx.Expr.NewList(data.Pos(), std::move(columns)); + groups.push_back(columnsList); + } - auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems));
- auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr);
- output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
- return IGraphTransformer::TStatus::Repeat; + newSettings = AddSetting(*newSettings, {}, "extra_sort_columns", ctx.Expr.NewList(data.Pos(), std::move(groups)), ctx.Expr);
+ } +
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat; + } + } + } else if (optionName == "extra_sort_columns") { + if (pass != 2) { + continue; + } + + hasExtraSortColumns = true; + if (!EnsureTupleSize(*option, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + for (const auto& x : option->Tail().Children()) { + if (!EnsureTupleSize(*x, joinInputs.size(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + for (const auto& y : x->Children()) { + if (!EnsureAtom(*y, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + } } } else { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(option->Head().Pos()), @@ -2889,13 +2968,18 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Error; } + if ((hasDistinctAll || hasDistinctOn) && hasExtraSortColumns) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "for SELECT DISTINCT, ORDER BY expressions must appear in select list")); + return IGraphTransformer::TStatus::Error; + } + auto extTypes = GetSetting(options, "ext_types"); if (extTypes && scanColumnsOnly) { const auto& data = extTypes->Tail(); bool needRebuild = false; for (ui32 i = joinInputs.size() - data.ChildrenSize(), j = 0; i < joinInputs.size(); ++i, ++j) { const auto& x = joinInputs[i]; - YQL_ENSURE(x.External); + YQL_ENSURE(x.Priority == TInput::External); for (const auto& t : data.Child(j)->Tail().GetTypeAnn()->Cast<TTypeExprType>()-> GetType()->Cast<TStructExprType>()->GetItems()) { if (!x.UsedExternalColumns.contains(t->GetName())) { @@ -2914,7 +2998,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN TExprNode::TListType aliases; for (ui32 i = joinInputs.size() - data.ChildrenSize(), j = 0; i < joinInputs.size(); ++i, ++j) { const auto& x = joinInputs[i]; - YQL_ENSURE(x.External); + YQL_ENSURE(x.Priority == TInput::External); const auto child = data.Child(j); TVector<const TItemExprType*> items; @@ -3133,12 +3217,12 @@ 
IGraphTransformer::TStatus PgSelectWrapper(const TExprNode::TPtr& input, TExprNo YQL_ENSURE(option); const auto& data = option->Tail(); TInputs projectionInputs; - projectionInputs.push_back(TInput{ TString(), resultStructType, resultColumnOrder, false, {} }); + projectionInputs.push_back(TInput{ TString(), resultStructType, resultColumnOrder, TInput::Projection, {} }); TExprNode::TListType newSortTupleItems; if (data.ChildrenSize() > 0 && data.Child(0)->Child(0)->IsCallable("Void")) { // no effective types yet, scan lambda bodies - if (!ValidateSort(projectionInputs, {}, data, ctx, newSortTupleItems)) { + if (!ValidateSort(projectionInputs, {}, data, ctx, newSortTupleItems, false)) { return IGraphTransformer::TStatus::Error; } diff --git a/ydb/library/yql/core/yql_opt_utils.cpp b/ydb/library/yql/core/yql_opt_utils.cpp index 9d88389d12..642a4a996f 100644 --- a/ydb/library/yql/core/yql_opt_utils.cpp +++ b/ydb/library/yql/core/yql_opt_utils.cpp @@ -579,6 +579,25 @@ TExprNode::TPtr ExpandRemoveMember(const TExprNode::TPtr& node, TExprContext& ct return ctx.NewCallable(node->Pos(), "AsStruct", std::move(members)); } +TExprNode::TPtr ExpandRemoveMembers(const TExprNode::TPtr& node, TExprContext& ctx) { + const auto& membersToRemove = node->Child(1); + MemberUpdaterFunc removeFunc = [&membersToRemove](TString& memberName, const TTypeAnnotationNode*) { + for (const auto& x : membersToRemove->Children()) { + if (memberName == x->Content()) { + return false; + } + } + + return true; + }; + + TExprNode::TListType members; + if (!UpdateStructMembers(ctx, node->ChildPtr(0), node->Content(), members, removeFunc)) { + return node->ChildPtr(0); + } + return ctx.NewCallable(node->Pos(), "AsStruct", std::move(members)); +} + TExprNode::TPtr ExpandRemovePrefixMembers(const TExprNode::TPtr& node, TExprContext& ctx) { YQL_CLOG(DEBUG, Core) << "Expand " << node->Content(); diff --git a/ydb/library/yql/core/yql_opt_utils.h b/ydb/library/yql/core/yql_opt_utils.h index 
9f5d41a1a8..9ed7d1483f 100644 --- a/ydb/library/yql/core/yql_opt_utils.h +++ b/ydb/library/yql/core/yql_opt_utils.h @@ -57,6 +57,7 @@ bool UpdateStructMembers(TExprContext& ctx, const TExprNode::TPtr& node, const T TExprNode::TPtr MakeSingleGroupRow(const TExprNode& aggregateNode, TExprNode::TPtr reduced, TExprContext& ctx); TExprNode::TPtr ExpandRemoveMember(const TExprNode::TPtr& node, TExprContext& ctx); +TExprNode::TPtr ExpandRemoveMembers(const TExprNode::TPtr& node, TExprContext& ctx); TExprNode::TPtr ExpandRemovePrefixMembers(const TExprNode::TPtr& node, TExprContext& ctx); TExprNode::TPtr ExpandFlattenMembers(const TExprNode::TPtr& node, TExprContext& ctx); TExprNode::TPtr ExpandFlattenStructs(const TExprNode::TPtr& node, TExprContext& ctx); |