diff options
author | vvvv <vvvv@ydb.tech> | 2022-08-29 15:36:53 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2022-08-29 15:36:53 +0300 |
commit | e8bb58c9a3be843f6df45c9da9b59f5bbd97e79d (patch) | |
tree | 2433478fa5e404a81e1545e0c1f8190b2bc1f5a5 | |
parent | e886456269bc4bfef5c03779311e1fbb7a922518 (diff) | |
download | ydb-e8bb58c9a3be843f6df45c9da9b59f5bbd97e79d.tar.gz |
initial implementation of aggregation by expressions
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_pgselect.cpp | 105 | ||||
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_pgselect.h | 2 | ||||
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_simple2.cpp | 3 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_core.cpp | 1 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_pg.cpp | 851 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_pg.h | 1 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_expr_csee.cpp | 12 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_expr_csee.h | 13 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/pg_sql.cpp | 40 |
9 files changed, 743 insertions, 285 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp index 9415bdf58ee..a523537f962 100644 --- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp @@ -1785,13 +1785,48 @@ TExprNode::TPtr BuildGroup(TPositionHandle pos, TExprNode::TPtr list, } } - if (groupBy) { - for (const auto& group : groupBy->Tail().Children()) { + if (groupBy->Tail().ChildrenSize()) { + auto arg = ctx.NewArgument(pos, "row"); + auto arguments = ctx.NewArguments(pos, { arg }); + + TExprNode::TListType newColumns; + for (ui32 i = 0; i < groupBy->Tail().ChildrenSize(); ++i) { + const auto& group = groupBy->Tail().Child(i); const auto& lambda = group->Tail(); - YQL_ENSURE(lambda.IsLambda()); - YQL_ENSURE(lambda.Tail().IsCallable("Member")); - keysItems.push_back(lambda.Tail().TailPtr()); + auto name = "_yql_agg_key_" + ToString(i); + keysItems.push_back(ctx.NewAtom(pos, name)); + newColumns.push_back(ctx.Builder(pos) + .List() + .Atom(0, name) + .Apply(1, lambda) + .With(0, arg) + .Seal() + .Seal() + .Build()); } + + auto newColumnsNode = ctx.NewCallable(pos, "AsStruct", std::move(newColumns)); + auto root = ctx.Builder(pos) + .Callable("FlattenMembers") + .List(0) + .Atom(0, "") + .Add(1, arg) + .Seal() + .List(1) + .Atom(0, "") + .Add(1, newColumnsNode) + .Seal() + .Seal() + .Build(); + + auto keyExprsLambda = ctx.NewLambda(pos, std::move(arguments), std::move(root)); + + list = ctx.Builder(pos) + .Callable("OrderedMap") + .Add(0, list) + .Add(1, keyExprsLambda) + .Seal() + .Build(); } auto keys = ctx.NewList(pos, std::move(keysItems)); @@ -2383,33 +2418,20 @@ TExprNode::TPtr AddExtColumns(const TExprNode::TPtr& lambda, const TExprNode::TP .Build(); } -void BuildExtraSortColumns(bool hasAggregation, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& extraSortColumns, size_t aggIndexBegin, size_t aggIndexEnd, TVector<TString>& list) { - if (extraSortColumns) { - TVector<TString> extra; +void BuildExtraSortColumns(const TExprNode::TPtr& groupBy, + const TExprNode::TPtr& extraSortColumns, const TExprNode::TPtr& extraSortKeys, + size_t aggIndexBegin, size_t aggIndexEnd, TVector<TString>& list) { + if (extraSortColumns && !groupBy) { for (const auto& x : extraSortColumns->Tail().Children()) { for (const auto& y : x->Children()) { - extra.push_back(TString(y->Content())); + list.push_back(TString(y->Content())); } } + } - if (!hasAggregation) { - // all extra columns - list.insert(list.end(), extra.begin(), extra.end()); - } else if (groupBy) { - // keep only keys - THashSet<TString> keys; - for (const auto& group : groupBy->Tail().Children()) { - const auto& lambda = group->Tail(); - YQL_ENSURE(lambda.IsLambda()); - YQL_ENSURE(lambda.Tail().IsCallable("Member")); - keys.insert(TString(lambda.Tail().Tail().Content())); - } - - for (const auto& x : extra) { - if (keys.contains(x)) { - list.push_back(x); - } - } + if (extraSortKeys) { + for (const auto& k : extraSortKeys->Tail().Children()) { + list.push_back(TString(k->Content())); } } @@ -2418,7 +2440,8 @@ void BuildExtraSortColumns(bool hasAggregation, const TExprNode::TPtr& groupBy, } } -TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, bool hasAggregation, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& extraSortColumns, +TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, const TExprNode::TPtr& groupBy, + const TExprNode::TPtr& extraSortColumns, const TExprNode::TPtr& extraSortKeys, size_t aggIndexBegin, size_t aggIndexEnd, TExprContext& ctx) { return ctx.Builder(lambda->Pos()) .Lambda() @@ -2435,7 +2458,7 @@ TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, bool hasAggre .Callable(1, "AsStruct") .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { TVector<TString> list; - BuildExtraSortColumns(hasAggregation, groupBy, extraSortColumns, aggIndexBegin, aggIndexEnd, list); + BuildExtraSortColumns(groupBy, extraSortColumns, extraSortKeys, aggIndexBegin, aggIndexEnd, list); for (ui32 i = 0; i < list.size(); ++i) { TStringBuf from = list[i]; from.SkipPrefix("_yql_extra_"); @@ -2457,7 +2480,8 @@ TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, bool hasAggre .Build(); } -TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, bool hasAggregation, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& extraSortColumns, +TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, const TExprNode::TPtr& groupBy, + const TExprNode::TPtr& extraSortColumns, const TExprNode::TPtr& extraSortKeys, size_t aggIndexBegin, size_t aggIndexEnd, const TVector<TString>& sublinkColumns, TExprContext& ctx) { return ctx.Builder(list->Pos()) .Callable("OrderedMap") @@ -2469,7 +2493,7 @@ TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, bool hasAggr .List(1) .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { TVector<TString> list; - BuildExtraSortColumns(hasAggregation, groupBy, extraSortColumns, aggIndexBegin, aggIndexEnd, list); + BuildExtraSortColumns(groupBy, extraSortColumns, extraSortKeys, aggIndexBegin, aggIndexEnd, list); list.insert(list.end(), sublinkColumns.begin(), sublinkColumns.end()); for (ui32 i = 0; i < list.size(); ++i) { parent.Atom(i, list[i]); @@ -2786,6 +2810,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct auto distinctOn = GetSetting(setItem->Tail(), "distinct_on"); auto sort = GetSetting(setItem->Tail(), "sort"); auto extraSortColumns = GetSetting(setItem->Tail(), "final_extra_sort_columns"); + auto extraSortKeys = GetSetting(setItem->Tail(), "final_extra_sort_keys"); bool oneRow = !from; TExprNode::TPtr list; if (values) { @@ -2859,7 +2884,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct RewriteAggs(projectionLambda, aggId, ctx, optCtx, false); } - if (!aggs.empty() || groupBy) { + if (groupBy) { list = BuildGroup(node->Pos(), list, aggs, groupBy, finalExtTypes, ctx); } @@ -2875,10 +2900,10 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct projectionLambda = AddExtColumns(projectionLambda, finalExtTypes->TailPtr(), columnsItems, *subLinkId, ctx); } - bool hasExtraSortColumns = (extraSortColumns || (aggsSizeBeforeSort < aggs.size())); + bool hasExtraSortColumns = (extraSortColumns || extraSortKeys || (aggsSizeBeforeSort < aggs.size())); if (hasExtraSortColumns) { YQL_ENSURE(!distinctAll && !distinctOn); - projectionLambda = AddExtraSortColumns(projectionLambda, (!aggs.empty() || groupBy), groupBy, extraSortColumns, aggsSizeBeforeSort, aggs.size(), ctx); + projectionLambda = AddExtraSortColumns(projectionLambda, groupBy, extraSortColumns, extraSortKeys, aggsSizeBeforeSort, aggs.size(), ctx); } list = ctx.Builder(node->Pos()) @@ -2910,7 +2935,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct } if (hasExtraSortColumns) { - list = RemoveExtraSortColumns(list, (!aggs.empty() || groupBy), groupBy, extraSortColumns, aggsSizeBeforeSort, aggs.size(), sublinkColumns, ctx); + list = RemoveExtraSortColumns(list, groupBy, extraSortColumns, extraSortKeys, aggsSizeBeforeSort, aggs.size(), sublinkColumns, ctx); } } @@ -3133,4 +3158,14 @@ TExprNode::TPtr ExpandPgBetween(const TExprNode::TPtr& node, TExprContext& ctx, .Build(); } +TExprNode::TPtr ExpandPgGroupRef(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { + Y_UNUSED(optCtx); + return ctx.Builder(node->Pos()) + .Callable("Member") + .Add(0, node->HeadPtr()) + .Atom(1, "_yql_agg_key_" + ToString(node->Tail().Content())) + .Seal() + .Build(); +} + } // namespace NYql diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.h b/ydb/library/yql/core/common_opt/yql_co_pgselect.h index e3f2b30eed0..604660c9a85 100644 --- a/ydb/library/yql/core/common_opt/yql_co_pgselect.h +++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.h @@ -21,4 +21,6 @@ TExprNode::TPtr ExpandPgIn(const TExprNode::TPtr& node, TExprContext& ctx, TOpti TExprNode::TPtr ExpandPgBetween(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx); +TExprNode::TPtr ExpandPgGroupRef(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx); + } // namespace NYql diff --git a/ydb/library/yql/core/common_opt/yql_co_simple2.cpp b/ydb/library/yql/core/common_opt/yql_co_simple2.cpp index dea0259f347..c933fa17883 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple2.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple2.cpp @@ -1,4 +1,5 @@ #include "yql_co.h" +#include "yql_co_pgselect.h" #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/core/yql_expr_csee.h> @@ -498,6 +499,8 @@ void RegisterCoSimpleCallables2(TCallableOptimizerMap& map) { return node; }; + + map["PgGroupRef"] = ExpandPgGroupRef; } } diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index f56a0269910..d0a4f75bafb 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -11404,6 +11404,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["PgBetween"] = &PgBetweenWrapper; Functions["PgBetweenSym"] = &PgBetweenWrapper; Functions["PgSubLink"] = &PgSubLinkWrapper; + Functions["PgGroupRef"] = &PgGroupRefWrapper; Functions["AutoDemuxList"] = &AutoDemuxListWrapper; Functions["AggrCountInit"] = &AggrCountInitWrapper; diff --git a/ydb/library/yql/core/type_ann/type_ann_pg.cpp b/ydb/library/yql/core/type_ann/type_ann_pg.cpp index fdcea8b25af..b1879b6e300 100644 --- a/ydb/library/yql/core/type_ann/type_ann_pg.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_pg.cpp @@ -7,6 +7,7 @@ #include <ydb/library/yql/core/yql_expr_optimize.h> #include <ydb/library/yql/core/yql_opt_utils.h> #include <ydb/library/yql/core/yql_pg_utils.h> +#include <ydb/library/yql/core/yql_expr_csee.h> #include <ydb/library/yql/parser/pg_catalog/catalog.h> @@ -1388,7 +1389,7 @@ void ScanSublinks(TExprNode::TPtr root, TNodeSet& sublinks) { bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& possibleAliases, bool* hasStar, bool& hasColumnRef, THashSet<TString>& refs, THashMap<TString, THashSet<TString>>* qualifiedRefs, - TExtContext& ctx, bool scanColumnsOnly, THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) { + TExtContext& ctx, bool scanColumnsOnly) { bool isError = false; VisitExpr(root, [&](const TExprNode::TPtr& node) { if (node->IsCallable("PgSubLink")) { @@ -1462,10 +1463,6 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& if (x.Priority == TInput::External) { x.UsedExternalColumns.insert(TString(item->GetName())); } - - if (x.Priority != TInput::Projection && nonProjectionColumns) { - (*nonProjectionColumns)[inputIndex].insert(MakeAliasedColumn(x.Alias, item->GetName())); - } } } @@ -1521,10 +1518,6 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& if (x.Priority == TInput::External) { x.UsedExternalColumns.insert(TString(node->Tail().Content())); } - - if (x.Priority != TInput::Projection && nonProjectionColumns) { - (*nonProjectionColumns)[inputIndex].insert(MakeAliasedColumn(x.Alias, node->Tail().Content())); - } } } @@ -1558,10 +1551,11 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& return !isError; } -bool ScanColumnsForSublinks(bool& needRebuildSubLinks, const TNodeSet& sublinks, +bool ScanColumnsForSublinks(bool& needRebuildSubLinks, bool& needRebuildTestExprs, const TNodeSet& sublinks, TInputs& inputs, const THashSet<TString>& possibleAliases, bool& hasColumnRef, THashSet<TString>& refs, - THashMap<TString, THashSet<TString>>* qualifiedRefs, TExtContext& ctx, bool scanColumnsOnly, THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) { + THashMap<TString, THashSet<TString>>* qualifiedRefs, TExtContext& ctx, bool scanColumnsOnly) { needRebuildSubLinks = false; + needRebuildTestExprs = false; for (const auto& s : sublinks) { if (s->Child(1)->IsCallable("Void")) { needRebuildSubLinks = true; @@ -1570,9 +1564,13 @@ bool ScanColumnsForSublinks(bool& needRebuildSubLinks, const TNodeSet& sublinks, const auto& testRowLambda = *s->Child(3); if (!testRowLambda.IsCallable("Void")) { YQL_ENSURE(testRowLambda.IsLambda()); - if (!ScanColumns(testRowLambda.TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef, - refs, qualifiedRefs, ctx, scanColumnsOnly, nonProjectionColumns)) { - return false; + if (s->Child(2)->IsCallable("Void")) { + needRebuildTestExprs = true; + + if (!ScanColumns(testRowLambda.TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef, + refs, qualifiedRefs, ctx, scanColumnsOnly)) { + return false; + } } } } @@ -1898,7 +1896,7 @@ IGraphTransformer::TStatus RebuildSubLinks(const TExprNode::TPtr& root, TExprNod auto inputTypes = ctx.Expr.NewList(root->Pos(), std::move(inputTypesItems)); return OptimizeExpr(root, newRoot, [&](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { - if (!node->IsCallable("PgSubLink") || !node->Child(1)->IsCallable("Void")) { + if (!node->IsCallable("PgSubLink")) { return node; } @@ -1907,22 +1905,31 @@ IGraphTransformer::TStatus RebuildSubLinks(const TExprNode::TPtr& root, TExprNod } auto children = node->ChildrenList(); - children[1] = inputTypes; - if (!node->Child(3)->IsCallable("Void")) { - // rebuild lambda for row test - auto argNode = ctx.Expr.NewArgument(node->Pos(), "row"); - auto valueNode = ctx.Expr.NewArgument(node->Pos(), "value"); - auto arguments = ctx.Expr.NewArguments(node->Pos(), { argNode, valueNode }); - TExprNode::TPtr newLambdaRoot; - auto status = RebuildLambdaColumns(node->Child(3)->TailPtr(), argNode, newLambdaRoot, inputs, nullptr, ctx); - auto oldValueNode = node->Child(3)->Head().Child(0); - newLambdaRoot = ctx.Expr.ReplaceNode(std::move(newLambdaRoot), *oldValueNode, valueNode); - if (status == IGraphTransformer::TStatus::Error) { - return nullptr; - } + if (children[1]->IsCallable("Void")) { + children[1] = inputTypes; + } else { + if (!node->Child(3)->IsCallable("Void")) { + if (!children[2]->IsCallable("Void")) { + return node; + } - children[2] = rowType; - children[3] = ctx.Expr.NewLambda(node->Pos(), std::move(arguments), std::move(newLambdaRoot)); + // rebuild lambda for row test + auto argNode = ctx.Expr.NewArgument(node->Pos(), "row"); + auto valueNode = ctx.Expr.NewArgument(node->Pos(), "value"); + auto arguments = ctx.Expr.NewArguments(node->Pos(), { argNode, valueNode }); + TExprNode::TPtr newLambdaRoot; + auto status = RebuildLambdaColumns(node->Child(3)->TailPtr(), argNode, newLambdaRoot, inputs, nullptr, ctx); + auto oldValueNode = node->Child(3)->Head().Child(0); + newLambdaRoot = ctx.Expr.ReplaceNode(std::move(newLambdaRoot), *oldValueNode, valueNode); + if (status == IGraphTransformer::TStatus::Error) { + return nullptr; + } + + children[2] = rowType; + children[3] = ctx.Expr.NewLambda(node->Pos(), std::move(arguments), std::move(newLambdaRoot)); + } else { + return node; + } } return ctx.Expr.NewCallable(node->Pos(), node->Content(), std::move(children)); @@ -1952,11 +1959,38 @@ void MakeOptionalColumns(const TStructExprType*& structType, TExprContext& ctx) structType = ctx.MakeType<TStructExprType>(newItems); } +void ScanAggregations(const TExprNode::TPtr& root, bool& hasAggregations) { + VisitExpr(root, [&](const TExprNode::TPtr& node) { + if (node->IsCallable("PgAgg")) { + hasAggregations = true; + return false; + } + + if (node->IsCallable("PgSubLink")) { + return false; + } + + return true; + }); + + TNodeSet sublinks; + ScanSublinks(root, sublinks); + for (const auto& s : sublinks) { + const auto& testRowLambda = *s->Child(3); + if (!testRowLambda.IsCallable("Void")) { + YQL_ENSURE(testRowLambda.IsLambda()); + ScanAggregations(testRowLambda.TailPtr(), hasAggregations); + } + } +} + bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases, - const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newGroups, bool scanColumnsOnly) { + const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newGroups, bool& hasNewGroups, bool scanColumnsOnly) { newGroups.clear(); + hasNewGroups = false; bool hasColumnRef = false; - for (const auto& group : data.Children()) { + for (ui32 index = 0; index < data.ChildrenSize(); ++index) { + const auto& group = data.Child(index); if (!group->IsCallable("PgGroup")) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(group->Pos()), "Expected PgGroup")); return false; @@ -1976,6 +2010,13 @@ bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases, continue; } + bool hasNestedAggregations = false; + ScanAggregations(group->Tail().TailPtr(), hasNestedAggregations); + if (hasNestedAggregations) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(group->Pos()), "Nested aggregations aren't allowed")); + return false; + } + TVector<const TItemExprType*> items; AddColumns(inputs, nullptr, refs, &qualifiedRefs, items, ctx.Expr); auto effectiveType = ctx.Expr.MakeType<TStructExprType>(items); @@ -2000,30 +2041,15 @@ bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases, newChildren[1] = newLambda; auto newGroup = ctx.Expr.NewCallable(group->Pos(), "PgGroup", std::move(newChildren)); newGroups.push_back(newGroup); + hasNewGroups = true; + } else { + newGroups.push_back(data.ChildPtr(index)); } } return true; } -THashMap<ui32, TSet<TString>> LoadExtraColumns(TExprNode::TPtr& value, ui32 inputsCount) { - THashMap<ui32, TSet<TString>> ret; - YQL_ENSURE(value->ChildrenSize() == inputsCount); - for (ui32 inputIndex = 0; inputIndex < inputsCount; ++inputIndex) { - auto child = value->Child(inputIndex); - if (child->ChildrenSize() == 0) { - continue; - } - - auto& set = ret[inputIndex]; - for (const auto& x : child->Children()) { - set.insert(TString(x->Content())); - } - } - - return ret; -} - TExprNode::TPtr SaveExtraColumns(TPositionHandle pos, const THashMap<ui32, TSet<TString>>& columns, ui32 inputsCount, TExprContext& ctx) { TExprNode::TListType groups; for (ui32 i = 0; i < inputsCount; ++i) { @@ -2042,30 +2068,6 @@ TExprNode::TPtr SaveExtraColumns(TPositionHandle pos, const THashMap<ui32, TSet< return ctx.NewList(pos, std::move(groups)); } -bool AddExtraColumnsForSublinks(TExprNode::TPtr& value, const TExprNode& data, ui32 inputsCount, TExprContext& ctx) { - THashMap<ui32, TSet<TString>> columns = LoadExtraColumns(value, inputsCount); - for (auto oneSort : data.Children()) { - TNodeSet sublinks; - ScanSublinks(oneSort->Child(1)->TailPtr(), sublinks); - for (const auto& s : sublinks) { - auto c = ExtractExternalColumns(s->Tail()); - for (const auto& [name, index] : c) { - YQL_ENSURE(index < inputsCount); - columns[index].insert("_yql_extra_" + name); - } - } - } - - value = SaveExtraColumns(value->Pos(), columns, inputsCount, ctx); - for (const auto&[index, set] : columns) { - if (!set.empty()) { - return true; - } - } - - return false; -} - TMap<TString, ui32> ExtractExternalColumns(const TExprNode& select) { TMap<TString, ui32> res; const auto& option = select.Head(); @@ -2090,24 +2092,25 @@ TMap<TString, ui32> ExtractExternalColumns(const TExprNode& select) { } bool ValidateSort(TInputs& inputs, TInputs& subLinkInputs, const THashSet<TString>& possibleAliases, - const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newSorts, bool scanColumnsOnly, - THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) { + const TExprNode& data, TExtContext& ctx, bool& hasNewSort, TExprNode::TListType& newSorts, bool scanColumnsOnly) { newSorts.clear(); - for (auto oneSort : data.Children()) { + for (ui32 index = 0; index < data.ChildrenSize(); ++index) { + auto oneSort = data.Child(index); TNodeSet sublinks; ScanSublinks(oneSort->Child(1)->TailPtr(), sublinks); - bool hasColumnRef; + bool hasColumnRef = false; THashSet<TString> refs; THashMap<TString, THashSet<TString>> qualifiedRefs; if (!ScanColumns(oneSort->Child(1)->TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef, - refs, &qualifiedRefs, ctx, scanColumnsOnly, nonProjectionColumns)) { + refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return false; } bool needRebuildSubLinks; - if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, subLinkInputs, possibleAliases, - hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly, nonProjectionColumns)) { + bool needRebuildTestExprs; + if (!ScanColumnsForSublinks(needRebuildSubLinks, needRebuildTestExprs, sublinks, subLinkInputs, possibleAliases, + hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2124,40 +2127,319 @@ bool ValidateSort(TInputs& inputs, TInputs& subLinkInputs, const THashSet<TStrin auto typeNode = ExpandType(oneSort->Pos(), *effectiveType, ctx.Expr); + auto newLambda = oneSort->ChildPtr(1); auto newChildren = oneSort->ChildrenList(); - if (needRebuildSubLinks) { + bool hasChanges = false; + if (needRebuildSubLinks || needRebuildTestExprs) { auto arguments = ctx.Expr.NewArguments(oneSort->Pos(), { }); TExprNode::TPtr newRoot; - auto status = RebuildSubLinks(oneSort->Child(1)->TailPtr(), newRoot, sublinks, subLinkInputs, typeNode, ctx); + auto status = RebuildSubLinks(newLambda->TailPtr(), newRoot, sublinks, subLinkInputs, typeNode, ctx); if (status == IGraphTransformer::TStatus::Error) { return false; } - auto newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot)); + newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot)); newChildren[1] = newLambda; - } else if (oneSort->Child(1)->Head().ChildrenSize() == 0) { + hasChanges = true; + } + + if (!needRebuildSubLinks && newLambda->Head().ChildrenSize() == 0) { auto argNode = ctx.Expr.NewArgument(oneSort->Pos(), "row"); auto arguments = ctx.Expr.NewArguments(oneSort->Pos(), { argNode }); TExprNode::TPtr newRoot; - auto status = RebuildLambdaColumns(oneSort->Child(1)->TailPtr(), argNode, newRoot, inputs, nullptr, ctx); + auto status = RebuildLambdaColumns(newLambda->TailPtr(), argNode, newRoot, inputs, nullptr, ctx); if (status == IGraphTransformer::TStatus::Error) { return false; } - auto newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot)); - + newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot)); newChildren[0] = typeNode; newChildren[1] = newLambda; + hasChanges = true; } - auto newSort = ctx.Expr.ChangeChildren(*oneSort, std::move(newChildren)); - newSorts.push_back(newSort); + if (hasChanges) { + auto newSort = ctx.Expr.ChangeChildren(*oneSort, std::move(newChildren)); + newSorts.push_back(newSort); + hasNewSort = true; + } else { + newSorts.push_back(data.ChildPtr(index)); + } } return true; } +ui64 CalculateExprHash(const TExprNode& root, TNodeMap<ui64>& visited) { + auto it = visited.find(&root); + if (it != visited.end()) { + return it->second; + } + + ui64 hash = 0; + switch (root.Type()) { + case TExprNode::EType::Callable: + hash = CseeHash(root.Content().Size(), hash); + hash = CseeHash(root.Content().Data(), root.Content().Size(), hash); + [[fallthrough]]; + case TExprNode::EType::List: + hash = CseeHash(root.ChildrenSize(), hash); + for (ui32 i = 0; i < root.ChildrenSize(); ++i) { + hash = CalculateExprHash(*root.Child(i), visited); + } + + break; + case TExprNode::EType::Atom: + hash = CseeHash(root.Content().Size(), hash); + hash = CseeHash(root.Content().Data(), root.Content().Size(), hash); + hash = CseeHash(root.GetFlagsToCompare(), hash); + break; + default: + YQL_ENSURE(false, "Unexpected node type"); + } + + visited.emplace(&root, hash); + return hash; +} + +bool ExprNodesEquals(const TExprNode& left, const TExprNode& right, TNodeSet& visited) { + if (!visited.emplace(&left).second) { + return true; + } + + if (left.Type() != right.Type()) { + return false; + } + + switch (left.Type()) { + case TExprNode::EType::Callable: + if (left.Content() != right.Content()) { + return false; + } + + [[fallthrough]]; + case TExprNode::EType::List: + if (left.ChildrenSize() != right.ChildrenSize()) { + return false; + } + + for (ui32 i = 0; i < left.ChildrenSize(); ++i) { + if (!ExprNodesEquals(*left.Child(i), *right.Child(i), visited)) { + return false; + } + } + + return true; + case TExprNode::EType::Atom: + return left.Content() == right.Content() && left.GetFlagsToCompare() == right.GetFlagsToCompare(); + case TExprNode::EType::Argument: + return left.GetArgIndex() == right.GetArgIndex(); + default: + YQL_ENSURE(false, "Unexpected node type"); + } +} + +struct TGroupExpr { + TExprNode::TPtr OriginalRoot; + ui64 Hash; + TExprNode::TPtr TypeNode; +}; + +bool ScanExprForMatchedGroup(const TExprNode::TPtr& row, const TExprNode& root, const TVector<TGroupExpr>& exprs, + TNodeOnNodeOwnedMap& replaces, TNodeMap<ui64>& hashVisited, TNodeMap<bool>& nodeVisited, TExprContext& ctx) { + auto it = nodeVisited.find(&root); + if (it != nodeVisited.end()) { + return it->second; + } + + if (root.IsCallable("PgSubLink")) { + const auto& testRowLambda = *root.Child(3); + if (!testRowLambda.IsCallable("Void")) { + hashVisited[testRowLambda.Head().Child(0)] = 0; // original row + hashVisited[testRowLambda.Head().Child(1)] = 1; // sublink value + ScanExprForMatchedGroup(testRowLambda.Head().ChildPtr(0), testRowLambda.Tail(), + exprs, replaces, hashVisited, nodeVisited, ctx); + } + + nodeVisited[&root] = false; + return false; + } + + bool hasChanges = false; + for (const auto& child : root.Children()) { + if (!ScanExprForMatchedGroup(row, *child, exprs, replaces, hashVisited, nodeVisited, ctx)) { + hasChanges = true; + } + } + + if (hasChanges) { + nodeVisited[&root] = false; + return false; + } + + ui64 hash = CalculateExprHash(root, hashVisited); + for (ui32 i = 0; i < exprs.size(); ++i) { + if (exprs[i].Hash != hash) { + continue; + } + + TNodeSet equalsVisited; + if (!ExprNodesEquals(*exprs[i].OriginalRoot, root, equalsVisited)) { + continue; + } + + replaces[&root] = ctx.Builder(root.Pos()) + .Callable("PgGroupRef") + .Add(0, row) + .Add(1, exprs[i].TypeNode) + .Atom(2, ToString(i)) + .Seal() + .Build(); + nodeVisited[&root] = false; + return false; + } + + nodeVisited[&root] = true; + return true; +} + +TExprNode::TPtr ReplaceGroupByExpr(const TExprNode::TPtr& root, const TExprNode& groups, TExprContext& ctx) { + if (!groups.ChildrenSize()) { + return root; + } + + // calculate hashes + TVector<TGroupExpr> exprs; + TExprNode::TListType typeNodes; + for (ui32 index = 0; index < groups.ChildrenSize(); ++index) { + const auto& g = *groups.Child(index); + const auto& lambda = g.Tail(); + TNodeMap<ui64> visited; + visited[&lambda.Head().Head()] = 0; + exprs.push_back({ + lambda.TailPtr(), + CalculateExprHash(lambda.Tail(), visited), + ExpandType(g.Pos(), *lambda.GetTypeAnn(), ctx) + }); + } + + TNodeOnNodeOwnedMap replaces; + TNodeMap<ui64> hashVisited; + TNodeMap<bool> nodeVisited; + hashVisited[&root->Head().Head()] = 0; + ScanExprForMatchedGroup(root->Head().HeadPtr(), root->Tail(), exprs, replaces, hashVisited, nodeVisited, ctx); + auto ret = root; + if (replaces.empty()) { + return ret; + } + + TOptimizeExprSettings settings(nullptr); + settings.VisitTuples = true; + auto status = RemapExpr(ret, ret, replaces, ctx, settings); + YQL_ENSURE(status != IGraphTransformer::TStatus::Error); + return ret; +} + +bool GatherExtraSortColumns(const TExprNode& data, const TInputs& inputs, TExprNode::TPtr& extraInputColumns, TExprNode::TPtr& extraKeys, TExprContext& ctx) { + ui32 inputsCount = inputs.size() - 1; + THashMap<ui32, TSet<TString>> columns; + TSet<TString> keys; + extraKeys = nullptr; + for (auto oneSort : data.Children()) { + TNodeSet sublinks; + ScanSublinks(oneSort->Child(1)->TailPtr(), sublinks); + + auto scanLambda = [&](const auto& lambda) { + auto arg = &lambda.Head().Head(); + VisitExpr(lambda.TailPtr(), [&](const TExprNode::TPtr& node) { + if (node->IsCallable("PgSubLink")) { + return false; + } + + if (node->IsCallable("PgGroupRef")) { + keys.insert("_yql_agg_key_" + ToString(node->Tail().Content())); + } + + if (node->IsCallable("Member") && &node->Head() == arg) { + TStringBuf alias; + TStringBuf column = NTypeAnnImpl::RemoveAlias(node->Tail().Content(), alias); + + TMaybe<ui32> index; + for (ui32 priority : {TInput::Projection, TInput::Current, TInput::External}) { + for (ui32 inputIndex = 0; inputIndex < inputs.size(); ++inputIndex) { + auto& x = inputs[inputIndex]; + if (priority != x.Priority) { + continue; + } + + if (!alias.empty() && (x.Alias.empty() || alias != x.Alias)) { + continue; + } + + auto pos = x.Type->FindItem(column); + if (pos) { + index = inputIndex; + break; + } + } + + if (index) { + break; + } + } + + YQL_ENSURE(index); + if (inputs[*index].Priority != TInput::Projection) { + columns[*index].insert(TString(node->Tail().Content())); + } + + return false; + } + + return true; + }); + }; + + scanLambda(*oneSort->Child(1)); + for (const auto& s : sublinks) { + auto c = ExtractExternalColumns(s->Tail()); + for (const auto&[name, index] : c) { + YQL_ENSURE(index < inputsCount); + columns[index].insert("_yql_extra_" + name); + } + + if (!s->Child(3)->IsCallable("Void")) { + scanLambda(*s->Child(3)); + } + } + } + + extraInputColumns = SaveExtraColumns(data.Pos(), columns, inputsCount, ctx); + if (!keys.empty()) { + extraKeys = ctx.Builder(data.Pos()) + .List() + .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & { + ui32 i = 0; + for (const auto& k : keys) { + parent.Atom(i++, k); + } + + return parent; + }) + .Seal() + .Build(); + } + + for (const auto&[index, set] : columns) { + if (!set.empty()) { + return true; + } + } + + return false; +} + IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { if (!EnsureArgsCount(*input, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; @@ -2169,8 +2451,10 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } bool scanColumnsOnly = true; + const TStructExprType* outputRowType; + bool hasAggregations = false; for (;;) { - const TStructExprType* outputRowType = nullptr; + outputRowType = nullptr; TInputs inputs; TInputs joinInputs; THashSet<TString> possibleAliases; @@ -2180,14 +2464,14 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN bool hasExtTypes = false; bool hasDistinctAll = false; bool hasDistinctOn = false; - bool hasExtraSortColumns = false; bool hasFinalExtraSortColumns = false; + TExprNode::TPtr groupBy; // pass 0 - from/values // pass 1 - join // pass 2 - ext_types/final_ext_types, extra_sort_solumns // pass 3 - where, group_by - // pass 4 - window + // pass 4 - having, window // pass 5 - result // pass 6 - distinct_all, distinct_on // pass 7 - sort @@ -2292,16 +2576,21 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Error; } - TVector<const TItemExprType*> outputItems; - TExprNode::TListType newResult; - bool hasStar = false; - bool hasColumnRef = false; - for (const auto& column : data.Children()) { + for (ui32 index = 0; index < data.ChildrenSize(); ++index) { + const auto& column = data.Child(index); if (!column->IsCallable("PgResultItem")) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(column->Pos()), "Expected PgResultItem")); return IGraphTransformer::TStatus::Error; } + } + TVector<const TItemExprType*> outputItems; + TExprNode::TListType newResult; + bool hasNewResult = false; + bool hasStar = false; + bool hasColumnRef = false; + for (ui32 index = 0; index < data.ChildrenSize(); ++index) { + const auto& column = data.Child(index); YQL_ENSURE(column->Tail().IsLambda()); THashSet<TString> refs; THashMap<TString, THashSet<TString>> qualifiedRefs; @@ -2316,7 +2605,8 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } bool needRebuildSubLinks; - if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, joinInputs, possibleAliases, + bool needRebuildTestExprs; + if (!ScanColumnsForSublinks(needRebuildSubLinks, needRebuildTestExprs, sublinks, joinInputs, possibleAliases, hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2332,36 +2622,47 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN auto typeNode = ExpandType(column->Pos(), *effectiveType, ctx.Expr); auto newColumnChildren = column->ChildrenList(); - if (needRebuildSubLinks) { + auto newLambda = column->TailPtr(); + bool hasChanges = false; + if (needRebuildSubLinks || needRebuildTestExprs) { auto arguments = ctx.Expr.NewArguments(column->Pos(), { }); TExprNode::TPtr newRoot; - auto status = RebuildSubLinks(column->Tail().TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx); + auto status = RebuildSubLinks(newLambda->TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx); if (status == IGraphTransformer::TStatus::Error) { return IGraphTransformer::TStatus::Error; } - auto newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot)); + newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot)); newColumnChildren[2] = newLambda; - } else if (column->Tail().Head().ChildrenSize() == 0) { + hasChanges = true; + } + + if (!needRebuildSubLinks && newLambda->Head().ChildrenSize() == 0) { auto argNode = ctx.Expr.NewArgument(column->Pos(), "row"); auto arguments = ctx.Expr.NewArguments(column->Pos(), { argNode }); auto expandedColumns = column->HeadPtr(); TExprNode::TPtr newRoot; - auto status = RebuildLambdaColumns(column->Tail().TailPtr(), argNode, newRoot, joinInputs, &expandedColumns, ctx); + auto status = RebuildLambdaColumns(newLambda->TailPtr(), argNode, newRoot, joinInputs, &expandedColumns, ctx); if (status == IGraphTransformer::TStatus::Error) { return IGraphTransformer::TStatus::Error; } - auto newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot)); + newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot)); newColumnChildren[0] = expandedColumns; newColumnChildren[1] = typeNode; newColumnChildren[2] = newLambda; + hasChanges = true; } - auto newColumn = ctx.Expr.NewCallable(column->Pos(), "PgResultItem", std::move(newColumnChildren)); - newResult.push_back(newColumn); + if (hasChanges) { + auto newColumn = ctx.Expr.NewCallable(column->Pos(), "PgResultItem", std::move(newColumnChildren)); + newResult.push_back(newColumn); + hasNewResult = true; + } else { + newResult.push_back(data.ChildPtr(index)); + } } } else { @@ -2379,11 +2680,13 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN if (!ValidateWindowRefs(column->TailPtr(), windows ? &windows->Tail() : nullptr, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } + + newResult.push_back(data.ChildPtr(index)); } } if (!scanColumnsOnly) { - if (!newResult.empty()) { + if (hasNewResult) { auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newResult)); auto newSettings = ReplaceSetting(options, {}, "result", resultValue, ctx.Expr); output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); @@ -2394,6 +2697,33 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN if (!outputRowType->Validate(data.Pos(), ctx.Expr)) { return IGraphTransformer::TStatus::Error; } + + for (const auto& column : data.Children()) { + ScanAggregations(column->TailPtr(), hasAggregations); + } + + if (groupBy) { + TExprNode::TListType newResultItems; + bool hasChanges = false; + for (ui32 index = 0; index < data.ChildrenSize(); ++index) { + const auto& column = *data.Child(index); + auto ret = ReplaceGroupByExpr(column.TailPtr(), groupBy->Tail(), ctx.Expr); + if (ret != column.TailPtr()) { + hasChanges = true; + newResultItems.push_back(ctx.Expr.ChangeChild(column, 2, std::move(ret))); + } + else { + newResultItems.push_back(data.ChildPtr(index)); + } + } + + if (hasChanges) { + auto newResult = ctx.Expr.NewList(input->Pos(), std::move(newResultItems)); + auto newSettings = ReplaceSetting(options, {}, "result", newResult, ctx.Expr); + output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); + return IGraphTransformer::TStatus::Repeat; + } + } } } else if (optionName == "from") { @@ -2523,7 +2853,11 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } } else if (optionName == "where" || optionName == "having") { - if (pass != 3) { + if (optionName == "where" && pass != 3) { + continue; + } + + if (optionName == "having" && pass != 4) { continue; } @@ -2542,7 +2876,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN TNodeSet sublinks; ScanSublinks(data.Child(1)->TailPtr(), sublinks); - bool hasColumnRef; + bool hasColumnRef = false; THashSet<TString> refs; THashMap<TString, THashSet<TString>> qualifiedRefs; if (!ScanColumns(data.Child(1)->TailPtr(), joinInputs, possibleAliases, nullptr, hasColumnRef, @@ -2551,7 +2885,8 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } bool needRebuildSubLinks; - if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, joinInputs, possibleAliases, hasColumnRef, + bool needRebuildTestExprs; + if (!ScanColumnsForSublinks(needRebuildSubLinks, needRebuildTestExprs, sublinks, joinInputs, possibleAliases, hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } @@ -2565,37 +2900,40 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } auto typeNode = ExpandType(data.Pos(), *effectiveType, ctx.Expr); + auto newLambda = data.ChildPtr(1); + bool hasChanges = false; + auto newChildren = data.ChildrenList(); - if (needRebuildSubLinks) { + if (needRebuildSubLinks || needRebuildTestExprs) { auto arguments = ctx.Expr.NewArguments(data.Pos(), {}); TExprNode::TPtr newRoot; - auto status = RebuildSubLinks(data.Child(1)->TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx); + auto status = RebuildSubLinks(newLambda->TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx); if (status == IGraphTransformer::TStatus::Error) { return IGraphTransformer::TStatus::Error; } - auto newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot)); - - auto newChildren = data.ChildrenList(); + newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot)); newChildren[1] = newLambda; - auto newWhere = ctx.Expr.NewCallable(data.Pos(), "PgWhere", std::move(newChildren)); - auto newSettings = ReplaceSetting(options, {}, TString(optionName), newWhere, ctx.Expr); - output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); - return IGraphTransformer::TStatus::Repeat; - } else if (data.Child(1)->Head().ChildrenSize() == 0) { + hasChanges = true; + } + + if (!needRebuildSubLinks && newLambda->Head().ChildrenSize() == 0) { auto argNode = ctx.Expr.NewArgument(data.Pos(), "row"); auto arguments = ctx.Expr.NewArguments(data.Pos(), { argNode }); TExprNode::TPtr newRoot; - auto status = RebuildLambdaColumns(data.Child(1)->TailPtr(), argNode, newRoot, joinInputs, nullptr, ctx); + auto status = RebuildLambdaColumns(newLambda->TailPtr(), argNode, newRoot, joinInputs, nullptr, ctx); if (status == IGraphTransformer::TStatus::Error) { return IGraphTransformer::TStatus::Error; } - auto newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot)); + newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot)); - auto newChildren = data.ChildrenList(); newChildren[0] = typeNode; newChildren[1] = newLambda; + hasChanges = true; + } + + if (hasChanges) { auto newWhere = ctx.Expr.NewCallable(data.Pos(), "PgWhere", std::move(newChildren)); auto newSettings = ReplaceSetting(options, {}, TString(optionName), newWhere, ctx.Expr); output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); @@ -2621,6 +2959,15 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Error; } } + + if (!scanColumnsOnly && optionName == "having" && groupBy) { + auto ret = ReplaceGroupByExpr(data.TailPtr(), groupBy->Tail(), ctx.Expr); + if (ret != data.TailPtr()) { + auto newSettings = ReplaceSetting(options, {}, "having", ctx.Expr.ChangeChild(data, 1, std::move(ret)), ctx.Expr); + output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); + return IGraphTransformer::TStatus::Repeat; + } + } } else if (optionName == "join_ops") { if (pass != 1) { @@ -2754,7 +3101,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } else { const auto& quals = child->Tail(); - bool hasColumnRef; + bool hasColumnRef = false; THashSet<TString> refs; THashMap<TString, THashSet<TString>> qualifiedRefs; if (!ScanColumns(quals.Child(1)->TailPtr(), groupInputs, groupPossibleAliases, nullptr, hasColumnRef, @@ -2821,6 +3168,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN continue; } + groupBy = option; if (!EnsureTupleSize(*option, 2, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } @@ -2831,11 +3179,12 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } TExprNode::TListType newGroups; - if (!ValidateGroups(joinInputs, possibleAliases, data, ctx, newGroups, scanColumnsOnly)) { + bool hasNewGroups = false; + if (!ValidateGroups(joinInputs, possibleAliases, data, ctx, newGroups, hasNewGroups, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } - if (!scanColumnsOnly && !newGroups.empty()) { + if (!scanColumnsOnly && hasNewGroups) { auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newGroups)); auto newSettings = ReplaceSetting(options, {}, "group_by", resultValue, ctx.Expr); output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); @@ -2874,7 +3223,6 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN auto partitions = x->Child(2); auto sort = x->Child(3); - bool needRebuildSort = false; bool needRebuildPartition = false; for (const auto& p : partitions->Children()) { if (p->Child(0)->IsCallable("Void")) { @@ -2883,39 +3231,33 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } } - for (const auto& s : sort->Children()) { - if (s->Child(0)->IsCallable("Void")) { - needRebuildSort = true; - break; - } - } - - if (!needRebuildSort && !needRebuildPartition) { - newWindow.push_back(x); - continue; - } - - hasChanges = true; auto newChildren = x->ChildrenList(); if (needRebuildPartition) { TExprNode::TListType newGroups; - if (!ValidateGroups(joinInputs, possibleAliases, *partitions, ctx, newGroups, scanColumnsOnly)) { + bool hasNewGroups = false; + if (!ValidateGroups(joinInputs, possibleAliases, *partitions, ctx, newGroups, hasNewGroups, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } newChildren[2] = ctx.Expr.NewList(x->Pos(), std::move(newGroups)); } - if (needRebuildSort) { - TExprNode::TListType newSorts; - if (!ValidateSort(joinInputs, joinInputs, possibleAliases, *sort, ctx, newSorts, scanColumnsOnly)) { - return IGraphTransformer::TStatus::Error; - } + bool hasNewSort = false; + TExprNode::TListType newSorts; + if (!ValidateSort(joinInputs, joinInputs, possibleAliases, *sort, ctx, hasNewSort, newSorts, scanColumnsOnly)) { + return IGraphTransformer::TStatus::Error; + } + if (hasNewSort) { newChildren[3] = ctx.Expr.NewList(x->Pos(), std::move(newSorts)); } - newWindow.push_back(ctx.Expr.ChangeChildren(*x, std::move(newChildren))); + if (needRebuildPartition || hasNewSort) { + hasChanges = true; + newWindow.push_back(ctx.Expr.ChangeChildren(*x, std::move(newChildren))); + } else { + newWindow.push_back(x); + } } if (!scanColumnsOnly && hasChanges) { @@ -2953,14 +3295,15 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Error; } + bool hasNewGroups = false; TExprNode::TListType newGroups; TInputs projectionInputs; projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), TInput::Projection, {} }); - if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups, scanColumnsOnly)) { + if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups, hasNewGroups, scanColumnsOnly)) { return IGraphTransformer::TStatus::Error; } - if (!newGroups.empty()) { + if (hasNewGroups) { auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newGroups)); auto newSettings = ReplaceSetting(options, {}, "distinct_on", resultValue, ctx.Expr); output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); @@ -2986,60 +3329,77 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Error; } - bool needScan = false; - for (const auto& x : data.Children()) { - if (!x->IsCallable("PgSort")) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(x->Pos()), "Expected PgSort")); - } - - if (x->Child(0)->IsCallable("Void")) { - needScan = true; - } - } - TInputs projectionInputs = joinInputs; // all row columns are visible too, but projection's columns have more priority if (!scanColumnsOnly) { projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), TInput::Projection, {} }); } - if (needScan) { - TExprNode::TListType newSortTupleItems; - // no effective types yet, scan lambda bodies - THashMap<ui32, TSet<TString>> extraSortColumns; - if (!ValidateSort(projectionInputs, joinInputs, possibleAliases, data, ctx, newSortTupleItems, scanColumnsOnly, &extraSortColumns)) { - return IGraphTransformer::TStatus::Error; - } + bool hasNewSort = false; + TExprNode::TListType newSortTupleItems; + // no effective types yet, scan lambda bodies + if (!ValidateSort(projectionInputs, joinInputs, possibleAliases, data, ctx, hasNewSort, newSortTupleItems, scanColumnsOnly)) { + return IGraphTransformer::TStatus::Error; + } - if (!scanColumnsOnly) { - auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems)); - auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr); - auto extra = SaveExtraColumns(data.Pos(), extraSortColumns, joinInputs.size(), ctx.Expr); - newSettings = AddSetting(*newSettings, {}, "extra_sort_columns", extra, ctx.Expr); + if (hasNewSort && !scanColumnsOnly) { + auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems)); + auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr); - output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); - return IGraphTransformer::TStatus::Repeat; - } - } else { - auto extra = GetSetting(options, "extra_sort_columns"); - if (extra) { - auto value = extra->TailPtr(); - bool hasColumns = AddExtraColumnsForSublinks(value, data, joinInputs.size(), ctx.Expr); - auto newSettings = RemoveSetting(options, "extra_sort_columns", ctx.Expr); - if (hasColumns) { - newSettings = AddSetting(*newSettings, {}, "final_extra_sort_columns", value, ctx.Expr); + output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); + return IGraphTransformer::TStatus::Repeat; + } + + if (!scanColumnsOnly) { + if (groupBy) { + TExprNode::TListType newSortItems; + bool hasChanges = false; + for (ui32 index = 0; index < data.ChildrenSize(); ++index) { + const auto& column = *data.Child(index); + auto ret = ReplaceGroupByExpr(column.ChildPtr(1), groupBy->Tail(), ctx.Expr); + if (ret != column.ChildPtr(1)) { + hasChanges = true; + newSortItems.push_back(ctx.Expr.ChangeChild(column, 1, std::move(ret))); + } + else { + newSortItems.push_back(data.ChildPtr(index)); + } } - output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); - return IGraphTransformer::TStatus::Repeat; + if (hasChanges) { + auto newSort = ctx.Expr.NewList(input->Pos(), std::move(newSortItems)); + auto newSettings = ReplaceSetting(options, {}, "sort", newSort, ctx.Expr); + output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); + return IGraphTransformer::TStatus::Repeat; + } + } + + if (!GetSetting(options, "final_extra_sort_columns") && !GetSetting(options, "final_extra_sort_keys")) { + TExprNode::TPtr extraColumns; + TExprNode::TPtr extraKeys; + auto hasExtraColumns = GatherExtraSortColumns(data, projectionInputs, extraColumns, extraKeys, ctx.Expr); + if (hasExtraColumns || extraKeys) { + TExprNode::TPtr newSettings; + if (hasExtraColumns) { + newSettings = AddSetting(newSettings ? *newSettings : options, {}, "final_extra_sort_columns", extraColumns, ctx.Expr); + } + + if (extraKeys) { + newSettings = AddSetting(newSettings ? *newSettings : options, {}, "final_extra_sort_keys", extraKeys, ctx.Expr); + } + + output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); + return IGraphTransformer::TStatus::Repeat; + } } } - } else if (optionName == "extra_sort_columns" || optionName == "final_extra_sort_columns") { + } + else if (optionName == "final_extra_sort_columns") { if (pass != 2) { continue; } - (optionName == "extra_sort_columns" ? hasExtraSortColumns : hasFinalExtraSortColumns) = true; + hasFinalExtraSortColumns = true; if (!EnsureTupleSize(*option, 2, ctx.Expr)) { return IGraphTransformer::TStatus::Error; @@ -3060,6 +3420,24 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } } } + } else if (optionName == "final_extra_sort_keys") { + if (pass != 2) { + continue; + } + + if (!EnsureTupleSize(*option, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureTuple(option->Tail(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + for (const auto& x : option->Tail().Children()) { + if (!EnsureAtom(*x, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + } } else { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(option->Head().Pos()), TStringBuilder() << "Unsupported option: " << optionName)); @@ -3145,10 +3523,19 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN scanColumnsOnly = !scanColumnsOnly; if (scanColumnsOnly) { - input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>(outputRowType)); - return IGraphTransformer::TStatus::Ok; + break; } } + + if ((hasAggregations || GetSetting(options, "having")) && !GetSetting(options, "group_by")) { + // add empty group by section + auto newSettings = AddSetting(options, {}, "group_by", ctx.Expr.NewList(input->Pos(), {}), ctx.Expr); + output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); + return IGraphTransformer::TStatus::Repeat; + } + + input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>(outputRowType)); + return IGraphTransformer::TStatus::Ok; } IGraphTransformer::TStatus PgSelectWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) { @@ -3338,12 +3725,13 @@ IGraphTransformer::TStatus PgSelectWrapper(const TExprNode::TPtr& input, TExprNo projectionInputs.push_back(TInput{ TString(), resultStructType, resultColumnOrder, TInput::Projection, {} }); TExprNode::TListType newSortTupleItems; - if (data.ChildrenSize() > 0 && data.Child(0)->Child(0)->IsCallable("Void")) { - // no effective types yet, scan lambda bodies - if (!ValidateSort(projectionInputs, projectionInputs, {}, data, ctx, newSortTupleItems, false)) { - return IGraphTransformer::TStatus::Error; - } + // no effective types yet, scan lambda bodies + bool hasNewSort = false; + if (!ValidateSort(projectionInputs, projectionInputs, {}, data, ctx, hasNewSort, newSortTupleItems, false)) { + return IGraphTransformer::TStatus::Error; + } + if (hasNewSort) { auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems)); auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr); output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); @@ -3726,7 +4114,7 @@ IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprN if (!input->Child(3)->IsCallable("Void")) { auto& lambda = input->ChildRef(3); - const auto status = ConvertToLambda(lambda, ctx.Expr, hasType ? 2 : 1); + const auto status = ConvertToLambda(lambda, ctx.Expr, (!input->Child(2)->IsCallable("Void") && hasType) ? 2 : 1); if (status.Level != IGraphTransformer::TStatus::Ok) { return status; } @@ -3801,41 +4189,38 @@ IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprN } if (linkType == "all" || linkType == "any") { - if (input->Child(2)->IsCallable("Void")) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Missing test row type")); - return IGraphTransformer::TStatus::Error; - } - if (input->Child(3)->IsCallable("Void")) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Missing test row expression")); return IGraphTransformer::TStatus::Error; } - auto& lambda = input->ChildRef(3); - const auto status = ConvertToLambda(lambda, ctx.Expr, hasType ? 2 : 1); - if (status.Level != IGraphTransformer::TStatus::Ok) { - return status; - } + if (!input->Child(2)->IsCallable("Void")) { + auto& lambda = input->ChildRef(3); + const auto status = ConvertToLambda(lambda, ctx.Expr, hasType ? 2 : 1); + if (status.Level != IGraphTransformer::TStatus::Ok) { + return status; + } - auto rowType = input->Child(2)->GetTypeAnn()->Cast<TTypeExprType>()->GetType(); - if (!UpdateLambdaAllArgumentsTypes(lambda, { rowType, valueType }, ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } + auto rowType = input->Child(2)->GetTypeAnn()->Cast<TTypeExprType>()->GetType(); + if (!UpdateLambdaAllArgumentsTypes(lambda, { rowType, valueType }, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } - if (!lambda->GetTypeAnn()) { - return IGraphTransformer::TStatus::Repeat; - } + if (!lambda->GetTypeAnn()) { + return IGraphTransformer::TStatus::Repeat; + } - ui32 testExprType; - bool convertToPg; - if (!ExtractPgType(lambda->GetTypeAnn(), testExprType, convertToPg, lambda->Pos(), ctx.Expr)) { - return IGraphTransformer::TStatus::Error; - } + ui32 testExprType; + bool convertToPg; + if (!ExtractPgType(lambda->GetTypeAnn(), testExprType, convertToPg, lambda->Pos(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } - if (testExprType && testExprType != NPg::LookupType("bool").TypeId) { - ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), - TStringBuilder() << "Expected pg bool, but got " << NPg::LookupType(testExprType).Name)); - return IGraphTransformer::TStatus::Error; + if (testExprType && testExprType != NPg::LookupType("bool").TypeId) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), + TStringBuilder() << "Expected pg bool, but got " << NPg::LookupType(testExprType).Name)); + return IGraphTransformer::TStatus::Error; + } } } else { if (!input->Child(3)->IsCallable("Void")) { @@ -3854,5 +4239,27 @@ IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Ok; } +IGraphTransformer::TStatus PgGroupRefWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureArgsCount(*input, 3, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureStructType(*input->Child(0), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureType(*input->Child(1), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureAtom(*input->Child(2), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType()); + return IGraphTransformer::TStatus::Ok; +} + } // namespace NTypeAnnImpl } diff --git a/ydb/library/yql/core/type_ann/type_ann_pg.h b/ydb/library/yql/core/type_ann/type_ann_pg.h index 5cb1a257563..03f401b94bb 100644 --- a/ydb/library/yql/core/type_ann/type_ann_pg.h +++ b/ydb/library/yql/core/type_ann/type_ann_pg.h @@ -43,6 +43,7 @@ IGraphTransformer::TStatus PgLikeWrapper(const TExprNode::TPtr& input, TExprNode IGraphTransformer::TStatus PgInWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus PgBetweenWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); +IGraphTransformer::TStatus PgGroupRefWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); } // namespace NTypeAnnImpl } // namespace NYql diff --git a/ydb/library/yql/core/yql_expr_csee.cpp b/ydb/library/yql/core/yql_expr_csee.cpp index b0e75255215..8b7b2afd4e1 100644 --- a/ydb/library/yql/core/yql_expr_csee.cpp +++ b/ydb/library/yql/core/yql_expr_csee.cpp @@ -5,7 +5,6 @@ #include <ydb/library/yql/utils/log/log.h> #include <util/generic/hash_set.h> -#include <util/digest/murmur.h> #include <util/system/env.h> #include <tuple> @@ -14,17 +13,6 @@ namespace NYql { namespace { static constexpr bool UseDeterminsticHash = false; - ui64 CseeHash(const void* data, size_t size, ui64 initHash) { - return MurmurHash<ui64>(data, size, initHash); - } - - template<typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr> - ui64 CseeHash(T value, ui64 initHash) { - // workaround Coverity warning for Murmur when sizeof(T) < 8 - ui64 val = static_cast<ui64>(value); - return MurmurHash<ui64>(&val, sizeof(val), initHash); - } - struct TLambdaFrame { TLambdaFrame(const TExprNode* lambda, const TLambdaFrame* prev) : Lambda(lambda) diff --git a/ydb/library/yql/core/yql_expr_csee.h b/ydb/library/yql/core/yql_expr_csee.h index 177041d998e..804b8f21ff8 100644 --- a/ydb/library/yql/core/yql_expr_csee.h +++ b/ydb/library/yql/core/yql_expr_csee.h @@ -3,6 +3,8 @@ #include "yql_graph_transformer.h" #include "yql_type_annotation.h" +#include <util/digest/murmur.h> + namespace NYql { IGraphTransformer::TStatus EliminateCommonSubExpressions(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx, @@ -13,4 +15,15 @@ IGraphTransformer::TStatus UpdateCompletness(const TExprNode::TPtr& input, TExpr // 0 may not mean equality of nodes because we cannot distinguish order of external arguments in some cases. int CompareNodes(const TExprNode& left, const TExprNode& right); +inline ui64 CseeHash(const void* data, size_t size, ui64 initHash) { + return MurmurHash<ui64>(data, size, initHash); +} + +template<typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr> +inline ui64 CseeHash(T value, ui64 initHash) { + // workaround Coverity warning for Murmur when sizeof(T) < 8 + ui64 val = static_cast<ui64>(value); + return MurmurHash<ui64>(&val, sizeof(val), initHash); +} + } diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp index 271d5cd1324..027c4d19ecb 100644 --- a/ydb/library/yql/sql/pg/pg_sql.cpp +++ b/ydb/library/yql/sql/pg/pg_sql.cpp @@ -152,6 +152,7 @@ public: bool AllowAggregates = false; bool AllowOver = false; bool AllowReturnSet = false; + bool AllowSubLinks = false; TVector<TAstNode*>* WindowItems = nullptr; TString Scope; }; @@ -457,6 +458,7 @@ public: if (x->whereClause) { TExprSettings settings; settings.AllowColumns = true; + settings.AllowSubLinks = true; settings.Scope = "WHERE"; whereFilter = ParseExpr(x->whereClause, settings); if (!whereFilter) { @@ -469,17 +471,15 @@ public: TVector<TAstNode*> groupByItems; for (int i = 0; i < ListLength(x->groupClause); ++i) { auto node = ListNodeNth(x->groupClause, i); - if (NodeTag(node) != T_ColumnRef) { - NodeNotImplemented(x, node); - return nullptr; - } - - auto ref = ParseColumnRef(CAST_NODE(ColumnRef, node)); - if (!ref) { + TExprSettings settings; + settings.AllowColumns = true; + settings.Scope = "GROUP BY"; + auto expr = ParseExpr(node, settings); + if (!expr) { return nullptr; } - auto lambda = L(A("lambda"), QL(), ref); + auto lambda = L(A("lambda"), QL(), expr); groupByItems.push_back(L(A("PgGroup"), L(A("Void")), lambda)); } @@ -492,6 +492,7 @@ public: settings.AllowColumns = true; settings.Scope = "HAVING"; settings.AllowAggregates = true; + settings.AllowSubLinks = true; having = ParseExpr(x->havingClause, settings); if (!having) { return nullptr; @@ -567,6 +568,7 @@ public: settings.AllowColumns = true; settings.AllowAggregates = true; settings.AllowOver = true; + settings.AllowSubLinks = true; settings.WindowItems = &windowItems; settings.Scope = "SELECT"; auto x = ParseExpr(r->val, settings); @@ -710,6 +712,7 @@ public: if (value->limitCount) { TExprSettings settings; settings.AllowColumns = false; + settings.AllowSubLinks = true; settings.Scope = "LIMIT"; limit = ParseExpr(value->limitCount, settings); if (!limit) { @@ -720,6 +723,7 @@ public: if (value->limitOffset) { TExprSettings settings; settings.AllowColumns = false; + settings.AllowSubLinks = true; settings.Scope = "OFFSET"; offset = ParseExpr(value->limitOffset, settings); if (!offset) { @@ -1467,6 +1471,11 @@ public: } TAstNode* ParseSubLinkExpr(const SubLink* value, const TExprSettings& settings) { + if (!settings.AllowSubLinks) { + AddError(TStringBuilder() << "SubLinks are not allowed in: " << settings.Scope); + return nullptr; + } + TString linkType; TString operName; switch (value->subLinkType) { @@ -1840,17 +1849,15 @@ public: TVector<TAstNode*> groupByItems; for (int i = 0; i < ListLength(value->partitionClause); ++i) { auto node = ListNodeNth(value->partitionClause, i); - if (NodeTag(node) != T_ColumnRef) { - NodeNotImplemented(value, node); - return nullptr; - } - - auto ref = ParseColumnRef(CAST_NODE(ColumnRef, node)); - if (!ref) { + TExprSettings settings; + settings.AllowColumns = true; + settings.Scope = "PARTITITON BY"; + auto expr = ParseExpr(node, settings); + if (!expr) { return nullptr; } - auto lambda = L(A("lambda"), QL(), ref); + auto lambda = L(A("lambda"), QL(), expr); groupByItems.push_back(L(A("PgGroup"), L(A("Void")), lambda)); } @@ -2093,6 +2100,7 @@ public: TExprSettings settings; settings.AllowColumns = true; + settings.AllowSubLinks = true; settings.Scope = "ORDER BY"; settings.AllowAggregates = allowAggregates; auto expr = ParseExpr(value->node, settings); |