aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvvvv <vvvv@ydb.tech>2022-08-29 15:36:53 +0300
committervvvv <vvvv@ydb.tech>2022-08-29 15:36:53 +0300
commite8bb58c9a3be843f6df45c9da9b59f5bbd97e79d (patch)
tree2433478fa5e404a81e1545e0c1f8190b2bc1f5a5
parente886456269bc4bfef5c03779311e1fbb7a922518 (diff)
downloadydb-e8bb58c9a3be843f6df45c9da9b59f5bbd97e79d.tar.gz
initial implementation of aggregation by expressions
-rw-r--r--ydb/library/yql/core/common_opt/yql_co_pgselect.cpp105
-rw-r--r--ydb/library/yql/core/common_opt/yql_co_pgselect.h2
-rw-r--r--ydb/library/yql/core/common_opt/yql_co_simple2.cpp3
-rw-r--r--ydb/library/yql/core/type_ann/type_ann_core.cpp1
-rw-r--r--ydb/library/yql/core/type_ann/type_ann_pg.cpp851
-rw-r--r--ydb/library/yql/core/type_ann/type_ann_pg.h1
-rw-r--r--ydb/library/yql/core/yql_expr_csee.cpp12
-rw-r--r--ydb/library/yql/core/yql_expr_csee.h13
-rw-r--r--ydb/library/yql/sql/pg/pg_sql.cpp40
9 files changed, 743 insertions, 285 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
index 9415bdf58ee..a523537f962 100644
--- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
@@ -1785,13 +1785,48 @@ TExprNode::TPtr BuildGroup(TPositionHandle pos, TExprNode::TPtr list,
}
}
- if (groupBy) {
- for (const auto& group : groupBy->Tail().Children()) {
+ if (groupBy->Tail().ChildrenSize()) {
+ auto arg = ctx.NewArgument(pos, "row");
+ auto arguments = ctx.NewArguments(pos, { arg });
+
+ TExprNode::TListType newColumns;
+ for (ui32 i = 0; i < groupBy->Tail().ChildrenSize(); ++i) {
+ const auto& group = groupBy->Tail().Child(i);
const auto& lambda = group->Tail();
- YQL_ENSURE(lambda.IsLambda());
- YQL_ENSURE(lambda.Tail().IsCallable("Member"));
- keysItems.push_back(lambda.Tail().TailPtr());
+ auto name = "_yql_agg_key_" + ToString(i);
+ keysItems.push_back(ctx.NewAtom(pos, name));
+ newColumns.push_back(ctx.Builder(pos)
+ .List()
+ .Atom(0, name)
+ .Apply(1, lambda)
+ .With(0, arg)
+ .Seal()
+ .Seal()
+ .Build());
}
+
+ auto newColumnsNode = ctx.NewCallable(pos, "AsStruct", std::move(newColumns));
+ auto root = ctx.Builder(pos)
+ .Callable("FlattenMembers")
+ .List(0)
+ .Atom(0, "")
+ .Add(1, arg)
+ .Seal()
+ .List(1)
+ .Atom(0, "")
+ .Add(1, newColumnsNode)
+ .Seal()
+ .Seal()
+ .Build();
+
+ auto keyExprsLambda = ctx.NewLambda(pos, std::move(arguments), std::move(root));
+
+ list = ctx.Builder(pos)
+ .Callable("OrderedMap")
+ .Add(0, list)
+ .Add(1, keyExprsLambda)
+ .Seal()
+ .Build();
}
auto keys = ctx.NewList(pos, std::move(keysItems));
@@ -2383,33 +2418,20 @@ TExprNode::TPtr AddExtColumns(const TExprNode::TPtr& lambda, const TExprNode::TP
.Build();
}
-void BuildExtraSortColumns(bool hasAggregation, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& extraSortColumns, size_t aggIndexBegin, size_t aggIndexEnd, TVector<TString>& list) {
- if (extraSortColumns) {
- TVector<TString> extra;
+void BuildExtraSortColumns(const TExprNode::TPtr& groupBy,
+ const TExprNode::TPtr& extraSortColumns, const TExprNode::TPtr& extraSortKeys,
+ size_t aggIndexBegin, size_t aggIndexEnd, TVector<TString>& list) {
+ if (extraSortColumns && !groupBy) {
for (const auto& x : extraSortColumns->Tail().Children()) {
for (const auto& y : x->Children()) {
- extra.push_back(TString(y->Content()));
+ list.push_back(TString(y->Content()));
}
}
+ }
- if (!hasAggregation) {
- // all extra columns
- list.insert(list.end(), extra.begin(), extra.end());
- } else if (groupBy) {
- // keep only keys
- THashSet<TString> keys;
- for (const auto& group : groupBy->Tail().Children()) {
- const auto& lambda = group->Tail();
- YQL_ENSURE(lambda.IsLambda());
- YQL_ENSURE(lambda.Tail().IsCallable("Member"));
- keys.insert(TString(lambda.Tail().Tail().Content()));
- }
-
- for (const auto& x : extra) {
- if (keys.contains(x)) {
- list.push_back(x);
- }
- }
+ if (extraSortKeys) {
+ for (const auto& k : extraSortKeys->Tail().Children()) {
+ list.push_back(TString(k->Content()));
}
}
@@ -2418,7 +2440,8 @@ void BuildExtraSortColumns(bool hasAggregation, const TExprNode::TPtr& groupBy,
}
}
-TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, bool hasAggregation, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& extraSortColumns,
+TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, const TExprNode::TPtr& groupBy,
+ const TExprNode::TPtr& extraSortColumns, const TExprNode::TPtr& extraSortKeys,
size_t aggIndexBegin, size_t aggIndexEnd, TExprContext& ctx) {
return ctx.Builder(lambda->Pos())
.Lambda()
@@ -2435,7 +2458,7 @@ TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, bool hasAggre
.Callable(1, "AsStruct")
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & {
TVector<TString> list;
- BuildExtraSortColumns(hasAggregation, groupBy, extraSortColumns, aggIndexBegin, aggIndexEnd, list);
+ BuildExtraSortColumns(groupBy, extraSortColumns, extraSortKeys, aggIndexBegin, aggIndexEnd, list);
for (ui32 i = 0; i < list.size(); ++i) {
TStringBuf from = list[i];
from.SkipPrefix("_yql_extra_");
@@ -2457,7 +2480,8 @@ TExprNode::TPtr AddExtraSortColumns(const TExprNode::TPtr& lambda, bool hasAggre
.Build();
}
-TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, bool hasAggregation, const TExprNode::TPtr& groupBy, const TExprNode::TPtr& extraSortColumns,
+TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, const TExprNode::TPtr& groupBy,
+ const TExprNode::TPtr& extraSortColumns, const TExprNode::TPtr& extraSortKeys,
size_t aggIndexBegin, size_t aggIndexEnd, const TVector<TString>& sublinkColumns, TExprContext& ctx) {
return ctx.Builder(list->Pos())
.Callable("OrderedMap")
@@ -2469,7 +2493,7 @@ TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, bool hasAggr
.List(1)
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & {
TVector<TString> list;
- BuildExtraSortColumns(hasAggregation, groupBy, extraSortColumns, aggIndexBegin, aggIndexEnd, list);
+ BuildExtraSortColumns(groupBy, extraSortColumns, extraSortKeys, aggIndexBegin, aggIndexEnd, list);
list.insert(list.end(), sublinkColumns.begin(), sublinkColumns.end());
for (ui32 i = 0; i < list.size(); ++i) {
parent.Atom(i, list[i]);
@@ -2786,6 +2810,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
auto distinctOn = GetSetting(setItem->Tail(), "distinct_on");
auto sort = GetSetting(setItem->Tail(), "sort");
auto extraSortColumns = GetSetting(setItem->Tail(), "final_extra_sort_columns");
+ auto extraSortKeys = GetSetting(setItem->Tail(), "final_extra_sort_keys");
bool oneRow = !from;
TExprNode::TPtr list;
if (values) {
@@ -2859,7 +2884,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
RewriteAggs(projectionLambda, aggId, ctx, optCtx, false);
}
- if (!aggs.empty() || groupBy) {
+ if (groupBy) {
list = BuildGroup(node->Pos(), list, aggs, groupBy, finalExtTypes, ctx);
}
@@ -2875,10 +2900,10 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
projectionLambda = AddExtColumns(projectionLambda, finalExtTypes->TailPtr(), columnsItems, *subLinkId, ctx);
}
- bool hasExtraSortColumns = (extraSortColumns || (aggsSizeBeforeSort < aggs.size()));
+ bool hasExtraSortColumns = (extraSortColumns || extraSortKeys || (aggsSizeBeforeSort < aggs.size()));
if (hasExtraSortColumns) {
YQL_ENSURE(!distinctAll && !distinctOn);
- projectionLambda = AddExtraSortColumns(projectionLambda, (!aggs.empty() || groupBy), groupBy, extraSortColumns, aggsSizeBeforeSort, aggs.size(), ctx);
+ projectionLambda = AddExtraSortColumns(projectionLambda, groupBy, extraSortColumns, extraSortKeys, aggsSizeBeforeSort, aggs.size(), ctx);
}
list = ctx.Builder(node->Pos())
@@ -2910,7 +2935,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
}
if (hasExtraSortColumns) {
- list = RemoveExtraSortColumns(list, (!aggs.empty() || groupBy), groupBy, extraSortColumns, aggsSizeBeforeSort, aggs.size(), sublinkColumns, ctx);
+ list = RemoveExtraSortColumns(list, groupBy, extraSortColumns, extraSortKeys, aggsSizeBeforeSort, aggs.size(), sublinkColumns, ctx);
}
}
@@ -3133,4 +3158,14 @@ TExprNode::TPtr ExpandPgBetween(const TExprNode::TPtr& node, TExprContext& ctx,
.Build();
}
+TExprNode::TPtr ExpandPgGroupRef(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) {
+ Y_UNUSED(optCtx);
+ return ctx.Builder(node->Pos())
+ .Callable("Member")
+ .Add(0, node->HeadPtr())
+ .Atom(1, "_yql_agg_key_" + ToString(node->Tail().Content()))
+ .Seal()
+ .Build();
+}
+
} // namespace NYql
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.h b/ydb/library/yql/core/common_opt/yql_co_pgselect.h
index e3f2b30eed0..604660c9a85 100644
--- a/ydb/library/yql/core/common_opt/yql_co_pgselect.h
+++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.h
@@ -21,4 +21,6 @@ TExprNode::TPtr ExpandPgIn(const TExprNode::TPtr& node, TExprContext& ctx, TOpti
TExprNode::TPtr ExpandPgBetween(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx);
+TExprNode::TPtr ExpandPgGroupRef(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx);
+
} // namespace NYql
diff --git a/ydb/library/yql/core/common_opt/yql_co_simple2.cpp b/ydb/library/yql/core/common_opt/yql_co_simple2.cpp
index dea0259f347..c933fa17883 100644
--- a/ydb/library/yql/core/common_opt/yql_co_simple2.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_simple2.cpp
@@ -1,4 +1,5 @@
#include "yql_co.h"
+#include "yql_co_pgselect.h"
#include <ydb/library/yql/core/yql_opt_utils.h>
#include <ydb/library/yql/core/yql_expr_csee.h>
@@ -498,6 +499,8 @@ void RegisterCoSimpleCallables2(TCallableOptimizerMap& map) {
return node;
};
+
+ map["PgGroupRef"] = ExpandPgGroupRef;
}
}
diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp
index f56a0269910..d0a4f75bafb 100644
--- a/ydb/library/yql/core/type_ann/type_ann_core.cpp
+++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp
@@ -11404,6 +11404,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
Functions["PgBetween"] = &PgBetweenWrapper;
Functions["PgBetweenSym"] = &PgBetweenWrapper;
Functions["PgSubLink"] = &PgSubLinkWrapper;
+ Functions["PgGroupRef"] = &PgGroupRefWrapper;
Functions["AutoDemuxList"] = &AutoDemuxListWrapper;
Functions["AggrCountInit"] = &AggrCountInitWrapper;
diff --git a/ydb/library/yql/core/type_ann/type_ann_pg.cpp b/ydb/library/yql/core/type_ann/type_ann_pg.cpp
index fdcea8b25af..b1879b6e300 100644
--- a/ydb/library/yql/core/type_ann/type_ann_pg.cpp
+++ b/ydb/library/yql/core/type_ann/type_ann_pg.cpp
@@ -7,6 +7,7 @@
#include <ydb/library/yql/core/yql_expr_optimize.h>
#include <ydb/library/yql/core/yql_opt_utils.h>
#include <ydb/library/yql/core/yql_pg_utils.h>
+#include <ydb/library/yql/core/yql_expr_csee.h>
#include <ydb/library/yql/parser/pg_catalog/catalog.h>
@@ -1388,7 +1389,7 @@ void ScanSublinks(TExprNode::TPtr root, TNodeSet& sublinks) {
bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>& possibleAliases,
bool* hasStar, bool& hasColumnRef, THashSet<TString>& refs, THashMap<TString, THashSet<TString>>* qualifiedRefs,
- TExtContext& ctx, bool scanColumnsOnly, THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) {
+ TExtContext& ctx, bool scanColumnsOnly) {
bool isError = false;
VisitExpr(root, [&](const TExprNode::TPtr& node) {
if (node->IsCallable("PgSubLink")) {
@@ -1462,10 +1463,6 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>&
if (x.Priority == TInput::External) {
x.UsedExternalColumns.insert(TString(item->GetName()));
}
-
- if (x.Priority != TInput::Projection && nonProjectionColumns) {
- (*nonProjectionColumns)[inputIndex].insert(MakeAliasedColumn(x.Alias, item->GetName()));
- }
}
}
@@ -1521,10 +1518,6 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>&
if (x.Priority == TInput::External) {
x.UsedExternalColumns.insert(TString(node->Tail().Content()));
}
-
- if (x.Priority != TInput::Projection && nonProjectionColumns) {
- (*nonProjectionColumns)[inputIndex].insert(MakeAliasedColumn(x.Alias, node->Tail().Content()));
- }
}
}
@@ -1558,10 +1551,11 @@ bool ScanColumns(TExprNode::TPtr root, TInputs& inputs, const THashSet<TString>&
return !isError;
}
-bool ScanColumnsForSublinks(bool& needRebuildSubLinks, const TNodeSet& sublinks,
+bool ScanColumnsForSublinks(bool& needRebuildSubLinks, bool& needRebuildTestExprs, const TNodeSet& sublinks,
TInputs& inputs, const THashSet<TString>& possibleAliases, bool& hasColumnRef, THashSet<TString>& refs,
- THashMap<TString, THashSet<TString>>* qualifiedRefs, TExtContext& ctx, bool scanColumnsOnly, THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) {
+ THashMap<TString, THashSet<TString>>* qualifiedRefs, TExtContext& ctx, bool scanColumnsOnly) {
needRebuildSubLinks = false;
+ needRebuildTestExprs = false;
for (const auto& s : sublinks) {
if (s->Child(1)->IsCallable("Void")) {
needRebuildSubLinks = true;
@@ -1570,9 +1564,13 @@ bool ScanColumnsForSublinks(bool& needRebuildSubLinks, const TNodeSet& sublinks,
const auto& testRowLambda = *s->Child(3);
if (!testRowLambda.IsCallable("Void")) {
YQL_ENSURE(testRowLambda.IsLambda());
- if (!ScanColumns(testRowLambda.TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef,
- refs, qualifiedRefs, ctx, scanColumnsOnly, nonProjectionColumns)) {
- return false;
+ if (s->Child(2)->IsCallable("Void")) {
+ needRebuildTestExprs = true;
+
+ if (!ScanColumns(testRowLambda.TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef,
+ refs, qualifiedRefs, ctx, scanColumnsOnly)) {
+ return false;
+ }
}
}
}
@@ -1898,7 +1896,7 @@ IGraphTransformer::TStatus RebuildSubLinks(const TExprNode::TPtr& root, TExprNod
auto inputTypes = ctx.Expr.NewList(root->Pos(), std::move(inputTypesItems));
return OptimizeExpr(root, newRoot, [&](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr {
- if (!node->IsCallable("PgSubLink") || !node->Child(1)->IsCallable("Void")) {
+ if (!node->IsCallable("PgSubLink")) {
return node;
}
@@ -1907,22 +1905,31 @@ IGraphTransformer::TStatus RebuildSubLinks(const TExprNode::TPtr& root, TExprNod
}
auto children = node->ChildrenList();
- children[1] = inputTypes;
- if (!node->Child(3)->IsCallable("Void")) {
- // rebuild lambda for row test
- auto argNode = ctx.Expr.NewArgument(node->Pos(), "row");
- auto valueNode = ctx.Expr.NewArgument(node->Pos(), "value");
- auto arguments = ctx.Expr.NewArguments(node->Pos(), { argNode, valueNode });
- TExprNode::TPtr newLambdaRoot;
- auto status = RebuildLambdaColumns(node->Child(3)->TailPtr(), argNode, newLambdaRoot, inputs, nullptr, ctx);
- auto oldValueNode = node->Child(3)->Head().Child(0);
- newLambdaRoot = ctx.Expr.ReplaceNode(std::move(newLambdaRoot), *oldValueNode, valueNode);
- if (status == IGraphTransformer::TStatus::Error) {
- return nullptr;
- }
+ if (children[1]->IsCallable("Void")) {
+ children[1] = inputTypes;
+ } else {
+ if (!node->Child(3)->IsCallable("Void")) {
+ if (!children[2]->IsCallable("Void")) {
+ return node;
+ }
- children[2] = rowType;
- children[3] = ctx.Expr.NewLambda(node->Pos(), std::move(arguments), std::move(newLambdaRoot));
+ // rebuild lambda for row test
+ auto argNode = ctx.Expr.NewArgument(node->Pos(), "row");
+ auto valueNode = ctx.Expr.NewArgument(node->Pos(), "value");
+ auto arguments = ctx.Expr.NewArguments(node->Pos(), { argNode, valueNode });
+ TExprNode::TPtr newLambdaRoot;
+ auto status = RebuildLambdaColumns(node->Child(3)->TailPtr(), argNode, newLambdaRoot, inputs, nullptr, ctx);
+ auto oldValueNode = node->Child(3)->Head().Child(0);
+ newLambdaRoot = ctx.Expr.ReplaceNode(std::move(newLambdaRoot), *oldValueNode, valueNode);
+ if (status == IGraphTransformer::TStatus::Error) {
+ return nullptr;
+ }
+
+ children[2] = rowType;
+ children[3] = ctx.Expr.NewLambda(node->Pos(), std::move(arguments), std::move(newLambdaRoot));
+ } else {
+ return node;
+ }
}
return ctx.Expr.NewCallable(node->Pos(), node->Content(), std::move(children));
@@ -1952,11 +1959,38 @@ void MakeOptionalColumns(const TStructExprType*& structType, TExprContext& ctx)
structType = ctx.MakeType<TStructExprType>(newItems);
}
+void ScanAggregations(const TExprNode::TPtr& root, bool& hasAggregations) {
+ VisitExpr(root, [&](const TExprNode::TPtr& node) {
+ if (node->IsCallable("PgAgg")) {
+ hasAggregations = true;
+ return false;
+ }
+
+ if (node->IsCallable("PgSubLink")) {
+ return false;
+ }
+
+ return true;
+ });
+
+ TNodeSet sublinks;
+ ScanSublinks(root, sublinks);
+ for (const auto& s : sublinks) {
+ const auto& testRowLambda = *s->Child(3);
+ if (!testRowLambda.IsCallable("Void")) {
+ YQL_ENSURE(testRowLambda.IsLambda());
+ ScanAggregations(testRowLambda.TailPtr(), hasAggregations);
+ }
+ }
+}
+
bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases,
- const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newGroups, bool scanColumnsOnly) {
+ const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newGroups, bool& hasNewGroups, bool scanColumnsOnly) {
newGroups.clear();
+ hasNewGroups = false;
bool hasColumnRef = false;
- for (const auto& group : data.Children()) {
+ for (ui32 index = 0; index < data.ChildrenSize(); ++index) {
+ const auto& group = data.Child(index);
if (!group->IsCallable("PgGroup")) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(group->Pos()), "Expected PgGroup"));
return false;
@@ -1976,6 +2010,13 @@ bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases,
continue;
}
+ bool hasNestedAggregations = false;
+ ScanAggregations(group->Tail().TailPtr(), hasNestedAggregations);
+ if (hasNestedAggregations) {
+ ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(group->Pos()), "Nested aggregations aren't allowed"));
+ return false;
+ }
+
TVector<const TItemExprType*> items;
AddColumns(inputs, nullptr, refs, &qualifiedRefs, items, ctx.Expr);
auto effectiveType = ctx.Expr.MakeType<TStructExprType>(items);
@@ -2000,30 +2041,15 @@ bool ValidateGroups(TInputs& inputs, const THashSet<TString>& possibleAliases,
newChildren[1] = newLambda;
auto newGroup = ctx.Expr.NewCallable(group->Pos(), "PgGroup", std::move(newChildren));
newGroups.push_back(newGroup);
+ hasNewGroups = true;
+ } else {
+ newGroups.push_back(data.ChildPtr(index));
}
}
return true;
}
-THashMap<ui32, TSet<TString>> LoadExtraColumns(TExprNode::TPtr& value, ui32 inputsCount) {
- THashMap<ui32, TSet<TString>> ret;
- YQL_ENSURE(value->ChildrenSize() == inputsCount);
- for (ui32 inputIndex = 0; inputIndex < inputsCount; ++inputIndex) {
- auto child = value->Child(inputIndex);
- if (child->ChildrenSize() == 0) {
- continue;
- }
-
- auto& set = ret[inputIndex];
- for (const auto& x : child->Children()) {
- set.insert(TString(x->Content()));
- }
- }
-
- return ret;
-}
-
TExprNode::TPtr SaveExtraColumns(TPositionHandle pos, const THashMap<ui32, TSet<TString>>& columns, ui32 inputsCount, TExprContext& ctx) {
TExprNode::TListType groups;
for (ui32 i = 0; i < inputsCount; ++i) {
@@ -2042,30 +2068,6 @@ TExprNode::TPtr SaveExtraColumns(TPositionHandle pos, const THashMap<ui32, TSet<
return ctx.NewList(pos, std::move(groups));
}
-bool AddExtraColumnsForSublinks(TExprNode::TPtr& value, const TExprNode& data, ui32 inputsCount, TExprContext& ctx) {
- THashMap<ui32, TSet<TString>> columns = LoadExtraColumns(value, inputsCount);
- for (auto oneSort : data.Children()) {
- TNodeSet sublinks;
- ScanSublinks(oneSort->Child(1)->TailPtr(), sublinks);
- for (const auto& s : sublinks) {
- auto c = ExtractExternalColumns(s->Tail());
- for (const auto& [name, index] : c) {
- YQL_ENSURE(index < inputsCount);
- columns[index].insert("_yql_extra_" + name);
- }
- }
- }
-
- value = SaveExtraColumns(value->Pos(), columns, inputsCount, ctx);
- for (const auto&[index, set] : columns) {
- if (!set.empty()) {
- return true;
- }
- }
-
- return false;
-}
-
TMap<TString, ui32> ExtractExternalColumns(const TExprNode& select) {
TMap<TString, ui32> res;
const auto& option = select.Head();
@@ -2090,24 +2092,25 @@ TMap<TString, ui32> ExtractExternalColumns(const TExprNode& select) {
}
bool ValidateSort(TInputs& inputs, TInputs& subLinkInputs, const THashSet<TString>& possibleAliases,
- const TExprNode& data, TExtContext& ctx, TExprNode::TListType& newSorts, bool scanColumnsOnly,
- THashMap<ui32, TSet<TString>>* nonProjectionColumns = nullptr) {
+ const TExprNode& data, TExtContext& ctx, bool& hasNewSort, TExprNode::TListType& newSorts, bool scanColumnsOnly) {
newSorts.clear();
- for (auto oneSort : data.Children()) {
+ for (ui32 index = 0; index < data.ChildrenSize(); ++index) {
+ auto oneSort = data.Child(index);
TNodeSet sublinks;
ScanSublinks(oneSort->Child(1)->TailPtr(), sublinks);
- bool hasColumnRef;
+ bool hasColumnRef = false;
THashSet<TString> refs;
THashMap<TString, THashSet<TString>> qualifiedRefs;
if (!ScanColumns(oneSort->Child(1)->TailPtr(), inputs, possibleAliases, nullptr, hasColumnRef,
- refs, &qualifiedRefs, ctx, scanColumnsOnly, nonProjectionColumns)) {
+ refs, &qualifiedRefs, ctx, scanColumnsOnly)) {
return false;
}
bool needRebuildSubLinks;
- if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, subLinkInputs, possibleAliases,
- hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly, nonProjectionColumns)) {
+ bool needRebuildTestExprs;
+ if (!ScanColumnsForSublinks(needRebuildSubLinks, needRebuildTestExprs, sublinks, subLinkInputs, possibleAliases,
+ hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly)) {
return IGraphTransformer::TStatus::Error;
}
@@ -2124,40 +2127,319 @@ bool ValidateSort(TInputs& inputs, TInputs& subLinkInputs, const THashSet<TStrin
auto typeNode = ExpandType(oneSort->Pos(), *effectiveType, ctx.Expr);
+ auto newLambda = oneSort->ChildPtr(1);
auto newChildren = oneSort->ChildrenList();
- if (needRebuildSubLinks) {
+ bool hasChanges = false;
+ if (needRebuildSubLinks || needRebuildTestExprs) {
auto arguments = ctx.Expr.NewArguments(oneSort->Pos(), { });
TExprNode::TPtr newRoot;
- auto status = RebuildSubLinks(oneSort->Child(1)->TailPtr(), newRoot, sublinks, subLinkInputs, typeNode, ctx);
+ auto status = RebuildSubLinks(newLambda->TailPtr(), newRoot, sublinks, subLinkInputs, typeNode, ctx);
if (status == IGraphTransformer::TStatus::Error) {
return false;
}
- auto newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot));
+ newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot));
newChildren[1] = newLambda;
- } else if (oneSort->Child(1)->Head().ChildrenSize() == 0) {
+ hasChanges = true;
+ }
+
+ if (!needRebuildSubLinks && newLambda->Head().ChildrenSize() == 0) {
auto argNode = ctx.Expr.NewArgument(oneSort->Pos(), "row");
auto arguments = ctx.Expr.NewArguments(oneSort->Pos(), { argNode });
TExprNode::TPtr newRoot;
- auto status = RebuildLambdaColumns(oneSort->Child(1)->TailPtr(), argNode, newRoot, inputs, nullptr, ctx);
+ auto status = RebuildLambdaColumns(newLambda->TailPtr(), argNode, newRoot, inputs, nullptr, ctx);
if (status == IGraphTransformer::TStatus::Error) {
return false;
}
- auto newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot));
-
+ newLambda = ctx.Expr.NewLambda(oneSort->Pos(), std::move(arguments), std::move(newRoot));
newChildren[0] = typeNode;
newChildren[1] = newLambda;
+ hasChanges = true;
}
- auto newSort = ctx.Expr.ChangeChildren(*oneSort, std::move(newChildren));
- newSorts.push_back(newSort);
+ if (hasChanges) {
+ auto newSort = ctx.Expr.ChangeChildren(*oneSort, std::move(newChildren));
+ newSorts.push_back(newSort);
+ hasNewSort = true;
+ } else {
+ newSorts.push_back(data.ChildPtr(index));
+ }
}
return true;
}
+ui64 CalculateExprHash(const TExprNode& root, TNodeMap<ui64>& visited) {
+ auto it = visited.find(&root);
+ if (it != visited.end()) {
+ return it->second;
+ }
+
+ ui64 hash = 0;
+ switch (root.Type()) {
+ case TExprNode::EType::Callable:
+ hash = CseeHash(root.Content().Size(), hash);
+ hash = CseeHash(root.Content().Data(), root.Content().Size(), hash);
+ [[fallthrough]];
+ case TExprNode::EType::List:
+ hash = CseeHash(root.ChildrenSize(), hash);
+ for (ui32 i = 0; i < root.ChildrenSize(); ++i) {
+ hash = CalculateExprHash(*root.Child(i), visited);
+ }
+
+ break;
+ case TExprNode::EType::Atom:
+ hash = CseeHash(root.Content().Size(), hash);
+ hash = CseeHash(root.Content().Data(), root.Content().Size(), hash);
+ hash = CseeHash(root.GetFlagsToCompare(), hash);
+ break;
+ default:
+ YQL_ENSURE(false, "Unexpected node type");
+ }
+
+ visited.emplace(&root, hash);
+ return hash;
+}
+
+bool ExprNodesEquals(const TExprNode& left, const TExprNode& right, TNodeSet& visited) {
+ if (!visited.emplace(&left).second) {
+ return true;
+ }
+
+ if (left.Type() != right.Type()) {
+ return false;
+ }
+
+ switch (left.Type()) {
+ case TExprNode::EType::Callable:
+ if (left.Content() != right.Content()) {
+ return false;
+ }
+
+ [[fallthrough]];
+ case TExprNode::EType::List:
+ if (left.ChildrenSize() != right.ChildrenSize()) {
+ return false;
+ }
+
+ for (ui32 i = 0; i < left.ChildrenSize(); ++i) {
+ if (!ExprNodesEquals(*left.Child(i), *right.Child(i), visited)) {
+ return false;
+ }
+ }
+
+ return true;
+ case TExprNode::EType::Atom:
+ return left.Content() == right.Content() && left.GetFlagsToCompare() == right.GetFlagsToCompare();
+ case TExprNode::EType::Argument:
+ return left.GetArgIndex() == right.GetArgIndex();
+ default:
+ YQL_ENSURE(false, "Unexpected node type");
+ }
+}
+
+struct TGroupExpr {
+ TExprNode::TPtr OriginalRoot;
+ ui64 Hash;
+ TExprNode::TPtr TypeNode;
+};
+
+bool ScanExprForMatchedGroup(const TExprNode::TPtr& row, const TExprNode& root, const TVector<TGroupExpr>& exprs,
+ TNodeOnNodeOwnedMap& replaces, TNodeMap<ui64>& hashVisited, TNodeMap<bool>& nodeVisited, TExprContext& ctx) {
+ auto it = nodeVisited.find(&root);
+ if (it != nodeVisited.end()) {
+ return it->second;
+ }
+
+ if (root.IsCallable("PgSubLink")) {
+ const auto& testRowLambda = *root.Child(3);
+ if (!testRowLambda.IsCallable("Void")) {
+ hashVisited[testRowLambda.Head().Child(0)] = 0; // original row
+ hashVisited[testRowLambda.Head().Child(1)] = 1; // sublink value
+ ScanExprForMatchedGroup(testRowLambda.Head().ChildPtr(0), testRowLambda.Tail(),
+ exprs, replaces, hashVisited, nodeVisited, ctx);
+ }
+
+ nodeVisited[&root] = false;
+ return false;
+ }
+
+ bool hasChanges = false;
+ for (const auto& child : root.Children()) {
+ if (!ScanExprForMatchedGroup(row, *child, exprs, replaces, hashVisited, nodeVisited, ctx)) {
+ hasChanges = true;
+ }
+ }
+
+ if (hasChanges) {
+ nodeVisited[&root] = false;
+ return false;
+ }
+
+ ui64 hash = CalculateExprHash(root, hashVisited);
+ for (ui32 i = 0; i < exprs.size(); ++i) {
+ if (exprs[i].Hash != hash) {
+ continue;
+ }
+
+ TNodeSet equalsVisited;
+ if (!ExprNodesEquals(*exprs[i].OriginalRoot, root, equalsVisited)) {
+ continue;
+ }
+
+ replaces[&root] = ctx.Builder(root.Pos())
+ .Callable("PgGroupRef")
+ .Add(0, row)
+ .Add(1, exprs[i].TypeNode)
+ .Atom(2, ToString(i))
+ .Seal()
+ .Build();
+ nodeVisited[&root] = false;
+ return false;
+ }
+
+ nodeVisited[&root] = true;
+ return true;
+}
+
+TExprNode::TPtr ReplaceGroupByExpr(const TExprNode::TPtr& root, const TExprNode& groups, TExprContext& ctx) {
+ if (!groups.ChildrenSize()) {
+ return root;
+ }
+
+ // calculate hashes
+ TVector<TGroupExpr> exprs;
+ TExprNode::TListType typeNodes;
+ for (ui32 index = 0; index < groups.ChildrenSize(); ++index) {
+ const auto& g = *groups.Child(index);
+ const auto& lambda = g.Tail();
+ TNodeMap<ui64> visited;
+ visited[&lambda.Head().Head()] = 0;
+ exprs.push_back({
+ lambda.TailPtr(),
+ CalculateExprHash(lambda.Tail(), visited),
+ ExpandType(g.Pos(), *lambda.GetTypeAnn(), ctx)
+ });
+ }
+
+ TNodeOnNodeOwnedMap replaces;
+ TNodeMap<ui64> hashVisited;
+ TNodeMap<bool> nodeVisited;
+ hashVisited[&root->Head().Head()] = 0;
+ ScanExprForMatchedGroup(root->Head().HeadPtr(), root->Tail(), exprs, replaces, hashVisited, nodeVisited, ctx);
+ auto ret = root;
+ if (replaces.empty()) {
+ return ret;
+ }
+
+ TOptimizeExprSettings settings(nullptr);
+ settings.VisitTuples = true;
+ auto status = RemapExpr(ret, ret, replaces, ctx, settings);
+ YQL_ENSURE(status != IGraphTransformer::TStatus::Error);
+ return ret;
+}
+
+bool GatherExtraSortColumns(const TExprNode& data, const TInputs& inputs, TExprNode::TPtr& extraInputColumns, TExprNode::TPtr& extraKeys, TExprContext& ctx) {
+ ui32 inputsCount = inputs.size() - 1;
+ THashMap<ui32, TSet<TString>> columns;
+ TSet<TString> keys;
+ extraKeys = nullptr;
+ for (auto oneSort : data.Children()) {
+ TNodeSet sublinks;
+ ScanSublinks(oneSort->Child(1)->TailPtr(), sublinks);
+
+ auto scanLambda = [&](const auto& lambda) {
+ auto arg = &lambda.Head().Head();
+ VisitExpr(lambda.TailPtr(), [&](const TExprNode::TPtr& node) {
+ if (node->IsCallable("PgSubLink")) {
+ return false;
+ }
+
+ if (node->IsCallable("PgGroupRef")) {
+ keys.insert("_yql_agg_key_" + ToString(node->Tail().Content()));
+ }
+
+ if (node->IsCallable("Member") && &node->Head() == arg) {
+ TStringBuf alias;
+ TStringBuf column = NTypeAnnImpl::RemoveAlias(node->Tail().Content(), alias);
+
+ TMaybe<ui32> index;
+ for (ui32 priority : {TInput::Projection, TInput::Current, TInput::External}) {
+ for (ui32 inputIndex = 0; inputIndex < inputs.size(); ++inputIndex) {
+ auto& x = inputs[inputIndex];
+ if (priority != x.Priority) {
+ continue;
+ }
+
+ if (!alias.empty() && (x.Alias.empty() || alias != x.Alias)) {
+ continue;
+ }
+
+ auto pos = x.Type->FindItem(column);
+ if (pos) {
+ index = inputIndex;
+ break;
+ }
+ }
+
+ if (index) {
+ break;
+ }
+ }
+
+ YQL_ENSURE(index);
+ if (inputs[*index].Priority != TInput::Projection) {
+ columns[*index].insert(TString(node->Tail().Content()));
+ }
+
+ return false;
+ }
+
+ return true;
+ });
+ };
+
+ scanLambda(*oneSort->Child(1));
+ for (const auto& s : sublinks) {
+ auto c = ExtractExternalColumns(s->Tail());
+ for (const auto&[name, index] : c) {
+ YQL_ENSURE(index < inputsCount);
+ columns[index].insert("_yql_extra_" + name);
+ }
+
+ if (!s->Child(3)->IsCallable("Void")) {
+ scanLambda(*s->Child(3));
+ }
+ }
+ }
+
+ extraInputColumns = SaveExtraColumns(data.Pos(), columns, inputsCount, ctx);
+ if (!keys.empty()) {
+ extraKeys = ctx.Builder(data.Pos())
+ .List()
+ .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder & {
+ ui32 i = 0;
+ for (const auto& k : keys) {
+ parent.Atom(i++, k);
+ }
+
+ return parent;
+ })
+ .Seal()
+ .Build();
+ }
+
+ for (const auto&[index, set] : columns) {
+ if (!set.empty()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) {
if (!EnsureArgsCount(*input, 1, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
@@ -2169,8 +2451,10 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
bool scanColumnsOnly = true;
+ const TStructExprType* outputRowType;
+ bool hasAggregations = false;
for (;;) {
- const TStructExprType* outputRowType = nullptr;
+ outputRowType = nullptr;
TInputs inputs;
TInputs joinInputs;
THashSet<TString> possibleAliases;
@@ -2180,14 +2464,14 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
bool hasExtTypes = false;
bool hasDistinctAll = false;
bool hasDistinctOn = false;
- bool hasExtraSortColumns = false;
bool hasFinalExtraSortColumns = false;
+ TExprNode::TPtr groupBy;
// pass 0 - from/values
// pass 1 - join
// pass 2 - ext_types/final_ext_types, extra_sort_solumns
// pass 3 - where, group_by
- // pass 4 - window
+ // pass 4 - having, window
// pass 5 - result
// pass 6 - distinct_all, distinct_on
// pass 7 - sort
@@ -2292,16 +2576,21 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
return IGraphTransformer::TStatus::Error;
}
- TVector<const TItemExprType*> outputItems;
- TExprNode::TListType newResult;
- bool hasStar = false;
- bool hasColumnRef = false;
- for (const auto& column : data.Children()) {
+ for (ui32 index = 0; index < data.ChildrenSize(); ++index) {
+ const auto& column = data.Child(index);
if (!column->IsCallable("PgResultItem")) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(column->Pos()), "Expected PgResultItem"));
return IGraphTransformer::TStatus::Error;
}
+ }
+ TVector<const TItemExprType*> outputItems;
+ TExprNode::TListType newResult;
+ bool hasNewResult = false;
+ bool hasStar = false;
+ bool hasColumnRef = false;
+ for (ui32 index = 0; index < data.ChildrenSize(); ++index) {
+ const auto& column = data.Child(index);
YQL_ENSURE(column->Tail().IsLambda());
THashSet<TString> refs;
THashMap<TString, THashSet<TString>> qualifiedRefs;
@@ -2316,7 +2605,8 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
bool needRebuildSubLinks;
- if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, joinInputs, possibleAliases,
+ bool needRebuildTestExprs;
+ if (!ScanColumnsForSublinks(needRebuildSubLinks, needRebuildTestExprs, sublinks, joinInputs, possibleAliases,
hasColumnRef, refs, &qualifiedRefs, ctx, scanColumnsOnly)) {
return IGraphTransformer::TStatus::Error;
}
@@ -2332,36 +2622,47 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
auto typeNode = ExpandType(column->Pos(), *effectiveType, ctx.Expr);
auto newColumnChildren = column->ChildrenList();
- if (needRebuildSubLinks) {
+ auto newLambda = column->TailPtr();
+ bool hasChanges = false;
+ if (needRebuildSubLinks || needRebuildTestExprs) {
auto arguments = ctx.Expr.NewArguments(column->Pos(), { });
TExprNode::TPtr newRoot;
- auto status = RebuildSubLinks(column->Tail().TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx);
+ auto status = RebuildSubLinks(newLambda->TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx);
if (status == IGraphTransformer::TStatus::Error) {
return IGraphTransformer::TStatus::Error;
}
- auto newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot));
+ newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot));
newColumnChildren[2] = newLambda;
- } else if (column->Tail().Head().ChildrenSize() == 0) {
+ hasChanges = true;
+ }
+
+ if (!needRebuildSubLinks && newLambda->Head().ChildrenSize() == 0) {
auto argNode = ctx.Expr.NewArgument(column->Pos(), "row");
auto arguments = ctx.Expr.NewArguments(column->Pos(), { argNode });
auto expandedColumns = column->HeadPtr();
TExprNode::TPtr newRoot;
- auto status = RebuildLambdaColumns(column->Tail().TailPtr(), argNode, newRoot, joinInputs, &expandedColumns, ctx);
+ auto status = RebuildLambdaColumns(newLambda->TailPtr(), argNode, newRoot, joinInputs, &expandedColumns, ctx);
if (status == IGraphTransformer::TStatus::Error) {
return IGraphTransformer::TStatus::Error;
}
- auto newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot));
+ newLambda = ctx.Expr.NewLambda(column->Pos(), std::move(arguments), std::move(newRoot));
newColumnChildren[0] = expandedColumns;
newColumnChildren[1] = typeNode;
newColumnChildren[2] = newLambda;
+ hasChanges = true;
}
- auto newColumn = ctx.Expr.NewCallable(column->Pos(), "PgResultItem", std::move(newColumnChildren));
- newResult.push_back(newColumn);
+ if (hasChanges) {
+ auto newColumn = ctx.Expr.NewCallable(column->Pos(), "PgResultItem", std::move(newColumnChildren));
+ newResult.push_back(newColumn);
+ hasNewResult = true;
+ } else {
+ newResult.push_back(data.ChildPtr(index));
+ }
}
}
else {
@@ -2379,11 +2680,13 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
if (!ValidateWindowRefs(column->TailPtr(), windows ? &windows->Tail() : nullptr, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
+
+ newResult.push_back(data.ChildPtr(index));
}
}
if (!scanColumnsOnly) {
- if (!newResult.empty()) {
+ if (hasNewResult) {
auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newResult));
auto newSettings = ReplaceSetting(options, {}, "result", resultValue, ctx.Expr);
output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
@@ -2394,6 +2697,33 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
if (!outputRowType->Validate(data.Pos(), ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
+
+ for (const auto& column : data.Children()) {
+ ScanAggregations(column->TailPtr(), hasAggregations);
+ }
+
+ if (groupBy) {
+ TExprNode::TListType newResultItems;
+ bool hasChanges = false;
+ for (ui32 index = 0; index < data.ChildrenSize(); ++index) {
+ const auto& column = *data.Child(index);
+ auto ret = ReplaceGroupByExpr(column.TailPtr(), groupBy->Tail(), ctx.Expr);
+ if (ret != column.TailPtr()) {
+ hasChanges = true;
+ newResultItems.push_back(ctx.Expr.ChangeChild(column, 2, std::move(ret)));
+ }
+ else {
+ newResultItems.push_back(data.ChildPtr(index));
+ }
+ }
+
+ if (hasChanges) {
+ auto newResult = ctx.Expr.NewList(input->Pos(), std::move(newResultItems));
+ auto newSettings = ReplaceSetting(options, {}, "result", newResult, ctx.Expr);
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ }
}
}
else if (optionName == "from") {
@@ -2523,7 +2853,11 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
}
else if (optionName == "where" || optionName == "having") {
- if (pass != 3) {
+ if (optionName == "where" && pass != 3) {
+ continue;
+ }
+
+ if (optionName == "having" && pass != 4) {
continue;
}
@@ -2542,7 +2876,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
TNodeSet sublinks;
ScanSublinks(data.Child(1)->TailPtr(), sublinks);
- bool hasColumnRef;
+ bool hasColumnRef = false;
THashSet<TString> refs;
THashMap<TString, THashSet<TString>> qualifiedRefs;
if (!ScanColumns(data.Child(1)->TailPtr(), joinInputs, possibleAliases, nullptr, hasColumnRef,
@@ -2551,7 +2885,8 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
bool needRebuildSubLinks;
- if (!ScanColumnsForSublinks(needRebuildSubLinks, sublinks, joinInputs, possibleAliases, hasColumnRef,
+ bool needRebuildTestExprs;
+ if (!ScanColumnsForSublinks(needRebuildSubLinks, needRebuildTestExprs, sublinks, joinInputs, possibleAliases, hasColumnRef,
refs, &qualifiedRefs, ctx, scanColumnsOnly)) {
return IGraphTransformer::TStatus::Error;
}
@@ -2565,37 +2900,40 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
auto typeNode = ExpandType(data.Pos(), *effectiveType, ctx.Expr);
+ auto newLambda = data.ChildPtr(1);
+ bool hasChanges = false;
+ auto newChildren = data.ChildrenList();
- if (needRebuildSubLinks) {
+ if (needRebuildSubLinks || needRebuildTestExprs) {
auto arguments = ctx.Expr.NewArguments(data.Pos(), {});
TExprNode::TPtr newRoot;
- auto status = RebuildSubLinks(data.Child(1)->TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx);
+ auto status = RebuildSubLinks(newLambda->TailPtr(), newRoot, sublinks, joinInputs, typeNode, ctx);
if (status == IGraphTransformer::TStatus::Error) {
return IGraphTransformer::TStatus::Error;
}
- auto newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot));
-
- auto newChildren = data.ChildrenList();
+ newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot));
newChildren[1] = newLambda;
- auto newWhere = ctx.Expr.NewCallable(data.Pos(), "PgWhere", std::move(newChildren));
- auto newSettings = ReplaceSetting(options, {}, TString(optionName), newWhere, ctx.Expr);
- output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
- return IGraphTransformer::TStatus::Repeat;
- } else if (data.Child(1)->Head().ChildrenSize() == 0) {
+ hasChanges = true;
+ }
+
+ if (!needRebuildSubLinks && newLambda->Head().ChildrenSize() == 0) {
auto argNode = ctx.Expr.NewArgument(data.Pos(), "row");
auto arguments = ctx.Expr.NewArguments(data.Pos(), { argNode });
TExprNode::TPtr newRoot;
- auto status = RebuildLambdaColumns(data.Child(1)->TailPtr(), argNode, newRoot, joinInputs, nullptr, ctx);
+ auto status = RebuildLambdaColumns(newLambda->TailPtr(), argNode, newRoot, joinInputs, nullptr, ctx);
if (status == IGraphTransformer::TStatus::Error) {
return IGraphTransformer::TStatus::Error;
}
- auto newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot));
+ newLambda = ctx.Expr.NewLambda(data.Pos(), std::move(arguments), std::move(newRoot));
- auto newChildren = data.ChildrenList();
newChildren[0] = typeNode;
newChildren[1] = newLambda;
+ hasChanges = true;
+ }
+
+ if (hasChanges) {
auto newWhere = ctx.Expr.NewCallable(data.Pos(), "PgWhere", std::move(newChildren));
auto newSettings = ReplaceSetting(options, {}, TString(optionName), newWhere, ctx.Expr);
output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
@@ -2621,6 +2959,15 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
return IGraphTransformer::TStatus::Error;
}
}
+
+ if (!scanColumnsOnly && optionName == "having" && groupBy) {
+ auto ret = ReplaceGroupByExpr(data.TailPtr(), groupBy->Tail(), ctx.Expr);
+ if (ret != data.TailPtr()) {
+ auto newSettings = ReplaceSetting(options, {}, "having", ctx.Expr.ChangeChild(data, 1, std::move(ret)), ctx.Expr);
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ }
}
else if (optionName == "join_ops") {
if (pass != 1) {
@@ -2754,7 +3101,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
else {
const auto& quals = child->Tail();
- bool hasColumnRef;
+ bool hasColumnRef = false;
THashSet<TString> refs;
THashMap<TString, THashSet<TString>> qualifiedRefs;
if (!ScanColumns(quals.Child(1)->TailPtr(), groupInputs, groupPossibleAliases, nullptr, hasColumnRef,
@@ -2821,6 +3168,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
continue;
}
+ groupBy = option;
if (!EnsureTupleSize(*option, 2, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
@@ -2831,11 +3179,12 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
TExprNode::TListType newGroups;
- if (!ValidateGroups(joinInputs, possibleAliases, data, ctx, newGroups, scanColumnsOnly)) {
+ bool hasNewGroups = false;
+ if (!ValidateGroups(joinInputs, possibleAliases, data, ctx, newGroups, hasNewGroups, scanColumnsOnly)) {
return IGraphTransformer::TStatus::Error;
}
- if (!scanColumnsOnly && !newGroups.empty()) {
+ if (!scanColumnsOnly && hasNewGroups) {
auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newGroups));
auto newSettings = ReplaceSetting(options, {}, "group_by", resultValue, ctx.Expr);
output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
@@ -2874,7 +3223,6 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
auto partitions = x->Child(2);
auto sort = x->Child(3);
- bool needRebuildSort = false;
bool needRebuildPartition = false;
for (const auto& p : partitions->Children()) {
if (p->Child(0)->IsCallable("Void")) {
@@ -2883,39 +3231,33 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
}
- for (const auto& s : sort->Children()) {
- if (s->Child(0)->IsCallable("Void")) {
- needRebuildSort = true;
- break;
- }
- }
-
- if (!needRebuildSort && !needRebuildPartition) {
- newWindow.push_back(x);
- continue;
- }
-
- hasChanges = true;
auto newChildren = x->ChildrenList();
if (needRebuildPartition) {
TExprNode::TListType newGroups;
- if (!ValidateGroups(joinInputs, possibleAliases, *partitions, ctx, newGroups, scanColumnsOnly)) {
+ bool hasNewGroups = false;
+ if (!ValidateGroups(joinInputs, possibleAliases, *partitions, ctx, newGroups, hasNewGroups, scanColumnsOnly)) {
return IGraphTransformer::TStatus::Error;
}
newChildren[2] = ctx.Expr.NewList(x->Pos(), std::move(newGroups));
}
- if (needRebuildSort) {
- TExprNode::TListType newSorts;
- if (!ValidateSort(joinInputs, joinInputs, possibleAliases, *sort, ctx, newSorts, scanColumnsOnly)) {
- return IGraphTransformer::TStatus::Error;
- }
+ bool hasNewSort = false;
+ TExprNode::TListType newSorts;
+ if (!ValidateSort(joinInputs, joinInputs, possibleAliases, *sort, ctx, hasNewSort, newSorts, scanColumnsOnly)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (hasNewSort) {
newChildren[3] = ctx.Expr.NewList(x->Pos(), std::move(newSorts));
}
- newWindow.push_back(ctx.Expr.ChangeChildren(*x, std::move(newChildren)));
+ if (needRebuildPartition || hasNewSort) {
+ hasChanges = true;
+ newWindow.push_back(ctx.Expr.ChangeChildren(*x, std::move(newChildren)));
+ } else {
+ newWindow.push_back(x);
+ }
}
if (!scanColumnsOnly && hasChanges) {
@@ -2953,14 +3295,15 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
return IGraphTransformer::TStatus::Error;
}
+ bool hasNewGroups = false;
TExprNode::TListType newGroups;
TInputs projectionInputs;
projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), TInput::Projection, {} });
- if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups, scanColumnsOnly)) {
+ if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups, hasNewGroups, scanColumnsOnly)) {
return IGraphTransformer::TStatus::Error;
}
- if (!newGroups.empty()) {
+ if (hasNewGroups) {
auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newGroups));
auto newSettings = ReplaceSetting(options, {}, "distinct_on", resultValue, ctx.Expr);
output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
@@ -2986,60 +3329,77 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
return IGraphTransformer::TStatus::Error;
}
- bool needScan = false;
- for (const auto& x : data.Children()) {
- if (!x->IsCallable("PgSort")) {
- ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(x->Pos()), "Expected PgSort"));
- }
-
- if (x->Child(0)->IsCallable("Void")) {
- needScan = true;
- }
- }
-
TInputs projectionInputs = joinInputs;
// all row columns are visible too, but projection's columns have more priority
if (!scanColumnsOnly) {
projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), TInput::Projection, {} });
}
- if (needScan) {
- TExprNode::TListType newSortTupleItems;
- // no effective types yet, scan lambda bodies
- THashMap<ui32, TSet<TString>> extraSortColumns;
- if (!ValidateSort(projectionInputs, joinInputs, possibleAliases, data, ctx, newSortTupleItems, scanColumnsOnly, &extraSortColumns)) {
- return IGraphTransformer::TStatus::Error;
- }
+ bool hasNewSort = false;
+ TExprNode::TListType newSortTupleItems;
+ // no effective types yet, scan lambda bodies
+ if (!ValidateSort(projectionInputs, joinInputs, possibleAliases, data, ctx, hasNewSort, newSortTupleItems, scanColumnsOnly)) {
+ return IGraphTransformer::TStatus::Error;
+ }
- if (!scanColumnsOnly) {
- auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems));
- auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr);
- auto extra = SaveExtraColumns(data.Pos(), extraSortColumns, joinInputs.size(), ctx.Expr);
- newSettings = AddSetting(*newSettings, {}, "extra_sort_columns", extra, ctx.Expr);
+ if (hasNewSort && !scanColumnsOnly) {
+ auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems));
+ auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr);
- output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
- return IGraphTransformer::TStatus::Repeat;
- }
- } else {
- auto extra = GetSetting(options, "extra_sort_columns");
- if (extra) {
- auto value = extra->TailPtr();
- bool hasColumns = AddExtraColumnsForSublinks(value, data, joinInputs.size(), ctx.Expr);
- auto newSettings = RemoveSetting(options, "extra_sort_columns", ctx.Expr);
- if (hasColumns) {
- newSettings = AddSetting(*newSettings, {}, "final_extra_sort_columns", value, ctx.Expr);
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ if (!scanColumnsOnly) {
+ if (groupBy) {
+ TExprNode::TListType newSortItems;
+ bool hasChanges = false;
+ for (ui32 index = 0; index < data.ChildrenSize(); ++index) {
+ const auto& column = *data.Child(index);
+ auto ret = ReplaceGroupByExpr(column.ChildPtr(1), groupBy->Tail(), ctx.Expr);
+ if (ret != column.ChildPtr(1)) {
+ hasChanges = true;
+ newSortItems.push_back(ctx.Expr.ChangeChild(column, 1, std::move(ret)));
+ }
+ else {
+ newSortItems.push_back(data.ChildPtr(index));
+ }
}
- output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
- return IGraphTransformer::TStatus::Repeat;
+ if (hasChanges) {
+ auto newSort = ctx.Expr.NewList(input->Pos(), std::move(newSortItems));
+ auto newSettings = ReplaceSetting(options, {}, "sort", newSort, ctx.Expr);
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat;
+ }
+ }
+
+ if (!GetSetting(options, "final_extra_sort_columns") && !GetSetting(options, "final_extra_sort_keys")) {
+ TExprNode::TPtr extraColumns;
+ TExprNode::TPtr extraKeys;
+ auto hasExtraColumns = GatherExtraSortColumns(data, projectionInputs, extraColumns, extraKeys, ctx.Expr);
+ if (hasExtraColumns || extraKeys) {
+ TExprNode::TPtr newSettings;
+ if (hasExtraColumns) {
+ newSettings = AddSetting(newSettings ? *newSettings : options, {}, "final_extra_sort_columns", extraColumns, ctx.Expr);
+ }
+
+ if (extraKeys) {
+ newSettings = AddSetting(newSettings ? *newSettings : options, {}, "final_extra_sort_keys", extraKeys, ctx.Expr);
+ }
+
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat;
+ }
}
}
- } else if (optionName == "extra_sort_columns" || optionName == "final_extra_sort_columns") {
+ }
+ else if (optionName == "final_extra_sort_columns") {
if (pass != 2) {
continue;
}
- (optionName == "extra_sort_columns" ? hasExtraSortColumns : hasFinalExtraSortColumns) = true;
+ hasFinalExtraSortColumns = true;
if (!EnsureTupleSize(*option, 2, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
@@ -3060,6 +3420,24 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
}
}
+ } else if (optionName == "final_extra_sort_keys") {
+ if (pass != 2) {
+ continue;
+ }
+
+ if (!EnsureTupleSize(*option, 2, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!EnsureTuple(option->Tail(), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ for (const auto& x : option->Tail().Children()) {
+ if (!EnsureAtom(*x, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ }
} else {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(option->Head().Pos()),
TStringBuilder() << "Unsupported option: " << optionName));
@@ -3145,10 +3523,19 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
scanColumnsOnly = !scanColumnsOnly;
if (scanColumnsOnly) {
- input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>(outputRowType));
- return IGraphTransformer::TStatus::Ok;
+ break;
}
}
+
+ if ((hasAggregations || GetSetting(options, "having")) && !GetSetting(options, "group_by")) {
+ // add empty group by section
+ auto newSettings = AddSetting(options, {}, "group_by", ctx.Expr.NewList(input->Pos(), {}), ctx.Expr);
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat;
+ }
+
+ input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>(outputRowType));
+ return IGraphTransformer::TStatus::Ok;
}
IGraphTransformer::TStatus PgSelectWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExtContext& ctx) {
@@ -3338,12 +3725,13 @@ IGraphTransformer::TStatus PgSelectWrapper(const TExprNode::TPtr& input, TExprNo
projectionInputs.push_back(TInput{ TString(), resultStructType, resultColumnOrder, TInput::Projection, {} });
TExprNode::TListType newSortTupleItems;
- if (data.ChildrenSize() > 0 && data.Child(0)->Child(0)->IsCallable("Void")) {
- // no effective types yet, scan lambda bodies
- if (!ValidateSort(projectionInputs, projectionInputs, {}, data, ctx, newSortTupleItems, false)) {
- return IGraphTransformer::TStatus::Error;
- }
+ // no effective types yet, scan lambda bodies
+ bool hasNewSort = false;
+ if (!ValidateSort(projectionInputs, projectionInputs, {}, data, ctx, hasNewSort, newSortTupleItems, false)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+ if (hasNewSort) {
auto newSortTuple = ctx.Expr.NewList(data.Pos(), std::move(newSortTupleItems));
auto newSettings = ReplaceSetting(options, {}, "sort", newSortTuple, ctx.Expr);
output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
@@ -3726,7 +4114,7 @@ IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprN
if (!input->Child(3)->IsCallable("Void")) {
auto& lambda = input->ChildRef(3);
- const auto status = ConvertToLambda(lambda, ctx.Expr, hasType ? 2 : 1);
+ const auto status = ConvertToLambda(lambda, ctx.Expr, (!input->Child(2)->IsCallable("Void") && hasType) ? 2 : 1);
if (status.Level != IGraphTransformer::TStatus::Ok) {
return status;
}
@@ -3801,41 +4189,38 @@ IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprN
}
if (linkType == "all" || linkType == "any") {
- if (input->Child(2)->IsCallable("Void")) {
- ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Missing test row type"));
- return IGraphTransformer::TStatus::Error;
- }
-
if (input->Child(3)->IsCallable("Void")) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Missing test row expression"));
return IGraphTransformer::TStatus::Error;
}
- auto& lambda = input->ChildRef(3);
- const auto status = ConvertToLambda(lambda, ctx.Expr, hasType ? 2 : 1);
- if (status.Level != IGraphTransformer::TStatus::Ok) {
- return status;
- }
+ if (!input->Child(2)->IsCallable("Void")) {
+ auto& lambda = input->ChildRef(3);
+ const auto status = ConvertToLambda(lambda, ctx.Expr, hasType ? 2 : 1);
+ if (status.Level != IGraphTransformer::TStatus::Ok) {
+ return status;
+ }
- auto rowType = input->Child(2)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- if (!UpdateLambdaAllArgumentsTypes(lambda, { rowType, valueType }, ctx.Expr)) {
- return IGraphTransformer::TStatus::Error;
- }
+ auto rowType = input->Child(2)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
+ if (!UpdateLambdaAllArgumentsTypes(lambda, { rowType, valueType }, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
- if (!lambda->GetTypeAnn()) {
- return IGraphTransformer::TStatus::Repeat;
- }
+ if (!lambda->GetTypeAnn()) {
+ return IGraphTransformer::TStatus::Repeat;
+ }
- ui32 testExprType;
- bool convertToPg;
- if (!ExtractPgType(lambda->GetTypeAnn(), testExprType, convertToPg, lambda->Pos(), ctx.Expr)) {
- return IGraphTransformer::TStatus::Error;
- }
+ ui32 testExprType;
+ bool convertToPg;
+ if (!ExtractPgType(lambda->GetTypeAnn(), testExprType, convertToPg, lambda->Pos(), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
- if (testExprType && testExprType != NPg::LookupType("bool").TypeId) {
- ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()),
- TStringBuilder() << "Expected pg bool, but got " << NPg::LookupType(testExprType).Name));
- return IGraphTransformer::TStatus::Error;
+ if (testExprType && testExprType != NPg::LookupType("bool").TypeId) {
+ ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()),
+ TStringBuilder() << "Expected pg bool, but got " << NPg::LookupType(testExprType).Name));
+ return IGraphTransformer::TStatus::Error;
+ }
}
} else {
if (!input->Child(3)->IsCallable("Void")) {
@@ -3854,5 +4239,27 @@ IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprN
return IGraphTransformer::TStatus::Ok;
}
+IGraphTransformer::TStatus PgGroupRefWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
+ Y_UNUSED(output);
+ if (!EnsureArgsCount(*input, 3, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!EnsureStructType(*input->Child(0), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!EnsureType(*input->Child(1), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!EnsureAtom(*input->Child(2), ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ input->SetTypeAnn(input->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType());
+ return IGraphTransformer::TStatus::Ok;
+}
+
} // namespace NTypeAnnImpl
}
diff --git a/ydb/library/yql/core/type_ann/type_ann_pg.h b/ydb/library/yql/core/type_ann/type_ann_pg.h
index 5cb1a257563..03f401b94bb 100644
--- a/ydb/library/yql/core/type_ann/type_ann_pg.h
+++ b/ydb/library/yql/core/type_ann/type_ann_pg.h
@@ -43,6 +43,7 @@ IGraphTransformer::TStatus PgLikeWrapper(const TExprNode::TPtr& input, TExprNode
IGraphTransformer::TStatus PgInWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
IGraphTransformer::TStatus PgBetweenWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
IGraphTransformer::TStatus PgSubLinkWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
+IGraphTransformer::TStatus PgGroupRefWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx);
} // namespace NTypeAnnImpl
} // namespace NYql
diff --git a/ydb/library/yql/core/yql_expr_csee.cpp b/ydb/library/yql/core/yql_expr_csee.cpp
index b0e75255215..8b7b2afd4e1 100644
--- a/ydb/library/yql/core/yql_expr_csee.cpp
+++ b/ydb/library/yql/core/yql_expr_csee.cpp
@@ -5,7 +5,6 @@
#include <ydb/library/yql/utils/log/log.h>
#include <util/generic/hash_set.h>
-#include <util/digest/murmur.h>
#include <util/system/env.h>
#include <tuple>
@@ -14,17 +13,6 @@ namespace NYql {
namespace {
static constexpr bool UseDeterminsticHash = false;
- ui64 CseeHash(const void* data, size_t size, ui64 initHash) {
- return MurmurHash<ui64>(data, size, initHash);
- }
-
- template<typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
- ui64 CseeHash(T value, ui64 initHash) {
- // workaround Coverity warning for Murmur when sizeof(T) < 8
- ui64 val = static_cast<ui64>(value);
- return MurmurHash<ui64>(&val, sizeof(val), initHash);
- }
-
struct TLambdaFrame {
TLambdaFrame(const TExprNode* lambda, const TLambdaFrame* prev)
: Lambda(lambda)
diff --git a/ydb/library/yql/core/yql_expr_csee.h b/ydb/library/yql/core/yql_expr_csee.h
index 177041d998e..804b8f21ff8 100644
--- a/ydb/library/yql/core/yql_expr_csee.h
+++ b/ydb/library/yql/core/yql_expr_csee.h
@@ -3,6 +3,8 @@
#include "yql_graph_transformer.h"
#include "yql_type_annotation.h"
+#include <util/digest/murmur.h>
+
namespace NYql {
IGraphTransformer::TStatus EliminateCommonSubExpressions(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx,
@@ -13,4 +15,15 @@ IGraphTransformer::TStatus UpdateCompletness(const TExprNode::TPtr& input, TExpr
// 0 may not mean equality of nodes because we cannot distinguish order of external arguments in some cases.
int CompareNodes(const TExprNode& left, const TExprNode& right);
+inline ui64 CseeHash(const void* data, size_t size, ui64 initHash) {
+ return MurmurHash<ui64>(data, size, initHash);
+}
+
+template<typename T, std::enable_if_t<std::is_integral<T>::value>* = nullptr>
+inline ui64 CseeHash(T value, ui64 initHash) {
+ // workaround Coverity warning for Murmur when sizeof(T) < 8
+ ui64 val = static_cast<ui64>(value);
+ return MurmurHash<ui64>(&val, sizeof(val), initHash);
+}
+
}
diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp
index 271d5cd1324..027c4d19ecb 100644
--- a/ydb/library/yql/sql/pg/pg_sql.cpp
+++ b/ydb/library/yql/sql/pg/pg_sql.cpp
@@ -152,6 +152,7 @@ public:
bool AllowAggregates = false;
bool AllowOver = false;
bool AllowReturnSet = false;
+ bool AllowSubLinks = false;
TVector<TAstNode*>* WindowItems = nullptr;
TString Scope;
};
@@ -457,6 +458,7 @@ public:
if (x->whereClause) {
TExprSettings settings;
settings.AllowColumns = true;
+ settings.AllowSubLinks = true;
settings.Scope = "WHERE";
whereFilter = ParseExpr(x->whereClause, settings);
if (!whereFilter) {
@@ -469,17 +471,15 @@ public:
TVector<TAstNode*> groupByItems;
for (int i = 0; i < ListLength(x->groupClause); ++i) {
auto node = ListNodeNth(x->groupClause, i);
- if (NodeTag(node) != T_ColumnRef) {
- NodeNotImplemented(x, node);
- return nullptr;
- }
-
- auto ref = ParseColumnRef(CAST_NODE(ColumnRef, node));
- if (!ref) {
+ TExprSettings settings;
+ settings.AllowColumns = true;
+ settings.Scope = "GROUP BY";
+ auto expr = ParseExpr(node, settings);
+ if (!expr) {
return nullptr;
}
- auto lambda = L(A("lambda"), QL(), ref);
+ auto lambda = L(A("lambda"), QL(), expr);
groupByItems.push_back(L(A("PgGroup"), L(A("Void")), lambda));
}
@@ -492,6 +492,7 @@ public:
settings.AllowColumns = true;
settings.Scope = "HAVING";
settings.AllowAggregates = true;
+ settings.AllowSubLinks = true;
having = ParseExpr(x->havingClause, settings);
if (!having) {
return nullptr;
@@ -567,6 +568,7 @@ public:
settings.AllowColumns = true;
settings.AllowAggregates = true;
settings.AllowOver = true;
+ settings.AllowSubLinks = true;
settings.WindowItems = &windowItems;
settings.Scope = "SELECT";
auto x = ParseExpr(r->val, settings);
@@ -710,6 +712,7 @@ public:
if (value->limitCount) {
TExprSettings settings;
settings.AllowColumns = false;
+ settings.AllowSubLinks = true;
settings.Scope = "LIMIT";
limit = ParseExpr(value->limitCount, settings);
if (!limit) {
@@ -720,6 +723,7 @@ public:
if (value->limitOffset) {
TExprSettings settings;
settings.AllowColumns = false;
+ settings.AllowSubLinks = true;
settings.Scope = "OFFSET";
offset = ParseExpr(value->limitOffset, settings);
if (!offset) {
@@ -1467,6 +1471,11 @@ public:
}
TAstNode* ParseSubLinkExpr(const SubLink* value, const TExprSettings& settings) {
+ if (!settings.AllowSubLinks) {
+ AddError(TStringBuilder() << "SubLinks are not allowed in: " << settings.Scope);
+ return nullptr;
+ }
+
TString linkType;
TString operName;
switch (value->subLinkType) {
@@ -1840,17 +1849,15 @@ public:
TVector<TAstNode*> groupByItems;
for (int i = 0; i < ListLength(value->partitionClause); ++i) {
auto node = ListNodeNth(value->partitionClause, i);
- if (NodeTag(node) != T_ColumnRef) {
- NodeNotImplemented(value, node);
- return nullptr;
- }
-
- auto ref = ParseColumnRef(CAST_NODE(ColumnRef, node));
- if (!ref) {
+ TExprSettings settings;
+ settings.AllowColumns = true;
+ settings.Scope = "PARTITITON BY";
+ auto expr = ParseExpr(node, settings);
+ if (!expr) {
return nullptr;
}
- auto lambda = L(A("lambda"), QL(), ref);
+ auto lambda = L(A("lambda"), QL(), expr);
groupByItems.push_back(L(A("PgGroup"), L(A("Void")), lambda));
}
@@ -2093,6 +2100,7 @@ public:
TExprSettings settings;
settings.AllowColumns = true;
+ settings.AllowSubLinks = true;
settings.Scope = "ORDER BY";
settings.AllowAggregates = allowAggregates;
auto expr = ParseExpr(value->node, settings);