aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author vvvv <vvvv@ydb.tech> 2022-08-05 15:14:24 +0300
committer vvvv <vvvv@ydb.tech> 2022-08-05 15:14:24 +0300
commit 76482e2669b58f7b8485ab95b13e23b1d162207b (patch)
tree f9bcf84ab8b6f529ed1ad41d1838ab70778ac2ca
parent 535d0b70fe9e03c1897c7f284d070a13bb4b89bf (diff)
download ydb-76482e2669b58f7b8485ab95b13e23b1d162207b.tar.gz
distinct on
-rw-r--r-- ydb/library/yql/core/common_opt/yql_co_pgselect.cpp 108
-rw-r--r-- ydb/library/yql/core/type_ann/type_ann_pg.cpp 42
-rw-r--r-- ydb/library/yql/sql/pg/pg_sql.cpp 21
3 files changed, 166 insertions, 5 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
index 8bc2b617fd8..bc071ee04ef 100644
--- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
+++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp
@@ -2082,6 +2082,105 @@ TExprNode::TPtr BuildSort(TPositionHandle pos, const TExprNode::TPtr& sort, cons
.Build();
}
+TExprNode::TPtr BuildDistinctOn(TPositionHandle pos, const TExprNode::TPtr& list, const TExprNode::TPtr& distinctOn,
+ const TExprNode::TPtr& sort, TExprContext& ctx) { // sort may be null: it is only dereferenced after the null check below
+ // Expands DISTINCT ON: add a RowNumber() column through CalcOverWindow keyed by the DISTINCT ON columns, filter by RowNumber() == 1, then remove the helper column.
+ // Step 1: collect the WinOnRows arguments - the frame bounds first, then the (column name, value) pair.
+ TExprNode::TListType args;
+ auto begin = ctx.NewCallable(pos, "Void", {}); // value paired with the "begin" atom; presumably Void means an unbounded start - confirm against WinOnRows
+ auto end = ctx.NewCallable(pos, "Int32", { ctx.NewAtom(pos, "0") }); // value paired with the "end" atom; presumably offset 0 means the current row - confirm
+ args.push_back(ctx.Builder(pos)
+ .List()
+ .List(0)
+ .Atom(0, "begin")
+ .Add(1, begin)
+ .Seal()
+ .List(1)
+ .Atom(0, "end")
+ .Add(1, end)
+ .Seal()
+ .Seal()
+ .Build());
+ // Step 2: the windowed value is RowNumber applied to TypeOf(list).
+ auto value = ctx.Builder(pos)
+ .Callable("RowNumber")
+ .Callable(0, "TypeOf")
+ .Add(0, list)
+ .Seal()
+ .Seal()
+ .Build();
+ // The result is stored under "_yql_row_number"; the Filter and Map built below refer to the same name.
+ args.push_back(ctx.Builder(pos)
+ .List()
+ .Atom(0, "_yql_row_number")
+ .Add(1, value)
+ .Seal()
+ .Build());
+ // Step 3: wrap the single WinOnRows node into the list of frames handed to CalcOverWindow.
+ auto winOnRows = ctx.NewCallable(pos, "WinOnRows", std::move(args));
+ auto frames = ctx.Builder(pos)
+ .List()
+ .Add(0, winOnRows)
+ .Seal()
+ .Build();
+ // Step 4: derive the partition keys from the DISTINCT ON items.
+ TExprNode::TListType keys;
+ for (auto p : distinctOn->Children()) {
+ YQL_ENSURE(p->IsCallable("PgGroup")); // every DISTINCT ON item must be a PgGroup callable
+ const auto& member = p->Tail().Tail(); // last child of the PgGroup's last child
+ YQL_ENSURE(member.IsCallable("Member")); // only a direct Member access is accepted here
+ keys.push_back(member.TailPtr()); // last child of Member; presumably the column-name atom - confirm
+ }
+ // Step 5: the sort argument stays Void unless a sort setting with at least one child in its tail was passed.
+ auto keysNode = ctx.NewList(pos, std::move(keys));
+ auto sortNode = ctx.NewCallable(pos, "Void", {});
+ if (sort && sort->Tail().ChildrenSize() > 0) {
+ sortNode = BuildSortTraits(pos, sort->Tail(), list, ctx);
+ }
+ // Step 6: CalcOverWindow(list, keys, sort, frames).
+ auto ret = ctx.Builder(pos)
+ .Callable("CalcOverWindow")
+ .Add(0, list)
+ .Add(1, keysNode)
+ .Add(2, sortNode)
+ .Add(3, frames)
+ .Seal()
+ .Build();
+ // Step 7: keep only rows whose "_yql_row_number" member equals Uint64 1.
+ ret = ctx.Builder(pos)
+ .Callable("Filter")
+ .Add(0, ret)
+ .Lambda(1)
+ .Param("row")
+ .Callable("==")
+ .Callable(0, "Member")
+ .Arg(0, "row")
+ .Atom(1, "_yql_row_number")
+ .Seal()
+ .Callable(1, "Uint64")
+ .Atom(0, "1")
+ .Seal()
+ .Seal()
+ .Seal()
+ .Seal()
+ .Build();
+ // Step 8: remove the helper "_yql_row_number" member from every remaining row.
+ ret = ctx.Builder(pos)
+ .Callable("Map")
+ .Add(0, ret)
+ .Lambda(1)
+ .Param("row")
+ .Callable("RemoveMember")
+ .Arg(0, "row")
+ .Atom(1, "_yql_row_number")
+ .Seal()
+ .Seal()
+ .Seal()
+ .Build();
+ // The returned node is Map(Filter(CalcOverWindow(...))).
+ return ret;
+}
+
TExprNode::TPtr BuildOffset(TPositionHandle pos, const TExprNode::TPtr& offset, const TExprNode::TPtr& list, TExprContext& ctx) {
return ctx.Builder(pos)
.Callable("Skip")
@@ -2232,7 +2331,10 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
columnsItems.push_back(ctx.NewAtom(node->Pos(), x));
}
+ auto sort = GetSetting(node->Head(), "sort");
auto setItems = GetSetting(node->Head(), "set_items");
+ const bool onlyOneSetItem = (setItems->Tail().ChildrenSize() == 1);
+
TExprNode::TListType setItemNodes;
TVector<TColumnOrder> columnOrders;
for (auto setItem : setItems->Tail().Children()) {
@@ -2253,6 +2355,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
auto having = GetSetting(setItem->Tail(), "having");
auto window = GetSetting(setItem->Tail(), "window");
auto distinctAll = GetSetting(setItem->Tail(), "distinct_all");
+ auto distinctOn = GetSetting(setItem->Tail(), "distinct_on");
bool oneRow = !from;
TExprNode::TPtr list;
if (values) {
@@ -2316,6 +2419,8 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
if (distinctAll) {
list = ctx.NewCallable(node->Pos(), "SqlAggregateAll", { list });
+ } else if (distinctOn) {
+ list = BuildDistinctOn(node->Pos(), list, distinctOn->TailPtr(), onlyOneSetItem ? sort : nullptr, ctx);
}
}
@@ -2323,13 +2428,12 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
}
TExprNode::TPtr list;
- if (setItemNodes.size() == 1) {
+ if (onlyOneSetItem == 1) {
list = setItemNodes.front();
} else {
list = ExpandPositionalUnionAll(*node, columnOrders, setItemNodes, ctx, optCtx);
}
- auto sort = GetSetting(node->Head(), "sort");
if (sort && sort->Tail().ChildrenSize() > 0) {
list = BuildSort(node->Pos(), sort, list, ctx);
}
diff --git a/ydb/library/yql/core/type_ann/type_ann_pg.cpp b/ydb/library/yql/core/type_ann/type_ann_pg.cpp
index 7ccbd71d270..c9db06e1e47 100644
--- a/ydb/library/yql/core/type_ann/type_ann_pg.cpp
+++ b/ydb/library/yql/core/type_ann/type_ann_pg.cpp
@@ -2042,6 +2042,8 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
bool hasValues = false;
bool hasJoinOps = false;
bool hasExtTypes = false;
+ bool hasDistinctAll = false;
+ bool hasDistinctOn = false;
// pass 0 - from/values
// pass 1 - join
@@ -2049,7 +2051,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
// pass 3 - where, group_by
// pass 4 - window
// pass 5 - result
- // pass 6 - distinct_all
+ // pass 6 - distinct_all, distinct_on
for (ui32 pass = 0; pass < 7; ++pass) {
if (pass > 1 && !inputs.empty() && !hasJoinOps) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Missing join_ops"));
@@ -2785,6 +2787,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
}
}
else if (optionName == "distinct_all") {
+ hasDistinctAll = true;
if (pass != 6) {
continue;
}
@@ -2792,6 +2795,38 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
if (!EnsureTupleSize(*option, 1, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
+ } else if (optionName == "distinct_on") {
+ hasDistinctOn = true;
+ if (pass != 6) {
+ continue;
+ }
+
+ if (scanColumnsOnly) {
+ continue;
+ }
+
+ if (!EnsureTupleSize(*option, 2, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ const auto& data = option->Tail();
+ if (!EnsureTuple(data, ctx.Expr)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ TExprNode::TListType newGroups;
+ TInputs projectionInputs;
+ projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), false, {} });
+ if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups)) {
+ return IGraphTransformer::TStatus::Error;
+ }
+
+ if (!newGroups.empty()) {
+ auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newGroups));
+ auto newSettings = ReplaceSetting(options, {}, "distinct_on", resultValue, ctx.Expr);
+ output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings));
+ return IGraphTransformer::TStatus::Repeat;
+ }
} else {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(option->Head().Pos()),
TStringBuilder() << "Unsupported option: " << optionName));
@@ -2810,6 +2845,11 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN
return IGraphTransformer::TStatus::Error;
}
+ if (hasDistinctAll && hasDistinctOn) {
+ ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Distinct ON isn't compatible with distinct over all columns"));
+ return IGraphTransformer::TStatus::Error;
+ }
+
auto extTypes = GetSetting(options, "ext_types");
if (extTypes && scanColumnsOnly) {
const auto& data = extTypes->Tail();
diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp
index 7f28a5f64ef..d66f14229f7 100644
--- a/ydb/library/yql/sql/pg/pg_sql.cpp
+++ b/ydb/library/yql/sql/pg/pg_sql.cpp
@@ -306,12 +306,26 @@ public:
TVector<TAstNode*> setItemNodes;
for (const auto& x : setItems) {
bool hasDistinctAll = false;
+ TVector<TAstNode*> distinctOnItems;
if (x->distinctClause) {
if (linitial(x->distinctClause) == NULL) {
hasDistinctAll = true;
} else {
- AddError("SelectStmt: not supported DISTINCT ON");
- return nullptr;
+ for (int i = 0; i < ListLength(x->distinctClause); ++i) {
+ auto node = ListNodeNth(x->distinctClause, i);
+ if (NodeTag(node) != T_ColumnRef) {
+ NodeNotImplemented(x->distinctClause, node);
+ return nullptr;
+ }
+
+ auto ref = ParseColumnRef(CAST_NODE(ColumnRef, node));
+ if (!ref) {
+ return nullptr;
+ }
+
+ auto lambda = L(A("lambda"), QL(), ref);
+ distinctOnItems.push_back(L(A("PgGroup"), L(A("Void")), lambda));
+ }
}
}
@@ -647,6 +661,9 @@ public:
if (hasDistinctAll) {
setItemOptions.push_back(QL(QA("distinct_all")));
+ } else if (!distinctOnItems.empty()) {
+ auto distinctOn = QVL(distinctOnItems.data(), distinctOnItems.size());
+ setItemOptions.push_back(QL(QA("distinct_on"), distinctOn));
}
auto setItem = L(A("PgSetItem"), QVL(setItemOptions.data(), setItemOptions.size()));