diff options
author | vvvv <vvvv@ydb.tech> | 2022-08-05 15:14:24 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2022-08-05 15:14:24 +0300 |
commit | 76482e2669b58f7b8485ab95b13e23b1d162207b (patch) | |
tree | f9bcf84ab8b6f529ed1ad41d1838ab70778ac2ca | |
parent | 535d0b70fe9e03c1897c7f284d070a13bb4b89bf (diff) | |
download | ydb-76482e2669b58f7b8485ab95b13e23b1d162207b.tar.gz |
distinct on
-rw-r--r-- | ydb/library/yql/core/common_opt/yql_co_pgselect.cpp | 108 | ||||
-rw-r--r-- | ydb/library/yql/core/type_ann/type_ann_pg.cpp | 42 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/pg_sql.cpp | 21 |
3 files changed, 166 insertions, 5 deletions
diff --git a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp index 8bc2b617fd8..bc071ee04ef 100644 --- a/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_pgselect.cpp @@ -2082,6 +2082,105 @@ TExprNode::TPtr BuildSort(TPositionHandle pos, const TExprNode::TPtr& sort, cons .Build(); } +TExprNode::TPtr BuildDistinctOn(TPositionHandle pos, const TExprNode::TPtr& list, const TExprNode::TPtr& distinctOn, + const TExprNode::TPtr& sort, TExprContext& ctx) { + // filter by RowNumber() == 1 + + TExprNode::TListType args; + auto begin = ctx.NewCallable(pos, "Void", {});
+ auto end = ctx.NewCallable(pos, "Int32", { ctx.NewAtom(pos, "0") }); + args.push_back(ctx.Builder(pos) + .List() + .List(0) + .Atom(0, "begin") + .Add(1, begin) + .Seal() + .List(1) + .Atom(0, "end") + .Add(1, end) + .Seal() + .Seal() + .Build()); + + auto value = ctx.Builder(pos) + .Callable("RowNumber") + .Callable(0, "TypeOf") + .Add(0, list) + .Seal() + .Seal() + .Build(); + + args.push_back(ctx.Builder(pos) + .List() + .Atom(0, "_yql_row_number") + .Add(1, value) + .Seal() + .Build()); + + auto winOnRows = ctx.NewCallable(pos, "WinOnRows", std::move(args)); + auto frames = ctx.Builder(pos) + .List() + .Add(0, winOnRows) + .Seal() + .Build(); + + TExprNode::TListType keys; + for (auto p : distinctOn->Children()) { + YQL_ENSURE(p->IsCallable("PgGroup")); + const auto& member = p->Tail().Tail(); + YQL_ENSURE(member.IsCallable("Member")); + keys.push_back(member.TailPtr()); + } + + auto keysNode = ctx.NewList(pos, std::move(keys)); + auto sortNode = ctx.NewCallable(pos, "Void", {}); + if (sort && sort->Tail().ChildrenSize() > 0) { + sortNode = BuildSortTraits(pos, sort->Tail(), list, ctx); + } + + auto ret = ctx.Builder(pos)
+ .Callable("CalcOverWindow")
+ .Add(0, list)
+ .Add(1, keysNode)
+ .Add(2, sortNode)
+ .Add(3, frames)
+ .Seal()
+ .Build(); + + ret = ctx.Builder(pos) + .Callable("Filter") + .Add(0, ret) + .Lambda(1) + .Param("row") + .Callable("==") + .Callable(0, "Member") + .Arg(0, "row") + .Atom(1, "_yql_row_number") + .Seal() + .Callable(1, "Uint64") + .Atom(0, "1") + .Seal() + .Seal() + .Seal() + .Seal() + .Build(); + + ret = ctx.Builder(pos) + .Callable("Map") + .Add(0, ret) + .Lambda(1) + .Param("row") + .Callable("RemoveMember") + .Arg(0, "row") + .Atom(1, "_yql_row_number") + .Seal() + .Seal() + .Seal() + .Build(); + + return ret;
+} + TExprNode::TPtr BuildOffset(TPositionHandle pos, const TExprNode::TPtr& offset, const TExprNode::TPtr& list, TExprContext& ctx) { return ctx.Builder(pos) .Callable("Skip") @@ -2232,7 +2331,10 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct columnsItems.push_back(ctx.NewAtom(node->Pos(), x)); } + auto sort = GetSetting(node->Head(), "sort"); auto setItems = GetSetting(node->Head(), "set_items"); + const bool onlyOneSetItem = (setItems->Tail().ChildrenSize() == 1); + TExprNode::TListType setItemNodes; TVector<TColumnOrder> columnOrders; for (auto setItem : setItems->Tail().Children()) { @@ -2253,6 +2355,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct auto having = GetSetting(setItem->Tail(), "having"); auto window = GetSetting(setItem->Tail(), "window"); auto distinctAll = GetSetting(setItem->Tail(), "distinct_all"); + auto distinctOn = GetSetting(setItem->Tail(), "distinct_on"); bool oneRow = !from; TExprNode::TPtr list; if (values) { @@ -2316,6 +2419,8 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct if (distinctAll) { list = ctx.NewCallable(node->Pos(), "SqlAggregateAll", { list }); + } else if (distinctOn) { + list = BuildDistinctOn(node->Pos(), list, distinctOn->TailPtr(), onlyOneSetItem ? sort : nullptr, ctx); } } @@ -2323,13 +2428,12 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct } TExprNode::TPtr list; - if (setItemNodes.size() == 1) { + if (onlyOneSetItem == 1) { list = setItemNodes.front(); } else { list = ExpandPositionalUnionAll(*node, columnOrders, setItemNodes, ctx, optCtx); } - auto sort = GetSetting(node->Head(), "sort"); if (sort && sort->Tail().ChildrenSize() > 0) { list = BuildSort(node->Pos(), sort, list, ctx); } diff --git a/ydb/library/yql/core/type_ann/type_ann_pg.cpp b/ydb/library/yql/core/type_ann/type_ann_pg.cpp index 7ccbd71d270..c9db06e1e47 100644 --- a/ydb/library/yql/core/type_ann/type_ann_pg.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_pg.cpp @@ -2042,6 +2042,8 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN bool hasValues = false; bool hasJoinOps = false; bool hasExtTypes = false; + bool hasDistinctAll = false; + bool hasDistinctOn = false; // pass 0 - from/values // pass 1 - join @@ -2049,7 +2051,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN // pass 3 - where, group_by // pass 4 - window // pass 5 - result - // pass 6 - distinct_all + // pass 6 - distinct_all, distinct_on for (ui32 pass = 0; pass < 7; ++pass) { if (pass > 1 && !inputs.empty() && !hasJoinOps) { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Missing join_ops")); @@ -2785,6 +2787,7 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN } } else if (optionName == "distinct_all") { + hasDistinctAll = true; if (pass != 6) { continue; } @@ -2792,6 +2795,38 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN if (!EnsureTupleSize(*option, 1, ctx.Expr)) { return IGraphTransformer::TStatus::Error; } + } else if (optionName == "distinct_on") { + hasDistinctOn = true; + if (pass != 6) { + continue; + } + + if (scanColumnsOnly) { + continue; + } + + if (!EnsureTupleSize(*option, 2, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + const auto& data = option->Tail(); + if (!EnsureTuple(data, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + TExprNode::TListType newGroups; + TInputs projectionInputs; + projectionInputs.push_back(TInput{ "", outputRowType, Nothing(), false, {} }); + if (!ValidateGroups(projectionInputs, {}, data, ctx, newGroups)) { + return IGraphTransformer::TStatus::Error; + } + + if (!newGroups.empty()) { + auto resultValue = ctx.Expr.NewList(options.Pos(), std::move(newGroups)); + auto newSettings = ReplaceSetting(options, {}, "distinct_on", resultValue, ctx.Expr); + output = ctx.Expr.ChangeChild(*input, 0, std::move(newSettings)); + return IGraphTransformer::TStatus::Repeat; + } } else { ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(option->Head().Pos()), TStringBuilder() << "Unsupported option: " << optionName)); @@ -2810,6 +2845,11 @@ IGraphTransformer::TStatus PgSetItemWrapper(const TExprNode::TPtr& input, TExprN return IGraphTransformer::TStatus::Error; } + if (hasDistinctAll && hasDistinctOn) { + ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), "Distinct ON isn't compatible with distinct over all columns")); + return IGraphTransformer::TStatus::Error; + } + auto extTypes = GetSetting(options, "ext_types"); if (extTypes && scanColumnsOnly) { const auto& data = extTypes->Tail(); diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp index 7f28a5f64ef..d66f14229f7 100644 --- a/ydb/library/yql/sql/pg/pg_sql.cpp +++ b/ydb/library/yql/sql/pg/pg_sql.cpp @@ -306,12 +306,26 @@ public: TVector<TAstNode*> setItemNodes; for (const auto& x : setItems) { bool hasDistinctAll = false; + TVector<TAstNode*> distinctOnItems; if (x->distinctClause) { if (linitial(x->distinctClause) == NULL) { hasDistinctAll = true; } else { - AddError("SelectStmt: not supported DISTINCT ON"); - return nullptr; + for (int i = 0; i < ListLength(x->distinctClause); ++i) { + auto node = ListNodeNth(x->distinctClause, i); + if (NodeTag(node) != T_ColumnRef) { + NodeNotImplemented(x->distinctClause, node); + return nullptr; + } + + auto ref = ParseColumnRef(CAST_NODE(ColumnRef, node)); + if (!ref) { + return nullptr; + } + + auto lambda = L(A("lambda"), QL(), ref); + distinctOnItems.push_back(L(A("PgGroup"), L(A("Void")), lambda)); + } } } @@ -647,6 +661,9 @@ public: if (hasDistinctAll) { setItemOptions.push_back(QL(QA("distinct_all"))); + } else if (!distinctOnItems.empty()) { + auto distinctOn = QVL(distinctOnItems.data(), distinctOnItems.size()); + setItemOptions.push_back(QL(QA("distinct_on"), distinctOn)); } auto setItem = L(A("PgSetItem"), QVL(setItemOptions.data(), setItemOptions.size())); |