diff options
author | aneporada <aneporada@ydb.tech> | 2023-10-12 17:06:19 +0300 |
---|---|---|
committer | aneporada <aneporada@ydb.tech> | 2023-10-12 17:26:23 +0300 |
commit | 597e0411eb83650b79cf46d727e60211df316169 (patch) | |
tree | 8290f80925487ee7457a9cc2f2f5fca7040fd0b2 | |
parent | af591f3c1fb2a8b85e65dde9085ce908a65b166c (diff) | |
download | ydb-597e0411eb83650b79cf46d727e60211df316169.tar.gz |
Normalize Aggregate arguments
20 files changed, 134 insertions, 36 deletions
diff --git a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp index 8f1af515985..fe6bdefe13d 100644 --- a/ydb/core/kqp/ut/query/kqp_explain_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_explain_ut.cpp @@ -173,7 +173,7 @@ Y_UNIT_TEST_SUITE(KqpExplain) { UNIT_ASSERT(aggregate.IsDefined()); UNIT_ASSERT(aggregate.GetMapSafe().at("GroupBy").GetStringSafe() == "item.App"); UNIT_ASSERT(aggregate.GetMapSafe().at("Aggregation").GetStringSafe() == - "{_yql_agg_0: MIN(item.Message),_yql_agg_1: MAX(item.Message)}"); + "{_yql_agg_0: MAX(item.Message),_yql_agg_1: MIN(item.Message)}"); } Y_UNIT_TEST(ComplexJoin) { diff --git a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp index 8ed678f2c41..032cec5c2ca 100644 --- a/ydb/library/yql/core/common_opt/yql_co_simple1.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_simple1.cpp @@ -3022,6 +3022,99 @@ bool HasPayload(const TCoAggregate& node) { HasSetting(node.Settings().Ref(), "session"); } +TExprNode::TPtr Normalize(const TCoAggregate& node, TExprContext& ctx) { + TMap<TStringBuf, TExprNode::TPtr> aggTuples; // key is a min output column for given tuple + bool needRebuild = false; + for (const auto& aggTuple : node.Handlers()) { + const TExprNode& columns = aggTuple.ColumnName().Ref(); + TVector<TStringBuf> names; + bool namesInOrder = true; + if (columns.IsList()) { + for (auto& column : columns.ChildrenList()) { + YQL_ENSURE(column->IsAtom()); + if (!names.empty()) { + namesInOrder = namesInOrder && (column->Content() >= names.back()); + } + names.push_back(column->Content()); + } + } else { + YQL_ENSURE(columns.IsAtom()); + names.push_back(columns.Content()); + } + + TExprNode::TPtr aggTupleNode = aggTuple.Ptr(); + if (!namesInOrder && aggTuple.Trait().Maybe<TCoAggregationTraits>()) { + auto traits = aggTuple.Trait().Cast<TCoAggregationTraits>(); + const TTypeAnnotationNode* finishType = traits.FinishHandler().Ref().GetTypeAnn(); + if (finishType->GetKind() == ETypeAnnotationKind::Tuple && finishType->Cast<TTupleExprType>()->GetSize() == names.size()) { + needRebuild = true; + TMap<TStringBuf, size_t> originalIndexes; + for (size_t i = 0; i < names.size(); ++i) { + YQL_ENSURE(originalIndexes.insert({ names[i], i}).second); + } + + TExprNodeList nameNodes; + TExprNodeList finishBody; + TExprNode::TPtr arg = ctx.NewArgument(traits.FinishHandler().Pos(), "arg"); + auto originalTuple = ctx.Builder(traits.FinishHandler().Pos()) + .Apply(traits.FinishHandler().Ref()) + .With(0, arg) + .Seal() + .Build(); + + for (auto& [name, idx] : originalIndexes) { + nameNodes.emplace_back(ctx.NewAtom(aggTuple.ColumnName().Pos(), name)); + finishBody.emplace_back(ctx.Builder(traits.FinishHandler().Pos()) + .Callable("Nth") + .Add(0, originalTuple) + .Atom(1, idx) + .Seal() + .Build()); + } + + auto finishLambda = ctx.NewLambda(traits.FinishHandler().Pos(), + ctx.NewArguments(traits.FinishHandler().Pos(), { arg }), + ctx.NewList(traits.FinishHandler().Pos(), std::move(finishBody))); + + aggTupleNode = Build<TCoAggregateTuple>(ctx, aggTuple.Pos()) + .InitFrom(aggTuple) + .ColumnName(ctx.NewList(aggTuple.ColumnName().Pos(), std::move(nameNodes))) + .Trait<TCoAggregationTraits>() + .InitFrom(traits) + .FinishHandler(finishLambda) + .Build() + .Done().Ptr(); + Sort(names); + } + } + + YQL_ENSURE(!names.empty()); + if (!aggTuples.empty()) { + auto last = aggTuples.end(); + --last; + if (names.front() < last->first) { + needRebuild = true; + } + } + + aggTuples[names.front()] = aggTupleNode; + } + + if (!needRebuild) { + return node.Ptr(); + } + + TExprNodeList newHandlers; + for (auto& t : aggTuples) { + newHandlers.push_back(t.second); + } + + return Build<TCoAggregate>(ctx, node.Pos()) + .InitFrom(node) + .Handlers(ctx.NewList(node.Pos(), std::move(newHandlers))) + .Done().Ptr(); +} + TExprNode::TPtr PullAssumeColumnOrderOverEquiJoin(const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { TVector<ui32> withAssume; for (ui32 i = 0; i < node->ChildrenSize() - 2; i++) { @@ -4718,6 +4811,11 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) { } } + if (auto normalized = Normalize(self, ctx); normalized != node) { + YQL_CLOG(DEBUG, Core) << "Normalized " << node->Content() << " payloads"; + return normalized; + } + return DropReorder<false>(node, ctx); }; diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 index e7c2846cdb9..6a13b0f7995 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 @@ -79,7 +79,7 @@ "Function": { "Arguments": [ { - "Id": 41 + "Id": 21 } ], "Id": 5 @@ -90,6 +90,11 @@ "Id": 107 }, "Function": { + "Arguments": [ + { + "Id": 21 + } + ], "Id": 2 } }, @@ -98,12 +103,7 @@ "Id": 108 }, "Function": { - "Arguments": [ - { - "Id": 21 - } - ], - "Id": 5 + "Id": 2 } }, { @@ -113,10 +113,10 @@ "Function": { "Arguments": [ { - "Id": 21 + "Id": 41 } ], - "Id": 2 + "Id": 5 } } ] @@ -126,16 +126,16 @@ "Projection": { "Columns": [ { - "Id": 106 - }, - { "Id": 107 }, { - "Id": 109 + "Id": 106 }, { "Id": 108 + }, + { + "Id": 109 } ] } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 index c070c96199d..24b622239eb 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 @@ -52,7 +52,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: MIN(item.URL),_yql_agg_1: SUM(state._yql_agg_1)}", + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: MIN(item.URL)}", "GroupBy": "item.SearchPhrase", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 index b2421762398..0893604342d 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 @@ -66,7 +66,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: MIN(item.URL),_yql_agg_1: MIN(item.Title),_yql_agg_2: SUM(state._yql_agg_2)}", + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_2: MIN(item.URL),_yql_agg_3: MIN(item.Title)}", "GroupBy": "item.SearchPhrase", "Name": "Aggregate" }, @@ -298,7 +298,7 @@ "Node Type": "Aggregate", "Operators": [ { - "Aggregation": "{_yql_agg_3: Inc(state._yql_agg_3)}", + "Aggregation": "{_yql_agg_1: Inc(state._yql_agg_1)}", "GroupBy": "item.SearchPhrase", "Name": "Aggregate" } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 index 02b8fadab72..b05edfbfa80 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 @@ -56,7 +56,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1)}", "GroupBy": "item.CounterID", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 index e9b462cae76..56f52e86cfc 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 @@ -56,7 +56,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_2: MIN(item.Referer)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: MIN(item.Referer)}", "GroupBy": "item.key", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 index 4885dca295d..ca62661f7a3 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 @@ -53,7 +53,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: SUM(item.IsRefresh)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: SUM(item.IsRefresh)}", "GroupBy": "", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 index d317350b0eb..be78c35d3bf 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 @@ -53,7 +53,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: SUM(item.IsRefresh)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: SUM(item.IsRefresh)}", "GroupBy": "", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 index c30dd75e8df..0e6213aaf7b 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 @@ -53,7 +53,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: SUM(item.IsRefresh)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: SUM(item.IsRefresh)}", "GroupBy": "", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 index 9792b3806f8..55c5dd943c4 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 @@ -81,7 +81,7 @@ "Id": 6 } ], - "Id": 3 + "Id": 4 } }, { @@ -94,7 +94,7 @@ "Id": 6 } ], - "Id": 4 + "Id": 3 } } ] diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 index 32c010b75d5..17b1cb77500 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 @@ -66,7 +66,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(item.AdvEngineID),_yql_agg_1: SUM(state._yql_agg_1)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_3: SUM(item.AdvEngineID)}", "GroupBy": "item.RegionID", "Name": "Aggregate" }, @@ -130,7 +130,7 @@ "Node Type": "Aggregate", "Operators": [ { - "Aggregation": "{_yql_agg_3: Inc(state._yql_agg_3)}", + "Aggregation": "{_yql_agg_2: Inc(state._yql_agg_2)}", "GroupBy": "item.RegionID", "Name": "Aggregate" } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 index 015b7c4501c..8945945f0fc 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 @@ -52,7 +52,7 @@ "Node Type": "Aggregate-Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: MIN(item.URL),_yql_agg_1: SUM(state._yql_agg_1)}", + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: MIN(item.URL)}", "GroupBy": "item.SearchPhrase", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 index 716cb81a6c2..082c3e4e91b 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 @@ -66,7 +66,7 @@ "Node Type": "Aggregate-Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: MIN(item.URL),_yql_agg_1: MIN(item.Title),_yql_agg_2: SUM(state._yql_agg_2)}", + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_2: MIN(item.URL),_yql_agg_3: MIN(item.Title)}", "GroupBy": "item.SearchPhrase", "Name": "Aggregate" }, @@ -111,7 +111,7 @@ "Node Type": "Aggregate", "Operators": [ { - "Aggregation": "{_yql_agg_3: COUNT(item.UserID)}", + "Aggregation": "{_yql_agg_1: COUNT(item.UserID)}", "GroupBy": "item.SearchPhrase", "Name": "Aggregate" } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 index b02c82508a8..219499edeb1 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 @@ -56,7 +56,7 @@ "Node Type": "Aggregate-Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1)}", "GroupBy": "item.CounterID", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 index ce9a179b288..6cb7b2daa8c 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 @@ -56,7 +56,7 @@ "Node Type": "Aggregate-Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_2: MIN(item.Referer)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: MIN(item.Referer)}", "GroupBy": "item.key", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 index 7fea5b73430..f77754c19d4 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 @@ -53,7 +53,7 @@ "Node Type": "Aggregate-Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: SUM(item.IsRefresh)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: SUM(item.IsRefresh)}", "GroupBy": "", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 index 8fa7d88ba48..21bbff9b148 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 @@ -53,7 +53,7 @@ "Node Type": "Aggregate-Filter-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: SUM(item.IsRefresh)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: SUM(item.IsRefresh)}", "GroupBy": "", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 index a3a7ed46dc4..13bf7b306c1 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 @@ -53,7 +53,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0),_yql_agg_1: SUM(item.IsRefresh)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_2: SUM(item.IsRefresh)}", "GroupBy": "", "Name": "Aggregate" }, diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 index e72bf729ffc..15497b6c730 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 @@ -66,7 +66,7 @@ "Node Type": "Aggregate-TableFullScan", "Operators": [ { - "Aggregation": "{_yql_agg_0: SUM(item.AdvEngineID),_yql_agg_1: SUM(state._yql_agg_1)}", + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1),_yql_agg_3: SUM(item.AdvEngineID)}", "GroupBy": "item.RegionID", "Name": "Aggregate" }, @@ -107,7 +107,7 @@ "Node Type": "Aggregate", "Operators": [ { - "Aggregation": "{_yql_agg_3: COUNT(item.UserID)}", + "Aggregation": "{_yql_agg_2: COUNT(item.UserID)}", "GroupBy": "item.RegionID", "Name": "Aggregate" } |