diff options
author | vvvv <vvvv@ydb.tech> | 2023-02-21 15:50:42 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2023-02-21 15:50:42 +0300 |
commit | d203996858cdf8c2c59024e6cedab2819d17ebe5 (patch) | |
tree | 9a4e8f40226e219210fb08adf43a2ecf8790f5b8 | |
parent | 5b385d51edea3954d9d14010a69c2f30bc888891 (diff) | |
download | ydb-d203996858cdf8c2c59024e6cedab2819d17ebe5.tar.gz |
optimizers for WideSort, WideSortBlocks w/o runtime
9 files changed, 115 insertions, 27 deletions
diff --git a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json index caef313ca94..363b331e0b9 100644 --- a/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json +++ b/ydb/library/yql/core/expr_nodes/yql_expr_nodes.json @@ -1610,6 +1610,14 @@ "Match": {"Type": "Callable", "Name": "WideTopSort"} }, { + "Name": "TCoWideSort", + "Base": "TCoInputBase", + "Match": {"Type": "Callable", "Name": "WideSort"}, + "Children": [ + {"Index": 1, "Name": "Keys", "Type": "TCoSortKeys"} + ] + }, + { "Name": "TCoAddMember", "Base": "TCallable", "Match": {"Type": "Callable", "Name": "AddMember"}, diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index c70b97893dc..b3fa592f171 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -4060,8 +4060,8 @@ TExprNode::TPtr OptimizeCombineCore(const TExprNode::TPtr& node, TExprContext& c return node; } -template<bool Sort> -TExprNode::TPtr OptimizeTop(const TExprNode::TPtr& node, TExprContext& ctx) { +template<bool Sort, bool HasCount> +TExprNode::TPtr OptimizeTopOrSort(const TExprNode::TPtr& node, TExprContext& ctx) { if (const auto& input = node->Head(); input.IsCallable("NarrowMap") && input.Tail().Tail().IsCallable("AsStruct")) { TNodeMap<size_t> indexes(input.Tail().Tail().ChildrenSize()); input.Tail().Tail().ForEachChild([&](const TExprNode& field) { @@ -4073,6 +4073,7 @@ TExprNode::TPtr OptimizeTop(const TExprNode::TPtr& node, TExprContext& ctx) { }); const auto itemType = node->GetTypeAnn()->Cast<TFlowExprType>()->GetItemType()->Cast<TStructExprType>(); + std::unordered_set<size_t> unique; std::vector<size_t> sorted; if (node->Tail().Tail().IsCallable("Member") && &node->Tail().Tail().Head() == &node->Tail().Head().Head()) { if (const auto it = indexes.find(&node->Tail().Tail().Tail()); indexes.cend() == it) @@ -4095,14 +4096,16 @@ TExprNode::TPtr OptimizeTop(const TExprNode::TPtr& node, TExprContext& ctx) { } } - if (sorted.empty()) + unique.insert(sorted.begin(), sorted.end()); + if (sorted.empty() || unique.size() != sorted.size()) return node; YQL_CLOG(DEBUG, CorePeepHole) << "Swap " << node->Content() << " with " << input.Content(); TExprNode::TListType directions(sorted.size()); + auto dirIndex = HasCount ? 2U : 1U; for (auto i = 0U; i < sorted.size(); ++i) { - auto dir = node->Child(2U)->IsList() ? node->Child(2U)->ChildPtr(i) : node->ChildPtr(2U); + auto dir = node->Child(dirIndex)->IsList() ? node->Child(dirIndex)->ChildPtr(i) : node->ChildPtr(dirIndex); directions[i] = ctx.Builder(dir->Pos()) .List() .Atom(0, sorted[i]) @@ -4110,16 +4113,28 @@ TExprNode::TPtr OptimizeTop(const TExprNode::TPtr& node, TExprContext& ctx) { .Seal().Build(); } - return Build<TCoNarrowMap>(ctx, node->Pos()) - .template Input<std::conditional_t<Sort, TCoWideTopSort, TCoWideTop>>() - .Input(input.HeadPtr()) - .Count(node->ChildPtr(1)) - .template Keys<TCoSortKeys>() - .Add(std::move(directions)) + if constexpr (HasCount) { + return Build<TCoNarrowMap>(ctx, node->Pos()) + .template Input<std::conditional_t<Sort, TCoWideTopSort, TCoWideTop>>() + .Input(input.HeadPtr()) + .Count(node->ChildPtr(1)) + .template Keys<TCoSortKeys>() + .Add(std::move(directions)) + .Build() .Build() - .Build() - .Lambda(input.TailPtr()) - .Done().Ptr(); + .Lambda(input.TailPtr()) + .Done().Ptr(); + } else { + return Build<TCoNarrowMap>(ctx, node->Pos()) + .template Input<TCoWideSort>() + .Input(input.HeadPtr()) + .template Keys<TCoSortKeys>() + .Add(std::move(directions)) + .Build() + .Build() + .Lambda(input.TailPtr()) + .Done().Ptr(); + } } return node; @@ -5369,7 +5384,7 @@ TExprNode::TPtr OptimizeSkipTakeToBlocks(const TExprNode::TPtr& node, TExprConte .Build(); } -TExprNode::TPtr OptimizeTopBlocks(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& types) { +TExprNode::TPtr OptimizeTopOrSortBlocks(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& types) { if (!types.ArrowResolver) { return node; } @@ -5395,17 +5410,14 @@ TExprNode::TPtr OptimizeTopBlocks(const TExprNode::TPtr& node, TExprContext& ctx return node; } - TStringBuf newName = node->Content() == "WideTop" ? "WideTopBlocks" : "WideTopSortBlocks"; + TString newName = node->Content() + TString("Blocks"); YQL_CLOG(DEBUG, CorePeepHole) << "Convert " << node->Content() << " to " << newName; + auto children = node->ChildrenList(); + children[0] = ctx.NewCallable(node->Pos(), "WideToBlocks", { children[0] }); + return ctx.Builder(node->Pos()) .Callable("WideFromBlocks") - .Callable(0, newName) - .Callable(0, "WideToBlocks") - .Add(0, node->HeadPtr()) - .Seal() - .Add(1, node->ChildPtr(1)) - .Add(2, node->ChildPtr(2)) - .Seal() + .Add(0, ctx.NewCallable(node->Pos(), newName, std::move(children))) .Seal() .Build(); } @@ -7237,8 +7249,9 @@ struct TPeepHoleRules { {"WideMap", &OptimizeWideMaps}, {"NarrowMap", &OptimizeWideMaps}, {"Unordered", &DropUnordered}, - {"Top", &OptimizeTop<false>}, - {"TopSort", &OptimizeTop<true>}, + {"Top", &OptimizeTopOrSort<false, true>}, + {"TopSort", &OptimizeTopOrSort<true, true>}, + {"Sort", &OptimizeTopOrSort<true, false>}, }; static constexpr std::initializer_list<TExtPeepHoleOptimizerMap::value_type> FinalStageExtRulesInit = {}; @@ -7265,8 +7278,9 @@ struct TPeepHoleRules { {"Take", &OptimizeSkipTakeToBlocks}, {"BlockCombineAll", &OptimizeBlockCombine}, {"BlockCombineHashed", &OptimizeBlockCombine}, - {"WideTop", &OptimizeTopBlocks}, - {"WideTopSort", &OptimizeTopBlocks}, + {"WideTop", &OptimizeTopOrSortBlocks}, + {"WideTopSort", &OptimizeTopOrSortBlocks}, + //{"WideSort", &OptimizeTopOrSortBlocks}, }; TPeepHoleRules() diff --git a/ydb/library/yql/core/type_ann/type_ann_blocks.cpp b/ydb/library/yql/core/type_ann/type_ann_blocks.cpp index 4d398d996a1..7cde42795ea 100644 --- a/ydb/library/yql/core/type_ann/type_ann_blocks.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_blocks.cpp @@ -651,5 +651,24 @@ IGraphTransformer::TStatus WideTopBlocksWrapper(const TExprNode::TPtr& input, TE return IGraphTransformer::TStatus::Ok; } +IGraphTransformer::TStatus WideSortBlocksWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureArgsCount(*input, 2U, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + TTypeAnnotationNode::TListType blockItemTypes; + if (!EnsureWideFlowBlockType(input->Head(), blockItemTypes, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!ValidateWideTopKeys(input->Tail(), blockItemTypes, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Head().GetTypeAnn()); + return IGraphTransformer::TStatus::Ok; +} + } // namespace NTypeAnnImpl } diff --git a/ydb/library/yql/core/type_ann/type_ann_blocks.h b/ydb/library/yql/core/type_ann/type_ann_blocks.h index 377d6521b72..0c32ae6bb4c 100644 --- a/ydb/library/yql/core/type_ann/type_ann_blocks.h +++ b/ydb/library/yql/core/type_ann/type_ann_blocks.h @@ -24,6 +24,7 @@ namespace NTypeAnnImpl { IGraphTransformer::TStatus WideFromBlocksWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus WideSkipTakeBlocksWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus WideTopBlocksWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus WideSortBlocksWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); } // namespace NTypeAnnImpl } // namespace NYql diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index d3391229cba..91daaaf31c9 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -11855,6 +11855,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["WideChain1Map"] = &WideChain1MapWrapper; Functions["WideTop"] = &WideTopWrapper; Functions["WideTopSort"] = &WideTopWrapper; + Functions["WideSort"] = &WideSortWrapper; Functions["NarrowMap"] = &NarrowMapWrapper; Functions["NarrowFlatMap"] = &NarrowFlatMapWrapper; Functions["NarrowMultiMap"] = &NarrowMultiMapWrapper; @@ -11867,6 +11868,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot> Functions["BlockExpandChunked"] = &BlockExpandChunkedWrapper; Functions["WideTopBlocks"] = &WideTopBlocksWrapper; Functions["WideTopSortBlocks"] = &WideTopBlocksWrapper; + Functions["WideSortBlocks"] = &WideSortBlocksWrapper; Functions["AsScalar"] = &AsScalarWrapper; Functions["BlockCoalesce"] = &BlockCoalesceWrapper; diff --git a/ydb/library/yql/core/type_ann/type_ann_wide.cpp b/ydb/library/yql/core/type_ann/type_ann_wide.cpp index 7c4a2364976..5662a7b449b 100644 --- a/ydb/library/yql/core/type_ann/type_ann_wide.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_wide.cpp @@ -648,6 +648,25 @@ IGraphTransformer::TStatus WideTopWrapper(const TExprNode::TPtr& input, TExprNod return IGraphTransformer::TStatus::Ok; } +IGraphTransformer::TStatus WideSortWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) { + Y_UNUSED(output); + if (!EnsureArgsCount(*input, 2U, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + if (!EnsureWideFlowType(input->Head(), ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + const auto& types = input->Head().GetTypeAnn()->Cast<TFlowExprType>()->GetItemType()->Cast<TMultiExprType>()->GetItems(); + if (!ValidateWideTopKeys(input->Tail(), types, ctx.Expr)) { + return IGraphTransformer::TStatus::Error; + } + + input->SetTypeAnn(input->Head().GetTypeAnn()); + return IGraphTransformer::TStatus::Ok; +} + bool ValidateWideTopKeys(const TExprNode& keys, const TTypeAnnotationNode::TListType& types, TExprContext& ctx) { if (!(EnsureTupleMinSize(keys, 1U, ctx) && EnsureTupleMaxSize(keys, types.size(), ctx))) { return false; diff --git a/ydb/library/yql/core/type_ann/type_ann_wide.h b/ydb/library/yql/core/type_ann/type_ann_wide.h index 0b662477558..d4a159a49dc 100644 --- a/ydb/library/yql/core/type_ann/type_ann_wide.h +++ b/ydb/library/yql/core/type_ann/type_ann_wide.h @@ -23,5 +23,6 @@ namespace NTypeAnnImpl { IGraphTransformer::TStatus NarrowMultiMapWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); IGraphTransformer::TStatus WideTopWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); + IGraphTransformer::TStatus WideSortWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx); } // namespace NTypeAnnImpl } // namespace NYql diff --git a/ydb/library/yql/core/yql_expr_constraint.cpp b/ydb/library/yql/core/yql_expr_constraint.cpp index 37ddb1bf41c..a47f90b9ae3 100644 --- a/ydb/library/yql/core/yql_expr_constraint.cpp +++ b/ydb/library/yql/core/yql_expr_constraint.cpp @@ -228,10 +228,13 @@ public: Functions["WithWorld"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["WideTop"] = &TCallableConstraintTransformer::WideTopWrap<false>; Functions["WideTopSort"] = &TCallableConstraintTransformer::WideTopWrap<true>; + Functions["WideSort"] = &TCallableConstraintTransformer::WideTopWrap<true>; Functions["WideTopBlocks"] = &TCallableConstraintTransformer::WideTopWrap<false>; Functions["WideTopSortBlocks"] = &TCallableConstraintTransformer::WideTopWrap<true>; + Functions["WideSortBlocks"] = &TCallableConstraintTransformer::WideTopWrap<true>; Functions["WideToBlocks"] = &TCallableConstraintTransformer::CopyAllFrom<0>; Functions["WideFromBlocks"] = &TCallableConstraintTransformer::CopyAllFrom<0>; + Functions["BlockExpandChunked"] = &TCallableConstraintTransformer::CopyAllFrom<0>; } std::optional<IGraphTransformer::TStatus> ProcessCore(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { @@ -3372,7 +3375,7 @@ private: // Constraint Passthrough can be reduced in empty containers newConstr = input.GetConstraint<TEmptyConstraintNode>(); } - YQL_ENSURE(newConstr, "Rewrite error, missing " << *expectedConstr << " constraint in node " << input.Content()); + YQL_ENSURE(newConstr, "Rewrite error, missing " << *expectedConstr << " constraint in node\n" << input.Dump()); } } } diff --git a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp index fb969e86349..26fb981a459 100644 --- a/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp +++ b/ydb/library/yql/providers/common/mkql/yql_provider_mkql.cpp @@ -39,6 +39,19 @@ TRuntimeNode WideTopImpl(const TExprNode& node, TMkqlBuildContext& ctx, return (ctx.ProgramBuilder.*func)(flow, count, directions); } +TRuntimeNode WideSortImpl(const TExprNode& node, TMkqlBuildContext& ctx, + TRuntimeNode(TProgramBuilder::*func)(TRuntimeNode, const std::vector<std::pair<ui32, TRuntimeNode>>&)) { + const auto flow = MkqlBuildExpr(node.Head(), ctx); + + std::vector<std::pair<ui32, TRuntimeNode>> directions; + directions.reserve(node.Tail().ChildrenSize()); + node.Tail().ForEachChild([&](const TExprNode& dir) { + directions.emplace_back(std::make_pair(::FromString<ui32>(dir.Head().Content()), MkqlBuildExpr(dir.Tail(), ctx))); + }); + + return (ctx.ProgramBuilder.*func)(flow, directions); +} + TRuntimeNode CombineByKeyImpl(const TExprNode& node, TMkqlBuildContext& ctx) { NNodes::TCoCombineByKey combine(&node); const bool isStreamOrFlow = combine.Ref().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Stream || @@ -766,6 +779,10 @@ TMkqlCommonCallableCompiler::TShared::TShared() { return WideTopImpl(node, ctx, &TProgramBuilder::WideTopSort); }); + AddCallable("WideSort", [](const TExprNode& node, TMkqlBuildContext& ctx) { + return WideSortImpl(node, ctx, &TProgramBuilder::WideSort); + }); + AddCallable("WideTopBlocks", [](const TExprNode& node, TMkqlBuildContext& ctx) { return WideTopImpl(node, ctx, &TProgramBuilder::WideTopBlocks); }); @@ -774,6 +791,10 @@ TMkqlCommonCallableCompiler::TShared::TShared() { return WideTopImpl(node, ctx, &TProgramBuilder::WideTopSortBlocks); }); + AddCallable("WideSortBlocks", [](const TExprNode& node, TMkqlBuildContext& ctx) { + return WideSortImpl(node, ctx, &TProgramBuilder::WideSortBlocks); + }); + AddCallable("Iterable", [](const TExprNode& node, TMkqlBuildContext& ctx) { const auto lambda = [&]() { return MkqlBuildLambda(node.Head(), ctx, {}); }; return ctx.ProgramBuilder.Iterable(lambda); |