diff options
author | pilik <pudge1000-7@ydb.tech> | 2025-03-01 01:29:22 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-01 01:29:22 +0300 |
commit | d08419b4a78e460944e81523fe9c7ce7c7f5ca14 (patch) | |
tree | 5ad5bf06a82a9fc1ee248447663d148480546045 | |
parent | bf4f55b8e360284b921e123eaec0ee6202d2b969 (diff) | |
download | ydb-d08419b4a78e460944e81523fe9c7ce7c7f5ca14.tar.gz |
[CBO] added enableshuffleelimination flag (#15218)
16 files changed, 192 insertions, 189 deletions
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 2db1296b16..4adbfa8616 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -92,7 +92,7 @@ public: public: TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) override { auto status = TOptimizeTransformerBase::DoTransform(input, output, ctx); - + if (status == TStatus::Ok) { for (const auto& hint: KqpCtx.GetOptimizerHints().GetUnappliedString()) { ctx.AddWarning(YqlIssue({}, TIssuesIds::YQL_UNUSED_HINT, "Unapplied hint: " + hint)); @@ -118,7 +118,7 @@ protected: TMaybeNode<TExprBase> RewriteAggregate(TExprBase node, TExprContext& ctx) { TMaybeNode<TExprBase> output; auto aggregate = node.Cast<TCoAggregateBase>(); - auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); + auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping"); if (hopSetting) { auto input = aggregate.Input().Maybe<TDqConnection>(); if (!input) { @@ -163,8 +163,9 @@ protected: TMaybeNode<TExprBase> OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) { auto maxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize); auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel); + bool enableShuffleElimination = KqpCtx.Config->OptShuffleElimination.Get().GetOrElse(false);; auto providerCtx = TKqpProviderContext(KqpCtx, optLevel); - auto opt = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize, ctx)); + auto opt = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize, ctx, enableShuffleElimination)); TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, optLevel, *opt, [](auto& rels, auto label, auto node, auto stat) { rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), *stat, node)); diff --git a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json index 96c727f9c5..b32ce9858c 100644 --- a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json +++ b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json @@ -7,22 +7,22 @@ "args": [ { - "op_name":"TableFullScan", - "table":"quotas_browsers_relation" - }, - { "op_name":"LeftJoin (MapJoin)", "args": [ { "op_name":"TableFullScan", - "table":"browsers" + "table":"quotas_browsers_relation" }, { - "op_name":"TableFullScan", - "table":"browser_groups" + "op_name":"TableLookup", + "table":"browsers" } ] + }, + { + "op_name":"TableLookup", + "table":"browser_groups" } ] }, diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcc.json b/ydb/core/kqp/ut/join/data/join_order/tpcc.json index 2ddae4ea99..4a577a1b4c 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcc.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcc.json @@ -3,12 +3,12 @@ "args": [ { - "op_name":"TableRangeScan", - "table":"test\/tpcc\/order_line" + "op_name":"TableLookup", + "table":"test\/tpcc\/stock" }, { "op_name":"TableRangeScan", - "table":"test\/tpcc\/stock" + "table":"test\/tpcc\/order_line" } ] } diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json index 7b0d733f58..12594bc929 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json @@ -1,5 +1,5 @@ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json index 7b0d733f58..12594bc929 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json @@ -1,5 +1,5 @@ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json index 74ebff5f89..9c7b141dde 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json @@ -7,11 +7,11 @@ "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"LeftJoin (Grace)", "args": [ { - "op_name":"LeftJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -19,25 +19,25 @@ "table":"test\/ds\/store_sales" }, { - "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "op_name":"Union", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/date_dim" + } + ] } ] }, { - "op_name":"Union", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" } ] }, { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -46,11 +46,11 @@ [ { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" + "table":"test\/ds\/web_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" + "table":"test\/ds\/web_returns" } ] }, @@ -69,7 +69,7 @@ ] }, { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -78,11 +78,11 @@ [ { "op_name":"TableFullScan", - "table":"test\/ds\/web_sales" + "table":"test\/ds\/catalog_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/web_returns" + "table":"test\/ds\/catalog_returns" } ] }, diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json index fd6b1dd32b..cf514b8165 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json @@ -7,11 +7,11 @@ "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"LeftJoin (Grace)", "args": [ { - "op_name":"LeftJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -20,18 +20,18 @@ }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/date_dim" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" + "table":"test\/ds\/store_returns" } ] }, { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -40,11 +40,11 @@ [ { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" + "table":"test\/ds\/web_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" + "table":"test\/ds\/web_returns" } ] }, @@ -57,7 +57,7 @@ ] }, { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -66,11 +66,11 @@ [ { "op_name":"TableFullScan", - "table":"test\/ds\/web_sales" + "table":"test\/ds\/catalog_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/web_returns" + "table":"test\/ds\/catalog_returns" } ] }, diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json index 25570a61bb..4b2772b1a1 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json @@ -8,75 +8,75 @@ [ { "op_name":"TableFullScan", - "table":"part" + "table":"partsupp" }, { "op_name":"InnerJoin (MapJoin)", "args": [ { + "op_name":"TableFullScan", + "table":"supplier" + }, + { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"partsupp" - }, - { - "op_name":"TableFullScan", - "table":"supplier" - } - ] + "op_name":"TableLookup", + "table":"region" }, { "op_name":"TableFullScan", "table":"nation" } ] - }, - { - "op_name":"TableFullScan", - "table":"region" } ] } ] }, { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", "args": [ { "op_name":"TableFullScan", - "table":"partsupp" + "table":"supplier" }, { - "op_name":"TableFullScan", - "table":"supplier" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableLookup", + "table":"region" + }, + { + "op_name":"TableFullScan", + "table":"nation" + } + ] } ] - }, - { - "op_name":"TableFullScan", - "table":"nation" } ] }, { "op_name":"TableFullScan", - "table":"region" + "table":"part" } ] } diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json index 25570a61bb..e5adefe907 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json @@ -8,75 +8,75 @@ [ { "op_name":"TableFullScan", - "table":"part" + "table":"partsupp" }, { "op_name":"InnerJoin (MapJoin)", "args": [ { + "op_name":"TableFullScan", + "table":"supplier" + }, + { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"partsupp" - }, - { - "op_name":"TableFullScan", - "table":"supplier" - } - ] + "op_name":"TableFullScan", + "table":"nation" }, { "op_name":"TableFullScan", - "table":"nation" + "table":"region" } ] - }, - { - "op_name":"TableFullScan", - "table":"region" } ] } ] }, { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", "args": [ { "op_name":"TableFullScan", - "table":"partsupp" + "table":"supplier" }, { - "op_name":"TableFullScan", - "table":"supplier" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"nation" + }, + { + "op_name":"TableFullScan", + "table":"region" + } + ] } ] - }, - { - "op_name":"TableFullScan", - "table":"nation" } ] }, { "op_name":"TableFullScan", - "table":"region" + "table":"part" } ] } diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json index 62f26536a7..2239d496bf 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json @@ -3,23 +3,37 @@ "args": [ { + "op_name":"TableFullScan", + "table":"orders" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"orders" + "table":"lineitem" }, { "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"TableFullScan", - "table":"nation" + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"TableFullScan", + "table":"part" + } + ] }, { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -27,28 +41,14 @@ "table":"supplier" }, { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"lineitem" - }, - { - "op_name":"TableFullScan", - "table":"partsupp" - } - ] + "op_name":"TableFullScan", + "table":"nation" } ] } ] } ] - }, - { - "op_name":"TableFullScan", - "table":"part" } ] } diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json index 62f26536a7..2239d496bf 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json @@ -3,23 +3,37 @@ "args": [ { + "op_name":"TableFullScan", + "table":"orders" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"orders" + "table":"lineitem" }, { "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"TableFullScan", - "table":"nation" + "op_name":"InnerJoin (Grace)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"TableFullScan", + "table":"part" + } + ] }, { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -27,28 +41,14 @@ "table":"supplier" }, { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"lineitem" - }, - { - "op_name":"TableFullScan", - "table":"partsupp" - } - ] + "op_name":"TableFullScan", + "table":"nation" } ] } ] } ] - }, - { - "op_name":"TableFullScan", - "table":"part" } ] } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp index 0442d62cba..36bf157ce5 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp @@ -10,7 +10,7 @@ namespace { class TDqOptimizerFactory : public IOptimizerFactory { public: virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext& ectx, const TNativeSettings& settings) const override { - return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, ectx)); + return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, ectx, false)); } virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerPG(IProviderContext& pctx, TExprContext& ctx, const TPGSettings& settings) const override { diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 4316540292..65069e42ba 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -187,14 +187,14 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin, shuffleBy.reserve(optimizerNode->Stats.ShuffledByColumns->Data.size()); for (const auto& column: optimizerNode->Stats.ShuffledByColumns->Data) { - auto node = + auto node = ctx.Builder(equiJoin.Pos()) .List() .Atom(0, column.RelName) .Atom(1, column.AttributeName) .Seal() .Build(); - + shuffleBy.emplace_back(std::move(node)); } @@ -204,7 +204,7 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin, case EShuffleSide::ERight: { shuffleSideOpt = "shuffle_rhs_by"; break;} } - auto option = + auto option = Build<TExprList>(ctx, equiJoin.Pos()) .Add<TCoAtom>() .Build(shuffleSideOpt) @@ -267,10 +267,10 @@ void ComputeStatistics(const std::shared_ptr<TJoinOptimizerNode>& join, IProvide } join->Stats = TOptimizerStatistics( ctx.ComputeJoinStatsV1( - join->LeftArg->Stats, + join->LeftArg->Stats, join->RightArg->Stats, - join->LeftJoinKeys, - join->RightJoinKeys, + join->LeftJoinKeys, + join->RightJoinKeys, EJoinAlgoType::GraceJoin, join->JoinType, nullptr, @@ -282,26 +282,27 @@ void ComputeStatistics(const std::shared_ptr<TJoinOptimizerNode>& join, IProvide class TOptimizerNativeNew: public IOptimizerNew { public: - TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext& exprCtx) + TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext& exprCtx, bool enableShuffleElimination) : IOptimizerNew(ctx) , MaxDPHypTableSize_(maxDPhypDPTableSize) , ExprCtx(exprCtx) + , EnableShuffleElimination(enableShuffleElimination) {} std::shared_ptr<TJoinOptimizerNode> JoinSearch( - const std::shared_ptr<TJoinOptimizerNode>& joinTree, + const std::shared_ptr<TJoinOptimizerNode>& joinTree, const TOptimizerHints& hints = {} ) override { auto relsCount = joinTree->Labels().size(); - if (relsCount <= 14) { + if (EnableShuffleElimination && relsCount <= 14) { return JoinSearchImpl<TNodeSet64, TDPHypSolverShuffleElimination<TNodeSet64>>(joinTree, false, hints); } else if (relsCount <= 64) { // The algorithm is more efficient. - return JoinSearchImpl<TNodeSet64, TDPHypSolverClassic<TNodeSet64>>(joinTree, true, hints); + return JoinSearchImpl<TNodeSet64, TDPHypSolverClassic<TNodeSet64>>(joinTree, EnableShuffleElimination, hints); } else if (64 < relsCount && relsCount <= 128) { - return JoinSearchImpl<TNodeSet128, TDPHypSolverClassic<TNodeSet128>>(joinTree, true, hints); + return JoinSearchImpl<TNodeSet128, TDPHypSolverClassic<TNodeSet128>>(joinTree, EnableShuffleElimination, hints); } else if (128 < relsCount && relsCount <= 192) { - return JoinSearchImpl<TNodeSet192, TDPHypSolverClassic<TNodeSet192>>(joinTree, true, hints); + return JoinSearchImpl<TNodeSet192, TDPHypSolverClassic<TNodeSet192>>(joinTree, EnableShuffleElimination, hints); } ComputeStatistics(joinTree, this->Pctx); @@ -314,7 +315,7 @@ private: using TNodeSet192 = std::bitset<192>; template < - typename TNodeSet, + typename TNodeSet, typename TDPHypImpl > std::shared_ptr<TJoinOptimizerNode> JoinSearchImpl( @@ -379,7 +380,7 @@ private: joinNode->LogicalOrderings = fsm.CreateState(); switch (joinNode->JoinAlgo) { case EJoinAlgoType::GraceJoin: { - bool hashFuncArgsMatch = + bool hashFuncArgsMatch = left->LogicalOrderings.GetShuffleHashFuncArgsCount() == right->LogicalOrderings.GetShuffleHashFuncArgsCount(); if (!hashFuncArgsMatch || !left->LogicalOrderings.HasState() || !left->LogicalOrderings.ContainsShuffle(leftJoinKeysOrderingIdx)) { @@ -433,10 +434,11 @@ private: private: ui32 MaxDPHypTableSize_; TExprContext& ExprCtx; + bool EnableShuffleElimination; }; -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& pctx, const ui32 maxDPhypDPTableSize, TExprContext& ectx) { - return new TOptimizerNativeNew(pctx, maxDPhypDPTableSize, ectx); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& pctx, const ui32 maxDPhypDPTableSize, TExprContext& ectx, bool enableShuffleElimination) { + return new TOptimizerNativeNew(pctx, maxDPhypDPTableSize, ectx, enableShuffleElimination); } TExprBase DqOptimizeEquiJoinWithCosts( @@ -496,8 +498,8 @@ TExprBase DqOptimizeEquiJoinWithCosts( YQL_CLOG(TRACE, CoreDq) << "All statistics for join in place"; bool allRowStorage = std::all_of( - rels.begin(), - rels.end(), + rels.begin(), + rels.end(), [](std::shared_ptr<TRelOptimizerNode>& r) {return r->Stats.StorageType==EStorageType::RowStorage; }); if (optLevel == 2 && allRowStorage) { diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h index 581b3135b0..39a82d815f 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h @@ -1,12 +1,12 @@ #pragma once -#include <yql/essentials/core/cbo/cbo_optimizer_new.h> +#include <yql/essentials/core/cbo/cbo_optimizer_new.h> #include <yql/essentials/core/expr_nodes_gen/yql_expr_nodes_gen.h> #include <yql/essentials/core/yql_type_annotation.h> namespace NYql::NDq { -using TProviderCollectFunction = +using TProviderCollectFunction = std::function<void(TVector<std::shared_ptr<TRelOptimizerNode>>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr<TOptimizerStatistics>&)>; /* @@ -38,6 +38,6 @@ NYql::NNodes::TExprBase DqOptimizeEquiJoinWithCosts( const TOptimizerHints& hints = {} ); -IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPHypDPTableSize, TExprContext& ectx); +IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPHypDPTableSize, TExprContext& ectx, bool enableShuffleElimination); } // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp index d937510b59..16cdae2d91 100644 --- a/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp +++ b/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp @@ -34,13 +34,13 @@ Y_UNIT_TEST_SUITE(DQCBO) { Y_UNIT_TEST(Empty) { TBaseProviderContext pctx; TExprContext dummyCtx; - std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); + std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false)); } Y_UNIT_TEST(JoinSearch2Rels) { TBaseProviderContext pctx; TExprContext dummyCtx; - std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); + std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false)); auto rel1 = std::make_shared<TRelOptimizerNode>( "a", @@ -83,7 +83,7 @@ Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: Y_UNIT_TEST(JoinSearch3Rels) { TBaseProviderContext pctx; TExprContext dummyCtx; - std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx)); + std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false)); auto rel1 = std::make_shared<TRelOptimizerNode>("a", TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000)); @@ -246,7 +246,7 @@ Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) { TExprContext ctx; TBaseProviderContext pctx; std::function<IOptimizerNew*()> optFactory = [&]() { - return MakeNativeOptimizerNew(pctx, 100000, ctx); + return MakeNativeOptimizerNew(pctx, 100000, ctx, false); }; _DqOptimizeEquiJoinWithCosts(optFactory, ctx); } diff --git a/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp index 335c8c29d7..5b1176aaa5 100644 --- a/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp @@ -42,7 +42,7 @@ struct TTestContext : public TBaseProviderContext { const TVector<NDq::TJoinColumn>& , const TVector<NDq::TJoinColumn>& , EJoinAlgoType , - EJoinKind + EJoinKind ) override { return true; } @@ -53,8 +53,8 @@ std::shared_ptr<IBaseOptimizerNode> Enumerate(const std::shared_ptr<IBaseOptimiz auto ctx = TProviderContext(); TExprContext dummyCtx; auto optimizer = - std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max(), dummyCtx)); - + std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max(), dummyCtx, false)); + Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType); auto res = optimizer->JoinSearch(std::static_pointer_cast<TJoinOptimizerNode>(root), hints); Cout << "Optimized Tree:" << Endl; @@ -76,7 +76,7 @@ TVector<TJoinColumn> CollectConditions(const std::shared_ptr<IBaseOptimizerNode> lhsConds.push_back(lhsCond); lhsConds.push_back(rhsCond); } - + return lhsConds; } @@ -84,17 +84,17 @@ bool HaveSameConditions(const std::shared_ptr<IBaseOptimizerNode>& actual, std:: auto actualConds = CollectConditions(actual); auto expectedConds = CollectConditions(expected); - return - std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()) == + return + std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()) == std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(expectedConds.begin(), expectedConds.end()); } bool HaveSameConditionCount(const std::shared_ptr<IBaseOptimizerNode>& actual, std::shared_ptr<IBaseOptimizerNode> expected) { auto actualConds = CollectConditions(actual); auto expectedConds = CollectConditions(expected); - + return actualConds.size() == expectedConds.size() && - std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()).size() == + std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()).size() == std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(expectedConds.begin(), expectedConds.end()).size(); } @@ -107,7 +107,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { for (size_t i = 0; i < nodeCount; ++i) { for (size_t j = 0; j < nodeCount; ++j) { - if (i == j) continue; + if (i == j) continue; TNodeSet64 lhs; lhs[i] = 1; TNodeSet64 rhs; rhs[j] = 1; @@ -122,17 +122,17 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(graph.GetEdges().size() == 6); - CheckClique(graph); + CheckClique(graph); Enumerate(root); } Y_UNIT_TEST(SimpleChain4NodesTransitiveClosure) { auto root = CreateChain(4, "Ya hochu pitsu"); auto graph = MakeJoinHypergraph<TNodeSet64>(root); - + UNIT_ASSERT(graph.GetEdges().size() == 12); - CheckClique(graph); + CheckClique(graph); Enumerate(root); } @@ -142,7 +142,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(graph.GetEdges().size() == 20); - CheckClique(graph); + CheckClique(graph); Enumerate(root); } @@ -166,7 +166,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { rhs = CreateChain(2, "228", "c"); - // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2 + // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2 // ^ we don't want to have transitive closure between c and a root = std::make_shared<TJoinOptimizerNode>( root, rhs, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false @@ -211,7 +211,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { return root; } else { static_assert( - std::is_convertible_v<TJoinArg, std::string> || std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>, + std::is_convertible_v<TJoinArg, std::string> || std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>, "Args of join must be either Join or TString, for example: Join(Join('A', 'B'), 'C')" ); } @@ -252,7 +252,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { TString attr = ToString(rand()); leftJoinCond.push_back(TJoinColumn(std::move(lhsCond), attr)); rightJoinCond.push_back(TJoinColumn(std::move(rhsCond), attr)); - + } } @@ -288,7 +288,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { Y_UNIT_TEST(SimpleDimpleJoin) { auto join = Join("A", "B"); - + auto graph = MakeJoinHypergraph<TNodeSet64>(join); Cout << graph.String() << Endl; @@ -301,7 +301,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { auto graph = MakeJoinHypergraph<TNodeSet64>(root); Cout << graph.String() << Endl; - + auto A = graph.GetNodesByRelNames({"A"}); auto B = graph.GetNodesByRelNames({"B"}); auto C = graph.GetNodesByRelNames({"C"}); @@ -318,7 +318,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { auto anyJoin = Join(Join("A", "B"), "C", /*on=*/ "B=C"); std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true; auto join = Join(anyJoin, "D", /*on=*/"A=D"); - + auto graph = MakeJoinHypergraph<TNodeSet64>(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); @@ -331,7 +331,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { auto anyJoin = Join(Join(Join("A", "B"), "C", /*on=*/ "B=C"), "D", "C=D"); std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true; auto join = Join(anyJoin, "E", /*on=*/ "A=E"); - + auto graph = MakeJoinHypergraph<TNodeSet64>(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); @@ -343,7 +343,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { auto anyJoin = Join(Join("A", "B"), Join("C", "D"), /*on=*/"B=C"); std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->RightAny = true; auto join = Join(anyJoin, "E", /*on=*/ "C=E"); - + auto graph = MakeJoinHypergraph<TNodeSet64>(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); @@ -355,7 +355,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { auto nonReorderable = Join(Join(Join("A", "B"), "C", /*on=*/ "B=C"), "D", "C=D"); std::static_pointer_cast<TJoinOptimizerNode>(nonReorderable)->IsReorderable = false; auto join = Join(nonReorderable, "E", /*on=*/ "A=E"); - + auto graph = MakeJoinHypergraph<TNodeSet64>(join); Cout << graph.String() << Endl; UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); @@ -556,7 +556,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { return res; }; - auto mean = [](const TVector<double>& v) -> double { + auto mean = [](const TVector<double>& v) -> double { double sum = std::accumulate(v.begin(), v.end(), 0.0); return sum / static_cast<double>(v.size()); }; |