about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author pilik <pudge1000-7@ydb.tech> 2025-03-01 01:29:22 +0300
committer GitHub <noreply@github.com> 2025-03-01 01:29:22 +0300
commit d08419b4a78e460944e81523fe9c7ce7c7f5ca14 (patch)
tree 5ad5bf06a82a9fc1ee248447663d148480546045
parent bf4f55b8e360284b921e123eaec0ee6202d2b969 (diff)
download ydb-d08419b4a78e460944e81523fe9c7ce7c7f5ca14.tar.gz
[CBO] added enableshuffleelimination flag (#15218)
-rw-r--r-- ydb/core/kqp/opt/logical/kqp_opt_log.cpp 7
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/lookupbug.json 14
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpcc.json 6
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json 2
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json 2
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json 36
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json 20
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json 56
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json 58
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json 40
-rw-r--r-- ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json 40
-rw-r--r-- ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp 2
-rw-r--r-- ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp 38
-rw-r--r-- ydb/library/yql/dq/opt/dq_opt_join_cost_based.h 6
-rw-r--r-- ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp 8
-rw-r--r-- ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp 46
16 files changed, 192 insertions, 189 deletions
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
index 2db1296b16..4adbfa8616 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
@@ -92,7 +92,7 @@ public:
public:
TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) override {
auto status = TOptimizeTransformerBase::DoTransform(input, output, ctx);
-
+
if (status == TStatus::Ok) {
for (const auto& hint: KqpCtx.GetOptimizerHints().GetUnappliedString()) {
ctx.AddWarning(YqlIssue({}, TIssuesIds::YQL_UNUSED_HINT, "Unapplied hint: " + hint));
@@ -118,7 +118,7 @@ protected:
TMaybeNode<TExprBase> RewriteAggregate(TExprBase node, TExprContext& ctx) {
TMaybeNode<TExprBase> output;
auto aggregate = node.Cast<TCoAggregateBase>();
- auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping");
+ auto hopSetting = GetSetting(aggregate.Settings().Ref(), "hopping");
if (hopSetting) {
auto input = aggregate.Input().Maybe<TDqConnection>();
if (!input) {
@@ -163,8 +163,9 @@ protected:
TMaybeNode<TExprBase> OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) {
auto maxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize);
auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel);
+ bool enableShuffleElimination = KqpCtx.Config->OptShuffleElimination.Get().GetOrElse(false);;
auto providerCtx = TKqpProviderContext(KqpCtx, optLevel);
- auto opt = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize, ctx));
+ auto opt = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize, ctx, enableShuffleElimination));
TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, optLevel,
*opt, [](auto& rels, auto label, auto node, auto stat) {
rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), *stat, node));
diff --git a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json
index 96c727f9c5..b32ce9858c 100644
--- a/ydb/core/kqp/ut/join/data/join_order/lookupbug.json
+++ b/ydb/core/kqp/ut/join/data/join_order/lookupbug.json
@@ -7,22 +7,22 @@
"args":
[
{
- "op_name":"TableFullScan",
- "table":"quotas_browsers_relation"
- },
- {
"op_name":"LeftJoin (MapJoin)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"browsers"
+ "table":"quotas_browsers_relation"
},
{
- "op_name":"TableFullScan",
- "table":"browser_groups"
+ "op_name":"TableLookup",
+ "table":"browsers"
}
]
+ },
+ {
+ "op_name":"TableLookup",
+ "table":"browser_groups"
}
]
},
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcc.json b/ydb/core/kqp/ut/join/data/join_order/tpcc.json
index 2ddae4ea99..4a577a1b4c 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpcc.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpcc.json
@@ -3,12 +3,12 @@
"args":
[
{
- "op_name":"TableRangeScan",
- "table":"test\/tpcc\/order_line"
+ "op_name":"TableLookup",
+ "table":"test\/tpcc\/stock"
},
{
"op_name":"TableRangeScan",
- "table":"test\/tpcc\/stock"
+ "table":"test\/tpcc\/order_line"
}
]
}
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json
index 7b0d733f58..12594bc929 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s.json
@@ -1,5 +1,5 @@
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json
index 7b0d733f58..12594bc929 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_small_1000s_column_store.json
@@ -1,5 +1,5 @@
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json
index 74ebff5f89..9c7b141dde 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s.json
@@ -7,11 +7,11 @@
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"LeftJoin (Grace)",
"args":
[
{
- "op_name":"LeftJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -19,25 +19,25 @@
"table":"test\/ds\/store_sales"
},
{
- "op_name":"TableFullScan",
- "table":"test\/ds\/store_returns"
+ "op_name":"Union",
+ "args":
+ [
+ {
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/date_dim"
+ }
+ ]
}
]
},
{
- "op_name":"Union",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/date_dim"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/store_returns"
}
]
},
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -46,11 +46,11 @@
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/catalog_sales"
+ "table":"test\/ds\/web_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/catalog_returns"
+ "table":"test\/ds\/web_returns"
}
]
},
@@ -69,7 +69,7 @@
]
},
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -78,11 +78,11 @@
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/web_sales"
+ "table":"test\/ds\/catalog_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/web_returns"
+ "table":"test\/ds\/catalog_returns"
}
]
},
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json
index fd6b1dd32b..cf514b8165 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpcds78_1000s_column_store.json
@@ -7,11 +7,11 @@
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"LeftJoin (Grace)",
"args":
[
{
- "op_name":"LeftJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -20,18 +20,18 @@
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/store_returns"
+ "table":"test\/ds\/date_dim"
}
]
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/date_dim"
+ "table":"test\/ds\/store_returns"
}
]
},
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -40,11 +40,11 @@
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/catalog_sales"
+ "table":"test\/ds\/web_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/catalog_returns"
+ "table":"test\/ds\/web_returns"
}
]
},
@@ -57,7 +57,7 @@
]
},
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -66,11 +66,11 @@
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/web_sales"
+ "table":"test\/ds\/catalog_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/web_returns"
+ "table":"test\/ds\/catalog_returns"
}
]
},
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json
index 25570a61bb..4b2772b1a1 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json
@@ -8,75 +8,75 @@
[
{
"op_name":"TableFullScan",
- "table":"part"
+ "table":"partsupp"
},
{
"op_name":"InnerJoin (MapJoin)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"supplier"
+ },
+ {
"op_name":"InnerJoin (MapJoin)",
"args":
[
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"partsupp"
- },
- {
- "op_name":"TableFullScan",
- "table":"supplier"
- }
- ]
+ "op_name":"TableLookup",
+ "table":"region"
},
{
"op_name":"TableFullScan",
"table":"nation"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"region"
}
]
}
]
},
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"TableFullScan",
+ "table":"partsupp"
+ },
+ {
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"partsupp"
+ "table":"supplier"
},
{
- "op_name":"TableFullScan",
- "table":"supplier"
+ "op_name":"InnerJoin (MapJoin)",
+ "args":
+ [
+ {
+ "op_name":"TableLookup",
+ "table":"region"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"nation"
+ }
+ ]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"nation"
}
]
},
{
"op_name":"TableFullScan",
- "table":"region"
+ "table":"part"
}
]
}
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json
index 25570a61bb..e5adefe907 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json
@@ -8,75 +8,75 @@
[
{
"op_name":"TableFullScan",
- "table":"part"
+ "table":"partsupp"
},
{
"op_name":"InnerJoin (MapJoin)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"supplier"
+ },
+ {
"op_name":"InnerJoin (MapJoin)",
"args":
[
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"partsupp"
- },
- {
- "op_name":"TableFullScan",
- "table":"supplier"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"nation"
},
{
"op_name":"TableFullScan",
- "table":"nation"
+ "table":"region"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"region"
}
]
}
]
},
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"TableFullScan",
+ "table":"partsupp"
+ },
+ {
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"partsupp"
+ "table":"supplier"
},
{
- "op_name":"TableFullScan",
- "table":"supplier"
+ "op_name":"InnerJoin (MapJoin)",
+ "args":
+ [
+ {
+ "op_name":"TableFullScan",
+ "table":"nation"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"region"
+ }
+ ]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"nation"
}
]
},
{
"op_name":"TableFullScan",
- "table":"region"
+ "table":"part"
}
]
}
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json
index 62f26536a7..2239d496bf 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s.json
@@ -3,23 +3,37 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"orders"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"orders"
+ "table":"lineitem"
},
{
"op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"TableFullScan",
- "table":"nation"
+ "op_name":"InnerJoin (Grace)",
+ "args":
+ [
+ {
+ "op_name":"TableFullScan",
+ "table":"partsupp"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"part"
+ }
+ ]
},
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -27,28 +41,14 @@
"table":"supplier"
},
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"lineitem"
- },
- {
- "op_name":"TableFullScan",
- "table":"partsupp"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"nation"
}
]
}
]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"part"
}
]
}
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json
index 62f26536a7..2239d496bf 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpch9_1000s_column_store.json
@@ -3,23 +3,37 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"orders"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"orders"
+ "table":"lineitem"
},
{
"op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"TableFullScan",
- "table":"nation"
+ "op_name":"InnerJoin (Grace)",
+ "args":
+ [
+ {
+ "op_name":"TableFullScan",
+ "table":"partsupp"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"part"
+ }
+ ]
},
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -27,28 +41,14 @@
"table":"supplier"
},
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"lineitem"
- },
- {
- "op_name":"TableFullScan",
- "table":"partsupp"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"nation"
}
]
}
]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"part"
}
]
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp
index 0442d62cba..36bf157ce5 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp
@@ -10,7 +10,7 @@ namespace {
class TDqOptimizerFactory : public IOptimizerFactory {
public:
virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext& ectx, const TNativeSettings& settings) const override {
- return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, ectx));
+ return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, ectx, false));
}
virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerPG(IProviderContext& pctx, TExprContext& ctx, const TPGSettings& settings) const override {
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
index 4316540292..65069e42ba 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
@@ -187,14 +187,14 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin,
shuffleBy.reserve(optimizerNode->Stats.ShuffledByColumns->Data.size());
for (const auto& column: optimizerNode->Stats.ShuffledByColumns->Data) {
- auto node =
+ auto node =
ctx.Builder(equiJoin.Pos())
.List()
.Atom(0, column.RelName)
.Atom(1, column.AttributeName)
.Seal()
.Build();
-
+
shuffleBy.emplace_back(std::move(node));
}
@@ -204,7 +204,7 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin,
case EShuffleSide::ERight: { shuffleSideOpt = "shuffle_rhs_by"; break;}
}
- auto option =
+ auto option =
Build<TExprList>(ctx, equiJoin.Pos())
.Add<TCoAtom>()
.Build(shuffleSideOpt)
@@ -267,10 +267,10 @@ void ComputeStatistics(const std::shared_ptr<TJoinOptimizerNode>& join, IProvide
}
join->Stats = TOptimizerStatistics(
ctx.ComputeJoinStatsV1(
- join->LeftArg->Stats,
+ join->LeftArg->Stats,
join->RightArg->Stats,
- join->LeftJoinKeys,
- join->RightJoinKeys,
+ join->LeftJoinKeys,
+ join->RightJoinKeys,
EJoinAlgoType::GraceJoin,
join->JoinType,
nullptr,
@@ -282,26 +282,27 @@ void ComputeStatistics(const std::shared_ptr<TJoinOptimizerNode>& join, IProvide
class TOptimizerNativeNew: public IOptimizerNew {
public:
- TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext& exprCtx)
+ TOptimizerNativeNew(IProviderContext& ctx, ui32 maxDPhypDPTableSize, TExprContext& exprCtx, bool enableShuffleElimination)
: IOptimizerNew(ctx)
, MaxDPHypTableSize_(maxDPhypDPTableSize)
, ExprCtx(exprCtx)
+ , EnableShuffleElimination(enableShuffleElimination)
{}
std::shared_ptr<TJoinOptimizerNode> JoinSearch(
- const std::shared_ptr<TJoinOptimizerNode>& joinTree,
+ const std::shared_ptr<TJoinOptimizerNode>& joinTree,
const TOptimizerHints& hints = {}
) override {
auto relsCount = joinTree->Labels().size();
- if (relsCount <= 14) {
+ if (EnableShuffleElimination && relsCount <= 14) {
return JoinSearchImpl<TNodeSet64, TDPHypSolverShuffleElimination<TNodeSet64>>(joinTree, false, hints);
} else if (relsCount <= 64) { // The algorithm is more efficient.
- return JoinSearchImpl<TNodeSet64, TDPHypSolverClassic<TNodeSet64>>(joinTree, true, hints);
+ return JoinSearchImpl<TNodeSet64, TDPHypSolverClassic<TNodeSet64>>(joinTree, EnableShuffleElimination, hints);
} else if (64 < relsCount && relsCount <= 128) {
- return JoinSearchImpl<TNodeSet128, TDPHypSolverClassic<TNodeSet128>>(joinTree, true, hints);
+ return JoinSearchImpl<TNodeSet128, TDPHypSolverClassic<TNodeSet128>>(joinTree, EnableShuffleElimination, hints);
} else if (128 < relsCount && relsCount <= 192) {
- return JoinSearchImpl<TNodeSet192, TDPHypSolverClassic<TNodeSet192>>(joinTree, true, hints);
+ return JoinSearchImpl<TNodeSet192, TDPHypSolverClassic<TNodeSet192>>(joinTree, EnableShuffleElimination, hints);
}
ComputeStatistics(joinTree, this->Pctx);
@@ -314,7 +315,7 @@ private:
using TNodeSet192 = std::bitset<192>;
template <
- typename TNodeSet,
+ typename TNodeSet,
typename TDPHypImpl
>
std::shared_ptr<TJoinOptimizerNode> JoinSearchImpl(
@@ -379,7 +380,7 @@ private:
joinNode->LogicalOrderings = fsm.CreateState();
switch (joinNode->JoinAlgo) {
case EJoinAlgoType::GraceJoin: {
- bool hashFuncArgsMatch =
+ bool hashFuncArgsMatch =
left->LogicalOrderings.GetShuffleHashFuncArgsCount() == right->LogicalOrderings.GetShuffleHashFuncArgsCount();
if (!hashFuncArgsMatch || !left->LogicalOrderings.HasState() || !left->LogicalOrderings.ContainsShuffle(leftJoinKeysOrderingIdx)) {
@@ -433,10 +434,11 @@ private:
private:
ui32 MaxDPHypTableSize_;
TExprContext& ExprCtx;
+ bool EnableShuffleElimination;
};
-IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& pctx, const ui32 maxDPhypDPTableSize, TExprContext& ectx) {
- return new TOptimizerNativeNew(pctx, maxDPhypDPTableSize, ectx);
+IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& pctx, const ui32 maxDPhypDPTableSize, TExprContext& ectx, bool enableShuffleElimination) {
+ return new TOptimizerNativeNew(pctx, maxDPhypDPTableSize, ectx, enableShuffleElimination);
}
TExprBase DqOptimizeEquiJoinWithCosts(
@@ -496,8 +498,8 @@ TExprBase DqOptimizeEquiJoinWithCosts(
YQL_CLOG(TRACE, CoreDq) << "All statistics for join in place";
bool allRowStorage = std::all_of(
- rels.begin(),
- rels.end(),
+ rels.begin(),
+ rels.end(),
[](std::shared_ptr<TRelOptimizerNode>& r) {return r->Stats.StorageType==EStorageType::RowStorage; });
if (optLevel == 2 && allRowStorage) {
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h
index 581b3135b0..39a82d815f 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.h
@@ -1,12 +1,12 @@
#pragma once
-#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
+#include <yql/essentials/core/cbo/cbo_optimizer_new.h>
#include <yql/essentials/core/expr_nodes_gen/yql_expr_nodes_gen.h>
#include <yql/essentials/core/yql_type_annotation.h>
namespace NYql::NDq {
-using TProviderCollectFunction =
+using TProviderCollectFunction =
std::function<void(TVector<std::shared_ptr<TRelOptimizerNode>>&, TStringBuf, const TExprNode::TPtr, const std::shared_ptr<TOptimizerStatistics>&)>;
/*
@@ -38,6 +38,6 @@ NYql::NNodes::TExprBase DqOptimizeEquiJoinWithCosts(
const TOptimizerHints& hints = {}
);
-IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPHypDPTableSize, TExprContext& ectx);
+IOptimizerNew* MakeNativeOptimizerNew(IProviderContext& ctx, const ui32 maxDPHypDPTableSize, TExprContext& ectx, bool enableShuffleElimination);
} // namespace NYql::NDq
diff --git a/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp
index d937510b59..16cdae2d91 100644
--- a/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp
+++ b/ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp
@@ -34,13 +34,13 @@ Y_UNIT_TEST_SUITE(DQCBO) {
Y_UNIT_TEST(Empty) {
TBaseProviderContext pctx;
TExprContext dummyCtx;
- std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx));
+ std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
}
Y_UNIT_TEST(JoinSearch2Rels) {
TBaseProviderContext pctx;
TExprContext dummyCtx;
- std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx));
+ std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
auto rel1 = std::make_shared<TRelOptimizerNode>(
"a",
@@ -83,7 +83,7 @@ Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel:
Y_UNIT_TEST(JoinSearch3Rels) {
TBaseProviderContext pctx;
TExprContext dummyCtx;
- std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx));
+ std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
auto rel1 = std::make_shared<TRelOptimizerNode>("a",
TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000));
@@ -246,7 +246,7 @@ Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) {
TExprContext ctx;
TBaseProviderContext pctx;
std::function<IOptimizerNew*()> optFactory = [&]() {
- return MakeNativeOptimizerNew(pctx, 100000, ctx);
+ return MakeNativeOptimizerNew(pctx, 100000, ctx, false);
};
_DqOptimizeEquiJoinWithCosts(optFactory, ctx);
}
diff --git a/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp
index 335c8c29d7..5b1176aaa5 100644
--- a/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp
+++ b/ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp
@@ -42,7 +42,7 @@ struct TTestContext : public TBaseProviderContext {
const TVector<NDq::TJoinColumn>& ,
const TVector<NDq::TJoinColumn>& ,
EJoinAlgoType ,
- EJoinKind
+ EJoinKind
) override {
return true;
}
@@ -53,8 +53,8 @@ std::shared_ptr<IBaseOptimizerNode> Enumerate(const std::shared_ptr<IBaseOptimiz
auto ctx = TProviderContext();
TExprContext dummyCtx;
auto optimizer =
- std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max(), dummyCtx));
-
+ std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max(), dummyCtx, false));
+
Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType);
auto res = optimizer->JoinSearch(std::static_pointer_cast<TJoinOptimizerNode>(root), hints);
Cout << "Optimized Tree:" << Endl;
@@ -76,7 +76,7 @@ TVector<TJoinColumn> CollectConditions(const std::shared_ptr<IBaseOptimizerNode>
lhsConds.push_back(lhsCond);
lhsConds.push_back(rhsCond);
}
-
+
return lhsConds;
}
@@ -84,17 +84,17 @@ bool HaveSameConditions(const std::shared_ptr<IBaseOptimizerNode>& actual, std::
auto actualConds = CollectConditions(actual);
auto expectedConds = CollectConditions(expected);
- return
- std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()) ==
+ return
+ std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()) ==
std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(expectedConds.begin(), expectedConds.end());
}
bool HaveSameConditionCount(const std::shared_ptr<IBaseOptimizerNode>& actual, std::shared_ptr<IBaseOptimizerNode> expected) {
auto actualConds = CollectConditions(actual);
auto expectedConds = CollectConditions(expected);
-
+
return actualConds.size() == expectedConds.size() &&
- std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()).size() ==
+ std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(actualConds.begin(), actualConds.end()).size() ==
std::unordered_set<TJoinColumn, TJoinColumn::THashFunction>(expectedConds.begin(), expectedConds.end()).size();
}
@@ -107,7 +107,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
for (size_t i = 0; i < nodeCount; ++i) {
for (size_t j = 0; j < nodeCount; ++j) {
- if (i == j) continue;
+ if (i == j) continue;
TNodeSet64 lhs; lhs[i] = 1;
TNodeSet64 rhs; rhs[j] = 1;
@@ -122,17 +122,17 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
UNIT_ASSERT(graph.GetEdges().size() == 6);
- CheckClique(graph);
+ CheckClique(graph);
Enumerate(root);
}
Y_UNIT_TEST(SimpleChain4NodesTransitiveClosure) {
auto root = CreateChain(4, "Ya hochu pitsu");
auto graph = MakeJoinHypergraph<TNodeSet64>(root);
-
+
UNIT_ASSERT(graph.GetEdges().size() == 12);
- CheckClique(graph);
+ CheckClique(graph);
Enumerate(root);
}
@@ -142,7 +142,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
UNIT_ASSERT(graph.GetEdges().size() == 20);
- CheckClique(graph);
+ CheckClique(graph);
Enumerate(root);
}
@@ -166,7 +166,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
rhs = CreateChain(2, "228", "c");
- // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2
+ // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2
// ^ we don't want to have transitive closure between c and a
root = std::make_shared<TJoinOptimizerNode>(
root, rhs, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
@@ -211,7 +211,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
return root;
} else {
static_assert(
- std::is_convertible_v<TJoinArg, std::string> || std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>,
+ std::is_convertible_v<TJoinArg, std::string> || std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>,
"Args of join must be either Join or TString, for example: Join(Join('A', 'B'), 'C')"
);
}
@@ -252,7 +252,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
TString attr = ToString(rand());
leftJoinCond.push_back(TJoinColumn(std::move(lhsCond), attr));
rightJoinCond.push_back(TJoinColumn(std::move(rhsCond), attr));
-
+
}
}
@@ -288,7 +288,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
Y_UNIT_TEST(SimpleDimpleJoin) {
auto join = Join("A", "B");
-
+
auto graph = MakeJoinHypergraph<TNodeSet64>(join);
Cout << graph.String() << Endl;
@@ -301,7 +301,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
auto graph = MakeJoinHypergraph<TNodeSet64>(root);
Cout << graph.String() << Endl;
-
+
auto A = graph.GetNodesByRelNames({"A"});
auto B = graph.GetNodesByRelNames({"B"});
auto C = graph.GetNodesByRelNames({"C"});
@@ -318,7 +318,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
auto anyJoin = Join(Join("A", "B"), "C", /*on=*/ "B=C");
std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true;
auto join = Join(anyJoin, "D", /*on=*/"A=D");
-
+
auto graph = MakeJoinHypergraph<TNodeSet64>(join);
Cout << graph.String() << Endl;
UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
@@ -331,7 +331,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
auto anyJoin = Join(Join(Join("A", "B"), "C", /*on=*/ "B=C"), "D", "C=D");
std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true;
auto join = Join(anyJoin, "E", /*on=*/ "A=E");
-
+
auto graph = MakeJoinHypergraph<TNodeSet64>(join);
Cout << graph.String() << Endl;
UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
@@ -343,7 +343,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
auto anyJoin = Join(Join("A", "B"), Join("C", "D"), /*on=*/"B=C");
std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->RightAny = true;
auto join = Join(anyJoin, "E", /*on=*/ "C=E");
-
+
auto graph = MakeJoinHypergraph<TNodeSet64>(join);
Cout << graph.String() << Endl;
UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
@@ -355,7 +355,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
auto nonReorderable = Join(Join(Join("A", "B"), "C", /*on=*/ "B=C"), "D", "C=D");
std::static_pointer_cast<TJoinOptimizerNode>(nonReorderable)->IsReorderable = false;
auto join = Join(nonReorderable, "E", /*on=*/ "A=E");
-
+
auto graph = MakeJoinHypergraph<TNodeSet64>(join);
Cout << graph.String() << Endl;
UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
@@ -556,7 +556,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
return res;
};
- auto mean = [](const TVector<double>& v) -> double {
+ auto mean = [](const TVector<double>& v) -> double {
double sum = std::accumulate(v.begin(), v.end(), 0.0);
return sum / static_cast<double>(v.size());
};