aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Velikhov <pavelvelikhov@ydb.tech>2024-11-26 16:59:51 +0300
committerGitHub <noreply@github.com>2024-11-26 16:59:51 +0300
commit806d77103e5cb80c90102ffe61063f365e06e1a4 (patch)
treec65b274e4b45383a62e1048fdfcc546a81c46cf1
parentc5837d4aace6e4255bdd9dcc41f40a9aa93dba66 (diff)
downloadydb-806d77103e5cb80c90102ffe61063f365e06e1a4.tar.gz
Added statistics inference for DqJoins (#11994)
-rw-r--r--ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json154
-rw-r--r--ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json132
-rw-r--r--ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json42
-rw-r--r--ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json40
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.cpp63
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.h1
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp3
7 files changed, 251 insertions, 184 deletions
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json
index cbc1461ddb..9f04a9dfad 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json
@@ -1,5 +1,5 @@
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
@@ -11,22 +11,12 @@
"args":
[
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_sales"
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_returns"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/catalog_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/store_returns"
+ "table":"test\/ds\/catalog_returns"
}
]
},
@@ -55,64 +45,82 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_demographics"
+ },
+ {
"op_name":"InnerJoin (MapJoin)",
"args":
[
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/customer"
+ "table":"test\/ds\/customer_demographics"
},
{
"op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"TableLookup",
- "table":"test\/ds\/store_sales"
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/store_returns"
},
{
- "op_name":"TableFullScan",
- "table":"test\/ds\/item"
+ "op_name":"InnerJoin (MapJoin)",
+ "args":
+ [
+ {
+ "op_name":"TableLookup",
+ "table":"test\/ds\/store_sales"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/item"
+ }
+ ]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/date_dim"
}
]
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
+ "table":"test\/ds\/date_dim"
}
]
},
@@ -121,16 +129,8 @@
"table":"test\/ds\/store"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
},
@@ -195,22 +195,12 @@
"args":
[
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_sales"
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_returns"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/catalog_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/store_returns"
+ "table":"test\/ds\/catalog_returns"
}
]
},
@@ -239,64 +229,82 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_demographics"
+ },
+ {
"op_name":"InnerJoin (MapJoin)",
"args":
[
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/customer"
+ "table":"test\/ds\/customer_demographics"
},
{
"op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"TableLookup",
- "table":"test\/ds\/store_sales"
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/store_returns"
},
{
- "op_name":"TableFullScan",
- "table":"test\/ds\/item"
+ "op_name":"InnerJoin (MapJoin)",
+ "args":
+ [
+ {
+ "op_name":"TableLookup",
+ "table":"test\/ds\/store_sales"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/item"
+ }
+ ]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/date_dim"
}
]
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
+ "table":"test\/ds\/date_dim"
}
]
},
@@ -305,16 +313,8 @@
"table":"test\/ds\/store"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
},
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json
index ad5442c66b..ea9564d2db 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json
@@ -11,22 +11,12 @@
"args":
[
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_sales"
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_returns"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/catalog_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/store_returns"
+ "table":"test\/ds\/catalog_returns"
}
]
},
@@ -55,10 +45,18 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_demographics"
+ },
+ {
"op_name":"InnerJoin (MapJoin)",
"args":
[
@@ -79,32 +77,42 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_demographics"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/store_sales"
+ "table":"test\/ds\/store_returns"
},
{
- "op_name":"TableFullScan",
- "table":"test\/ds\/item"
+ "op_name":"InnerJoin (MapJoin)",
+ "args":
+ [
+ {
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/store_sales"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/item"
+ }
+ ]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
}
]
}
@@ -121,16 +129,8 @@
"table":"test\/ds\/store"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
},
@@ -195,22 +195,12 @@
"args":
[
{
- "op_name":"InnerJoin (Grace)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_sales"
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/catalog_returns"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/catalog_sales"
},
{
"op_name":"TableFullScan",
- "table":"test\/ds\/store_returns"
+ "table":"test\/ds\/catalog_returns"
}
]
},
@@ -239,10 +229,18 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_demographics"
+ },
+ {
"op_name":"InnerJoin (MapJoin)",
"args":
[
@@ -263,32 +261,42 @@
"args":
[
{
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_demographics"
+ },
+ {
"op_name":"InnerJoin (Grace)",
"args":
[
{
- "op_name":"InnerJoin (MapJoin)",
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/customer_address"
+ },
+ {
+ "op_name":"InnerJoin (Grace)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"test\/ds\/store_sales"
+ "table":"test\/ds\/store_returns"
},
{
- "op_name":"TableFullScan",
- "table":"test\/ds\/item"
+ "op_name":"InnerJoin (MapJoin)",
+ "args":
+ [
+ {
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/store_sales"
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"test\/ds\/item"
+ }
+ ]
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
}
]
}
@@ -305,16 +313,8 @@
"table":"test\/ds\/store"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_demographics"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"test\/ds\/customer_address"
}
]
},
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json
index e122be691e..4b2772b1a1 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json
@@ -7,39 +7,39 @@
"args":
[
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"TableFullScan",
+ "table":"partsupp"
+ },
+ {
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"partsupp"
+ "table":"supplier"
},
{
"op_name":"InnerJoin (MapJoin)",
"args":
[
{
- "op_name":"TableFullScan",
- "table":"supplier"
+ "op_name":"TableLookup",
+ "table":"region"
},
{
- "op_name":"InnerJoin (MapJoin)",
- "args":
- [
- {
- "op_name":"TableLookup",
- "table":"region"
- },
- {
- "op_name":"TableFullScan",
- "table":"nation"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"nation"
}
]
}
]
- },
+ }
+ ]
+ },
+ {
+ "op_name":"InnerJoin (Grace)",
+ "args":
+ [
{
"op_name":"InnerJoin (Grace)",
"args":
@@ -73,12 +73,12 @@
]
}
]
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"part"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"part"
}
]
}
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json
index 1a0edf0bd3..e5adefe907 100644
--- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json
+++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json
@@ -7,12 +7,16 @@
"args":
[
{
- "op_name":"InnerJoin (Grace)",
+ "op_name":"TableFullScan",
+ "table":"partsupp"
+ },
+ {
+ "op_name":"InnerJoin (MapJoin)",
"args":
[
{
"op_name":"TableFullScan",
- "table":"partsupp"
+ "table":"supplier"
},
{
"op_name":"InnerJoin (MapJoin)",
@@ -20,26 +24,22 @@
[
{
"op_name":"TableFullScan",
- "table":"supplier"
+ "table":"nation"
},
{
- "op_name":"InnerJoin (MapJoin)",
- "args":
- [
- {
- "op_name":"TableFullScan",
- "table":"nation"
- },
- {
- "op_name":"TableFullScan",
- "table":"region"
- }
- ]
+ "op_name":"TableFullScan",
+ "table":"region"
}
]
}
]
- },
+ }
+ ]
+ },
+ {
+ "op_name":"InnerJoin (Grace)",
+ "args":
+ [
{
"op_name":"InnerJoin (Grace)",
"args":
@@ -73,12 +73,12 @@
]
}
]
+ },
+ {
+ "op_name":"TableFullScan",
+ "table":"part"
}
]
- },
- {
- "op_name":"TableFullScan",
- "table":"part"
}
]
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 124c5c6969..70647840c5 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -370,6 +370,69 @@ void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationCo
}
/**
+ * Infer statistics for DqJoin
+ * DqJoin is an intermediary join representation in Dq
+ */
+void InferStatisticsForDqJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints) { // computes join statistics from both inputs' statistics and stores them on the join node
+ auto inputNode = TExprBase(input);
+ auto join = inputNode.Cast<TDqJoin>(); // treat the incoming node as a TDqJoin
+
+ auto leftArg = join.LeftInput();
+ auto rightArg = join.RightInput();
+
+ auto leftStats = typeCtx->GetStats(leftArg.Raw());
+ auto rightStats = typeCtx->GetStats(rightArg.Raw());
+
+ if (!leftStats || !rightStats) { // statistics for both inputs are required; otherwise nothing is recorded for this join
+ return;
+ }
+
+ auto joinAlgo = FromString<EJoinAlgoType>(join.JoinAlgo().StringValue()); // join algorithm is stored on the node as a string
+ if (joinAlgo == EJoinAlgoType::Undefined) { // no statistics are inferred while the algorithm is still undefined
+ return;
+ }
+
+ auto leftLabels = InferLabels(leftStats, join.LeftJoinKeyNames());
+ auto rightLabels = InferLabels(rightStats, join.RightJoinKeyNames());
+
+ leftStats = ApplyCardinalityHints(leftStats, leftLabels, hints); // per-side statistics are replaced by the result of applying the hints
+ rightStats = ApplyCardinalityHints(rightStats, rightLabels, hints);
+
+ TVector<TJoinColumn> leftJoinKeys;
+ TVector<TJoinColumn> rightJoinKeys;
+
+ for (size_t i=0; i<join.LeftJoinKeyNames().Size(); i++) { // build one TJoinColumn per left join key name
+ auto alias = ExtractAlias(join.LeftJoinKeyNames().Item(i).StringValue());
+ auto attrName = RemoveAliases(join.LeftJoinKeyNames().Item(i).StringValue());
+ leftJoinKeys.push_back(TJoinColumn(alias, attrName));
+ }
+ for (size_t i=0; i<join.RightJoinKeyNames().Size(); i++) { // same for the right join key names
+ auto alias = ExtractAlias(join.RightJoinKeyNames().Item(i).StringValue());
+ auto attrName = RemoveAliases(join.RightJoinKeyNames().Item(i).StringValue());
+ rightJoinKeys.push_back(TJoinColumn(alias, attrName));
+ }
+
+ auto unionOfLabels = UnionLabels(leftLabels, rightLabels); // labels of both sides combined; used for the hint lookup and for the result labels
+
+ auto resStats = std::make_shared<TOptimizerStatistics>(
+ ctx.ComputeJoinStats(
+ *leftStats,
+ *rightStats,
+ leftJoinKeys,
+ rightJoinKeys,
+ joinAlgo,
+ ConvertToJoinKind(join.JoinType().StringValue()),
+ FindCardHint(unionOfLabels, hints) // hint looked up for the combined label set
+ )
+ );
+
+ resStats->Labels = std::make_shared<TVector<TString>>(); // start from an empty label list, then copy in the combined labels
+ resStats->Labels->insert(resStats->Labels->begin(), unionOfLabels.begin(), unionOfLabels.end());
+ typeCtx->SetStats(join.Raw(), resStats); // attach the computed statistics to the join node
+ YQL_CLOG(TRACE, CoreDq) << "Infer statistics for DqJoin: " << resStats->ToString();
+}
+
+/**
* Infer statistics for DqSource
*
* We just pass up the statistics from the Settings of the DqSource
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h
index a19f3f5b84..0959bcbef4 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.h
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.h
@@ -19,6 +19,7 @@ void InferStatisticsForStage(const TExprNode::TPtr& input, TTypeAnnotationContex
void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {});
void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {});
+void InferStatisticsForDqJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {});
void InferStatisticsForAsList(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
bool InferStatisticsForListParam(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp b/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp
index 7601bcabe1..50f0b2d4a2 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp
@@ -65,6 +65,9 @@ bool TDqStatisticsTransformerBase::BeforeLambdas(const TExprNode::TPtr& input, T
else if(TCoGraceJoinCore::Match(input.Get())) {
InferStatisticsForGraceJoin(input, TypeCtx, Pctx, CardinalityHints);
}
+ else if (TDqJoin::Match(input.Get())) {
+ InferStatisticsForDqJoin(input, TypeCtx, Pctx, CardinalityHints);
+ }
// Do nothing in case of EquiJoin, otherwise the EquiJoin rule won't fire
else if(TCoEquiJoin::Match(input.Get())){