aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Pavel Velikhov <pavelvelikhov@ydb.tech> 2024-01-10 12:31:25 +0300
committer GitHub <noreply@github.com> 2024-01-10 12:31:25 +0300
commit ea061ca74b256c1a013dc9bbe359a30ea8251b79 (patch)
tree ddff9c02392a32c99ffe40a29c0844623384f271
parent 6f3e5d01564ca125c82d841961eb6c83363db616 (diff)
download ydb-ea061ca74b256c1a013dc9bbe359a30ea8251b79.tar.gz
Fixed a problem with cardinality estimation for PK joins (#907)
-rw-r--r-- ydb/core/kqp/opt/kqp_statistics_transformer.cpp 2
-rw-r--r-- ydb/library/yql/core/yql_cost_function.cpp 5
-rw-r--r-- ydb/library/yql/dq/opt/dq_opt_stat.cpp 4
3 files changed, 7 insertions, 4 deletions
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
index 22fec88eb7..a5594b43c8 100644
--- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
@@ -55,7 +55,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path.Value());
double nRows = tableData.Metadata->RecordsCount;
int nAttrs = tableData.Metadata->Columns.size();
- YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs;
+ YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs << ", nKeyColumns: " << tableData.Metadata->KeyColumnNames.size();
auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, 0.0, tableData.Metadata->KeyColumnNames);
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats));
diff --git a/ydb/library/yql/core/yql_cost_function.cpp b/ydb/library/yql/core/yql_cost_function.cpp
index a5f0edf21d..58eb22fd0c 100644
--- a/ydb/library/yql/core/yql_cost_function.cpp
+++ b/ydb/library/yql/core/yql_cost_function.cpp
@@ -41,9 +41,11 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
double newCard;
EStatisticsType outputType;
+ TVector<TString> joinedTableKeys;
if (IsPKJoin(rightStats,rightJoinKeys)) {
newCard = std::max(leftStats.Nrows,rightStats.Nrows);
+ joinedTableKeys = leftStats.KeyColumns;
if (leftStats.Type == EStatisticsType::BaseTable){
outputType = EStatisticsType::FilteredFactTable;
} else {
@@ -52,6 +54,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
}
else if (IsPKJoin(leftStats,leftJoinKeys)) {
newCard = std::max(leftStats.Nrows,rightStats.Nrows);
+ joinedTableKeys = rightStats.KeyColumns;
if (rightStats.Type == EStatisticsType::BaseTable){
outputType = EStatisticsType::FilteredFactTable;
} else {
@@ -69,7 +72,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat
+ newCard
+ leftStats.Cost + rightStats.Cost;
- return TOptimizerStatistics(outputType, newCard, newNCols, cost);
+ return TOptimizerStatistics(outputType, newCard, newNCols, cost, joinedTableKeys);
}
TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats,
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 755b60dc0c..747bda5ca5 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -194,7 +194,7 @@ void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationCont
double selectivity = ComputePredicateSelectivity(flatmap.Lambda().Body(), inputStats);
- auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost );
+ auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost, inputStats->KeyColumns );
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
}
@@ -235,7 +235,7 @@ void InferStatisticsForFilter(const TExprNode::TPtr& input, TTypeAnnotationConte
double selectivity = ComputePredicateSelectivity(filterBody, inputStats);
- auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost);
+ auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost, inputStats->KeyColumns);
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
}