diff options
author | Pavel Velikhov <pavelvelikhov@ydb.tech> | 2024-01-10 12:31:25 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-10 12:31:25 +0300 |
commit | ea061ca74b256c1a013dc9bbe359a30ea8251b79 (patch) | |
tree | ddff9c02392a32c99ffe40a29c0844623384f271 | |
parent | 6f3e5d01564ca125c82d841961eb6c83363db616 (diff) | |
download | ydb-ea061ca74b256c1a013dc9bbe359a30ea8251b79.tar.gz |
Fixed a problem with cardinality estimation for PK joins (#907)
-rw-r--r-- | ydb/core/kqp/opt/kqp_statistics_transformer.cpp | 2 |
-rw-r--r-- | ydb/library/yql/core/yql_cost_function.cpp | 5 |
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.cpp | 4 |
3 files changed, 7 insertions, 4 deletions
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 22fec88eb7..a5594b43c8 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -55,7 +55,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon const auto& tableData = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, path.Value()); double nRows = tableData.Metadata->RecordsCount; int nAttrs = tableData.Metadata->Columns.size(); - YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs; + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for table: " << path.Value() << ", nrows: " << nRows << ", nattrs: " << nAttrs << ", nKeyColumns: " << tableData.Metadata->KeyColumnNames.size(); auto outputStats = TOptimizerStatistics(EStatisticsType::BaseTable, nRows, nAttrs, 0.0, tableData.Metadata->KeyColumnNames); typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats)); diff --git a/ydb/library/yql/core/yql_cost_function.cpp b/ydb/library/yql/core/yql_cost_function.cpp index a5f0edf21d..58eb22fd0c 100644 --- a/ydb/library/yql/core/yql_cost_function.cpp +++ b/ydb/library/yql/core/yql_cost_function.cpp @@ -41,9 +41,11 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat double newCard; EStatisticsType outputType; + TVector<TString> joinedTableKeys; if (IsPKJoin(rightStats,rightJoinKeys)) { newCard = std::max(leftStats.Nrows,rightStats.Nrows); + joinedTableKeys = leftStats.KeyColumns; if (leftStats.Type == EStatisticsType::BaseTable){ outputType = EStatisticsType::FilteredFactTable; } else { @@ -52,6 +54,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat } else if (IsPKJoin(leftStats,leftJoinKeys)) { newCard = std::max(leftStats.Nrows,rightStats.Nrows); + joinedTableKeys = rightStats.KeyColumns; if (rightStats.Type == 
EStatisticsType::BaseTable){ outputType = EStatisticsType::FilteredFactTable; } else { @@ -69,7 +72,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStat + newCard + leftStats.Cost + rightStats.Cost; - return TOptimizerStatistics(outputType, newCard, newNCols, cost); + return TOptimizerStatistics(outputType, newCard, newNCols, cost, joinedTableKeys); } TOptimizerStatistics NYql::ComputeJoinStats(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 755b60dc0c..747bda5ca5 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -194,7 +194,7 @@ void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationCont double selectivity = ComputePredicateSelectivity(flatmap.Lambda().Body(), inputStats); - auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost ); + auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost, inputStats->KeyColumns ); typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) ); } @@ -235,7 +235,7 @@ void InferStatisticsForFilter(const TExprNode::TPtr& input, TTypeAnnotationConte double selectivity = ComputePredicateSelectivity(filterBody, inputStats); - auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost); + auto outputStats = TOptimizerStatistics(inputStats->Type, inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost, inputStats->KeyColumns); typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) ); } |