diff options
author | Pavel Velikhov <pavelvelikhov@ydb.tech> | 2024-07-18 13:52:06 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-18 13:52:06 +0300 |
commit | fd163ccec7174e6b610cccaacf9edfc18c625d27 (patch) | |
tree | 0d437f2659bd1a9acf530e6d0cddef67521b46cf | |
parent | 302e20479e4fbcf9d57bd40b1cb330e84eab86f7 (diff) | |
download | ydb-fd163ccec7174e6b610cccaacf9edfc18c625d27.tar.gz |
Cached overridden statistics for CBO (#6791)
-rw-r--r-- | ydb/core/kqp/opt/kqp_opt.h | 11 | ||||
-rw-r--r-- | ydb/core/kqp/opt/kqp_statistics_transformer.cpp | 6 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_statistics.cpp | 6 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_statistics.h | 4 |
4 files changed, 19 insertions, 8 deletions
diff --git a/ydb/core/kqp/opt/kqp_opt.h b/ydb/core/kqp/opt/kqp_opt.h index 7e8181a85a..30dda2a66a 100644 --- a/ydb/core/kqp/opt/kqp_opt.h +++ b/ydb/core/kqp/opt/kqp_opt.h @@ -26,6 +26,17 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> { const TIntrusivePtr<NYql::TKikimrTablesData> Tables; int JoinsCount{}; int EquiJoinsCount{}; + std::shared_ptr<NJson::TJsonValue> OverrideStatistics{}; + + std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() const { + if (Config->OverrideStatistics.Get()) { + auto jsonValue = new NJson::TJsonValue(); + NJson::ReadJsonTree(*Config->OverrideStatistics.Get(), jsonValue, true); + return std::shared_ptr<NJson::TJsonValue>(jsonValue); + } else { + return std::shared_ptr<NJson::TJsonValue>(); + } + } bool IsDataQuery() const { return QueryCtx->Type == NYql::EKikimrQueryType::Dml; diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index afc93f0655..5949463d9e 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -100,7 +100,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon auto keyColumns = TIntrusivePtr<TOptimizerStatistics::TKeyColumns>(new TOptimizerStatistics::TKeyColumns(tableData.Metadata->KeyColumnNames)); auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, keyColumns); if (kqpCtx.Config->OverrideStatistics.Get()) { - stats = OverrideStatistics(*stats, path.Value(), *kqpCtx.Config->OverrideStatistics.Get()); + stats = OverrideStatistics(*stats, path.Value(), kqpCtx.GetOverrideStatistics()); } if (stats->ColumnStatistics) { for (const auto& [columnName, metaData]: tableData.Metadata->Columns) { @@ -308,11 +308,11 @@ void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotatio auto path = s3DataSource.Name().Cast().StringValue(); if 
(kqpCtx.Config->OverrideStatistics.Get() && path) { auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, 0.0, 0, 0, 0.0, TIntrusivePtr<TOptimizerStatistics::TKeyColumns>()); - stats = OverrideStatistics(*stats, path, *kqpCtx.Config->OverrideStatistics.Get()); + stats = OverrideStatistics(*stats, path, kqpCtx.GetOverrideStatistics()); if (stats->ByteSize == 0.0) { auto n = path.find_last_of('/'); if (n != path.npos) { - stats = OverrideStatistics(*stats, path.substr(n + 1), *kqpCtx.Config->OverrideStatistics.Get()); + stats = OverrideStatistics(*stats, path.substr(n + 1), kqpCtx.GetOverrideStatistics()); } } if (stats->ByteSize != 0.0) { diff --git a/ydb/library/yql/core/yql_statistics.cpp b/ydb/library/yql/core/yql_statistics.cpp index 69bc93a086..fddcf10bf0 100644 --- a/ydb/library/yql/core/yql_statistics.cpp +++ b/ydb/library/yql/core/yql_statistics.cpp @@ -63,12 +63,10 @@ TOptimizerStatistics& TOptimizerStatistics::operator+=(const TOptimizerStatistic return *this; } -std::shared_ptr<TOptimizerStatistics> NYql::OverrideStatistics(const NYql::TOptimizerStatistics& s, const TStringBuf& tablePath, const TString& statHints) { +std::shared_ptr<TOptimizerStatistics> NYql::OverrideStatistics(const NYql::TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr<NJson::TJsonValue>& stats) { auto res = std::make_shared<TOptimizerStatistics>(s.Type, s.Nrows, s.Ncols, s.ByteSize, s.Cost, s.KeyColumns, s.ColumnStatistics); - NJson::TJsonValue root; - NJson::ReadJsonTree(statHints, &root, true); - auto dbStats = root.GetMapSafe(); + auto dbStats = stats->GetMapSafe(); if (!dbStats.contains(tablePath)){ return res; diff --git a/ydb/library/yql/core/yql_statistics.h b/ydb/library/yql/core/yql_statistics.h index 5141d30dce..9514a2f700 100644 --- a/ydb/library/yql/core/yql_statistics.h +++ b/ydb/library/yql/core/yql_statistics.h @@ -2,6 +2,8 @@ #include <ydb/library/minsketch/count_min_sketch.h> +#include 
<library/cpp/json/json_reader.h> + #include <util/generic/vector.h> #include <util/generic/hash.h> @@ -80,6 +82,6 @@ struct TOptimizerStatistics { friend std::ostream& operator<<(std::ostream& os, const TOptimizerStatistics& s); }; -std::shared_ptr<TOptimizerStatistics> OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const TString& statHints); +std::shared_ptr<TOptimizerStatistics> OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr<NJson::TJsonValue>& stats); } |