about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pavel Velikhov <pavelvelikhov@ydb.tech> 2024-07-18 13:52:06 +0300
committer: GitHub <noreply@github.com> 2024-07-18 13:52:06 +0300
commit: fd163ccec7174e6b610cccaacf9edfc18c625d27 (patch)
tree: 0d437f2659bd1a9acf530e6d0cddef67521b46cf
parent: 302e20479e4fbcf9d57bd40b1cb330e84eab86f7 (diff)
download: ydb-fd163ccec7174e6b610cccaacf9edfc18c625d27.tar.gz
Cached overridden statistics for CBO (#6791)
-rw-r--r-- ydb/core/kqp/opt/kqp_opt.h | 11
-rw-r--r-- ydb/core/kqp/opt/kqp_statistics_transformer.cpp | 6
-rw-r--r-- ydb/library/yql/core/yql_statistics.cpp | 6
-rw-r--r-- ydb/library/yql/core/yql_statistics.h | 4
4 files changed, 19 insertions, 8 deletions
diff --git a/ydb/core/kqp/opt/kqp_opt.h b/ydb/core/kqp/opt/kqp_opt.h
index 7e8181a85a..30dda2a66a 100644
--- a/ydb/core/kqp/opt/kqp_opt.h
+++ b/ydb/core/kqp/opt/kqp_opt.h
@@ -26,6 +26,17 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
const TIntrusivePtr<NYql::TKikimrTablesData> Tables;
int JoinsCount{};
int EquiJoinsCount{};
+ std::shared_ptr<NJson::TJsonValue> OverrideStatistics{};
+
+ std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() const { // Parses Config->OverrideStatistics into a JSON tree; returns an empty pointer when the setting is unset.
+ if (Config->OverrideStatistics.Get()) { // NOTE(review): the OverrideStatistics member declared above is not consulted here, so every call re-parses the setting - confirm whether caching was intended.
+ auto jsonValue = std::make_shared<NJson::TJsonValue>(); // Owned by the shared_ptr from the start, so an exception from the parse below cannot leak the allocation.
+ NJson::ReadJsonTree(*Config->OverrideStatistics.Get(), jsonValue.get(), true); // Last argument kept as true (presumably throwOnError - verify against json_reader.h).
+ return jsonValue;
+ } else {
+ return std::shared_ptr<NJson::TJsonValue>();
+ }
+ }
bool IsDataQuery() const {
return QueryCtx->Type == NYql::EKikimrQueryType::Dml;
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
index afc93f0655..5949463d9e 100644
--- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
@@ -100,7 +100,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
auto keyColumns = TIntrusivePtr<TOptimizerStatistics::TKeyColumns>(new TOptimizerStatistics::TKeyColumns(tableData.Metadata->KeyColumnNames));
auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0, keyColumns);
if (kqpCtx.Config->OverrideStatistics.Get()) {
- stats = OverrideStatistics(*stats, path.Value(), *kqpCtx.Config->OverrideStatistics.Get());
+ stats = OverrideStatistics(*stats, path.Value(), kqpCtx.GetOverrideStatistics());
}
if (stats->ColumnStatistics) {
for (const auto& [columnName, metaData]: tableData.Metadata->Columns) {
@@ -308,11 +308,11 @@ void InferStatisticsForDqSourceWrap(const TExprNode::TPtr& input, TTypeAnnotatio
auto path = s3DataSource.Name().Cast().StringValue();
if (kqpCtx.Config->OverrideStatistics.Get() && path) {
auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, 0.0, 0, 0, 0.0, TIntrusivePtr<TOptimizerStatistics::TKeyColumns>());
- stats = OverrideStatistics(*stats, path, *kqpCtx.Config->OverrideStatistics.Get());
+ stats = OverrideStatistics(*stats, path, kqpCtx.GetOverrideStatistics());
if (stats->ByteSize == 0.0) {
auto n = path.find_last_of('/');
if (n != path.npos) {
- stats = OverrideStatistics(*stats, path.substr(n + 1), *kqpCtx.Config->OverrideStatistics.Get());
+ stats = OverrideStatistics(*stats, path.substr(n + 1), kqpCtx.GetOverrideStatistics());
}
}
if (stats->ByteSize != 0.0) {
diff --git a/ydb/library/yql/core/yql_statistics.cpp b/ydb/library/yql/core/yql_statistics.cpp
index 69bc93a086..fddcf10bf0 100644
--- a/ydb/library/yql/core/yql_statistics.cpp
+++ b/ydb/library/yql/core/yql_statistics.cpp
@@ -63,12 +63,10 @@ TOptimizerStatistics& TOptimizerStatistics::operator+=(const TOptimizerStatistic
return *this;
}
-std::shared_ptr<TOptimizerStatistics> NYql::OverrideStatistics(const NYql::TOptimizerStatistics& s, const TStringBuf& tablePath, const TString& statHints) {
+std::shared_ptr<TOptimizerStatistics> NYql::OverrideStatistics(const NYql::TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr<NJson::TJsonValue>& stats) {
auto res = std::make_shared<TOptimizerStatistics>(s.Type, s.Nrows, s.Ncols, s.ByteSize, s.Cost, s.KeyColumns, s.ColumnStatistics);
- NJson::TJsonValue root;
- NJson::ReadJsonTree(statHints, &root, true);
- auto dbStats = root.GetMapSafe();
+ auto dbStats = stats->GetMapSafe();
if (!dbStats.contains(tablePath)){
return res;
diff --git a/ydb/library/yql/core/yql_statistics.h b/ydb/library/yql/core/yql_statistics.h
index 5141d30dce..9514a2f700 100644
--- a/ydb/library/yql/core/yql_statistics.h
+++ b/ydb/library/yql/core/yql_statistics.h
@@ -2,6 +2,8 @@
#include <ydb/library/minsketch/count_min_sketch.h>
+#include <library/cpp/json/json_reader.h>
+
#include <util/generic/vector.h>
#include <util/generic/hash.h>
@@ -80,6 +82,6 @@ struct TOptimizerStatistics {
friend std::ostream& operator<<(std::ostream& os, const TOptimizerStatistics& s);
};
-std::shared_ptr<TOptimizerStatistics> OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const TString& statHints);
+std::shared_ptr<TOptimizerStatistics> OverrideStatistics(const TOptimizerStatistics& s, const TStringBuf& tablePath, const std::shared_ptr<NJson::TJsonValue>& stats);
}