aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpavelvelikhov <pavelvelikhov@yandex-team.com>2023-10-02 16:05:28 +0300
committerpavelvelikhov <pavelvelikhov@yandex-team.com>2023-10-02 16:44:20 +0300
commit8390fed624c7cfb1b8794b68fb2eea88e2d6b4b9 (patch)
tree2880fc15e12b1de12a351c8aa31841d003456c32
parentdda26179eda44bc3694bcfe1e61554baa2f44b72 (diff)
downloadydb-8390fed624c7cfb1b8794b68fb2eea88e2d6b4b9.tar.gz
Removed optional cost from statistics
Removed optional cost from statistics
-rw-r--r--ydb/core/kqp/opt/kqp_query_plan.cpp2
-rw-r--r--ydb/core/kqp/opt/kqp_statistics_transformer.cpp2
-rw-r--r--ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp2
-rw-r--r--ydb/library/yql/core/yql_cost_function.cpp2
-rw-r--r--ydb/library/yql/core/yql_statistics.cpp14
-rw-r--r--ydb/library/yql/core/yql_statistics.h8
-rw-r--r--ydb/library/yql/core/yql_type_annotation.h16
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp15
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp5
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.cpp16
10 files changed, 20 insertions, 62 deletions
diff --git a/ydb/core/kqp/opt/kqp_query_plan.cpp b/ydb/core/kqp/opt/kqp_query_plan.cpp
index 2aa2a17de8..cd4498eb02 100644
--- a/ydb/core/kqp/opt/kqp_query_plan.cpp
+++ b/ydb/core/kqp/opt/kqp_query_plan.cpp
@@ -1265,7 +1265,7 @@ private:
if (auto stats = SerializerCtx.TypeCtx.GetStats(expr.Raw())) {
op.Properties["E-Rows"] = stats->Nrows;
- op.Properties["E-Cost"] = stats->Cost.value();
+ op.Properties["E-Cost"] = stats->Cost;
}
else {
op.Properties["E-Rows"] = "No estimate";
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
index 5680bc3c83..dc95487448 100644
--- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
@@ -118,7 +118,7 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn
}
int nAttrs = sourceSettings.Columns().Size();
- double cost = inputStats->Cost.value();
+ double cost = inputStats->Cost;
auto outputStats = TOptimizerStatistics(nRows, nAttrs, cost);
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats));
diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
index 41edddac10..0027feaef4 100644
--- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
+++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
@@ -5836,7 +5836,7 @@ TExprNode::TPtr ExpandCostsOf(const TExprNode::TPtr& node, TExprContext& ctx, TT
}
if (stat) {
if (stat->Cost) {
- jsonWriter.Write("Cost", *stat->Cost);
+ jsonWriter.Write("Cost", stat->Cost);
}
jsonWriter.Write("Cols", stat->Ncols);
jsonWriter.Write("Rows", stat->Nrows);
diff --git a/ydb/library/yql/core/yql_cost_function.cpp b/ydb/library/yql/core/yql_cost_function.cpp
index 635b0552eb..f42c95164b 100644
--- a/ydb/library/yql/core/yql_cost_function.cpp
+++ b/ydb/library/yql/core/yql_cost_function.cpp
@@ -16,7 +16,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(TOptimizerStatistics leftStats, TOpt
int newNCols = leftStats.Ncols + rightStats.Ncols;
double cost = leftStats.Nrows + 2.0 * rightStats.Nrows
+ newCard
- + leftStats.Cost.value() + rightStats.Cost.value();
+ + leftStats.Cost + rightStats.Cost;
return TOptimizerStatistics(newCard, newNCols, cost);
}
diff --git a/ydb/library/yql/core/yql_statistics.cpp b/ydb/library/yql/core/yql_statistics.cpp
index 91043b31b7..30d8dcd26b 100644
--- a/ydb/library/yql/core/yql_statistics.cpp
+++ b/ydb/library/yql/core/yql_statistics.cpp
@@ -3,13 +3,7 @@
using namespace NYql;
std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s) {
- os << "Nrows: " << s.Nrows << ", Ncols: " << s.Ncols;
- os << ", Cost: ";
- if (s.Cost.has_value()){
- os << s.Cost.value();
- } else {
- os << "none";
- }
+ os << "Nrows: " << s.Nrows << ", Ncols: " << s.Ncols << ", Cost: " << s.Cost ;
return os;
}
@@ -20,10 +14,6 @@ bool TOptimizerStatistics::Empty() const {
TOptimizerStatistics& TOptimizerStatistics::operator+=(const TOptimizerStatistics& other) {
Nrows += other.Nrows;
Ncols += other.Ncols;
- if (Cost.has_value() && other.Cost.has_value()) {
- Cost = *Cost + *other.Cost;
- } else if (other.Cost.has_value()) {
- Cost = other.Cost;
- }
+ Cost += other.Cost;
return *this;
}
diff --git a/ydb/library/yql/core/yql_statistics.h b/ydb/library/yql/core/yql_statistics.h
index a8fed920d6..7564a751a1 100644
--- a/ydb/library/yql/core/yql_statistics.h
+++ b/ydb/library/yql/core/yql_statistics.h
@@ -11,16 +11,16 @@ namespace NYql {
* for the current operator in the plan. Currently, only Nrows and Ncols are
* recorded.
* Cost is also included in statistics, as its updated concurrently with statistics
- * all of the time. Cost is optional, so it could be missing.
+ * all of the time.
*/
struct TOptimizerStatistics {
double Nrows = 0;
int Ncols = 0;
- std::optional<double> Cost;
+ double Cost;
TString Descr;
- TOptimizerStatistics() : Cost(std::nullopt) {}
- TOptimizerStatistics(double nrows,int ncols): Nrows(nrows), Ncols(ncols), Cost(std::nullopt) {}
+ TOptimizerStatistics() {}
+ TOptimizerStatistics(double nrows,int ncols): Nrows(nrows), Ncols(ncols) {}
TOptimizerStatistics(double nrows,int ncols, double cost): Nrows(nrows), Ncols(ncols), Cost(cost) {}
TOptimizerStatistics(double nrows,int ncols, double cost, TString descr): Nrows(nrows), Ncols(ncols), Cost(cost), Descr(descr) {}
diff --git a/ydb/library/yql/core/yql_type_annotation.h b/ydb/library/yql/core/yql_type_annotation.h
index b8efdc52a6..de931c1034 100644
--- a/ydb/library/yql/core/yql_type_annotation.h
+++ b/ydb/library/yql/core/yql_type_annotation.h
@@ -338,21 +338,7 @@ struct TTypeAnnotationContext: public TThrRefBase {
void SetStats(const TExprNode* input, std::shared_ptr<TOptimizerStatistics> stats) {
StatisticsMap[input] = stats;
}
-
- /**
- * Helper method to get cost from type annotation context
- * Doesn't check if the cost is in the mapping
- */
- std::optional<double> GetCost(const TExprNode* input) {
- return StatisticsMap[input]->Cost;
- }
-
- /**
- * Helper method to set the cost in type annotation context
- */
- void SetCost(const TExprNode* input, std::optional<double> cost) {
- StatisticsMap[input]->Cost = cost;
- }
+
};
template <> inline
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
index b9d74d1877..343d7704fb 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
@@ -665,7 +665,7 @@ template <int N> void TDPccpSolver<N>::EmitCsgCmp(const std::bitset<N>& S1, cons
TEdge e2 = Graph.FindCrossingEdge(S2, S1);
std::shared_ptr<TJoinOptimizerNode> newJoin =
MakeJoin(DpTable[S2], DpTable[S1], e2.JoinConditions, GraceJoin);
- if (newJoin->Stats->Cost.value() < DpTable[joined]->Stats->Cost.value()){
+ if (newJoin->Stats->Cost < DpTable[joined]->Stats->Cost){
DpTable[joined] = newJoin;
}
} else {
@@ -675,10 +675,10 @@ template <int N> void TDPccpSolver<N>::EmitCsgCmp(const std::bitset<N>& S1, cons
TEdge e2 = Graph.FindCrossingEdge(S2, S1);
std::shared_ptr<TJoinOptimizerNode> newJoin2 =
MakeJoin(DpTable[S2], DpTable[S1], e2.JoinConditions, GraceJoin);
- if (newJoin1->Stats->Cost.value() < DpTable[joined]->Stats->Cost.value()){
+ if (newJoin1->Stats->Cost < DpTable[joined]->Stats->Cost){
DpTable[joined] = newJoin1;
}
- if (newJoin2->Stats->Cost.value() < DpTable[joined]->Stats->Cost.value()){
+ if (newJoin2->Stats->Cost < DpTable[joined]->Stats->Cost){
DpTable[joined] = newJoin2;
}
}
@@ -860,10 +860,6 @@ bool DqCollectJoinRelationsWithStats(
return false;
}
- if (!maybeStat->second->Cost.has_value()) {
- return false;
- }
-
auto scope = input.Scope();
if (!scope.Maybe<TCoAtom>()){
return false;
@@ -899,8 +895,7 @@ TExprBase DqOptimizeEquiJoinWithCosts(const TExprBase& node, TExprContext& ctx,
auto equiJoin = node.Cast<TCoEquiJoin>();
YQL_ENSURE(equiJoin.ArgCount() >= 4);
- if (typesCtx.StatisticsMap.contains(equiJoin.Raw()) &&
- typesCtx.StatisticsMap[equiJoin.Raw()]->Cost.has_value()) {
+ if (typesCtx.StatisticsMap.contains(equiJoin.Raw())) {
return node;
}
@@ -999,7 +994,7 @@ public:
TVector<int> scope;
BuildOutput(&output, result.get(), scope);
output.Rows = result->Stats->Nrows;
- output.TotalCost = *result->Stats->Cost;
+ output.TotalCost = result->Stats->Cost;
if (Log) {
Log(output.ToString());
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp
index 27c05b4e1c..2f9c42d02a 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp
@@ -44,7 +44,7 @@ struct TState {
VarIds.emplace_back();
int relId = Input.Rels.size();
Input.Rels.back().Rows = stat->Nrows;
- Input.Rels.back().TotalCost = *stat->Cost;
+ Input.Rels.back().TotalCost = stat->Cost;
Tables.emplace_back(label);
Table2RelIds[label].emplace_back(relId);
}
@@ -171,8 +171,7 @@ TExprBase DqOptimizeEquiJoinWithCosts(
YQL_ENSURE(equiJoin.ArgCount() >= 4);
auto maybeStat = typesCtx.StatisticsMap.find(equiJoin.Raw());
- if (maybeStat != typesCtx.StatisticsMap.end() &&
- maybeStat->second->Cost.has_value()) {
+ if (maybeStat != typesCtx.StatisticsMap.end()) {
return node;
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 9e14bf142e..3e838da4b9 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -67,7 +67,6 @@ void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationCon
}
typeCtx->SetStats(input.Get(), inputStats);
- typeCtx->SetCost(input.Get(), typeCtx->GetCost(dqSource.Settings().Raw()));
}
/**
@@ -95,11 +94,9 @@ void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationCont
double selectivity = ComputePredicateSelectivity(flatmap.Lambda().Body(), inputStats);
- auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols);
+ auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost );
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
- typeCtx->SetCost(input.Get(), typeCtx->GetCost(flatmapInput.Raw()));
-
}
else if (flatmap.Lambda().Body().Maybe<TCoMapJoinCore>() ||
flatmap.Lambda().Body().Maybe<TCoMap>().Input().Maybe<TCoMapJoinCore>() ||
@@ -138,11 +135,9 @@ void InferStatisticsForFilter(const TExprNode::TPtr& input, TTypeAnnotationConte
double selectivity = ComputePredicateSelectivity(filterBody, inputStats);
- auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols);
+ auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost);
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
- typeCtx->SetCost(input.Get(), typeCtx->GetCost(filterInput.Raw()));
-
}
/**
@@ -162,7 +157,6 @@ void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnota
}
typeCtx->SetStats( input.Get(), inputStats );
- typeCtx->SetCost( input.Get(), typeCtx->GetCost( skipNullMembersInput.Raw() ) );
}
/**
@@ -181,7 +175,6 @@ void InferStatisticsForExtractMembers(const TExprNode::TPtr& input, TTypeAnnotat
}
typeCtx->SetStats( input.Get(), inputStats );
- typeCtx->SetCost( input.Get(), typeCtx->GetCost( extractMembersInput.Raw() ) );
}
/**
@@ -200,7 +193,6 @@ void InferStatisticsForAggregateCombine(const TExprNode::TPtr& input, TTypeAnnot
}
typeCtx->SetStats( input.Get(), inputStats );
- typeCtx->SetCost( input.Get(), typeCtx->GetCost( aggInput.Raw() ) );
}
/**
@@ -219,7 +211,6 @@ void InferStatisticsForAggregateMergeFinalize(const TExprNode::TPtr& input, TTyp
}
typeCtx->SetStats( input.Get(), inputStats );
- typeCtx->SetCost( input.Get(), typeCtx->GetCost( aggInput.Raw() ) );
}
/***
@@ -255,7 +246,6 @@ void PropagateStatisticsToLambdaArgument(const TExprNode::TPtr& input, TTypeAnno
auto inputStats = typeCtx->GetStats(callableInput->Child(j) );
if (inputStats){
typeCtx->SetStats( lambda.Args().Arg(j).Raw(), inputStats );
- typeCtx->SetCost( lambda.Args().Arg(j).Raw(), typeCtx->GetCost( callableInput->Child(j) ));
}
}
@@ -267,7 +257,6 @@ void PropagateStatisticsToLambdaArgument(const TExprNode::TPtr& input, TTypeAnno
}
typeCtx->SetStats( lambda.Args().Arg(0).Raw(), inputStats );
- typeCtx->SetCost( lambda.Args().Arg(0).Raw(), typeCtx->GetCost( callableInput.Get() ));
}
}
}
@@ -282,7 +271,6 @@ void InferStatisticsForStage(const TExprNode::TPtr& input, TTypeAnnotationContex
auto lambdaStats = typeCtx->GetStats( stage.Program().Body().Raw());
if (lambdaStats){
typeCtx->SetStats( stage.Raw(), lambdaStats );
- typeCtx->SetCost( stage.Raw(), typeCtx->GetCost( stage.Program().Body().Raw()));
}
}