diff options
author | pavelvelikhov <pavelvelikhov@yandex-team.com> | 2023-10-02 16:05:28 +0300 |
---|---|---|
committer | pavelvelikhov <pavelvelikhov@yandex-team.com> | 2023-10-02 16:44:20 +0300 |
commit | 8390fed624c7cfb1b8794b68fb2eea88e2d6b4b9 (patch) | |
tree | 2880fc15e12b1de12a351c8aa31841d003456c32 | |
parent | dda26179eda44bc3694bcfe1e61554baa2f44b72 (diff) | |
download | ydb-8390fed624c7cfb1b8794b68fb2eea88e2d6b4b9.tar.gz |
Removed optional cost from statistics
Removed optional cost from statistics
-rw-r--r-- | ydb/core/kqp/opt/kqp_query_plan.cpp | 2 | ||||
-rw-r--r-- | ydb/core/kqp/opt/kqp_statistics_transformer.cpp | 2 | ||||
-rw-r--r-- | ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp | 2 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_cost_function.cpp | 2 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_statistics.cpp | 14 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_statistics.h | 8 | ||||
-rw-r--r-- | ydb/library/yql/core/yql_type_annotation.h | 16 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 15 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp | 5 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.cpp | 16 |
10 files changed, 20 insertions, 62 deletions
diff --git a/ydb/core/kqp/opt/kqp_query_plan.cpp b/ydb/core/kqp/opt/kqp_query_plan.cpp index 2aa2a17de8..cd4498eb02 100644 --- a/ydb/core/kqp/opt/kqp_query_plan.cpp +++ b/ydb/core/kqp/opt/kqp_query_plan.cpp @@ -1265,7 +1265,7 @@ private: if (auto stats = SerializerCtx.TypeCtx.GetStats(expr.Raw())) { op.Properties["E-Rows"] = stats->Nrows; - op.Properties["E-Cost"] = stats->Cost.value(); + op.Properties["E-Cost"] = stats->Cost; } else { op.Properties["E-Rows"] = "No estimate"; diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 5680bc3c83..dc95487448 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -118,7 +118,7 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn } int nAttrs = sourceSettings.Columns().Size(); - double cost = inputStats->Cost.value(); + double cost = inputStats->Cost; auto outputStats = TOptimizerStatistics(nRows, nAttrs, cost); typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats)); diff --git a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp index 41edddac10..0027feaef4 100644 --- a/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp +++ b/ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp @@ -5836,7 +5836,7 @@ TExprNode::TPtr ExpandCostsOf(const TExprNode::TPtr& node, TExprContext& ctx, TT } if (stat) { if (stat->Cost) { - jsonWriter.Write("Cost", *stat->Cost); + jsonWriter.Write("Cost", stat->Cost); } jsonWriter.Write("Cols", stat->Ncols); jsonWriter.Write("Rows", stat->Nrows); diff --git a/ydb/library/yql/core/yql_cost_function.cpp b/ydb/library/yql/core/yql_cost_function.cpp index 635b0552eb..f42c95164b 100644 --- a/ydb/library/yql/core/yql_cost_function.cpp +++ b/ydb/library/yql/core/yql_cost_function.cpp @@ -16,7 +16,7 @@ TOptimizerStatistics NYql::ComputeJoinStats(TOptimizerStatistics leftStats, TOpt int newNCols = leftStats.Ncols + rightStats.Ncols; double cost = leftStats.Nrows + 2.0 * rightStats.Nrows + newCard - + leftStats.Cost.value() + rightStats.Cost.value(); + + leftStats.Cost + rightStats.Cost; return TOptimizerStatistics(newCard, newNCols, cost); } diff --git a/ydb/library/yql/core/yql_statistics.cpp b/ydb/library/yql/core/yql_statistics.cpp index 91043b31b7..30d8dcd26b 100644 --- a/ydb/library/yql/core/yql_statistics.cpp +++ b/ydb/library/yql/core/yql_statistics.cpp @@ -3,13 +3,7 @@ using namespace NYql; std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s) { - os << "Nrows: " << s.Nrows << ", Ncols: " << s.Ncols; - os << ", Cost: "; - if (s.Cost.has_value()){ - os << s.Cost.value(); - } else { - os << "none"; - } + os << "Nrows: " << s.Nrows << ", Ncols: " << s.Ncols << ", Cost: " << s.Cost ; return os; } @@ -20,10 +14,6 @@ bool TOptimizerStatistics::Empty() const { TOptimizerStatistics& TOptimizerStatistics::operator+=(const TOptimizerStatistics& other) { Nrows += other.Nrows; Ncols += other.Ncols; - if (Cost.has_value() && other.Cost.has_value()) { - Cost = *Cost + *other.Cost; - } else if (other.Cost.has_value()) { - Cost = other.Cost; - } + Cost += other.Cost; return *this; } diff --git a/ydb/library/yql/core/yql_statistics.h b/ydb/library/yql/core/yql_statistics.h index a8fed920d6..7564a751a1 100644 --- a/ydb/library/yql/core/yql_statistics.h +++ b/ydb/library/yql/core/yql_statistics.h @@ -11,16 +11,16 @@ namespace NYql { * for the current operator in the plan. Currently, only Nrows and Ncols are * recorded. * Cost is also included in statistics, as its updated concurrently with statistics - * all of the time. Cost is optional, so it could be missing. + * all of the time. */ struct TOptimizerStatistics { double Nrows = 0; int Ncols = 0; - std::optional<double> Cost; + double Cost; TString Descr; - TOptimizerStatistics() : Cost(std::nullopt) {} - TOptimizerStatistics(double nrows,int ncols): Nrows(nrows), Ncols(ncols), Cost(std::nullopt) {} + TOptimizerStatistics() {} + TOptimizerStatistics(double nrows,int ncols): Nrows(nrows), Ncols(ncols) {} TOptimizerStatistics(double nrows,int ncols, double cost): Nrows(nrows), Ncols(ncols), Cost(cost) {} TOptimizerStatistics(double nrows,int ncols, double cost, TString descr): Nrows(nrows), Ncols(ncols), Cost(cost), Descr(descr) {} diff --git a/ydb/library/yql/core/yql_type_annotation.h b/ydb/library/yql/core/yql_type_annotation.h index b8efdc52a6..de931c1034 100644 --- a/ydb/library/yql/core/yql_type_annotation.h +++ b/ydb/library/yql/core/yql_type_annotation.h @@ -338,21 +338,7 @@ struct TTypeAnnotationContext: public TThrRefBase { void SetStats(const TExprNode* input, std::shared_ptr<TOptimizerStatistics> stats) { StatisticsMap[input] = stats; } - - /** - * Helper method to get cost from type annotation context - * Doesn't check if the cost is in the mapping - */ - std::optional<double> GetCost(const TExprNode* input) { - return StatisticsMap[input]->Cost; - } - - /** - * Helper method to set the cost in type annotation context - */ - void SetCost(const TExprNode* input, std::optional<double> cost) { - StatisticsMap[input]->Cost = cost; - } + }; template <> inline diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index b9d74d1877..343d7704fb 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -665,7 +665,7 @@ template <int N> void TDPccpSolver<N>::EmitCsgCmp(const std::bitset<N>& S1, cons TEdge e2 = Graph.FindCrossingEdge(S2, S1); std::shared_ptr<TJoinOptimizerNode> newJoin = MakeJoin(DpTable[S2], DpTable[S1], e2.JoinConditions, GraceJoin); - if (newJoin->Stats->Cost.value() < DpTable[joined]->Stats->Cost.value()){ + if (newJoin->Stats->Cost < DpTable[joined]->Stats->Cost){ DpTable[joined] = newJoin; } } else { @@ -675,10 +675,10 @@ template <int N> void TDPccpSolver<N>::EmitCsgCmp(const std::bitset<N>& S1, cons TEdge e2 = Graph.FindCrossingEdge(S2, S1); std::shared_ptr<TJoinOptimizerNode> newJoin2 = MakeJoin(DpTable[S2], DpTable[S1], e2.JoinConditions, GraceJoin); - if (newJoin1->Stats->Cost.value() < DpTable[joined]->Stats->Cost.value()){ + if (newJoin1->Stats->Cost < DpTable[joined]->Stats->Cost){ DpTable[joined] = newJoin1; } - if (newJoin2->Stats->Cost.value() < DpTable[joined]->Stats->Cost.value()){ + if (newJoin2->Stats->Cost < DpTable[joined]->Stats->Cost){ DpTable[joined] = newJoin2; } } @@ -860,10 +860,6 @@ bool DqCollectJoinRelationsWithStats( return false; } - if (!maybeStat->second->Cost.has_value()) { - return false; - } - auto scope = input.Scope(); if (!scope.Maybe<TCoAtom>()){ return false; @@ -899,8 +895,7 @@ TExprBase DqOptimizeEquiJoinWithCosts(const TExprBase& node, TExprContext& ctx, auto equiJoin = node.Cast<TCoEquiJoin>(); YQL_ENSURE(equiJoin.ArgCount() >= 4); - if (typesCtx.StatisticsMap.contains(equiJoin.Raw()) && - typesCtx.StatisticsMap[equiJoin.Raw()]->Cost.has_value()) { + if (typesCtx.StatisticsMap.contains(equiJoin.Raw())) { return node; } @@ -999,7 +994,7 @@ public: TVector<int> scope; BuildOutput(&output, result.get(), scope); output.Rows = result->Stats->Nrows; - output.TotalCost = *result->Stats->Cost; + output.TotalCost = result->Stats->Cost; if (Log) { Log(output.ToString()); } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp index 27c05b4e1c..2f9c42d02a 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based_generic.cpp @@ -44,7 +44,7 @@ struct TState { VarIds.emplace_back(); int relId = Input.Rels.size(); Input.Rels.back().Rows = stat->Nrows; - Input.Rels.back().TotalCost = *stat->Cost; + Input.Rels.back().TotalCost = stat->Cost; Tables.emplace_back(label); Table2RelIds[label].emplace_back(relId); } @@ -171,8 +171,7 @@ TExprBase DqOptimizeEquiJoinWithCosts( YQL_ENSURE(equiJoin.ArgCount() >= 4); auto maybeStat = typesCtx.StatisticsMap.find(equiJoin.Raw()); - if (maybeStat != typesCtx.StatisticsMap.end() && - maybeStat->second->Cost.has_value()) { + if (maybeStat != typesCtx.StatisticsMap.end()) { return node; } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 9e14bf142e..3e838da4b9 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -67,7 +67,6 @@ void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationCon } typeCtx->SetStats(input.Get(), inputStats); - typeCtx->SetCost(input.Get(), typeCtx->GetCost(dqSource.Settings().Raw())); } /** @@ -95,11 +94,9 @@ void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationCont double selectivity = ComputePredicateSelectivity(flatmap.Lambda().Body(), inputStats); - auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols); + auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost ); typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) ); - typeCtx->SetCost(input.Get(), typeCtx->GetCost(flatmapInput.Raw())); - } else if (flatmap.Lambda().Body().Maybe<TCoMapJoinCore>() || flatmap.Lambda().Body().Maybe<TCoMap>().Input().Maybe<TCoMapJoinCore>() || @@ -138,11 +135,9 @@ void InferStatisticsForFilter(const TExprNode::TPtr& input, TTypeAnnotationConte double selectivity = ComputePredicateSelectivity(filterBody, inputStats); - auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols); + auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols, inputStats->Cost); typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) ); - typeCtx->SetCost(input.Get(), typeCtx->GetCost(filterInput.Raw())); - } /** @@ -162,7 +157,6 @@ void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnota } typeCtx->SetStats( input.Get(), inputStats ); - typeCtx->SetCost( input.Get(), typeCtx->GetCost( skipNullMembersInput.Raw() ) ); } /** @@ -181,7 +175,6 @@ void InferStatisticsForExtractMembers(const TExprNode::TPtr& input, TTypeAnnotat } typeCtx->SetStats( input.Get(), inputStats ); - typeCtx->SetCost( input.Get(), typeCtx->GetCost( extractMembersInput.Raw() ) ); } /** @@ -200,7 +193,6 @@ void InferStatisticsForAggregateCombine(const TExprNode::TPtr& input, TTypeAnnot } typeCtx->SetStats( input.Get(), inputStats ); - typeCtx->SetCost( input.Get(), typeCtx->GetCost( aggInput.Raw() ) ); } /** @@ -219,7 +211,6 @@ void InferStatisticsForAggregateMergeFinalize(const TExprNode::TPtr& input, TTyp } typeCtx->SetStats( input.Get(), inputStats ); - typeCtx->SetCost( input.Get(), typeCtx->GetCost( aggInput.Raw() ) ); } /*** @@ -255,7 +246,6 @@ void PropagateStatisticsToLambdaArgument(const TExprNode::TPtr& input, TTypeAnno auto inputStats = typeCtx->GetStats(callableInput->Child(j) ); if (inputStats){ typeCtx->SetStats( lambda.Args().Arg(j).Raw(), inputStats ); - typeCtx->SetCost( lambda.Args().Arg(j).Raw(), typeCtx->GetCost( callableInput->Child(j) )); } } @@ -267,7 +257,6 @@ void PropagateStatisticsToLambdaArgument(const TExprNode::TPtr& input, TTypeAnno } typeCtx->SetStats( lambda.Args().Arg(0).Raw(), inputStats ); - typeCtx->SetCost( lambda.Args().Arg(0).Raw(), typeCtx->GetCost( callableInput.Get() )); } } } @@ -282,7 +271,6 @@ void InferStatisticsForStage(const TExprNode::TPtr& input, TTypeAnnotationContex auto lambdaStats = typeCtx->GetStats( stage.Program().Body().Raw()); if (lambdaStats){ typeCtx->SetStats( stage.Raw(), lambdaStats ); - typeCtx->SetCost( stage.Raw(), typeCtx->GetCost( stage.Program().Body().Raw())); } } |