diff options
author | aozeritsky <aozeritsky@ydb.tech> | 2023-09-29 15:33:54 +0300 |
---|---|---|
committer | aozeritsky <aozeritsky@ydb.tech> | 2023-09-29 15:56:34 +0300 |
commit | d4dc18a29171fae83be9ebed56b88e13c5ed51a3 (patch) | |
tree | d0fd4298d8e56b6d15ba495363dac08c4f77c4c6 | |
parent | c221c8b12c8869a45d9d1d87bdc219cdb86f89ae (diff) | |
download | ydb-d4dc18a29171fae83be9ebed56b88e13c5ed51a3.tar.gz |
Move common functions to dq library
-rw-r--r-- | ydb/core/kqp/opt/kqp_statistics_transformer.cpp | 61 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.cpp | 62 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.h | 5 |
3 files changed, 66 insertions, 62 deletions
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
index 3b55caf4355..a10a9328c5f 100644
--- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
@@ -133,50 +133,6 @@ void InferStatisticsForIndexLookup(const TExprNode::TPtr& input, TTypeAnnotation
     typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats));
 }
 
-/**
- * Compute statistics for map join
- * FIX: Currently we treat all join the same from the cost perspective, need to refine cost function
- */
-void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
-    auto inputNode = TExprBase(input);
-    auto join = inputNode.Cast<TCoMapJoinCore>();
-
-    auto leftArg = join.LeftInput();
-    auto rightArg = join.RightDict();
-
-    auto leftStats = typeCtx->GetStats(leftArg.Raw());
-    auto rightStats = typeCtx->GetStats(rightArg.Raw());
-
-    if (!leftStats || !rightStats) {
-        return;
-    }
-
-    typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
-        ComputeJoinStats(*leftStats, *rightStats, MapJoin)));
-}
-
-/**
- * Compute statistics for grace join
- * FIX: Currently we treat all join the same from the cost perspective, need to refine cost function
- */
-void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
-    auto inputNode = TExprBase(input);
-    auto join = inputNode.Cast<TCoGraceJoinCore>();
-
-    auto leftArg = join.LeftInput();
-    auto rightArg = join.RightInput();
-
-    auto leftStats = typeCtx->GetStats(leftArg.Raw());
-    auto rightStats = typeCtx->GetStats(rightArg.Raw());
-
-    if (!leftStats || !rightStats) {
-        return;
-    }
-
-    typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
-        ComputeJoinStats(*leftStats, *rightStats, GraceJoin)));
-}
-
 /***
  * Infer statistics for result binding of a stage
  */
@@ -206,23 +162,6 @@ void InferStatisticsForResultBinding(const TExprNode::TPtr& input, TTypeAnnotati
 }
 
 /**
- * Infer statistics for DqSource
- *
- * We just pass up the statistics from the Settings of the DqSource
- */
-void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
-    auto inputNode = TExprBase(input);
-    auto dqSource = inputNode.Cast<TDqSource>();
-    auto inputStats = typeCtx->GetStats(dqSource.Settings().Raw());
-    if (!inputStats) {
-        return;
-    }
-
-    typeCtx->SetStats(input.Get(), inputStats);
-    typeCtx->SetCost(input.Get(), typeCtx->GetCost(dqSource.Settings().Raw()));
-}
-
-/**
  * When encountering a KqpPhysicalTx, we save the results of the stage in a vector
  * where it can later be accessed via binding parameters
  */
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 67c921d26cd..18fdc39c7bc 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -1,6 +1,7 @@
 #include "dq_opt_stat.h"
 
 #include <ydb/library/yql/core/yql_opt_utils.h>
+#include <ydb/library/yql/core/yql_cost_function.h>
 #include <ydb/library/yql/utils/log/log.h>
 
 
@@ -9,6 +10,67 @@ namespace NYql::NDq {
 using namespace NNodes;
 
 /**
+ * Compute statistics for map join
+ * FIX: Currently we treat all join the same from the cost perspective, need to refine cost function
+ */
+void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+    auto inputNode = TExprBase(input);
+    auto join = inputNode.Cast<TCoMapJoinCore>();
+
+    auto leftArg = join.LeftInput();
+    auto rightArg = join.RightDict();
+
+    auto leftStats = typeCtx->GetStats(leftArg.Raw());
+    auto rightStats = typeCtx->GetStats(rightArg.Raw());
+
+    if (!leftStats || !rightStats) {
+        return;
+    }
+
+    typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
+        ComputeJoinStats(*leftStats, *rightStats, MapJoin)));
+}
+
+/**
+ * Compute statistics for grace join
+ * FIX: Currently we treat all join the same from the cost perspective, need to refine cost function
+ */
+void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+    auto inputNode = TExprBase(input);
+    auto join = inputNode.Cast<TCoGraceJoinCore>();
+
+    auto leftArg = join.LeftInput();
+    auto rightArg = join.RightInput();
+
+    auto leftStats = typeCtx->GetStats(leftArg.Raw());
+    auto rightStats = typeCtx->GetStats(rightArg.Raw());
+
+    if (!leftStats || !rightStats) {
+        return;
+    }
+
+    typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
+        ComputeJoinStats(*leftStats, *rightStats, GraceJoin)));
+}
+
+/**
+ * Infer statistics for DqSource
+ *
+ * We just pass up the statistics from the Settings of the DqSource
+ */
+void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+    auto inputNode = TExprBase(input);
+    auto dqSource = inputNode.Cast<TDqSource>();
+    auto inputStats = typeCtx->GetStats(dqSource.Settings().Raw());
+    if (!inputStats) {
+        return;
+    }
+
+    typeCtx->SetStats(input.Get(), inputStats);
+    typeCtx->SetCost(input.Get(), typeCtx->GetCost(dqSource.Settings().Raw()));
+}
+
+/**
  * For Flatmap we check the input and fetch the statistcs and cost from below
  * Then we analyze the filter predicate and compute it's selectivity and apply it
  * to the result.
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h
index 4baa3f272e9..078fa907d7c 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.h
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.h
@@ -13,5 +13,8 @@ void InferStatisticsForAggregateMergeFinalize(const TExprNode::TPtr& input, TTyp
 void PropagateStatisticsToLambdaArgument(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
 void PropagateStatisticsToStageArguments(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
 void InferStatisticsForStage(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
 
-} // namespace NYql::NDq {
\ No newline at end of file
+} // namespace NYql::NDq {