diff options
author | aozeritsky <aozeritsky@ydb.tech> | 2023-08-18 17:06:56 +0300 |
---|---|---|
committer | aozeritsky <aozeritsky@ydb.tech> | 2023-08-18 18:58:48 +0300 |
commit | 89efb41db3e4ce655dac0b6b3da2fb32616fa98c (patch) | |
tree | 501eef4d77f52e5f8a49df7df8abba54a3b2433d | |
parent | 50188727eeaf28bc4155b5cef0bb54ff04b2fab8 (diff) | |
download | ydb-89efb41db3e4ce655dac0b6b3da2fb32616fa98c.tar.gz |
Move some code to dq library
-rw-r--r-- | ydb/core/kqp/opt/kqp_statistics_transformer.cpp | 56 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.cpp | 60 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_stat.h | 10 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/ya.make | 1 |
8 files changed, 78 insertions, 53 deletions
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp index 03a8467efc..20d71b3c0e 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp @@ -1,5 +1,6 @@ #include "kqp_statistics_transformer.h" #include <ydb/library/yql/utils/log/log.h> +#include <ydb/library/yql/dq/opt/dq_opt_stat.h> using namespace NYql; @@ -7,57 +8,6 @@ using namespace NYql::NNodes; using namespace NKikimr::NKqp; /** - * For Flatmap we check the input and fetch the statistcs and cost from below - * Then we analyze the filter predicate and compute it's selectivity and apply it - * to the result. -*/ -void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { - - auto inputNode = TExprBase(input); - auto flatmap = inputNode.Cast<TCoFlatMap>(); - if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) { - return; - } - - auto flatmapInput = flatmap.Input(); - auto inputStats = typeCtx->GetStats(flatmapInput.Raw()); - - if (! inputStats ) { - return; - } - - // Selectivity is the fraction of tuples that are selected by this predicate - // Currently we just set the number to 10% before we have statistics and parse - // the predicate - double selectivity = 0.1; - - auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols); - - typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) ); - typeCtx->SetCost(input.Get(), typeCtx->GetCost(flatmapInput.Raw())); -} - -/** - * Infer statistics and costs for SkipNullMembers - * We don't have a good idea at this time how many nulls will be discarded, so we just return the - * input statistics. 
-*/ -void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { - - auto inputNode = TExprBase(input); - auto skipNullMembers = inputNode.Cast<TCoSkipNullMembers>(); - auto skipNullMembersInput = skipNullMembers.Input(); - - auto inputStats = typeCtx->GetStats(skipNullMembersInput.Raw()); - if (!inputStats) { - return; - } - - typeCtx->SetStats( input.Get(), inputStats ); - typeCtx->SetCost( input.Get(), typeCtx->GetCost( skipNullMembersInput.Raw() ) ); -} - -/** * Compute statistics and cost for read table * Currently we just make up a number for the cardinality (100000) and set cost to 0 */ @@ -98,10 +48,10 @@ IGraphTransformer::TStatus TKqpStatisticsTransformer::DoTransform(TExprNode::TPt auto output = input; if (TCoFlatMap::Match(input.Get())){ - InferStatisticsForFlatMap(input, typeCtx); + NDq::InferStatisticsForFlatMap(input, typeCtx); } else if(TCoSkipNullMembers::Match(input.Get())){ - InferStatisticsForSkipNullMembers(input, typeCtx); + NDq::InferStatisticsForSkipNullMembers(input, typeCtx); } else if(TKqlReadTableBase::Match(input.Get()) || TKqlReadTableRangesBase::Match(input.Get())){ InferStatisticsForReadTable(input, typeCtx); diff --git a/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt index 6ac7a6e01e..a5f828a08b 100644 --- a/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt @@ -31,5 +31,6 @@ target_sources(yql-dq-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp ) diff --git a/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt b/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt index 54ab33f05f..c6ab2ba6f0 
100644 --- a/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt @@ -32,5 +32,6 @@ target_sources(yql-dq-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp ) diff --git a/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt b/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt index 54ab33f05f..c6ab2ba6f0 100644 --- a/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt @@ -32,5 +32,6 @@ target_sources(yql-dq-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp ) diff --git a/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt b/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt index 6ac7a6e01e..a5f828a08b 100644 --- a/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt @@ -31,5 +31,6 @@ target_sources(yql-dq-opt PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp ) diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp new file mode 100644 index 0000000000..fcd7b69256 --- /dev/null +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -0,0 
+1,60 @@ +#include "dq_opt_stat.h" + +#include <ydb/library/yql/core/yql_opt_utils.h> + +namespace NYql::NDq { + +using namespace NNodes; + +/** + * For FlatMap we check the input and fetch the statistics and cost from below. + * The filter predicate is not parsed yet: a fixed selectivity is applied to the input row count, + * while the column count and the cost are taken from the input unchanged. + */ +void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { + + auto inputNode = TExprBase(input); + auto flatmap = inputNode.Cast<TCoFlatMap>(); + if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) { + return; + } + + auto flatmapInput = flatmap.Input(); + auto inputStats = typeCtx->GetStats(flatmapInput.Raw()); + + if (! inputStats ) { + return; + } + + // Selectivity is the fraction of tuples that are selected by this predicate. + // Currently we just set the number to 10% until we have statistics and can parse + // the predicate. + double selectivity = 0.1; + + auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols); + + typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) ); + typeCtx->SetCost(input.Get(), typeCtx->GetCost(flatmapInput.Raw())); +} + +/** + * Infer statistics and costs for SkipNullMembers. + * We don't have a good idea at this time how many nulls will be discarded, so we just propagate the + * input statistics and cost. 
+ */ +void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) { + + auto inputNode = TExprBase(input); + auto skipNullMembers = inputNode.Cast<TCoSkipNullMembers>(); + auto skipNullMembersInput = skipNullMembers.Input(); + + auto inputStats = typeCtx->GetStats(skipNullMembersInput.Raw()); + if (!inputStats) { + return; + } + + typeCtx->SetStats( input.Get(), inputStats ); + typeCtx->SetCost( input.Get(), typeCtx->GetCost( skipNullMembersInput.Raw() ) ); +} + +} // namespace NYql::NDq diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h new file mode 100644 index 0000000000..c4ab54aff5 --- /dev/null +++ b/ydb/library/yql/dq/opt/dq_opt_stat.h @@ -0,0 +1,10 @@ +#include "dq_opt.h" + +#include <ydb/library/yql/core/yql_type_annotation.h> + +namespace NYql::NDq { + +void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx); +void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx); + +} // namespace NYql::NDq
\ No newline at end of file diff --git a/ydb/library/yql/dq/opt/ya.make b/ydb/library/yql/dq/opt/ya.make index 52bd6534b6..c51d3a675f 100644 --- a/ydb/library/yql/dq/opt/ya.make +++ b/ydb/library/yql/dq/opt/ya.make @@ -19,6 +19,7 @@ SRCS( dq_opt_peephole.cpp dq_opt_phy_finalizing.cpp dq_opt_phy.cpp + dq_opt_stat.cpp dq_opt_join_cost_based.cpp ) |