aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: aozeritsky <aozeritsky@ydb.tech> 2023-08-18 17:06:56 +0300
committer: aozeritsky <aozeritsky@ydb.tech> 2023-08-18 18:58:48 +0300
commit: 89efb41db3e4ce655dac0b6b3da2fb32616fa98c (patch)
tree: 501eef4d77f52e5f8a49df7df8abba54a3b2433d
parent: 50188727eeaf28bc4155b5cef0bb54ff04b2fab8 (diff)
download: ydb-89efb41db3e4ce655dac0b6b3da2fb32616fa98c.tar.gz
Move some code to dq library
-rw-r--r-- ydb/core/kqp/opt/kqp_statistics_transformer.cpp 56
-rw-r--r-- ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt 1
-rw-r--r-- ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt 1
-rw-r--r-- ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt 1
-rw-r--r-- ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt 1
-rw-r--r-- ydb/library/yql/dq/opt/dq_opt_stat.cpp 60
-rw-r--r-- ydb/library/yql/dq/opt/dq_opt_stat.h 10
-rw-r--r-- ydb/library/yql/dq/opt/ya.make 1
8 files changed, 78 insertions, 53 deletions
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
index 03a8467efc..20d71b3c0e 100644
--- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
@@ -1,5 +1,6 @@
#include "kqp_statistics_transformer.h"
#include <ydb/library/yql/utils/log/log.h>
+#include <ydb/library/yql/dq/opt/dq_opt_stat.h>
using namespace NYql;
@@ -7,57 +8,6 @@ using namespace NYql::NNodes;
using namespace NKikimr::NKqp;
/**
- * For Flatmap we check the input and fetch the statistcs and cost from below
- * Then we analyze the filter predicate and compute it's selectivity and apply it
- * to the result.
-*/
-void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
-
- auto inputNode = TExprBase(input);
- auto flatmap = inputNode.Cast<TCoFlatMap>();
- if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) {
- return;
- }
-
- auto flatmapInput = flatmap.Input();
- auto inputStats = typeCtx->GetStats(flatmapInput.Raw());
-
- if (! inputStats ) {
- return;
- }
-
- // Selectivity is the fraction of tuples that are selected by this predicate
- // Currently we just set the number to 10% before we have statistics and parse
- // the predicate
- double selectivity = 0.1;
-
- auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols);
-
- typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
- typeCtx->SetCost(input.Get(), typeCtx->GetCost(flatmapInput.Raw()));
-}
-
-/**
- * Infer statistics and costs for SkipNullMembers
- * We don't have a good idea at this time how many nulls will be discarded, so we just return the
- * input statistics.
-*/
-void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
-
- auto inputNode = TExprBase(input);
- auto skipNullMembers = inputNode.Cast<TCoSkipNullMembers>();
- auto skipNullMembersInput = skipNullMembers.Input();
-
- auto inputStats = typeCtx->GetStats(skipNullMembersInput.Raw());
- if (!inputStats) {
- return;
- }
-
- typeCtx->SetStats( input.Get(), inputStats );
- typeCtx->SetCost( input.Get(), typeCtx->GetCost( skipNullMembersInput.Raw() ) );
-}
-
-/**
* Compute statistics and cost for read table
* Currently we just make up a number for the cardinality (100000) and set cost to 0
*/
@@ -98,10 +48,10 @@ IGraphTransformer::TStatus TKqpStatisticsTransformer::DoTransform(TExprNode::TPt
auto output = input;
if (TCoFlatMap::Match(input.Get())){
- InferStatisticsForFlatMap(input, typeCtx);
+ NDq::InferStatisticsForFlatMap(input, typeCtx);
}
else if(TCoSkipNullMembers::Match(input.Get())){
- InferStatisticsForSkipNullMembers(input, typeCtx);
+ NDq::InferStatisticsForSkipNullMembers(input, typeCtx);
}
else if(TKqlReadTableBase::Match(input.Get()) || TKqlReadTableRangesBase::Match(input.Get())){
InferStatisticsForReadTable(input, typeCtx);
diff --git a/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt
index 6ac7a6e01e..a5f828a08b 100644
--- a/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/dq/opt/CMakeLists.darwin-x86_64.txt
@@ -31,5 +31,6 @@ target_sources(yql-dq-opt PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
)
diff --git a/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt b/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt
index 54ab33f05f..c6ab2ba6f0 100644
--- a/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/dq/opt/CMakeLists.linux-aarch64.txt
@@ -32,5 +32,6 @@ target_sources(yql-dq-opt PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
)
diff --git a/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt b/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt
index 54ab33f05f..c6ab2ba6f0 100644
--- a/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/dq/opt/CMakeLists.linux-x86_64.txt
@@ -32,5 +32,6 @@ target_sources(yql-dq-opt PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
)
diff --git a/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt b/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt
index 6ac7a6e01e..a5f828a08b 100644
--- a/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/dq/opt/CMakeLists.windows-x86_64.txt
@@ -31,5 +31,6 @@ target_sources(yql-dq-opt PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_peephole.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy_finalizing.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_phy.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_stat.cpp
${CMAKE_SOURCE_DIR}/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
)
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
new file mode 100644
index 0000000000..fcd7b69256
--- /dev/null
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -0,0 +1,60 @@
+#include "dq_opt_stat.h"
+
+#include <ydb/library/yql/core/yql_opt_utils.h>
+
+namespace NYql::NDq {
+
+using namespace NNodes;
+
+/**
+ * Infer statistics and cost for a FlatMap node. Only lambdas accepted by IsPredicateFlatMap are handled:
+ * the input's row count is scaled by an assumed selectivity, its Ncols is passed through, and its cost
+ * is copied as is. If the lambda is not accepted or the input has no statistics, nothing is recorded.
+ */
+void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+
+ auto inputNode = TExprBase(input);
+ auto flatmap = inputNode.Cast<TCoFlatMap>();
+ if (!IsPredicateFlatMap(flatmap.Lambda().Body().Ref())) {
+ return;
+ }
+
+ auto flatmapInput = flatmap.Input();
+ auto inputStats = typeCtx->GetStats(flatmapInput.Raw());
+
+ if (! inputStats ) {
+ return;
+ }
+
+ // Selectivity is the fraction of tuples that are selected by this predicate.
+ // For now it is a fixed 10%, until real statistics are available and the
+ // predicate itself is analyzed.
+ double selectivity = 0.1;
+
+ auto outputStats = TOptimizerStatistics(inputStats->Nrows * selectivity, inputStats->Ncols);
+
+ typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats) );
+ typeCtx->SetCost(input.Get(), typeCtx->GetCost(flatmapInput.Raw()));
+}
+
+/**
+ * Infer statistics and cost for a SkipNullMembers node.
+ * There is no estimate yet of how many rows with nulls will be discarded, so the input's statistics
+ * and cost are propagated unchanged. If the input has no statistics, nothing is recorded.
+ */
+void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+
+ auto inputNode = TExprBase(input);
+ auto skipNullMembers = inputNode.Cast<TCoSkipNullMembers>();
+ auto skipNullMembersInput = skipNullMembers.Input();
+
+ auto inputStats = typeCtx->GetStats(skipNullMembersInput.Raw());
+ if (!inputStats) {
+ return;
+ }
+
+ typeCtx->SetStats( input.Get(), inputStats );
+ typeCtx->SetCost( input.Get(), typeCtx->GetCost( skipNullMembersInput.Raw() ) );
+}
+
+} // namespace NYql::NDq {
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h
new file mode 100644
index 0000000000..c4ab54aff5
--- /dev/null
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.h
@@ -0,0 +1,10 @@
+#pragma once
+#include "dq_opt.h"
+#include <ydb/library/yql/core/yql_type_annotation.h>
+
+namespace NYql::NDq {
+// Helpers that record inferred statistics and cost for an expression node in typeCtx.
+void InferStatisticsForFlatMap(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+void InferStatisticsForSkipNullMembers(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+
+} // namespace NYql::NDq { \ No newline at end of file
diff --git a/ydb/library/yql/dq/opt/ya.make b/ydb/library/yql/dq/opt/ya.make
index 52bd6534b6..c51d3a675f 100644
--- a/ydb/library/yql/dq/opt/ya.make
+++ b/ydb/library/yql/dq/opt/ya.make
@@ -19,6 +19,7 @@ SRCS(
dq_opt_peephole.cpp
dq_opt_phy_finalizing.cpp
dq_opt_phy.cpp
+ dq_opt_stat.cpp
dq_opt_join_cost_based.cpp
)