aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoraozeritsky <aozeritsky@ydb.tech>2023-09-29 15:33:54 +0300
committeraozeritsky <aozeritsky@ydb.tech>2023-09-29 15:56:34 +0300
commitd4dc18a29171fae83be9ebed56b88e13c5ed51a3 (patch)
treed0fd4298d8e56b6d15ba495363dac08c4f77c4c6
parentc221c8b12c8869a45d9d1d87bdc219cdb86f89ae (diff)
downloadydb-d4dc18a29171fae83be9ebed56b88e13c5ed51a3.tar.gz
Move common functions to dq library
-rw-r--r--ydb/core/kqp/opt/kqp_statistics_transformer.cpp61
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.cpp62
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.h5
3 files changed, 66 insertions, 62 deletions
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
index 3b55caf4355..a10a9328c5f 100644
--- a/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
+++ b/ydb/core/kqp/opt/kqp_statistics_transformer.cpp
@@ -133,50 +133,6 @@ void InferStatisticsForIndexLookup(const TExprNode::TPtr& input, TTypeAnnotation
typeCtx->SetStats(input.Get(), std::make_shared<TOptimizerStatistics>(outputStats));
}
-/**
- * Compute statistics for map join
- * FIX: Currently we treat all join the same from the cost perspective, need to refine cost function
- */
-void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
- auto inputNode = TExprBase(input);
- auto join = inputNode.Cast<TCoMapJoinCore>();
-
- auto leftArg = join.LeftInput();
- auto rightArg = join.RightDict();
-
- auto leftStats = typeCtx->GetStats(leftArg.Raw());
- auto rightStats = typeCtx->GetStats(rightArg.Raw());
-
- if (!leftStats || !rightStats) {
- return;
- }
-
- typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
- ComputeJoinStats(*leftStats, *rightStats, MapJoin)));
-}
-
-/**
- * Compute statistics for grace join
- * FIX: Currently we treat all join the same from the cost perspective, need to refine cost function
- */
-void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
- auto inputNode = TExprBase(input);
- auto join = inputNode.Cast<TCoGraceJoinCore>();
-
- auto leftArg = join.LeftInput();
- auto rightArg = join.RightInput();
-
- auto leftStats = typeCtx->GetStats(leftArg.Raw());
- auto rightStats = typeCtx->GetStats(rightArg.Raw());
-
- if (!leftStats || !rightStats) {
- return;
- }
-
- typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
- ComputeJoinStats(*leftStats, *rightStats, GraceJoin)));
-}
-
/***
* Infer statistics for result binding of a stage
*/
@@ -206,23 +162,6 @@ void InferStatisticsForResultBinding(const TExprNode::TPtr& input, TTypeAnnotati
}
/**
- * Infer statistics for DqSource
- *
- * We just pass up the statistics from the Settings of the DqSource
- */
-void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
- auto inputNode = TExprBase(input);
- auto dqSource = inputNode.Cast<TDqSource>();
- auto inputStats = typeCtx->GetStats(dqSource.Settings().Raw());
- if (!inputStats) {
- return;
- }
-
- typeCtx->SetStats(input.Get(), inputStats);
- typeCtx->SetCost(input.Get(), typeCtx->GetCost(dqSource.Settings().Raw()));
-}
-
-/**
* When encountering a KqpPhysicalTx, we save the results of the stage in a vector
* where it can later be accessed via binding parameters
*/
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 67c921d26cd..18fdc39c7bc 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -1,6 +1,7 @@
#include "dq_opt_stat.h"
#include <ydb/library/yql/core/yql_opt_utils.h>
+#include <ydb/library/yql/core/yql_cost_function.h>
#include <ydb/library/yql/utils/log/log.h>
@@ -9,6 +10,67 @@ namespace NYql::NDq {
using namespace NNodes;
/**
+ * Compute statistics for map join
+ * FIX: Currently we treat all joins the same from the cost perspective; the cost function needs to be refined
+ */
+void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+ auto inputNode = TExprBase(input);
+ auto join = inputNode.Cast<TCoMapJoinCore>();
+
+ auto leftArg = join.LeftInput();
+ auto rightArg = join.RightDict();
+
+ auto leftStats = typeCtx->GetStats(leftArg.Raw());
+ auto rightStats = typeCtx->GetStats(rightArg.Raw());
+
+ if (!leftStats || !rightStats) {
+ return;
+ }
+
+ typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
+ ComputeJoinStats(*leftStats, *rightStats, MapJoin)));
+}
+
+/**
+ * Compute statistics for grace join
+ * FIX: Currently we treat all joins the same from the cost perspective; the cost function needs to be refined
+ */
+void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+ auto inputNode = TExprBase(input);
+ auto join = inputNode.Cast<TCoGraceJoinCore>();
+
+ auto leftArg = join.LeftInput();
+ auto rightArg = join.RightInput();
+
+ auto leftStats = typeCtx->GetStats(leftArg.Raw());
+ auto rightStats = typeCtx->GetStats(rightArg.Raw());
+
+ if (!leftStats || !rightStats) {
+ return;
+ }
+
+ typeCtx->SetStats(join.Raw(), std::make_shared<TOptimizerStatistics>(
+ ComputeJoinStats(*leftStats, *rightStats, GraceJoin)));
+}
+
+/**
+ * Infer statistics for DqSource
+ *
+ * We just pass up the statistics from the Settings of the DqSource
+ */
+void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx) {
+ auto inputNode = TExprBase(input);
+ auto dqSource = inputNode.Cast<TDqSource>();
+ auto inputStats = typeCtx->GetStats(dqSource.Settings().Raw());
+ if (!inputStats) {
+ return;
+ }
+
+ typeCtx->SetStats(input.Get(), inputStats);
+ typeCtx->SetCost(input.Get(), typeCtx->GetCost(dqSource.Settings().Raw()));
+}
+
+/**
* For Flatmap we check the input and fetch the statistics and cost from below
* Then we analyze the filter predicate and compute its selectivity and apply it
* to the result.
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h
index 4baa3f272e9..078fa907d7c 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.h
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.h
@@ -13,5 +13,8 @@ void InferStatisticsForAggregateMergeFinalize(const TExprNode::TPtr& input, TTyp
void PropagateStatisticsToLambdaArgument(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
void PropagateStatisticsToStageArguments(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
void InferStatisticsForStage(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
+void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx);
-} // namespace NYql::NDq { \ No newline at end of file
+} // namespace NYql::NDq {