diff options
author | Pavel Velikhov <pavelvelikhov@ydb.tech> | 2024-11-26 16:59:51 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-26 16:59:51 +0300 |
commit | 806d77103e5cb80c90102ffe61063f365e06e1a4 (patch) | |
tree | c65b274e4b45383a62e1048fdfcc546a81c46cf1 | |
parent | c5837d4aace6e4255bdd9dcc41f40a9aa93dba66 (diff) | |
download | ydb-806d77103e5cb80c90102ffe61063f365e06e1a4.tar.gz |
Added statistics inference for DqJoins (#11994)
7 files changed, 251 insertions, 184 deletions
diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json index cbc1461ddb..9f04a9dfad 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s.json @@ -1,5 +1,5 @@ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { @@ -11,22 +11,12 @@ "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/catalog_returns" } ] }, @@ -55,64 +45,82 @@ "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + }, + { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"test\/ds\/customer" + "table":"test\/ds\/customer_demographics" }, { "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { + "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"TableLookup", - "table":"test\/ds\/store_sales" + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" }, { - "op_name":"TableFullScan", - "table":"test\/ds\/item" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableLookup", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" + "table":"test\/ds\/date_dim" } ] }, @@ -121,16 +129,8 @@ "table":"test\/ds\/store" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] }, @@ -195,22 +195,12 @@ "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/catalog_returns" } ] }, @@ -239,64 +229,82 @@ "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + }, + { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"InnerJoin (Grace)", "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"test\/ds\/customer" + "table":"test\/ds\/customer_demographics" }, { "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { + "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"TableLookup", - "table":"test\/ds\/store_sales" + "op_name":"TableFullScan", + "table":"test\/ds\/store_returns" }, { - "op_name":"TableFullScan", - "table":"test\/ds\/item" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableLookup", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/date_dim" } ] }, { "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" + "table":"test\/ds\/date_dim" } ] }, @@ -305,16 +313,8 @@ "table":"test\/ds\/store" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] }, diff --git a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json index ad5442c66b..ea9564d2db 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpcds64_1000s_column_store.json @@ -11,22 +11,12 @@ "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/catalog_returns" } ] }, @@ -55,10 +45,18 @@ "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + }, + { "op_name":"InnerJoin (MapJoin)", "args": [ @@ -79,32 +77,42 @@ "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { + "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"test\/ds\/store_sales" + "table":"test\/ds\/store_returns" }, { - "op_name":"TableFullScan", - "table":"test\/ds\/item" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" } ] } @@ -121,16 +129,8 @@ "table":"test\/ds\/store" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] }, @@ -195,22 +195,12 @@ "args": [ { - "op_name":"InnerJoin (Grace)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_sales" - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/catalog_returns" - } - ] + "op_name":"TableFullScan", + "table":"test\/ds\/catalog_sales" }, { "op_name":"TableFullScan", - "table":"test\/ds\/store_returns" + "table":"test\/ds\/catalog_returns" } ] }, @@ -239,10 +229,18 @@ "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + }, + { "op_name":"InnerJoin (MapJoin)", "args": [ @@ -263,32 +261,42 @@ "args": [ { + "op_name":"TableFullScan", + "table":"test\/ds\/customer_demographics" + }, + { "op_name":"InnerJoin (Grace)", "args": [ { - "op_name":"InnerJoin (MapJoin)", + "op_name":"TableFullScan", + "table":"test\/ds\/customer_address" + }, + { + "op_name":"InnerJoin (Grace)", "args": [ { "op_name":"TableFullScan", - "table":"test\/ds\/store_sales" + "table":"test\/ds\/store_returns" }, { - "op_name":"TableFullScan", - "table":"test\/ds\/item" + "op_name":"InnerJoin (MapJoin)", + "args": + [ + { + "op_name":"TableFullScan", + "table":"test\/ds\/store_sales" + }, + { + "op_name":"TableFullScan", + "table":"test\/ds\/item" + } + ] } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" } ] } @@ -305,16 +313,8 @@ "table":"test\/ds\/store" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_demographics" } ] - }, - { - "op_name":"TableFullScan", - "table":"test\/ds\/customer_address" } ] }, diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json index e122be691e..4b2772b1a1 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s.json @@ -7,39 +7,39 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", "args": [ { "op_name":"TableFullScan", - "table":"partsupp" + "table":"supplier" }, { "op_name":"InnerJoin (MapJoin)", "args": [ { - "op_name":"TableFullScan", - "table":"supplier" + "op_name":"TableLookup", + "table":"region" }, { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableLookup", - "table":"region" - }, - { - "op_name":"TableFullScan", - "table":"nation" - } - ] + "op_name":"TableFullScan", + "table":"nation" } ] } ] - }, + } + ] + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ { "op_name":"InnerJoin (Grace)", "args": @@ -73,12 +73,12 @@ ] } ] + }, + { + "op_name":"TableFullScan", + "table":"part" } ] - }, - { - "op_name":"TableFullScan", - "table":"part" } ] } diff --git a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json index 1a0edf0bd3..e5adefe907 100644 --- a/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json +++ b/ydb/core/kqp/ut/join/data/join_order/tpch2_1000s_column_store.json @@ -7,12 +7,16 @@ "args": [ { - "op_name":"InnerJoin (Grace)", + "op_name":"TableFullScan", + "table":"partsupp" + }, + { + "op_name":"InnerJoin (MapJoin)", "args": [ { "op_name":"TableFullScan", - "table":"partsupp" + "table":"supplier" }, { "op_name":"InnerJoin (MapJoin)", @@ -20,26 +24,22 @@ [ { "op_name":"TableFullScan", - "table":"supplier" + "table":"nation" }, { - "op_name":"InnerJoin (MapJoin)", - "args": - [ - { - "op_name":"TableFullScan", - "table":"nation" - }, - { - "op_name":"TableFullScan", - "table":"region" - } - ] + "op_name":"TableFullScan", + "table":"region" } ] } ] - }, + } + ] + }, + { + "op_name":"InnerJoin (Grace)", + "args": + [ { "op_name":"InnerJoin (Grace)", "args": @@ -73,12 +73,12 @@ ] } ] + }, + { + "op_name":"TableFullScan", + "table":"part" } ] - }, - { - "op_name":"TableFullScan", - "table":"part" } ] } diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp index 124c5c6969..70647840c5 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp @@ -370,6 +370,69 @@ void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationCo } /** + * Infer statistics for DqJoin + * DqJoin is an intermediary join representantation in Dq + */ +void InferStatisticsForDqJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints) { + auto inputNode = TExprBase(input); + auto join = inputNode.Cast<TDqJoin>(); + + auto leftArg = join.LeftInput(); + auto rightArg = join.RightInput(); + + auto leftStats = typeCtx->GetStats(leftArg.Raw()); + auto rightStats = typeCtx->GetStats(rightArg.Raw()); + + if (!leftStats || !rightStats) { + return; + } + + auto joinAlgo = FromString<EJoinAlgoType>(join.JoinAlgo().StringValue()); + if (joinAlgo == EJoinAlgoType::Undefined) { + return; + } + + auto leftLabels = InferLabels(leftStats, join.LeftJoinKeyNames()); + auto rightLabels = InferLabels(rightStats, join.RightJoinKeyNames()); + + leftStats = ApplyCardinalityHints(leftStats, leftLabels, hints); + rightStats = ApplyCardinalityHints(rightStats, rightLabels, hints); + + TVector<TJoinColumn> leftJoinKeys; + TVector<TJoinColumn> rightJoinKeys; + + for (size_t i=0; i<join.LeftJoinKeyNames().Size(); i++) { + auto alias = ExtractAlias(join.LeftJoinKeyNames().Item(i).StringValue()); + auto attrName = RemoveAliases(join.LeftJoinKeyNames().Item(i).StringValue()); + leftJoinKeys.push_back(TJoinColumn(alias, attrName)); + } + for (size_t i=0; i<join.RightJoinKeyNames().Size(); i++) { + auto alias = ExtractAlias(join.RightJoinKeyNames().Item(i).StringValue()); + auto attrName = RemoveAliases(join.RightJoinKeyNames().Item(i).StringValue()); + rightJoinKeys.push_back(TJoinColumn(alias, attrName)); + } + + auto unionOfLabels = UnionLabels(leftLabels, rightLabels); + + auto resStats = std::make_shared<TOptimizerStatistics>( + ctx.ComputeJoinStats( + *leftStats, + *rightStats, + leftJoinKeys, + rightJoinKeys, + joinAlgo, + ConvertToJoinKind(join.JoinType().StringValue()), + FindCardHint(unionOfLabels, hints) + ) + ); + + resStats->Labels = std::make_shared<TVector<TString>>(); + resStats->Labels->insert(resStats->Labels->begin(), unionOfLabels.begin(), unionOfLabels.end()); + typeCtx->SetStats(join.Raw(), resStats); + YQL_CLOG(TRACE, CoreDq) << "Infer statistics for DqJoin: " << resStats->ToString(); +} + +/** * Infer statistics for DqSource * * We just pass up the statistics from the Settings of the DqSource diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.h b/ydb/library/yql/dq/opt/dq_opt_stat.h index a19f3f5b84..0959bcbef4 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat.h +++ b/ydb/library/yql/dq/opt/dq_opt_stat.h @@ -19,6 +19,7 @@ void InferStatisticsForStage(const TExprNode::TPtr& input, TTypeAnnotationContex void InferStatisticsForDqSource(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx); void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {}); void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {}); +void InferStatisticsForDqJoin(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx, const IProviderContext& ctx, TCardinalityHints hints = {}); void InferStatisticsForAsList(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx); bool InferStatisticsForListParam(const TExprNode::TPtr& input, TTypeAnnotationContext* typeCtx); diff --git a/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp b/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp index 7601bcabe1..50f0b2d4a2 100644 --- a/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_stat_transformer_base.cpp @@ -65,6 +65,9 @@ bool TDqStatisticsTransformerBase::BeforeLambdas(const TExprNode::TPtr& input, T else if(TCoGraceJoinCore::Match(input.Get())) { InferStatisticsForGraceJoin(input, TypeCtx, Pctx, CardinalityHints); } + else if (TDqJoin::Match(input.Get())) { + InferStatisticsForDqJoin(input, TypeCtx, Pctx, CardinalityHints); + } // Do nothing in case of EquiJoin, otherwise the EquiJoin rule won't fire else if(TCoEquiJoin::Match(input.Get())){ |