diff options
author | aozeritsky <aozeritsky@ydb.tech> | 2023-09-20 23:28:41 +0300 |
---|---|---|
committer | aozeritsky <aozeritsky@ydb.tech> | 2023-09-20 23:48:31 +0300 |
commit | b5afdcb64e1d05c6b8289c2e33b0c384c8acd7c7 (patch) | |
tree | 532be59266a882ca63028e81c0b3428b4f4fb839 | |
parent | 599ae03d021d62d3b7b12a8483071cac51b5366b (diff) | |
download | ydb-b5afdcb64e1d05c6b8289c2e33b0c384c8acd7c7.tar.gz |
Can use standalone native CBO
Can use standalone native CBO
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 101 | ||||
-rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_log.h | 3 |
2 files changed, 104 insertions, 0 deletions
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
index 867e6fd977d..330271e3e8c 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
@@ -9,6 +9,8 @@
 #include <ydb/library/yql/core/yql_type_helpers.h>
 #include <ydb/library/yql/core/yql_statistics.h>
 
+#include <ydb/library/yql/core/cbo/cbo_optimizer.h> //interface
+
 #include <library/cpp/disjoint_sets/disjoint_sets.h>
 
 
@@ -1001,4 +1003,137 @@ TExprBase DqOptimizeEquiJoinWithCosts(const TExprBase& node, TExprContext& ctx,
     return res;
 }
 
+/// Adapter exposing the native DPccp join-order search through the generic
+/// IOptimizer interface.
+///
+/// The constructor copies the caller's input and converts it into the
+/// relation list and join graph handed to TDPccpSolver; JoinSearch() runs
+/// the solver and flattens the resulting tree into TOutput::Nodes.
+class TOptimizerNative: public IOptimizer {
+public:
+    /// @param input relations and equivalence classes to optimize (copied).
+    /// @param log   logging callback; stored, but not invoked by this class.
+    TOptimizerNative(const IOptimizer::TInput& input, const std::function<void(const TString&)>& log)
+        : Input(input)
+        , Log(log)
+    {
+        Prepare();
+    }
+
+    /// Runs the solver over the prepared graph and returns the join tree.
+    /// The returned TOutput::Input points at this object's copy of the input,
+    /// so the output must not be used after the optimizer is destroyed.
+    TOutput JoinSearch() override {
+        TDPccpSolver<64> solver(JoinGraph, Rels);
+        std::shared_ptr<TJoinOptimizerNode> result = solver.Solve();
+        TOutput output;
+        output.Input = &Input;
+        BuildOutput(&output, result.get());
+        return output;
+    }
+
+private:
+    /// Appends `node` and, recursively, its children to output->Nodes.
+    /// @return index of the entry created for `node`.
+    int BuildOutput(TOutput* output, IBaseOptimizerNode* node) {
+        const int index = static_cast<int>(output->Nodes.size());
+        output->Nodes.emplace_back();
+        switch (node->Kind) {
+            case EOptimizerNodeKind::RelNodeType: {
+                // Leaf: a single base relation, identified by its numeric label.
+                auto* rel = static_cast<TRelOptimizerNode*>(node);
+                output->Nodes[index].Rels.emplace_back(FromString<int>(rel->Label));
+                break;
+            }
+            case EOptimizerNodeKind::JoinNodeType: {
+                auto* join = static_cast<TJoinOptimizerNode*>(node);
+
+                // Build both subtrees before touching our own entry: the
+                // recursive calls append to output->Nodes, and a reference
+                // taken earlier may dangle if the container reallocates.
+                const int outer = BuildOutput(output, join->LeftArg.get());
+                const int inner = BuildOutput(output, join->RightArg.get());
+
+                TJoinNode& r = output->Nodes[index];
+                r.Mode = IOptimizer::EJoinType::Inner;
+                r.Outer = outer;
+                r.Inner = inner;
+
+                // Ordered set: deduplicates relation ids and keeps Rels sorted.
+                std::set<int> rels;
+                for (const auto& [col1, col2] : join->JoinConditions) {
+                    const int relId1 = FromString<int>(col1.RelName);
+                    const int colId1 = FromString<int>(col1.AttributeName);
+                    const int relId2 = FromString<int>(col2.RelName);
+                    const int colId2 = FromString<int>(col2.AttributeName);
+
+                    r.LeftVars.emplace_back(std::make_tuple(relId1, colId1));
+                    r.RightVars.emplace_back(std::make_tuple(relId2, colId2));
+
+                    rels.emplace(relId1);
+                    rels.emplace(relId2);
+                }
+
+                r.Rels.reserve(rels.size());
+                r.Rels.insert(r.Rels.end(), rels.begin(), rels.end());
+                break;
+            }
+            default:
+                Y_VERIFY(false);
+        }
+        return index;
+    }
+
+    /// Converts Input into the solver's representation: one TRelOptimizerNode
+    /// per input relation and one graph edge per pair of joined columns.
+    void Prepare() {
+        // Relations are labelled "1", "2", ... in input order.
+        int index = 1;
+        for (const auto& r : Input.Rels) {
+            auto label = ToString(index++);
+            auto stats = std::make_shared<TOptimizerStatistics>(r.Rows, r.TargetVars.size(), r.TotalCost);
+            Rels.push_back(std::make_shared<TRelOptimizerNode>(label, stats));
+        }
+
+        // Every member of an equivalence class is paired with the class's
+        // first member.
+        std::set<std::pair<TJoinColumn, TJoinColumn>> joinConditions;
+        for (const auto& clazz : Input.EqClasses) {
+            if (clazz.Vars.size() < 2) {
+                // Nothing to pair; also avoids indexing an empty class.
+                continue;
+            }
+            const auto [relId, varId] = clazz.Vars[0];
+            const TJoinColumn c1(ToString(relId), ToString(varId));
+            for (size_t i = 1; i < clazz.Vars.size(); ++i) {
+                const auto [crelId, cvarId] = clazz.Vars[i];
+                const TJoinColumn c2(ToString(crelId), ToString(cvarId));
+                joinConditions.emplace(c1, c2);
+            }
+        }
+
+        // NOTE(review): FindNode() is called with relation labels, but nothing
+        // in this class registers nodes in JoinGraph -- confirm they are known.
+        for (const auto& cond : joinConditions) {
+            const int fromNode = JoinGraph.FindNode(cond.first.RelName);
+            const int toNode = JoinGraph.FindNode(cond.second.RelName);
+            JoinGraph.AddEdge(TEdge(fromNode, toNode, cond));
+        }
+        JoinGraph.ComputeTransitiveClosure(joinConditions);
+    }
+
+    TInput Input;
+    const std::function<void(const TString&)> Log;
+
+    TVector<std::shared_ptr<TRelOptimizerNode>> Rels;
+    TGraph<64> JoinGraph;
+};
+
+/// Creates a native cost-based optimizer for `input`.
+/// The object is allocated with `new` and is not retained here.
+IOptimizer* MakeNativeOptimizer(const IOptimizer::TInput& input, const std::function<void(const TString&)>& log) {
+    return new TOptimizerNative(input, log);
 }
+
+} // namespace NYql::NDq
+
diff --git a/ydb/library/yql/dq/opt/dq_opt_log.h b/ydb/library/yql/dq/opt/dq_opt_log.h
index f1ae7b4978b..119137a18c4 100644
--- a/ydb/library/yql/dq/opt/dq_opt_log.h
+++ b/ydb/library/yql/dq/opt/dq_opt_log.h
@@ -4,6 +4,7 @@
 #include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
 #include <ydb/library/yql/core/expr_nodes_gen/yql_expr_nodes_gen.h>
 #include <ydb/library/yql/core/yql_graph_transformer.h>
+#include <ydb/library/yql/core/cbo/cbo_optimizer.h>
 
 #include <functional>
 
@@ -36,4 +37,8 @@ IGraphTransformer::TStatus DqWrapRead(const TExprNode::TPtr& input, TExprNode::T
 
 NNodes::TExprBase DqExpandMatchRecognize(NNodes::TExprBase node, TExprContext& ctx, TTypeAnnotationContext& typeAnnCtx);
 
+/// Creates the native cost-based join-order optimizer for `input`.
+/// Returns a raw pointer; presumably the caller takes ownership -- confirm.
+IOptimizer* MakeNativeOptimizer(const IOptimizer::TInput& input, const std::function<void(const TString&)>& log);
+
 } // namespace NYql::NDq