diff options
| author | Tony-Romanov <[email protected]> | 2024-09-24 08:11:32 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2024-09-24 09:11:32 +0300 |
| commit | d2b896d3c5ddf1371fa3d332244ea5b7684e8f17 (patch) | |
| tree | 57803a6b09a5b03372ba039c7348e60355b35ff3 | |
| parent | f3bb311b382b307918adba2f34c600b6e21ca361 (diff) | |
Don't lose 'any' flag after CBO. (#8674)
| -rw-r--r-- | ydb/core/kqp/ut/join/data/queries/any_join.sql | 0 | ||||
| -rw-r--r-- | ydb/core/kqp/ut/join/kqp_join_ut.cpp | 2 | ||||
| -rw-r--r-- | ydb/library/yql/core/cbo/cbo_optimizer_new.cpp | 45 | ||||
| -rw-r--r-- | ydb/library/yql/core/cbo/cbo_optimizer_new.h | 9 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_cbo_ut.cpp | 24 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h | 24 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h | 14 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp | 139 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp | 41 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h | 15 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp | 6 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_join_tree_node.h | 10 | ||||
| -rw-r--r-- | ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h | 10 | ||||
| -rw-r--r-- | ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp | 2 | ||||
| -rw-r--r-- | ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp | 5 | ||||
| -rw-r--r-- | ydb/library/yql/sql/pg/optimizer.cpp | 4 |
16 files changed, 289 insertions, 61 deletions
diff --git a/ydb/core/kqp/ut/join/data/queries/any_join.sql b/ydb/core/kqp/ut/join/data/queries/any_join.sql new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/ydb/core/kqp/ut/join/data/queries/any_join.sql diff --git a/ydb/core/kqp/ut/join/kqp_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_ut.cpp index 5216a9a51ec..15efc66c9c8 100644 --- a/ydb/core/kqp/ut/join/kqp_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_ut.cpp @@ -1892,7 +1892,7 @@ Y_UNIT_TEST_SUITE(KqpJoin) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); Cout << FormatResultSetYson(result.GetResultSet(0)); CompareYson(R"( - [[["02"];#;["02"];["03"];#;["03"];["1"];#;["1"]];[["02"];#;["02"];["03"];#;["04"];["1"];#;["1"]];[["02"];#;["02"];["05"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["05"];#;["06"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["06"];["2"];#;["2"]];[["03"];["03"];["03"];["08"];["02"];["07"];["1"];["1"];["1"]];[["03"];["03"];["03"];["09"];["03"];["08"];["2"];["2"];["2"]];[["09"];#;["09"];["20"];#;["09"];["1"];#;["1"]];[["09"];#;["09"];["21"];#;["10"];["2"];#;["2"]]] + [[["02"];#;["02"];["03"];#;["03"];["1"];#;["1"]];[["02"];#;["02"];["05"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["05"];["2"];#;["2"]];[["03"];["03"];["03"];["08"];["02"];["07"];["1"];["1"];["1"]];[["03"];["03"];["03"];["09"];["03"];["08"];["2"];["2"];["2"]];[["09"];#;["09"];["20"];#;["09"];["1"];#;["1"]];[["09"];#;["09"];["21"];#;["10"];["2"];#;["2"]]] )", FormatResultSetYson(result.GetResultSet(0))); } } diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp index ea9d5d6f6e1..0bfda45956e 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp @@ -74,20 +74,30 @@ void TRelOptimizerNode::Print(std::stringstream& stream, int ntabs) { stream << *Stats << "\n"; } -TJoinOptimizerNode::TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left, const std::shared_ptr<IBaseOptimizerNode>& right, - const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool nonReorderable) : - IBaseOptimizerNode(JoinNodeType), - LeftArg(left), - RightArg(right), - JoinConditions(joinConditions), - JoinType(joinType), - JoinAlgo(joinAlgo) { - IsReorderable = !nonReorderable; - for (auto [l,r] : joinConditions ) { - LeftJoinKeys.push_back(l.AttributeName); - RightJoinKeys.push_back(r.AttributeName); - } +TJoinOptimizerNode::TJoinOptimizerNode( + const std::shared_ptr<IBaseOptimizerNode>& left, + const std::shared_ptr<IBaseOptimizerNode>& right, + const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, + const EJoinKind joinType, + const EJoinAlgoType joinAlgo, + bool leftAny, + bool rightAny, + bool nonReorderable +) : IBaseOptimizerNode(JoinNodeType) + , LeftArg(left) + , RightArg(right) + , JoinConditions(joinConditions) + , JoinType(joinType) + , JoinAlgo(joinAlgo) + , LeftAny(leftAny) + , RightAny(rightAny) + , IsReorderable(!nonReorderable) +{ + for (const auto& [l,r] : joinConditions ) { + LeftJoinKeys.push_back(l.AttributeName); + RightJoinKeys.push_back(r.AttributeName); } +} TVector<TString> TJoinOptimizerNode::Labels() { auto res = LeftArg->Labels(); @@ -101,7 +111,14 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) { stream << " "; } - stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo) << ") "; + stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo); + if (LeftAny) { + stream << ",LeftAny"; + } + if (RightAny) { + stream << ",RightAny"; + } + stream << ") "; for (auto c : JoinConditions){ stream << c.first.RelName << "." << c.first.AttributeName diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h index cdb143eaa88..f40a45b55fa 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h @@ -295,6 +295,10 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode { TVector<TString> RightJoinKeys; EJoinKind JoinType; EJoinAlgoType JoinAlgo; + /////////////////// 'ANY' flag means leaving only one row from the join side. + bool LeftAny; + bool RightAny; + /////////////////// bool IsReorderable; TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left, @@ -302,7 +306,10 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode { const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions, const EJoinKind joinType, const EJoinAlgoType joinAlgo, - bool nonReorderable=false); + bool leftAny, + bool rightAny, + bool nonReorderable = false + ); virtual ~TJoinOptimizerNode() {} virtual TVector<TString> Labels(); virtual void Print(std::stringstream& stream, int ntabs=0); diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp index ef3663626b9..3e088a59ded 100644 --- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp @@ -55,13 +55,16 @@ Y_UNIT_TEST(JoinSearch2Rels) { std::static_pointer_cast<IBaseOptimizerNode>(rel2), joinConditions, InnerJoin, - EJoinAlgoType::GraceJoin + EJoinAlgoType::GraceJoin, + true, + false ); auto res = optimizer->JoinSearch(op); std::stringstream ss; res->Print(ss); - TString expected = R"__(Join: (InnerJoin,MapJoin) b.1=a.1, + Cout << ss.str() << '\n'; + TString expected = R"__(Join: (InnerJoin,MapJoin,RightAny) b.1=a.1, Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA Rel: b Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA @@ -93,8 +96,10 @@ Y_UNIT_TEST(JoinSearch3Rels) { std::static_pointer_cast<IBaseOptimizerNode>(rel2), joinConditions, InnerJoin, - EJoinAlgoType::GraceJoin - ); + EJoinAlgoType::GraceJoin, + false, + false + ); joinConditions.insert({ NDq::TJoinColumn("a", "1"), @@ -106,14 +111,17 @@ Y_UNIT_TEST(JoinSearch3Rels) { std::static_pointer_cast<IBaseOptimizerNode>(rel3), joinConditions, InnerJoin, - EJoinAlgoType::GraceJoin - ); + EJoinAlgoType::GraceJoin, + true, + false + ); auto res = optimizer->JoinSearch(op2); std::stringstream ss; res->Print(ss); + Cout << ss.str() << '\n'; - TString expected = R"__(Join: (InnerJoin,MapJoin) a.1=b.1,a.1=c.1, + TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1,a.1=c.1, Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Sel: 1, Storage: NA Join: (InnerJoin,MapJoin) b.1=a.1, Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA @@ -223,7 +231,7 @@ void _DqOptimizeEquiJoinWithCosts(const std::function<IOptimizerNew*()>& optFact UNIT_ASSERT(equiJoin.Maybe<TCoEquiJoin>()); auto resStr = NCommon::ExprToPrettyString(ctx, *res.Ptr()); auto expected = R"__(( -(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('"join_algo" '"MapJoin")))) +(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('join_algo 'MapJoin)))) (return (EquiJoin '('() '"orders") '('() '"customer") $1 '())) ) )__"; diff --git a/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h b/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h index 9cbe532e926..4c4c20a9c71 100644 --- a/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h +++ b/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h @@ -57,14 +57,14 @@ public: private: auto GetLeftConflictsVisitor() { auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) { - if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { + if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType)) { ConflictRules_.emplace_back( SubtreeNodes_[child->RightArg], SubtreeNodes_[child->LeftArg] ); } - if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { + if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType)) { ConflictRules_.emplace_back( SubtreeNodes_[child->LeftArg], SubtreeNodes_[child->RightArg] @@ -77,18 +77,18 @@ private: auto GetRightConflictsVisitor() { auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) { - if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { + if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType)) { ConflictRules_.emplace_back( SubtreeNodes_[child->LeftArg], SubtreeNodes_[child->RightArg] ); } - if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) { + if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType)) { ConflictRules_.emplace_back( SubtreeNodes_[child->RightArg], SubtreeNodes_[child->LeftArg] - ); + ); } }; @@ -106,6 +106,20 @@ private: VisitJoinTree(childJoinNode->LeftArg, visitor); VisitJoinTree(childJoinNode->RightArg, visitor); + if (childJoinNode->LeftAny || !childJoinNode->IsReorderable) { + ConflictRules_.emplace_back( + SubtreeNodes_[childJoinNode->LeftArg], + SubtreeNodes_[childJoinNode->RightArg] + ); + } + + if (childJoinNode->RightAny || !childJoinNode->IsReorderable) { + ConflictRules_.emplace_back( + SubtreeNodes_[childJoinNode->RightArg], + SubtreeNodes_[childJoinNode->LeftArg] + ); + } + visitor(childJoinNode); } diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h index cb95d2e45ae..142e97bab4f 100644 --- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h +++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h @@ -81,6 +81,8 @@ private: const std::shared_ptr<IBaseOptimizerNode>& left, const std::shared_ptr<IBaseOptimizerNode>& right, EJoinKind joinKind, + bool leftAny, + bool rightAny, bool isCommutative, const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions, @@ -409,6 +411,8 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS const std::shared_ptr<IBaseOptimizerNode>& left, const std::shared_ptr<IBaseOptimizerNode>& right, EJoinKind joinKind, + bool leftAny, + bool rightAny, bool isCommutative, const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions, @@ -419,7 +423,7 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint ) { double bestCost = std::numeric_limits<double>::infinity(); - EJoinAlgoType bestAlgo{}; + EJoinAlgoType bestAlgo = EJoinAlgoType::Undefined; bool bestJoinIsReversed = false; for (auto joinAlgo : AllJoinAlgos) { @@ -452,13 +456,13 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS } } - Y_ENSURE(bestCost != std::numeric_limits<double>::infinity(), "No join was chosen!"); + Y_ENSURE(bestAlgo != EJoinAlgoType::Undefined, "No join was chosen!"); if (bestJoinIsReversed) { - return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint); + return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeCardHint); } - return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint); + return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeCardHint); } /* @@ -489,6 +493,8 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS leftNodes, rightNodes, csgCmpEdge->JoinKind, + csgCmpEdge->LeftAny, + csgCmpEdge->RightAny, csgCmpEdge->IsCommutative, csgCmpEdge->JoinConditions, reversedEdge->JoinConditions, diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp index ccbb1faf2d3..75c234f813f 100644 --- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp @@ -3,31 +3,47 @@ #include "dq_opt_log.h" #include "dq_opt_join.h" +#include <util/string/split.h> + #include "dq_opt_make_join_hypergraph.h" +#include "dq_opt_log.h" using namespace NYql; using namespace NNodes; using namespace NYql::NDq; std::shared_ptr<IBaseOptimizerNode> CreateChain(size_t size, TString onAttribute, TString tablePrefix="e") { - std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TRelOptimizerNode>(tablePrefix + "1", nullptr); + std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TRelOptimizerNode>(tablePrefix + "1", std::make_shared<TOptimizerStatistics>()); for (size_t i = 1; i < size; ++i) { auto eiStr = tablePrefix + ToString(i + 1); auto eiPrevStr = tablePrefix + ToString(i); - auto ei = std::make_shared<TRelOptimizerNode>(eiStr, nullptr); + auto ei = std::make_shared<TRelOptimizerNode>(eiStr, std::make_shared<TOptimizerStatistics>()); std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions; joinConditions.insert({TJoinColumn(eiPrevStr, onAttribute), TJoinColumn(eiStr, onAttribute)}); root = std::make_shared<TJoinOptimizerNode>( - root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined + root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false ); } return root; } + +std::shared_ptr<IBaseOptimizerNode> Enumerate(const std::shared_ptr<IBaseOptimizerNode>& root) { + auto ctx = TBaseProviderContext(); + auto optimizer = MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max()); + + Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType); + auto res = optimizer->JoinSearch(std::static_pointer_cast<TJoinOptimizerNode>(root)); + Cout << "Optimized Tree:" << Endl; + std::stringstream ss; res->Print(ss); + Cout << ss.str() << Endl; + return res; +} + Y_UNIT_TEST_SUITE(HypergraphBuild) { using TNodeSet = std::bitset<64>; @@ -57,6 +73,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(graph.GetEdges().size() == 6); CheckClique(graph); + Enumerate(root); } Y_UNIT_TEST(SimpleChain4NodesTransitiveClosure) { @@ -66,6 +83,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(graph.GetEdges().size() == 12); CheckClique(graph); + Enumerate(root); } Y_UNIT_TEST(SimpleChain5NodesTransitiveClosure) { @@ -75,6 +93,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(graph.GetEdges().size() == 20); CheckClique(graph); + Enumerate(root); } Y_UNIT_TEST(ComplexTransitiveClosure) { @@ -86,7 +105,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 auto root = std::make_shared<TJoinOptimizerNode>( - lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined + lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false ); joinConditions.clear(); @@ -97,7 +116,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { // a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2 // ^ we don't want to have transitive closure between c and a root = std::make_shared<TJoinOptimizerNode>( - root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined + root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false ); auto graph = MakeJoinHypergraph<TNodeSet>(root); @@ -125,4 +144,114 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) { UNIT_ASSERT(!graph.FindEdgeBetween(c2, a2)); UNIT_ASSERT(!graph.FindEdgeBetween(c2, a3)); } + + template <typename TJoinArg> + std::shared_ptr<IBaseOptimizerNode> GetJoinArg(const TJoinArg& joinArg) { + if constexpr (std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>) { + return joinArg; + } else if (std::is_convertible_v<TJoinArg, std::string>) { + std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TRelOptimizerNode>(joinArg, std::make_shared<TOptimizerStatistics>()); + return root; + } else { + static_assert(std::is_convertible_v<TJoinArg, std::string> || + std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>, + "Args of join must be either Join or TString, for example: Join(Join('A', 'B'), 'C')"); + } + + Y_UNREACHABLE(); + } + + + template<typename TLhsArg, typename TRhsArg> + std::shared_ptr<IBaseOptimizerNode> Join(const TLhsArg& lhsArg, const TRhsArg& rhsArg, TString on="", TString onAttr="") { + if constexpr (std::is_convertible_v<TLhsArg, std::string> && std::is_convertible_v<TRhsArg, std::string>) { + on = Sprintf("%s,%s", lhsArg, rhsArg); + } + + if (on.empty()) { + throw std::invalid_argument("Bad argument."); + } + + std::string lhsCond, rhsCond; + Split(on, ",", lhsCond, rhsCond); + auto col = onAttr.empty()? ToString(rand()): onAttr; + std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TJoinOptimizerNode>( + TJoinOptimizerNode( + GetJoinArg(lhsArg), + GetJoinArg(rhsArg), + {{TJoinColumn(lhsCond.c_str(), col), TJoinColumn(rhsCond.c_str(), col)}}, + EJoinKind::InnerJoin, + EJoinAlgoType::Undefined, + false, + false + ) + ); + return root; + } + + Y_UNIT_TEST(AnyJoinWithTransitiveClosure) { + auto root = Join("A", Join("B", Join("C", "D", "C,D", "id"), "B,C", "id"), "A,B", "id"); + std::static_pointer_cast<TJoinOptimizerNode>(root)->LeftAny = true; + + auto graph = MakeJoinHypergraph<TNodeSet>(root); + Cout << graph.String() << Endl; + + auto A = graph.GetNodesByRelNames({"A"}); + auto B = graph.GetNodesByRelNames({"B"}); + auto C = graph.GetNodesByRelNames({"C"}); + auto D = graph.GetNodesByRelNames({"D"}); + + UNIT_ASSERT(graph.FindEdgeBetween(B, D)); + UNIT_ASSERT(!graph.FindEdgeBetween(A, D)); + UNIT_ASSERT(!graph.FindEdgeBetween(A, C)); + } + + Y_UNIT_TEST(AnyJoinConstraints1) { + auto anyJoin = Join(Join("A", "B"), "C", /*on=*/ "B,C"); + std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true; + auto join = Join(anyJoin, "D", /*on=*/"A,D"); + + auto graph = MakeJoinHypergraph<TNodeSet>(join); + Cout << graph.String() << Endl; + UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); + + Enumerate(join); + } + + + Y_UNIT_TEST(AnyJoinConstraints2) { + auto anyJoin = Join(Join(Join("A", "B"), "C", /*on=*/ "B,C"), "D", "C,D"); + std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true; + auto join = Join(anyJoin, "E", /*on=*/ "A,E"); + + auto graph = MakeJoinHypergraph<TNodeSet>(join); + Cout << graph.String() << Endl; + UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); + + Enumerate(join); + } + + Y_UNIT_TEST(AnyJoinConstraints3) { + auto anyJoin = Join(Join("A", "B"), Join("C", "D"), /*on=*/"B,C"); + std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->RightAny = true; + auto join = Join(anyJoin, "E", /*on=*/ "C,E"); + + auto graph = MakeJoinHypergraph<TNodeSet>(join); + Cout << graph.String() << Endl; + UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); + + Enumerate(join); + } + + Y_UNIT_TEST(IsReorderableConstraint) { + auto nonReorderable = Join(Join(Join("A", "B"), "C", /*on=*/ "B,C"), "D", "C,D"); + std::static_pointer_cast<TJoinOptimizerNode>(nonReorderable)->IsReorderable = false; + auto join = Join(nonReorderable, "E", /*on=*/ "A,E"); + + auto graph = MakeJoinHypergraph<TNodeSet>(join); + Cout << graph.String() << Endl; + UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size()); + + Enumerate(join); + } } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index e9e701fda15..32d3fd33f21 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -3,6 +3,7 @@ #include "dq_opt_make_join_hypergraph.h" #include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h> +#include <ydb/library/yql/core/yql_join.h> #include <ydb/library/yql/dq/opt/dq_opt.h> #include <ydb/library/yql/utils/log/log.h> @@ -96,7 +97,9 @@ std::shared_ptr<TJoinOptimizerNode> ConvertToJoinTree( TJoinColumn(rightScope, rightColumn))); } - return std::make_shared<TJoinOptimizerNode>(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined); + const auto linkSettings = GetEquiJoinLinkSettings(joinTuple.Options().Ref()); + return std::make_shared<TJoinOptimizerNode>(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined, + linkSettings.LeftHints.contains("any"), linkSettings.RightHints.contains("any")); } /** @@ -142,14 +145,32 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin, rightJoinColumns.push_back(BuildAtom(pair.second.AttributeName, equiJoin.Pos(), ctx)); } - auto optionsList = ctx.Builder(equiJoin.Pos()) - .List() - .List(0) - .Atom(0, "join_algo") - .Atom(1, ToString(reorderResult->JoinAlgo)) + TExprNode::TListType options(1U, + ctx.Builder(equiJoin.Pos()) + .List() + .Atom(0, "join_algo", TNodeFlags::Default) + .Atom(1, ToString(reorderResult->JoinAlgo), TNodeFlags::Default) .Seal() - .Seal() - .Build(); + .Build() + ); + + if (reorderResult->LeftAny) { + options.emplace_back(ctx.Builder(equiJoin.Pos()) + .List() + .Atom(0, "left", TNodeFlags::Default) + .Atom(1, "any", TNodeFlags::Default) + .Seal() + .Build()); + } + + if (reorderResult->RightAny) { + options.emplace_back(ctx.Builder(equiJoin.Pos()) + .List() + .Atom(0, "right", TNodeFlags::Default) + .Atom(1, "any", TNodeFlags::Default) + .Seal() + .Build()); + } // Build the final output return Build<TCoEquiJoinTuple>(ctx,equiJoin.Pos()) @@ -162,7 +183,9 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin, .RightKeys() .Add(rightJoinColumns) .Build() - .Options(optionsList) + .Options() + .Add(std::move(options)) + .Build() .Done(); } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h index 9a4fe0a3f49..c6f5be64fb2 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h @@ -28,12 +28,16 @@ public: const TNodeSet& left, const TNodeSet& right, EJoinKind joinKind, + bool leftAny, + bool rightAny, bool isCommutative, const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions ) : Left(left) , Right(right) , JoinKind(joinKind) + , LeftAny(leftAny) + , RightAny(rightAny) , IsCommutative(isCommutative) , JoinConditions(joinConditions) , IsReversed(false) @@ -52,6 +56,7 @@ public: TNodeSet Left; TNodeSet Right; EJoinKind JoinKind; + bool LeftAny, RightAny; bool IsCommutative; std::set<std::pair<TJoinColumn, TJoinColumn>> JoinConditions; TVector<TString> LeftJoinKeys; @@ -97,10 +102,13 @@ public: TVector<TString> relNameByNodeId(Nodes_.size()); res.append("Nodes: ").append("\n"); for (const auto& [name, idx]: NodeIdByRelationName_) { - res.append(Sprintf("%ld: %s\n", idx, name.c_str())); relNameByNodeId[idx] = name; } + for (size_t idx = 0; idx < relNameByNodeId.size(); ++idx) { + res.append(Sprintf("%ld: %s\n", idx, relNameByNodeId[idx].c_str())); + } + res.append("Edges: ").append("\n"); auto edgeSideToString = @@ -387,7 +395,8 @@ public: [this](const THyperedge& edge) { return edge.IsReversed || - !(IsJoinTransitiveClosureSupported(edge.JoinKind) && edge.AreCondVectorEqual()); + !(IsJoinTransitiveClosureSupported(edge.JoinKind) && edge.AreCondVectorEqual()) || + edge.LeftAny || edge.RightAny; } ); @@ -463,7 +472,7 @@ private: }); } - auto e = THyperedge(lhs, rhs, groupJoinKind, isJoinCommutative, joinConditions); + auto e = THyperedge(lhs, rhs, groupJoinKind, false, false, isJoinCommutative, joinConditions); Graph_.AddEdge(std::move(e)); } } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp index 86b36d01d03..5b13ee7cbd6 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp @@ -10,10 +10,12 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal( const TVector<TString>& rightJoinKeys, EJoinKind joinKind, EJoinAlgoType joinAlgo, + bool leftAny, + bool rightAny, IProviderContext& ctx, TCardinalityHints::TCardinalityHint* maybeHint) { - auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo); + auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo, leftAny, rightAny); res->Stats = std::make_shared<TOptimizerStatistics>(ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint)); return res; } @@ -37,7 +39,7 @@ std::shared_ptr<TJoinOptimizerNode> ConvertFromInternal(const std::shared_ptr<IB right = ConvertFromInternal(right); } - auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo); + auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny); newJoin->Stats = join->Stats; return newJoin; } diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h index 02ffb1e99db..9e626bc356b 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h @@ -22,7 +22,9 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode { const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys, const EJoinKind joinType, - const EJoinAlgoType joinAlgo + const EJoinAlgoType joinAlgo, + const bool leftAny, + const bool rightAny ) : IBaseOptimizerNode(JoinNodeType) , LeftArg(left) @@ -32,6 +34,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode { , RightJoinKeys(rightJoinKeys) , JoinType(joinType) , JoinAlgo(joinAlgo) + , LeftAny(leftAny) + , RightAny(rightAny) {} virtual ~TJoinOptimizerNodeInternal() = default; @@ -52,6 +56,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode { const TVector<TString>& RightJoinKeys; EJoinKind JoinType; EJoinAlgoType JoinAlgo; + const bool LeftAny; + const bool RightAny; }; /** @@ -65,6 +71,8 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal( const TVector<TString>& rightJoinKeys, EJoinKind joinKind, EJoinAlgoType joinAlgo, + bool leftAny, + bool rightAny, IProviderContext& ctx, TCardinalityHints::TCardinalityHint* maybeHint = nullptr ); diff --git a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h index de59957f426..4e347ab5973 100644 --- a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h @@ -42,14 +42,13 @@ typename TJoinHypergraph<TNodeSet>::TEdge MakeHyperedge( TNodeSet TES = ConvertConflictRulesIntoTES(conditionUsedRels, conflictRules); - - /* For CROSS Join and degenerate predicates (if subtree tables and joinCondition tables do not intersect) */ - if (!Overlaps(TES, subtreeNodes[joinNode->LeftArg])) { + /* For CROSS, Non-Reorderable, ANY Joins and degenerate predicates (if subtree tables and joinCondition tables do not intersect) */ + if (!Overlaps(TES, subtreeNodes[joinNode->LeftArg]) || !joinNode->IsReorderable || joinNode->LeftAny) { TES |= subtreeNodes[joinNode->LeftArg]; TES = ConvertConflictRulesIntoTES(TES, conflictRules); } - if (!Overlaps(TES, subtreeNodes[joinNode->RightArg])) { + if (!Overlaps(TES, subtreeNodes[joinNode->RightArg]) || !joinNode->IsReorderable || joinNode->RightAny) { TES |= subtreeNodes[joinNode->RightArg]; TES = ConvertConflictRulesIntoTES(TES, conflictRules); } @@ -57,7 +56,8 @@ typename TJoinHypergraph<TNodeSet>::TEdge MakeHyperedge( TNodeSet left = TES & subtreeNodes[joinNode->LeftArg]; TNodeSet right = TES & subtreeNodes[joinNode->RightArg]; - return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, OperatorIsCommutative(joinNode->JoinType) && joinNode->IsReorderable, joinNode->JoinConditions); + bool isCommutative = OperatorIsCommutative(joinNode->JoinType) && (joinNode->IsReorderable); + return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, joinNode->JoinConditions); } template<typename TNodeSet> diff --git a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp index c9d54d1aea6..f4092b08b0b 100644 --- a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp +++ b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp @@ -81,7 +81,7 @@ Y_UNIT_TEST(NonReordable) { std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions; joinConditions.insert({NDq::TJoinColumn{"a", "b"}, NDq::TJoinColumn{"a","c"}}); auto root = std::make_shared<TJoinOptimizerNode>( - left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, true); + left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true); TBaseProviderContext optCtx; std::unique_ptr<IOptimizerNew> opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(optCtx, 1024)); auto result = opt->JoinSearch(root); diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp index 9721fbe2a2d..4c15ac7bdab 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp @@ -166,7 +166,10 @@ public: const EJoinKind joinType, const EJoinAlgoType joinAlgo, TYtJoinNodeOp* originalOp) - : TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo, originalOp != nullptr) + : TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo, + originalOp ? originalOp->LinkSettings.LeftHints.contains("any") : false, + originalOp ? originalOp->LinkSettings.RightHints.contains("any") : false, + originalOp != nullptr) , OriginalOp(originalOp) { } diff --git a/ydb/library/yql/sql/pg/optimizer.cpp b/ydb/library/yql/sql/pg/optimizer.cpp index f132427188e..d1134092cbb 100644 --- a/ydb/library/yql/sql/pg/optimizer.cpp +++ b/ydb/library/yql/sql/pg/optimizer.cpp @@ -654,7 +654,9 @@ struct TPgOptimizerImpl left, right, joinConditions, joinKind, - EJoinAlgoType::MapJoin + EJoinAlgoType::MapJoin, + false, + false ); } else { YQL_ENSURE(false, "Wrong CBO node"); |
