diff options
author | aozeritsky <aozeritsky@ydb.tech> | 2023-08-14 12:37:38 +0300 |
---|---|---|
committer | aozeritsky <aozeritsky@ydb.tech> | 2023-08-14 14:00:09 +0300 |
commit | 4caad3092762e92b7d0216e2f9b9b4a0f13b389c (patch) | |
tree | 4cf55daf3a6ca61aff92b921d6975c5a2b50c79b | |
parent | 30d989f97b9f0b7404b394e698fc515adab34afc (diff) | |
download | ydb-4caad3092762e92b7d0216e2f9b9b4a0f13b389c.tar.gz |
Support left join in pg optimizer wrapper
-rw-r--r-- | ydb/library/yql/core/cbo/cbo_optimizer.cpp | 2 | ||||
-rw-r--r-- | ydb/library/yql/core/cbo/cbo_optimizer.h | 5 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/optimizer.cpp | 115 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/optimizer.h | 2 | ||||
-rw-r--r-- | ydb/library/yql/sql/pg/optimizer_ut.cpp | 44 |
5 files changed, 155 insertions, 13 deletions
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer.cpp b/ydb/library/yql/core/cbo/cbo_optimizer.cpp index 0008e45d3c..13d0a48b1a 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer.cpp +++ b/ydb/library/yql/core/cbo/cbo_optimizer.cpp @@ -45,6 +45,8 @@ void PrettyPrintNode(int level, TStringBuilder& b, const IOptimizer::TOutput& ou switch (node.Mode) { case IOptimizer::EJoinType::Unknown: b << prefix << " Node\n"; break; case IOptimizer::EJoinType::Inner: b << prefix << " Inner Join\n"; break; + case IOptimizer::EJoinType::Left: b << prefix << " Left Join\n"; break; + case IOptimizer::EJoinType::Right: b << prefix << " Right Join\n"; break; default: b << prefix << " Unknown\n"; break; } switch (node.Strategy) { diff --git a/ydb/library/yql/core/cbo/cbo_optimizer.h b/ydb/library/yql/core/cbo/cbo_optimizer.h index 4cd7884b11..2d2821c81d 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer.h @@ -28,6 +28,7 @@ struct IOptimizer { struct TInput { std::vector<TRel> Rels; std::vector<TEq> EqClasses; + std::vector<TEq> Left; TString ToString() const; void Normalize(); @@ -35,7 +36,9 @@ struct IOptimizer { enum class EJoinType { Unknown, - Inner + Inner, + Left, + Right, }; enum class EJoinStrategy { diff --git a/ydb/library/yql/sql/pg/optimizer.cpp b/ydb/library/yql/sql/pg/optimizer.cpp index 5cd4e8bac6..a2e8ff0954 100644 --- a/ydb/library/yql/sql/pg/optimizer.cpp +++ b/ydb/library/yql/sql/pg/optimizer.cpp @@ -44,7 +44,7 @@ bool RelationStatsHook( } // namespace -Var* MakeVar(int varno, int relno) { +Var* MakeVar(int relno, int varno) { Var* v = makeNode(Var); v->varno = relno; // table number v->varattno = varno; // column number in table @@ -72,7 +72,7 @@ RelOptInfo* MakeRelOptInfo(const IOptimizer::TRel& r, int relno) { PathTarget* t = makeNode(PathTarget); int maxattno = 0; for (int i = 0; i < (int)r.TargetVars.size(); i++) { - t->exprs = lappend(t->exprs, MakeVar(i+1, relno)); + t->exprs = lappend(t->exprs, MakeVar(relno, i+1)); maxattno = i+1; } t->width = 8; @@ -136,7 +136,7 @@ EquivalenceClass* TPgOptimizer::MakeEqClass(int i) { for (auto [relno, varno] : Input.EqClasses[i].Vars) { EquivalenceMember* m = makeNode(EquivalenceMember); - m->em_expr = (Expr*)MakeVar(std::make_tuple(varno, relno)); + m->em_expr = (Expr*)MakeVar(TVarId{relno, varno}); m->em_relids = bms_add_member(nullptr, relno); m->em_datatype = 20; eq->ec_opfamilies = list_make1_oid(1976); @@ -185,7 +185,6 @@ int TPgOptimizer::MakeOutputJoin(TOutput& output, Path* path) { } if (path->type != T_Path) { - node.Mode = EJoinType::Inner; node.Strategy = EJoinStrategy::Unknown; if (path->type == T_HashPath) { node.Strategy = EJoinStrategy::Hash; @@ -196,14 +195,44 @@ int TPgOptimizer::MakeOutputJoin(TOutput& output, Path* path) { } JoinPath* jpath = (JoinPath*)path; + switch (jpath->jointype) { + case JOIN_INNER: + node.Mode = EJoinType::Inner; + break; + case JOIN_LEFT: + node.Mode = EJoinType::Left; + break; + case JOIN_RIGHT: + node.Mode = EJoinType::Right; + break; + default: + YQL_ENSURE(false, "Unsupported join type"); + break; + } YQL_ENSURE(list_length(jpath->joinrestrictinfo) == 1, "Unsupported joinrestrictinfo len"); RestrictInfo* rinfo = (RestrictInfo*)jpath->joinrestrictinfo->elements[0].ptr_value; - YQL_ENSURE(rinfo->left_em->em_expr->type == T_Var, "Unsupported left em type"); - YQL_ENSURE(rinfo->right_em->em_expr->type == T_Var, "Unsupported right em type"); - - Var* left = (Var*)rinfo->left_em->em_expr; - Var* right = (Var*)rinfo->right_em->em_expr; + Var* left; + Var* right; + + if (jpath->jointype == JOIN_INNER) { + YQL_ENSURE(rinfo->left_em->em_expr->type == T_Var, "Unsupported left em type"); + YQL_ENSURE(rinfo->right_em->em_expr->type == T_Var, "Unsupported right em type"); + + left = (Var*)rinfo->left_em->em_expr; + right = (Var*)rinfo->right_em->em_expr; + } else if (jpath->jointype == JOIN_LEFT || jpath->jointype == JOIN_RIGHT) { + YQL_ENSURE(rinfo->clause->type == T_OpExpr); + OpExpr* expr = (OpExpr*)rinfo->clause; + YQL_ENSURE(list_length(expr->args) == 2); + Expr* a1 = (Expr*)list_nth(expr->args, 0); + Expr* a2 = (Expr*)list_nth(expr->args, 1); + YQL_ENSURE(a1->type == T_Var, "Unsupported left arg type"); + YQL_ENSURE(a2->type == T_Var, "Unsupported right arg type"); + + left = (Var*)a1; + right = (Var*)a2; + } node.LeftVar = std::make_tuple(left->varno, left->varattno); node.RightVar = std::make_tuple(right->varno, right->varattno); @@ -222,9 +251,50 @@ int TPgOptimizer::MakeOutputJoin(TOutput& output, Path* path) { } RelOptInfo* TPgOptimizer::JoinSearchInternal() { + std::vector<std::vector<RestrictInfo*>> restrictInfos(Input.Rels.size()+1); + std::vector<RestrictInfo*> allRestriction; allRestriction.reserve(Input.Left.size()); + for (const auto& eq : Input.Left) { + YQL_ENSURE(eq.Vars.size() == 2); + RestrictInfo* ri = makeNode(RestrictInfo); + ri->can_join = 1; + ri->norm_selec = -1; + ri->outer_selec = -1; + + OpExpr* oe = makeNode(OpExpr); + oe->opno = 410; + oe->opfuncid = 467; + oe->opresulttype = 16; + ri->clause = (Expr*)oe; + + bool left = true; + for (const auto [relId, varId] : eq.Vars) { + ri->required_relids = bms_add_member(ri->required_relids, relId); + ri->clause_relids = bms_add_member(ri->clause_relids, relId); + if (left) { + ri->outer_relids = bms_add_member(nullptr, relId); + ri->left_relids = bms_add_member(nullptr, relId); + left = false; + } else { + ri->right_relids = bms_add_member(nullptr, relId); + } + oe->args = lappend(oe->args, MakeVar(TVarId{relId, varId})); + + restrictInfos[relId].emplace_back(ri); + } + allRestriction.emplace_back(ri); + } + List* rels = MakeRelOptInfoList(Input); ListCell* l; + int relId = 1; + foreach (l, rels) { + RelOptInfo* rel = (RelOptInfo*)lfirst(l); + for (auto* ri : restrictInfos[relId++]) { + rel->joininfo = lappend(rel->joininfo, ri); + } + } + if (Log) { int i = 1; foreach (l, rels) { @@ -247,9 +317,26 @@ RelOptInfo* TPgOptimizer::JoinSearchInternal() { root.simple_rte_array[i] = makeNode(RangeTblEntry); root.simple_rte_array[i]->rtekind = RTE_RELATION; } - root.all_baserels = bms_add_range(nullptr, 1, rels->length+1); + root.all_baserels = bms_add_range(nullptr, 1, rels->length); root.eq_classes = MakeEqClasses(); + for (auto* ri : allRestriction) { + root.left_join_clauses = lappend(root.left_join_clauses, ri); + root.hasJoinRTEs = 1; + root.nullable_baserels = bms_add_members(root.nullable_baserels, ri->right_relids); + + SpecialJoinInfo* ji = makeNode(SpecialJoinInfo); + ji->min_lefthand = bms_add_member(ji->min_lefthand, bms_first_member(ri->left_relids)); + ji->min_righthand = bms_add_member(ji->min_righthand, bms_first_member(ri->right_relids)); + + ji->syn_lefthand = bms_add_members(ji->min_lefthand, ri->left_relids); + ji->syn_righthand = bms_add_members(ji->min_righthand, ri->right_relids); + ji->jointype = JOIN_LEFT; + ji->lhs_strict = 1; + + root.join_info_list = lappend(root.join_info_list, ji); + } + root.planner_cxt = CurrentMemoryContext; for (int i = 0; i < rels->length; i++) { @@ -262,10 +349,16 @@ RelOptInfo* TPgOptimizer::JoinSearchInternal() { root.simple_rel_array[relno]->eclass_indexes = bms_add_member( root.simple_rel_array[relno]->eclass_indexes, eqId); - root.simple_rel_array[relno]->has_eclass_joins = true; } } + for (int i = 0; i < rels->length; i++) { + root.simple_rel_array[i+1]->has_eclass_joins = bms_num_members(root.simple_rel_array[i+1]->eclass_indexes) > 1; + } + root.ec_merging_done = 1; + + LogNode("Context: ", &root); + auto* result = standard_join_search(&root, rels->length, rels); LogNode("Result: ", result); return result; diff --git a/ydb/library/yql/sql/pg/optimizer.h b/ydb/library/yql/sql/pg/optimizer.h index c2e9748fec..71eee7d350 100644 --- a/ydb/library/yql/sql/pg/optimizer.h +++ b/ydb/library/yql/sql/pg/optimizer.h @@ -34,7 +34,7 @@ private: }; // export for tests -Var* MakeVar(int varno, int relno); +Var* MakeVar(int relno, int varno); RelOptInfo* MakeRelOptInfo(const IOptimizer::TRel& r, int relno); List* MakeRelOptInfoList(const IOptimizer::TInput& input); diff --git a/ydb/library/yql/sql/pg/optimizer_ut.cpp b/ydb/library/yql/sql/pg/optimizer_ut.cpp index 554c7df15e..1b240146d5 100644 --- a/ydb/library/yql/sql/pg/optimizer_ut.cpp +++ b/ydb/library/yql/sql/pg/optimizer_ut.cpp @@ -51,6 +51,50 @@ Y_UNIT_TEST(PgJoinSearch2Rels) { UNIT_ASSERT_STRINGS_EQUAL(expected, resStr); } +Y_UNIT_TEST(PgJoinSearch2RelsLeft) { + IOptimizer::TRel rel1 = {100000, 1000000, {{'a'}}}; + IOptimizer::TRel rel2 = {1000000, 9000009, {{'b'}}}; + IOptimizer::TInput input = {{rel1, rel2}}; + + input.EqClasses.emplace_back(IOptimizer::TEq { + {{1, 1}} + }); + input.EqClasses.emplace_back(IOptimizer::TEq { + {{2, 1}} + }); + input.Left.emplace_back( + IOptimizer::TEq { + {{1, 1}, {2, 1}} + } + ); + + auto log = [](const TString& str) { + Cerr << str << "\n"; + }; + + auto optimizer = std::unique_ptr<IOptimizer>(MakePgOptimizer(input, log)); + + auto res = optimizer->JoinSearch(); + auto resStr = res.ToString(); + Cerr << resStr; + TString expected = R"__({ + Left Join + Loop Strategy + Rels: [1,2] + Op: a = b + { + Node + Rels: [1] + } + { + Node + Rels: [2] + } +} +)__"; + UNIT_ASSERT_STRINGS_EQUAL(expected, resStr); +} + Y_UNIT_TEST(PgJoinSearch3Rels) { IOptimizer::TRel rel1 = {100000, 1000000, {{'a'}}}; IOptimizer::TRel rel2 = {1000000, 9000009, {{'b'}}}; |