aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author aozeritsky <aozeritsky@ydb.tech> 2023-08-14 12:37:38 +0300
committer aozeritsky <aozeritsky@ydb.tech> 2023-08-14 14:00:09 +0300
commit 4caad3092762e92b7d0216e2f9b9b4a0f13b389c (patch)
tree 4cf55daf3a6ca61aff92b921d6975c5a2b50c79b
parent 30d989f97b9f0b7404b394e698fc515adab34afc (diff)
download ydb-4caad3092762e92b7d0216e2f9b9b4a0f13b389c.tar.gz
Support left join in pg optimizer wrapper
-rw-r--r-- ydb/library/yql/core/cbo/cbo_optimizer.cpp | 2
-rw-r--r-- ydb/library/yql/core/cbo/cbo_optimizer.h | 5
-rw-r--r-- ydb/library/yql/sql/pg/optimizer.cpp | 115
-rw-r--r-- ydb/library/yql/sql/pg/optimizer.h | 2
-rw-r--r-- ydb/library/yql/sql/pg/optimizer_ut.cpp | 44
5 files changed, 155 insertions, 13 deletions
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer.cpp b/ydb/library/yql/core/cbo/cbo_optimizer.cpp
index 0008e45d3c..13d0a48b1a 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer.cpp
+++ b/ydb/library/yql/core/cbo/cbo_optimizer.cpp
@@ -45,6 +45,8 @@ void PrettyPrintNode(int level, TStringBuilder& b, const IOptimizer::TOutput& ou
switch (node.Mode) {
case IOptimizer::EJoinType::Unknown: b << prefix << " Node\n"; break;
case IOptimizer::EJoinType::Inner: b << prefix << " Inner Join\n"; break;
+ case IOptimizer::EJoinType::Left: b << prefix << " Left Join\n"; break;
+ case IOptimizer::EJoinType::Right: b << prefix << " Right Join\n"; break;
default: b << prefix << " Unknown\n"; break;
}
switch (node.Strategy) {
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer.h b/ydb/library/yql/core/cbo/cbo_optimizer.h
index 4cd7884b11..2d2821c81d 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer.h
+++ b/ydb/library/yql/core/cbo/cbo_optimizer.h
@@ -28,6 +28,7 @@ struct IOptimizer {
struct TInput {
std::vector<TRel> Rels;
std::vector<TEq> EqClasses;
+ std::vector<TEq> Left;
TString ToString() const;
void Normalize();
@@ -35,7 +36,9 @@ struct IOptimizer {
enum class EJoinType {
Unknown,
- Inner
+ Inner,
+ Left, // set for JOIN_LEFT paths by the pg wrapper; pretty-printed as "Left Join"
+ Right, // set for JOIN_RIGHT paths by the pg wrapper; pretty-printed as "Right Join"
};
enum class EJoinStrategy {
diff --git a/ydb/library/yql/sql/pg/optimizer.cpp b/ydb/library/yql/sql/pg/optimizer.cpp
index 5cd4e8bac6..a2e8ff0954 100644
--- a/ydb/library/yql/sql/pg/optimizer.cpp
+++ b/ydb/library/yql/sql/pg/optimizer.cpp
@@ -44,7 +44,7 @@ bool RelationStatsHook(
} // namespace
-Var* MakeVar(int varno, int relno) {
+Var* MakeVar(int relno, int varno) {
Var* v = makeNode(Var);
v->varno = relno; // table number
v->varattno = varno; // column number in table
@@ -72,7 +72,7 @@ RelOptInfo* MakeRelOptInfo(const IOptimizer::TRel& r, int relno) {
PathTarget* t = makeNode(PathTarget);
int maxattno = 0;
for (int i = 0; i < (int)r.TargetVars.size(); i++) {
- t->exprs = lappend(t->exprs, MakeVar(i+1, relno));
+ t->exprs = lappend(t->exprs, MakeVar(relno, i+1));
maxattno = i+1;
}
t->width = 8;
@@ -136,7 +136,7 @@ EquivalenceClass* TPgOptimizer::MakeEqClass(int i) {
for (auto [relno, varno] : Input.EqClasses[i].Vars) {
EquivalenceMember* m = makeNode(EquivalenceMember);
- m->em_expr = (Expr*)MakeVar(std::make_tuple(varno, relno));
+ m->em_expr = (Expr*)MakeVar(TVarId{relno, varno});
m->em_relids = bms_add_member(nullptr, relno);
m->em_datatype = 20;
eq->ec_opfamilies = list_make1_oid(1976);
@@ -185,7 +185,6 @@ int TPgOptimizer::MakeOutputJoin(TOutput& output, Path* path) {
}
if (path->type != T_Path) {
- node.Mode = EJoinType::Inner;
node.Strategy = EJoinStrategy::Unknown;
if (path->type == T_HashPath) {
node.Strategy = EJoinStrategy::Hash;
@@ -196,14 +195,44 @@ int TPgOptimizer::MakeOutputJoin(TOutput& output, Path* path) {
}
JoinPath* jpath = (JoinPath*)path;
+ switch (jpath->jointype) { // map the join path's jointype onto IOptimizer::EJoinType
+ case JOIN_INNER:
+ node.Mode = EJoinType::Inner;
+ break;
+ case JOIN_LEFT:
+ node.Mode = EJoinType::Left;
+ break;
+ case JOIN_RIGHT:
+ node.Mode = EJoinType::Right;
+ break;
+ default:
+ YQL_ENSURE(false, "Unsupported join type"); // every other jointype value fails this check
+ break;
+ }
YQL_ENSURE(list_length(jpath->joinrestrictinfo) == 1, "Unsupported joinrestrictinfo len");
RestrictInfo* rinfo = (RestrictInfo*)jpath->joinrestrictinfo->elements[0].ptr_value;
- YQL_ENSURE(rinfo->left_em->em_expr->type == T_Var, "Unsupported left em type");
- YQL_ENSURE(rinfo->right_em->em_expr->type == T_Var, "Unsupported right em type");
-
- Var* left = (Var*)rinfo->left_em->em_expr;
- Var* right = (Var*)rinfo->right_em->em_expr;
+    Var* left = nullptr; // key columns of the join; filled in per join type below
+    Var* right = nullptr;
+    if (jpath->jointype == JOIN_INNER) {
+        // Inner join: the keys are the equivalence members recorded on the restriction.
+        YQL_ENSURE(rinfo->left_em->em_expr->type == T_Var, "Unsupported left em type");
+        YQL_ENSURE(rinfo->right_em->em_expr->type == T_Var, "Unsupported right em type");
+        left = (Var*)rinfo->left_em->em_expr;
+        right = (Var*)rinfo->right_em->em_expr;
+    } else if (jpath->jointype == JOIN_LEFT || jpath->jointype == JOIN_RIGHT) {
+        // Outer join: the keys are the two operands of the restriction's operator clause.
+        YQL_ENSURE(rinfo->clause->type == T_OpExpr, "Unsupported join clause type");
+        OpExpr* expr = (OpExpr*)rinfo->clause;
+        YQL_ENSURE(list_length(expr->args) == 2, "Unsupported join clause arity");
+        Expr* a1 = (Expr*)list_nth(expr->args, 0);
+        Expr* a2 = (Expr*)list_nth(expr->args, 1);
+        YQL_ENSURE(a1->type == T_Var, "Unsupported left arg type");
+        YQL_ENSURE(a2->type == T_Var, "Unsupported right arg type");
+        left = (Var*)a1;
+        right = (Var*)a2;
+    }
+    YQL_ENSURE(left && right, "Unsupported join type"); // never dereference keys that no branch assigned
node.LeftVar = std::make_tuple(left->varno, left->varattno);
node.RightVar = std::make_tuple(right->varno, right->varattno);
@@ -222,9 +251,50 @@ int TPgOptimizer::MakeOutputJoin(TOutput& output, Path* path) {
}
RelOptInfo* TPgOptimizer::JoinSearchInternal() {
+    // One RestrictInfo per left join. restrictInfos is indexed by 1-based relation id, so ids are validated before use.
+    std::vector<std::vector<RestrictInfo*>> restrictInfos(Input.Rels.size()+1);
+    std::vector<RestrictInfo*> allRestriction; allRestriction.reserve(Input.Left.size());
+    for (const auto& eq : Input.Left) {
+        YQL_ENSURE(eq.Vars.size() == 2, "Left join must reference exactly two columns");
+        RestrictInfo* ri = makeNode(RestrictInfo);
+        ri->can_join = 1;
+        ri->norm_selec = -1;
+        ri->outer_selec = -1;
+        // Clause "col1 = col2"; the OIDs presumably name int8 equality returning bool -- confirm against the pg catalog.
+        OpExpr* oe = makeNode(OpExpr);
+        oe->opno = 410;
+        oe->opfuncid = 467;
+        oe->opresulttype = 16;
+        ri->clause = (Expr*)oe;
+        bool left = true; // the first column of the pair is the left side of the join
+        for (const auto [relId, varId] : eq.Vars) {
+            YQL_ENSURE(relId > 0 && (size_t)relId < restrictInfos.size(), "Invalid relation id in left join");
+            ri->required_relids = bms_add_member(ri->required_relids, relId);
+            ri->clause_relids = bms_add_member(ri->clause_relids, relId);
+            if (left) {
+                ri->outer_relids = bms_add_member(nullptr, relId);
+                ri->left_relids = bms_add_member(nullptr, relId);
+                left = false;
+            } else {
+                ri->right_relids = bms_add_member(nullptr, relId);
+            }
+            oe->args = lappend(oe->args, MakeVar(TVarId{relId, varId}));
+            restrictInfos[relId].emplace_back(ri);
+        }
+        allRestriction.emplace_back(ri);
+    }
+
List* rels = MakeRelOptInfoList(Input);
ListCell* l;
+ int relId = 1; // restrictInfos is indexed from 1; assumes rels is ordered by relation id -- TODO confirm in MakeRelOptInfoList
+ foreach (l, rels) {
+ RelOptInfo* rel = (RelOptInfo*)lfirst(l);
+ for (auto* ri : restrictInfos[relId++]) { // left join clauses collected for this relation
+ rel->joininfo = lappend(rel->joininfo, ri);
+ }
+ }
+
if (Log) {
int i = 1;
foreach (l, rels) {
@@ -247,9 +317,26 @@ RelOptInfo* TPgOptimizer::JoinSearchInternal() {
root.simple_rte_array[i] = makeNode(RangeTblEntry);
root.simple_rte_array[i]->rtekind = RTE_RELATION;
}
- root.all_baserels = bms_add_range(nullptr, 1, rels->length+1);
+ root.all_baserels = bms_add_range(nullptr, 1, rels->length);
root.eq_classes = MakeEqClasses();
+    // Register every left join with the planner root: its clause, its nullable (right) side and a SpecialJoinInfo.
+    for (auto* ri : allRestriction) {
+        root.left_join_clauses = lappend(root.left_join_clauses, ri);
+        root.hasJoinRTEs = 1;
+        root.nullable_baserels = bms_add_members(root.nullable_baserels, ri->right_relids);
+        SpecialJoinInfo* ji = makeNode(SpecialJoinInfo);
+        // bms_next_member only reads the set; bms_first_member would also remove the member from ri's relid sets.
+        ji->min_lefthand = bms_make_singleton(bms_next_member(ri->left_relids, -1));
+        ji->min_righthand = bms_make_singleton(bms_next_member(ri->right_relids, -1));
+        // Independent copies: bms_add_members recycles its first argument, which would alias syn_* with min_*.
+        ji->syn_lefthand = bms_copy(ri->left_relids);
+        ji->syn_righthand = bms_copy(ri->right_relids);
+        ji->jointype = JOIN_LEFT;
+        ji->lhs_strict = 1;
+        root.join_info_list = lappend(root.join_info_list, ji);
+    }
+
root.planner_cxt = CurrentMemoryContext;
for (int i = 0; i < rels->length; i++) {
@@ -262,10 +349,16 @@ RelOptInfo* TPgOptimizer::JoinSearchInternal() {
root.simple_rel_array[relno]->eclass_indexes = bms_add_member(
root.simple_rel_array[relno]->eclass_indexes,
eqId);
- root.simple_rel_array[relno]->has_eclass_joins = true;
}
}
+ for (int i = 0; i < rels->length; i++) { // flag a relation only when more than one equivalence class index is recorded for it
+ root.simple_rel_array[i+1]->has_eclass_joins = bms_num_members(root.simple_rel_array[i+1]->eclass_indexes) > 1;
+ }
+ root.ec_merging_done = 1;
+ // Log the assembled planner root before the join search runs.
+ LogNode("Context: ", &root);
+
auto* result = standard_join_search(&root, rels->length, rels);
LogNode("Result: ", result);
return result;
diff --git a/ydb/library/yql/sql/pg/optimizer.h b/ydb/library/yql/sql/pg/optimizer.h
index c2e9748fec..71eee7d350 100644
--- a/ydb/library/yql/sql/pg/optimizer.h
+++ b/ydb/library/yql/sql/pg/optimizer.h
@@ -34,7 +34,7 @@ private:
};
// export for tests
-Var* MakeVar(int varno, int relno);
+Var* MakeVar(int relno, int varno);
RelOptInfo* MakeRelOptInfo(const IOptimizer::TRel& r, int relno);
List* MakeRelOptInfoList(const IOptimizer::TInput& input);
diff --git a/ydb/library/yql/sql/pg/optimizer_ut.cpp b/ydb/library/yql/sql/pg/optimizer_ut.cpp
index 554c7df15e..1b240146d5 100644
--- a/ydb/library/yql/sql/pg/optimizer_ut.cpp
+++ b/ydb/library/yql/sql/pg/optimizer_ut.cpp
@@ -51,6 +51,50 @@ Y_UNIT_TEST(PgJoinSearch2Rels) {
UNIT_ASSERT_STRINGS_EQUAL(expected, resStr);
}
+Y_UNIT_TEST(PgJoinSearch2RelsLeft) { // two relations connected by a single left join
+ IOptimizer::TRel rel1 = {100000, 1000000, {{'a'}}};
+ IOptimizer::TRel rel2 = {1000000, 9000009, {{'b'}}};
+ IOptimizer::TInput input = {{rel1, rel2}};
+ // Each join column gets its own single-member equivalence class; pairs are {relation id, column id}.
+ input.EqClasses.emplace_back(IOptimizer::TEq {
+ {{1, 1}}
+ });
+ input.EqClasses.emplace_back(IOptimizer::TEq {
+ {{2, 1}}
+ });
+ input.Left.emplace_back(
+ IOptimizer::TEq {
+ {{1, 1}, {2, 1}}
+ }
+ );
+ // Callback handed to the optimizer; writes each message to Cerr.
+ auto log = [](const TString& str) {
+ Cerr << str << "\n";
+ };
+ // Build the pg-based optimizer over the input above.
+ auto optimizer = std::unique_ptr<IOptimizer>(MakePgOptimizer(input, log));
+ // Run the join search and compare its printed form with the expected left join tree.
+ auto res = optimizer->JoinSearch();
+ auto resStr = res.ToString();
+ Cerr << resStr;
+ TString expected = R"__({
+ Left Join
+ Loop Strategy
+ Rels: [1,2]
+ Op: a = b
+ {
+ Node
+ Rels: [1]
+ }
+ {
+ Node
+ Rels: [2]
+ }
+}
+)__";
+ UNIT_ASSERT_STRINGS_EQUAL(expected, resStr);
+}
+
Y_UNIT_TEST(PgJoinSearch3Rels) {
IOptimizer::TRel rel1 = {100000, 1000000, {{'a'}}};
IOptimizer::TRel rel2 = {1000000, 9000009, {{'b'}}};