summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony-Romanov <[email protected]>2024-09-24 08:11:32 +0200
committerGitHub <[email protected]>2024-09-24 09:11:32 +0300
commitd2b896d3c5ddf1371fa3d332244ea5b7684e8f17 (patch)
tree57803a6b09a5b03372ba039c7348e60355b35ff3
parentf3bb311b382b307918adba2f34c600b6e21ca361 (diff)
Don't lose 'any' flag after CBO. (#8674)
-rw-r--r--ydb/core/kqp/ut/join/data/queries/any_join.sql0
-rw-r--r--ydb/core/kqp/ut/join/kqp_join_ut.cpp2
-rw-r--r--ydb/library/yql/core/cbo/cbo_optimizer_new.cpp45
-rw-r--r--ydb/library/yql/core/cbo/cbo_optimizer_new.h9
-rw-r--r--ydb/library/yql/dq/opt/dq_cbo_ut.cpp24
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h24
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h14
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp139
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp41
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h15
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp6
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_tree_node.h10
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h10
-rw-r--r--ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp2
-rw-r--r--ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp5
-rw-r--r--ydb/library/yql/sql/pg/optimizer.cpp4
16 files changed, 289 insertions, 61 deletions
diff --git a/ydb/core/kqp/ut/join/data/queries/any_join.sql b/ydb/core/kqp/ut/join/data/queries/any_join.sql
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/ydb/core/kqp/ut/join/data/queries/any_join.sql
diff --git a/ydb/core/kqp/ut/join/kqp_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_ut.cpp
index 5216a9a51ec..15efc66c9c8 100644
--- a/ydb/core/kqp/ut/join/kqp_join_ut.cpp
+++ b/ydb/core/kqp/ut/join/kqp_join_ut.cpp
@@ -1892,7 +1892,7 @@ Y_UNIT_TEST_SUITE(KqpJoin) {
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString());
Cout << FormatResultSetYson(result.GetResultSet(0));
CompareYson(R"(
- [[["02"];#;["02"];["03"];#;["03"];["1"];#;["1"]];[["02"];#;["02"];["03"];#;["04"];["1"];#;["1"]];[["02"];#;["02"];["05"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["05"];#;["06"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["06"];["2"];#;["2"]];[["03"];["03"];["03"];["08"];["02"];["07"];["1"];["1"];["1"]];[["03"];["03"];["03"];["09"];["03"];["08"];["2"];["2"];["2"]];[["09"];#;["09"];["20"];#;["09"];["1"];#;["1"]];[["09"];#;["09"];["21"];#;["10"];["2"];#;["2"]]]
+ [[["02"];#;["02"];["03"];#;["03"];["1"];#;["1"]];[["02"];#;["02"];["05"];#;["05"];["2"];#;["2"]];[["02"];#;["02"];["06"];#;["05"];["2"];#;["2"]];[["03"];["03"];["03"];["08"];["02"];["07"];["1"];["1"];["1"]];[["03"];["03"];["03"];["09"];["03"];["08"];["2"];["2"];["2"]];[["09"];#;["09"];["20"];#;["09"];["1"];#;["1"]];[["09"];#;["09"];["21"];#;["10"];["2"];#;["2"]]]
)", FormatResultSetYson(result.GetResultSet(0)));
}
}
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
index ea9d5d6f6e1..0bfda45956e 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
+++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
@@ -74,20 +74,30 @@ void TRelOptimizerNode::Print(std::stringstream& stream, int ntabs) {
stream << *Stats << "\n";
}
-TJoinOptimizerNode::TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left, const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions, const EJoinKind joinType, const EJoinAlgoType joinAlgo, bool nonReorderable) :
- IBaseOptimizerNode(JoinNodeType),
- LeftArg(left),
- RightArg(right),
- JoinConditions(joinConditions),
- JoinType(joinType),
- JoinAlgo(joinAlgo) {
- IsReorderable = !nonReorderable;
- for (auto [l,r] : joinConditions ) {
- LeftJoinKeys.push_back(l.AttributeName);
- RightJoinKeys.push_back(r.AttributeName);
- }
+TJoinOptimizerNode::TJoinOptimizerNode(
+ const std::shared_ptr<IBaseOptimizerNode>& left,
+ const std::shared_ptr<IBaseOptimizerNode>& right,
+ const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
+ const EJoinKind joinType,
+ const EJoinAlgoType joinAlgo,
+ bool leftAny,
+ bool rightAny,
+ bool nonReorderable
+) : IBaseOptimizerNode(JoinNodeType)
+ , LeftArg(left)
+ , RightArg(right)
+ , JoinConditions(joinConditions)
+ , JoinType(joinType)
+ , JoinAlgo(joinAlgo)
+ , LeftAny(leftAny)
+ , RightAny(rightAny)
+ , IsReorderable(!nonReorderable)
+{
+ for (const auto& [l,r] : joinConditions ) {
+ LeftJoinKeys.push_back(l.AttributeName);
+ RightJoinKeys.push_back(r.AttributeName);
}
+}
TVector<TString> TJoinOptimizerNode::Labels() {
auto res = LeftArg->Labels();
@@ -101,7 +111,14 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) {
stream << " ";
}
- stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo) << ") ";
+ stream << "Join: (" << ToString(JoinType) << "," << ToString(JoinAlgo);
+ if (LeftAny) {
+ stream << ",LeftAny";
+ }
+ if (RightAny) {
+ stream << ",RightAny";
+ }
+ stream << ") ";
for (auto c : JoinConditions){
stream << c.first.RelName << "." << c.first.AttributeName
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h
index cdb143eaa88..f40a45b55fa 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h
+++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h
@@ -295,6 +295,10 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode {
TVector<TString> RightJoinKeys;
EJoinKind JoinType;
EJoinAlgoType JoinAlgo;
+ /////////////////// 'ANY' flag means leaving only one row from the join side.
+ bool LeftAny;
+ bool RightAny;
+ ///////////////////
bool IsReorderable;
TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
@@ -302,7 +306,10 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode {
const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
- bool nonReorderable=false);
+ bool leftAny,
+ bool rightAny,
+ bool nonReorderable = false
+ );
virtual ~TJoinOptimizerNode() {}
virtual TVector<TString> Labels();
virtual void Print(std::stringstream& stream, int ntabs=0);
diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
index ef3663626b9..3e088a59ded 100644
--- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
+++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
@@ -55,13 +55,16 @@ Y_UNIT_TEST(JoinSearch2Rels) {
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
joinConditions,
InnerJoin,
- EJoinAlgoType::GraceJoin
+ EJoinAlgoType::GraceJoin,
+ true,
+ false
);
auto res = optimizer->JoinSearch(op);
std::stringstream ss;
res->Print(ss);
- TString expected = R"__(Join: (InnerJoin,MapJoin) b.1=a.1,
+ Cout << ss.str() << '\n';
+ TString expected = R"__(Join: (InnerJoin,MapJoin,RightAny) b.1=a.1,
Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA
Rel: b
Type: BaseTable, Nrows: 1e+06, Ncols: 1, ByteSize: 0, Cost: 9.00001e+06, Sel: 1, Storage: NA
@@ -93,8 +96,10 @@ Y_UNIT_TEST(JoinSearch3Rels) {
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
joinConditions,
InnerJoin,
- EJoinAlgoType::GraceJoin
- );
+ EJoinAlgoType::GraceJoin,
+ false,
+ false
+ );
joinConditions.insert({
NDq::TJoinColumn("a", "1"),
@@ -106,14 +111,17 @@ Y_UNIT_TEST(JoinSearch3Rels) {
std::static_pointer_cast<IBaseOptimizerNode>(rel3),
joinConditions,
InnerJoin,
- EJoinAlgoType::GraceJoin
- );
+ EJoinAlgoType::GraceJoin,
+ true,
+ false
+ );
auto res = optimizer->JoinSearch(op2);
std::stringstream ss;
res->Print(ss);
+ Cout << ss.str() << '\n';
- TString expected = R"__(Join: (InnerJoin,MapJoin) a.1=b.1,a.1=c.1,
+ TString expected = R"__(Join: (InnerJoin,MapJoin,LeftAny) a.1=b.1,a.1=c.1,
Type: ManyManyJoin, Nrows: 4e+13, Ncols: 3, ByteSize: 0, Cost: 4.004e+13, Sel: 1, Storage: NA
Join: (InnerJoin,MapJoin) b.1=a.1,
Type: ManyManyJoin, Nrows: 2e+10, Ncols: 2, ByteSize: 0, Cost: 2.00112e+10, Sel: 1, Storage: NA
@@ -223,7 +231,7 @@ void _DqOptimizeEquiJoinWithCosts(const std::function<IOptimizerNew*()>& optFact
UNIT_ASSERT(equiJoin.Maybe<TCoEquiJoin>());
auto resStr = NCommon::ExprToPrettyString(ctx, *res.Ptr());
auto expected = R"__((
-(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('"join_algo" '"MapJoin"))))
+(let $1 '('"Inner" '"orders" '"customer" '('"orders" '"a") '('"customer" '"b") '('('join_algo 'MapJoin))))
(return (EquiJoin '('() '"orders") '('() '"customer") $1 '()))
)
)__";
diff --git a/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h b/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h
index 9cbe532e926..4c4c20a9c71 100644
--- a/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h
+++ b/ydb/library/yql/dq/opt/dq_opt_conflict_rules_collector.h
@@ -57,14 +57,14 @@ public:
private:
auto GetLeftConflictsVisitor() {
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) {
- if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
+ if (!OperatorsAreAssociative(child->JoinType, Root_->JoinType)) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->RightArg],
SubtreeNodes_[child->LeftArg]
);
}
- if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
+ if (!OperatorsAreLeftAsscom(child->JoinType, Root_->JoinType)) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->LeftArg],
SubtreeNodes_[child->RightArg]
@@ -77,18 +77,18 @@ private:
auto GetRightConflictsVisitor() {
auto visitor = [this](const std::shared_ptr<TJoinOptimizerNode>& child) {
- if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
+ if (!OperatorsAreAssociative(Root_->JoinType, child->JoinType)) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->LeftArg],
SubtreeNodes_[child->RightArg]
);
}
- if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType) || !Root_->IsReorderable || !child->IsReorderable) {
+ if (!OperatorsAreRightAsscom(Root_->JoinType, child->JoinType)) {
ConflictRules_.emplace_back(
SubtreeNodes_[child->RightArg],
SubtreeNodes_[child->LeftArg]
- );
+ );
}
};
@@ -106,6 +106,20 @@ private:
VisitJoinTree(childJoinNode->LeftArg, visitor);
VisitJoinTree(childJoinNode->RightArg, visitor);
+ if (childJoinNode->LeftAny || !childJoinNode->IsReorderable) {
+ ConflictRules_.emplace_back(
+ SubtreeNodes_[childJoinNode->LeftArg],
+ SubtreeNodes_[childJoinNode->RightArg]
+ );
+ }
+
+ if (childJoinNode->RightAny || !childJoinNode->IsReorderable) {
+ ConflictRules_.emplace_back(
+ SubtreeNodes_[childJoinNode->RightArg],
+ SubtreeNodes_[childJoinNode->LeftArg]
+ );
+ }
+
visitor(childJoinNode);
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
index cb95d2e45ae..142e97bab4f 100644
--- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
+++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
@@ -81,6 +81,8 @@ private:
const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
EJoinKind joinKind,
+ bool leftAny,
+ bool rightAny,
bool isCommutative,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
@@ -409,6 +411,8 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
EJoinKind joinKind,
+ bool leftAny,
+ bool rightAny,
bool isCommutative,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
@@ -419,7 +423,7 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint
) {
double bestCost = std::numeric_limits<double>::infinity();
- EJoinAlgoType bestAlgo{};
+ EJoinAlgoType bestAlgo = EJoinAlgoType::Undefined;
bool bestJoinIsReversed = false;
for (auto joinAlgo : AllJoinAlgos) {
@@ -452,13 +456,13 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
}
}
- Y_ENSURE(bestCost != std::numeric_limits<double>::infinity(), "No join was chosen!");
+ Y_ENSURE(bestAlgo != EJoinAlgoType::Undefined, "No join was chosen!");
if (bestJoinIsReversed) {
- return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint);
+ return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeCardHint);
}
- return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint);
+ return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeCardHint);
}
/*
@@ -489,6 +493,8 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS
leftNodes,
rightNodes,
csgCmpEdge->JoinKind,
+ csgCmpEdge->LeftAny,
+ csgCmpEdge->RightAny,
csgCmpEdge->IsCommutative,
csgCmpEdge->JoinConditions,
reversedEdge->JoinConditions,
diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
index ccbb1faf2d3..75c234f813f 100644
--- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
@@ -3,31 +3,47 @@
#include "dq_opt_log.h"
#include "dq_opt_join.h"
+#include <util/string/split.h>
+
#include "dq_opt_make_join_hypergraph.h"
+#include "dq_opt_log.h"
using namespace NYql;
using namespace NNodes;
using namespace NYql::NDq;
std::shared_ptr<IBaseOptimizerNode> CreateChain(size_t size, TString onAttribute, TString tablePrefix="e") {
- std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TRelOptimizerNode>(tablePrefix + "1", nullptr);
+ std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TRelOptimizerNode>(tablePrefix + "1", std::make_shared<TOptimizerStatistics>());
for (size_t i = 1; i < size; ++i) {
auto eiStr = tablePrefix + ToString(i + 1);
auto eiPrevStr = tablePrefix + ToString(i);
- auto ei = std::make_shared<TRelOptimizerNode>(eiStr, nullptr);
+ auto ei = std::make_shared<TRelOptimizerNode>(eiStr, std::make_shared<TOptimizerStatistics>());
std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
joinConditions.insert({TJoinColumn(eiPrevStr, onAttribute), TJoinColumn(eiStr, onAttribute)});
root = std::make_shared<TJoinOptimizerNode>(
- root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined
+ root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);
}
return root;
}
+
+std::shared_ptr<IBaseOptimizerNode> Enumerate(const std::shared_ptr<IBaseOptimizerNode>& root) {
+ auto ctx = TBaseProviderContext();
+ auto optimizer = MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max());
+
+ Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType);
+ auto res = optimizer->JoinSearch(std::static_pointer_cast<TJoinOptimizerNode>(root));
+ Cout << "Optimized Tree:" << Endl;
+ std::stringstream ss; res->Print(ss);
+ Cout << ss.str() << Endl;
+ return res;
+}
+
Y_UNIT_TEST_SUITE(HypergraphBuild) {
using TNodeSet = std::bitset<64>;
@@ -57,6 +73,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
UNIT_ASSERT(graph.GetEdges().size() == 6);
CheckClique(graph);
+ Enumerate(root);
}
Y_UNIT_TEST(SimpleChain4NodesTransitiveClosure) {
@@ -66,6 +83,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
UNIT_ASSERT(graph.GetEdges().size() == 12);
CheckClique(graph);
+ Enumerate(root);
}
Y_UNIT_TEST(SimpleChain5NodesTransitiveClosure) {
@@ -75,6 +93,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
UNIT_ASSERT(graph.GetEdges().size() == 20);
CheckClique(graph);
+ Enumerate(root);
}
Y_UNIT_TEST(ComplexTransitiveClosure) {
@@ -86,7 +105,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
// a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2
auto root = std::make_shared<TJoinOptimizerNode>(
- lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined
+ lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);
joinConditions.clear();
@@ -97,7 +116,7 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
// a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2
// ^ we don't want to have transitive closure between c and a
root = std::make_shared<TJoinOptimizerNode>(
- root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined
+ root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);
auto graph = MakeJoinHypergraph<TNodeSet>(root);
@@ -125,4 +144,114 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
UNIT_ASSERT(!graph.FindEdgeBetween(c2, a2));
UNIT_ASSERT(!graph.FindEdgeBetween(c2, a3));
}
+
+ template <typename TJoinArg>
+ std::shared_ptr<IBaseOptimizerNode> GetJoinArg(const TJoinArg& joinArg) {
+ if constexpr (std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>) {
+ return joinArg;
+ } else if (std::is_convertible_v<TJoinArg, std::string>) {
+ std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TRelOptimizerNode>(joinArg, std::make_shared<TOptimizerStatistics>());
+ return root;
+ } else {
+ static_assert(std::is_convertible_v<TJoinArg, std::string> ||
+ std::is_same_v<TJoinArg, std::shared_ptr<IBaseOptimizerNode>>,
+ "Args of join must be either Join or TString, for example: Join(Join('A', 'B'), 'C')");
+ }
+
+ Y_UNREACHABLE();
+ }
+
+
+ template<typename TLhsArg, typename TRhsArg>
+ std::shared_ptr<IBaseOptimizerNode> Join(const TLhsArg& lhsArg, const TRhsArg& rhsArg, TString on="", TString onAttr="") {
+ if constexpr (std::is_convertible_v<TLhsArg, std::string> && std::is_convertible_v<TRhsArg, std::string>) {
+ on = Sprintf("%s,%s", lhsArg, rhsArg);
+ }
+
+ if (on.empty()) {
+ throw std::invalid_argument("Bad argument.");
+ }
+
+ std::string lhsCond, rhsCond;
+ Split(on, ",", lhsCond, rhsCond);
+ auto col = onAttr.empty()? ToString(rand()): onAttr;
+ std::shared_ptr<IBaseOptimizerNode> root = std::make_shared<TJoinOptimizerNode>(
+ TJoinOptimizerNode(
+ GetJoinArg(lhsArg),
+ GetJoinArg(rhsArg),
+ {{TJoinColumn(lhsCond.c_str(), col), TJoinColumn(rhsCond.c_str(), col)}},
+ EJoinKind::InnerJoin,
+ EJoinAlgoType::Undefined,
+ false,
+ false
+ )
+ );
+ return root;
+ }
+
+ Y_UNIT_TEST(AnyJoinWithTransitiveClosure) {
+ auto root = Join("A", Join("B", Join("C", "D", "C,D", "id"), "B,C", "id"), "A,B", "id");
+ std::static_pointer_cast<TJoinOptimizerNode>(root)->LeftAny = true;
+
+ auto graph = MakeJoinHypergraph<TNodeSet>(root);
+ Cout << graph.String() << Endl;
+
+ auto A = graph.GetNodesByRelNames({"A"});
+ auto B = graph.GetNodesByRelNames({"B"});
+ auto C = graph.GetNodesByRelNames({"C"});
+ auto D = graph.GetNodesByRelNames({"D"});
+
+ UNIT_ASSERT(graph.FindEdgeBetween(B, D));
+ UNIT_ASSERT(!graph.FindEdgeBetween(A, D));
+ UNIT_ASSERT(!graph.FindEdgeBetween(A, C));
+ }
+
+ Y_UNIT_TEST(AnyJoinConstraints1) {
+ auto anyJoin = Join(Join("A", "B"), "C", /*on=*/ "B,C");
+ std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true;
+ auto join = Join(anyJoin, "D", /*on=*/"A,D");
+
+ auto graph = MakeJoinHypergraph<TNodeSet>(join);
+ Cout << graph.String() << Endl;
+ UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
+
+ Enumerate(join);
+ }
+
+
+ Y_UNIT_TEST(AnyJoinConstraints2) {
+ auto anyJoin = Join(Join(Join("A", "B"), "C", /*on=*/ "B,C"), "D", "C,D");
+ std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->LeftAny = true;
+ auto join = Join(anyJoin, "E", /*on=*/ "A,E");
+
+ auto graph = MakeJoinHypergraph<TNodeSet>(join);
+ Cout << graph.String() << Endl;
+ UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
+
+ Enumerate(join);
+ }
+
+ Y_UNIT_TEST(AnyJoinConstraints3) {
+ auto anyJoin = Join(Join("A", "B"), Join("C", "D"), /*on=*/"B,C");
+ std::static_pointer_cast<TJoinOptimizerNode>(anyJoin)->RightAny = true;
+ auto join = Join(anyJoin, "E", /*on=*/ "C,E");
+
+ auto graph = MakeJoinHypergraph<TNodeSet>(join);
+ Cout << graph.String() << Endl;
+ UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
+
+ Enumerate(join);
+ }
+
+ Y_UNIT_TEST(IsReorderableConstraint) {
+ auto nonReorderable = Join(Join(Join("A", "B"), "C", /*on=*/ "B,C"), "D", "C,D");
+ std::static_pointer_cast<TJoinOptimizerNode>(nonReorderable)->IsReorderable = false;
+ auto join = Join(nonReorderable, "E", /*on=*/ "A,E");
+
+ auto graph = MakeJoinHypergraph<TNodeSet>(join);
+ Cout << graph.String() << Endl;
+ UNIT_ASSERT(graph.GetEdges().size() != graph.GetSimpleEdges().size());
+
+ Enumerate(join);
+ }
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
index e9e701fda15..32d3fd33f21 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
@@ -3,6 +3,7 @@
#include "dq_opt_make_join_hypergraph.h"
#include <ydb/library/yql/core/expr_nodes/yql_expr_nodes.h>
+#include <ydb/library/yql/core/yql_join.h>
#include <ydb/library/yql/dq/opt/dq_opt.h>
#include <ydb/library/yql/utils/log/log.h>
@@ -96,7 +97,9 @@ std::shared_ptr<TJoinOptimizerNode> ConvertToJoinTree(
TJoinColumn(rightScope, rightColumn)));
}
- return std::make_shared<TJoinOptimizerNode>(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined);
+ const auto linkSettings = GetEquiJoinLinkSettings(joinTuple.Options().Ref());
+ return std::make_shared<TJoinOptimizerNode>(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined,
+ linkSettings.LeftHints.contains("any"), linkSettings.RightHints.contains("any"));
}
/**
@@ -142,14 +145,32 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin,
rightJoinColumns.push_back(BuildAtom(pair.second.AttributeName, equiJoin.Pos(), ctx));
}
- auto optionsList = ctx.Builder(equiJoin.Pos())
- .List()
- .List(0)
- .Atom(0, "join_algo")
- .Atom(1, ToString(reorderResult->JoinAlgo))
+ TExprNode::TListType options(1U,
+ ctx.Builder(equiJoin.Pos())
+ .List()
+ .Atom(0, "join_algo", TNodeFlags::Default)
+ .Atom(1, ToString(reorderResult->JoinAlgo), TNodeFlags::Default)
.Seal()
- .Seal()
- .Build();
+ .Build()
+ );
+
+ if (reorderResult->LeftAny) {
+ options.emplace_back(ctx.Builder(equiJoin.Pos())
+ .List()
+ .Atom(0, "left", TNodeFlags::Default)
+ .Atom(1, "any", TNodeFlags::Default)
+ .Seal()
+ .Build());
+ }
+
+ if (reorderResult->RightAny) {
+ options.emplace_back(ctx.Builder(equiJoin.Pos())
+ .List()
+ .Atom(0, "right", TNodeFlags::Default)
+ .Atom(1, "any", TNodeFlags::Default)
+ .Seal()
+ .Build());
+ }
// Build the final output
return Build<TCoEquiJoinTuple>(ctx,equiJoin.Pos())
@@ -162,7 +183,9 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin,
.RightKeys()
.Add(rightJoinColumns)
.Build()
- .Options(optionsList)
+ .Options()
+ .Add(std::move(options))
+ .Build()
.Done();
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
index 9a4fe0a3f49..c6f5be64fb2 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
+++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
@@ -28,12 +28,16 @@ public:
const TNodeSet& left,
const TNodeSet& right,
EJoinKind joinKind,
+ bool leftAny,
+ bool rightAny,
bool isCommutative,
const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions
)
: Left(left)
, Right(right)
, JoinKind(joinKind)
+ , LeftAny(leftAny)
+ , RightAny(rightAny)
, IsCommutative(isCommutative)
, JoinConditions(joinConditions)
, IsReversed(false)
@@ -52,6 +56,7 @@ public:
TNodeSet Left;
TNodeSet Right;
EJoinKind JoinKind;
+ bool LeftAny, RightAny;
bool IsCommutative;
std::set<std::pair<TJoinColumn, TJoinColumn>> JoinConditions;
TVector<TString> LeftJoinKeys;
@@ -97,10 +102,13 @@ public:
TVector<TString> relNameByNodeId(Nodes_.size());
res.append("Nodes: ").append("\n");
for (const auto& [name, idx]: NodeIdByRelationName_) {
- res.append(Sprintf("%ld: %s\n", idx, name.c_str()));
relNameByNodeId[idx] = name;
}
+ for (size_t idx = 0; idx < relNameByNodeId.size(); ++idx) {
+ res.append(Sprintf("%ld: %s\n", idx, relNameByNodeId[idx].c_str()));
+ }
+
res.append("Edges: ").append("\n");
auto edgeSideToString =
@@ -387,7 +395,8 @@ public:
[this](const THyperedge& edge) {
return
edge.IsReversed ||
- !(IsJoinTransitiveClosureSupported(edge.JoinKind) && edge.AreCondVectorEqual());
+ !(IsJoinTransitiveClosureSupported(edge.JoinKind) && edge.AreCondVectorEqual()) ||
+ edge.LeftAny || edge.RightAny;
}
);
@@ -463,7 +472,7 @@ private:
});
}
- auto e = THyperedge(lhs, rhs, groupJoinKind, isJoinCommutative, joinConditions);
+ auto e = THyperedge(lhs, rhs, groupJoinKind, false, false, isJoinCommutative, joinConditions);
Graph_.AddEdge(std::move(e));
}
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp
index 86b36d01d03..5b13ee7cbd6 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp
@@ -10,10 +10,12 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
const TVector<TString>& rightJoinKeys,
EJoinKind joinKind,
EJoinAlgoType joinAlgo,
+ bool leftAny,
+ bool rightAny,
IProviderContext& ctx,
TCardinalityHints::TCardinalityHint* maybeHint) {
- auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo);
+ auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo, leftAny, rightAny);
res->Stats = std::make_shared<TOptimizerStatistics>(ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint));
return res;
}
@@ -37,7 +39,7 @@ std::shared_ptr<TJoinOptimizerNode> ConvertFromInternal(const std::shared_ptr<IB
right = ConvertFromInternal(right);
}
- auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo);
+ auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny);
newJoin->Stats = join->Stats;
return newJoin;
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h
index 02ffb1e99db..9e626bc356b 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h
+++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h
@@ -22,7 +22,9 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
const EJoinKind joinType,
- const EJoinAlgoType joinAlgo
+ const EJoinAlgoType joinAlgo,
+ const bool leftAny,
+ const bool rightAny
)
: IBaseOptimizerNode(JoinNodeType)
, LeftArg(left)
@@ -32,6 +34,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
, RightJoinKeys(rightJoinKeys)
, JoinType(joinType)
, JoinAlgo(joinAlgo)
+ , LeftAny(leftAny)
+ , RightAny(rightAny)
{}
virtual ~TJoinOptimizerNodeInternal() = default;
@@ -52,6 +56,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
const TVector<TString>& RightJoinKeys;
EJoinKind JoinType;
EJoinAlgoType JoinAlgo;
+ const bool LeftAny;
+ const bool RightAny;
};
/**
@@ -65,6 +71,8 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
const TVector<TString>& rightJoinKeys,
EJoinKind joinKind,
EJoinAlgoType joinAlgo,
+ bool leftAny,
+ bool rightAny,
IProviderContext& ctx,
TCardinalityHints::TCardinalityHint* maybeHint = nullptr
);
diff --git a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
index de59957f426..4e347ab5973 100644
--- a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
+++ b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
@@ -42,14 +42,13 @@ typename TJoinHypergraph<TNodeSet>::TEdge MakeHyperedge(
TNodeSet TES = ConvertConflictRulesIntoTES(conditionUsedRels, conflictRules);
-
- /* For CROSS Join and degenerate predicates (if subtree tables and joinCondition tables do not intersect) */
- if (!Overlaps(TES, subtreeNodes[joinNode->LeftArg])) {
+ /* For CROSS, Non-Reorderable, ANY Joins and degenerate predicates (if subtree tables and joinCondition tables do not intersect) */
+ if (!Overlaps(TES, subtreeNodes[joinNode->LeftArg]) || !joinNode->IsReorderable || joinNode->LeftAny) {
TES |= subtreeNodes[joinNode->LeftArg];
TES = ConvertConflictRulesIntoTES(TES, conflictRules);
}
- if (!Overlaps(TES, subtreeNodes[joinNode->RightArg])) {
+ if (!Overlaps(TES, subtreeNodes[joinNode->RightArg]) || !joinNode->IsReorderable || joinNode->RightAny) {
TES |= subtreeNodes[joinNode->RightArg];
TES = ConvertConflictRulesIntoTES(TES, conflictRules);
}
@@ -57,7 +56,8 @@ typename TJoinHypergraph<TNodeSet>::TEdge MakeHyperedge(
TNodeSet left = TES & subtreeNodes[joinNode->LeftArg];
TNodeSet right = TES & subtreeNodes[joinNode->RightArg];
- return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, OperatorIsCommutative(joinNode->JoinType) && joinNode->IsReorderable, joinNode->JoinConditions);
+ bool isCommutative = OperatorIsCommutative(joinNode->JoinType) && (joinNode->IsReorderable);
+ return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, joinNode->JoinConditions);
}
template<typename TNodeSet>
diff --git a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
index c9d54d1aea6..f4092b08b0b 100644
--- a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
+++ b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
@@ -81,7 +81,7 @@ Y_UNIT_TEST(NonReordable) {
std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
joinConditions.insert({NDq::TJoinColumn{"a", "b"}, NDq::TJoinColumn{"a","c"}});
auto root = std::make_shared<TJoinOptimizerNode>(
- left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, true);
+ left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true);
TBaseProviderContext optCtx;
std::unique_ptr<IOptimizerNew> opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(optCtx, 1024));
auto result = opt->JoinSearch(root);
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
index 9721fbe2a2d..4c15ac7bdab 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
@@ -166,7 +166,10 @@ public:
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
TYtJoinNodeOp* originalOp)
- : TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo, originalOp != nullptr)
+ : TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo,
+ originalOp ? originalOp->LinkSettings.LeftHints.contains("any") : false,
+ originalOp ? originalOp->LinkSettings.RightHints.contains("any") : false,
+ originalOp != nullptr)
, OriginalOp(originalOp)
{ }
diff --git a/ydb/library/yql/sql/pg/optimizer.cpp b/ydb/library/yql/sql/pg/optimizer.cpp
index f132427188e..d1134092cbb 100644
--- a/ydb/library/yql/sql/pg/optimizer.cpp
+++ b/ydb/library/yql/sql/pg/optimizer.cpp
@@ -654,7 +654,9 @@ struct TPgOptimizerImpl
left, right,
joinConditions,
joinKind,
- EJoinAlgoType::MapJoin
+ EJoinAlgoType::MapJoin,
+ false,
+ false
);
} else {
YQL_ENSURE(false, "Wrong CBO node");