summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Velikhov <[email protected]>2024-10-14 14:09:35 +0300
committerGitHub <[email protected]>2024-10-14 11:09:35 +0000
commit5a2282b60e1706cd2c19c4c2860c80befb05ac66 (patch)
tree43f843b22cc8c4fe42257b55304a139b36d19449
parent2baa7c5aa8da2f87d559b100adcd36d6c1bd85f8 (diff)
Refactored join conditions in CBO (#10366)
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp28
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_cbo.h3
-rw-r--r--ydb/library/yql/core/cbo/cbo_optimizer_new.cpp58
-rw-r--r--ydb/library/yql/core/cbo/cbo_optimizer_new.h49
-rw-r--r--ydb/library/yql/core/yql_cost_function.h9
-rw-r--r--ydb/library/yql/dq/opt/dq_cbo_ut.cpp31
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h24
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp24
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp23
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h115
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp9
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_tree_node.h16
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h12
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_stat.cpp35
-rw-r--r--ydb/library/yql/providers/dq/opt/logical_optimize.cpp7
-rw-r--r--ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp7
-rw-r--r--ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp22
-rw-r--r--ydb/library/yql/sql/pg/optimizer.cpp25
-rw-r--r--ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json6
19 files changed, 260 insertions, 243 deletions
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
index 240f38fbaff..ebd541bb988 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
@@ -36,7 +36,7 @@ TMaybeNode<TKqlKeyInc> GetRightTableKeyPrefix(const TKqlKeyRange& range) {
/**
* KQP specific rule to check if a LookupJoin is applicable
*/
-bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNode>& node, const TVector<TString>& joinColumns, const TKqpProviderContext& ctx) {
+bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNode>& node, const TVector<TJoinColumn>& joinColumns, const TKqpProviderContext& ctx) {
auto rel = std::static_pointer_cast<TKqpRelOptimizerNode>(node);
auto expr = TExprBase(rel->Node);
@@ -45,7 +45,7 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNod
return false;
}
- if (find_if(joinColumns.begin(), joinColumns.end(), [&] (const TString& s) { return node->Stats->KeyColumns->Data[0] == s;}) != joinColumns.end()) {
+ if (std::find_if(joinColumns.begin(), joinColumns.end(), [&] (const TJoinColumn& c) { return node->Stats->KeyColumns->Data[0] == c.AttributeName;}) != joinColumns.end()) {
return true;
}
@@ -97,8 +97,8 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNod
return false;
}
- if (prefixSize < node->Stats->KeyColumns->Data.size() && (find_if(joinColumns.begin(), joinColumns.end(), [&] (const TString& s) {
- return node->Stats->KeyColumns->Data[prefixSize] == s;
+ if (prefixSize < node->Stats->KeyColumns->Data.size() && (std::find_if(joinColumns.begin(), joinColumns.end(), [&] (const TJoinColumn& c) {
+ return node->Stats->KeyColumns->Data[prefixSize] == c.AttributeName;
}) == joinColumns.end())){
return false;
}
@@ -108,12 +108,11 @@ bool IsLookupJoinApplicableDetailed(const std::shared_ptr<NYql::TRelOptimizerNod
bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
std::shared_ptr<IBaseOptimizerNode> right,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
TKqpProviderContext& ctx
) {
- Y_UNUSED(left, joinConditions, leftJoinKeys);
+ Y_UNUSED(left, leftJoinKeys);
if (!(right->Stats->StorageType == EStorageType::RowStorage)) {
return false;
@@ -130,7 +129,7 @@ bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
}
for (auto rightCol : rightJoinKeys) {
- if (std::find(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), rightCol) == rightStats->KeyColumns->Data.end()) {
+ if (std::find(rightStats->KeyColumns->Data.begin(), rightStats->KeyColumns->Data.end(), rightCol.AttributeName) == rightStats->KeyColumns->Data.end()) { // every right join key must be one of the table's key columns
return false;
}
}
@@ -142,18 +141,17 @@ bool IsLookupJoinApplicable(std::shared_ptr<IBaseOptimizerNode> left,
bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
- EJoinKind joinKind) {
+ EJoinKind joinKind) {
switch( joinAlgo ) {
case EJoinAlgoType::LookupJoin:
if ((OptLevel != 3) && (left->Stats->Nrows > 1000)) {
return false;
}
- return IsLookupJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, *this);
+ return IsLookupJoinApplicable(left, right, leftJoinKeys, rightJoinKeys, *this);
case EJoinAlgoType::LookupJoinReverse:
if (joinKind != EJoinKind::LeftSemi) {
@@ -162,7 +160,7 @@ bool TKqpProviderContext::IsJoinApplicable(const std::shared_ptr<IBaseOptimizerN
if ((OptLevel != 3) && (right->Stats->Nrows > 1000)) {
return false;
}
- return IsLookupJoinApplicable(right, left, joinConditions, rightJoinKeys, leftJoinKeys, *this);
+ return IsLookupJoinApplicable(right, left, rightJoinKeys, leftJoinKeys, *this);
case EJoinAlgoType::MapJoin:
return joinKind != EJoinKind::OuterJoin && joinKind != EJoinKind::Exclusion && right->Stats->ByteSize < 1e6;
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h
index 9df809aaacb..52aa93ef414 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.h
+++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.h
@@ -25,8 +25,7 @@ struct TKqpProviderContext : public NYql::TBaseProviderContext {
virtual bool IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
- const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
+ const TVector<NYql::NDq::TJoinColumn>& leftJoinKeys, const TVector<NYql::NDq::TJoinColumn>& rightJoinKeys,
NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) override;
virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override;
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
index eb7d07e429d..380f60c26e1 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
+++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
@@ -77,7 +77,8 @@ void TRelOptimizerNode::Print(std::stringstream& stream, int ntabs) {
TJoinOptimizerNode::TJoinOptimizerNode(
const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
+ TVector<TJoinColumn> leftKeys,
+ TVector<TJoinColumn> rightKeys,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
bool leftAny,
@@ -86,18 +87,14 @@ TJoinOptimizerNode::TJoinOptimizerNode(
) : IBaseOptimizerNode(JoinNodeType)
, LeftArg(left)
, RightArg(right)
- , JoinConditions(joinConditions)
+ , LeftJoinKeys(leftKeys)
+ , RightJoinKeys(rightKeys)
, JoinType(joinType)
, JoinAlgo(joinAlgo)
, LeftAny(leftAny)
, RightAny(rightAny)
, IsReorderable(!nonReorderable)
-{
- for (const auto& [l,r] : joinConditions ) {
- LeftJoinKeys.push_back(l.AttributeName);
- RightJoinKeys.push_back(r.AttributeName);
- }
-}
+{}
TVector<TString> TJoinOptimizerNode::Labels() {
auto res = LeftArg->Labels();
@@ -120,10 +117,10 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) {
}
stream << ") ";
- for (auto c : JoinConditions){
- stream << c.first.RelName << "." << c.first.AttributeName
- << "=" << c.second.RelName << "."
- << c.second.AttributeName << ",";
+ for (size_t i=0; i<LeftJoinKeys.size(); i++){
+ stream << LeftJoinKeys[i].RelName << "." << LeftJoinKeys[i].AttributeName
+ << "=" << RightJoinKeys[i].RelName << "."
+ << RightJoinKeys[i].AttributeName << ",";
}
stream << "\n";
@@ -138,13 +135,14 @@ void TJoinOptimizerNode::Print(std::stringstream& stream, int ntabs) {
RightArg->Print(stream, ntabs+1);
}
-bool IsPKJoin(const TOptimizerStatistics& stats, const TVector<TString>& joinKeys) {
+bool IsPKJoin(const TOptimizerStatistics& stats, const TVector<TJoinColumn>& joinKeys) {
if (!stats.KeyColumns) {
return false;
}
for(size_t i = 0; i < stats.KeyColumns->Data.size(); i++){
- if (std::find(joinKeys.begin(), joinKeys.end(), stats.KeyColumns->Data[i]) == joinKeys.end()) {
+ if (std::find_if(joinKeys.begin(), joinKeys.end(),
+ [&] (const TJoinColumn& c) { return c.AttributeName == stats.KeyColumns->Data[i];}) == joinKeys.end()) {
return false;
}
}
@@ -153,15 +151,13 @@ bool IsPKJoin(const TOptimizerStatistics& stats, const TVector<TString>& joinKey
bool TBaseProviderContext::IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
EJoinKind joinKind) {
Y_UNUSED(left);
Y_UNUSED(right);
- Y_UNUSED(joinConditions);
Y_UNUSED(leftJoinKeys);
Y_UNUSED(rightJoinKeys);
Y_UNUSED(joinKind);
@@ -182,30 +178,12 @@ double TBaseProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftSta
*
* The build is on the right side, so we make the build side a bit more expensive than the probe
*/
-TOptimizerStatistics TBaseProviderContext::ComputeJoinStats(
- const TOptimizerStatistics& leftStats,
- const TOptimizerStatistics& rightStats,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
- EJoinAlgoType joinAlgo,
- EJoinKind joinKind,
- TCardinalityHints::TCardinalityHint* maybeHint) const
-{
- TVector<TString> leftJoinKeys;
- TVector<TString> rightJoinKeys;
-
- for (auto c : joinConditions) {
- leftJoinKeys.emplace_back(c.first.AttributeName);
- rightJoinKeys.emplace_back(c.second.AttributeName);
- }
-
- return ComputeJoinStats(leftStats, rightStats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint);
-}
TOptimizerStatistics TBaseProviderContext::ComputeJoinStats(
const TOptimizerStatistics& leftStats,
const TOptimizerStatistics& rightStats,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
EJoinKind joinKind,
TCardinalityHints::TCardinalityHint* maybeHint) const
@@ -265,9 +243,9 @@ TOptimizerStatistics TBaseProviderContext::ComputeJoinStats(
std::optional<double> lhsUniqueVals;
std::optional<double> rhsUniqueVals;
if (leftStats.ColumnStatistics && rightStats.ColumnStatistics && !leftJoinKeys.empty() && !rightJoinKeys.empty()) {
- auto lhs = leftJoinKeys[0];
+ auto lhs = leftJoinKeys[0].AttributeName;
lhsUniqueVals = leftStats.ColumnStatistics->Data[lhs].NumUniqueVals;
- auto rhs = rightJoinKeys[0];
+ auto rhs = rightJoinKeys[0].AttributeName;
- rightStats.ColumnStatistics->Data[rhs];
- rhsUniqueVals = leftStats.ColumnStatistics->Data[lhs].NumUniqueVals;
+ // Read the right-hand count from the right relation's own column statistics (was read from the left side).
+ rhsUniqueVals = rightStats.ColumnStatistics->Data[rhs].NumUniqueVals;
}
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h
index 0a564e4c359..af3b9452902 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h
+++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h
@@ -201,27 +201,18 @@ struct IProviderContext {
virtual TOptimizerStatistics ComputeJoinStats(
const TOptimizerStatistics& leftStats,
const TOptimizerStatistics& rightStats,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
- EJoinAlgoType joinAlgo,
- EJoinKind joinKind,
- TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const = 0;
-
- virtual TOptimizerStatistics ComputeJoinStats(
- const TOptimizerStatistics& leftStats,
- const TOptimizerStatistics& rightStats,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<NDq::TJoinColumn>& leftJoinKeys,
+ const TVector<NDq::TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
EJoinKind joinKind,
TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const = 0;
virtual bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<NDq::TJoinColumn>& leftJoinKeys,
+ const TVector<NDq::TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
- EJoinKind joinKind) = 0;
+ EJoinKind joinKind) = 0;
};
};
/**
@@ -233,27 +224,19 @@ struct TBaseProviderContext : public IProviderContext {
double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override;
- bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
- const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ bool IsJoinApplicable(
+ const std::shared_ptr<IBaseOptimizerNode>& leftStats,
+ const std::shared_ptr<IBaseOptimizerNode>& rightStats,
+ const TVector<NDq::TJoinColumn>& leftJoinKeys,
+ const TVector<NDq::TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
EJoinKind joinKind) override;
virtual TOptimizerStatistics ComputeJoinStats(
const TOptimizerStatistics& leftStats,
const TOptimizerStatistics& rightStats,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
- EJoinAlgoType joinAlgo,
- EJoinKind joinKind,
- TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const override;
-
- virtual TOptimizerStatistics ComputeJoinStats(
- const TOptimizerStatistics& leftStats,
- const TOptimizerStatistics& rightStats,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
+ const TVector<NDq::TJoinColumn>& leftJoinKeys,
+ const TVector<NDq::TJoinColumn>& rightJoinKeys,
EJoinAlgoType joinAlgo,
EJoinKind joinKind,
TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const override;
@@ -290,9 +273,8 @@ struct TRelOptimizerNode : public IBaseOptimizerNode {
struct TJoinOptimizerNode : public IBaseOptimizerNode {
std::shared_ptr<IBaseOptimizerNode> LeftArg;
std::shared_ptr<IBaseOptimizerNode> RightArg;
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> JoinConditions;
- TVector<TString> LeftJoinKeys;
- TVector<TString> RightJoinKeys;
+ TVector<NDq::TJoinColumn> LeftJoinKeys;
+ TVector<NDq::TJoinColumn> RightJoinKeys;
EJoinKind JoinType;
EJoinAlgoType JoinAlgo;
/////////////////// 'ANY' flag means leaving only one row from the join side.
@@ -303,7 +285,8 @@ struct TJoinOptimizerNode : public IBaseOptimizerNode {
TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
+ TVector<NDq::TJoinColumn> leftKeys,
+ TVector<NDq::TJoinColumn> rightKeys,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
bool leftAny,
diff --git a/ydb/library/yql/core/yql_cost_function.h b/ydb/library/yql/core/yql_cost_function.h
index cb12f37238b..b69c5941db6 100644
--- a/ydb/library/yql/core/yql_cost_function.h
+++ b/ydb/library/yql/core/yql_cost_function.h
@@ -38,9 +38,14 @@ namespace NDq {
struct TJoinColumn {
TString RelName;
TString AttributeName;
+ TString AttributeNameWithAliases;
+ ui32 EquivalenceClass = 0;
+ bool IsConstant = false;
- TJoinColumn(TString relName, TString attributeName) : RelName(relName),
- AttributeName(std::move(attributeName)) {}
+ TJoinColumn(TString relName, TString attributeName) :
+ RelName(std::move(relName)),
+ AttributeName(attributeName), // copied here; the parameter is moved from only on the next line
+ AttributeNameWithAliases(std::move(attributeName)) {}
bool operator == (const TJoinColumn& other) const {
return RelName == other.RelName && AttributeName == other.AttributeName;
diff --git a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
index 3e088a59ded..8973518e0b6 100644
--- a/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
+++ b/ydb/library/yql/dq/opt/dq_cbo_ut.cpp
@@ -45,15 +45,14 @@ Y_UNIT_TEST(JoinSearch2Rels) {
auto rel2 = std::make_shared<TRelOptimizerNode>("b",
std::make_shared<TOptimizerStatistics>(BaseTable, 1000000, 1, 0, 9000009));
- std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
- joinConditions.insert({
- NDq::TJoinColumn("a", "1"),
- NDq::TJoinColumn("b", "1")
- });
+ TVector<NDq::TJoinColumn> leftKeys = {NDq::TJoinColumn("a", "1")};
+ TVector<NDq::TJoinColumn> rightKeys ={NDq::TJoinColumn("b", "1")};
+
auto op = std::make_shared<TJoinOptimizerNode>(
std::static_pointer_cast<IBaseOptimizerNode>(rel1),
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
- joinConditions,
+ leftKeys,
+ rightKeys,
InnerJoin,
EJoinAlgoType::GraceJoin,
true,
@@ -86,30 +85,28 @@ Y_UNIT_TEST(JoinSearch3Rels) {
auto rel3 = std::make_shared<TRelOptimizerNode>("c",
std::make_shared<TOptimizerStatistics>(BaseTable, 10000, 1, 0, 9009));
- std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
- joinConditions.insert({
- NDq::TJoinColumn("a", "1"),
- NDq::TJoinColumn("b", "1")
- });
+ TVector<NDq::TJoinColumn> leftKeys = {NDq::TJoinColumn("a", "1")};
+ TVector<NDq::TJoinColumn> rightKeys ={NDq::TJoinColumn("b", "1")};
+
auto op1 = std::make_shared<TJoinOptimizerNode>(
std::static_pointer_cast<IBaseOptimizerNode>(rel1),
std::static_pointer_cast<IBaseOptimizerNode>(rel2),
- joinConditions,
+ leftKeys,
+ rightKeys,
InnerJoin,
EJoinAlgoType::GraceJoin,
false,
false
);
- joinConditions.insert({
- NDq::TJoinColumn("a", "1"),
- NDq::TJoinColumn("c", "1")
- });
+ leftKeys.push_back(NDq::TJoinColumn("a", "1"));
+ rightKeys.push_back(NDq::TJoinColumn("c", "1"));
auto op2 = std::make_shared<TJoinOptimizerNode>(
std::static_pointer_cast<IBaseOptimizerNode>(op1),
std::static_pointer_cast<IBaseOptimizerNode>(rel3),
- joinConditions,
+ leftKeys,
+ rightKeys,
InnerJoin,
EJoinAlgoType::GraceJoin,
true,
diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
index 32ae0fb96fb..09b3a676ffc 100644
--- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
+++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
@@ -84,10 +84,8 @@ private:
bool leftAny,
bool rightAny,
bool isCommutative,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
IProviderContext& ctx,
TCardinalityHints::TCardinalityHint* maybeCardHint,
TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint
@@ -414,17 +412,15 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
bool leftAny,
bool rightAny,
bool isCommutative,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& reversedJoinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
IProviderContext& ctx,
TCardinalityHints::TCardinalityHint* maybeCardHint,
TJoinAlgoHints::TJoinAlgoHint* maybeJoinAlgoHint
) {
if (maybeJoinAlgoHint) {
maybeJoinAlgoHint->Applied = true;
- return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, maybeJoinAlgoHint->Algo, leftAny, rightAny, ctx, maybeCardHint);
+ return MakeJoinInternal(left, right, leftJoinKeys, rightJoinKeys, joinKind, maybeJoinAlgoHint->Algo, leftAny, rightAny, ctx, maybeCardHint);
}
double bestCost = std::numeric_limits<double>::infinity();
@@ -432,7 +428,7 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
bool bestJoinIsReversed = false;
for (auto joinAlgo : AllJoinAlgos) {
- if (ctx.IsJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind)){
+ if (ctx.IsJoinApplicable(left, right, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind)){
auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost;
if (cost < bestCost) {
bestCost = cost;
@@ -442,7 +438,7 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
}
if (isCommutative) {
- if (ctx.IsJoinApplicable(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind)){
+ if (ctx.IsJoinApplicable(right, left, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind)){
auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost;
if (cost < bestCost) {
bestCost = cost;
@@ -456,10 +452,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
Y_ENSURE(bestAlgo != EJoinAlgoType::Undefined, "No join was chosen!");
if (bestJoinIsReversed) {
- return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeCardHint);
+ return MakeJoinInternal(right, left, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, rightAny, leftAny, ctx, maybeCardHint);
}
- return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeCardHint);
+ return MakeJoinInternal(left, right, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, leftAny, rightAny, ctx, maybeCardHint);
}
/*
@@ -493,8 +489,6 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS
csgCmpEdge->LeftAny,
csgCmpEdge->RightAny,
csgCmpEdge->IsCommutative,
- csgCmpEdge->JoinConditions,
- reversedEdge->JoinConditions,
csgCmpEdge->LeftJoinKeys,
csgCmpEdge->RightJoinKeys,
Pctx_,
diff --git a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
index 02e43b3bbbe..6a80394d662 100644
--- a/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_hypergraph_ut.cpp
@@ -24,11 +24,11 @@ std::shared_ptr<IBaseOptimizerNode> CreateChain(size_t size, TString onAttribute
auto ei = std::make_shared<TRelOptimizerNode>(eiStr, std::make_shared<TOptimizerStatistics>());
ei->Stats->Labels = std::make_shared<TVector<TString>>(TVector<TString>{eiStr});
- std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
- joinConditions.insert({TJoinColumn(eiPrevStr, onAttribute), TJoinColumn(eiStr, onAttribute)});
+ TVector<NDq::TJoinColumn> leftKeys = {TJoinColumn(eiPrevStr, onAttribute)};
+ TVector<NDq::TJoinColumn> rightKeys = {TJoinColumn(eiStr, onAttribute)};
root = std::make_shared<TJoinOptimizerNode>(
- root, ei, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
+ root, ei, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);
}
@@ -105,23 +105,26 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
auto lhs = CreateChain(3, "228", "a");
auto rhs = CreateChain(2, "1337", "b");
- std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
- joinConditions.insert({TJoinColumn("a3", "1337"), TJoinColumn("b1", "1337")});
+ TVector<NDq::TJoinColumn> leftKeys = {TJoinColumn("a3", "1337")};
+ TVector<NDq::TJoinColumn> rightKeys = {TJoinColumn("b1", "1337")};
// a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2
auto root = std::make_shared<TJoinOptimizerNode>(
- lhs, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
+ lhs, rhs, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);
- joinConditions.clear();
+ leftKeys.clear();
+ rightKeys.clear();
+
+ leftKeys.push_back(TJoinColumn("c2", "123"));
+ rightKeys.push_back(TJoinColumn("b2", "123"));
- joinConditions.insert({TJoinColumn("c2", "123"), TJoinColumn("b2", "123")});
rhs = CreateChain(2, "228", "c");
// a1 --228-- a2 --228-- a3 --1337-- b1 --1337-- b2 --123-- c1 --228-- c2
// ^ we don't want to have transitive closure between c and a
root = std::make_shared<TJoinOptimizerNode>(
- root, rhs, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
+ root, rhs, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::Undefined, false, false
);
auto graph = MakeJoinHypergraph<TNodeSet>(root);
@@ -184,7 +187,8 @@ Y_UNIT_TEST_SUITE(HypergraphBuild) {
TJoinOptimizerNode(
GetJoinArg(lhsArg),
GetJoinArg(rhsArg),
- {{TJoinColumn(lhsCond.c_str(), col), TJoinColumn(rhsCond.c_str(), col)}},
+ {TJoinColumn(lhsCond.c_str(), col)},
+ {TJoinColumn(rhsCond.c_str(), col)},
EJoinKind::InnerJoin,
EJoinAlgoType::Undefined,
false,
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
index 32d3fd33f21..e5097ade867 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
@@ -82,7 +82,8 @@ std::shared_ptr<TJoinOptimizerNode> ConvertToJoinTree(
right = *it;
}
- std::set<std::pair<TJoinColumn, TJoinColumn>> joinConds;
+ TVector<TJoinColumn> leftKeys;
+ TVector<TJoinColumn> rightKeys;
size_t joinKeysCount = joinTuple.LeftKeys().Size() / 2;
for (size_t i = 0; i < joinKeysCount; ++i) {
@@ -90,15 +91,15 @@ std::shared_ptr<TJoinOptimizerNode> ConvertToJoinTree(
auto leftScope = joinTuple.LeftKeys().Item(keyIndex).StringValue();
auto leftColumn = joinTuple.LeftKeys().Item(keyIndex + 1).StringValue();
+ leftKeys.push_back(TJoinColumn(leftScope, leftColumn));
+
auto rightScope = joinTuple.RightKeys().Item(keyIndex).StringValue();
auto rightColumn = joinTuple.RightKeys().Item(keyIndex + 1).StringValue();
-
- joinConds.insert( std::make_pair( TJoinColumn(leftScope, leftColumn),
- TJoinColumn(rightScope, rightColumn)));
+ rightKeys.push_back(TJoinColumn(rightScope, rightColumn));
}
const auto linkSettings = GetEquiJoinLinkSettings(joinTuple.Options().Ref());
- return std::make_shared<TJoinOptimizerNode>(left, right, joinConds, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined,
+ return std::make_shared<TJoinOptimizerNode>(left, right, leftKeys, rightKeys, ConvertToJoinKind(joinTuple.Type().StringValue()), EJoinAlgoType::Undefined,
linkSettings.LeftHints.contains("any"), linkSettings.RightHints.contains("any"));
}
@@ -138,11 +139,13 @@ TExprBase BuildTree(TExprContext& ctx, const TCoEquiJoin& equiJoin,
TVector<TExprBase> rightJoinColumns;
// Build join conditions
- for( auto pair : reorderResult->JoinConditions) {
- leftJoinColumns.push_back(BuildAtom(pair.first.RelName, equiJoin.Pos(), ctx));
- leftJoinColumns.push_back(BuildAtom(pair.first.AttributeName, equiJoin.Pos(), ctx));
- rightJoinColumns.push_back(BuildAtom(pair.second.RelName, equiJoin.Pos(), ctx));
- rightJoinColumns.push_back(BuildAtom(pair.second.AttributeName, equiJoin.Pos(), ctx));
+ for( auto leftKey : reorderResult->LeftJoinKeys) {
+ leftJoinColumns.push_back(BuildAtom(leftKey.RelName, equiJoin.Pos(), ctx));
+ leftJoinColumns.push_back(BuildAtom(leftKey.AttributeNameWithAliases, equiJoin.Pos(), ctx));
+ }
+ for( auto rightKey : reorderResult->RightJoinKeys) {
+ rightJoinColumns.push_back(BuildAtom(rightKey.RelName, equiJoin.Pos(), ctx));
+ rightJoinColumns.push_back(BuildAtom(rightKey.AttributeNameWithAliases, equiJoin.Pos(), ctx));
}
TExprNode::TListType options(1U,
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
index c6f5be64fb2..6a73c7149a6 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
+++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
@@ -31,7 +31,8 @@ public:
bool leftAny,
bool rightAny,
bool isCommutative,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions
+ TVector<TJoinColumn>& leftJoinKeys,
+ TVector<TJoinColumn>& rightJoinKeys
)
: Left(left)
, Right(right)
@@ -39,14 +40,24 @@ public:
, LeftAny(leftAny)
, RightAny(rightAny)
, IsCommutative(isCommutative)
- , JoinConditions(joinConditions)
+ , LeftJoinKeys(leftJoinKeys)
+ , RightJoinKeys(rightJoinKeys)
, IsReversed(false)
{
- BuildCondVectors();
+ RemoveAttributeAliases();
}
bool AreCondVectorEqual() const {
- return LeftJoinKeys == RightJoinKeys;
+ TVector<TString> leftAttrNames;
+ TVector<TString> rightAttrNames;
+ for (auto & l : LeftJoinKeys) {
+ leftAttrNames.push_back(l.AttributeName);
+ }
+ for (auto & r : RightJoinKeys) {
+ rightAttrNames.push_back(r.AttributeName);
+ }
+
+ return leftAttrNames == rightAttrNames;
}
inline bool IsSimple() const {
@@ -58,32 +69,25 @@ public:
EJoinKind JoinKind;
bool LeftAny, RightAny;
bool IsCommutative;
- std::set<std::pair<TJoinColumn, TJoinColumn>> JoinConditions;
- TVector<TString> LeftJoinKeys;
- TVector<TString> RightJoinKeys;
+ TVector<TJoinColumn> LeftJoinKeys;
+ TVector<TJoinColumn> RightJoinKeys;
// JoinKind may not be commutative, so we need to know which edge is original and which is reversed.
bool IsReversed;
int64_t ReversedEdgeId = -1;
- void BuildCondVectors() {
- LeftJoinKeys.clear();
- RightJoinKeys.clear();
+ void RemoveAttributeAliases() {
- for (const auto& [left, right] : JoinConditions) {
- auto leftKey = left.AttributeName;
- auto rightKey = right.AttributeName;
-
- if (auto idx = leftKey.find_last_of('.'); idx != TString::npos) {
- leftKey = leftKey.substr(idx+1);
+ for (auto& leftKey : LeftJoinKeys ) {
+ if (auto idx = leftKey.AttributeName.find_last_of('.'); idx != TString::npos) {
+ leftKey.AttributeName = leftKey.AttributeName.substr(idx+1);
}
+ }
- if (auto idx = rightKey.find_last_of('.'); idx != TString::npos) {
- rightKey = rightKey.substr(idx+1);
+ for (auto& rightKey : RightJoinKeys ) {
+ if (auto idx = rightKey.AttributeName.find_last_of('.'); idx != TString::npos) {
+ rightKey.AttributeName = rightKey.AttributeName.substr(idx+1);
}
-
- LeftJoinKeys.emplace_back(leftKey);
- RightJoinKeys.emplace_back(rightKey);
}
}
};
@@ -133,10 +137,30 @@ public:
};
for (const auto& edge: Edges_) {
+ TString leftKeyStr;
+ TString rightKeyStr;
+
+ for (auto& l: edge.LeftJoinKeys) {
+ leftKeyStr.append(l.RelName);
+ leftKeyStr.append(".");
+ leftKeyStr.append(l.AttributeName);
+ leftKeyStr.append(",");
+ }
+
+ for (auto& r: edge.RightJoinKeys) {
+ rightKeyStr.append(r.RelName);
+ rightKeyStr.append(".");
+ rightKeyStr.append(r.AttributeName);
+ rightKeyStr.append(",");
+ }
res
.append(edgeSideToString(edge.Left))
.append(" -> ")
.append(edgeSideToString(edge.Right))
+ .append(" on ")
+ .append(leftKeyStr)
+ .append("==")
+ .append(rightKeyStr)
.append("\n");
}
@@ -164,17 +188,12 @@ public:
AddEdgeImpl(edge);
- std::set<std::pair<TJoinColumn, TJoinColumn>> reversedJoinConditions;
- for (const auto& [lhs, rhs]: edge.JoinConditions) {
- reversedJoinConditions.insert({rhs, lhs});
- }
-
TEdge reversedEdge = std::move(edge);
std::swap(reversedEdge.Left, reversedEdge.Right);
- reversedEdge.JoinConditions = std::move(reversedJoinConditions);
+ std::swap(reversedEdge.LeftJoinKeys, reversedEdge.RightJoinKeys);
reversedEdge.IsReversed = true;
reversedEdge.ReversedEdgeId = edgeId;
- reversedEdge.BuildCondVectors();
+ reversedEdge.RemoveAttributeAliases();
AddEdgeImpl(reversedEdge);
}
@@ -404,8 +423,15 @@ public:
edges.begin(),
edges.end(),
[](const THyperedge& lhs, const THyperedge& rhs) {
- auto lhsAttributeNames = lhs.LeftJoinKeys;
- auto rhsAttributeNames = rhs.LeftJoinKeys;
+ TVector<TString> lhsAttributeNames;
+ TVector<TString> rhsAttributeNames;
+
+ for (auto & l : lhs.LeftJoinKeys ) {
+ lhsAttributeNames.push_back(l.AttributeName);
+ }
+ for (auto & r : rhs.LeftJoinKeys ) {
+ rhsAttributeNames.push_back(r.AttributeName);
+ }
std::sort(lhsAttributeNames.begin(), lhsAttributeNames.end());
std::sort(rhsAttributeNames.begin(), rhsAttributeNames.end());
@@ -439,9 +465,12 @@ private:
bool isJoinCommutative = edges[groupBegin].IsCommutative;
TVector<TString> groupConditionUsedAttributes;
- for (const auto& [lhs, rhs]: edges[groupBegin].JoinConditions) {
+ for (const auto& lhs: edges[groupBegin].LeftJoinKeys) {
groupConditionUsedAttributes.push_back(lhs.AttributeName);
}
+ for (const auto& rhs: edges[groupBegin].RightJoinKeys) {
+ groupConditionUsedAttributes.push_back(rhs.AttributeName);
+ }
TDisjointSets connectedComponents(nodeSetSize);
for (size_t edgeId = groupBegin; edgeId < groupEnd; ++edgeId) {
@@ -464,15 +493,15 @@ private:
TString lhsRelName = nodes[i].RelationOptimizerNode->Labels()[0];
TString rhsRelName = nodes[j].RelationOptimizerNode->Labels()[0];
- std::set<std::pair<TJoinColumn, TJoinColumn>> joinConditions;
+ TVector<TJoinColumn> leftKeys;
+ TVector<TJoinColumn> rightKeys;
+
for (const auto& attributeName: groupConditionUsedAttributes){
- joinConditions.insert({
- TJoinColumn(lhsRelName, attributeName),
- TJoinColumn(rhsRelName, attributeName)
- });
+ leftKeys.push_back(TJoinColumn(lhsRelName, attributeName));
+ rightKeys.push_back(TJoinColumn(rhsRelName, attributeName));
}
- auto e = THyperedge(lhs, rhs, groupJoinKind, false, false, isJoinCommutative, joinConditions);
+ auto e = THyperedge(lhs, rhs, groupJoinKind, false, false, isJoinCommutative, leftKeys, rightKeys);
Graph_.AddEdge(std::move(e));
}
}
@@ -480,8 +509,16 @@ private:
}
bool HasOneGroup(const THyperedge& lhs, const THyperedge& rhs) {
- auto lhsAttributeNames = lhs.LeftJoinKeys;
- auto rhsAttributeNames = rhs.LeftJoinKeys;
+ TVector<TString> lhsAttributeNames;
+ TVector<TString> rhsAttributeNames;
+
+ for (auto & l : lhs.LeftJoinKeys) {
+ lhsAttributeNames.push_back(l.AttributeName);
+ }
+
+ for (auto & r : rhs.LeftJoinKeys) {
+ rhsAttributeNames.push_back(r.AttributeName);
+ }
std::sort(lhsAttributeNames.begin(), lhsAttributeNames.end());
std::sort(rhsAttributeNames.begin(), rhsAttributeNames.end());
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp
index 5b13ee7cbd6..d54b793009b 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.cpp
@@ -5,9 +5,8 @@ namespace NYql::NDq {
std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
std::shared_ptr<IBaseOptimizerNode> left,
std::shared_ptr<IBaseOptimizerNode> right,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
EJoinKind joinKind,
EJoinAlgoType joinAlgo,
bool leftAny,
@@ -15,7 +14,7 @@ std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
IProviderContext& ctx,
TCardinalityHints::TCardinalityHint* maybeHint) {
- auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo, leftAny, rightAny);
+ auto res = std::make_shared<TJoinOptimizerNodeInternal>(left, right, leftJoinKeys, rightJoinKeys, joinKind, joinAlgo, leftAny, rightAny);
res->Stats = std::make_shared<TOptimizerStatistics>(ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint));
return res;
}
@@ -39,7 +38,7 @@ std::shared_ptr<TJoinOptimizerNode> ConvertFromInternal(const std::shared_ptr<IB
right = ConvertFromInternal(right);
}
- auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->JoinConditions, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny);
+ auto newJoin = std::make_shared<TJoinOptimizerNode>(left, right, join->LeftJoinKeys, join->RightJoinKeys, join->JoinType, join->JoinAlgo, join->LeftAny, join->RightAny);
newJoin->Stats = join->Stats;
return newJoin;
}
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h
index 9e626bc356b..f8e50f3b336 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h
+++ b/ydb/library/yql/dq/opt/dq_opt_join_tree_node.h
@@ -18,9 +18,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
TJoinOptimizerNodeInternal(
const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
const bool leftAny,
@@ -29,7 +28,6 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
: IBaseOptimizerNode(JoinNodeType)
, LeftArg(left)
, RightArg(right)
- , JoinConditions(joinConditions)
, LeftJoinKeys(leftJoinKeys)
, RightJoinKeys(rightJoinKeys)
, JoinType(joinType)
@@ -51,9 +49,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
std::shared_ptr<IBaseOptimizerNode> LeftArg;
std::shared_ptr<IBaseOptimizerNode> RightArg;
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& JoinConditions;
- const TVector<TString>& LeftJoinKeys;
- const TVector<TString>& RightJoinKeys;
+ const TVector<TJoinColumn>& LeftJoinKeys;
+ const TVector<TJoinColumn>& RightJoinKeys;
EJoinKind JoinType;
EJoinAlgoType JoinAlgo;
const bool LeftAny;
@@ -66,9 +63,8 @@ struct TJoinOptimizerNodeInternal : public IBaseOptimizerNode {
std::shared_ptr<TJoinOptimizerNodeInternal> MakeJoinInternal(
std::shared_ptr<IBaseOptimizerNode> left,
std::shared_ptr<IBaseOptimizerNode> right,
- const std::set<std::pair<TJoinColumn, TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys,
- const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys,
+ const TVector<TJoinColumn>& rightJoinKeys,
EJoinKind joinKind,
EJoinAlgoType joinAlgo,
bool leftAny,
diff --git a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
index 4e347ab5973..9d3443621e4 100644
--- a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
+++ b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
@@ -21,11 +21,13 @@ namespace NYql::NDq {
inline TVector<TString> GetConditionUsedRelationNames(const std::shared_ptr<TJoinOptimizerNode>& joinNode) {
TVector<TString> res;
- res.reserve(joinNode->JoinConditions.size());
+ res.reserve(joinNode->LeftJoinKeys.size());
- for (const auto& [lhsTable, rhsTable]: joinNode->JoinConditions) {
- res.push_back(lhsTable.RelName);
- res.push_back(rhsTable.RelName);
+ for (const auto& lhs : joinNode->LeftJoinKeys ) {
+ res.push_back(lhs.RelName);
+ }
+ for (const auto& rhs : joinNode->RightJoinKeys ) {
+ res.push_back(rhs.RelName);
}
return res;
@@ -57,7 +59,7 @@ typename TJoinHypergraph<TNodeSet>::TEdge MakeHyperedge(
TNodeSet right = TES & subtreeNodes[joinNode->RightArg];
bool isCommutative = OperatorIsCommutative(joinNode->JoinType) && (joinNode->IsReorderable);
- return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, joinNode->JoinConditions);
+ return typename TJoinHypergraph<TNodeSet>::TEdge(left, right, joinNode->JoinType, joinNode->LeftAny, joinNode->RightAny, isCommutative, joinNode->LeftJoinKeys, joinNode->RightJoinKeys);
}
template<typename TNodeSet>
diff --git a/ydb/library/yql/dq/opt/dq_opt_stat.cpp b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
index 149f72f79b6..a4b2c1299c3 100644
--- a/ydb/library/yql/dq/opt/dq_opt_stat.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_stat.cpp
@@ -30,6 +30,17 @@ namespace {
return attributeName;
}
+ // Returns the alias that immediately precedes the attribute in a dotted column name ("a.b.c" -> "b", "b.c" -> "b"); returns an empty string when the name contains no '.'.
+ TString ExtractAlias(TString attributeName) {
+     const auto attrDot = attributeName.find_last_of('.');
+     if (attrDot == TString::npos) {
+         return TString();
+     }
+     auto alias = attributeName.substr(0, attrDot);
+     const auto aliasDot = alias.find_last_of('.'); // guard on this search result, not on attrDot, before trimming the leading components
+     return aliasDot == TString::npos ? alias : alias.substr(aliasDot + 1);
+ }
+
TVector<TString> InferLabels(std::shared_ptr<TOptimizerStatistics>& stats, TCoAtomList joinColumns) {
if(stats->Labels) {
return *stats->Labels;
@@ -261,14 +272,18 @@ void InferStatisticsForMapJoin(const TExprNode::TPtr& input, TTypeAnnotationCont
leftStats = ApplyCardinalityHints(leftStats, leftLabels, hints);
rightStats = ApplyCardinalityHints(rightStats, rightLabels, hints);
- TVector<TString> leftJoinKeys;
- TVector<TString> rightJoinKeys;
+ TVector<TJoinColumn> leftJoinKeys;
+ TVector<TJoinColumn> rightJoinKeys;
for (size_t i=0; i<join.LeftKeysColumnNames().Size(); i++) {
- leftJoinKeys.push_back(RemoveAliases(join.LeftKeysColumnNames().Item(i).StringValue()));
+ auto alias = ExtractAlias(join.LeftKeysColumnNames().Item(i).StringValue());
+ auto attrName = RemoveAliases(join.LeftKeysColumnNames().Item(i).StringValue());
+ leftJoinKeys.push_back(TJoinColumn(alias, attrName));
}
for (size_t i=0; i<join.RightKeysColumnNames().Size(); i++) {
- rightJoinKeys.push_back(RemoveAliases(join.RightKeysColumnNames().Item(i).StringValue()));
+ auto alias = ExtractAlias(join.RightKeysColumnNames().Item(i).StringValue());
+ auto attrName = RemoveAliases(join.RightKeysColumnNames().Item(i).StringValue());
+ rightJoinKeys.push_back(TJoinColumn(alias, attrName));
}
auto unionOfLabels = UnionLabels(leftLabels, rightLabels);
@@ -312,14 +327,18 @@ void InferStatisticsForGraceJoin(const TExprNode::TPtr& input, TTypeAnnotationCo
leftStats = ApplyCardinalityHints(leftStats, leftLabels, hints);
rightStats = ApplyCardinalityHints(rightStats, rightLabels, hints);
- TVector<TString> leftJoinKeys;
- TVector<TString> rightJoinKeys;
+ TVector<TJoinColumn> leftJoinKeys;
+ TVector<TJoinColumn> rightJoinKeys;
for (size_t i=0; i<join.LeftKeysColumnNames().Size(); i++) {
- leftJoinKeys.push_back(RemoveAliases(join.LeftKeysColumnNames().Item(i).StringValue()));
+ auto alias = ExtractAlias(join.LeftKeysColumnNames().Item(i).StringValue());
+ auto attrName = RemoveAliases(join.LeftKeysColumnNames().Item(i).StringValue());
+ leftJoinKeys.push_back(TJoinColumn(alias, attrName));
}
for (size_t i=0; i<join.RightKeysColumnNames().Size(); i++) {
- rightJoinKeys.push_back(RemoveAliases(join.RightKeysColumnNames().Item(i).StringValue()));
+ auto alias = ExtractAlias(join.RightKeysColumnNames().Item(i).StringValue());
+ auto attrName = RemoveAliases(join.RightKeysColumnNames().Item(i).StringValue());
+ rightJoinKeys.push_back(TJoinColumn(alias, attrName));
}
auto unionOfLabels = UnionLabels(leftLabels, rightLabels);
diff --git a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp
index 71b478258a4..dfaff2a9115 100644
--- a/ydb/library/yql/providers/dq/opt/logical_optimize.cpp
+++ b/ydb/library/yql/providers/dq/opt/logical_optimize.cpp
@@ -49,8 +49,7 @@ struct TDqCBOProviderContext : public NYql::TBaseProviderContext {
virtual bool IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
- const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys, const TVector<TJoinColumn>& rightJoinKeys,
NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) override;
virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override;
@@ -62,12 +61,10 @@ struct TDqCBOProviderContext : public NYql::TBaseProviderContext {
bool TDqCBOProviderContext::IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
- const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
- const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
+ const TVector<TJoinColumn>& leftJoinKeys, const TVector<TJoinColumn>& rightJoinKeys,
NYql::EJoinAlgoType joinAlgo, NYql::EJoinKind joinKind) {
Y_UNUSED(left);
Y_UNUSED(right);
- Y_UNUSED(joinConditions);
Y_UNUSED(leftJoinKeys);
Y_UNUSED(rightJoinKeys);
diff --git a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
index f4092b08b0b..8ab1dc5962d 100644
--- a/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
+++ b/ydb/library/yql/providers/yt/provider/ut/yql_yt_cbo_ut.cpp
@@ -78,10 +78,11 @@ Y_UNIT_TEST(NonReordable) {
auto left = std::make_shared<TRelOptimizerNode>("a", stat);
auto right = std::make_shared<TRelOptimizerNode>("a", stat);
- std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
- joinConditions.insert({NDq::TJoinColumn{"a", "b"}, NDq::TJoinColumn{"a","c"}});
+ TVector<NDq::TJoinColumn> leftKeys = {NDq::TJoinColumn{"a", "b"}};
+ TVector<NDq::TJoinColumn> rightKeys = {NDq::TJoinColumn{"a","c"}};
+
auto root = std::make_shared<TJoinOptimizerNode>(
- left, right, joinConditions, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true);
+ left, right, leftKeys, rightKeys, EJoinKind::InnerJoin, EJoinAlgoType::GraceJoin, false, false, true);
TBaseProviderContext optCtx;
std::unique_ptr<IOptimizerNew> opt = std::unique_ptr<IOptimizerNew>(NDq::MakeNativeOptimizerNew(optCtx, 1024));
auto result = opt->JoinSearch(root);
diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
index 4c15ac7bdab..e09fa86d7b9 100644
--- a/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
+++ b/ydb/library/yql/providers/yt/provider/yql_yt_join_reorder.cpp
@@ -162,11 +162,12 @@ class TYtJoinOptimizerNode: public TJoinOptimizerNode {
public:
TYtJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
const std::shared_ptr<IBaseOptimizerNode>& right,
- const std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>>& joinConditions,
+ const TVector<NDq::TJoinColumn>& leftKeys,
+ const TVector<NDq::TJoinColumn>& rightKeys,
const EJoinKind joinType,
const EJoinAlgoType joinAlgo,
TYtJoinNodeOp* originalOp)
- : TJoinOptimizerNode(left, right, joinConditions, joinType, joinAlgo,
+ : TJoinOptimizerNode(left, right, leftKeys, rightKeys, joinType, joinAlgo,
originalOp ? originalOp->LinkSettings.LeftHints.contains("any") : false,
originalOp ? originalOp->LinkSettings.RightHints.contains("any") : false,
originalOp != nullptr)
@@ -209,7 +210,8 @@ private:
std::shared_ptr<IBaseOptimizerNode> OnOp(TYtJoinNodeOp* op) {
auto joinKind = ConvertToJoinKind(TString(op->JoinKind->Content()));
YQL_ENSURE(op->LeftLabel->ChildrenSize() == op->RightLabel->ChildrenSize());
- std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
+ TVector<NDq::TJoinColumn> leftKeys;
+ TVector<NDq::TJoinColumn> rightKeys;
for (ui32 i = 0; i < op->LeftLabel->ChildrenSize(); i += 2) {
auto ltable = op->LeftLabel->Child(i)->Content();
auto lcolumn = op->LeftLabel->Child(i + 1)->Content();
@@ -219,7 +221,8 @@ private:
AddRelJoinColumn(TString(rtable), TString(rcolumn));
NDq::TJoinColumn lcol{TString(ltable), TString(lcolumn)};
NDq::TJoinColumn rcol{TString(rtable), TString(rcolumn)};
- joinConditions.insert({lcol, rcol});
+ leftKeys.push_back(lcol);
+ rightKeys.push_back(rcol);
}
auto left = ProcessNode(op->Left);
auto right = ProcessNode(op->Right);
@@ -228,7 +231,7 @@ private:
ProviderCtx->HasHints = ProviderCtx->HasHints || !op->LinkSettings.LeftHints.empty() || !op->LinkSettings.RightHints.empty();
return std::make_shared<TYtJoinOptimizerNode>(
- left, right, joinConditions, joinKind, EJoinAlgoType::GraceJoin, nonReorderable ? op : nullptr
+ left, right, leftKeys, rightKeys, joinKind, EJoinAlgoType::GraceJoin, nonReorderable ? op : nullptr
);
}
@@ -356,12 +359,13 @@ TYtJoinNode::TPtr BuildYtJoinTree(std::shared_ptr<IBaseOptimizerNode> node, TVec
ret = MakeIntrusive<TYtJoinNodeOp>();
ret->JoinKind = ctx.NewAtom(pos, ConvertToJoinString(op->JoinType));
TVector<TExprNodePtr> leftLabel, rightLabel;
- leftLabel.reserve(op->JoinConditions.size() * 2);
- rightLabel.reserve(op->JoinConditions.size() * 2);
- for (auto& [left, right] : op->JoinConditions) {
+ leftLabel.reserve(op->LeftJoinKeys.size() * 2);
+ rightLabel.reserve(op->RightJoinKeys.size() * 2);
+ for (auto& left : op->LeftJoinKeys) {
leftLabel.emplace_back(ctx.NewAtom(pos, left.RelName));
leftLabel.emplace_back(ctx.NewAtom(pos, left.AttributeName));
-
+ }
+ for (auto& right : op->RightJoinKeys) {
rightLabel.emplace_back(ctx.NewAtom(pos, right.RelName));
rightLabel.emplace_back(ctx.NewAtom(pos, right.AttributeName));
}
diff --git a/ydb/library/yql/sql/pg/optimizer.cpp b/ydb/library/yql/sql/pg/optimizer.cpp
index d1134092cbb..0548e7b5d5e 100644
--- a/ydb/library/yql/sql/pg/optimizer.cpp
+++ b/ydb/library/yql/sql/pg/optimizer.cpp
@@ -491,11 +491,11 @@ struct TPgOptimizerImpl
std::vector<std::tuple<int,int,TStringBuf,TStringBuf>>& rightVars,
const std::shared_ptr<TJoinOptimizerNode>& op)
{
- for (auto& [l, r]: op->JoinConditions) {
- auto& ltable = l.RelName;
- auto& lcol = l.AttributeName;
- auto& rtable = r.RelName;
- auto& rcol = r.AttributeName;
+ for (size_t i=0; i<op->LeftJoinKeys.size(); i++ ) {
+ auto& ltable = op->LeftJoinKeys[i].RelName;
+ auto& lcol = op->LeftJoinKeys[i].AttributeName;
+ auto& rtable = op->RightJoinKeys[i].RelName;
+ auto& rcol = op->RightJoinKeys[i].AttributeName;
const auto& lrelIds = Table2RelIds[ltable];
YQL_ENSURE(!lrelIds.empty());
@@ -562,7 +562,7 @@ struct TPgOptimizerImpl
MakeEqClasses(EqClasses, leftVars, rightVars);
} else if (op->JoinType == LeftJoin || op->JoinType == RightJoin) {
- CHECK(op->JoinConditions.size() == 1, "Only 1 var per join supported");
+ CHECK(op->LeftJoinKeys.size() == 1 && op->RightJoinKeys.size() == 1, "Only 1 var per join supported");
std::vector<std::tuple<int,int,TStringBuf,TStringBuf>> leftVars, rightVars;
ExtractVars(leftVars, rightVars, op);
@@ -637,22 +637,23 @@ struct TPgOptimizerImpl
YQL_ENSURE(node->LeftVars.size() == node->RightVars.size());
- std::set<std::pair<NDq::TJoinColumn, NDq::TJoinColumn>> joinConditions;
+ TVector<NDq::TJoinColumn> leftJoinKeys;
+ TVector<NDq::TJoinColumn> rightJoinKeys;
+
for (size_t i = 0; i < node->LeftVars.size(); i++) {
auto [lrelId, lvarId] = node->LeftVars[i];
auto [rrelId, rvarId] = node->RightVars[i];
auto [ltable, lcolumn] = Var2TableCol[lrelId - 1][lvarId - 1];
auto [rtable, rcolumn] = Var2TableCol[rrelId - 1][rvarId - 1];
- joinConditions.insert({
- NDq::TJoinColumn{TString(ltable), TString(lcolumn)},
- NDq::TJoinColumn{TString(rtable), TString(rcolumn)}
- });
+ leftJoinKeys.push_back(NDq::TJoinColumn(TString(ltable), TString(lcolumn)));
+ rightJoinKeys.push_back(NDq::TJoinColumn(TString(rtable), TString(rcolumn)));
}
return std::make_shared<TJoinOptimizerNode>(
left, right,
- joinConditions,
+ leftJoinKeys,
+ rightJoinKeys,
joinKind,
EJoinAlgoType::MapJoin,
false,
diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
index a8d3c9d90df..fb3e15ab50f 100644
--- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
+++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
@@ -765,9 +765,9 @@
],
"test.test[dq-join_cbo_native_3_tables--Debug]": [
{
- "checksum": "91570a2f667516ba1f3f28642698441f",
- "size": 4802,
- "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
+ "checksum": "bc4f0d3c80bc05fdb553d9d07ed58fd2",
+ "size": 4846,
+ "uri": "https://{canondata_backend}/1597364/aa2251cc1cffd9f5ef1d8d1793ee54509ab8cdfc/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
}
],
"test.test[dq-join_cbo_native_3_tables--Plan]": [