aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpilik <pudge1000-7@ydb.tech>2024-09-17 18:56:49 +0300
committerGitHub <noreply@github.com>2024-09-17 17:56:49 +0200
commitd7347606ebe15879a709a14f60eabf91b238961b (patch)
tree7e095e076fd19ac7f0b3b8e1215316d2e40b2306
parent162c9f5fa8fa3b2482a9d22de01288e7365a5037 (diff)
downloadydb-d7347606ebe15879a709a14f60eabf91b238961b.tar.gz
[CBO] Hints parser added (#9252)
-rw-r--r--ydb/core/kqp/opt/kqp_opt.h42
-rw-r--r--ydb/core/kqp/opt/kqp_statistics_transformer.h2
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_log.cpp18
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_log.h7
-rw-r--r--ydb/core/kqp/provider/yql_kikimr_settings.cpp4
-rw-r--r--ydb/core/kqp/provider/yql_kikimr_settings.h4
-rw-r--r--ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql25
-rw-r--r--ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql25
-rw-r--r--ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql21
-rw-r--r--ydb/core/kqp/ut/join/data/queries/test_join_hint.sql8
-rw-r--r--ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql3
-rw-r--r--ydb/library/yql/core/cbo/cbo_hints.cpp296
-rw-r--r--ydb/library/yql/core/cbo/cbo_optimizer_new.cpp82
-rw-r--r--ydb/library/yql/core/cbo/cbo_optimizer_new.h93
-rw-r--r--ydb/library/yql/core/cbo/ya.make1
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h65
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp10
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h35
-rw-r--r--ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h6
19 files changed, 528 insertions, 219 deletions
diff --git a/ydb/core/kqp/opt/kqp_opt.h b/ydb/core/kqp/opt/kqp_opt.h
index 1904aef5215..731b94368f0 100644
--- a/ydb/core/kqp/opt/kqp_opt.h
+++ b/ydb/core/kqp/opt/kqp_opt.h
@@ -18,7 +18,7 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
, QueryCtx(queryCtx)
, Tables(tables)
, UserRequestContext(userRequestContext)
- {
+ {
YQL_ENSURE(QueryCtx);
YQL_ENSURE(Tables);
}
@@ -31,9 +31,7 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
int JoinsCount{};
int EquiJoinsCount{};
std::shared_ptr<NJson::TJsonValue> OverrideStatistics{};
- std::shared_ptr<NYql::TCardinalityHints> CardinalityHints{};
- std::shared_ptr<NYql::TJoinAlgoHints> JoinAlgoHints{};
- std::shared_ptr<NYql::TJoinOrderHints> JoinOrderHints{};
+ std::shared_ptr<NYql::TOptimizerHints> Hints{};
std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() {
if (Config->OptOverrideStatistics.Get()) {
@@ -49,37 +47,17 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> {
}
}
- NYql::TCardinalityHints GetCardinalityHints() {
- if (Config->OptCardinalityHints.Get()) {
- if (!CardinalityHints) {
- CardinalityHints = std::make_shared<NYql::TCardinalityHints>(*Config->OptCardinalityHints.Get());
+ NYql::TOptimizerHints GetOptimizerHints() {
+ if (Config->OptimizerHints.Get()) {
+ if (!Hints) {
+ Hints = std::make_shared<NYql::TOptimizerHints>(
+ NYql::TOptimizerHints::Parse(*Config->OptimizerHints.Get())
+ );
}
- return *CardinalityHints;
- } else {
- return NYql::TCardinalityHints();
- }
- }
-
- NYql::TJoinAlgoHints GetJoinAlgoHints() {
- if (Config->OptJoinAlgoHints.Get()) {
- if (!JoinAlgoHints) {
- JoinAlgoHints = std::make_shared<NYql::TJoinAlgoHints>(*Config->OptJoinAlgoHints.Get());
- }
- return *JoinAlgoHints;
- } else {
- return NYql::TJoinAlgoHints();
+ return *Hints;
}
- }
- NYql::TJoinOrderHints GetJoinOrderHints() {
- if (Config->OptJoinOrderHints.Get()) {
- if (!JoinOrderHints) {
- JoinOrderHints = std::make_shared<NYql::TJoinOrderHints>(*Config->OptJoinOrderHints.Get());
- }
- return *JoinOrderHints;
- } else {
- return NYql::TJoinOrderHints();
- }
+ return NYql::TOptimizerHints();
}
bool IsDataQuery() const {
diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.h b/ydb/core/kqp/opt/kqp_statistics_transformer.h
index e74646bc088..46195f739ed 100644
--- a/ydb/core/kqp/opt/kqp_statistics_transformer.h
+++ b/ydb/core/kqp/opt/kqp_statistics_transformer.h
@@ -35,7 +35,7 @@ class TKqpStatisticsTransformer : public NYql::NDq::TDqStatisticsTransformerBase
public:
TKqpStatisticsTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, TTypeAnnotationContext& typeCtx,
const TKikimrConfiguration::TPtr& config, const TKqpProviderContext& pctx) :
- TDqStatisticsTransformerBase(&typeCtx, pctx, kqpCtx->GetCardinalityHints()),
+ TDqStatisticsTransformerBase(&typeCtx, pctx, *kqpCtx->GetOptimizerHints().CardinalityHints),
Config(config),
KqpCtx(*kqpCtx) {}
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
index 3030256614a..f6471e103f0 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp
@@ -11,6 +11,7 @@
#include <ydb/library/yql/dq/opt/dq_opt_join.h>
#include <ydb/library/yql/dq/opt/dq_opt_log.h>
#include <ydb/library/yql/dq/opt/dq_opt_hopping.h>
+#include <ydb/library/yql/utils/log/log.h>
#include <ydb/library/yql/providers/common/transform/yql_optimize.h>
#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
@@ -87,6 +88,17 @@ public:
SetGlobal(4u);
}
+public:
+ TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) override {
+ auto status = TOptimizeTransformerBase::DoTransform(input, output, ctx);
+
+ for (const auto& hint: KqpCtx.GetOptimizerHints().GetUnappliedHintStrings()) {
+ YQL_CLOG(WARN, ProviderYdb) << "Unapplied hint: " + hint;
+ }
+
+ return status;
+ }
+
protected:
TMaybeNode<TExprBase> PushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TGetParents& getParents) {
TExprBase output = KqpPushExtractedPredicateToReadTable(node, ctx, KqpCtx, TypesCtx, *getParents());
@@ -155,11 +167,7 @@ protected:
rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), stat, node));
},
KqpCtx.EquiJoinsCount,
- TOptimizerHints{
- .CardinalityHints = KqpCtx.GetCardinalityHints(),
- .JoinAlgoHints = KqpCtx.GetJoinAlgoHints(),
- .JoinOrderHints = KqpCtx.GetJoinOrderHints()
- }
+ KqpCtx.GetOptimizerHints()
);
DumpAppliedRule("OptimizeEquiJoinWithCosts", node.Ptr(), output.Ptr(), ctx);
return output;
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.h b/ydb/core/kqp/opt/logical/kqp_opt_log.h
index cfccb10df8d..4e86d361c62 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_log.h
+++ b/ydb/core/kqp/opt/logical/kqp_opt_log.h
@@ -7,7 +7,10 @@ namespace NKikimr::NKqp::NOpt {
struct TKqpOptimizeContext;
-TAutoPtr<NYql::IGraphTransformer> CreateKqpLogOptTransformer(TIntrusivePtr<TKqpOptimizeContext>& kqpCtx,
- NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config);
+TAutoPtr<NYql::IGraphTransformer> CreateKqpLogOptTransformer(
+ TIntrusivePtr<TKqpOptimizeContext>& kqpCtx,
+ NYql::TTypeAnnotationContext& typesCtx,
+ const NYql::TKikimrConfiguration::TPtr& config
+);
} // namespace NKikimr::NKqp::NOpt
diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.cpp b/ydb/core/kqp/provider/yql_kikimr_settings.cpp
index 6380b81c2a4..3b74b8ecd76 100644
--- a/ydb/core/kqp/provider/yql_kikimr_settings.cpp
+++ b/ydb/core/kqp/provider/yql_kikimr_settings.cpp
@@ -82,9 +82,7 @@ TKikimrConfiguration::TKikimrConfiguration() {
REGISTER_SETTING(*this, OptEnableOlapPushdown);
REGISTER_SETTING(*this, OptEnableOlapProvideComputeSharding);
REGISTER_SETTING(*this, OptOverrideStatistics);
- REGISTER_SETTING(*this, OptCardinalityHints);
- REGISTER_SETTING(*this, OptJoinAlgoHints);
- REGISTER_SETTING(*this, OptJoinOrderHints);
+ REGISTER_SETTING(*this, OptimizerHints);
REGISTER_SETTING(*this, OverridePlanner);
REGISTER_SETTING(*this, UseGraceJoinCoreForMap);
diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h
index d4d851c0fd6..8e6e706de43 100644
--- a/ydb/core/kqp/provider/yql_kikimr_settings.h
+++ b/ydb/core/kqp/provider/yql_kikimr_settings.h
@@ -55,9 +55,7 @@ struct TKikimrSettings {
NCommon::TConfSetting<bool, false> UseGraceJoinCoreForMap;
NCommon::TConfSetting<TString, false> OptOverrideStatistics;
- NCommon::TConfSetting<TString, false> OptCardinalityHints;
- NCommon::TConfSetting<TString, false> OptJoinAlgoHints;
- NCommon::TConfSetting<TString, false> OptJoinOrderHints;
+ NCommon::TConfSetting<TString, false> OptimizerHints;
/* Disable optimizer rules */
NCommon::TConfSetting<bool, false> OptDisableTopSort;
diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql
index cba3a99a1d3..12501dbbed4 100644
--- a/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql
+++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql
@@ -1,19 +1,18 @@
PRAGMA TablePathPrefix='/Root';
-PRAGMA ydb.OptJoinOrderHints=
- '[
- [["R", "S"], ["T", "U"]]
- ]';
+PRAGMA ydb.OptimizerHints =
+'
+ Card(Unused # 10e8)
+ JoinOrder( (Unused1 Unused2) (Unused3 Unused4) )
-PRAGMA ydb.OptCardinalityHints =
- '[
- {"labels":["R"], "op":"#", "value":10e8},
- {"labels":["T"], "op":"#", "value":1},
- {"labels":["R", "T"], "op":"#", "value":1},
- {"labels":["R", "S"], "op":"#", "value":10e8},
- {"labels":["T", "U"], "op":"#", "value":10e8},
- {"labels":["V"], "op":"#", "value":1}
- ]';
+ Card(R # 10e8)
+ Card(T # 1)
+ Card(R T # 1)
+ Card(R S # 10e8)
+ Card(T U # 10e8)
+ Card(V # 1)
+ JoinOrder( (R S) (T U) )
+';
SELECT * FROM
R INNER JOIN S on R.id = S.id
diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql
index 5ddee5c2e54..c1b8aaeb993 100644
--- a/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql
+++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql
@@ -1,19 +1,16 @@
PRAGMA TablePathPrefix='/Root';
-PRAGMA ydb.OptJoinOrderHints=
- '[
- ["R", "S"],
- ["T", "U"]
- ]';
-PRAGMA ydb.OptCardinalityHints =
- '[
- {"labels":["R"], "op":"#", "value":10e8},
- {"labels":["T"], "op":"#", "value":1},
- {"labels":["R", "T"], "op":"#", "value":1},
- {"labels":["R", "S"], "op":"#", "value":10e8},
- {"labels":["T", "U"], "op":"#", "value":10e8},
- {"labels":["V"], "op":"#", "value":1}
- ]';
+PRAGMA ydb.OptimizerHints =
+'
+ Card(R # 10e8)
+ Card(T # 1)
+ Card(R T # 1)
+ Card(R S # 10e8)
+ Card(T U # 10e8)
+ Card(V # 1)
+ JoinOrder(T U)
+ JoinOrder(R S)
+';
SELECT * FROM
R INNER JOIN S on R.id = S.id
diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql
index 7482d192cea..9c53e3ad07f 100644
--- a/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql
+++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql
@@ -1,17 +1,14 @@
PRAGMA TablePathPrefix='/Root';
-PRAGMA ydb.OptCardinalityHints =
- '[
- {"labels":["R"], "op":"#", "value":10e8},
- {"labels":["T"], "op":"#", "value":1},
- {"labels":["S"], "op":"#", "value":10e8},
- {"labels":["R", "T"], "op":"#", "value":1},
- {"labels":["R", "S"], "op":"#", "value":10e8}
- ]';
-PRAGMA ydb.OptJoinOrderHints=
- '[
- ["T", ["R", "S"]]
- ]';
+PRAGMA ydb.OptimizerHints =
+'
+ Card(R # 10e8)
+ Card(T # 1)
+ Card(S # 10e8)
+ Card(R T # 1)
+ Card(R S # 10e8)
+ JoinOrder(T (R S))
+';
SELECT * FROM
R INNER JOIN S on R.id = S.id
diff --git a/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql b/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql
index b58b49e3e3f..bf61264170f 100644
--- a/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql
+++ b/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql
@@ -1,5 +1,9 @@
-PRAGMA ydb.OptCardinalityHints = '[{"labels":["R"], "op":"#", "value":1}, {"labels":["R","S"], "op":"#", "value":10e6}]';
-PRAGMA ydb.OptJoinAlgoHints = '[{"labels":["R","S"], "algo":"GraceJoin"}]';
+PRAGMA ydb.OptimizerHints =
+'
+ JoinAlgo(R S Grace)
+ Card(R # 1)
+ Card(R S # 10e6)
+';
SELECT *
FROM `/Root/R` as R
diff --git a/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql b/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql
index 1e56ae6aef0..e932682c3a3 100644
--- a/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql
+++ b/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql
@@ -1,4 +1,5 @@
-PRAGMA ydb.OptCardinalityHints = '[{"labels":["R","S"], "op":"#", "value":1.0}]';
+PRAGMA ydb.OptimizerHints = 'Card(R S # 1)';
+
SELECT *
FROM `/Root/R` as R
diff --git a/ydb/library/yql/core/cbo/cbo_hints.cpp b/ydb/library/yql/core/cbo/cbo_hints.cpp
new file mode 100644
index 00000000000..4d598c2fd72
--- /dev/null
+++ b/ydb/library/yql/core/cbo/cbo_hints.cpp
@@ -0,0 +1,296 @@
+#include "cbo_optimizer_new.h"
+
+#include <util/string/join.h>
+#include <util/string/printf.h>
+#include <library/cpp/iterator/zip.h>
+
+using namespace NYql;
+
+class TOptimizerHintsParser {
+public:
+ TOptimizerHintsParser(const TString& text)
+ : Pos(-1)
+ , Size(static_cast<i32>(text.Size()) - 1)
+ , Text(text)
+ {}
+
+ TOptimizerHints Parse() {
+ Start();
+ return Hints;
+ }
+
+private:
+ void Start() {
+ while (Pos < Size) {
+ auto hintType = Keyword({"JoinOrder", "JoinAlgo", "Card"});
+ if (hintType == "JoinOrder") {
+ JoinOrder();
+ } else if (hintType == "JoinAlgo") {
+ JoinAlgo();
+ } else if (hintType == "Card"){
+ Card();
+ } else {
+ ParseError(Sprintf("Undefined hints type: %s", hintType.c_str()), Pos - hintType.Size());
+ }
+
+ SkipWhiteSpaces();
+ }
+ }
+
+ TVector<TString> CollectLabels() {
+ TVector<TString> labels;
+ while (auto maybeTerm = MaybeLabel()) {
+ labels.push_back(maybeTerm.value());
+ }
+ return labels;
+ }
+
+ void JoinAlgo() {
+ Keyword({"("});
+
+ i32 beginPos = Pos + 1;
+
+ i32 labelsBeginPos = Pos + 1;
+ TVector<TString> labels = CollectLabels();
+ if (labels.size() <= 1) {
+ ParseError(Sprintf("Bad labels for JoinAlgo hint: %s, example of the format: JoinAlgo(t1 t2 Grace)", JoinSeq(", ", labels).c_str()), labelsBeginPos);
+ }
+ TString reqJoinAlgoStr = std::move(labels.back());
+ labels.pop_back();
+
+ Keyword({")"});
+
+ TVector<EJoinAlgoType> joinAlgos = {EJoinAlgoType::GraceJoin, EJoinAlgoType::LookupJoin, EJoinAlgoType::MapJoin};
+ TVector<TString> joinAlgosStr = {"Grace", "Lookup", "Map"};
+
+ for (const auto& [joinAlgo, joinAlgoStr]: Zip(joinAlgos, joinAlgosStr)) {
+ if (reqJoinAlgoStr == joinAlgoStr) {
+ Hints.JoinAlgoHints->PushBack(std::move(labels), joinAlgo, "JoinOrder" + Text.substr(beginPos, Pos - beginPos + 1));
+ return;
+ }
+ }
+
+ ParseError(Sprintf("Unknown JoinAlgo: '%s', supported algos: [%s]", reqJoinAlgoStr.c_str(), JoinSeq(", ", joinAlgosStr).c_str()), Pos - reqJoinAlgoStr.Size());
+ Y_UNREACHABLE();
+ }
+
+ void JoinOrder() {
+ i32 beginPos = Pos + 1;
+
+ Keyword({"("});
+ auto joinOrderHintTree = JoinOrderLabels();
+ Keyword({")"});
+
+ Hints.JoinOrderHints->PushBack(std::move(joinOrderHintTree), "JoinOrder" + Text.substr(beginPos, Pos - beginPos + 1));
+ }
+
+ std::shared_ptr<TJoinOrderHints::ITreeNode> JoinOrderLabels() {
+ auto lhs = JoinOrderLabel();
+ auto rhs = JoinOrderLabel();
+ return std::make_shared<TJoinOrderHints::TJoinNode>(std::move(lhs), std::move(rhs));
+ }
+
+ std::shared_ptr<TJoinOrderHints::ITreeNode> JoinOrderLabel() {
+ if (auto maybeLabel = MaybeLabel()) {
+ return std::make_shared<TJoinOrderHints::TRelationNode>(std::move(maybeLabel.value()));
+ } else if (auto maybeBracket = MaybeKeyword({"("})) {
+ auto join = JoinOrderLabels();
+ Keyword({")"});
+ return join;
+ }
+
+ ParseError(Sprintf("JoinOrder args must be either a relation, either a join, example of the format: JoinOrder(t1 (t2 t3))"), Pos);
+ Y_UNREACHABLE();
+ }
+
+ void Card() {
+ i32 beginPos = Pos + 1;
+
+ Keyword({"("});
+
+ TVector<TString> labels = CollectLabels();
+ auto signStr = Keyword({"+", "-", "/", "*", "#"});
+ char sign = signStr[0];
+ auto value = Number();
+ Keyword({")"});
+
+ TCardinalityHints::ECardOperation op;
+ switch (sign) {
+ case '+': { op = TCardinalityHints::ECardOperation::Add; break; }
+ case '-': { op = TCardinalityHints::ECardOperation::Subtract; break; }
+ case '/': { op = TCardinalityHints::ECardOperation::Divide; break; }
+ case '*': { op = TCardinalityHints::ECardOperation::Multiply; break; }
+ case '#': { op = TCardinalityHints::ECardOperation::Replace; break; }
+ default: {ParseError(Sprintf("Unknown operation: '%c'", sign), Pos - 1); Y_UNREACHABLE();}
+ }
+
+ Hints.CardinalityHints->PushBack(std::move(labels), op, value, "Card" + Text.substr(beginPos, Pos - beginPos + 1));
+ }
+
+private:
+ // Expressions
+ void ParseError(const TString& err, i32 pos) {
+ Y_ENSURE(false, Sprintf("Optimizer hints parser error position:%d, msg: %s", pos, err.c_str()));
+ }
+
+ TString Label() {
+ return Term(Letters() | Digits());
+ }
+
+ std::optional<TString> MaybeLabel() {
+ try {
+ return Label();
+ } catch (...) {
+ return std::nullopt;
+ }
+ }
+
+ TString Term(const std::bitset<256>& allowedSym = {}) {
+ SkipWhiteSpaces();
+ Y_ENSURE(Pos < Size, "Expected <string>, but got end of the string.");
+
+ TString term;
+ while (Pos < Size) {
+ try {
+ term.push_back(Char(allowedSym));
+ } catch (...) {
+ break;
+ }
+ }
+
+ if (term.Empty()) {
+ ParseError("Expected a term!", Pos);
+ }
+ return term;
+ }
+
+ char Char(unsigned char c) {
+ std::bitset<256> allowed;
+ allowed[c] = 1;
+ return Char(allowed);
+ }
+
+ char Char(unsigned char intervalBegin, unsigned char intervalEnd) {
+ std::bitset<256> allowed;
+ for (size_t i = intervalBegin; i <= intervalEnd; ++i) {
+ allowed[i] = 1;
+ }
+ return Char(allowed);
+ }
+
+ char Char(const std::bitset<256>& allowedSymbols = {}) {
+ Y_ENSURE(Pos < Size, Sprintf("Expected [%s], but got end of the string.", ""));
+
+ char nextSym = Text[Pos + 1];
+ if (allowedSymbols.count() == 0) {
+ ++Pos;
+ return nextSym;
+ }
+
+ for (size_t i = 0; i < allowedSymbols.size(); ++i) {
+ if (allowedSymbols[i] && tolower(i) == tolower(nextSym)) {
+ ++Pos;
+ return nextSym;
+ }
+ }
+
+ ParseError(Sprintf("Expected [%s], but got [%c]", "", nextSym), Pos);
+ Y_UNREACHABLE();
+ }
+
+ std::optional<TString> MaybeKeyword(const TVector<TString>& keywords) {
+ try {
+ return Keyword(keywords);
+ } catch(...) {
+ return std::nullopt;
+ }
+ }
+
+ TString Keyword(const TVector<TString>& keywords) {
+ SkipWhiteSpaces();
+ Y_ENSURE(Pos < Size, Sprintf("Expected [%s], but got end of the string.", JoinSeq(", ", keywords).c_str()));
+
+ for (const auto& keyword: keywords) {
+ size_t lowInclude = Pos + 1;
+ size_t highExclude = lowInclude + keyword.Size();
+
+ if (Text.substr(lowInclude, highExclude - lowInclude).equal(keyword)) {
+ Pos += keyword.Size();
+ return keyword;
+ }
+ }
+
+ ParseError(Sprintf("Expected [%s], but got [%c]", JoinSeq(", ", keywords).c_str(), Text[Pos + 1]), Pos);
+ Y_UNREACHABLE();
+ }
+
+ double Number() {
+ SkipWhiteSpaces();
+ Y_ENSURE(Pos < Size, Sprintf("Expected number, but got end of the string."));
+
+ TString number;
+ if (auto maybeSign = MaybeKeyword({"+", "-"})) {
+ number.push_back(maybeSign.value()[0]);
+ }
+
+ auto term = Term(Digits() | Chars(".-e")); // for double like 1.0 / 1e9
+ try {
+ return std::stod(term);
+ } catch (...) {
+ ParseError(Sprintf("Expected a number, got [%s]", term.c_str()), Pos - term.Size());
+ }
+ Y_UNREACHABLE();
+ }
+
+private:
+ // Helpers
+ constexpr std::bitset<256> Chars(const TString& s) {
+ std::bitset<256> res;
+
+ for (char c: s) {
+ res[c] = 1;
+ }
+
+ return res;
+ }
+
+ constexpr std::bitset<256> Letters() {
+ std::bitset<256> res;
+
+ for (unsigned char i = 'a'; i <= 'z'; ++i) {
+ res[i] = 1;
+ }
+ for (unsigned char i = 'A'; i <= 'Z'; ++i) {
+ res[i] = 1;
+ }
+
+ return res;
+ }
+
+ constexpr std::bitset<256> Digits() {
+ std::bitset<256> res;
+
+ for (unsigned char i = '0'; i <= '9'; ++i) {
+ res[i] = 1;
+ }
+
+ return res;
+ }
+
+ void SkipWhiteSpaces() {
+ for (; Pos < Size && isspace(Text[Pos + 1]); ++Pos) {
+ }
+ }
+
+private:
+ i32 Pos;
+ const i32 Size;
+ const TString& Text;
+
+private:
+ TOptimizerHints Hints;
+};
+
+TOptimizerHints TOptimizerHints::Parse(const TString& text) {
+ return TOptimizerHintsParser(text).Parse();
+}
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
index 9c4214a9b86..ea9d5d6f6e1 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
+++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp
@@ -6,6 +6,7 @@
#include <util/generic/hash.h>
#include <util/generic/hash_set.h>
#include <util/string/cast.h>
+#include <util/string/join.h>
#include <util/string/printf.h>
const TString& ToString(NYql::EJoinKind);
@@ -285,81 +286,28 @@ const TBaseProviderContext& TBaseProviderContext::Instance() {
return staticContext;
}
-TCardinalityHints::TCardinalityHints(const TString& json) {
- auto jsonValue = NJson::TJsonValue();
- NJson::ReadJsonTree(json, &jsonValue, true);
-
- for (auto s : jsonValue.GetArraySafe()) {
- auto h = s.GetMapSafe();
-
- TCardinalityHints::TCardinalityHint hint;
+TVector<TString> TOptimizerHints::GetUnappliedHintStrings() {
+ TVector<TString> res;
- for (auto t : h.at("labels").GetArraySafe()) {
- hint.JoinLabels.push_back(t.GetStringSafe());
+ for (const auto& hint: JoinAlgoHints->Hints) {
+ if (!hint.Applied) {
+ res.push_back(hint.StringRepr);
}
-
- auto op = h.at("op").GetStringSafe();
- hint.Operation = HintOpMap.at(op);
-
- hint.Value = h.at("value").GetDoubleSafe();
- Hints.push_back(hint);
}
-}
-
-std::shared_ptr<IBaseOptimizerNode> MakeJoinTreeFromJson(const NJson::TJsonValue& jsonTree) {
- if (jsonTree.IsArray()) {
- auto children = jsonTree.GetArraySafe();
- Y_ENSURE(children.size() == 2, Sprintf("Expected 2 inputs for JoinOrder hints, got: %ld", children.size()));
-
- auto joinNode = TJoinOptimizerNode(
- MakeJoinTreeFromJson(children[0]),
- MakeJoinTreeFromJson(children[1]),
- {},
- EJoinKind::Cross, // just a stub
- EJoinAlgoType::Undefined,
- true
- );
- return std::make_shared<TJoinOptimizerNode>(std::move(joinNode));
- }
-
- Y_ENSURE(
- jsonTree.IsString(),
- Sprintf("A relation must be a string for JoinOrder hints! Got %s, expected a string.", jsonTree.GetStringRobust().c_str())
- );
- return std::make_shared<TRelOptimizerNode>(jsonTree.GetStringSafe(), nullptr);
-}
-TJoinOrderHints::TJoinOrderHints(const TString& json) {
- const static TString PARSING_FORMAT_ERROR =
- R"(Join order hints parsing failed. The example of the format: [ ["A", "B"], ["B", "C"] ])";
-
- NJson::TJsonValue jsonTree;
- NJson::ReadJsonTree(json, &jsonTree, true);
-
- Y_ENSURE(jsonTree.IsArray(), PARSING_FORMAT_ERROR);
- for (const auto& hintTreeJson: jsonTree.GetArray()) {
- HintTrees.push_back(MakeJoinTreeFromJson(hintTreeJson));
+ for (const auto& hint: JoinOrderHints->Hints) {
+ if (!hint.Applied) {
+ res.push_back(hint.StringRepr);
+ }
}
-}
-
-TJoinAlgoHints::TJoinAlgoHints(const TString& json) {
- auto jsonValue = NJson::TJsonValue();
- NJson::ReadJsonTree(json, &jsonValue, true);
-
- for (auto s : jsonValue.GetArraySafe()) {
- auto h = s.GetMapSafe();
- TJoinAlgoHints::TJoinAlgoHint hint;
-
- for (auto t : h.at("labels").GetArraySafe()) {
- hint.JoinLabels.push_back(t.GetStringSafe());
+ for (const auto& hint: CardinalityHints->Hints) {
+ if (!hint.Applied) {
+ res.push_back(hint.StringRepr);
}
-
- auto algo = h.at("algo").GetStringSafe();
- hint.JoinHint = FromString<EJoinAlgoType>(algo);
-
- Hints.push_back(hint);
}
+
+ return res;
}
} // namespace NYql
diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h
index 72cd6c55295..cdb143eaa88 100644
--- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h
+++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h
@@ -71,8 +71,12 @@ struct TCardinalityHints {
TVector<TString> JoinLabels;
ECardOperation Operation;
double Value;
+ TString StringRepr;
+ bool Applied = false;
double ApplyHint(double originalValue) {
+ Applied = true;
+
switch (Operation) {
case Add:
return originalValue + Value;
@@ -84,41 +88,104 @@ struct TCardinalityHints {
return originalValue / Value;
case Replace:
return Value;
-
}
}
};
TVector<TCardinalityHint> Hints;
-
- TCardinalityHints() {}
- TCardinalityHints(const TString& json);
+ void PushBack(TVector<TString> labels, ECardOperation op, double value, TString stringRepr) {
+ Hints.push_back({.JoinLabels = std::move(labels), .Operation = op, .Value = value, .StringRepr = std::move(stringRepr)});
+ }
};
struct TJoinAlgoHints {
-
struct TJoinAlgoHint {
TVector<TString> JoinLabels;
EJoinAlgoType JoinHint;
+ TString StringRepr;
+ bool Applied = false;
};
TVector<TJoinAlgoHint> Hints;
- TJoinAlgoHints() {}
- TJoinAlgoHints(const TString& json);
+ void PushBack(TVector<TString> labels, EJoinAlgoType joinHint, TString stringRepr) {
+ Hints.push_back({.JoinLabels = std::move(labels), .JoinHint = joinHint, .StringRepr = std::move(stringRepr)});
+ }
};
struct TJoinOrderHints {
- TVector<std::shared_ptr<IBaseOptimizerNode>> HintTrees;
+ struct ITreeNode {
+ enum _ : ui32 {
+ Relation,
+ Join
+ };
+
+ virtual TVector<TString> Labels() = 0;
+ bool IsRelation() { return Type == Relation; }
+ bool IsJoin() { return Type == Join; }
+ virtual ~ITreeNode() = default;
+
+ ui32 Type;
+ };
+
+ struct TJoinNode: public ITreeNode {
+ TJoinNode(std::shared_ptr<ITreeNode> lhs, std::shared_ptr<ITreeNode> rhs)
+ : Lhs(std::move(lhs))
+ , Rhs(std::move(rhs))
+ {
+ this->Type = ITreeNode::Join;
+ }
+
+ TVector<TString> Labels() override {
+ auto labels = Lhs->Labels();
+ auto rhsLabels = Rhs->Labels();
+ labels.insert(labels.end(), std::make_move_iterator(rhsLabels.begin()), std::make_move_iterator(rhsLabels.end()));
+ return labels;
+ }
+
+ std::shared_ptr<ITreeNode> Lhs;
+ std::shared_ptr<ITreeNode> Rhs;
+ };
+
+ struct TRelationNode: public ITreeNode {
+ TRelationNode(TString label)
+ : Label(std::move(label))
+ {
+ this->Type = ITreeNode::Relation;
+ }
+
+ TVector<TString> Labels() override { return {Label}; }
+
+ TString Label;
+ };
+
+ struct TJoinOrderHint {
+ std::shared_ptr<ITreeNode> Tree;
+ TString StringRepr;
+ bool Applied = false;
+ };
+
+ TVector<TJoinOrderHint> Hints;
- TJoinOrderHints() {}
- TJoinOrderHints(const TString& json);
+ void PushBack(std::shared_ptr<ITreeNode> hintTree, TString stringRepr) {
+ Hints.push_back({.Tree = std::move(hintTree), .StringRepr = std::move(stringRepr)});
+ }
};
struct TOptimizerHints {
- TCardinalityHints CardinalityHints;
- TJoinAlgoHints JoinAlgoHints;
- TJoinOrderHints JoinOrderHints;
+ std::shared_ptr<TCardinalityHints> CardinalityHints = std::make_shared<TCardinalityHints>();
+ std::shared_ptr<TJoinAlgoHints> JoinAlgoHints = std::make_shared<TJoinAlgoHints>();
+ std::shared_ptr<TJoinOrderHints> JoinOrderHints = std::make_shared<TJoinOrderHints>();
+
+ TVector<TString> GetUnappliedHintStrings();
+
+ /*
+ * The function accepts string with three type of expressions: array of (JoinAlgo | Card | JoinOrder):
+ * 1) JoinAlgo(t1 t2 ... tn Map | Grace | Lookup) to change join algo for join, where these labels take part
+ * 2) Card(t1 t2 ... tn (*|/|+|-) Number) to change cardinality for join, where these labels take part or labels only
+ * 3) JoinOrder( (t1 t2) (t3 (t4 ...)) ) - fixate this join subtree in the general join tree
+ */
+ static TOptimizerHints Parse(const TString&);
};
/**
diff --git a/ydb/library/yql/core/cbo/ya.make b/ydb/library/yql/core/cbo/ya.make
index 15e4923b631..894e444ffe4 100644
--- a/ydb/library/yql/core/cbo/ya.make
+++ b/ydb/library/yql/core/cbo/ya.make
@@ -2,6 +2,7 @@ LIBRARY()
SRCS(
cbo_optimizer_new.cpp
+ cbo_hints.cpp
)
GENERATE_ENUM_SERIALIZATION(cbo_optimizer_new.h)
diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
index 75cb4664bd0..cb95d2e45ae 100644
--- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
+++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h
@@ -38,26 +38,15 @@ class TDPHypSolver {
public:
TDPHypSolver(
TJoinHypergraph<TNodeSet>& graph,
- IProviderContext& ctx,
- const TCardinalityHints& hints,
- const TJoinAlgoHints& joinHints
+ IProviderContext& ctx
)
: Graph_(graph)
, NNodes_(graph.GetNodes().size())
, Pctx_(ctx)
- {
- for (const auto& h : hints.Hints) {
- TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels);
- CardHintsTable_[hintSet] = h;
- }
- for (const auto& h : joinHints.Hints) {
- TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels);
- JoinAlgoHintsTable_[hintSet] = h;
- }
- }
+ {}
// Run DPHyp algorithm and produce the join tree in CBO's internal representation
- std::shared_ptr<TJoinOptimizerNodeInternal> Solve();
+ std::shared_ptr<TJoinOptimizerNodeInternal> Solve(const TOptimizerHints& hints);
// Calculate the size of a dynamic programming table with a budget
ui32 CountCC(ui32 budget);
@@ -98,7 +87,7 @@ private:
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
IProviderContext& ctx,
- TCardinalityHints::TCardinalityHint* maybeHint,
+ TCardinalityHints::TCardinalityHint* maybeCardHint,
TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint
);
@@ -115,8 +104,8 @@ private:
#endif
private:
THashMap<TNodeSet, std::shared_ptr<IBaseOptimizerNode>, std::hash<TNodeSet>> DpTable_;
- THashMap<TNodeSet, TCardinalityHints::TCardinalityHint, std::hash<TNodeSet>> CardHintsTable_;
- THashMap<TNodeSet, TJoinAlgoHints::TJoinAlgoHint, std::hash<TNodeSet>> JoinAlgoHintsTable_;
+ THashMap<TNodeSet, TCardinalityHints::TCardinalityHint*, std::hash<TNodeSet>> CardHintsTable_;
+ THashMap<TNodeSet, TJoinAlgoHints::TJoinAlgoHint*, std::hash<TNodeSet>> JoinAlgoHintsTable_;
};
/*
@@ -232,7 +221,17 @@ template<typename TNodeSet> TNodeSet TDPHypSolver<TNodeSet>::NextBitset(const TN
return res;
}
-template<typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypSolver<TNodeSet>::Solve() {
+template<typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypSolver<TNodeSet>::Solve(const TOptimizerHints& hints) {
+ for (auto& h : hints.CardinalityHints->Hints) {
+ TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels);
+ CardHintsTable_[hintSet] = &h;
+ }
+
+ for (auto& h : hints.JoinAlgoHints->Hints) {
+ TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels);
+ JoinAlgoHintsTable_[hintSet] = &h;
+ }
+
auto& nodes = Graph_.GetNodes();
Y_ASSERT(nodes.size() == NNodes_);
@@ -242,7 +241,7 @@ template<typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypSo
s[i] = 1;
DpTable_[s] = nodes[i].RelationOptimizerNode;
if (CardHintsTable_.contains(s)){
- DpTable_[s]->Stats->Nrows = CardHintsTable_.at(s).ApplyHint(DpTable_[s]->Stats->Nrows);
+ DpTable_[s]->Stats->Nrows = CardHintsTable_.at(s)->ApplyHint(DpTable_[s]->Stats->Nrows);
}
}
@@ -416,7 +415,7 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
const TVector<TString>& leftJoinKeys,
const TVector<TString>& rightJoinKeys,
IProviderContext& ctx,
- TCardinalityHints::TCardinalityHint* maybeHint,
+ TCardinalityHints::TCardinalityHint* maybeCardHint,
TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint
) {
double bestCost = std::numeric_limits<double>::infinity();
@@ -425,11 +424,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
for (auto joinAlgo : AllJoinAlgos) {
if (ctx.IsJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind)){
- auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint).Cost;
- if (maybeJoinHint) {
- if (joinAlgo == maybeJoinHint->JoinHint) {
- cost = -1;
- }
+ auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost;
+ if (maybeJoinHint && joinAlgo == maybeJoinHint->JoinHint) {
+ cost = -1;
+ maybeJoinHint->Applied = true;
}
if (cost < bestCost) {
bestCost = cost;
@@ -440,11 +438,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
if (isCommutative) {
if (ctx.IsJoinApplicable(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind)){
- auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind, maybeHint).Cost;
- if (maybeJoinHint) {
- if (joinAlgo == maybeJoinHint->JoinHint) {
- cost = -1;
- }
+ auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost;
+ if (maybeJoinHint && joinAlgo == maybeJoinHint->JoinHint) {
+ cost = -1;
+ maybeJoinHint->Applied = true;
}
if (cost < bestCost) {
bestCost = cost;
@@ -458,10 +455,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS
Y_ENSURE(bestCost != std::numeric_limits<double>::infinity(), "No join was chosen!");
if (bestJoinIsReversed) {
- return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeHint);
+ return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint);
}
- return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeHint);
+ return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint);
}
/*
@@ -485,8 +482,8 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS
TNodeSet joined = s1 | s2;
- auto maybeCardHint = CardHintsTable_.contains(joined) ? & CardHintsTable_[joined] : nullptr;
- auto maybeJoinAlgoHint = JoinAlgoHintsTable_.contains(joined) ? & JoinAlgoHintsTable_[joined] : nullptr;
+ auto maybeCardHint = CardHintsTable_.contains(joined) ? CardHintsTable_[joined] : nullptr;
+ auto maybeJoinAlgoHint = JoinAlgoHintsTable_.contains(joined) ? JoinAlgoHintsTable_[joined] : nullptr;
auto bestJoin = PickBestJoin(
leftNodes,
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
index 7bdd545fe05..e9e701fda15 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
+++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp
@@ -242,8 +242,8 @@ private:
const std::shared_ptr<TJoinOptimizerNode>& joinTree,
const TOptimizerHints& hints = {}
) {
- TJoinHypergraph<TNodeSet> hypergraph = MakeJoinHypergraph<TNodeSet>(joinTree, hints.JoinOrderHints);
- TDPHypSolver<TNodeSet> solver(hypergraph, this->Pctx, hints.CardinalityHints, hints.JoinAlgoHints);
+ TJoinHypergraph<TNodeSet> hypergraph = MakeJoinHypergraph<TNodeSet>(joinTree, hints);
+ TDPHypSolver<TNodeSet> solver(hypergraph, this->Pctx);
if (solver.CountCC(MaxDPhypTableSize_) >= MaxDPhypTableSize_) {
YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded\n";
@@ -251,7 +251,7 @@ private:
return joinTree;
}
- auto bestJoinOrder = solver.Solve();
+ auto bestJoinOrder = solver.Solve(hints);
return ConvertFromInternal(bestJoinOrder);
}
private:
@@ -329,7 +329,7 @@ TExprBase DqOptimizeEquiJoinWithCosts(
// Generate an initial tree
auto joinTree = ConvertToJoinTree(joinTuple, rels);
- if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::ProviderKqp, NYql::NLog::ELevel::TRACE)) {
+ if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::CoreDq, NYql::NLog::ELevel::TRACE)) {
std::stringstream str;
str << "Converted join tree:\n";
joinTree->Print(str);
@@ -338,7 +338,7 @@ TExprBase DqOptimizeEquiJoinWithCosts(
joinTree = opt.JoinSearch(joinTree, hints);
- if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::ProviderKqp, NYql::NLog::ELevel::TRACE)) {
+ if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::CoreDq, NYql::NLog::ELevel::TRACE)) {
std::stringstream str;
str << "Optimizied join tree:\n";
joinTree->Print(str);
diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
index 8eba22c1ea5..9a4fe0a3f49 100644
--- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
+++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h
@@ -200,6 +200,16 @@ public:
return simpleEdges;
}
+ bool HasLabels(const TVector<TString>& labels) const {
+ for (const auto& label: labels) {
+ if (!NodeIdByRelationName_.contains(label)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
inline const TVector<TNode>& GetNodes() const {
return Nodes_;
}
@@ -272,9 +282,13 @@ public:
: Graph_(graph)
{}
- void Apply(const TJoinOrderHints& hints) {
- for (const auto& hintTree: hints.HintTrees) {
- auto labels = ApplyHintsToSubgraph(hintTree);
+ void Apply(TJoinOrderHints& hints) {
+ for (auto& hint: hints.Hints) {
+ if (!Graph_.HasLabels(hint.Tree->Labels())) {
+ continue;
+ }
+
+ auto labels = ApplyHintsToSubgraph(hint.Tree);
auto nodes = Graph_.GetNodesByRelNames(labels);
for (size_t i = 0; i < Graph_.GetEdges().size(); ++i) {
@@ -290,15 +304,17 @@ public:
Graph_.UpdateEdgeSides(i, newLeft, newRight);
}
+
+ hint.Applied = true;
}
}
private:
- TVector<TString> ApplyHintsToSubgraph(const std::shared_ptr<IBaseOptimizerNode>& node) {
- if (node->Kind == EOptimizerNodeKind::JoinNodeType) {
- auto join = std::static_pointer_cast<TJoinOptimizerNode>(node);
- TVector<TString> lhsLabels = ApplyHintsToSubgraph(join->LeftArg);
- TVector<TString> rhsLabels = ApplyHintsToSubgraph(join->RightArg);
+ TVector<TString> ApplyHintsToSubgraph(const std::shared_ptr<TJoinOrderHints::ITreeNode>& node) {
+ if (node->IsJoin()) {
+ auto join = std::static_pointer_cast<TJoinOrderHints::TJoinNode>(node);
+ TVector<TString> lhsLabels = ApplyHintsToSubgraph(join->Lhs);
+ TVector<TString> rhsLabels = ApplyHintsToSubgraph(join->Rhs);
auto lhs = Graph_.GetNodesByRelNames(lhsLabels);
auto rhs = Graph_.GetNodesByRelNames(rhsLabels);
@@ -332,7 +348,8 @@ private:
return joinLabels;
}
- return node->Labels();
+ auto relation = std::static_pointer_cast<TJoinOrderHints::TRelationNode>(node);
+ return {relation->Label};
}
private:
diff --git a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
index a808bcfa414..de59957f426 100644
--- a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
+++ b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h
@@ -89,7 +89,7 @@ void MakeJoinHypergraphRec(
template <typename TNodeSet>
TJoinHypergraph<TNodeSet> MakeJoinHypergraph(
const std::shared_ptr<IBaseOptimizerNode>& joinTree,
- const TJoinOrderHints& hints = {}
+ const TOptimizerHints& hints = {}
) {
TJoinHypergraph<TNodeSet> graph{};
std::unordered_map<std::shared_ptr<IBaseOptimizerNode>, TNodeSet> subtreeNodes{};
@@ -100,9 +100,9 @@ TJoinHypergraph<TNodeSet> MakeJoinHypergraph(
YQL_CLOG(TRACE, CoreDq) << graph.String();
}
- if (!hints.HintTrees.empty()) {
+ if (!hints.JoinOrderHints->Hints.empty()) {
TJoinOrderHintsApplier joinHints(graph);
- joinHints.Apply(hints);
+ joinHints.Apply(*hints.JoinOrderHints);
if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::CoreDq, NYql::NLog::ELevel::TRACE)) {
YQL_CLOG(TRACE, CoreDq) << "Hypergraph after hints: ";
YQL_CLOG(TRACE, CoreDq) << graph.String();