diff options
author | pilik <pudge1000-7@ydb.tech> | 2024-09-17 18:56:49 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-17 17:56:49 +0200 |
commit | d7347606ebe15879a709a14f60eabf91b238961b (patch) | |
tree | 7e095e076fd19ac7f0b3b8e1215316d2e40b2306 | |
parent | 162c9f5fa8fa3b2482a9d22de01288e7365a5037 (diff) | |
download | ydb-d7347606ebe15879a709a14f60eabf91b238961b.tar.gz |
[CBO] Hints parser added (#9252)
19 files changed, 528 insertions, 219 deletions
diff --git a/ydb/core/kqp/opt/kqp_opt.h b/ydb/core/kqp/opt/kqp_opt.h index 1904aef5215..731b94368f0 100644 --- a/ydb/core/kqp/opt/kqp_opt.h +++ b/ydb/core/kqp/opt/kqp_opt.h @@ -18,7 +18,7 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> { , QueryCtx(queryCtx) , Tables(tables) , UserRequestContext(userRequestContext) - { + { YQL_ENSURE(QueryCtx); YQL_ENSURE(Tables); } @@ -31,9 +31,7 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> { int JoinsCount{}; int EquiJoinsCount{}; std::shared_ptr<NJson::TJsonValue> OverrideStatistics{}; - std::shared_ptr<NYql::TCardinalityHints> CardinalityHints{}; - std::shared_ptr<NYql::TJoinAlgoHints> JoinAlgoHints{}; - std::shared_ptr<NYql::TJoinOrderHints> JoinOrderHints{}; + std::shared_ptr<NYql::TOptimizerHints> Hints{}; std::shared_ptr<NJson::TJsonValue> GetOverrideStatistics() { if (Config->OptOverrideStatistics.Get()) { @@ -49,37 +47,17 @@ struct TKqpOptimizeContext : public TSimpleRefCount<TKqpOptimizeContext> { } } - NYql::TCardinalityHints GetCardinalityHints() { - if (Config->OptCardinalityHints.Get()) { - if (!CardinalityHints) { - CardinalityHints = std::make_shared<NYql::TCardinalityHints>(*Config->OptCardinalityHints.Get()); + NYql::TOptimizerHints GetOptimizerHints() { + if (Config->OptimizerHints.Get()) { + if (!Hints) { + Hints = std::make_shared<NYql::TOptimizerHints>( + NYql::TOptimizerHints::Parse(*Config->OptimizerHints.Get()) + ); } - return *CardinalityHints; - } else { - return NYql::TCardinalityHints(); - } - } - - NYql::TJoinAlgoHints GetJoinAlgoHints() { - if (Config->OptJoinAlgoHints.Get()) { - if (!JoinAlgoHints) { - JoinAlgoHints = std::make_shared<NYql::TJoinAlgoHints>(*Config->OptJoinAlgoHints.Get()); - } - return *JoinAlgoHints; - } else { - return NYql::TJoinAlgoHints(); + return *Hints; } - } - NYql::TJoinOrderHints GetJoinOrderHints() { - if (Config->OptJoinOrderHints.Get()) { - if (!JoinOrderHints) { - JoinOrderHints = std::make_shared<NYql::TJoinOrderHints>(*Config->OptJoinOrderHints.Get()); - } - return *JoinOrderHints; - } else { - return NYql::TJoinOrderHints(); - } + return NYql::TOptimizerHints(); } bool IsDataQuery() const { diff --git a/ydb/core/kqp/opt/kqp_statistics_transformer.h b/ydb/core/kqp/opt/kqp_statistics_transformer.h index e74646bc088..46195f739ed 100644 --- a/ydb/core/kqp/opt/kqp_statistics_transformer.h +++ b/ydb/core/kqp/opt/kqp_statistics_transformer.h @@ -35,7 +35,7 @@ class TKqpStatisticsTransformer : public NYql::NDq::TDqStatisticsTransformerBase public: TKqpStatisticsTransformer(const TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, TTypeAnnotationContext& typeCtx, const TKikimrConfiguration::TPtr& config, const TKqpProviderContext& pctx) : - TDqStatisticsTransformerBase(&typeCtx, pctx, kqpCtx->GetCardinalityHints()), + TDqStatisticsTransformerBase(&typeCtx, pctx, *kqpCtx->GetOptimizerHints().CardinalityHints), Config(config), KqpCtx(*kqpCtx) {} diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 3030256614a..f6471e103f0 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -11,6 +11,7 @@ #include <ydb/library/yql/dq/opt/dq_opt_join.h> #include <ydb/library/yql/dq/opt/dq_opt_log.h> #include <ydb/library/yql/dq/opt/dq_opt_hopping.h> +#include <ydb/library/yql/utils/log/log.h> #include <ydb/library/yql/providers/common/transform/yql_optimize.h> #include <ydb/library/yql/providers/dq/common/yql_dq_settings.h> @@ -87,6 +88,17 @@ public: SetGlobal(4u); } +public: + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) override { + auto status = TOptimizeTransformerBase::DoTransform(input, output, ctx); + + for (const auto& hint: KqpCtx.GetOptimizerHints().GetUnappliedHintStrings()) { + YQL_CLOG(WARN, ProviderYdb) << "Unapplied hint: " + hint; + } + + return status; + } + protected: TMaybeNode<TExprBase> PushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TGetParents& getParents) { TExprBase output = KqpPushExtractedPredicateToReadTable(node, ctx, KqpCtx, TypesCtx, *getParents()); @@ -155,11 +167,7 @@ protected: rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), stat, node)); }, KqpCtx.EquiJoinsCount, - TOptimizerHints{ - .CardinalityHints = KqpCtx.GetCardinalityHints(), - .JoinAlgoHints = KqpCtx.GetJoinAlgoHints(), - .JoinOrderHints = KqpCtx.GetJoinOrderHints() - } + KqpCtx.GetOptimizerHints() ); DumpAppliedRule("OptimizeEquiJoinWithCosts", node.Ptr(), output.Ptr(), ctx); return output; diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.h b/ydb/core/kqp/opt/logical/kqp_opt_log.h index cfccb10df8d..4e86d361c62 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.h +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.h @@ -7,7 +7,10 @@ namespace NKikimr::NKqp::NOpt { struct TKqpOptimizeContext; -TAutoPtr<NYql::IGraphTransformer> CreateKqpLogOptTransformer(TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, - NYql::TTypeAnnotationContext& typesCtx, const NYql::TKikimrConfiguration::TPtr& config); +TAutoPtr<NYql::IGraphTransformer> CreateKqpLogOptTransformer( + TIntrusivePtr<TKqpOptimizeContext>& kqpCtx, + NYql::TTypeAnnotationContext& typesCtx, + const NYql::TKikimrConfiguration::TPtr& config +); } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.cpp b/ydb/core/kqp/provider/yql_kikimr_settings.cpp index 6380b81c2a4..3b74b8ecd76 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_settings.cpp @@ -82,9 +82,7 @@ TKikimrConfiguration::TKikimrConfiguration() { REGISTER_SETTING(*this, OptEnableOlapPushdown); REGISTER_SETTING(*this, OptEnableOlapProvideComputeSharding); REGISTER_SETTING(*this, OptOverrideStatistics); - REGISTER_SETTING(*this, OptCardinalityHints); - REGISTER_SETTING(*this, OptJoinAlgoHints); - REGISTER_SETTING(*this, OptJoinOrderHints); + REGISTER_SETTING(*this, OptimizerHints); REGISTER_SETTING(*this, OverridePlanner); REGISTER_SETTING(*this, UseGraceJoinCoreForMap); diff --git a/ydb/core/kqp/provider/yql_kikimr_settings.h b/ydb/core/kqp/provider/yql_kikimr_settings.h index d4d851c0fd6..8e6e706de43 100644 --- a/ydb/core/kqp/provider/yql_kikimr_settings.h +++ b/ydb/core/kqp/provider/yql_kikimr_settings.h @@ -55,9 +55,7 @@ struct TKikimrSettings { NCommon::TConfSetting<bool, false> UseGraceJoinCoreForMap; NCommon::TConfSetting<TString, false> OptOverrideStatistics; - NCommon::TConfSetting<TString, false> OptCardinalityHints; - NCommon::TConfSetting<TString, false> OptJoinAlgoHints; - NCommon::TConfSetting<TString, false> OptJoinOrderHints; + NCommon::TConfSetting<TString, false> OptimizerHints; /* Disable optimizer rules */ NCommon::TConfSetting<bool, false> OptDisableTopSort; diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql index cba3a99a1d3..12501dbbed4 100644 --- a/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql +++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_complex.sql @@ -1,19 +1,18 @@ PRAGMA TablePathPrefix='/Root'; -PRAGMA ydb.OptJoinOrderHints= - '[ - [["R", "S"], ["T", "U"]] - ]'; +PRAGMA ydb.OptimizerHints = +' + Card(Unused # 10e8) + JoinOrder( (Unused1 Unused2) (Unused3 Unused4) ) -PRAGMA ydb.OptCardinalityHints = - '[ - {"labels":["R"], "op":"#", "value":10e8}, - {"labels":["T"], "op":"#", "value":1}, - {"labels":["R", "T"], "op":"#", "value":1}, - {"labels":["R", "S"], "op":"#", "value":10e8}, - {"labels":["T", "U"], "op":"#", "value":10e8}, - {"labels":["V"], "op":"#", "value":1} - ]'; + Card(R # 10e8) + Card(T # 1) + Card(R T # 1) + Card(R S # 10e8) + Card(T U # 10e8) + Card(V # 1) + JoinOrder( (R S) (T U) ) +'; SELECT * FROM R INNER JOIN S on R.id = S.id diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql index 5ddee5c2e54..c1b8aaeb993 100644 --- a/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql +++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_many_hint_trees.sql @@ -1,19 +1,16 @@ PRAGMA TablePathPrefix='/Root'; -PRAGMA ydb.OptJoinOrderHints= - '[ - ["R", "S"], - ["T", "U"] - ]'; -PRAGMA ydb.OptCardinalityHints = - '[ - {"labels":["R"], "op":"#", "value":10e8}, - {"labels":["T"], "op":"#", "value":1}, - {"labels":["R", "T"], "op":"#", "value":1}, - {"labels":["R", "S"], "op":"#", "value":10e8}, - {"labels":["T", "U"], "op":"#", "value":10e8}, - {"labels":["V"], "op":"#", "value":1} - ]'; +PRAGMA ydb.OptimizerHints = +' + Card(R # 10e8) + Card(T # 1) + Card(R T # 1) + Card(R S # 10e8) + Card(T U # 10e8) + Card(V # 1) + JoinOrder(T U) + JoinOrder(R S) +'; SELECT * FROM R INNER JOIN S on R.id = S.id diff --git a/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql b/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql index 7482d192cea..9c53e3ad07f 100644 --- a/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql +++ b/ydb/core/kqp/ut/join/data/queries/join_order_hints_simple.sql @@ -1,17 +1,14 @@ PRAGMA TablePathPrefix='/Root'; -PRAGMA ydb.OptCardinalityHints = - '[ - {"labels":["R"], "op":"#", "value":10e8}, - {"labels":["T"], "op":"#", "value":1}, - {"labels":["S"], "op":"#", "value":10e8}, - {"labels":["R", "T"], "op":"#", "value":1}, - {"labels":["R", "S"], "op":"#", "value":10e8} - ]'; -PRAGMA ydb.OptJoinOrderHints= - '[ - ["T", ["R", "S"]] - ]'; +PRAGMA ydb.OptimizerHints = +' + Card(R # 10e8) + Card(T # 1) + Card(S # 10e8) + Card(R T # 1) + Card(R S # 10e8) + JoinOrder(T (R S)) +'; SELECT * FROM R INNER JOIN S on R.id = S.id diff --git a/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql b/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql index b58b49e3e3f..bf61264170f 100644 --- a/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql +++ b/ydb/core/kqp/ut/join/data/queries/test_join_hint.sql @@ -1,5 +1,9 @@ -PRAGMA ydb.OptCardinalityHints = '[{"labels":["R"], "op":"#", "value":1}, {"labels":["R","S"], "op":"#", "value":10e6}]'; -PRAGMA ydb.OptJoinAlgoHints = '[{"labels":["R","S"], "algo":"GraceJoin"}]'; +PRAGMA ydb.OptimizerHints = +' + JoinAlgo(R S Grace) + Card(R # 1) + Card(R S # 10e6) +'; SELECT * FROM `/Root/R` as R diff --git a/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql b/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql index 1e56ae6aef0..e932682c3a3 100644 --- a/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql +++ b/ydb/core/kqp/ut/join/data/queries/test_join_hint2.sql @@ -1,4 +1,5 @@ -PRAGMA ydb.OptCardinalityHints = '[{"labels":["R","S"], "op":"#", "value":1.0}]'; +PRAGMA ydb.OptimizerHints = 'Card(R S # 1)'; + SELECT * FROM `/Root/R` as R diff --git a/ydb/library/yql/core/cbo/cbo_hints.cpp b/ydb/library/yql/core/cbo/cbo_hints.cpp new file mode 100644 index 00000000000..4d598c2fd72 --- /dev/null +++ b/ydb/library/yql/core/cbo/cbo_hints.cpp @@ -0,0 +1,296 @@ +#include "cbo_optimizer_new.h" + +#include <util/string/join.h> +#include <util/string/printf.h> +#include <library/cpp/iterator/zip.h> + +using namespace NYql; + +class TOptimizerHintsParser { +public: + TOptimizerHintsParser(const TString& text) + : Pos(-1) + , Size(static_cast<i32>(text.Size()) - 1) + , Text(text) + {} + + TOptimizerHints Parse() { + Start(); + return Hints; + } + +private: + void Start() { + while (Pos < Size) { + auto hintType = Keyword({"JoinOrder", "JoinAlgo", "Card"}); + if (hintType == "JoinOrder") { + JoinOrder(); + } else if (hintType == "JoinAlgo") { + JoinAlgo(); + } else if (hintType == "Card"){ + Card(); + } else { + ParseError(Sprintf("Undefined hints type: %s", hintType.c_str()), Pos - hintType.Size()); + } + + SkipWhiteSpaces(); + } + } + + TVector<TString> CollectLabels() { + TVector<TString> labels; + while (auto maybeTerm = MaybeLabel()) { + labels.push_back(maybeTerm.value()); + } + return labels; + } + + void JoinAlgo() { + Keyword({"("}); + + i32 beginPos = Pos + 1; + + i32 labelsBeginPos = Pos + 1; + TVector<TString> labels = CollectLabels(); + if (labels.size() <= 1) { + ParseError(Sprintf("Bad labels for JoinAlgo hint: %s, example of the format: JoinAlgo(t1 t2 Grace)", JoinSeq(", ", labels).c_str()), labelsBeginPos); + } + TString reqJoinAlgoStr = std::move(labels.back()); + labels.pop_back(); + + Keyword({")"}); + + TVector<EJoinAlgoType> joinAlgos = {EJoinAlgoType::GraceJoin, EJoinAlgoType::LookupJoin, EJoinAlgoType::MapJoin}; + TVector<TString> joinAlgosStr = {"Grace", "Lookup", "Map"}; + + for (const auto& [joinAlgo, joinAlgoStr]: Zip(joinAlgos, joinAlgosStr)) { + if (reqJoinAlgoStr == joinAlgoStr) { + Hints.JoinAlgoHints->PushBack(std::move(labels), joinAlgo, "JoinOrder" + Text.substr(beginPos, Pos - beginPos + 1)); + return; + } + } + + ParseError(Sprintf("Unknown JoinAlgo: '%s', supported algos: [%s]", reqJoinAlgoStr.c_str(), JoinSeq(", ", joinAlgosStr).c_str()), Pos - reqJoinAlgoStr.Size()); + Y_UNREACHABLE(); + } + + void JoinOrder() { + i32 beginPos = Pos + 1; + + Keyword({"("}); + auto joinOrderHintTree = JoinOrderLabels(); + Keyword({")"}); + + Hints.JoinOrderHints->PushBack(std::move(joinOrderHintTree), "JoinOrder" + Text.substr(beginPos, Pos - beginPos + 1)); + } + + std::shared_ptr<TJoinOrderHints::ITreeNode> JoinOrderLabels() { + auto lhs = JoinOrderLabel(); + auto rhs = JoinOrderLabel(); + return std::make_shared<TJoinOrderHints::TJoinNode>(std::move(lhs), std::move(rhs)); + } + + std::shared_ptr<TJoinOrderHints::ITreeNode> JoinOrderLabel() { + if (auto maybeLabel = MaybeLabel()) { + return std::make_shared<TJoinOrderHints::TRelationNode>(std::move(maybeLabel.value())); + } else if (auto maybeBracket = MaybeKeyword({"("})) { + auto join = JoinOrderLabels(); + Keyword({")"}); + return join; + } + + ParseError(Sprintf("JoinOrder args must be either a relation, either a join, example of the format: JoinOrder(t1 (t2 t3))"), Pos); + Y_UNREACHABLE(); + } + + void Card() { + i32 beginPos = Pos + 1; + + Keyword({"("}); + + TVector<TString> labels = CollectLabels(); + auto signStr = Keyword({"+", "-", "/", "*", "#"}); + char sign = signStr[0]; + auto value = Number(); + Keyword({")"}); + + TCardinalityHints::ECardOperation op; + switch (sign) { + case '+': { op = TCardinalityHints::ECardOperation::Add; break; } + case '-': { op = TCardinalityHints::ECardOperation::Subtract; break; } + case '/': { op = TCardinalityHints::ECardOperation::Divide; break; } + case '*': { op = TCardinalityHints::ECardOperation::Multiply; break; } + case '#': { op = TCardinalityHints::ECardOperation::Replace; break; } + default: {ParseError(Sprintf("Unknown operation: '%c'", sign), Pos - 1); Y_UNREACHABLE();} + } + + Hints.CardinalityHints->PushBack(std::move(labels), op, value, "Card" + Text.substr(beginPos, Pos - beginPos + 1)); + } + +private: + // Expressions + void ParseError(const TString& err, i32 pos) { + Y_ENSURE(false, Sprintf("Optimizer hints parser error position:%d, msg: %s", pos, err.c_str())); + } + + TString Label() { + return Term(Letters() | Digits()); + } + + std::optional<TString> MaybeLabel() { + try { + return Label(); + } catch (...) { + return std::nullopt; + } + } + + TString Term(const std::bitset<256>& allowedSym = {}) { + SkipWhiteSpaces(); + Y_ENSURE(Pos < Size, "Expected <string>, but got end of the string."); + + TString term; + while (Pos < Size) { + try { + term.push_back(Char(allowedSym)); + } catch (...) { + break; + } + } + + if (term.Empty()) { + ParseError("Expected a term!", Pos); + } + return term; + } + + char Char(unsigned char c) { + std::bitset<256> allowed; + allowed[c] = 1; + return Char(allowed); + } + + char Char(unsigned char intervalBegin, unsigned char intervalEnd) { + std::bitset<256> allowed; + for (size_t i = intervalBegin; i <= intervalEnd; ++i) { + allowed[i] = 1; + } + return Char(allowed); + } + + char Char(const std::bitset<256>& allowedSymbols = {}) { + Y_ENSURE(Pos < Size, Sprintf("Expected [%s], but got end of the string.", "")); + + char nextSym = Text[Pos + 1]; + if (allowedSymbols.count() == 0) { + ++Pos; + return nextSym; + } + + for (size_t i = 0; i < allowedSymbols.size(); ++i) { + if (allowedSymbols[i] && tolower(i) == tolower(nextSym)) { + ++Pos; + return nextSym; + } + } + + ParseError(Sprintf("Expected [%s], but got [%c]", "", nextSym), Pos); + Y_UNREACHABLE(); + } + + std::optional<TString> MaybeKeyword(const TVector<TString>& keywords) { + try { + return Keyword(keywords); + } catch(...) { + return std::nullopt; + } + } + + TString Keyword(const TVector<TString>& keywords) { + SkipWhiteSpaces(); + Y_ENSURE(Pos < Size, Sprintf("Expected [%s], but got end of the string.", JoinSeq(", ", keywords).c_str())); + + for (const auto& keyword: keywords) { + size_t lowInclude = Pos + 1; + size_t highExclude = lowInclude + keyword.Size(); + + if (Text.substr(lowInclude, highExclude - lowInclude).equal(keyword)) { + Pos += keyword.Size(); + return keyword; + } + } + + ParseError(Sprintf("Expected [%s], but got [%c]", JoinSeq(", ", keywords).c_str(), Text[Pos + 1]), Pos); + Y_UNREACHABLE(); + } + + double Number() { + SkipWhiteSpaces(); + Y_ENSURE(Pos < Size, Sprintf("Expected number, but got end of the string.")); + + TString number; + if (auto maybeSign = MaybeKeyword({"+", "-"})) { + number.push_back(maybeSign.value()[0]); + } + + auto term = Term(Digits() | Chars(".-e")); // for double like 1.0 / 1e9 + try { + return std::stod(term); + } catch (...) { + ParseError(Sprintf("Expected a number, got [%s]", term.c_str()), Pos - term.Size()); + } + Y_UNREACHABLE(); + } + +private: + // Helpers + constexpr std::bitset<256> Chars(const TString& s) { + std::bitset<256> res; + + for (char c: s) { + res[c] = 1; + } + + return res; + } + + constexpr std::bitset<256> Letters() { + std::bitset<256> res; + + for (unsigned char i = 'a'; i <= 'z'; ++i) { + res[i] = 1; + } + for (unsigned char i = 'A'; i <= 'Z'; ++i) { + res[i] = 1; + } + + return res; + } + + constexpr std::bitset<256> Digits() { + std::bitset<256> res; + + for (unsigned char i = '0'; i <= '9'; ++i) { + res[i] = 1; + } + + return res; + } + + void SkipWhiteSpaces() { + for (; Pos < Size && isspace(Text[Pos + 1]); ++Pos) { + } + } + +private: + i32 Pos; + const i32 Size; + const TString& Text; + +private: + TOptimizerHints Hints; +}; + +TOptimizerHints TOptimizerHints::Parse(const TString& text) { + return TOptimizerHintsParser(text).Parse(); +} diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp index 9c4214a9b86..ea9d5d6f6e1 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.cpp @@ -6,6 +6,7 @@ #include <util/generic/hash.h> #include <util/generic/hash_set.h> #include <util/string/cast.h> +#include <util/string/join.h> #include <util/string/printf.h> const TString& ToString(NYql::EJoinKind); @@ -285,81 +286,28 @@ const TBaseProviderContext& TBaseProviderContext::Instance() { return staticContext; } -TCardinalityHints::TCardinalityHints(const TString& json) { - auto jsonValue = NJson::TJsonValue(); - NJson::ReadJsonTree(json, &jsonValue, true); - - for (auto s : jsonValue.GetArraySafe()) { - auto h = s.GetMapSafe(); - - TCardinalityHints::TCardinalityHint hint; +TVector<TString> TOptimizerHints::GetUnappliedHintStrings() { + TVector<TString> res; - for (auto t : h.at("labels").GetArraySafe()) { - hint.JoinLabels.push_back(t.GetStringSafe()); + for (const auto& hint: JoinAlgoHints->Hints) { + if (!hint.Applied) { + res.push_back(hint.StringRepr); } - - auto op = h.at("op").GetStringSafe(); - hint.Operation = HintOpMap.at(op); - - hint.Value = h.at("value").GetDoubleSafe(); - Hints.push_back(hint); } -} - -std::shared_ptr<IBaseOptimizerNode> MakeJoinTreeFromJson(const NJson::TJsonValue& jsonTree) { - if (jsonTree.IsArray()) { - auto children = jsonTree.GetArraySafe(); - Y_ENSURE(children.size() == 2, Sprintf("Expected 2 inputs for JoinOrder hints, got: %ld", children.size())); - - auto joinNode = TJoinOptimizerNode( - MakeJoinTreeFromJson(children[0]), - MakeJoinTreeFromJson(children[1]), - {}, - EJoinKind::Cross, // just a stub - EJoinAlgoType::Undefined, - true - ); - return std::make_shared<TJoinOptimizerNode>(std::move(joinNode)); - } - - Y_ENSURE( - jsonTree.IsString(), - Sprintf("A relation must be a string for JoinOrder hints! Got %s, expected a string.", jsonTree.GetStringRobust().c_str()) - ); - return std::make_shared<TRelOptimizerNode>(jsonTree.GetStringSafe(), nullptr); -} -TJoinOrderHints::TJoinOrderHints(const TString& json) { - const static TString PARSING_FORMAT_ERROR = - R"(Join order hints parsing failed. The example of the format: [ ["A", "B"], ["B", "C"] ])"; - - NJson::TJsonValue jsonTree; - NJson::ReadJsonTree(json, &jsonTree, true); - - Y_ENSURE(jsonTree.IsArray(), PARSING_FORMAT_ERROR); - for (const auto& hintTreeJson: jsonTree.GetArray()) { - HintTrees.push_back(MakeJoinTreeFromJson(hintTreeJson)); + for (const auto& hint: JoinOrderHints->Hints) { + if (!hint.Applied) { + res.push_back(hint.StringRepr); + } } -} - -TJoinAlgoHints::TJoinAlgoHints(const TString& json) { - auto jsonValue = NJson::TJsonValue(); - NJson::ReadJsonTree(json, &jsonValue, true); - - for (auto s : jsonValue.GetArraySafe()) { - auto h = s.GetMapSafe(); - TJoinAlgoHints::TJoinAlgoHint hint; - - for (auto t : h.at("labels").GetArraySafe()) { - hint.JoinLabels.push_back(t.GetStringSafe()); + for (const auto& hint: CardinalityHints->Hints) { + if (!hint.Applied) { + res.push_back(hint.StringRepr); } - - auto algo = h.at("algo").GetStringSafe(); - hint.JoinHint = FromString<EJoinAlgoType>(algo); - - Hints.push_back(hint); } + + return res; } } // namespace NYql diff --git a/ydb/library/yql/core/cbo/cbo_optimizer_new.h b/ydb/library/yql/core/cbo/cbo_optimizer_new.h index 72cd6c55295..cdb143eaa88 100644 --- a/ydb/library/yql/core/cbo/cbo_optimizer_new.h +++ b/ydb/library/yql/core/cbo/cbo_optimizer_new.h @@ -71,8 +71,12 @@ struct TCardinalityHints { TVector<TString> JoinLabels; ECardOperation Operation; double Value; + TString StringRepr; + bool Applied = false; double ApplyHint(double originalValue) { + Applied = true; + switch (Operation) { case Add: return originalValue + Value; @@ -84,41 +88,104 @@ struct TCardinalityHints { return originalValue / Value; case Replace: return Value; - } } }; TVector<TCardinalityHint> Hints; - - TCardinalityHints() {} - TCardinalityHints(const TString& json); + void PushBack(TVector<TString> labels, ECardOperation op, double value, TString stringRepr) { + Hints.push_back({.JoinLabels = std::move(labels), .Operation = op, .Value = value, .StringRepr = std::move(stringRepr)}); + } }; struct TJoinAlgoHints { - struct TJoinAlgoHint { TVector<TString> JoinLabels; EJoinAlgoType JoinHint; + TString StringRepr; + bool Applied = false; }; TVector<TJoinAlgoHint> Hints; - TJoinAlgoHints() {} - TJoinAlgoHints(const TString& json); + void PushBack(TVector<TString> labels, EJoinAlgoType joinHint, TString stringRepr) { + Hints.push_back({.JoinLabels = std::move(labels), .JoinHint = joinHint, .StringRepr = std::move(stringRepr)}); + } }; struct TJoinOrderHints { - TVector<std::shared_ptr<IBaseOptimizerNode>> HintTrees; + struct ITreeNode { + enum _ : ui32 { + Relation, + Join + }; + + virtual TVector<TString> Labels() = 0; + bool IsRelation() { return Type == Relation; } + bool IsJoin() { return Type == Join; } + virtual ~ITreeNode() = default; + + ui32 Type; + }; + + struct TJoinNode: public ITreeNode { + TJoinNode(std::shared_ptr<ITreeNode> lhs, std::shared_ptr<ITreeNode> rhs) + : Lhs(std::move(lhs)) + , Rhs(std::move(rhs)) + { + this->Type = ITreeNode::Join; + } + + TVector<TString> Labels() override { + auto labels = Lhs->Labels(); + auto rhsLabels = Rhs->Labels(); + labels.insert(labels.end(), std::make_move_iterator(rhsLabels.begin()), std::make_move_iterator(rhsLabels.end())); + return labels; + } + + std::shared_ptr<ITreeNode> Lhs; + std::shared_ptr<ITreeNode> Rhs; + }; + + struct TRelationNode: public ITreeNode { + TRelationNode(TString label) + : Label(std::move(label)) + { + this->Type = ITreeNode::Relation; + } + + TVector<TString> Labels() override { return {Label}; } + + TString Label; + }; + + struct TJoinOrderHint { + std::shared_ptr<ITreeNode> Tree; + TString StringRepr; + bool Applied = false; + }; + + TVector<TJoinOrderHint> Hints; - TJoinOrderHints() {} - TJoinOrderHints(const TString& json); + void PushBack(std::shared_ptr<ITreeNode> hintTree, TString stringRepr) { + Hints.push_back({.Tree = std::move(hintTree), .StringRepr = std::move(stringRepr)}); + } }; struct TOptimizerHints { - TCardinalityHints CardinalityHints; - TJoinAlgoHints JoinAlgoHints; - TJoinOrderHints JoinOrderHints; + std::shared_ptr<TCardinalityHints> CardinalityHints = std::make_shared<TCardinalityHints>(); + std::shared_ptr<TJoinAlgoHints> JoinAlgoHints = std::make_shared<TJoinAlgoHints>(); + std::shared_ptr<TJoinOrderHints> JoinOrderHints = std::make_shared<TJoinOrderHints>(); + + TVector<TString> GetUnappliedHintStrings(); + + /* + * The function accepts string with three type of expressions: array of (JoinAlgo | Card | JoinOrder): + * 1) JoinAlgo(t1 t2 ... tn Map | Grace | Lookup) to change join algo for join, where these labels take part + * 2) Card(t1 t2 ... tn (*|/|+|-) Number) to change cardinality for join, where these labels take part or labels only + * 3) JoinOrder( (t1 t2) (t3 (t4 ...)) ) - fixate this join subtree in the general join tree + */ + static TOptimizerHints Parse(const TString&); }; /** diff --git a/ydb/library/yql/core/cbo/ya.make b/ydb/library/yql/core/cbo/ya.make index 15e4923b631..894e444ffe4 100644 --- a/ydb/library/yql/core/cbo/ya.make +++ b/ydb/library/yql/core/cbo/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( cbo_optimizer_new.cpp + cbo_hints.cpp ) GENERATE_ENUM_SERIALIZATION(cbo_optimizer_new.h) diff --git a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h index 75cb4664bd0..cb95d2e45ae 100644 --- a/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h +++ b/ydb/library/yql/dq/opt/dq_opt_dphyp_solver.h @@ -38,26 +38,15 @@ class TDPHypSolver { public: TDPHypSolver( TJoinHypergraph<TNodeSet>& graph, - IProviderContext& ctx, - const TCardinalityHints& hints, - const TJoinAlgoHints& joinHints + IProviderContext& ctx ) : Graph_(graph) , NNodes_(graph.GetNodes().size()) , Pctx_(ctx) - { - for (const auto& h : hints.Hints) { - TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels); - CardHintsTable_[hintSet] = h; - } - for (const auto& h : joinHints.Hints) { - TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels); - JoinAlgoHintsTable_[hintSet] = h; - } - } + {} // Run DPHyp algorithm and produce the join tree in CBO's internal representation - std::shared_ptr<TJoinOptimizerNodeInternal> Solve(); + std::shared_ptr<TJoinOptimizerNodeInternal> Solve(const TOptimizerHints& hints); // Calculate the size of a dynamic programming table with a budget ui32 CountCC(ui32 budget); @@ -98,7 +87,7 @@ private: const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys, IProviderContext& ctx, - TCardinalityHints::TCardinalityHint* maybeHint, + TCardinalityHints::TCardinalityHint* maybeCardHint, TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint ); @@ -115,8 +104,8 @@ private: #endif private: THashMap<TNodeSet, std::shared_ptr<IBaseOptimizerNode>, std::hash<TNodeSet>> DpTable_; - THashMap<TNodeSet, TCardinalityHints::TCardinalityHint, std::hash<TNodeSet>> CardHintsTable_; - THashMap<TNodeSet, TJoinAlgoHints::TJoinAlgoHint, std::hash<TNodeSet>> JoinAlgoHintsTable_; + THashMap<TNodeSet, TCardinalityHints::TCardinalityHint*, std::hash<TNodeSet>> CardHintsTable_; + THashMap<TNodeSet, TJoinAlgoHints::TJoinAlgoHint*, std::hash<TNodeSet>> JoinAlgoHintsTable_; }; /* @@ -232,7 +221,17 @@ template<typename TNodeSet> TNodeSet TDPHypSolver<TNodeSet>::NextBitset(const TN return res; } -template<typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypSolver<TNodeSet>::Solve() { +template<typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypSolver<TNodeSet>::Solve(const TOptimizerHints& hints) { + for (auto& h : hints.CardinalityHints->Hints) { + TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels); + CardHintsTable_[hintSet] = &h; + } + + for (auto& h : hints.JoinAlgoHints->Hints) { + TNodeSet hintSet = Graph_.GetNodesByRelNames(h.JoinLabels); + JoinAlgoHintsTable_[hintSet] = &h; + } + auto& nodes = Graph_.GetNodes(); Y_ASSERT(nodes.size() == NNodes_); @@ -242,7 +241,7 @@ template<typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypSo s[i] = 1; DpTable_[s] = nodes[i].RelationOptimizerNode; if (CardHintsTable_.contains(s)){ - DpTable_[s]->Stats->Nrows = CardHintsTable_.at(s).ApplyHint(DpTable_[s]->Stats->Nrows); + DpTable_[s]->Stats->Nrows = CardHintsTable_.at(s)->ApplyHint(DpTable_[s]->Stats->Nrows); } } @@ -416,7 +415,7 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys, IProviderContext& ctx, - TCardinalityHints::TCardinalityHint* maybeHint, + TCardinalityHints::TCardinalityHint* maybeCardHint, TJoinAlgoHints::TJoinAlgoHint* maybeJoinHint ) { double bestCost = std::numeric_limits<double>::infinity(); @@ -425,11 +424,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS for (auto joinAlgo : AllJoinAlgos) { if (ctx.IsJoinApplicable(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind)){ - auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeHint).Cost; - if (maybeJoinHint) { - if (joinAlgo == maybeJoinHint->JoinHint) { - cost = -1; - } + auto cost = ctx.ComputeJoinStats(*left->Stats, *right->Stats, leftJoinKeys, rightJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost; + if (maybeJoinHint && joinAlgo == maybeJoinHint->JoinHint) { + cost = -1; + maybeJoinHint->Applied = true; } if (cost < bestCost) { bestCost = cost; @@ -440,11 +438,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS if (isCommutative) { if (ctx.IsJoinApplicable(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind)){ - auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind, maybeHint).Cost; - if (maybeJoinHint) { - if (joinAlgo == maybeJoinHint->JoinHint) { - cost = -1; - } + auto cost = ctx.ComputeJoinStats(*right->Stats, *left->Stats, rightJoinKeys, leftJoinKeys, joinAlgo, joinKind, maybeCardHint).Cost; + if (maybeJoinHint && joinAlgo == maybeJoinHint->JoinHint) { + cost = -1; + maybeJoinHint->Applied = true; } if (cost < bestCost) { bestCost = cost; @@ -458,10 +455,10 @@ template <typename TNodeSet> std::shared_ptr<TJoinOptimizerNodeInternal> TDPHypS Y_ENSURE(bestCost != std::numeric_limits<double>::infinity(), "No join was chosen!"); if (bestJoinIsReversed) { - return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeHint); + return MakeJoinInternal(right, left, reversedJoinConditions, rightJoinKeys, leftJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint); } - return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeHint); + return MakeJoinInternal(left, right, joinConditions, leftJoinKeys, rightJoinKeys, joinKind, bestAlgo, ctx, maybeCardHint); } /* @@ -485,8 +482,8 @@ template<typename TNodeSet> void TDPHypSolver<TNodeSet>::EmitCsgCmp(const TNodeS TNodeSet joined = s1 | s2; - auto maybeCardHint = CardHintsTable_.contains(joined) ? & CardHintsTable_[joined] : nullptr; - auto maybeJoinAlgoHint = JoinAlgoHintsTable_.contains(joined) ? & JoinAlgoHintsTable_[joined] : nullptr; + auto maybeCardHint = CardHintsTable_.contains(joined) ? CardHintsTable_[joined] : nullptr; + auto maybeJoinAlgoHint = JoinAlgoHintsTable_.contains(joined) ? JoinAlgoHintsTable_[joined] : nullptr; auto bestJoin = PickBestJoin( leftNodes, diff --git a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp index 7bdd545fe05..e9e701fda15 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp @@ -242,8 +242,8 @@ private: const std::shared_ptr<TJoinOptimizerNode>& joinTree, const TOptimizerHints& hints = {} ) { - TJoinHypergraph<TNodeSet> hypergraph = MakeJoinHypergraph<TNodeSet>(joinTree, hints.JoinOrderHints); - TDPHypSolver<TNodeSet> solver(hypergraph, this->Pctx, hints.CardinalityHints, hints.JoinAlgoHints); + TJoinHypergraph<TNodeSet> hypergraph = MakeJoinHypergraph<TNodeSet>(joinTree, hints); + TDPHypSolver<TNodeSet> solver(hypergraph, this->Pctx); if (solver.CountCC(MaxDPhypTableSize_) >= MaxDPhypTableSize_) { YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded\n"; @@ -251,7 +251,7 @@ private: return joinTree; } - auto bestJoinOrder = solver.Solve(); + auto bestJoinOrder = solver.Solve(hints); return ConvertFromInternal(bestJoinOrder); } private: @@ -329,7 +329,7 @@ TExprBase DqOptimizeEquiJoinWithCosts( // Generate an initial tree auto joinTree = ConvertToJoinTree(joinTuple, rels); - if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::ProviderKqp, NYql::NLog::ELevel::TRACE)) { + if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::CoreDq, NYql::NLog::ELevel::TRACE)) { std::stringstream str; str << "Converted join tree:\n"; joinTree->Print(str); @@ -338,7 +338,7 @@ TExprBase DqOptimizeEquiJoinWithCosts( joinTree = opt.JoinSearch(joinTree, hints); - if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::ProviderKqp, NYql::NLog::ELevel::TRACE)) { + if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::CoreDq, NYql::NLog::ELevel::TRACE)) { std::stringstream str; str << "Optimizied join tree:\n"; joinTree->Print(str); diff --git a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h index 8eba22c1ea5..9a4fe0a3f49 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_join_hypergraph.h @@ -200,6 +200,16 @@ public: return simpleEdges; } + bool HasLabels(const TVector<TString>& labels) const { + for (const auto& label: labels) { + if (!NodeIdByRelationName_.contains(label)) { + return false; + } + } + + return true; + } + inline const TVector<TNode>& GetNodes() const { return Nodes_; } @@ -272,9 +282,13 @@ public: : Graph_(graph) {} - void Apply(const TJoinOrderHints& hints) { - for (const auto& hintTree: hints.HintTrees) { - auto labels = ApplyHintsToSubgraph(hintTree); + void Apply(TJoinOrderHints& hints) { + for (auto& hint: hints.Hints) { + if (!Graph_.HasLabels(hint.Tree->Labels())) { + continue; + } + + auto labels = ApplyHintsToSubgraph(hint.Tree); auto nodes = Graph_.GetNodesByRelNames(labels); for (size_t i = 0; i < Graph_.GetEdges().size(); ++i) { @@ -290,15 +304,17 @@ public: Graph_.UpdateEdgeSides(i, newLeft, newRight); } + + hint.Applied = true; } } private: - TVector<TString> ApplyHintsToSubgraph(const std::shared_ptr<IBaseOptimizerNode>& node) { - if (node->Kind == EOptimizerNodeKind::JoinNodeType) { - auto join = std::static_pointer_cast<TJoinOptimizerNode>(node); - TVector<TString> lhsLabels = ApplyHintsToSubgraph(join->LeftArg); - TVector<TString> rhsLabels = ApplyHintsToSubgraph(join->RightArg); + TVector<TString> ApplyHintsToSubgraph(const std::shared_ptr<TJoinOrderHints::ITreeNode>& node) { + if (node->IsJoin()) { + auto join = std::static_pointer_cast<TJoinOrderHints::TJoinNode>(node); + TVector<TString> lhsLabels = ApplyHintsToSubgraph(join->Lhs); + TVector<TString> rhsLabels = ApplyHintsToSubgraph(join->Rhs); auto lhs = Graph_.GetNodesByRelNames(lhsLabels); auto rhs = Graph_.GetNodesByRelNames(rhsLabels); @@ -332,7 +348,8 @@ private: return joinLabels; } - return node->Labels(); + auto relation = std::static_pointer_cast<TJoinOrderHints::TRelationNode>(node); + return {relation->Label}; } private: diff --git a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h index a808bcfa414..de59957f426 100644 --- a/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h +++ b/ydb/library/yql/dq/opt/dq_opt_make_join_hypergraph.h @@ -89,7 +89,7 @@ void MakeJoinHypergraphRec( template <typename TNodeSet> TJoinHypergraph<TNodeSet> MakeJoinHypergraph( const std::shared_ptr<IBaseOptimizerNode>& joinTree, - const TJoinOrderHints& hints = {} + const TOptimizerHints& hints = {} ) { TJoinHypergraph<TNodeSet> graph{}; std::unordered_map<std::shared_ptr<IBaseOptimizerNode>, TNodeSet> subtreeNodes{}; @@ -100,9 +100,9 @@ TJoinHypergraph<TNodeSet> MakeJoinHypergraph( YQL_CLOG(TRACE, CoreDq) << graph.String(); } - if (!hints.HintTrees.empty()) { + if (!hints.JoinOrderHints->Hints.empty()) { TJoinOrderHintsApplier joinHints(graph); - joinHints.Apply(hints); + joinHints.Apply(*hints.JoinOrderHints); if (NYql::NLog::YqlLogger().NeedToLog(NYql::NLog::EComponent::CoreDq, NYql::NLog::ELevel::TRACE)) { YQL_CLOG(TRACE, CoreDq) << "Hypergraph after hints: "; YQL_CLOG(TRACE, CoreDq) << graph.String(); |