diff options
| author | pudge1000-7 <[email protected]> | 2025-06-09 10:56:06 +0300 |
|---|---|---|
| committer | pudge1000-7 <[email protected]> | 2025-06-09 11:15:08 +0300 |
| commit | a17ed1a95cd8e529af068d238d03d1faece71624 (patch) | |
| tree | ab3d3606096454879f454c277ba45bc19b8b1cc2 /yql/essentials | |
| parent | c2e48d700270ba7a6923c14f56a2864b7884f19e (diff) | |
[RBO] Improved FSM with sortings (added directions)
commit_hash:9231a4622fddb446383422832d990f02e9380055
Diffstat (limited to 'yql/essentials')
| -rw-r--r-- | yql/essentials/core/cbo/cbo_interesting_orderings.cpp | 219 | ||||
| -rw-r--r-- | yql/essentials/core/cbo/cbo_interesting_orderings.h | 90 | ||||
| -rw-r--r-- | yql/essentials/core/cbo/cbo_optimizer_new.h | 6 | ||||
| -rw-r--r-- | yql/essentials/core/yql_cost_function.h | 12 | ||||
| -rw-r--r-- | yql/essentials/core/yql_statistics.cpp | 13 | ||||
| -rw-r--r-- | yql/essentials/core/yql_statistics.h | 22 | ||||
| -rw-r--r-- | yql/essentials/core/yql_type_annotation.h | 1 |
7 files changed, 310 insertions, 53 deletions
diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp index b34fe05628f..cd0f5dbe9d0 100644 --- a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp +++ b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp @@ -1,6 +1,7 @@ #include "cbo_interesting_orderings.h" #include <library/cpp/disjoint_sets/disjoint_sets.h> +#include <library/cpp/iterator/zip.h> #include <yql/essentials/utils/log/log.h> @@ -15,11 +16,39 @@ namespace NYql::NDq { bool TOrdering::operator==(const TOrdering& other) const { - return std::tie(this->Type, this->Items) == std::tie(other.Type, other.Items); + return + std::tie(this->Type, this->Items, this->Directions) == + std::tie(other.Type, other.Items, other.Directions); } TString TOrdering::ToString() const { - return "{" + JoinSeq(", ", Items) + "}"; + TVector<TString> itemStrings; + itemStrings.reserve(Items.size()); + + for (size_t i = 0; i < Items.size(); ++i) { + TString itemStr = ::ToString(Items[i]); + + if (i < Directions.size()) { + switch (Directions[i]) { + case TItem::EDirection::EAscending: + itemStr += "^"; + break; + case TItem::EDirection::EDescending: + itemStr += "v"; + break; + default: + break; + } + } + + itemStrings.push_back(std::move(itemStr)); + } + + return "{" + JoinSeq(", ", itemStrings) + "}"; +} + +bool TOrdering::HasItem(std::size_t item) const { + return std::find(Items.begin(), Items.end(), item) != Items.end(); } bool TFunctionalDependency::IsEquivalence() const { @@ -27,7 +56,7 @@ bool TFunctionalDependency::IsEquivalence() const { } bool TFunctionalDependency::IsImplication() const { - return Type == EType::EImplication; + return Type == EType::EImplication && !IsConstant(); } bool TFunctionalDependency::IsConstant() const { @@ -284,21 +313,55 @@ i64 TFDStorage::FindInterestingOrderingIdx( TOrdering::EType type, TTableAliasMap* tableAliases ) { - const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, type, false, tableAliases); + const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, {}, type, false, tableAliases); + return orderingIdx; +} + +std::size_t TFDStorage::FindSorting( + const std::vector<TJoinColumn>& interestingOrdering, + const std::vector<TOrdering::TItem::EDirection>& directions, + TTableAliasMap* tableAliases +) { + const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, directions, TOrdering::ESorting, false, tableAliases); return orderingIdx; } +std::size_t TFDStorage::AddSorting( + const std::vector<TJoinColumn>& interestingOrdering, + std::vector<TOrdering::TItem::EDirection> directions, + TTableAliasMap* tableAliases +) { + return AddInterestingOrdering(interestingOrdering, TOrdering::ESorting, directions, tableAliases); +} + +std::size_t TFDStorage::AddShuffling( + const std::vector<TJoinColumn>& interestingOrdering, + TTableAliasMap* tableAliases +) { + return AddInterestingOrdering(interestingOrdering, TOrdering::EShuffle, std::vector<TOrdering::TItem::EDirection>{}, tableAliases); +} + std::size_t TFDStorage::AddInterestingOrdering( - const TVector<TString>& interestingOrdering, + const std::vector<TJoinColumn>& interestingOrdering, TOrdering::EType type, + const std::vector<TOrdering::TItem::EDirection>& directions, TTableAliasMap* tableAliases ) { - std::vector<TJoinColumn> columns; - columns.reserve(interestingOrdering.size()); - for (const auto& column: interestingOrdering) { - columns.push_back(TJoinColumn("", column)); + if (interestingOrdering.empty()) { + return std::numeric_limits<std::size_t>::max(); } - return AddInterestingOrdering(columns, type, tableAliases); + + auto [items, foundIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, directions, type, true, tableAliases); + if (items.Items.empty()) { + return std::numeric_limits<std::size_t>::max(); + } + + if (foundIdx >= 0) { + return static_cast<std::size_t>(foundIdx); + } + + InterestingOrderings.push_back(std::move(items)); + return InterestingOrderings.size() - 1; } std::size_t TFDStorage::AddInterestingOrdering( @@ -310,18 +373,18 @@ std::size_t TFDStorage::AddInterestingOrdering( return std::numeric_limits<std::size_t>::max(); } - auto [items, foundIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, type, true, tableAliases); + auto [items, foundIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, {}, type, true, tableAliases); if (foundIdx >= 0) { return static_cast<std::size_t>(foundIdx); } - InterestingOrderings.emplace_back(std::move(items), type); + InterestingOrderings.emplace_back(std::move(items)); return InterestingOrderings.size() - 1; } TVector<TJoinColumn> TFDStorage::GetInterestingOrderingsColumnNamesByIdx(std::size_t interestingOrderingIdx) const { - Y_ASSERT(interestingOrderingIdx < InterestingOrderings.size()); + Y_ENSURE(interestingOrderingIdx < InterestingOrderings.size()); TVector<TJoinColumn> columns; columns.reserve(InterestingOrderings[interestingOrderingIdx].Items.size()); @@ -358,24 +421,38 @@ TString TFDStorage::ToString() const { return ss; } -std::pair<std::vector<std::size_t>, i64> TFDStorage::ConvertColumnsAndFindExistingOrdering( +std::pair<TOrdering, i64> TFDStorage::ConvertColumnsAndFindExistingOrdering( const std::vector<TJoinColumn>& interestingOrdering, + const std::vector<TOrdering::TItem::EDirection>& directions, TOrdering::EType type, bool createIfNotExists, TTableAliasMap* tableAliases ) { + if(!( + directions.empty() && type == TOrdering::EShuffle || + directions.size() == interestingOrdering.size() && type == TOrdering::ESorting + )) { + YQL_CLOG(TRACE, CoreDq) + << "Ordering and directions sizes mismatch : " << directions.size() << " vs " << interestingOrdering.size(); + return {TOrdering(), -1}; + } + std::vector<std::size_t> items = ConvertColumnIntoIndexes(interestingOrdering, createIfNotExists, tableAliases); if (items.empty()) { - return {{}, -1}; + return {TOrdering(), -1}; } for (std::size_t i = 0; i < InterestingOrderings.size(); ++i) { - if (items == InterestingOrderings[i].Items && type == InterestingOrderings[i].Type) { - return {items, static_cast<i64>(i)}; + if ( + items == InterestingOrderings[i].Items && + type == InterestingOrderings[i].Type && + directions == InterestingOrderings[i].Directions + ) { + return {InterestingOrderings[i], static_cast<i64>(i)}; } } - return {items, -1}; + return {TOrdering(std::move(items), directions, type), -1}; } std::vector<std::size_t> TFDStorage::ConvertColumnIntoIndexes( @@ -456,7 +533,7 @@ void TOrderingsStateMachine::TLogicalOrderings::RemoveState() { } void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) { - if (orderingIdx < 0 || orderingIdx >= static_cast<i64>(DFSM->InitStateByOrderingIdx.size())) { + if (!IsInitialized() || orderingIdx < 0 || orderingIdx >= static_cast<i64>(DFSM->InitStateByOrderingIdx.size())) { RemoveState(); return; } @@ -556,12 +633,59 @@ TString TOrderingsStateMachine::ToString() const { return ss; } +void TOrderingsStateMachine::CollectItemInfo( + const std::vector<TFunctionalDependency>& fds, + const std::vector<TOrdering>& interestingOrderings +) { + std::size_t maxItem = 0; + + for (const auto& ordering: interestingOrderings) { + if (ordering.Items.empty()) { + continue; + } + + std::size_t orderingMaxItem = *std::max_element(ordering.Items.begin(), ordering.Items.end()); + maxItem = std::max(maxItem, orderingMaxItem); + } + + for (const auto& fd: fds) { + std::size_t maxAntecedentItems = 0; + if (!fd.AntecedentItems.empty()) { + maxAntecedentItems = *std::max_element(fd.AntecedentItems.begin(), fd.AntecedentItems.end()); + } + + maxItem = std::max({maxItem, fd.ConsequentItem, maxAntecedentItems}); + } + + ItemInfo.resize(maxItem + 1); + + for (const auto& ordering: interestingOrderings) { + Y_ENSURE(ordering.Items.size() == ordering.Directions.size() || ordering.Directions.empty()); + + for (const auto& [item, direction]: Zip(ordering.Items, ordering.Directions)) { + switch (direction) { + case TOrdering::TItem::EDirection::EAscending: { + ItemInfo[item].UsedInAscOrdering = true; + break; + } + case TOrdering::TItem::EDirection::EDescending: { + ItemInfo[item].UsedInDescOrdering = true; + break; + } + default: {} + } + } + } + +} + void TOrderingsStateMachine::Build( const std::vector<TFunctionalDependency>& fds, const std::vector<TOrdering>& interestingOrderings ) { + CollectItemInfo(fds, interestingOrderings); std::vector<TFunctionalDependency> processedFDs = PruneFDs(fds, interestingOrderings); - NFSM.Build(processedFDs, interestingOrderings); + NFSM.Build(processedFDs, interestingOrderings, ItemInfo); DFSM = MakeSimpleShared<TDFSM>(); DFSM->Build(NFSM, processedFDs, interestingOrderings); Built = true; @@ -605,13 +729,14 @@ TString TOrderingsStateMachine::TNFSM::ToString() const { void TOrderingsStateMachine::TNFSM::Build( const std::vector<TFunctionalDependency>& fds, - const std::vector<TOrdering>& interesting + const std::vector<TOrdering>& interesting, + const std::vector<TItemInfo>& itemInfo ) { for (std::size_t i = 0; i < interesting.size(); ++i) { AddNode(interesting[i], TNFSM::TNode::EInteresting, i); } - ApplyFDs(fds, interesting); + ApplyFDs(fds, interesting, itemInfo); PrefixClosure(); for (std::size_t idx = 0; idx < Edges.size(); ++idx) { @@ -673,7 +798,8 @@ void TOrderingsStateMachine::TNFSM::PrefixClosure() { void TOrderingsStateMachine::TNFSM::ApplyFDs( const std::vector<TFunctionalDependency>& fds, - const std::vector<TOrdering>& interestingOrderings + const std::vector<TOrdering>& interestingOrderings, + const std::vector<TItemInfo>& itemInfo ) { std::size_t maxInterestingOrderingSize = 0; if (!interestingOrderings.empty()) { @@ -685,12 +811,15 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs( )->Items.size(); } - for (std::size_t nodeIdx = 0; nodeIdx < Nodes.size() && Nodes.size() < EMaxNFSMStates; ++nodeIdx) { for (std::size_t fdIdx = 0; fdIdx < fds.size() && Nodes.size() < EMaxNFSMStates; ++fdIdx) { + if (Nodes[nodeIdx].Ordering.Items.empty()) { + continue; + } + TFunctionalDependency fd = fds[fdIdx]; - auto applyFD = [this, nodeIdx, maxInterestingOrderingSize](const TFunctionalDependency& fd, std::size_t fdIdx) { + auto applyFD = [this, &itemInfo, nodeIdx, maxInterestingOrderingSize](const TFunctionalDependency& fd, std::size_t fdIdx) { if (Nodes.size() >= EMaxNFSMStates) { return; } @@ -702,9 +831,14 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs( return; } bool isNewOrderingPrefixOfOld = (it == (newOrdering.end() - 1)); - newOrdering.erase(it); - std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); + auto newDirections = Nodes[nodeIdx].Ordering.Directions; + if (!newDirections.empty()) { + newDirections.erase(newDirections.begin() + std::distance(newOrdering.begin(), it)); + newOrdering.erase(it); + } + + std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), std::move(newDirections), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); if (!isNewOrderingPrefixOfOld || Nodes[nodeIdx].Ordering.Type == TOrdering::EShuffle) { AddEdge(nodeIdx, dstIdx, fdIdx); @@ -729,11 +863,11 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs( auto it = std::find(Nodes[nodeIdx].Ordering.Items.begin(), Nodes[nodeIdx].Ordering.Items.end(), fd.ConsequentItem); it != Nodes[nodeIdx].Ordering.Items.end() ) { - if (fd.IsEquivalence()) { // (a, b) -> (b, a) + if (fd.IsEquivalence()) { // swap (a, b) -> (b, a) std::size_t consequentItemIdx = std::distance(Nodes[nodeIdx].Ordering.Items.begin(), it); auto newOrdering = Nodes[nodeIdx].Ordering.Items; std::swap(newOrdering[antecedentItemIdx], newOrdering[consequentItemIdx]); - std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); + std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Directions, Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); AddEdge(nodeIdx, dstIdx, fdIdx); AddEdge(dstIdx, nodeIdx, fdIdx); } @@ -747,7 +881,7 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs( std::vector<std::size_t> newOrdering = Nodes[nodeIdx].Ordering.Items; newOrdering[antecedentItemIdx] = fd.ConsequentItem; - std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); + std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Directions, Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); AddEdge(nodeIdx, dstIdx, fdIdx); AddEdge(dstIdx, nodeIdx, fdIdx); } @@ -764,8 +898,25 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs( std::vector<std::size_t> newOrdering = Nodes[nodeIdx].Ordering.Items; newOrdering.insert(newOrdering.begin() + i, fd.ConsequentItem); - std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); - AddEdge(nodeIdx, dstIdx, fdIdx); // Epsilon edge will be added during PrefixClosure + auto newDirections = Nodes[nodeIdx].Ordering.Directions; + if (newDirections.empty()) { // smthing went wrong during ordering adding stage + return; + } + newDirections.insert(newDirections.begin() + i, TOrdering::TItem::EDirection::ENone); + + Y_ENSURE(fd.ConsequentItem < itemInfo.size()); + + if (itemInfo[fd.ConsequentItem].UsedInAscOrdering) { + newDirections[i] = TOrdering::TItem::EDirection::EAscending; + std::size_t dstIdx = AddNode(TOrdering(newOrdering, newDirections, Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); + AddEdge(nodeIdx, dstIdx, fdIdx); // Epsilon edge will be added during PrefixClosure + } + + if (itemInfo[fd.ConsequentItem].UsedInDescOrdering) { + newDirections[i] = TOrdering::TItem::EDirection::EDescending; + std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), std::move(newDirections), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial); + AddEdge(nodeIdx, dstIdx, fdIdx); // Epsilon edge will be added during PrefixClosure + } } } }; @@ -956,14 +1107,14 @@ std::vector<TFunctionalDependency> TOrderingsStateMachine::PruneFDs( bool canLeadToInteresting = false; for (const auto& ordering: interestingOrderings) { - if (std::find(ordering.Items.begin(), ordering.Items.end(), fds[i].ConsequentItem) != ordering.Items.end()) { + if (ordering.HasItem(fds[i].ConsequentItem)) { canLeadToInteresting = true; break; } if ( fds[i].IsEquivalence() && - std::find(ordering.Items.begin(), ordering.Items.end(), fds[i].AntecedentItems[0]) != ordering.Items.end() + ordering.HasItem(fds[i].AntecedentItems[0]) ) { canLeadToInteresting = true; break; diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.h b/yql/essentials/core/cbo/cbo_interesting_orderings.h index 77a20ef1964..e4e86bcb0d9 100644 --- a/yql/essentials/core/cbo/cbo_interesting_orderings.h +++ b/yql/essentials/core/cbo/cbo_interesting_orderings.h @@ -28,17 +28,49 @@ struct TOrdering { bool operator==(const TOrdering& other) const; TString ToString() const; + struct TItem { + enum EDirection : uint32_t { + ENone = 0, + EAscending = 1, + EDescending = 2 + }; + }; + TOrdering( std::vector<std::size_t> items, + std::vector<TItem::EDirection> directions, EType type, bool isNatural = false ) : Items(std::move(items)) + , Directions(std::move(directions)) , Type(type) , IsNatural(isNatural) + { + Y_ENSURE( + Directions.empty() && type == TOrdering::EShuffle || + Directions.size() == Items.size() && type == TOrdering::ESorting + ); + } + + TOrdering( + std::vector<std::size_t> items, + EType type + ) + : TOrdering( + std::move(items), + std::vector<TItem::EDirection>{}, + type + ) {} + TOrdering() = default; + + bool HasItem(std::size_t item) const; + std::vector<std::size_t> Items; + std::vector<TItem::EDirection> Directions; + EType Type; /* Definition was taken from 'Complex Ordering Requirements' section. Not natural orderings are complex join predicates or grouping. */ bool IsNatural = false; @@ -135,8 +167,7 @@ public: TTableAliasMap* tableAliases = nullptr ); -public: - // Deprecated, use the others in this public section instead +public: // deprecated section, use the section below instead of this std::size_t AddFD( const TJoinColumn& antecedentColumn, const TJoinColumn& consequentColumn, @@ -145,6 +176,7 @@ public: TTableAliasMap* tableAliases = nullptr ); +public: std::size_t AddConstant( const TJoinColumn& constantColumn, bool alwaysActive, @@ -168,8 +200,7 @@ public: private: std::size_t AddFDImpl(TFunctionalDependency fd); -public: - +public: // deprecated section, use the section below instead of this i64 FindInterestingOrderingIdx( const std::vector<TJoinColumn>& interestingOrdering, TOrdering::EType type, @@ -182,12 +213,30 @@ public: TTableAliasMap* tableAliases = nullptr ); - std::size_t AddInterestingOrdering( - const TVector<TString>& interestingOrdering, // column names - TOrdering::EType type, +public: + std::size_t FindSorting( + const std::vector<TJoinColumn>& interestingOrdering, + const std::vector<TOrdering::TItem::EDirection>& directions, + TTableAliasMap* tableAliases = nullptr + ); + + std::size_t AddSorting( + const std::vector<TJoinColumn>& interestingOrdering, + std::vector<TOrdering::TItem::EDirection> directions, + TTableAliasMap* tableAliases = nullptr + ); + + std::size_t FindShuffling( + const std::vector<TJoinColumn>& interestingOrdering, + TTableAliasMap* tableAliases = nullptr + ); + + std::size_t AddShuffling( + const std::vector<TJoinColumn>& interestingOrdering, TTableAliasMap* tableAliases = nullptr ); +public: TVector<TJoinColumn> GetInterestingOrderingsColumnNamesByIdx(std::size_t interestingOrderingIdx) const; TString ToString() const; @@ -196,8 +245,9 @@ public: std::vector<TOrdering> InterestingOrderings; private: - std::pair<std::vector<std::size_t>, i64> ConvertColumnsAndFindExistingOrdering( + std::pair<TOrdering, i64> ConvertColumnsAndFindExistingOrdering( const std::vector<TJoinColumn>& interestingOrdering, + const std::vector<TOrdering::TItem::EDirection>& directions, TOrdering::EType type, bool createIfNotExists, TTableAliasMap* tableAliases = nullptr @@ -215,6 +265,13 @@ private: TTableAliasMap* tableAliases = nullptr ); + std::size_t AddInterestingOrdering( + const std::vector<TJoinColumn>& interestingOrdering, + TOrdering::EType type, + const std::vector<TOrdering::TItem::EDirection>& directions, + TTableAliasMap* tableAliases + ); + private: THashMap<TString, std::size_t> IdxByColumn; std::vector<TJoinColumn> ColumnByIdx; @@ -239,6 +296,11 @@ private: EMaxDFSMStates = 512, }; + struct TItemInfo { + bool UsedInAscOrdering = false; + bool UsedInDescOrdering = false; + }; + public: using TFDSet = std::bitset<EMaxFDCount>; @@ -364,7 +426,8 @@ private: TString ToString() const; void Build( const std::vector<TFunctionalDependency>& fds, - const std::vector<TOrdering>& interesting + const std::vector<TOrdering>& interesting, + const std::vector<TItemInfo>& itemInfo ); private: @@ -373,7 +436,8 @@ private: void PrefixClosure(); void ApplyFDs( const std::vector<TFunctionalDependency>& fds, - const std::vector<TOrdering>& interesting + const std::vector<TOrdering>& interesting, + const std::vector<TItemInfo>& itemInfo ); private: @@ -448,11 +512,17 @@ private: const std::vector<TOrdering>& interestingOrderings ); + void CollectItemInfo( + const std::vector<TFunctionalDependency>& fds, + const std::vector<TOrdering>& interestingOrderings + ); + private: TNFSM NFSM; TSimpleSharedPtr<TDFSM> DFSM; // it is important to have sharedptr here, otherwise all logicalorderings will invalidate after copying of FSM std::vector<i64> FdMapping; // We to remap FD idxes after the pruning + std::vector<TItemInfo> ItemInfo; bool Built = false; }; diff --git a/yql/essentials/core/cbo/cbo_optimizer_new.h b/yql/essentials/core/cbo/cbo_optimizer_new.h index 2a010d8f195..18f51639963 100644 --- a/yql/essentials/core/cbo/cbo_optimizer_new.h +++ b/yql/essentials/core/cbo/cbo_optimizer_new.h @@ -35,12 +35,6 @@ struct IBaseOptimizerNode { EOptimizerNodeKind Kind; TOptimizerStatistics Stats; - // for interesting orderings framework - - NDq::TOrderingsStateMachine::TLogicalOrderings LogicalOrderings; - - // - IBaseOptimizerNode(EOptimizerNodeKind k) : Kind(k) {} IBaseOptimizerNode(EOptimizerNodeKind k, TOptimizerStatistics s) : Kind(k), Stats(std::move(s)) {} diff --git a/yql/essentials/core/yql_cost_function.h b/yql/essentials/core/yql_cost_function.h index dbee46ad0c0..4775562dcc4 100644 --- a/yql/essentials/core/yql_cost_function.h +++ b/yql/essentials/core/yql_cost_function.h @@ -37,10 +37,12 @@ struct TJoinColumn { TString RelName{}; // TODO: this should be a list. now list of relations is separated by comma in this string TString AttributeName{}; TString AttributeNameWithAliases{}; - std::optional<TString> OriginalRelName; + std::optional<TString> OriginalRelName{}; std::optional<ui32> EquivalenceClass{}; bool IsConstant = false; + TJoinColumn() = default; + TJoinColumn(TString relName, TString attributeName) : RelName(relName), AttributeName(attributeName), @@ -50,6 +52,14 @@ struct TJoinColumn { return RelName == other.RelName && AttributeName == other.AttributeName; } + static TJoinColumn FromString(const TString& column) { + if (column.find('.') != TString::npos) { + return TJoinColumn(column.substr(0, column.find('.')), column.substr(column.find('.') + 1)); + } + + return TJoinColumn("", column); + } + struct THashFunction { size_t operator()(const TJoinColumn& c) const diff --git a/yql/essentials/core/yql_statistics.cpp b/yql/essentials/core/yql_statistics.cpp index 16ea755f08c..ad9fb2ab875 100644 --- a/yql/essentials/core/yql_statistics.cpp +++ b/yql/essentials/core/yql_statistics.cpp @@ -92,7 +92,16 @@ std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s) } os << "[" << tmp << "]"; } - os << ", LogicalOrderings state: " << s.LogicalOrderings.GetState(); + os << ", LogicalOrderings (Shufflings) state: " << s.LogicalOrderings.GetState(); + os << ", SortingOrderings (Sortings) state: " << s.SortingOrderings.GetState(); + + if (s.ReversedSortingOrderings.HasState()) { + os << ", ReversedSortingOrderings (Sortings) state: " << s.ReversedSortingOrderings.GetState(); + } + + if (s.SortingOrderingIdx >= 0) { + os << ", SortingOrderingIdx: " << s.SortingOrderingIdx; + } os << ", Sel: " << s.Selectivity; os << ", Storage: " << ConvertToStatisticsTypeString(s.StorageType); @@ -103,7 +112,7 @@ std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s) for (size_t i = 0; i < s.SortColumns->Columns.size() && i < s.SortColumns->Aliases.size(); i++) { auto c = s.SortColumns->Columns[i]; auto a = s.SortColumns->Aliases[i]; - if (a.empty()) { + if (!a.empty()) { tmp.append(a).append("."); } diff --git a/yql/essentials/core/yql_statistics.h b/yql/essentials/core/yql_statistics.h index 0b9c36e7553..969b87453d8 100644 --- a/yql/essentials/core/yql_statistics.h +++ b/yql/essentials/core/yql_statistics.h @@ -56,11 +56,22 @@ struct TOptimizerStatistics { struct TKeyColumns : public TSimpleRefCount<TKeyColumns> { TVector<TString> Data; TKeyColumns(TVector<TString> data) : Data(std::move(data)) {} + + TVector<NDq::TJoinColumn> ToJoinColumns(const TString& alias) { + TVector<NDq::TJoinColumn> columns; + columns.reserve(Data.size()); + for (std::size_t i = 0; i < Data.size(); ++i) { + columns.push_back(NDq::TJoinColumn(alias, Data[i])); + } + + return columns; + } }; struct TSortColumns : public TSimpleRefCount<TSortColumns> { TVector<TString> Columns; TVector<TString> Aliases; + TSortColumns(const TVector<TString>& cols, const TVector<TString>& aliases) : Columns(cols) , Aliases(aliases) @@ -99,7 +110,9 @@ struct TOptimizerStatistics { double Selectivity = 1.0; TIntrusivePtr<TKeyColumns> KeyColumns; TIntrusivePtr<TColumnStatMap> ColumnStatistics; + TIntrusivePtr<TShuffledByColumns> ShuffledByColumns; + TIntrusivePtr<TSortColumns> SortColumns; EStorageType StorageType = EStorageType::NA; std::shared_ptr<IProviderStatistics> Specific; @@ -108,8 +121,17 @@ struct TOptimizerStatistics { TString SourceTableName; TSimpleSharedPtr<THashSet<TString>> Aliases; TIntrusivePtr<NDq::TTableAliasMap> TableAliases; + NDq::TOrderingsStateMachine::TLogicalOrderings LogicalOrderings; + + NDq::TOrderingsStateMachine::TLogicalOrderings SortingOrderings; + NDq::TOrderingsStateMachine::TLogicalOrderings ReversedSortingOrderings; + std::optional<std::size_t> ShuffleOrderingIdx; + std::int64_t SortingOrderingIdx = -1; + + // special flag for equijoin + bool CBOFired = false; TOptimizerStatistics(TOptimizerStatistics&&) = default; TOptimizerStatistics& operator=(TOptimizerStatistics&&) = default; diff --git a/yql/essentials/core/yql_type_annotation.h b/yql/essentials/core/yql_type_annotation.h index ce70891e2e0..c2f73fa7fcf 100644 --- a/yql/essentials/core/yql_type_annotation.h +++ b/yql/essentials/core/yql_type_annotation.h @@ -383,6 +383,7 @@ inline TString GetRandomKey<TGUID>() { } struct TTypeAnnotationContext: public TThrRefBase { + TSimpleSharedPtr<NDq::TOrderingsStateMachine> SortingsFSM; TSimpleSharedPtr<NDq::TOrderingsStateMachine> OrderingsFSM; TLangVersion LangVer = MinLangVersion; THashMap<TString, TIntrusivePtr<TOptimizerStatistics::TColumnStatMap>> ColumnStatisticsByTableName; |
