diff options
| author | pudge1000-7 <[email protected]> | 2025-06-19 14:36:01 +0300 |
|---|---|---|
| committer | pudge1000-7 <[email protected]> | 2025-06-19 15:15:32 +0300 |
| commit | 8d42f4f2ab6e419a05a55ba8d71dda34c446c22b (patch) | |
| tree | 6547a074647776d64218ed325500d890307869df /yql/essentials | |
| parent | f18d41a0434cfa7ef77bd5d43f3c8d058fac0698 (diff) | |
[RBO] Improved Sortings FSM
commit_hash:3077f1e2a744069524dc6723bca2fbdbb92ae2ba
Diffstat (limited to 'yql/essentials')
| -rw-r--r-- | yql/essentials/core/cbo/cbo_interesting_orderings.cpp | 51 | ||||
| -rw-r--r-- | yql/essentials/core/cbo/cbo_interesting_orderings.h | 28 | ||||
| -rw-r--r-- | yql/essentials/core/yql_statistics.cpp | 2 |
3 files changed, 62 insertions, 19 deletions
diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp index e2549c1bb5d..7af0c35bb90 100644 --- a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp +++ b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp @@ -98,21 +98,20 @@ void TTableAliasMap::AddMapping(const TString& table, const TString& alias) { TableByAlias_[alias] = table; } -void TTableAliasMap::AddRename(const TString& from, const TString& to) { +void TTableAliasMap::AddRename(TString from, TString to) { if (auto pointIdx = from.find('.'); pointIdx != TString::npos) { TString alias = from.substr(0, pointIdx); TString baseTable = GetBaseTableByAlias(alias); TString columnName = from.substr(pointIdx + 1); - if (auto it = BaseColumnByRename_.find(columnName); it != BaseColumnByRename_.end()) { - auto baseColumn = it->second; - BaseColumnByRename_[to] = BaseColumnByRename_[from] = it->second; - return; + if (pointIdx == 0) { + from = from.substr(1); + } + if (auto pointIdx = to.find('.'); pointIdx == 0) { + to = to.substr(1); } - auto fromColumn = TBaseColumn(alias, columnName); auto baseColumn = TBaseColumn(baseTable, columnName); - BaseColumnByRename_[to] = BaseColumnByRename_[from] = baseColumn; return; } @@ -174,6 +173,9 @@ void TTableAliasMap::Merge(const TTableAliasMap& other) { TableByAlias_[alias] = table; } for (const auto& [from, to] : other.BaseColumnByRename_) { + if (BaseColumnByRename_.contains(from)) { + continue; + } BaseColumnByRename_[from] = TBaseColumn(to.Relation, to.Column); } } @@ -318,20 +320,18 @@ i64 TFDStorage::FindInterestingOrderingIdx( } std::size_t TFDStorage::FindSorting( - const std::vector<TJoinColumn>& interestingOrdering, - const std::vector<TOrdering::TItem::EDirection>& directions, + const TSorting& sorting, TTableAliasMap* tableAliases ) { - const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, directions, TOrdering::ESorting, false, tableAliases); + const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(sorting.Ordering, sorting.Directions, TOrdering::ESorting, false, tableAliases); return orderingIdx; } std::size_t TFDStorage::AddSorting( - const std::vector<TJoinColumn>& interestingOrdering, - std::vector<TOrdering::TItem::EDirection> directions, + const TSorting& sorting, TTableAliasMap* tableAliases ) { - return AddInterestingOrdering(interestingOrdering, TOrdering::ESorting, directions, tableAliases); + return AddInterestingOrdering(sorting.Ordering, TOrdering::ESorting, sorting.Directions, tableAliases); } std::size_t TFDStorage::AddShuffling( @@ -395,6 +395,18 @@ TVector<TJoinColumn> TFDStorage::GetInterestingOrderingsColumnNamesByIdx(std::si return columns; } +TSorting TFDStorage::GetInterestingSortingByOrderingIdx(std::size_t interestingOrderingIdx) const { + Y_ENSURE(interestingOrderingIdx < InterestingOrderings.size()); + + TVector<TJoinColumn> columns; + columns.reserve(InterestingOrderings[interestingOrderingIdx].Items.size()); + for (std::size_t columnIdx: InterestingOrderings[interestingOrderingIdx].Items) { + columns.push_back(ColumnByIdx_[columnIdx]); + } + + return {columns, InterestingOrderings[interestingOrderingIdx].Directions}; +} + TString TFDStorage::ToString() const { auto toVectorString = [](auto seq) { TVector<TString> strVector; @@ -532,6 +544,10 @@ void TOrderingsStateMachine::TLogicalOrderings::RemoveState() { *this = TLogicalOrderings(Dfsm_); } +i64 TOrderingsStateMachine::TLogicalOrderings::GetInitOrderingIdx() const { + return InitOrderingIdx_; +} + void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) { if (!IsInitialized() || orderingIdx < 0 || orderingIdx >= static_cast<i64>(Dfsm_->InitStateByOrderingIdx_.size())) { RemoveState(); @@ -542,6 +558,7 @@ void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) { return; } + InitOrderingIdx_ = orderingIdx; auto state = Dfsm_->InitStateByOrderingIdx_[orderingIdx]; State_ = state.StateIdx; ShuffleHashFuncArgsCount_ = state.ShuffleHashFuncArgsCount; @@ -788,7 +805,13 @@ void TOrderingsStateMachine::TNFSM::PrefixClosure() { AddEdge(i, j, TNFSM::TEdge::EPSILON); } - if (Nodes_[i].Ordering.Type == TOrdering::ESorting) { + Y_ENSURE(Nodes_[i].Ordering.Directions.size() <= Nodes_[j].Ordering.Directions.size()); + bool areDirsCompatable = std::equal( + Nodes_[i].Ordering.Directions.begin(), + Nodes_[i].Ordering.Directions.end(), + Nodes_[j].Ordering.Directions.begin() + ); + if (Nodes_[i].Ordering.Type == TOrdering::ESorting && areDirsCompatable) { AddEdge(j, i, TNFSM::TEdge::EPSILON); } } diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.h b/yql/essentials/core/cbo/cbo_interesting_orderings.h index fd45db41a07..b4d94a90807 100644 --- a/yql/essentials/core/cbo/cbo_interesting_orderings.h +++ b/yql/essentials/core/cbo/cbo_interesting_orderings.h @@ -140,7 +140,7 @@ public: TTableAliasMap() = default; void AddMapping(const TString& table, const TString& alias); - void AddRename(const TString& from, const TString& to); + void AddRename(TString from, TString to); TBaseColumn GetBaseColumnByRename(const TString& renamedColumn); TBaseColumn GetBaseColumnByRename(const NDq::TJoinColumn& renamedColumn); TString ToString() const; @@ -155,6 +155,19 @@ private: THashMap<TString, TBaseColumn> BaseColumnByRename_; }; +struct TSorting { + TSorting( + std::vector<TJoinColumn> ordering, + std::vector<TOrdering::TItem::EDirection> directions + ) + : Ordering(std::move(ordering)) + , Directions(std::move(directions)) + {} + + std::vector<TJoinColumn> Ordering; + std::vector<TOrdering::TItem::EDirection> Directions; +}; + /* * This class contains internal representation of the columns (mapping [column -> int]), FDs and interesting orderings */ @@ -215,14 +228,12 @@ public: // deprecated section, use the section below instead of this public: std::size_t FindSorting( - const std::vector<TJoinColumn>& interestingOrdering, - const std::vector<TOrdering::TItem::EDirection>& directions, + const TSorting& sorting, TTableAliasMap* tableAliases = nullptr ); std::size_t AddSorting( - const std::vector<TJoinColumn>& interestingOrdering, - std::vector<TOrdering::TItem::EDirection> directions, + const TSorting& sortings, TTableAliasMap* tableAliases = nullptr ); @@ -238,6 +249,8 @@ public: public: TVector<TJoinColumn> GetInterestingOrderingsColumnNamesByIdx(std::size_t interestingOrderingIdx) const; + + TSorting GetInterestingSortingByOrderingIdx(std::size_t interestingOrderingIdx) const; TString ToString() const; public: @@ -328,6 +341,7 @@ public: TFDSet GetFDs(); bool IsSubsetOf(const TLogicalOrderings& logicalOrderings); i64 GetState() const; + i64 GetInitOrderingIdx() const; public: bool HasState(); @@ -342,7 +356,11 @@ public: TDFSM* Dfsm_ = nullptr; /* we can have different args in hash shuffle function, so shuffles can be incompitable in this case */ i64 ShuffleHashFuncArgsCount_ = -1; + i64 State_ = -1; + + /* Index of the state which was set in SetOrdering */ + i64 InitOrderingIdx_ = -1; TFDSet AppliedFDs_{}; }; diff --git a/yql/essentials/core/yql_statistics.cpp b/yql/essentials/core/yql_statistics.cpp index ad9fb2ab875..9bae477f8ae 100644 --- a/yql/essentials/core/yql_statistics.cpp +++ b/yql/essentials/core/yql_statistics.cpp @@ -93,7 +93,9 @@ std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s) os << "[" << tmp << "]"; } os << ", LogicalOrderings (Shufflings) state: " << s.LogicalOrderings.GetState(); + os << ", Init Shuffling: " << s.LogicalOrderings.GetInitOrderingIdx(); os << ", SortingOrderings (Sortings) state: " << s.SortingOrderings.GetState(); + os << ", Init Sorting: " << s.SortingOrderings.GetInitOrderingIdx(); if (s.ReversedSortingOrderings.HasState()) { os << ", ReversedSortingOrderings (Sortings) state: " << s.ReversedSortingOrderings.GetState(); |
