diff options
| author | pudge1000-7 <[email protected]> | 2025-06-19 14:36:01 +0300 | 
|---|---|---|
| committer | pudge1000-7 <[email protected]> | 2025-06-19 15:15:32 +0300 | 
| commit | 8d42f4f2ab6e419a05a55ba8d71dda34c446c22b (patch) | |
| tree | 6547a074647776d64218ed325500d890307869df | |
| parent | f18d41a0434cfa7ef77bd5d43f3c8d058fac0698 (diff) | |
[RBO] Improved Sortings FSM
commit_hash:3077f1e2a744069524dc6723bca2fbdbb92ae2ba
| -rw-r--r-- | yql/essentials/core/cbo/cbo_interesting_orderings.cpp | 51 | ||||
| -rw-r--r-- | yql/essentials/core/cbo/cbo_interesting_orderings.h | 28 | ||||
| -rw-r--r-- | yql/essentials/core/yql_statistics.cpp | 2 | 
3 files changed, 62 insertions, 19 deletions
diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp index e2549c1bb5d..7af0c35bb90 100644 --- a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp +++ b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp @@ -98,21 +98,20 @@ void TTableAliasMap::AddMapping(const TString& table, const TString& alias) {      TableByAlias_[alias] = table;  } -void TTableAliasMap::AddRename(const TString& from, const TString& to) { +void TTableAliasMap::AddRename(TString from, TString to) {      if (auto pointIdx = from.find('.'); pointIdx != TString::npos) {          TString alias = from.substr(0, pointIdx);          TString baseTable = GetBaseTableByAlias(alias);          TString columnName = from.substr(pointIdx + 1); -        if (auto it = BaseColumnByRename_.find(columnName); it != BaseColumnByRename_.end()) { -            auto baseColumn = it->second; -            BaseColumnByRename_[to] = BaseColumnByRename_[from] = it->second; -            return; +        if (pointIdx == 0) { +            from = from.substr(1); +        } +        if (auto pointIdx = to.find('.'); pointIdx == 0) { +            to = to.substr(1);          } -        auto fromColumn = TBaseColumn(alias, columnName);          auto baseColumn = TBaseColumn(baseTable, columnName); -          BaseColumnByRename_[to] = BaseColumnByRename_[from] = baseColumn;          return;      } @@ -174,6 +173,9 @@ void TTableAliasMap::Merge(const TTableAliasMap& other) {          TableByAlias_[alias] = table;      }      for (const auto& [from, to] : other.BaseColumnByRename_) { +        if (BaseColumnByRename_.contains(from)) { +            continue; +        }          BaseColumnByRename_[from] = TBaseColumn(to.Relation, to.Column);      }  } @@ -318,20 +320,18 @@ i64 TFDStorage::FindInterestingOrderingIdx(  }  std::size_t TFDStorage::FindSorting( -    const std::vector<TJoinColumn>& interestingOrdering, -    const std::vector<TOrdering::TItem::EDirection>& directions, +    const TSorting& sorting,      TTableAliasMap* tableAliases  ) { -    const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, directions, TOrdering::ESorting, false, tableAliases); +    const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(sorting.Ordering, sorting.Directions, TOrdering::ESorting, false, tableAliases);      return orderingIdx;  }  std::size_t TFDStorage::AddSorting( -    const std::vector<TJoinColumn>& interestingOrdering, -    std::vector<TOrdering::TItem::EDirection> directions, +    const TSorting& sorting,      TTableAliasMap* tableAliases  ) { -    return AddInterestingOrdering(interestingOrdering, TOrdering::ESorting, directions, tableAliases); +    return AddInterestingOrdering(sorting.Ordering, TOrdering::ESorting, sorting.Directions, tableAliases);  }  std::size_t TFDStorage::AddShuffling( @@ -395,6 +395,18 @@ TVector<TJoinColumn> TFDStorage::GetInterestingOrderingsColumnNamesByIdx(std::si      return columns;  } +TSorting TFDStorage::GetInterestingSortingByOrderingIdx(std::size_t interestingOrderingIdx) const { +    Y_ENSURE(interestingOrderingIdx < InterestingOrderings.size()); + +    TVector<TJoinColumn> columns; +    columns.reserve(InterestingOrderings[interestingOrderingIdx].Items.size()); +    for (std::size_t columnIdx: InterestingOrderings[interestingOrderingIdx].Items) { +        columns.push_back(ColumnByIdx_[columnIdx]); +    } + +    return {columns, InterestingOrderings[interestingOrderingIdx].Directions}; +} +  TString TFDStorage::ToString() const {      auto toVectorString = [](auto seq) {          TVector<TString> strVector; @@ -532,6 +544,10 @@ void TOrderingsStateMachine::TLogicalOrderings::RemoveState() {      *this = TLogicalOrderings(Dfsm_);  } +i64 TOrderingsStateMachine::TLogicalOrderings::GetInitOrderingIdx() const { +    return InitOrderingIdx_; +} +  void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) {      if (!IsInitialized() || orderingIdx < 0 || orderingIdx >= static_cast<i64>(Dfsm_->InitStateByOrderingIdx_.size())) {          RemoveState(); @@ -542,6 +558,7 @@ void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) {          return;      } +    InitOrderingIdx_ = orderingIdx;      auto state = Dfsm_->InitStateByOrderingIdx_[orderingIdx];      State_ = state.StateIdx;      ShuffleHashFuncArgsCount_ = state.ShuffleHashFuncArgsCount; @@ -788,7 +805,13 @@ void TOrderingsStateMachine::TNFSM::PrefixClosure() {                      AddEdge(i, j, TNFSM::TEdge::EPSILON);                  } -                if (Nodes_[i].Ordering.Type == TOrdering::ESorting) { +                Y_ENSURE(Nodes_[i].Ordering.Directions.size() <= Nodes_[j].Ordering.Directions.size()); +                bool areDirsCompatable = std::equal( +                    Nodes_[i].Ordering.Directions.begin(), +                    Nodes_[i].Ordering.Directions.end(), +                    Nodes_[j].Ordering.Directions.begin() +                ); +                if (Nodes_[i].Ordering.Type == TOrdering::ESorting && areDirsCompatable) {                      AddEdge(j, i, TNFSM::TEdge::EPSILON);                  }              } diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.h b/yql/essentials/core/cbo/cbo_interesting_orderings.h index fd45db41a07..b4d94a90807 100644 --- a/yql/essentials/core/cbo/cbo_interesting_orderings.h +++ b/yql/essentials/core/cbo/cbo_interesting_orderings.h @@ -140,7 +140,7 @@ public:      TTableAliasMap() = default;      void AddMapping(const TString& table, const TString& alias); -    void AddRename(const TString& from, const TString& to); +    void AddRename(TString from, TString to);      TBaseColumn GetBaseColumnByRename(const TString& renamedColumn);      TBaseColumn GetBaseColumnByRename(const NDq::TJoinColumn& renamedColumn);      TString ToString() const; @@ -155,6 +155,19 @@ private:      THashMap<TString, TBaseColumn> BaseColumnByRename_;  }; +struct TSorting { +    TSorting( +        std::vector<TJoinColumn> ordering, +        std::vector<TOrdering::TItem::EDirection> directions +    ) +        : Ordering(std::move(ordering)) +        , Directions(std::move(directions)) +    {} + +    std::vector<TJoinColumn> Ordering; +    std::vector<TOrdering::TItem::EDirection> Directions; +}; +  /*   * This class contains internal representation of the columns (mapping [column -> int]), FDs and interesting orderings   */ @@ -215,14 +228,12 @@ public: // deprecated section, use the section below instead of this  public:      std::size_t FindSorting( -        const std::vector<TJoinColumn>& interestingOrdering, -        const std::vector<TOrdering::TItem::EDirection>& directions, +        const TSorting& sorting,          TTableAliasMap* tableAliases = nullptr      );      std::size_t AddSorting( -        const std::vector<TJoinColumn>& interestingOrdering, -        std::vector<TOrdering::TItem::EDirection> directions, +        const TSorting& sortings,          TTableAliasMap* tableAliases = nullptr      ); @@ -238,6 +249,8 @@ public:  public:      TVector<TJoinColumn> GetInterestingOrderingsColumnNamesByIdx(std::size_t interestingOrderingIdx) const; + +    TSorting GetInterestingSortingByOrderingIdx(std::size_t interestingOrderingIdx) const;      TString ToString() const;  public: @@ -328,6 +341,7 @@ public:          TFDSet GetFDs();          bool IsSubsetOf(const TLogicalOrderings& logicalOrderings);          i64 GetState() const; +        i64 GetInitOrderingIdx() const;      public:          bool HasState(); @@ -342,7 +356,11 @@ public:          TDFSM* Dfsm_ = nullptr;          /* we can have different args in hash shuffle function, so shuffles can be incompitable in this case */          i64 ShuffleHashFuncArgsCount_ = -1; +          i64 State_ = -1; + +        /* Index of the state which was set in SetOrdering */ +        i64 InitOrderingIdx_ = -1;          TFDSet AppliedFDs_{};      }; diff --git a/yql/essentials/core/yql_statistics.cpp b/yql/essentials/core/yql_statistics.cpp index ad9fb2ab875..9bae477f8ae 100644 --- a/yql/essentials/core/yql_statistics.cpp +++ b/yql/essentials/core/yql_statistics.cpp @@ -93,7 +93,9 @@ std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s)          os << "[" << tmp << "]";      }      os << ", LogicalOrderings (Shufflings) state: " << s.LogicalOrderings.GetState(); +    os << ", Init Shuffling: " << s.LogicalOrderings.GetInitOrderingIdx();      os << ", SortingOrderings (Sortings) state: "   << s.SortingOrderings.GetState(); +    os << ", Init Sorting: " << s.SortingOrderings.GetInitOrderingIdx();      if (s.ReversedSortingOrderings.HasState()) {          os << ", ReversedSortingOrderings (Sortings) state: "   << s.ReversedSortingOrderings.GetState();  | 
