summaryrefslogtreecommitdiffstats
path: root/yql/essentials
diff options
context:
space:
mode:
authorpudge1000-7 <[email protected]>2025-06-19 14:36:01 +0300
committerpudge1000-7 <[email protected]>2025-06-19 15:15:32 +0300
commit8d42f4f2ab6e419a05a55ba8d71dda34c446c22b (patch)
tree6547a074647776d64218ed325500d890307869df /yql/essentials
parentf18d41a0434cfa7ef77bd5d43f3c8d058fac0698 (diff)
[RBO] Improved Sortings FSM
commit_hash:3077f1e2a744069524dc6723bca2fbdbb92ae2ba
Diffstat (limited to 'yql/essentials')
-rw-r--r--yql/essentials/core/cbo/cbo_interesting_orderings.cpp51
-rw-r--r--yql/essentials/core/cbo/cbo_interesting_orderings.h28
-rw-r--r--yql/essentials/core/yql_statistics.cpp2
3 files changed, 62 insertions, 19 deletions
diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp
index e2549c1bb5d..7af0c35bb90 100644
--- a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp
+++ b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp
@@ -98,21 +98,20 @@ void TTableAliasMap::AddMapping(const TString& table, const TString& alias) {
TableByAlias_[alias] = table;
}
-void TTableAliasMap::AddRename(const TString& from, const TString& to) {
+void TTableAliasMap::AddRename(TString from, TString to) {
if (auto pointIdx = from.find('.'); pointIdx != TString::npos) {
TString alias = from.substr(0, pointIdx);
TString baseTable = GetBaseTableByAlias(alias);
TString columnName = from.substr(pointIdx + 1);
- if (auto it = BaseColumnByRename_.find(columnName); it != BaseColumnByRename_.end()) {
- auto baseColumn = it->second;
- BaseColumnByRename_[to] = BaseColumnByRename_[from] = it->second;
- return;
+ if (pointIdx == 0) {
+ from = from.substr(1);
+ }
+ if (auto pointIdx = to.find('.'); pointIdx == 0) {
+ to = to.substr(1);
}
- auto fromColumn = TBaseColumn(alias, columnName);
auto baseColumn = TBaseColumn(baseTable, columnName);
-
BaseColumnByRename_[to] = BaseColumnByRename_[from] = baseColumn;
return;
}
@@ -174,6 +173,9 @@ void TTableAliasMap::Merge(const TTableAliasMap& other) {
TableByAlias_[alias] = table;
}
for (const auto& [from, to] : other.BaseColumnByRename_) {
+ if (BaseColumnByRename_.contains(from)) {
+ continue;
+ }
BaseColumnByRename_[from] = TBaseColumn(to.Relation, to.Column);
}
}
@@ -318,20 +320,18 @@ i64 TFDStorage::FindInterestingOrderingIdx(
}
std::size_t TFDStorage::FindSorting(
- const std::vector<TJoinColumn>& interestingOrdering,
- const std::vector<TOrdering::TItem::EDirection>& directions,
+ const TSorting& sorting,
TTableAliasMap* tableAliases
) {
- const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, directions, TOrdering::ESorting, false, tableAliases);
+ const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(sorting.Ordering, sorting.Directions, TOrdering::ESorting, false, tableAliases);
return orderingIdx;
}
std::size_t TFDStorage::AddSorting(
- const std::vector<TJoinColumn>& interestingOrdering,
- std::vector<TOrdering::TItem::EDirection> directions,
+ const TSorting& sorting,
TTableAliasMap* tableAliases
) {
- return AddInterestingOrdering(interestingOrdering, TOrdering::ESorting, directions, tableAliases);
+ return AddInterestingOrdering(sorting.Ordering, TOrdering::ESorting, sorting.Directions, tableAliases);
}
std::size_t TFDStorage::AddShuffling(
@@ -395,6 +395,18 @@ TVector<TJoinColumn> TFDStorage::GetInterestingOrderingsColumnNamesByIdx(std::si
return columns;
}
+TSorting TFDStorage::GetInterestingSortingByOrderingIdx(std::size_t interestingOrderingIdx) const {
+ Y_ENSURE(interestingOrderingIdx < InterestingOrderings.size());
+
+ TVector<TJoinColumn> columns;
+ columns.reserve(InterestingOrderings[interestingOrderingIdx].Items.size());
+ for (std::size_t columnIdx: InterestingOrderings[interestingOrderingIdx].Items) {
+ columns.push_back(ColumnByIdx_[columnIdx]);
+ }
+
+ return {columns, InterestingOrderings[interestingOrderingIdx].Directions};
+}
+
TString TFDStorage::ToString() const {
auto toVectorString = [](auto seq) {
TVector<TString> strVector;
@@ -532,6 +544,10 @@ void TOrderingsStateMachine::TLogicalOrderings::RemoveState() {
*this = TLogicalOrderings(Dfsm_);
}
+i64 TOrderingsStateMachine::TLogicalOrderings::GetInitOrderingIdx() const {
+ return InitOrderingIdx_;
+}
+
void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) {
if (!IsInitialized() || orderingIdx < 0 || orderingIdx >= static_cast<i64>(Dfsm_->InitStateByOrderingIdx_.size())) {
RemoveState();
@@ -542,6 +558,7 @@ void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) {
return;
}
+ InitOrderingIdx_ = orderingIdx;
auto state = Dfsm_->InitStateByOrderingIdx_[orderingIdx];
State_ = state.StateIdx;
ShuffleHashFuncArgsCount_ = state.ShuffleHashFuncArgsCount;
@@ -788,7 +805,13 @@ void TOrderingsStateMachine::TNFSM::PrefixClosure() {
AddEdge(i, j, TNFSM::TEdge::EPSILON);
}
- if (Nodes_[i].Ordering.Type == TOrdering::ESorting) {
+ Y_ENSURE(Nodes_[i].Ordering.Directions.size() <= Nodes_[j].Ordering.Directions.size());
+ bool areDirsCompatable = std::equal(
+ Nodes_[i].Ordering.Directions.begin(),
+ Nodes_[i].Ordering.Directions.end(),
+ Nodes_[j].Ordering.Directions.begin()
+ );
+ if (Nodes_[i].Ordering.Type == TOrdering::ESorting && areDirsCompatable) {
AddEdge(j, i, TNFSM::TEdge::EPSILON);
}
}
diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.h b/yql/essentials/core/cbo/cbo_interesting_orderings.h
index fd45db41a07..b4d94a90807 100644
--- a/yql/essentials/core/cbo/cbo_interesting_orderings.h
+++ b/yql/essentials/core/cbo/cbo_interesting_orderings.h
@@ -140,7 +140,7 @@ public:
TTableAliasMap() = default;
void AddMapping(const TString& table, const TString& alias);
- void AddRename(const TString& from, const TString& to);
+ void AddRename(TString from, TString to);
TBaseColumn GetBaseColumnByRename(const TString& renamedColumn);
TBaseColumn GetBaseColumnByRename(const NDq::TJoinColumn& renamedColumn);
TString ToString() const;
@@ -155,6 +155,19 @@ private:
THashMap<TString, TBaseColumn> BaseColumnByRename_;
};
+struct TSorting {
+ TSorting(
+ std::vector<TJoinColumn> ordering,
+ std::vector<TOrdering::TItem::EDirection> directions
+ )
+ : Ordering(std::move(ordering))
+ , Directions(std::move(directions))
+ {}
+
+ std::vector<TJoinColumn> Ordering;
+ std::vector<TOrdering::TItem::EDirection> Directions;
+};
+
/*
* This class contains internal representation of the columns (mapping [column -> int]), FDs and interesting orderings
*/
@@ -215,14 +228,12 @@ public: // deprecated section, use the section below instead of this
public:
std::size_t FindSorting(
- const std::vector<TJoinColumn>& interestingOrdering,
- const std::vector<TOrdering::TItem::EDirection>& directions,
+ const TSorting& sorting,
TTableAliasMap* tableAliases = nullptr
);
std::size_t AddSorting(
- const std::vector<TJoinColumn>& interestingOrdering,
- std::vector<TOrdering::TItem::EDirection> directions,
+ const TSorting& sortings,
TTableAliasMap* tableAliases = nullptr
);
@@ -238,6 +249,8 @@ public:
public:
TVector<TJoinColumn> GetInterestingOrderingsColumnNamesByIdx(std::size_t interestingOrderingIdx) const;
+
+ TSorting GetInterestingSortingByOrderingIdx(std::size_t interestingOrderingIdx) const;
TString ToString() const;
public:
@@ -328,6 +341,7 @@ public:
TFDSet GetFDs();
bool IsSubsetOf(const TLogicalOrderings& logicalOrderings);
i64 GetState() const;
+ i64 GetInitOrderingIdx() const;
public:
bool HasState();
@@ -342,7 +356,11 @@ public:
TDFSM* Dfsm_ = nullptr;
/* we can have different args in hash shuffle function, so shuffles can be incompitable in this case */
i64 ShuffleHashFuncArgsCount_ = -1;
+
i64 State_ = -1;
+
+ /* Index of the state which was set in SetOrdering */
+ i64 InitOrderingIdx_ = -1;
TFDSet AppliedFDs_{};
};
diff --git a/yql/essentials/core/yql_statistics.cpp b/yql/essentials/core/yql_statistics.cpp
index ad9fb2ab875..9bae477f8ae 100644
--- a/yql/essentials/core/yql_statistics.cpp
+++ b/yql/essentials/core/yql_statistics.cpp
@@ -93,7 +93,9 @@ std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s)
os << "[" << tmp << "]";
}
os << ", LogicalOrderings (Shufflings) state: " << s.LogicalOrderings.GetState();
+ os << ", Init Shuffling: " << s.LogicalOrderings.GetInitOrderingIdx();
os << ", SortingOrderings (Sortings) state: " << s.SortingOrderings.GetState();
+ os << ", Init Sorting: " << s.SortingOrderings.GetInitOrderingIdx();
if (s.ReversedSortingOrderings.HasState()) {
os << ", ReversedSortingOrderings (Sortings) state: " << s.ReversedSortingOrderings.GetState();