summaryrefslogtreecommitdiffstats
path: root/yql/essentials
diff options
context:
space:
mode:
authorpudge1000-7 <[email protected]>2025-06-09 10:56:06 +0300
committerpudge1000-7 <[email protected]>2025-06-09 11:15:08 +0300
commita17ed1a95cd8e529af068d238d03d1faece71624 (patch)
treeab3d3606096454879f454c277ba45bc19b8b1cc2 /yql/essentials
parentc2e48d700270ba7a6923c14f56a2864b7884f19e (diff)
[RBO] Improved FSM with sortings (added directions)
commit_hash:9231a4622fddb446383422832d990f02e9380055
Diffstat (limited to 'yql/essentials')
-rw-r--r--yql/essentials/core/cbo/cbo_interesting_orderings.cpp219
-rw-r--r--yql/essentials/core/cbo/cbo_interesting_orderings.h90
-rw-r--r--yql/essentials/core/cbo/cbo_optimizer_new.h6
-rw-r--r--yql/essentials/core/yql_cost_function.h12
-rw-r--r--yql/essentials/core/yql_statistics.cpp13
-rw-r--r--yql/essentials/core/yql_statistics.h22
-rw-r--r--yql/essentials/core/yql_type_annotation.h1
7 files changed, 310 insertions, 53 deletions
diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp
index b34fe05628f..cd0f5dbe9d0 100644
--- a/yql/essentials/core/cbo/cbo_interesting_orderings.cpp
+++ b/yql/essentials/core/cbo/cbo_interesting_orderings.cpp
@@ -1,6 +1,7 @@
#include "cbo_interesting_orderings.h"
#include <library/cpp/disjoint_sets/disjoint_sets.h>
+#include <library/cpp/iterator/zip.h>
#include <yql/essentials/utils/log/log.h>
@@ -15,11 +16,39 @@
namespace NYql::NDq {
bool TOrdering::operator==(const TOrdering& other) const {
- return std::tie(this->Type, this->Items) == std::tie(other.Type, other.Items);
+ return
+ std::tie(this->Type, this->Items, this->Directions) ==
+ std::tie(other.Type, other.Items, other.Directions);
}
TString TOrdering::ToString() const {
- return "{" + JoinSeq(", ", Items) + "}";
+ TVector<TString> itemStrings;
+ itemStrings.reserve(Items.size());
+
+ for (size_t i = 0; i < Items.size(); ++i) {
+ TString itemStr = ::ToString(Items[i]);
+
+ if (i < Directions.size()) {
+ switch (Directions[i]) {
+ case TItem::EDirection::EAscending:
+ itemStr += "^";
+ break;
+ case TItem::EDirection::EDescending:
+ itemStr += "v";
+ break;
+ default:
+ break;
+ }
+ }
+
+ itemStrings.push_back(std::move(itemStr));
+ }
+
+ return "{" + JoinSeq(", ", itemStrings) + "}";
+}
+
+bool TOrdering::HasItem(std::size_t item) const {
+ return std::find(Items.begin(), Items.end(), item) != Items.end();
}
bool TFunctionalDependency::IsEquivalence() const {
@@ -27,7 +56,7 @@ bool TFunctionalDependency::IsEquivalence() const {
}
bool TFunctionalDependency::IsImplication() const {
- return Type == EType::EImplication;
+ return Type == EType::EImplication && !IsConstant();
}
bool TFunctionalDependency::IsConstant() const {
@@ -284,21 +313,55 @@ i64 TFDStorage::FindInterestingOrderingIdx(
TOrdering::EType type,
TTableAliasMap* tableAliases
) {
- const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, type, false, tableAliases);
+ const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, {}, type, false, tableAliases);
+ return orderingIdx;
+}
+
+std::size_t TFDStorage::FindSorting(
+ const std::vector<TJoinColumn>& interestingOrdering,
+ const std::vector<TOrdering::TItem::EDirection>& directions,
+ TTableAliasMap* tableAliases
+) {
+ const auto& [_, orderingIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, directions, TOrdering::ESorting, false, tableAliases);
return orderingIdx;
}
+std::size_t TFDStorage::AddSorting(
+ const std::vector<TJoinColumn>& interestingOrdering,
+ std::vector<TOrdering::TItem::EDirection> directions,
+ TTableAliasMap* tableAliases
+) {
+ return AddInterestingOrdering(interestingOrdering, TOrdering::ESorting, directions, tableAliases);
+}
+
+std::size_t TFDStorage::AddShuffling(
+ const std::vector<TJoinColumn>& interestingOrdering,
+ TTableAliasMap* tableAliases
+) {
+ return AddInterestingOrdering(interestingOrdering, TOrdering::EShuffle, std::vector<TOrdering::TItem::EDirection>{}, tableAliases);
+}
+
std::size_t TFDStorage::AddInterestingOrdering(
- const TVector<TString>& interestingOrdering,
+ const std::vector<TJoinColumn>& interestingOrdering,
TOrdering::EType type,
+ const std::vector<TOrdering::TItem::EDirection>& directions,
TTableAliasMap* tableAliases
) {
- std::vector<TJoinColumn> columns;
- columns.reserve(interestingOrdering.size());
- for (const auto& column: interestingOrdering) {
- columns.push_back(TJoinColumn("", column));
+ if (interestingOrdering.empty()) {
+ return std::numeric_limits<std::size_t>::max();
}
- return AddInterestingOrdering(columns, type, tableAliases);
+
+ auto [items, foundIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, directions, type, true, tableAliases);
+ if (items.Items.empty()) {
+ return std::numeric_limits<std::size_t>::max();
+ }
+
+ if (foundIdx >= 0) {
+ return static_cast<std::size_t>(foundIdx);
+ }
+
+ InterestingOrderings.push_back(std::move(items));
+ return InterestingOrderings.size() - 1;
}
std::size_t TFDStorage::AddInterestingOrdering(
@@ -310,18 +373,18 @@ std::size_t TFDStorage::AddInterestingOrdering(
return std::numeric_limits<std::size_t>::max();
}
- auto [items, foundIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, type, true, tableAliases);
+ auto [items, foundIdx] = ConvertColumnsAndFindExistingOrdering(interestingOrdering, {}, type, true, tableAliases);
if (foundIdx >= 0) {
return static_cast<std::size_t>(foundIdx);
}
- InterestingOrderings.emplace_back(std::move(items), type);
+ InterestingOrderings.emplace_back(std::move(items));
return InterestingOrderings.size() - 1;
}
TVector<TJoinColumn> TFDStorage::GetInterestingOrderingsColumnNamesByIdx(std::size_t interestingOrderingIdx) const {
- Y_ASSERT(interestingOrderingIdx < InterestingOrderings.size());
+ Y_ENSURE(interestingOrderingIdx < InterestingOrderings.size());
TVector<TJoinColumn> columns;
columns.reserve(InterestingOrderings[interestingOrderingIdx].Items.size());
@@ -358,24 +421,38 @@ TString TFDStorage::ToString() const {
return ss;
}
-std::pair<std::vector<std::size_t>, i64> TFDStorage::ConvertColumnsAndFindExistingOrdering(
+std::pair<TOrdering, i64> TFDStorage::ConvertColumnsAndFindExistingOrdering(
const std::vector<TJoinColumn>& interestingOrdering,
+ const std::vector<TOrdering::TItem::EDirection>& directions,
TOrdering::EType type,
bool createIfNotExists,
TTableAliasMap* tableAliases
) {
+ if(!(
+ directions.empty() && type == TOrdering::EShuffle ||
+ directions.size() == interestingOrdering.size() && type == TOrdering::ESorting
+ )) {
+ YQL_CLOG(TRACE, CoreDq)
+ << "Ordering and directions sizes mismatch : " << directions.size() << " vs " << interestingOrdering.size();
+ return {TOrdering(), -1};
+ }
+
std::vector<std::size_t> items = ConvertColumnIntoIndexes(interestingOrdering, createIfNotExists, tableAliases);
if (items.empty()) {
- return {{}, -1};
+ return {TOrdering(), -1};
}
for (std::size_t i = 0; i < InterestingOrderings.size(); ++i) {
- if (items == InterestingOrderings[i].Items && type == InterestingOrderings[i].Type) {
- return {items, static_cast<i64>(i)};
+ if (
+ items == InterestingOrderings[i].Items &&
+ type == InterestingOrderings[i].Type &&
+ directions == InterestingOrderings[i].Directions
+ ) {
+ return {InterestingOrderings[i], static_cast<i64>(i)};
}
}
- return {items, -1};
+ return {TOrdering(std::move(items), directions, type), -1};
}
std::vector<std::size_t> TFDStorage::ConvertColumnIntoIndexes(
@@ -456,7 +533,7 @@ void TOrderingsStateMachine::TLogicalOrderings::RemoveState() {
}
void TOrderingsStateMachine::TLogicalOrderings::SetOrdering(i64 orderingIdx) {
- if (orderingIdx < 0 || orderingIdx >= static_cast<i64>(DFSM->InitStateByOrderingIdx.size())) {
+ if (!IsInitialized() || orderingIdx < 0 || orderingIdx >= static_cast<i64>(DFSM->InitStateByOrderingIdx.size())) {
RemoveState();
return;
}
@@ -556,12 +633,59 @@ TString TOrderingsStateMachine::ToString() const {
return ss;
}
+void TOrderingsStateMachine::CollectItemInfo(
+ const std::vector<TFunctionalDependency>& fds,
+ const std::vector<TOrdering>& interestingOrderings
+) {
+ std::size_t maxItem = 0;
+
+ for (const auto& ordering: interestingOrderings) {
+ if (ordering.Items.empty()) {
+ continue;
+ }
+
+ std::size_t orderingMaxItem = *std::max_element(ordering.Items.begin(), ordering.Items.end());
+ maxItem = std::max(maxItem, orderingMaxItem);
+ }
+
+ for (const auto& fd: fds) {
+ std::size_t maxAntecedentItems = 0;
+ if (!fd.AntecedentItems.empty()) {
+ maxAntecedentItems = *std::max_element(fd.AntecedentItems.begin(), fd.AntecedentItems.end());
+ }
+
+ maxItem = std::max({maxItem, fd.ConsequentItem, maxAntecedentItems});
+ }
+
+ ItemInfo.resize(maxItem + 1);
+
+ for (const auto& ordering: interestingOrderings) {
+ Y_ENSURE(ordering.Items.size() == ordering.Directions.size() || ordering.Directions.empty());
+
+ for (const auto& [item, direction]: Zip(ordering.Items, ordering.Directions)) {
+ switch (direction) {
+ case TOrdering::TItem::EDirection::EAscending: {
+ ItemInfo[item].UsedInAscOrdering = true;
+ break;
+ }
+ case TOrdering::TItem::EDirection::EDescending: {
+ ItemInfo[item].UsedInDescOrdering = true;
+ break;
+ }
+ default: {}
+ }
+ }
+ }
+
+}
+
void TOrderingsStateMachine::Build(
const std::vector<TFunctionalDependency>& fds,
const std::vector<TOrdering>& interestingOrderings
) {
+ CollectItemInfo(fds, interestingOrderings);
std::vector<TFunctionalDependency> processedFDs = PruneFDs(fds, interestingOrderings);
- NFSM.Build(processedFDs, interestingOrderings);
+ NFSM.Build(processedFDs, interestingOrderings, ItemInfo);
DFSM = MakeSimpleShared<TDFSM>();
DFSM->Build(NFSM, processedFDs, interestingOrderings);
Built = true;
@@ -605,13 +729,14 @@ TString TOrderingsStateMachine::TNFSM::ToString() const {
void TOrderingsStateMachine::TNFSM::Build(
const std::vector<TFunctionalDependency>& fds,
- const std::vector<TOrdering>& interesting
+ const std::vector<TOrdering>& interesting,
+ const std::vector<TItemInfo>& itemInfo
) {
for (std::size_t i = 0; i < interesting.size(); ++i) {
AddNode(interesting[i], TNFSM::TNode::EInteresting, i);
}
- ApplyFDs(fds, interesting);
+ ApplyFDs(fds, interesting, itemInfo);
PrefixClosure();
for (std::size_t idx = 0; idx < Edges.size(); ++idx) {
@@ -673,7 +798,8 @@ void TOrderingsStateMachine::TNFSM::PrefixClosure() {
void TOrderingsStateMachine::TNFSM::ApplyFDs(
const std::vector<TFunctionalDependency>& fds,
- const std::vector<TOrdering>& interestingOrderings
+ const std::vector<TOrdering>& interestingOrderings,
+ const std::vector<TItemInfo>& itemInfo
) {
std::size_t maxInterestingOrderingSize = 0;
if (!interestingOrderings.empty()) {
@@ -685,12 +811,15 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs(
)->Items.size();
}
-
for (std::size_t nodeIdx = 0; nodeIdx < Nodes.size() && Nodes.size() < EMaxNFSMStates; ++nodeIdx) {
for (std::size_t fdIdx = 0; fdIdx < fds.size() && Nodes.size() < EMaxNFSMStates; ++fdIdx) {
+ if (Nodes[nodeIdx].Ordering.Items.empty()) {
+ continue;
+ }
+
TFunctionalDependency fd = fds[fdIdx];
- auto applyFD = [this, nodeIdx, maxInterestingOrderingSize](const TFunctionalDependency& fd, std::size_t fdIdx) {
+ auto applyFD = [this, &itemInfo, nodeIdx, maxInterestingOrderingSize](const TFunctionalDependency& fd, std::size_t fdIdx) {
if (Nodes.size() >= EMaxNFSMStates) {
return;
}
@@ -702,9 +831,14 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs(
return;
}
bool isNewOrderingPrefixOfOld = (it == (newOrdering.end() - 1));
- newOrdering.erase(it);
- std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
+ auto newDirections = Nodes[nodeIdx].Ordering.Directions;
+ if (!newDirections.empty()) {
+ newDirections.erase(newDirections.begin() + std::distance(newOrdering.begin(), it));
+ newOrdering.erase(it);
+ }
+
+ std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), std::move(newDirections), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
if (!isNewOrderingPrefixOfOld || Nodes[nodeIdx].Ordering.Type == TOrdering::EShuffle) {
AddEdge(nodeIdx, dstIdx, fdIdx);
@@ -729,11 +863,11 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs(
auto it = std::find(Nodes[nodeIdx].Ordering.Items.begin(), Nodes[nodeIdx].Ordering.Items.end(), fd.ConsequentItem);
it != Nodes[nodeIdx].Ordering.Items.end()
) {
- if (fd.IsEquivalence()) { // (a, b) -> (b, a)
+ if (fd.IsEquivalence()) { // swap (a, b) -> (b, a)
std::size_t consequentItemIdx = std::distance(Nodes[nodeIdx].Ordering.Items.begin(), it);
auto newOrdering = Nodes[nodeIdx].Ordering.Items;
std::swap(newOrdering[antecedentItemIdx], newOrdering[consequentItemIdx]);
- std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
+ std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Directions, Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
AddEdge(nodeIdx, dstIdx, fdIdx);
AddEdge(dstIdx, nodeIdx, fdIdx);
}
@@ -747,7 +881,7 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs(
std::vector<std::size_t> newOrdering = Nodes[nodeIdx].Ordering.Items;
newOrdering[antecedentItemIdx] = fd.ConsequentItem;
- std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
+ std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Directions, Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
AddEdge(nodeIdx, dstIdx, fdIdx);
AddEdge(dstIdx, nodeIdx, fdIdx);
}
@@ -764,8 +898,25 @@ void TOrderingsStateMachine::TNFSM::ApplyFDs(
std::vector<std::size_t> newOrdering = Nodes[nodeIdx].Ordering.Items;
newOrdering.insert(newOrdering.begin() + i, fd.ConsequentItem);
- std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
- AddEdge(nodeIdx, dstIdx, fdIdx); // Epsilon edge will be added during PrefixClosure
+ auto newDirections = Nodes[nodeIdx].Ordering.Directions;
+ if (newDirections.empty()) { // smthing went wrong during ordering adding stage
+ return;
+ }
+ newDirections.insert(newDirections.begin() + i, TOrdering::TItem::EDirection::ENone);
+
+ Y_ENSURE(fd.ConsequentItem < itemInfo.size());
+
+ if (itemInfo[fd.ConsequentItem].UsedInAscOrdering) {
+ newDirections[i] = TOrdering::TItem::EDirection::EAscending;
+ std::size_t dstIdx = AddNode(TOrdering(newOrdering, newDirections, Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
+ AddEdge(nodeIdx, dstIdx, fdIdx); // Epsilon edge will be added during PrefixClosure
+ }
+
+ if (itemInfo[fd.ConsequentItem].UsedInDescOrdering) {
+ newDirections[i] = TOrdering::TItem::EDirection::EDescending;
+ std::size_t dstIdx = AddNode(TOrdering(std::move(newOrdering), std::move(newDirections), Nodes[nodeIdx].Ordering.Type), TNode::EArtificial);
+ AddEdge(nodeIdx, dstIdx, fdIdx); // Epsilon edge will be added during PrefixClosure
+ }
}
}
};
@@ -956,14 +1107,14 @@ std::vector<TFunctionalDependency> TOrderingsStateMachine::PruneFDs(
bool canLeadToInteresting = false;
for (const auto& ordering: interestingOrderings) {
- if (std::find(ordering.Items.begin(), ordering.Items.end(), fds[i].ConsequentItem) != ordering.Items.end()) {
+ if (ordering.HasItem(fds[i].ConsequentItem)) {
canLeadToInteresting = true;
break;
}
if (
fds[i].IsEquivalence() &&
- std::find(ordering.Items.begin(), ordering.Items.end(), fds[i].AntecedentItems[0]) != ordering.Items.end()
+ ordering.HasItem(fds[i].AntecedentItems[0])
) {
canLeadToInteresting = true;
break;
diff --git a/yql/essentials/core/cbo/cbo_interesting_orderings.h b/yql/essentials/core/cbo/cbo_interesting_orderings.h
index 77a20ef1964..e4e86bcb0d9 100644
--- a/yql/essentials/core/cbo/cbo_interesting_orderings.h
+++ b/yql/essentials/core/cbo/cbo_interesting_orderings.h
@@ -28,17 +28,49 @@ struct TOrdering {
bool operator==(const TOrdering& other) const;
TString ToString() const;
+ struct TItem {
+ enum EDirection : uint32_t {
+ ENone = 0,
+ EAscending = 1,
+ EDescending = 2
+ };
+ };
+
TOrdering(
std::vector<std::size_t> items,
+ std::vector<TItem::EDirection> directions,
EType type,
bool isNatural = false
)
: Items(std::move(items))
+ , Directions(std::move(directions))
, Type(type)
, IsNatural(isNatural)
+ {
+ Y_ENSURE(
+ Directions.empty() && type == TOrdering::EShuffle ||
+ Directions.size() == Items.size() && type == TOrdering::ESorting
+ );
+ }
+
+ TOrdering(
+ std::vector<std::size_t> items,
+ EType type
+ )
+ : TOrdering(
+ std::move(items),
+ std::vector<TItem::EDirection>{},
+ type
+ )
{}
+ TOrdering() = default;
+
+ bool HasItem(std::size_t item) const;
+
std::vector<std::size_t> Items;
+ std::vector<TItem::EDirection> Directions;
+
EType Type;
/* Definition was taken from 'Complex Ordering Requirements' section. Not natural orderings are complex join predicates or grouping. */
bool IsNatural = false;
@@ -135,8 +167,7 @@ public:
TTableAliasMap* tableAliases = nullptr
);
-public:
- // Deprecated, use the others in this public section instead
+public: // deprecated section, use the section below instead of this
std::size_t AddFD(
const TJoinColumn& antecedentColumn,
const TJoinColumn& consequentColumn,
@@ -145,6 +176,7 @@ public:
TTableAliasMap* tableAliases = nullptr
);
+public:
std::size_t AddConstant(
const TJoinColumn& constantColumn,
bool alwaysActive,
@@ -168,8 +200,7 @@ public:
private:
std::size_t AddFDImpl(TFunctionalDependency fd);
-public:
-
+public: // deprecated section, use the section below instead of this
i64 FindInterestingOrderingIdx(
const std::vector<TJoinColumn>& interestingOrdering,
TOrdering::EType type,
@@ -182,12 +213,30 @@ public:
TTableAliasMap* tableAliases = nullptr
);
- std::size_t AddInterestingOrdering(
- const TVector<TString>& interestingOrdering, // column names
- TOrdering::EType type,
+public:
+ std::size_t FindSorting(
+ const std::vector<TJoinColumn>& interestingOrdering,
+ const std::vector<TOrdering::TItem::EDirection>& directions,
+ TTableAliasMap* tableAliases = nullptr
+ );
+
+ std::size_t AddSorting(
+ const std::vector<TJoinColumn>& interestingOrdering,
+ std::vector<TOrdering::TItem::EDirection> directions,
+ TTableAliasMap* tableAliases = nullptr
+ );
+
+ std::size_t FindShuffling(
+ const std::vector<TJoinColumn>& interestingOrdering,
+ TTableAliasMap* tableAliases = nullptr
+ );
+
+ std::size_t AddShuffling(
+ const std::vector<TJoinColumn>& interestingOrdering,
TTableAliasMap* tableAliases = nullptr
);
+public:
TVector<TJoinColumn> GetInterestingOrderingsColumnNamesByIdx(std::size_t interestingOrderingIdx) const;
TString ToString() const;
@@ -196,8 +245,9 @@ public:
std::vector<TOrdering> InterestingOrderings;
private:
- std::pair<std::vector<std::size_t>, i64> ConvertColumnsAndFindExistingOrdering(
+ std::pair<TOrdering, i64> ConvertColumnsAndFindExistingOrdering(
const std::vector<TJoinColumn>& interestingOrdering,
+ const std::vector<TOrdering::TItem::EDirection>& directions,
TOrdering::EType type,
bool createIfNotExists,
TTableAliasMap* tableAliases = nullptr
@@ -215,6 +265,13 @@ private:
TTableAliasMap* tableAliases = nullptr
);
+ std::size_t AddInterestingOrdering(
+ const std::vector<TJoinColumn>& interestingOrdering,
+ TOrdering::EType type,
+ const std::vector<TOrdering::TItem::EDirection>& directions,
+ TTableAliasMap* tableAliases
+ );
+
private:
THashMap<TString, std::size_t> IdxByColumn;
std::vector<TJoinColumn> ColumnByIdx;
@@ -239,6 +296,11 @@ private:
EMaxDFSMStates = 512,
};
+ struct TItemInfo {
+ bool UsedInAscOrdering = false;
+ bool UsedInDescOrdering = false;
+ };
+
public:
using TFDSet = std::bitset<EMaxFDCount>;
@@ -364,7 +426,8 @@ private:
TString ToString() const;
void Build(
const std::vector<TFunctionalDependency>& fds,
- const std::vector<TOrdering>& interesting
+ const std::vector<TOrdering>& interesting,
+ const std::vector<TItemInfo>& itemInfo
);
private:
@@ -373,7 +436,8 @@ private:
void PrefixClosure();
void ApplyFDs(
const std::vector<TFunctionalDependency>& fds,
- const std::vector<TOrdering>& interesting
+ const std::vector<TOrdering>& interesting,
+ const std::vector<TItemInfo>& itemInfo
);
private:
@@ -448,11 +512,17 @@ private:
const std::vector<TOrdering>& interestingOrderings
);
+ void CollectItemInfo(
+ const std::vector<TFunctionalDependency>& fds,
+ const std::vector<TOrdering>& interestingOrderings
+ );
+
private:
TNFSM NFSM;
TSimpleSharedPtr<TDFSM> DFSM; // it is important to have sharedptr here, otherwise all logicalorderings will invalidate after copying of FSM
std::vector<i64> FdMapping; // We to remap FD idxes after the pruning
+ std::vector<TItemInfo> ItemInfo;
bool Built = false;
};
diff --git a/yql/essentials/core/cbo/cbo_optimizer_new.h b/yql/essentials/core/cbo/cbo_optimizer_new.h
index 2a010d8f195..18f51639963 100644
--- a/yql/essentials/core/cbo/cbo_optimizer_new.h
+++ b/yql/essentials/core/cbo/cbo_optimizer_new.h
@@ -35,12 +35,6 @@ struct IBaseOptimizerNode {
EOptimizerNodeKind Kind;
TOptimizerStatistics Stats;
- // for interesting orderings framework
-
- NDq::TOrderingsStateMachine::TLogicalOrderings LogicalOrderings;
-
- //
-
IBaseOptimizerNode(EOptimizerNodeKind k) : Kind(k) {}
IBaseOptimizerNode(EOptimizerNodeKind k, TOptimizerStatistics s) :
Kind(k), Stats(std::move(s)) {}
diff --git a/yql/essentials/core/yql_cost_function.h b/yql/essentials/core/yql_cost_function.h
index dbee46ad0c0..4775562dcc4 100644
--- a/yql/essentials/core/yql_cost_function.h
+++ b/yql/essentials/core/yql_cost_function.h
@@ -37,10 +37,12 @@ struct TJoinColumn {
TString RelName{}; // TODO: this should be a list. now list of relations is separated by comma in this string
TString AttributeName{};
TString AttributeNameWithAliases{};
- std::optional<TString> OriginalRelName;
+ std::optional<TString> OriginalRelName{};
std::optional<ui32> EquivalenceClass{};
bool IsConstant = false;
+ TJoinColumn() = default;
+
TJoinColumn(TString relName, TString attributeName) :
RelName(relName),
AttributeName(attributeName),
@@ -50,6 +52,14 @@ struct TJoinColumn {
return RelName == other.RelName && AttributeName == other.AttributeName;
}
+ static TJoinColumn FromString(const TString& column) {
+ if (column.find('.') != TString::npos) {
+ return TJoinColumn(column.substr(0, column.find('.')), column.substr(column.find('.') + 1));
+ }
+
+ return TJoinColumn("", column);
+ }
+
struct THashFunction
{
size_t operator()(const TJoinColumn& c) const
diff --git a/yql/essentials/core/yql_statistics.cpp b/yql/essentials/core/yql_statistics.cpp
index 16ea755f08c..ad9fb2ab875 100644
--- a/yql/essentials/core/yql_statistics.cpp
+++ b/yql/essentials/core/yql_statistics.cpp
@@ -92,7 +92,16 @@ std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s)
}
os << "[" << tmp << "]";
}
- os << ", LogicalOrderings state: " << s.LogicalOrderings.GetState();
+ os << ", LogicalOrderings (Shufflings) state: " << s.LogicalOrderings.GetState();
+ os << ", SortingOrderings (Sortings) state: " << s.SortingOrderings.GetState();
+
+ if (s.ReversedSortingOrderings.HasState()) {
+ os << ", ReversedSortingOrderings (Sortings) state: " << s.ReversedSortingOrderings.GetState();
+ }
+
+ if (s.SortingOrderingIdx >= 0) {
+ os << ", SortingOrderingIdx: " << s.SortingOrderingIdx;
+ }
os << ", Sel: " << s.Selectivity;
os << ", Storage: " << ConvertToStatisticsTypeString(s.StorageType);
@@ -103,7 +112,7 @@ std::ostream& NYql::operator<<(std::ostream& os, const TOptimizerStatistics& s)
for (size_t i = 0; i < s.SortColumns->Columns.size() && i < s.SortColumns->Aliases.size(); i++) {
auto c = s.SortColumns->Columns[i];
auto a = s.SortColumns->Aliases[i];
- if (a.empty()) {
+ if (!a.empty()) {
tmp.append(a).append(".");
}
diff --git a/yql/essentials/core/yql_statistics.h b/yql/essentials/core/yql_statistics.h
index 0b9c36e7553..969b87453d8 100644
--- a/yql/essentials/core/yql_statistics.h
+++ b/yql/essentials/core/yql_statistics.h
@@ -56,11 +56,22 @@ struct TOptimizerStatistics {
struct TKeyColumns : public TSimpleRefCount<TKeyColumns> {
TVector<TString> Data;
TKeyColumns(TVector<TString> data) : Data(std::move(data)) {}
+
+ TVector<NDq::TJoinColumn> ToJoinColumns(const TString& alias) {
+ TVector<NDq::TJoinColumn> columns;
+ columns.reserve(Data.size());
+ for (std::size_t i = 0; i < Data.size(); ++i) {
+ columns.push_back(NDq::TJoinColumn(alias, Data[i]));
+ }
+
+ return columns;
+ }
};
struct TSortColumns : public TSimpleRefCount<TSortColumns> {
TVector<TString> Columns;
TVector<TString> Aliases;
+
TSortColumns(const TVector<TString>& cols, const TVector<TString>& aliases)
: Columns(cols)
, Aliases(aliases)
@@ -99,7 +110,9 @@ struct TOptimizerStatistics {
double Selectivity = 1.0;
TIntrusivePtr<TKeyColumns> KeyColumns;
TIntrusivePtr<TColumnStatMap> ColumnStatistics;
+
TIntrusivePtr<TShuffledByColumns> ShuffledByColumns;
+
TIntrusivePtr<TSortColumns> SortColumns;
EStorageType StorageType = EStorageType::NA;
std::shared_ptr<IProviderStatistics> Specific;
@@ -108,8 +121,17 @@ struct TOptimizerStatistics {
TString SourceTableName;
TSimpleSharedPtr<THashSet<TString>> Aliases;
TIntrusivePtr<NDq::TTableAliasMap> TableAliases;
+
NDq::TOrderingsStateMachine::TLogicalOrderings LogicalOrderings;
+
+ NDq::TOrderingsStateMachine::TLogicalOrderings SortingOrderings;
+ NDq::TOrderingsStateMachine::TLogicalOrderings ReversedSortingOrderings;
+
std::optional<std::size_t> ShuffleOrderingIdx;
+ std::int64_t SortingOrderingIdx = -1;
+
+ // special flag for equijoin
+ bool CBOFired = false;
TOptimizerStatistics(TOptimizerStatistics&&) = default;
TOptimizerStatistics& operator=(TOptimizerStatistics&&) = default;
diff --git a/yql/essentials/core/yql_type_annotation.h b/yql/essentials/core/yql_type_annotation.h
index ce70891e2e0..c2f73fa7fcf 100644
--- a/yql/essentials/core/yql_type_annotation.h
+++ b/yql/essentials/core/yql_type_annotation.h
@@ -383,6 +383,7 @@ inline TString GetRandomKey<TGUID>() {
}
struct TTypeAnnotationContext: public TThrRefBase {
+ TSimpleSharedPtr<NDq::TOrderingsStateMachine> SortingsFSM;
TSimpleSharedPtr<NDq::TOrderingsStateMachine> OrderingsFSM;
TLangVersion LangVer = MinLangVersion;
THashMap<TString, TIntrusivePtr<TOptimizerStatistics::TColumnStatMap>> ColumnStatisticsByTableName;