diff options
author | zalyalov <zalyalov@ydb.tech> | 2023-12-05 19:02:18 +0300 |
---|---|---|
committer | zalyalov <zalyalov@ydb.tech> | 2023-12-05 20:19:45 +0300 |
commit | da7e2c7c4dce703d3521174098ca91539925383e (patch) | |
tree | 48f4504b8af6269bbede02af719fdc8a09fa152a | |
parent | f5bedf05574cf14e111598330bf7171931471d07 (diff) | |
download | ydb-da7e2c7c4dce703d3521174098ca91539925383e.tar.gz |
fixes for tracking object distribution with restarts KIKIMR-19696
-rw-r--r-- | ydb/core/mind/hive/hive_impl.h | 2 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_statics.cpp | 21 | ||||
-rw-r--r-- | ydb/core/mind/hive/node_info.h | 22 | ||||
-rw-r--r-- | ydb/core/mind/hive/object_distribution.h | 52 | ||||
-rw-r--r-- | ydb/core/mind/hive/tablet_info.cpp | 12 |
5 files changed, 59 insertions, 50 deletions
diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index 65e08a8a80..e3318ad1e4 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -150,6 +150,8 @@ void MakeTabletTypeSet(std::vector<TTabletTypes::EType>& list); bool IsValidTabletType(TTabletTypes::EType type); bool IsValidObjectId(const TFullObjectId& objectId); TString GetRunningTabletsText(ui64 runningTablets, ui64 totalTablets, bool warmUp); +bool IsResourceDrainingState(TTabletInfo::EVolatileState state); +bool IsAliveState(TTabletInfo::EVolatileState state); class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveSharedSettings { public: diff --git a/ydb/core/mind/hive/hive_statics.cpp b/ydb/core/mind/hive/hive_statics.cpp index b6ec140a1f..e3d6c65eb7 100644 --- a/ydb/core/mind/hive/hive_statics.cpp +++ b/ydb/core/mind/hive/hive_statics.cpp @@ -417,5 +417,26 @@ TString GetRunningTabletsText(ui64 runningTablets, ui64 totalTablets, bool warmU return str; } +bool IsResourceDrainingState(TTabletInfo::EVolatileState state) { + switch (state) { + case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING: + case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING: + case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_UNKNOWN: + return true; + default: + return false; + } +} + +bool IsAliveState(TTabletInfo::EVolatileState state) { + switch (state) { + case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING: + case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING: + return true; + default: + return false; + } +} + } // NHive } // NKikimr diff --git a/ydb/core/mind/hive/node_info.h b/ydb/core/mind/hive/node_info.h index 2fde2dc505..f5baa4828b 100644 --- a/ydb/core/mind/hive/node_info.h +++ b/ydb/core/mind/hive/node_info.h @@ -74,28 +74,6 @@ public: } void ChangeVolatileState(EVolatileState state); - - static bool IsResourceDrainingState(TTabletInfo::EVolatileState state) { - switch (state) { - case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING: - case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING: - case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_UNKNOWN: - return true; - default: - return false; - } - } - - static bool IsAliveState(TTabletInfo::EVolatileState state) { - switch (state) { - case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING: - case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING: - return true; - default: - return false; - } - } - bool OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EVolatileState newState); void UpdateResourceValues(const TTabletInfo* tablet, const NKikimrTabletBase::TMetrics& before, const NKikimrTabletBase::TMetrics& after); diff --git a/ydb/core/mind/hive/object_distribution.h b/ydb/core/mind/hive/object_distribution.h index 8f76faf56f..a2adb382a4 100644 --- a/ydb/core/mind/hive/object_distribution.h +++ b/ydb/core/mind/hive/object_distribution.h @@ -13,7 +13,18 @@ namespace NKikimr { namespace NHive { struct TObjectDistribution { - std::multiset<i64> SortedDistribution; + struct CountOnNode { + i64 Count; + TNodeId Node; + + CountOnNode(i64 count, TNodeId node) : Count(count), Node(node) {} + + bool operator<(const CountOnNode& other) const { + return std::tie(Count, Node) < std::tie(other.Count, other.Node); + } + }; + + std::set<CountOnNode> SortedDistribution; std::unordered_map<TNodeId, i64> Distribution; const TFullObjectId Id; double Mean = 0; @@ -29,8 +40,8 @@ struct TObjectDistribution { if (SortedDistribution.empty()) { return 0; } - i64 minVal = *SortedDistribution.begin(); - i64 maxVal = *SortedDistribution.rbegin(); + i64 minVal = SortedDistribution.begin()->Count; + i64 maxVal = SortedDistribution.rbegin()->Count; if (maxVal == 0) { return 0; } @@ -44,52 +55,52 @@ struct TObjectDistribution { return VarianceNumerator / Distribution.size(); } - void RemoveFromSortedDistribution(i64 value) { + void RemoveFromSortedDistribution(CountOnNode value) { + auto cnt = value.Count; i64 numNodes = Distribution.size(); auto it = SortedDistribution.find(value); + if (it == SortedDistribution.end()) { + return; + } SortedDistribution.erase(it); double meanWithoutNode = 0; if (numNodes > 1) { - meanWithoutNode = (Mean * numNodes - value) / (numNodes - 1); + meanWithoutNode = (Mean * numNodes - cnt) / (numNodes - 1); } - VarianceNumerator -= (Mean - value) * (meanWithoutNode - value); + VarianceNumerator -= (Mean - cnt) * (meanWithoutNode - cnt); Mean = meanWithoutNode; } void UpdateCount(const TNodeInfo& node, i64 diff) { if (!node.MatchesFilter(NodeFilter) || !node.IsAllowedToRunTablet()) { + // We should not use this node for computing imbalance, hence we ignore it in SortedDistribution + // But we still account for it in Distribution, because it might become relevant later + Distribution[node.Id] += diff; return; } auto [it, newNode] = Distribution.insert({node.Id, 0}); i64& value = it->second; i64 numNodes = Distribution.size(); if (!newNode) { - RemoveFromSortedDistribution(value); + RemoveFromSortedDistribution({value, node.Id}); } if (diff + value < 0) { BLOG_ERROR("UpdateObjectCount: new value " << diff + value << " is negative"); } Y_DEBUG_ABORT_UNLESS(diff + value >= 0); value += diff; - SortedDistribution.insert(value); + SortedDistribution.emplace(value, node.Id); double newMean = (Mean * (numNodes - 1) + value) / numNodes; VarianceNumerator += (Mean - value) * (newMean - value); Mean = newMean; } - void SetCount(const TNodeInfo& node, i64 value) { - auto it = Distribution.find(node.Id); - i64 oldValue = (it == Distribution.end()) ? 0 : it->second; - UpdateCount(node, value - oldValue); - } - void RemoveNode(TNodeId node) { auto it = Distribution.find(node); if (it == Distribution.end()) { return; } - RemoveFromSortedDistribution(it->second); - Distribution.erase(node); + RemoveFromSortedDistribution({it->second, node}); } bool operator<(const TObjectDistribution& other) const { @@ -126,7 +137,7 @@ struct TObjectDistributions { return TObjectToBalance(TFullObjectId()); } const auto& dist = *SortedDistributions.rbegin(); - i64 maxCnt = *dist.SortedDistribution.rbegin(); + i64 maxCnt = dist.SortedDistribution.rbegin()->Count; TObjectToBalance result(dist.Id); for (const auto& [node, cnt] : dist.Distribution) { if (cnt == maxCnt) { @@ -207,13 +218,6 @@ struct TObjectDistributions { for (const auto& [obj, it] : Distributions) { UpdateCount(obj, node, 0); } - for (const auto& [obj, tablets] : node.TabletsOfObject) { - ui64 cnt = tablets.size(); - auto updateFunc = [&](TObjectDistribution& dist) { - dist.SetCount(node, cnt); - }; - UpdateDistribution(obj, updateFunc); - } } void RemoveNode(const TNodeInfo& node) { diff --git a/ydb/core/mind/hive/tablet_info.cpp b/ydb/core/mind/hive/tablet_info.cpp index d7c162fe0a..15a7c80968 100644 --- a/ydb/core/mind/hive/tablet_info.cpp +++ b/ydb/core/mind/hive/tablet_info.cpp @@ -374,10 +374,14 @@ void TTabletInfo::UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics i64 counterAfter = ResourceValues.GetCounter(); const auto& after = ResourceValues; if (Node != nullptr) { - Node->UpdateResourceValues(this, before, after); - i64 deltaCounter = counterAfter - counterBefore; - if (deltaCounter != 0 && IsLeader()) { - Hive.UpdateObjectCount(AsLeader(), *Node, deltaCounter); + if (IsResourceDrainingState(VolatileState)) { + Node->UpdateResourceValues(this, before, after); + } + if (IsAliveState(VolatileState)) { + i64 deltaCounter = counterAfter - counterBefore; + if (deltaCounter != 0 && IsLeader()) { + Hive.UpdateObjectCount(AsLeader(), *Node, deltaCounter); + } } } } |