about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author zalyalov <zalyalov@ydb.tech> 2023-12-05 19:02:18 +0300
committer zalyalov <zalyalov@ydb.tech> 2023-12-05 20:19:45 +0300
commit da7e2c7c4dce703d3521174098ca91539925383e (patch)
tree 48f4504b8af6269bbede02af719fdc8a09fa152a
parent f5bedf05574cf14e111598330bf7171931471d07 (diff)
download ydb-da7e2c7c4dce703d3521174098ca91539925383e.tar.gz
fixes for tracking object distribution with restarts KIKIMR-19696
-rw-r--r-- ydb/core/mind/hive/hive_impl.h 2
-rw-r--r-- ydb/core/mind/hive/hive_statics.cpp 21
-rw-r--r-- ydb/core/mind/hive/node_info.h 22
-rw-r--r-- ydb/core/mind/hive/object_distribution.h 52
-rw-r--r-- ydb/core/mind/hive/tablet_info.cpp 12
5 files changed, 59 insertions, 50 deletions
diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h
index 65e08a8a80..e3318ad1e4 100644
--- a/ydb/core/mind/hive/hive_impl.h
+++ b/ydb/core/mind/hive/hive_impl.h
@@ -150,6 +150,8 @@ void MakeTabletTypeSet(std::vector<TTabletTypes::EType>& list);
bool IsValidTabletType(TTabletTypes::EType type);
bool IsValidObjectId(const TFullObjectId& objectId);
TString GetRunningTabletsText(ui64 runningTablets, ui64 totalTablets, bool warmUp);
+bool IsResourceDrainingState(TTabletInfo::EVolatileState state);
+bool IsAliveState(TTabletInfo::EVolatileState state);
class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveSharedSettings {
public:
diff --git a/ydb/core/mind/hive/hive_statics.cpp b/ydb/core/mind/hive/hive_statics.cpp
index b6ec140a1f..e3d6c65eb7 100644
--- a/ydb/core/mind/hive/hive_statics.cpp
+++ b/ydb/core/mind/hive/hive_statics.cpp
@@ -417,5 +417,26 @@ TString GetRunningTabletsText(ui64 runningTablets, ui64 totalTablets, bool warmU
return str;
}
+bool IsResourceDrainingState(TTabletInfo::EVolatileState state) {
+ switch (state) {
+ case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING:
+ case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING:
+ case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_UNKNOWN:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool IsAliveState(TTabletInfo::EVolatileState state) {
+ switch (state) {
+ case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING:
+ case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING:
+ return true;
+ default:
+ return false;
+ }
+}
+
} // NHive
} // NKikimr
diff --git a/ydb/core/mind/hive/node_info.h b/ydb/core/mind/hive/node_info.h
index 2fde2dc505..f5baa4828b 100644
--- a/ydb/core/mind/hive/node_info.h
+++ b/ydb/core/mind/hive/node_info.h
@@ -74,28 +74,6 @@ public:
}
void ChangeVolatileState(EVolatileState state);
-
- static bool IsResourceDrainingState(TTabletInfo::EVolatileState state) {
- switch (state) {
- case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING:
- case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING:
- case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_UNKNOWN:
- return true;
- default:
- return false;
- }
- }
-
- static bool IsAliveState(TTabletInfo::EVolatileState state) {
- switch (state) {
- case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_STARTING:
- case TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING:
- return true;
- default:
- return false;
- }
- }
-
bool OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EVolatileState newState);
void UpdateResourceValues(const TTabletInfo* tablet, const NKikimrTabletBase::TMetrics& before, const NKikimrTabletBase::TMetrics& after);
diff --git a/ydb/core/mind/hive/object_distribution.h b/ydb/core/mind/hive/object_distribution.h
index 8f76faf56f..a2adb382a4 100644
--- a/ydb/core/mind/hive/object_distribution.h
+++ b/ydb/core/mind/hive/object_distribution.h
@@ -13,7 +13,18 @@ namespace NKikimr {
namespace NHive {
struct TObjectDistribution {
- std::multiset<i64> SortedDistribution;
+ struct CountOnNode {
+ i64 Count;
+ TNodeId Node;
+
+ CountOnNode(i64 count, TNodeId node) : Count(count), Node(node) {}
+
+ bool operator<(const CountOnNode& other) const {
+ return std::tie(Count, Node) < std::tie(other.Count, other.Node);
+ }
+ };
+
+ std::set<CountOnNode> SortedDistribution;
std::unordered_map<TNodeId, i64> Distribution;
const TFullObjectId Id;
double Mean = 0;
@@ -29,8 +40,8 @@ struct TObjectDistribution {
if (SortedDistribution.empty()) {
return 0;
}
- i64 minVal = *SortedDistribution.begin();
- i64 maxVal = *SortedDistribution.rbegin();
+ i64 minVal = SortedDistribution.begin()->Count;
+ i64 maxVal = SortedDistribution.rbegin()->Count;
if (maxVal == 0) {
return 0;
}
@@ -44,52 +55,52 @@ struct TObjectDistribution {
return VarianceNumerator / Distribution.size();
}
- void RemoveFromSortedDistribution(i64 value) {
+ void RemoveFromSortedDistribution(CountOnNode value) {
+ auto cnt = value.Count;
i64 numNodes = Distribution.size();
auto it = SortedDistribution.find(value);
+ if (it == SortedDistribution.end()) {
+ return;
+ }
SortedDistribution.erase(it);
double meanWithoutNode = 0;
if (numNodes > 1) {
- meanWithoutNode = (Mean * numNodes - value) / (numNodes - 1);
+ meanWithoutNode = (Mean * numNodes - cnt) / (numNodes - 1);
}
- VarianceNumerator -= (Mean - value) * (meanWithoutNode - value);
+ VarianceNumerator -= (Mean - cnt) * (meanWithoutNode - cnt);
Mean = meanWithoutNode;
}
void UpdateCount(const TNodeInfo& node, i64 diff) {
if (!node.MatchesFilter(NodeFilter) || !node.IsAllowedToRunTablet()) {
+ // We should not use this node for computing imbalance, hence we ignore it in SortedDistribution
+ // But we still account for it in Distribution, because it might become relevant later
+ Distribution[node.Id] += diff;
return;
}
auto [it, newNode] = Distribution.insert({node.Id, 0});
i64& value = it->second;
i64 numNodes = Distribution.size();
if (!newNode) {
- RemoveFromSortedDistribution(value);
+ RemoveFromSortedDistribution({value, node.Id});
}
if (diff + value < 0) {
BLOG_ERROR("UpdateObjectCount: new value " << diff + value << " is negative");
}
Y_DEBUG_ABORT_UNLESS(diff + value >= 0);
value += diff;
- SortedDistribution.insert(value);
+ SortedDistribution.emplace(value, node.Id);
double newMean = (Mean * (numNodes - 1) + value) / numNodes;
VarianceNumerator += (Mean - value) * (newMean - value);
Mean = newMean;
}
- void SetCount(const TNodeInfo& node, i64 value) {
- auto it = Distribution.find(node.Id);
- i64 oldValue = (it == Distribution.end()) ? 0 : it->second;
- UpdateCount(node, value - oldValue);
- }
-
void RemoveNode(TNodeId node) {
auto it = Distribution.find(node);
if (it == Distribution.end()) {
return;
}
- RemoveFromSortedDistribution(it->second);
- Distribution.erase(node);
+ RemoveFromSortedDistribution({it->second, node});
}
bool operator<(const TObjectDistribution& other) const {
@@ -126,7 +137,7 @@ struct TObjectDistributions {
return TObjectToBalance(TFullObjectId());
}
const auto& dist = *SortedDistributions.rbegin();
- i64 maxCnt = *dist.SortedDistribution.rbegin();
+ i64 maxCnt = dist.SortedDistribution.rbegin()->Count;
TObjectToBalance result(dist.Id);
for (const auto& [node, cnt] : dist.Distribution) {
if (cnt == maxCnt) {
@@ -207,13 +218,6 @@ struct TObjectDistributions {
for (const auto& [obj, it] : Distributions) {
UpdateCount(obj, node, 0);
}
- for (const auto& [obj, tablets] : node.TabletsOfObject) {
- ui64 cnt = tablets.size();
- auto updateFunc = [&](TObjectDistribution& dist) {
- dist.SetCount(node, cnt);
- };
- UpdateDistribution(obj, updateFunc);
- }
}
void RemoveNode(const TNodeInfo& node) {
diff --git a/ydb/core/mind/hive/tablet_info.cpp b/ydb/core/mind/hive/tablet_info.cpp
index d7c162fe0a..15a7c80968 100644
--- a/ydb/core/mind/hive/tablet_info.cpp
+++ b/ydb/core/mind/hive/tablet_info.cpp
@@ -374,10 +374,14 @@ void TTabletInfo::UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics
i64 counterAfter = ResourceValues.GetCounter();
const auto& after = ResourceValues;
if (Node != nullptr) {
- Node->UpdateResourceValues(this, before, after);
- i64 deltaCounter = counterAfter - counterBefore;
- if (deltaCounter != 0 && IsLeader()) {
- Hive.UpdateObjectCount(AsLeader(), *Node, deltaCounter);
+ if (IsResourceDrainingState(VolatileState)) {
+ Node->UpdateResourceValues(this, before, after);
+ }
+ if (IsAliveState(VolatileState)) {
+ i64 deltaCounter = counterAfter - counterBefore;
+ if (deltaCounter != 0 && IsLeader()) {
+ Hive.UpdateObjectCount(AsLeader(), *Node, deltaCounter);
+ }
}
}
}