about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author vporyadke <zalyalov@ydb.tech> 2024-01-31 15:15:07 +0100
committer GitHub <noreply@github.com> 2024-01-31 17:15:07 +0300
commit b50b67d8c078f026eceffa421c906d220dc9259a (patch)
tree 94856ea33dff4521e71adbc40bae13daf1fa7a68
parent 6c3f871955b3fb4d820b31748bf4a225a672df47 (diff)
download ydb-b50b67d8c078f026eceffa421c906d220dc9259a.tar.gz
storage balancer info in hive ui & sensors KIKIMR-2190 (#1200)
-rw-r--r-- ydb/core/mind/hive/hive_impl.cpp 5
-rw-r--r-- ydb/core/mind/hive/hive_impl.h 6
-rw-r--r-- ydb/core/mind/hive/hive_ut.cpp 1
-rw-r--r-- ydb/core/mind/hive/monitoring.cpp 15
-rw-r--r-- ydb/core/mind/hive/tx__update_tablet_groups.cpp 1
-rw-r--r-- ydb/core/protos/config.proto 9
-rw-r--r-- ydb/core/protos/counters_hive.proto 2
7 files changed, 34 insertions, 5 deletions
diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp
index 76fdcdeb1c..a866ea08e3 100644
--- a/ydb/core/mind/hive/hive_impl.cpp
+++ b/ydb/core/mind/hive/hive_impl.cpp
@@ -2382,7 +2382,9 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) {
auto& [stats, pool] = *std::max_element(poolStats.begin(), poolStats.end(), [](const TPoolStat& lhs, const TPoolStat& rhs) {
return lhs.first.Scatter < rhs.first.Scatter;
});
- if (stats.Scatter > GetMinStorageScatterToBalance()) {
+ StorageScatter = stats.Scatter;
+ TabletCounters->Simple()[NHive::COUNTER_STORAGE_SCATTER].Set(StorageScatter * 100);
+ if (StorageScatter > GetMinStorageScatterToBalance()) {
BLOG_D("Storage Scatter = " << stats.Scatter << " in pool " << pool.Name << ", starting StorageBalancer");
ui64 numReassigns = 1;
auto it = pool.Groups.find(stats.MaxUsageGroupId);
@@ -2395,6 +2397,7 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) {
}
StartHiveStorageBalancer({
.NumReassigns = numReassigns,
+ .MaxInFlight = GetStorageBalancerInflight(),
.StoragePool = pool.Name
});
}
diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h
index 33da8f8f31..587219a41e 100644
--- a/ydb/core/mind/hive/hive_impl.h
+++ b/ydb/core/mind/hive/hive_impl.h
@@ -233,6 +233,7 @@ protected:
friend class TTxTabletOwnersReply;
friend class TTxRequestTabletOwners;
friend class TTxUpdateTabletsObject;
+ friend class TTxUpdateTabletGroups;
friend class TDeleteTabletActor;
@@ -327,6 +328,7 @@ protected:
ui32 DataCenters = 1;
ui32 RegisteredDataCenters = 1;
TObjectDistributions ObjectDistributions;
+ double StorageScatter = 0;
bool AreWeRootHive() const { return RootHiveId == HiveId; }
bool AreWeSubDomainHive() const { return RootHiveId != HiveId; }
@@ -925,6 +927,10 @@ public:
return CurrentConfig.GetMinStorageScatterToBalance();
}
+ ui64 GetStorageBalancerInflight() const {
+ return CurrentConfig.GetStorageBalancerInflight();
+ }
+
static void ActualizeRestartStatistics(google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
static bool IsSystemTablet(TTabletTypes::EType type);
diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp
index ca238edb21..58367a6040 100644
--- a/ydb/core/mind/hive/hive_ut.cpp
+++ b/ydb/core/mind/hive/hive_ut.cpp
@@ -2786,6 +2786,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
Setup(runtime, true, 2, [](TAppPrepare& app) {
app.HiveConfig.SetMinPeriodBetweenReassign(0);
app.HiveConfig.SetStorageInfoRefreshFrequency(200);
+ app.HiveConfig.SetMinStorageScatterToBalance(0.5);
});
const ui64 hiveTablet = MakeDefaultHiveID(0);
const ui64 testerTablet = MakeDefaultHiveID(1);
diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp
index e3757b674d..09c8040aa0 100644
--- a/ydb/core/mind/hive/monitoring.cpp
+++ b/ydb/core/mind/hive/monitoring.cpp
@@ -828,6 +828,12 @@ public:
UpdateConfig(db, "MaxWarmUpPeriod");
UpdateConfig(db, "WarmUpEnabled");
UpdateConfig(db, "ObjectImbalanceToBalance");
+ UpdateConfig(db, "ChannelBalanceStrategy");
+ UpdateConfig(db, "MaxChannelHistorySize");
+ UpdateConfig(db, "StorageInfoRefreshFrequency");
+ UpdateConfig(db, "MinStorageScatterToBalance");
+ UpdateConfig(db, "MinGroupUsageToBalance");
+ UpdateConfig(db, "StorageBalancerInflight");
if (params.contains("BalancerIgnoreTabletTypes")) {
TVector<TString> tabletTypeNames = SplitString(params.Get("BalancerIgnoreTabletTypes"), ";");
@@ -1111,6 +1117,12 @@ public:
ShowConfig(out, "MaxWarmUpPeriod");
ShowConfig(out, "WarmUpEnabled");
ShowConfig(out, "ObjectImbalanceToBalance");
+ ShowConfig(out, "ChannelBalanceStrategy");
+ ShowConfig(out, "MaxChannelHistorySize");
+ ShowConfig(out, "StorageInfoRefreshFrequency");
+ ShowConfig(out, "MinStorageScatterToBalance");
+ ShowConfig(out, "MinGroupUsageToBalance");
+ ShowConfig(out, "StorageBalancerInflight");
ShowConfigForBalancerIgnoreTabletTypes(out);
out << "<div class='row' style='margin-top:40px'>";
@@ -1405,6 +1417,7 @@ public:
out << "<tr><td>Network</td><td id='resourceScatterNetwork'></td></tr>";
out << "<tr><td>MaxUsage</td><td id='maxUsage'></td></tr>";
out << "<tr><td>Imbalance</td><td id='objectImbalance'></td></tr>";
+ out << "<tr><td>Storage</td><td id='storageScatter'></td></tr>";
out << "</table></div>";
out << "<div style='min-width:220px'><table class='simple-table3'>";
out << "<tr><th>Balancer</th><th style='min-width:50px'>Runs</th><th style='min-width:50px'>Moves</th>";
@@ -1896,6 +1909,7 @@ function fillDataShort(result) {
$('#waitQueue').html(result.WaitQueueSize);
$('#maxUsage').html(result.MaxUsage);
$('#objectImbalance').html(result.ObjectImbalance);
+ $('#storageScatter').html(result.StorageScatter);
$('#resourceTotalCounter').html(result.ResourceTotal.Counter);
$('#resourceTotalCPU').html(result.ResourceTotal.CPU);
@@ -2205,6 +2219,7 @@ public:
jsonData["ScatterHtml"]["Memory"] = std::get<NMetrics::EResource::Memory>(scatterHtml);
jsonData["ScatterHtml"]["Network"] = std::get<NMetrics::EResource::Network>(scatterHtml);
jsonData["ObjectImbalance"] = GetValueWithColoredGlyph(Self->ObjectDistributions.GetMaxImbalance(), Self->GetObjectImbalanceToBalance());
+ jsonData["StorageScatter"] = GetValueWithColoredGlyph(Self->StorageScatter, Self->GetMinStorageScatterToBalance());
jsonData["WarmUp"] = Self->WarmUp;
if (Cgi.Get("nodes") == "1") {
diff --git a/ydb/core/mind/hive/tx__update_tablet_groups.cpp b/ydb/core/mind/hive/tx__update_tablet_groups.cpp
index be2900c19f..5511594bf3 100644
--- a/ydb/core/mind/hive/tx__update_tablet_groups.cpp
+++ b/ydb/core/mind/hive/tx__update_tablet_groups.cpp
@@ -285,6 +285,7 @@ public:
// Use best effort to kill currently running tablet
SideEffects.Register(CreateTabletKiller(TabletId, /* nodeId */ 0, tablet->KnownGeneration));
}
+ SideEffects.Callback([counters = Self->TabletCounters] { counters->Cumulative()[NHive::COUNTER_TABLETS_STORAGE_REASSIGNED].Increment(1); });
}
if (needToIncreaseGeneration) {
tablet->IncreaseGeneration();
diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto
index cf31b90b8c..9f0f5a1e52 100644
--- a/ydb/core/protos/config.proto
+++ b/ydb/core/protos/config.proto
@@ -1375,9 +1375,9 @@ message THiveConfig {
}
enum EHiveChannelBalanceStrategy {
- HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 1;
- HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 2;
- HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 3;
+ HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 0;
+ HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 1;
+ HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 2;
}
enum EHiveNodeSelectStrategy {
@@ -1460,8 +1460,9 @@ message THiveConfig {
optional EHiveChannelBalanceStrategy ChannelBalanceStrategy = 68 [default = HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM];
optional uint64 MaxChannelHistorySize = 69 [default = 200];
optional uint64 StorageInfoRefreshFrequency = 70 [default = 600000]; // send a query to BSC every x milliseconds
- optional double MinStorageScatterToBalance = 71 [default = 0.5];
+ optional double MinStorageScatterToBalance = 71 [default = 999]; // storage balancer trigger is disabled by default
optional double MinGroupUsageToBalance = 72 [default = 0.1];
+ optional uint64 StorageBalancerInflight = 73 [default = 1];
}
message TColumnShardConfig {
diff --git a/ydb/core/protos/counters_hive.proto b/ydb/core/protos/counters_hive.proto
index a936a0aa7c..47dc89070b 100644
--- a/ydb/core/protos/counters_hive.proto
+++ b/ydb/core/protos/counters_hive.proto
@@ -28,6 +28,7 @@ enum ESimpleCounters {
COUNTER_BALANCE_OBJECT_IMBALANCE = 18 [(CounterOpts) = {Name: "BalanceObjectImbalance"}];
COUNTER_IMBALANCED_OBJECTS = 19 [(CounterOpts) = {Name: "ImbalancedObjects"}];
COUNTER_WORST_OBJECT_VARIANCE = 20 [(CounterOpts) = {Name: "WorstObjectVariance"}];
+ COUNTER_STORAGE_SCATTER = 21 [(CounterOpts) = {Name: "StorageScatter"}];
}
enum ECumulativeCounters {
@@ -44,6 +45,7 @@ enum ECumulativeCounters {
COUNTER_SUGGESTED_SCALE_UP = 10 [(CounterOpts) = {Name: "SuggestedScaleUp"}];
COUNTER_SUGGESTED_SCALE_DOWN = 11 [(CounterOpts) = {Name: "SuggestedScaleDown"}];
COUNTER_STORAGE_BALANCER_EXECUTED = 12 [(CounterOpts) = {Name: "StorageBalancerExecuted"}];
+ COUNTER_TABLETS_STORAGE_REASSIGNED = 13 [(CounterOpts) = {Name: "TabletsStorageReassigned"}];
}
enum EPercentileCounters {