diff options
author | vporyadke <zalyalov@ydb.tech> | 2024-01-31 15:15:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-31 17:15:07 +0300 |
commit | b50b67d8c078f026eceffa421c906d220dc9259a (patch) | |
tree | 94856ea33dff4521e71adbc40bae13daf1fa7a68 | |
parent | 6c3f871955b3fb4d820b31748bf4a225a672df47 (diff) | |
download | ydb-b50b67d8c078f026eceffa421c906d220dc9259a.tar.gz |
storage balancer info in hive ui & sensors KIKIMR-2190 (#1200)
-rw-r--r-- | ydb/core/mind/hive/hive_impl.cpp | 5 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_impl.h | 6 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_ut.cpp | 1 | ||||
-rw-r--r-- | ydb/core/mind/hive/monitoring.cpp | 15 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__update_tablet_groups.cpp | 1 | ||||
-rw-r--r-- | ydb/core/protos/config.proto | 9 | ||||
-rw-r--r-- | ydb/core/protos/counters_hive.proto | 2 |
7 files changed, 34 insertions, 5 deletions
diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index 76fdcdeb1c..a866ea08e3 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -2382,7 +2382,9 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) { auto& [stats, pool] = *std::max_element(poolStats.begin(), poolStats.end(), [](const TPoolStat& lhs, const TPoolStat& rhs) { return lhs.first.Scatter < rhs.first.Scatter; }); - if (stats.Scatter > GetMinStorageScatterToBalance()) { + StorageScatter = stats.Scatter; + TabletCounters->Simple()[NHive::COUNTER_STORAGE_SCATTER].Set(StorageScatter * 100); + if (StorageScatter > GetMinStorageScatterToBalance()) { BLOG_D("Storage Scatter = " << stats.Scatter << " in pool " << pool.Name << ", starting StorageBalancer"); ui64 numReassigns = 1; auto it = pool.Groups.find(stats.MaxUsageGroupId); @@ -2395,6 +2397,7 @@ void THive::Handle(TEvPrivate::TEvProcessStorageBalancer::TPtr&) { } StartHiveStorageBalancer({ .NumReassigns = numReassigns, + .MaxInFlight = GetStorageBalancerInflight(), .StoragePool = pool.Name }); } diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index 33da8f8f31..587219a41e 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -233,6 +233,7 @@ protected: friend class TTxTabletOwnersReply; friend class TTxRequestTabletOwners; friend class TTxUpdateTabletsObject; + friend class TTxUpdateTabletGroups; friend class TDeleteTabletActor; @@ -327,6 +328,7 @@ protected: ui32 DataCenters = 1; ui32 RegisteredDataCenters = 1; TObjectDistributions ObjectDistributions; + double StorageScatter = 0; bool AreWeRootHive() const { return RootHiveId == HiveId; } bool AreWeSubDomainHive() const { return RootHiveId != HiveId; } @@ -925,6 +927,10 @@ public: return CurrentConfig.GetMinStorageScatterToBalance(); } + ui64 GetStorageBalancerInflight() const { + return CurrentConfig.GetStorageBalancerInflight(); + } + static void ActualizeRestartStatistics(google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier); static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier); static bool IsSystemTablet(TTabletTypes::EType type); diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index ca238edb21..58367a6040 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -2786,6 +2786,7 @@ Y_UNIT_TEST_SUITE(THiveTest) { Setup(runtime, true, 2, [](TAppPrepare& app) { app.HiveConfig.SetMinPeriodBetweenReassign(0); app.HiveConfig.SetStorageInfoRefreshFrequency(200); + app.HiveConfig.SetMinStorageScatterToBalance(0.5); }); const ui64 hiveTablet = MakeDefaultHiveID(0); const ui64 testerTablet = MakeDefaultHiveID(1); diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index e3757b674d..09c8040aa0 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -828,6 +828,12 @@ public: UpdateConfig(db, "MaxWarmUpPeriod"); UpdateConfig(db, "WarmUpEnabled"); UpdateConfig(db, "ObjectImbalanceToBalance"); + UpdateConfig(db, "ChannelBalanceStrategy"); + UpdateConfig(db, "MaxChannelHistorySize"); + UpdateConfig(db, "StorageInfoRefreshFrequency"); + UpdateConfig(db, "MinStorageScatterToBalance"); + UpdateConfig(db, "MinGroupUsageToBalance"); + UpdateConfig(db, "StorageBalancerInflight"); if (params.contains("BalancerIgnoreTabletTypes")) { TVector<TString> tabletTypeNames = SplitString(params.Get("BalancerIgnoreTabletTypes"), ";"); @@ -1111,6 +1117,12 @@ public: ShowConfig(out, "MaxWarmUpPeriod"); ShowConfig(out, "WarmUpEnabled"); ShowConfig(out, "ObjectImbalanceToBalance"); + ShowConfig(out, "ChannelBalanceStrategy"); + ShowConfig(out, "MaxChannelHistorySize"); + ShowConfig(out, "StorageInfoRefreshFrequency"); + ShowConfig(out, "MinStorageScatterToBalance"); + ShowConfig(out, "MinGroupUsageToBalance"); + ShowConfig(out, "StorageBalancerInflight"); ShowConfigForBalancerIgnoreTabletTypes(out); out << "<div class='row' style='margin-top:40px'>"; @@ -1405,6 +1417,7 @@ public: out << "<tr><td>Network</td><td id='resourceScatterNetwork'></td></tr>"; out << "<tr><td>MaxUsage</td><td id='maxUsage'></td></tr>"; out << "<tr><td>Imbalance</td><td id='objectImbalance'></td></tr>"; + out << "<tr><td>Storage</td><td id='storageScatter'></td></tr>"; out << "</table></div>"; out << "<div style='min-width:220px'><table class='simple-table3'>"; out << "<tr><th>Balancer</th><th style='min-width:50px'>Runs</th><th style='min-width:50px'>Moves</th>"; @@ -1896,6 +1909,7 @@ function fillDataShort(result) { $('#waitQueue').html(result.WaitQueueSize); $('#maxUsage').html(result.MaxUsage); $('#objectImbalance').html(result.ObjectImbalance); + $('#storageScatter').html(result.StorageScatter); $('#resourceTotalCounter').html(result.ResourceTotal.Counter); $('#resourceTotalCPU').html(result.ResourceTotal.CPU); @@ -2205,6 +2219,7 @@ public: jsonData["ScatterHtml"]["Memory"] = std::get<NMetrics::EResource::Memory>(scatterHtml); jsonData["ScatterHtml"]["Network"] = std::get<NMetrics::EResource::Network>(scatterHtml); jsonData["ObjectImbalance"] = GetValueWithColoredGlyph(Self->ObjectDistributions.GetMaxImbalance(), Self->GetObjectImbalanceToBalance()); + jsonData["StorageScatter"] = GetValueWithColoredGlyph(Self->StorageScatter, Self->GetMinStorageScatterToBalance()); jsonData["WarmUp"] = Self->WarmUp; if (Cgi.Get("nodes") == "1") { diff --git a/ydb/core/mind/hive/tx__update_tablet_groups.cpp b/ydb/core/mind/hive/tx__update_tablet_groups.cpp index be2900c19f..5511594bf3 100644 --- a/ydb/core/mind/hive/tx__update_tablet_groups.cpp +++ b/ydb/core/mind/hive/tx__update_tablet_groups.cpp @@ -285,6 +285,7 @@ public: // Use best effort to kill currently running tablet SideEffects.Register(CreateTabletKiller(TabletId, /* nodeId */ 0, tablet->KnownGeneration)); } + SideEffects.Callback([counters = Self->TabletCounters] { counters->Cumulative()[NHive::COUNTER_TABLETS_STORAGE_REASSIGNED].Increment(1); }); } if (needToIncreaseGeneration) { tablet->IncreaseGeneration(); diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index cf31b90b8c..9f0f5a1e52 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1375,9 +1375,9 @@ message THiveConfig { } enum EHiveChannelBalanceStrategy { - HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 1; - HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 2; - HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 3; + HIVE_CHANNEL_BALANCE_STRATEGY_HEAVIEST = 0; + HIVE_CHANNEL_BALANCE_STRATEGY_RANDOM = 1; + HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM = 2; } enum EHiveNodeSelectStrategy { @@ -1460,8 +1460,9 @@ message THiveConfig { optional EHiveChannelBalanceStrategy ChannelBalanceStrategy = 68 [default = HIVE_CHANNEL_BALANCE_STRATEGY_WEIGHTED_RANDOM]; optional uint64 MaxChannelHistorySize = 69 [default = 200]; optional uint64 StorageInfoRefreshFrequency = 70 [default = 600000]; // send a query to BSC every x milliseconds - optional double MinStorageScatterToBalance = 71 [default = 0.5]; + optional double MinStorageScatterToBalance = 71 [default = 999]; // storage balancer trigger is disabled by default optional double MinGroupUsageToBalance = 72 [default = 0.1]; + optional uint64 StorageBalancerInflight = 73 [default = 1]; } message TColumnShardConfig { diff --git a/ydb/core/protos/counters_hive.proto b/ydb/core/protos/counters_hive.proto index a936a0aa7c..47dc89070b 100644 --- a/ydb/core/protos/counters_hive.proto +++ b/ydb/core/protos/counters_hive.proto @@ -28,6 +28,7 @@ enum ESimpleCounters { COUNTER_BALANCE_OBJECT_IMBALANCE = 18 [(CounterOpts) = {Name: "BalanceObjectImbalance"}]; COUNTER_IMBALANCED_OBJECTS = 19 [(CounterOpts) = {Name: "ImbalancedObjects"}]; COUNTER_WORST_OBJECT_VARIANCE = 20 [(CounterOpts) = {Name: "WorstObjectVariance"}]; + COUNTER_STORAGE_SCATTER = 21 [(CounterOpts) = {Name: "StorageScatter"}]; } enum ECumulativeCounters { @@ -44,6 +45,7 @@ enum ECumulativeCounters { COUNTER_SUGGESTED_SCALE_UP = 10 [(CounterOpts) = {Name: "SuggestedScaleUp"}]; COUNTER_SUGGESTED_SCALE_DOWN = 11 [(CounterOpts) = {Name: "SuggestedScaleDown"}]; COUNTER_STORAGE_BALANCER_EXECUTED = 12 [(CounterOpts) = {Name: "StorageBalancerExecuted"}]; + COUNTER_TABLETS_STORAGE_REASSIGNED = 13 [(CounterOpts) = {Name: "TabletsStorageReassigned"}]; } enum EPercentileCounters { |