diff options
author | zalyalov <zalyalov@yandex-team.com> | 2023-09-26 18:00:15 +0300 |
---|---|---|
committer | zalyalov <zalyalov@yandex-team.com> | 2023-09-26 19:05:46 +0300 |
commit | bff841e7ec8c7d10411bcaee1d0436ed75557a3f (patch) | |
tree | 9402c309914af285e94e9fc7de5775bfe5b70283 | |
parent | ee63e910d4e4b6ad961613b9aa01d1fbbb0f0ba0 (diff) | |
download | ydb-bff841e7ec8c7d10411bcaee1d0436ed75557a3f.tar.gz |
separate scatter per resource type
-rw-r--r-- | ydb/core/mind/hive/balancer.cpp | 82 | ||||
-rw-r--r-- | ydb/core/mind/hive/balancer.h | 4 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive.cpp | 35 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive.h | 19 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_impl.cpp | 65 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_impl.h | 51 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_impl_ut.cpp | 22 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_ut.cpp | 81 | ||||
-rw-r--r-- | ydb/core/mind/hive/monitoring.cpp | 14 | ||||
-rw-r--r-- | ydb/core/mind/hive/node_info.cpp | 11 | ||||
-rw-r--r-- | ydb/core/mind/hive/node_info.h | 4 | ||||
-rw-r--r-- | ydb/core/mind/hive/tablet_info.h | 23 | ||||
-rw-r--r-- | ydb/core/protos/config.proto | 6 | ||||
-rw-r--r-- | ydb/core/util/tuples.h | 42 |
14 files changed, 356 insertions, 103 deletions
diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp index 27261627b15..f30953c8bfb 100644 --- a/ydb/core/mind/hive/balancer.cpp +++ b/ydb/core/mind/hive/balancer.cpp @@ -9,13 +9,13 @@ namespace NKikimr { namespace NHive { template<> -void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(std::vector<TNodeInfo*>& nodes) { +void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(std::vector<TNodeInfo*>& nodes, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); // weighted random shuffle std::vector<double> usages; usages.reserve(nodes.size()); for (auto it = nodes.begin(); it != nodes.end(); ++it) { - usages.emplace_back((*it)->GetNodeUsage()); + usages.emplace_back((*it)->GetNodeUsage(resourceToBalance)); } auto itN = nodes.begin(); auto itU = usages.begin(); @@ -31,12 +31,12 @@ void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_OLD_WEI } template<> -void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_WEIGHTED_RANDOM>(std::vector<TNodeInfo*>& nodes) { +void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_WEIGHTED_RANDOM>(std::vector<TNodeInfo*>& nodes, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); std::vector<std::pair<double, TNodeInfo*>> weights; weights.reserve(nodes.size()); for (TNodeInfo* node : nodes) { - double weight = node->GetNodeUsage(); + double weight = node->GetNodeUsage(resourceToBalance); weights.emplace_back(weight * randGen(), node); } std::sort(weights.begin(), weights.end(), [](const auto& a, const auto& b) -> bool { @@ -48,26 +48,26 @@ void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_WEIGHTE } template<> -void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_HEAVIEST>(std::vector<TNodeInfo*>& nodes) { - std::sort(nodes.begin(), nodes.end(), [](const 
TNodeInfo* a, const TNodeInfo* b) -> bool { - return a->GetNodeUsage() > b->GetNodeUsage(); +void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_HEAVIEST>(std::vector<TNodeInfo*>& nodes, EResourceToBalance resourceToBalance) { + std::sort(nodes.begin(), nodes.end(), [resourceToBalance](const TNodeInfo* a, const TNodeInfo* b) -> bool { + return a->GetNodeUsage(resourceToBalance) > b->GetNodeUsage(resourceToBalance); }); } template<> -void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_RANDOM>(std::vector<TNodeInfo*>& nodes) { +void BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_RANDOM>(std::vector<TNodeInfo*>& nodes, EResourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); std::shuffle(nodes.begin(), nodes.end(), randGen); } template<> -void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(std::vector<TTabletInfo*>& tablets) { +void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(std::vector<TTabletInfo*>& tablets, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); // weighted random shuffle std::vector<double> weights; weights.reserve(tablets.size()); for (auto it = tablets.begin(); it != tablets.end(); ++it) { - weights.emplace_back((*it)->Weight); + weights.emplace_back((*it)->GetWeight(resourceToBalance)); } auto itT = tablets.begin(); auto itW = weights.begin(); @@ -83,25 +83,25 @@ void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD } template<> -void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST>(std::vector<TTabletInfo*>& tablets) { - std::sort(tablets.begin(), tablets.end(), [](const TTabletInfo* a, const TTabletInfo* b) -> bool { - return a->Weight > b->Weight; +void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST>(std::vector<TTabletInfo*>& tablets, 
EResourceToBalance resourceToBalance) { + std::sort(tablets.begin(), tablets.end(), [resourceToBalance](const TTabletInfo* a, const TTabletInfo* b) -> bool { + return a->GetWeight(resourceToBalance) > b->GetWeight(resourceToBalance); }); } template<> -void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>(std::vector<TTabletInfo*>& tablets) { +void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>(std::vector<TTabletInfo*>& tablets, EResourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); std::shuffle(tablets.begin(), tablets.end(), randGen); } template<> -void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM>(std::vector<TTabletInfo*>& tablets) { +void BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM>(std::vector<TTabletInfo*>& tablets, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); std::vector<std::pair<double, TTabletInfo*>> weights; weights.reserve(tablets.size()); for (TTabletInfo* tablet : tablets) { - double weight = tablet->Weight; + double weight = tablet->GetWeight(resourceToBalance); weights.emplace_back(weight * randGen(), tablet); } std::sort(weights.begin(), weights.end(), [](const auto& a, const auto& b) -> bool { @@ -118,11 +118,8 @@ protected: THive* Hive; using TTabletId = TFullTabletId; ui64 KickInFlight; - ui64 MaxKickInFlight; int Movements; - int MaxMovements; - bool RecheckOnFinish; - std::vector<TNodeId> FilterNodeIds; + TBalancerSettings Settings; TString GetLogPrefix() const { return Hive->GetLogPrefix(); @@ -135,7 +132,7 @@ protected: if (Movements == 0) { Hive->TabletCounters->Cumulative()[NHive::COUNTER_BALANCER_FAILED].Increment(1); // we failed to balance specific nodes - for (TNodeId nodeId : FilterNodeIds) { + for (TNodeId nodeId : Settings.FilterNodeIds) { TNodeInfo* node = Hive->FindNode(nodeId); if (node != nullptr && node->IsOverloaded()) 
{ BLOG_D("Balancer suggests scale-up"); @@ -144,7 +141,7 @@ protected: } } } - if (RecheckOnFinish && MaxMovements != 0 && Movements >= MaxMovements) { + if (Settings.RecheckOnFinish && Settings.MaxMovements != 0 && Movements >= Settings.MaxMovements) { BLOG_D("Balancer initiated recheck"); Hive->ProcessTabletBalancer(); } else { @@ -158,12 +155,12 @@ protected: } bool CanKickNextTablet() const { - return KickInFlight < MaxKickInFlight; + return KickInFlight < Settings.MaxInFlight; } void UpdateProgress() { - if (MaxMovements != 0) { - Hive->BalancerProgress = Movements * 100 / MaxMovements; + if (Settings.MaxMovements != 0) { + Hive->BalancerProgress = Movements * 100 / Settings.MaxMovements; } else { if (Hive->TabletsTotal != 0) { Hive->BalancerProgress = Movements * 100 / Hive->TabletsTotal; @@ -177,7 +174,7 @@ protected: if (!CanKickNextTablet()) { return; } - if (MaxMovements != 0 && Movements >= MaxMovements) { + if (Settings.MaxMovements != 0 && Movements >= Settings.MaxMovements) { if (KickInFlight > 0) { return; } else { @@ -197,9 +194,9 @@ protected: TInstant now = TActivationContext::Now(); std::vector<TNodeInfo*> nodes; - if (!FilterNodeIds.empty()) { - nodes.reserve(FilterNodeIds.size()); - for (TNodeId nodeId : FilterNodeIds) { + if (!Settings.FilterNodeIds.empty()) { + nodes.reserve(Settings.FilterNodeIds.size()); + for (TNodeId nodeId : Settings.FilterNodeIds) { TNodeInfo* node = Hive->FindNode(nodeId); if (node != nullptr && node->IsAlive()) { nodes.emplace_back(node); @@ -216,16 +213,16 @@ protected: switch (Hive->GetNodeBalanceStrategy()) { case NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM: - BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(nodes); + BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(nodes, Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_WEIGHTED_RANDOM: - 
BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_WEIGHTED_RANDOM>(nodes); + BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_WEIGHTED_RANDOM>(nodes, Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_HEAVIEST: - BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_HEAVIEST>(nodes); + BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_HEAVIEST>(nodes, Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_RANDOM: - BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_RANDOM>(nodes); + BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_RANDOM>(nodes, Settings.ResourceToBalance); break; } for (const TNodeInfo* node : nodes) { @@ -246,16 +243,16 @@ protected: if (!tablets.empty()) { switch (Hive->GetTabletBalanceStrategy()) { case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM: - BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(tablets); + BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>(tablets, Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM: - BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM>(tablets); + BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM>(tablets, Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST: - BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST>(tablets); + BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST>(tablets, Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM: - 
BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>(tablets); + BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>(tablets, Settings.ResourceToBalance); break; } for (TTabletInfo* tablet : tablets) { @@ -301,14 +298,11 @@ public: return NKikimrServices::TActivity::HIVE_BALANCER_ACTOR; } - THiveBalancer(THive* hive, int maxMovements = 0, bool recheckOnFinish = false, ui64 maxInFlight = 1, const std::vector<TNodeId>& filterNodeIds = {}) + THiveBalancer(THive* hive, TBalancerSettings settings) : Hive(hive) , KickInFlight(0) - , MaxKickInFlight(maxInFlight) , Movements(0) - , MaxMovements(maxMovements) - , RecheckOnFinish(recheckOnFinish) - , FilterNodeIds(filterNodeIds) + , Settings(std::move(settings)) {} void Bootstrap() { @@ -327,9 +321,9 @@ public: } }; -void THive::StartHiveBalancer(int maxMovements, bool recheckOnFinish, ui64 maxInFlight, const std::vector<TNodeId>& filterNodeIds) { +void THive::StartHiveBalancer(TBalancerSettings settings) { if (BalancerProgress == -1) { - auto* balancer = new THiveBalancer(this, maxMovements, recheckOnFinish, maxInFlight, filterNodeIds); + auto* balancer = new THiveBalancer(this, std::move(settings)); SubActors.emplace_back(balancer); BalancerProgress = -2; RegisterWithSameMailbox(balancer); diff --git a/ydb/core/mind/hive/balancer.h b/ydb/core/mind/hive/balancer.h index cf3d538c829..267d827fa1c 100644 --- a/ydb/core/mind/hive/balancer.h +++ b/ydb/core/mind/hive/balancer.h @@ -6,10 +6,10 @@ namespace NKikimr { namespace NHive { template<NKikimrConfig::THiveConfig::EHiveNodeBalanceStrategy EHiveNodeBalanceStrategy> -void BalanceNodes(std::vector<TNodeInfo*>& nodes); +void BalanceNodes(std::vector<TNodeInfo*>& nodes, EResourceToBalance resourceTobalance); template<NKikimrConfig::THiveConfig::EHiveTabletBalanceStrategy EHiveTabletBalanceStrategy> -void BalanceTablets(std::vector<TTabletInfo*>& tablets); +void BalanceTablets(std::vector<TTabletInfo*>& tablets, 
EResourceToBalance resourceToBalance); } } diff --git a/ydb/core/mind/hive/hive.cpp b/ydb/core/mind/hive/hive.cpp index 437bbd477b7..9c13ae51dc6 100644 --- a/ydb/core/mind/hive/hive.cpp +++ b/ydb/core/mind/hive/hive.cpp @@ -1,5 +1,7 @@ #include "hive.h" +#include <ydb/core/util/tuples.h> + namespace NKikimr { namespace NHive { @@ -35,29 +37,25 @@ TString EBalancerTypeName(EBalancerType value) { } } -TResourceNormalizedValues NormalizeRawValues(const TResourceRawValues& values, const TResourceRawValues& maximum) { - TResourceNormalizedValues normValues = {}; - if (std::get<NMetrics::EResource::Counter>(maximum) != 0) { - std::get<NMetrics::EResource::Counter>(normValues) = - static_cast<double>(std::get<NMetrics::EResource::Counter>(values)) / std::get<NMetrics::EResource::Counter>(maximum); - } - if (std::get<NMetrics::EResource::CPU>(maximum) != 0) { - std::get<NMetrics::EResource::CPU>(normValues) = - static_cast<double>(std::get<NMetrics::EResource::CPU>(values)) / std::get<NMetrics::EResource::CPU>(maximum); +EResourceToBalance ToResourceToBalance(NMetrics::EResource resource) { + switch (resource) { + case NMetrics::EResource::CPU: return EResourceToBalance::CPU; + case NMetrics::EResource::Memory: return EResourceToBalance::Memory; + case NMetrics::EResource::Network: return EResourceToBalance::Network; + case NMetrics::EResource::Counter: return EResourceToBalance::Counter; } - if (std::get<NMetrics::EResource::Memory>(maximum) != 0) { - std::get<NMetrics::EResource::Memory>(normValues) = - static_cast<double>(std::get<NMetrics::EResource::Memory>(values)) / std::get<NMetrics::EResource::Memory>(maximum); - } - if (std::get<NMetrics::EResource::Network>(maximum) != 0) { - std::get<NMetrics::EResource::Network>(normValues) = - static_cast<double>(std::get<NMetrics::EResource::Network>(values)) / std::get<NMetrics::EResource::Network>(maximum); - } - return normValues; +} + +TResourceNormalizedValues NormalizeRawValues(const TResourceRawValues& values, const 
TResourceRawValues& maximum) { + return safe_div(values, maximum); } NMetrics::EResource GetDominantResourceType(const TResourceRawValues& values, const TResourceRawValues& maximum) { TResourceNormalizedValues normValues = NormalizeRawValues(values, maximum); + return GetDominantResourceType(normValues); +} + +NMetrics::EResource GetDominantResourceType(const TResourceNormalizedValues& normValues) { NMetrics::EResource dominant = NMetrics::EResource::Counter; auto value = std::get<NMetrics::EResource::Counter>(normValues); if (std::get<NMetrics::EResource::CPU>(normValues) > value) { @@ -74,6 +72,5 @@ NMetrics::EResource GetDominantResourceType(const TResourceRawValues& values, co } return dominant; } - } } diff --git a/ydb/core/mind/hive/hive.h b/ydb/core/mind/hive/hive.h index 2031119817c..c2759df5663 100644 --- a/ydb/core/mind/hive/hive.h +++ b/ydb/core/mind/hive/hive.h @@ -82,6 +82,16 @@ enum class EBalancerType { TString EBalancerTypeName(EBalancerType value); +enum class EResourceToBalance { + Dominant, + CPU, + Memory, + Network, + Counter, +}; + +EResourceToBalance ToResourceToBalance(NMetrics::EResource resource); + struct ISubActor { virtual void Cleanup() = 0; }; @@ -165,6 +175,7 @@ struct TSideEffects : TCompleteNotifications, TCompleteActions { TResourceNormalizedValues NormalizeRawValues(const TResourceRawValues& values, const TResourceRawValues& maximum); NMetrics::EResource GetDominantResourceType(const TResourceRawValues& values, const TResourceRawValues& maximum); +NMetrics::EResource GetDominantResourceType(const TResourceNormalizedValues& normValues); template <typename... 
ResourceTypes> inline std::tuple<ResourceTypes...> GetStDev(const TVector<std::tuple<ResourceTypes...>>& values) { @@ -229,6 +240,14 @@ struct TDrainSettings { ui32 DrainInFlight = 0; }; +struct TBalancerSettings { + int MaxMovements = 0; + bool RecheckOnFinish = false; + ui64 MaxInFlight = 1; + const std::vector<TNodeId> FilterNodeIds = {}; + EResourceToBalance ResourceToBalance = EResourceToBalance::Dominant; +}; + } // NHive } // NKikimr diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index 85ec0d84bfb..a38273cd4e1 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -2089,7 +2089,8 @@ THive::THiveStats THive::GetStats() const { stats.Values.reserve(Nodes.size()); for (const auto& ni : Nodes) { if (ni.second.IsAlive() && !ni.second.Down) { - stats.Values.push_back({ni.first, ni.second.GetNodeUsage()}); + auto nodeValues = NormalizeRawValues(ni.second.ResourceValues, ni.second.GetResourceMaximumValues()); + stats.Values.emplace_back(ni.first, ni.second.GetNodeUsage(nodeValues), nodeValues); } } if (stats.Values.empty()) { @@ -2102,14 +2103,21 @@ THive::THiveStats THive::GetStats() const { stats.MaxUsageNodeId = it.second->NodeId; stats.MinUsage = it.first->Usage; stats.MinUsageNodeId = it.first->NodeId; - if (stats.MaxUsage > 0) { - double minUsageToBalance = GetMinNodeUsageToBalance(); - double minUsage = std::max(stats.MinUsage, minUsageToBalance); - double maxUsage = std::max(stats.MaxUsage, minUsageToBalance); - stats.Scatter = (maxUsage - minUsage) / maxUsage; - } else { - stats.Scatter = 0; + + TResourceNormalizedValues minValues = stats.Values.front().ResourceNormValues; + TResourceNormalizedValues maxValues = stats.Values.front().ResourceNormValues; + for (size_t i = 1; i < stats.Values.size(); ++i) { + minValues = piecewise_min(minValues, stats.Values[i].ResourceNormValues); + maxValues = piecewise_max(maxValues, stats.Values[i].ResourceNormValues); } + + + auto minValuesToBalance = 
GetMinNodeUsageToBalance(); + maxValues = piecewise_max(maxValues, minValuesToBalance); + minValues = piecewise_max(minValues, minValuesToBalance); + stats.ScatterByResource = safe_div(maxValues - minValues, maxValues); + stats.Scatter = max(stats.ScatterByResource); + return stats; } @@ -2123,6 +2131,24 @@ double THive::GetUsage() const { return stats.MaxUsage; } +std::optional<EResourceToBalance> THive::CheckScatter(const TResourceNormalizedValues& scatterByResource) const { + auto minScatterToBalance = GetMinScatterToBalance(); + auto cmp = piecewise_compare(scatterByResource, minScatterToBalance); + if (std::get<NMetrics::EResource::Counter>(cmp) == std::partial_ordering::greater) { + return EResourceToBalance::Counter; + } + if (std::get<NMetrics::EResource::CPU>(cmp) == std::partial_ordering::greater) { + return EResourceToBalance::CPU; + } + if (std::get<NMetrics::EResource::Memory>(cmp) == std::partial_ordering::greater) { + return EResourceToBalance::Memory; + } + if (std::get<NMetrics::EResource::Network>(cmp) == std::partial_ordering::greater) { + return EResourceToBalance::Network; + } + return std::nullopt; +} + void THive::Handle(TEvPrivate::TEvProcessTabletBalancer::TPtr&) { ProcessTabletBalancerScheduled = false; if (!SubActors.empty()) { @@ -2158,20 +2184,33 @@ void THive::Handle(TEvPrivate::TEvProcessTabletBalancer::TPtr&) { if (!overloadedNodes.empty()) { BLOG_D("Nodes " << overloadedNodes << " with usage over limit " << GetMaxNodeUsageToKick() << " - starting balancer"); LastBalancerTrigger = EBalancerType::Emergency; - StartHiveBalancer(CurrentConfig.GetMaxMovementsOnEmergencyBalancer(), CurrentConfig.GetContinueEmergencyBalancer(), GetEmergencyBalancerInflight(), overloadedNodes); + TBalancerSettings emergencySettings{ + .MaxMovements = (int)CurrentConfig.GetMaxMovementsOnEmergencyBalancer(), + .RecheckOnFinish = CurrentConfig.GetContinueEmergencyBalancer(), + .MaxInFlight = GetEmergencyBalancerInflight(), + .FilterNodeIds = overloadedNodes, + 
}; + StartHiveBalancer(std::move(emergencySettings)); return; } } - if (stats.MaxUsage < GetMinNodeUsageToBalance()) { + if (stats.MaxUsage < CurrentConfig.GetMinNodeUsageToBalance()) { TabletCounters->Cumulative()[NHive::COUNTER_SUGGESTED_SCALE_DOWN].Increment(1); } - if (stats.Scatter >= GetMinScatterToBalance()) { - BLOG_TRACE("Scatter " << stats.Scatter << " over limit " + auto scatteredResource = CheckScatter(stats.ScatterByResource); + if (scatteredResource) { + BLOG_TRACE("Scatter " << stats.ScatterByResource << " over limit " << GetMinScatterToBalance() << " - starting balancer"); LastBalancerTrigger = EBalancerType::Scatter; - StartHiveBalancer(CurrentConfig.GetMaxMovementsOnAutoBalancer(), CurrentConfig.GetContinueAutoBalancer(), GetBalancerInflight()); + TBalancerSettings scatterSettings{ + .MaxMovements = (int)CurrentConfig.GetMaxMovementsOnAutoBalancer(), + .RecheckOnFinish = CurrentConfig.GetContinueAutoBalancer(), + .MaxInFlight = GetBalancerInflight(), + .ResourceToBalance = *scatteredResource, + }; + StartHiveBalancer(std::move(scatterSettings)); } } diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index 63ab0344eb7..23759504761 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -228,7 +228,7 @@ protected: friend struct TStoragePoolInfo; - void StartHiveBalancer(int maxMovements = 0, bool recheckOnFinish = false, ui64 maxInFlight = 1, const std::vector<TNodeId>& filterNodeIds = {}); + void StartHiveBalancer(TBalancerSettings settings); void StartHiveDrain(TNodeId nodeId, TDrainSettings settings); void StartHiveFill(TNodeId nodeId, const TActorId& initiator); void CreateEvMonitoring(NMon::TEvRemoteHttpInfo::TPtr& ev, const TActorContext& ctx); @@ -714,16 +714,46 @@ public: return CurrentConfig.GetMaxBootBatchSize(); } - double GetMinScatterToBalance() const { - return CurrentConfig.GetMinScatterToBalance(); + TResourceNormalizedValues GetMinScatterToBalance() const { + 
TResourceNormalizedValues minScatter; + std::get<NMetrics::EResource::CPU>(minScatter) = CurrentConfig.GetMinCPUScatterToBalance(); + std::get<NMetrics::EResource::Memory>(minScatter) = CurrentConfig.GetMinMemoryScatterToBalance(); + std::get<NMetrics::EResource::Network>(minScatter) = CurrentConfig.GetMinNetworkScatterToBalance(); + std::get<NMetrics::EResource::Counter>(minScatter) = CurrentConfig.GetMinCounterScatterToBalance(); + + if (CurrentConfig.HasMinScatterToBalance()) { + if (!CurrentConfig.HasMinCPUScatterToBalance()) { + std::get<NMetrics::EResource::CPU>(minScatter) = CurrentConfig.GetMinScatterToBalance(); + } + if (!CurrentConfig.HasMinNetworkScatterToBalance()) { + std::get<NMetrics::EResource::Network>(minScatter) = CurrentConfig.GetMinScatterToBalance(); + } + if (!CurrentConfig.HasMinMemoryScatterToBalance()) { + std::get<NMetrics::EResource::Memory>(minScatter) = CurrentConfig.GetMinScatterToBalance(); + } + } + + return minScatter; } double GetMaxNodeUsageToKick() const { return CurrentConfig.GetMaxNodeUsageToKick(); } - double GetMinNodeUsageToBalance() const { - return CurrentConfig.GetMinNodeUsageToBalance(); + TResourceNormalizedValues GetMinNodeUsageToBalance() const { + // MinNodeUsageToBalance is needed so that small fluctuations in metrics do not cause scatter + // when cluster load is low. Counter does not fluctuate, so it does not need it. + // However, we still do not want a difference of 1 in Counter to be able to cause scatter. 
+ double minUsageToBalance = CurrentConfig.GetMinNodeUsageToBalance(); + TResourceNormalizedValues minValuesToBalance; + std::get<NMetrics::EResource::CPU>(minValuesToBalance) = minUsageToBalance; + std::get<NMetrics::EResource::Memory>(minValuesToBalance) = minUsageToBalance; + std::get<NMetrics::EResource::Network>(minValuesToBalance) = minUsageToBalance; + auto counterScatterThreshold = std::get<NMetrics::EResource::Counter>(GetMinScatterToBalance()); + if (counterScatterThreshold != 0 && CurrentConfig.GetMaxResourceCounter() != 0) { + std::get<NMetrics::EResource::Counter>(minValuesToBalance) = 1.0 / (counterScatterThreshold * CurrentConfig.GetMaxResourceCounter()); + } + return minValuesToBalance; } ui64 GetMaxTabletsScheduled() const { @@ -852,11 +882,21 @@ protected: TResourceRawValues GetDefaultResourceInitialMaximumValues(); double GetScatter() const; double GetUsage() const; + // If the scatter is considered okay, returns nullopt. Otherwise, returns the resource that should be better balanced. 
+ std::optional<EResourceToBalance> CheckScatter(const TResourceNormalizedValues& scatterByResource) const; struct THiveStats { struct TNodeStat { TNodeId NodeId; double Usage; + TResourceNormalizedValues ResourceNormValues; + + TNodeStat(TNodeId node, double usage, TResourceNormalizedValues values) + : NodeId(node) + , Usage(usage) + , ResourceNormValues(values) + { + } }; double MinUsage; @@ -864,6 +904,7 @@ protected: double MaxUsage; TNodeId MaxUsageNodeId; double Scatter; + TResourceNormalizedValues ScatterByResource; std::vector<TNodeStat> Values; }; diff --git a/ydb/core/mind/hive/hive_impl_ut.cpp b/ydb/core/mind/hive/hive_impl_ut.cpp index 2784c6b5a3d..55027d00603 100644 --- a/ydb/core/mind/hive/hive_impl_ut.cpp +++ b/ydb/core/mind/hive/hive_impl_ut.cpp @@ -109,7 +109,8 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { auto CheckSpeedAndDistribution = []( std::unordered_map<ui64, TLeaderTabletInfo>& allTablets, - std::function<void(std::vector<TTabletInfo*>&)> func) -> void { + std::function<void(std::vector<TTabletInfo*>&, EResourceToBalance)> func, + EResourceToBalance resource) -> void { std::vector<TTabletInfo*> tablets; for (auto& [id, tab] : allTablets) { @@ -118,7 +119,7 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { TProfileTimer timer; - func(tablets); + func(tablets, resource); double passed = timer.Get().SecondsFloat(); @@ -134,11 +135,12 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { size_t revs = 0; double prev = 0; for (size_t n = 0; n < tablets.size(); ++n) { - buckets[n / (NUM_TABLETS / NUM_BUCKETS)] += tablets[n]->Weight; - if (n != 0 && tablets[n]->Weight >= prev) { + double weight = tablets[n]->GetWeight(resource); + buckets[n / (NUM_TABLETS / NUM_BUCKETS)] += weight; + if (n != 0 && weight >= prev) { ++revs; } - prev = tablets[n]->Weight; + prev = weight; } Ctest << "Indirection=" << revs * 100 / NUM_TABLETS << "%" << Endl; @@ -176,19 +178,21 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { for (ui64 i = 0; i < NUM_TABLETS; ++i) { TLeaderTabletInfo& tablet = 
allTablets.emplace(std::piecewise_construct, std::tuple<TTabletId>(i), std::tuple<TTabletId, THive&>(i, hive)).first->second; - tablet.Weight = RandomNumber<double>(); + NKikimrTabletBase::TMetrics metrics; + metrics.SetMemory(RandomNumber<double>()); + tablet.UpdateResourceUsage(metrics); } Ctest << "HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST" << Endl; - CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST>); + CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST>, EResourceToBalance::Memory); //Ctest << "HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM" << Endl; //CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM>); Ctest << "HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM" << Endl; - CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM>); + CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM>, EResourceToBalance::Memory); Ctest << "HIVE_TABLET_BALANCE_STRATEGY_RANDOM" << Endl; - CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>); + CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>, EResourceToBalance::Memory); } } diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index b8f2bc5fb9d..f4ad5e4a636 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -4226,6 +4226,87 @@ Y_UNIT_TEST_SUITE(THiveTest) { UNIT_ASSERT_VALUES_UNEQUAL(wasTabletUpdated(newDistribution[0][0]), wasTabletUpdated(newDistribution[0][1])); } + Y_UNIT_TEST(TestHiveBalancerDifferentResources) { + static constexpr ui64 TABLETS_PER_NODE = 4; + TTestBasicRuntime 
runtime(2, false); + Setup(runtime, true, 1, [](TAppPrepare& app) { + app.HiveConfig.SetTabletKickCooldownPeriod(0); + app.HiveConfig.SetResourceChangeReactionPeriod(0); + }); + const int nodeBase = runtime.GetNodeId(0); + TActorId senderA = runtime.AllocateEdgeActor(); + const ui64 hiveTablet = MakeDefaultHiveID(0); + const ui64 testerTablet = MakeDefaultHiveID(1); + + auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array<std::vector<ui64>, 2> { + std::array<std::vector<ui64>, 2> nodeTablets = {}; + { + runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo()); + TAutoPtr<IEventHandle> handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < 2), + "nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase); + nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID()); + } + } + // Check even distribution + UNIT_ASSERT_VALUES_EQUAL(nodeTablets[0].size(), TABLETS_PER_NODE); + UNIT_ASSERT_VALUES_EQUAL(nodeTablets[1].size(), TABLETS_PER_NODE); + return nodeTablets; + }; + + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + // wait for creation of nodes + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus, 2); + runtime.DispatchEvents(options); + } + + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + for (size_t i = 0; i < 2 * TABLETS_PER_NODE; ++i) { + THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ev->Record.SetObjectId(i); + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + MakeSureTabletIsUp(runtime, tabletId, 0); + } + + auto initialDistribution 
= getDistribution(); + + // report metrics: CPU for the first node, network for the second + for (size_t i = 0; i < TABLETS_PER_NODE; ++i) { + THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>(); + NKikimrHive::TTabletMetrics* cpu = metrics->Record.AddTabletMetrics(); + cpu->SetTabletID(initialDistribution[0][i]); + cpu->MutableResourceUsage()->SetCPU(7'000'000 / TABLETS_PER_NODE); + NKikimrHive::TTabletMetrics* network = metrics->Record.AddTabletMetrics(); + network->SetTabletID(initialDistribution[1][i]); + network->MutableResourceUsage()->SetNetwork(700'000'000 / TABLETS_PER_NODE); + + runtime.SendToPipe(hiveTablet, senderA, metrics.Release()); + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut); + runtime.DispatchEvents(options, TDuration::Seconds(10)); + } + + // Check that balancer made some movements + auto newDistribution = getDistribution(); + ui64 movedToFirstNode = 0; + for (auto tablet : newDistribution[0]) { + if (std::find(initialDistribution[0].begin(), initialDistribution[0].end(), tablet) == initialDistribution[0].end()) { + ++movedToFirstNode; + } + } + UNIT_ASSERT_GT(movedToFirstNode, 0); + UNIT_ASSERT_LE(movedToFirstNode, TABLETS_PER_NODE / 2); + } + + Y_UNIT_TEST(TestUpdateTabletsObjectUpdatesMetrics) { TTestBasicRuntime runtime(1, false); Setup(runtime, true); diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index d197f5b065d..3cecccc40cd 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -767,6 +767,10 @@ public: UpdateConfig(db, "MaxResourceNetwork", TSchemeIds::State::MaxResourceNetwork); UpdateConfig(db, "MaxResourceCounter", TSchemeIds::State::MaxResourceCounter); UpdateConfig(db, "MinScatterToBalance", TSchemeIds::State::MinScatterToBalance); + UpdateConfig(db, "MinCPUScatterToBalance"); + UpdateConfig(db, "MinMemoryScatterToBalance"); + UpdateConfig(db, 
"MinNetworkScatterToBalance"); + UpdateConfig(db, "MinCounterScatterToBalance"); UpdateConfig(db, "MaxNodeUsageToKick", TSchemeIds::State::MaxNodeUsageToKick); UpdateConfig(db, "ResourceChangeReactionPeriod", TSchemeIds::State::ResourceChangeReactionPeriod); UpdateConfig(db, "TabletKickCooldownPeriod", TSchemeIds::State::TabletKickCooldownPeriod); @@ -1045,6 +1049,10 @@ public: ShowConfig(out, "MaxBootBatchSize"); ShowConfig(out, "DrainInflight"); ShowConfig(out, "MinScatterToBalance"); + ShowConfig(out, "MinCPUScatterToBalance"); + ShowConfig(out, "MinMemoryScatterToBalance"); + ShowConfig(out, "MinNetworkScatterToBalance"); + ShowConfig(out, "MinCounterScatterToBalance"); ShowConfig(out, "MinNodeUsageToBalance"); ShowConfig(out, "MaxNodeUsageToKick"); ShowConfig(out, "ResourceChangeReactionPeriod"); @@ -1360,7 +1368,7 @@ public: << convert(Self->GetStDevResourceValues(), [](double d) -> TString { return Sprintf("%.9f", d); }) << "</td></tr>"; THive::THiveStats stats = Self->GetStats(); out << "<tr><td>" << "Max usage:" << "<td id='maxUsage'>" << GetValueWithColoredGlyph(stats.MaxUsage, Self->GetMaxNodeUsageToKick()) << "</td></tr>"; - out << "<tr><td>" << "Scatter:" << "<td id='scatter'>" << GetValueWithColoredGlyph(stats.Scatter, Self->GetMinScatterToBalance()) << "</td></tr>"; + out << "<tr><td>" << "Scatter:" << "<td id='scatter'>" << convert(stats.ScatterByResource, Self->GetMinScatterToBalance(), GetValueWithColoredGlyph) << "</td></tr>"; out << "</table>"; out << "<table id='node_table' class='table simple-table2 table-hover table-condensed'>"; @@ -2064,7 +2072,7 @@ public: jsonData["WaitQueueSize"] = Self->BootQueue.WaitQueue.size(); jsonData["BalancerProgress"] = GetBalancerProgressText(Self->BalancerProgress, Self->LastBalancerTrigger); jsonData["MaxUsage"] = GetValueWithColoredGlyph(stats.MaxUsage, Self->GetMaxNodeUsageToKick()) ; - jsonData["Scatter"] = GetValueWithColoredGlyph(stats.Scatter, Self->GetMinScatterToBalance()); + jsonData["Scatter"] = 
TStringBuilder() << convert(stats.ScatterByResource, Self->GetMinScatterToBalance(), GetValueWithColoredGlyph); jsonData["RunningTabletsText"] = GetRunningTabletsText(runningTablets, tablets, Self->WarmUp); TVector<TNodeInfo*> nodeInfos; @@ -2347,7 +2355,7 @@ public: bool Execute(TTransactionContext&, const TActorContext&) override { Self->LastBalancerTrigger = EBalancerType::Manual; - Self->StartHiveBalancer(MaxMovements); + Self->StartHiveBalancer({.MaxMovements = MaxMovements}); return true; } diff --git a/ydb/core/mind/hive/node_info.cpp b/ydb/core/mind/hive/node_info.cpp index 0033986addd..7c4207808d1 100644 --- a/ydb/core/mind/hive/node_info.cpp +++ b/ydb/core/mind/hive/node_info.cpp @@ -378,14 +378,19 @@ double TNodeInfo::GetNodeUsageForTablet(const TTabletInfo& tablet) const { return usage; } -double TNodeInfo::GetNodeUsage() const { - double usage = TTabletInfo::GetUsage(GetResourceCurrentValues(), GetResourceMaximumValues()); - if (AveragedNodeTotalUsage.IsValueStable()) { +double TNodeInfo::GetNodeUsage(const TResourceNormalizedValues& normValues, EResourceToBalance resource) const { + double usage = TTabletInfo::ExtractResourceUsage(normValues, resource); + if (resource == EResourceToBalance::Dominant && AveragedNodeTotalUsage.IsValueStable()) { usage = std::max(usage, AveragedNodeTotalUsage.GetValue()); } return usage; } +double TNodeInfo::GetNodeUsage(EResourceToBalance resource) const { + auto normValues = NormalizeRawValues(GetResourceCurrentValues(), GetResourceMaximumValues()); + return GetNodeUsage(normValues, resource); +} + ui64 TNodeInfo::GetTabletsRunningByType(TTabletTypes::EType tabletType) const { auto itRunningByType = TabletsRunningByType.find(tabletType); if (itRunningByType != TabletsRunningByType.end()) { diff --git a/ydb/core/mind/hive/node_info.h b/ydb/core/mind/hive/node_info.h index 15d7054a68b..abe9a25deb4 100644 --- a/ydb/core/mind/hive/node_info.h +++ b/ydb/core/mind/hive/node_info.h @@ -233,7 +233,9 @@ public: } double 
GetNodeUsageForTablet(const TTabletInfo& tablet) const; - double GetNodeUsage() const; + double GetNodeUsage(EResourceToBalance resource = EResourceToBalance::Dominant) const; + double GetNodeUsage(const TResourceNormalizedValues& normValues, + EResourceToBalance resource = EResourceToBalance::Dominant) const; ui64 GetTabletsRunningByType(TTabletTypes::EType tabletType) const; diff --git a/ydb/core/mind/hive/tablet_info.h b/ydb/core/mind/hive/tablet_info.h index 023437edae5..be4ca7d5865 100644 --- a/ydb/core/mind/hive/tablet_info.h +++ b/ydb/core/mind/hive/tablet_info.h @@ -152,6 +152,7 @@ public: protected: NKikimrTabletBase::TMetrics ResourceValues; // current values of various metrics TTabletMetricsAggregates ResourceMetricsAggregates; + TResourceNormalizedValues ResourceNormalizedValues; public: TVector<TActorId> ActorsToNotify; // ...OnCreation persistent @@ -234,8 +235,19 @@ public: void FilterRawValues(TResourceNormalizedValues& values) const; template <typename ResourcesType> - static double GetUsage(const ResourcesType& current, const ResourcesType& maximum) { - return max(NormalizeRawValues(current, maximum)); + static double GetUsage(const ResourcesType& current, const ResourcesType& maximum, EResourceToBalance resource = EResourceToBalance::Dominant) { + auto normValues = NormalizeRawValues(current, maximum); + return ExtractResourceUsage(normValues, resource); + } + + static double ExtractResourceUsage(const TResourceNormalizedValues& normValues, EResourceToBalance resource = EResourceToBalance::Dominant) { + switch (resource) { + case EResourceToBalance::CPU: return std::get<NMetrics::EResource::CPU>(normValues); + case EResourceToBalance::Memory: return std::get<NMetrics::EResource::Memory>(normValues); + case EResourceToBalance::Network: return std::get<NMetrics::EResource::Network>(normValues); + case EResourceToBalance::Counter: return std::get<NMetrics::EResource::Counter>(normValues); + case EResourceToBalance::Dominant: return max(normValues); 
+ } } void UpdateWeight() { @@ -244,7 +256,12 @@ public: FilterRawValues(current); FilterRawValues(maximum); - Weight = GetUsage(current, maximum); + ResourceNormalizedValues = NormalizeRawValues(current, maximum); + Weight = ExtractResourceUsage(ResourceNormalizedValues); + } + + double GetWeight(EResourceToBalance resourceToBalance) const { + return ExtractResourceUsage(ResourceNormalizedValues, resourceToBalance); } void PostponeStart(TInstant nextStart) { diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 3e9524c5f4e..af4ab175d43 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1708,7 +1708,7 @@ message THiveConfig { optional uint64 MaxResourceCPU = 4 [default = 10000000]; optional uint64 MaxResourceMemory = 5 [default = 512000000000]; optional uint64 MaxResourceNetwork = 6 [default = 1000000000]; - optional double MinScatterToBalance = 7 [default = 0.5]; + optional double MinScatterToBalance = 7 [default = 0.5]; // Does not affect Counter. 
For other resources can be overridden with resource-specific settings below optional bool SpreadNeighbours = 8 [default = true]; optional uint64 MaxBootBatchSize = 9 [default = 1000]; optional uint64 DrainInflight = 10 [default = 10]; @@ -1762,6 +1762,10 @@ message THiveConfig { optional double MinPeriodBetweenEmergencyBalance = 59 [default = 0.1]; // seconds optional EHiveBootStrategy BootStrategy = 60 [default = HIVE_BOOT_STRATEGY_BALANCED]; optional uint64 TabletRestartsMaxCount = 61 [default = 2]; + optional double MinCPUScatterToBalance = 62 [default = 0.5]; + optional double MinMemoryScatterToBalance = 63 [default = 0.5]; + optional double MinNetworkScatterToBalance = 64 [default = 0.5]; + optional double MinCounterScatterToBalance = 65 [default = 0.01]; } message TDataShardConfig { diff --git a/ydb/core/util/tuples.h b/ydb/core/util/tuples.h index 0ca06891984..0992b0228d3 100644 --- a/ydb/core/util/tuples.h +++ b/ydb/core/util/tuples.h @@ -135,6 +135,19 @@ operator /(const std::tuple<A...>& a, const std::tuple<B...>& b) { return div(std::make_index_sequence<sizeof...(A)>(), a, b); } +// safe_div: same as operator/, but casts everything to double & returns 0 when denominator is 0 + +template <std::size_t... I, typename... A, typename... B> +auto safe_div(std::index_sequence<I...>, const std::tuple<A...>& a, const std::tuple<B...>& b) { + return std::make_tuple(std::get<I>(b) ? static_cast<double>(std::get<I>(a)) / static_cast<double>(std::get<I>(b)) : double(0)...); +} + +template <typename... A, typename... B> +auto safe_div(const std::tuple<A...> a, const std::tuple<B...> b) { + static_assert(sizeof...(A) == sizeof...(B), "Tuples should be the same size"); + return safe_div(std::make_index_sequence<sizeof...(A)>(), a, b); +} + ///// template <std::size_t... I, typename...
A, typename V> @@ -227,6 +240,7 @@ decltype(sqrt(std::make_index_sequence<sizeof...(T)>(), std::tuple<T...>())) sqr } // convert(tuple<>, f) - converts every tuple element using f +// convert(tuple<> a, tuple<> b, f) - returns a tuple of f(a_i, b_i) template <std::size_t... I, typename...T, typename F> decltype(std::make_tuple((*(F*)(nullptr))(std::get<I>(std::tuple<T...>()))...)) convert(std::index_sequence<I...>, const std::tuple<T...>& a, F f) { @@ -238,6 +252,16 @@ decltype(convert(std::make_index_sequence<sizeof...(T)>(), std::tuple<T...>(), * return convert(std::make_index_sequence<sizeof...(T)>(), a, f); } +template<std::size_t... I, typename... T, typename... U, typename F> +auto convert(std::index_sequence<I...>, const std::tuple<T...>& a, const std::tuple<U...>& b, F f) { + return std::make_tuple(f(std::get<I>(a), std::get<I>(b))...); +} + +template <typename... T, typename... U, typename F> +auto convert(const std::tuple<T...>& a, const std::tuple<T...>& b, F f) { + return convert(std::make_index_sequence<sizeof...(T)>(), a, b, f); +} + template <typename... T> struct tuple_cast { template <std::size_t... I, typename... F> @@ -358,4 +382,22 @@ inline std::tuple<Ts...> piecewise_max(std::index_sequence<I...>, const std::tup template <typename... Ts> inline std::tuple<Ts...> piecewise_max(const std::tuple<Ts...>& a, const std::tuple<Ts...>& b) { return piecewise_max(std::make_index_sequence<sizeof...(Ts)>(), a, b); } +// piecewise_min(tuple<>, tuple<>) + +template <std::size_t... I, typename... Ts> +inline std::tuple<Ts...> piecewise_min(std::index_sequence<I...>, const std::tuple<Ts...>& a, const std::tuple<Ts...>& b) { return std::tuple<Ts...>({std::min<Ts>(std::get<I>(a), std::get<I>(b))...}); } +template <typename... 
Ts> +inline std::tuple<Ts...> piecewise_min(const std::tuple<Ts...>& a, const std::tuple<Ts...>& b) { return piecewise_min(std::make_index_sequence<sizeof...(Ts)>(), a, b); } + +// piecewise_compare(tuple<>, tuple<>) -> tuple<ordering> + +template <std::size_t... I, typename... Ts> +inline auto piecewise_compare(std::index_sequence<I...>, const std::tuple<Ts...>&a, const std::tuple<Ts...>& b) { + return std::make_tuple((std::get<I>(a) <=> std::get<I>(b))...); +} +template <typename... Ts> +inline auto piecewise_compare(const std::tuple<Ts...>& a, const std::tuple<Ts...>& b) { + return piecewise_compare(std::make_index_sequence<sizeof...(Ts)>(), a, b); +} + } |