aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzalyalov <zalyalov@yandex-team.com>2023-11-27 15:33:43 +0300
committerzalyalov <zalyalov@yandex-team.com>2023-11-27 16:10:14 +0300
commit3925e80494b76ef9552f9f694a8d2fd2c2b22c19 (patch)
tree87e66f5f500f6f68703dba1497b4e96ac5b536b1
parent3107333bc85fb115374cdd4f772f631a418b6ea3 (diff)
downloadydb-3925e80494b76ef9552f9f694a8d2fd2c2b22c19.tar.gz
make balancer yield every 10 tablets KIKIMR-20196
-rw-r--r--ydb/core/mind/hive/balancer.cpp137
-rw-r--r--ydb/core/mind/hive/hive_ut.cpp78
2 files changed, 165 insertions, 50 deletions
diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp
index 894cc259d8..adfb735ed8 100644
--- a/ydb/core/mind/hive/balancer.cpp
+++ b/ydb/core/mind/hive/balancer.cpp
@@ -122,6 +122,12 @@ protected:
int Movements;
TBalancerSettings Settings;
TBalancerStats& Stats;
+ std::vector<TNodeId> Nodes;
+ std::vector<TNodeId>::iterator NextNode;
+ std::vector<TFullTabletId> Tablets;
+ std::vector<TFullTabletId>::iterator NextTablet;
+
+ static constexpr ui64 MAX_TABLETS_PROCESSED = 10;
TString GetLogPrefix() const {
return Hive->GetLogPrefix();
@@ -167,29 +173,7 @@ protected:
Stats.CurrentMovements = Movements;
}
- void KickNextTablet() {
- if (!CanKickNextTablet()) {
- return;
- }
- if (Settings.MaxMovements != 0 && Movements >= Settings.MaxMovements) {
- if (KickInFlight > 0) {
- return;
- } else {
- return PassAway();
- }
- }
-
- struct TBalancerNodeInfo {
- const TNodeInfo* Node;
- double Usage;
-
- TBalancerNodeInfo(const TNodeInfo* node, double usage)
- : Node(node)
- , Usage(usage)
- {}
- };
-
- TInstant now = TActivationContext::Now();
+ void BalanceNodes() {
std::vector<TNodeInfo*> nodes;
if (!Settings.FilterNodeIds.empty()) {
nodes.reserve(Settings.FilterNodeIds.size());
@@ -222,7 +206,25 @@ protected:
BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_RANDOM>(nodes, Settings.ResourceToBalance);
break;
}
- for (const TNodeInfo* node : nodes) {
+
+ Nodes.reserve(nodes.size());
+ for (auto node : nodes) {
+ Nodes.push_back(node->Id);
+ }
+
+ NextNode = Nodes.begin();
+ Tablets.clear();
+ }
+
+ std::optional<TFullTabletId> GetNextTablet(TInstant now) {
+ for (; Tablets.empty() || NextTablet == Tablets.end(); ++NextNode) {
+ if (NextNode == Nodes.end()) {
+ return std::nullopt;
+ }
+ TNodeInfo* node = Hive->FindNode(*NextNode);
+ if (node == nullptr) {
+ continue;
+ }
BLOG_TRACE("Balancer selected node " << node->Id);
auto itTablets = node->Tablets.find(TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING);
if (itTablets == node->Tablets.end()) {
@@ -237,7 +239,7 @@ protected:
tablets.emplace_back(tablet);
}
}
- BLOG_TRACE("Balancer on node " << node->Id << ": " << tablets.size() << "/" << nodeTablets.size() << " tablets is suitable for balancing");
+ BLOG_TRACE("Balancer on node " << node->Id << ": " << tablets.size() << "/" << nodeTablets.size() << " tablets are suitable for balancing");
if (!tablets.empty()) {
switch (Hive->GetTabletBalanceStrategy()) {
case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM:
@@ -253,29 +255,65 @@ protected:
BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>(tablets, Settings.ResourceToBalance);
break;
}
- for (TTabletInfo* tablet : tablets) {
- BLOG_TRACE("Balancer selected tablet " << tablet->ToString());
- THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
- if (result.BestNode != nullptr && result.BestNode != tablet->Node) {
- if (Hive->IsTabletMoveExpedient(*tablet, *result.BestNode)) {
- tablet->MakeBalancerDecision(now);
- tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
- ++KickInFlight;
- ++Movements;
- BLOG_D("Balancer moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
- << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
- << " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
- Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, result.BestNode->Id));
- Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id));
- UpdateProgress();
- if (!CanKickNextTablet()) {
- return;
- }
- }
- }
+ Tablets.clear();
+ Tablets.reserve(tablets.size());
+ for (auto tablet : tablets) {
+ Tablets.push_back(tablet->GetFullTabletId());
}
}
+ NextTablet = Tablets.begin();
}
+ return *(NextTablet++);
+ }
+
+ void KickNextTablet() {
+ if (!CanKickNextTablet()) {
+ return;
+ }
+ if (Settings.MaxMovements != 0 && Movements >= Settings.MaxMovements) {
+ if (KickInFlight > 0) {
+ return;
+ } else {
+ return PassAway();
+ }
+ }
+
+ TInstant now = TActivationContext::Now();
+ ui64 tabletsProcessed = 0;
+
+ while (CanKickNextTablet()) {
+ if (tabletsProcessed == MAX_TABLETS_PROCESSED) {
+ BLOG_TRACE("Balancer - rescheduling");
+ Send(SelfId(), new TEvents::TEvWakeup);
+ return;
+ }
+ std::optional<TFullTabletId> tabletId = GetNextTablet(now);
+ if (!tabletId) {
+ break;
+ }
+ TTabletInfo* tablet = Hive->FindTablet(*tabletId);
+ if (tablet == nullptr || !tablet->IsRunning()) {
+ continue;
+ }
+ BLOG_TRACE("Balancer selected tablet " << tablet->ToString());
+ THive::TBestNodeResult result = Hive->FindBestNode(*tablet);
+ if (result.BestNode != nullptr && result.BestNode != tablet->Node) {
+ if (Hive->IsTabletMoveExpedient(*tablet, *result.BestNode)) {
+ tablet->MakeBalancerDecision(now);
+ tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart
+ ++KickInFlight;
+ ++Movements;
+ BLOG_D("Balancer moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues()
+ << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues
+ << " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues);
+ Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, result.BestNode->Id));
+ Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id));
+ UpdateProgress();
+ }
+ }
+ ++tabletsProcessed;
+ }
+
if (KickInFlight == 0) {
return PassAway();
}
@@ -284,12 +322,10 @@ protected:
void Handle(TEvPrivate::TEvRestartComplete::TPtr& ev) {
BLOG_D("Balancer " << SelfId() << " received " << ev->Get()->Status << " for tablet " << ev->Get()->TabletId);
--KickInFlight;
+ BalanceNodes();
KickNextTablet();
}
- void Timeout() {
- PassAway();
- }
public:
static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
@@ -312,7 +348,8 @@ public:
void Bootstrap() {
UpdateProgress();
Hive->TabletCounters->Cumulative()[NHive::COUNTER_BALANCER_EXECUTED].Increment(1);
- Become(&THiveBalancer::StateWork, TIMEOUT, new TEvents::TEvWakeup());
+ Become(&THiveBalancer::StateWork, TIMEOUT, new TEvents::TEvPoison());
+ BalanceNodes();
KickNextTablet();
}
@@ -320,7 +357,7 @@ public:
switch (ev->GetTypeRewrite()) {
cFunc(TEvents::TSystem::PoisonPill, PassAway);
hFunc(TEvPrivate::TEvRestartComplete, Handle);
- cFunc(TEvents::TSystem::Wakeup, Timeout);
+ cFunc(TEvents::TSystem::Wakeup, KickNextTablet);
}
}
};
diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp
index 9b7287a210..d248fd3833 100644
--- a/ydb/core/mind/hive/hive_ut.cpp
+++ b/ydb/core/mind/hive/hive_ut.cpp
@@ -4419,6 +4419,84 @@ Y_UNIT_TEST_SUITE(THiveTest) {
UNIT_ASSERT_LE(movedToFirstNode, TABLETS_PER_NODE / 2);
}
+ Y_UNIT_TEST(TestHiveBalancerWithImmovableTablets) {
+ static constexpr ui64 TABLETS_PER_NODE = 10;
+ TTestBasicRuntime runtime(3, false);
+ Setup(runtime, true, 1, [](TAppPrepare& app) {
+ app.HiveConfig.SetTabletKickCooldownPeriod(0);
+ app.HiveConfig.SetResourceChangeReactionPeriod(0);
+ });
+ const int nodeBase = runtime.GetNodeId(0);
+ TActorId senderA = runtime.AllocateEdgeActor();
+ const ui64 hiveTablet = MakeDefaultHiveID(0);
+ const ui64 testerTablet = MakeDefaultHiveID(1);
+
+ auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array<std::vector<ui64>, 3> {
+ std::array<std::vector<ui64>, 3> nodeTablets = {};
+ {
+ runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo());
+ TAutoPtr<IEventHandle> handle;
+ TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
+ for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
+ UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < 3),
+ "nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase);
+ nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID());
+ }
+ }
+ return nodeTablets;
+ };
+
+ CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
+
+ // wait for creation of nodes
+ {
+ TDispatchOptions options;
+ options.FinalEvents.emplace_back(TEvLocal::EvStatus, 2);
+ runtime.DispatchEvents(options);
+ }
+
+ // every 3rd tablet is tied to the first node
+ TTabletTypes::EType tabletType = TTabletTypes::Dummy;
+ for (size_t i = 0; i < 3 * TABLETS_PER_NODE; ++i) {
+ THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS));
+ ev->Record.SetObjectId(i);
+ if (i % 3 == 0) {
+ ev->Record.AddAllowedNodeIDs(nodeBase);
+ }
+ ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true);
+ MakeSureTabletIsUp(runtime, tabletId, 0);
+ }
+
+ // Check initial distribution
+ auto initialDistribution = getDistribution();
+ for (size_t i = 0; i < 3; ++i) {
+ UNIT_ASSERT_VALUES_EQUAL(initialDistribution[i].size(), TABLETS_PER_NODE);
+ }
+
+ // report metrics for all tablets on first node, and two tablets on second node
+ std::vector<ui64> tabletsWithMetrics = initialDistribution[0];
+ tabletsWithMetrics.push_back(initialDistribution[1][0]);
+ tabletsWithMetrics.push_back(initialDistribution[1][1]);
+ for (auto tabletId : tabletsWithMetrics) {
+ THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>();
+ NKikimrHive::TTabletMetrics* cpu = metrics->Record.AddTabletMetrics();
+ cpu->SetTabletID(tabletId);
+ cpu->MutableResourceUsage()->SetCPU(500'000);
+
+ runtime.SendToPipe(hiveTablet, senderA, metrics.Release());
+ }
+
+ {
+ TDispatchOptions options;
+ options.FinalEvents.emplace_back(NHive::TEvPrivate::EvRestartComplete);
+ runtime.DispatchEvents(options, TDuration::Seconds(10));
+ }
+
+ // Check that a tablet was moved from the second node to the third
+ auto newDistribution = getDistribution();
+ UNIT_ASSERT_VALUES_EQUAL(newDistribution[0].size(), TABLETS_PER_NODE);
+ UNIT_ASSERT_VALUES_EQUAL(newDistribution[1].size(), TABLETS_PER_NODE - 1);
+ }
Y_UNIT_TEST(TestUpdateTabletsObjectUpdatesMetrics) {
TTestBasicRuntime runtime(1, false);