diff options
author | zalyalov <zalyalov@yandex-team.com> | 2023-11-27 15:33:43 +0300 |
---|---|---|
committer | zalyalov <zalyalov@yandex-team.com> | 2023-11-27 16:10:14 +0300 |
commit | 3925e80494b76ef9552f9f694a8d2fd2c2b22c19 (patch) | |
tree | 87e66f5f500f6f68703dba1497b4e96ac5b536b1 | |
parent | 3107333bc85fb115374cdd4f772f631a418b6ea3 (diff) | |
download | ydb-3925e80494b76ef9552f9f694a8d2fd2c2b22c19.tar.gz |
make balancer yield every 10 tablets KIKIMR-20196
-rw-r--r-- | ydb/core/mind/hive/balancer.cpp | 137 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_ut.cpp | 78 |
2 files changed, 165 insertions, 50 deletions
diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp index 894cc259d8..adfb735ed8 100644 --- a/ydb/core/mind/hive/balancer.cpp +++ b/ydb/core/mind/hive/balancer.cpp @@ -122,6 +122,12 @@ protected: int Movements; TBalancerSettings Settings; TBalancerStats& Stats; + std::vector<TNodeId> Nodes; + std::vector<TNodeId>::iterator NextNode; + std::vector<TFullTabletId> Tablets; + std::vector<TFullTabletId>::iterator NextTablet; + + static constexpr ui64 MAX_TABLETS_PROCESSED = 10; TString GetLogPrefix() const { return Hive->GetLogPrefix(); @@ -167,29 +173,7 @@ protected: Stats.CurrentMovements = Movements; } - void KickNextTablet() { - if (!CanKickNextTablet()) { - return; - } - if (Settings.MaxMovements != 0 && Movements >= Settings.MaxMovements) { - if (KickInFlight > 0) { - return; - } else { - return PassAway(); - } - } - - struct TBalancerNodeInfo { - const TNodeInfo* Node; - double Usage; - - TBalancerNodeInfo(const TNodeInfo* node, double usage) - : Node(node) - , Usage(usage) - {} - }; - - TInstant now = TActivationContext::Now(); + void BalanceNodes() { std::vector<TNodeInfo*> nodes; if (!Settings.FilterNodeIds.empty()) { nodes.reserve(Settings.FilterNodeIds.size()); @@ -222,7 +206,25 @@ protected: BalanceNodes<NKikimrConfig::THiveConfig::HIVE_NODE_BALANCE_STRATEGY_RANDOM>(nodes, Settings.ResourceToBalance); break; } - for (const TNodeInfo* node : nodes) { + + Nodes.reserve(nodes.size()); + for (auto node : nodes) { + Nodes.push_back(node->Id); + } + + NextNode = Nodes.begin(); + Tablets.clear(); + } + + std::optional<TFullTabletId> GetNextTablet(TInstant now) { + for (; Tablets.empty() || NextTablet == Tablets.end(); ++NextNode) { + if (NextNode == Nodes.end()) { + return std::nullopt; + } + TNodeInfo* node = Hive->FindNode(*NextNode); + if (node == nullptr) { + continue; + } BLOG_TRACE("Balancer selected node " << node->Id); auto itTablets = node->Tablets.find(TTabletInfo::EVolatileState::TABLET_VOLATILE_STATE_RUNNING); if (itTablets == node->Tablets.end()) { @@ -237,7 +239,7 @@ protected: tablets.emplace_back(tablet); } } - BLOG_TRACE("Balancer on node " << node->Id << ": " << tablets.size() << "/" << nodeTablets.size() << " tablets is suitable for balancing"); + BLOG_TRACE("Balancer on node " << node->Id << ": " << tablets.size() << "/" << nodeTablets.size() << " tablets are suitable for balancing"); if (!tablets.empty()) { switch (Hive->GetTabletBalanceStrategy()) { case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM: @@ -253,29 +255,65 @@ protected: BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>(tablets, Settings.ResourceToBalance); break; } - for (TTabletInfo* tablet : tablets) { - BLOG_TRACE("Balancer selected tablet " << tablet->ToString()); - THive::TBestNodeResult result = Hive->FindBestNode(*tablet); - if (result.BestNode != nullptr && result.BestNode != tablet->Node) { - if (Hive->IsTabletMoveExpedient(*tablet, *result.BestNode)) { - tablet->MakeBalancerDecision(now); - tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart - ++KickInFlight; - ++Movements; - BLOG_D("Balancer moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues() - << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues - << " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues); - Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, result.BestNode->Id)); - Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id)); - UpdateProgress(); - if (!CanKickNextTablet()) { - return; - } - } - } + Tablets.clear(); + Tablets.reserve(tablets.size()); + for (auto tablet : tablets) { + Tablets.push_back(tablet->GetFullTabletId()); } } + NextTablet = Tablets.begin(); } + return *(NextTablet++); + } + + void KickNextTablet() { + if (!CanKickNextTablet()) { + return; + } + if (Settings.MaxMovements != 0 && Movements >= Settings.MaxMovements) { + if (KickInFlight > 0) { + return; + } else { + return PassAway(); + } + } + + TInstant now = TActivationContext::Now(); + ui64 tabletsProcessed = 0; + + while (CanKickNextTablet()) { + if (tabletsProcessed == MAX_TABLETS_PROCESSED) { + BLOG_TRACE("Balancer - rescheduling"); + Send(SelfId(), new TEvents::TEvWakeup); + return; + } + std::optional<TFullTabletId> tabletId = GetNextTablet(now); + if (!tabletId) { + break; + } + TTabletInfo* tablet = Hive->FindTablet(*tabletId); + if (tablet == nullptr || !tablet->IsRunning()) { + continue; + } + BLOG_TRACE("Balancer selected tablet " << tablet->ToString()); + THive::TBestNodeResult result = Hive->FindBestNode(*tablet); + if (result.BestNode != nullptr && result.BestNode != tablet->Node) { + if (Hive->IsTabletMoveExpedient(*tablet, *result.BestNode)) { + tablet->MakeBalancerDecision(now); + tablet->ActorsToNotifyOnRestart.emplace_back(SelfId()); // volatile settings, will not persist upon restart + ++KickInFlight; + ++Movements; + BLOG_D("Balancer moving tablet " << tablet->ToString() << " " << tablet->GetResourceValues() + << " from node " << tablet->Node->Id << " " << tablet->Node->ResourceValues + << " to node " << result.BestNode->Id << " " << result.BestNode->ResourceValues); + Hive->RecordTabletMove(THive::TTabletMoveInfo(now, *tablet, tablet->Node->Id, result.BestNode->Id)); + Hive->Execute(Hive->CreateRestartTablet(tablet->GetFullTabletId(), result.BestNode->Id)); + UpdateProgress(); + } + } + ++tabletsProcessed; + } + if (KickInFlight == 0) { return PassAway(); } @@ -284,12 +322,10 @@ protected: void Handle(TEvPrivate::TEvRestartComplete::TPtr& ev) { BLOG_D("Balancer " << SelfId() << " received " << ev->Get()->Status << " for tablet " << ev->Get()->TabletId); --KickInFlight; + BalanceNodes(); KickNextTablet(); } - void Timeout() { - PassAway(); - } public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { @@ -312,7 +348,8 @@ public: void Bootstrap() { UpdateProgress(); Hive->TabletCounters->Cumulative()[NHive::COUNTER_BALANCER_EXECUTED].Increment(1); - Become(&THiveBalancer::StateWork, TIMEOUT, new TEvents::TEvWakeup()); + Become(&THiveBalancer::StateWork, TIMEOUT, new TEvents::TEvPoison()); + BalanceNodes(); KickNextTablet(); } @@ -320,7 +357,7 @@ public: switch (ev->GetTypeRewrite()) { cFunc(TEvents::TSystem::PoisonPill, PassAway); hFunc(TEvPrivate::TEvRestartComplete, Handle); - cFunc(TEvents::TSystem::Wakeup, Timeout); + cFunc(TEvents::TSystem::Wakeup, KickNextTablet); } } }; diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index 9b7287a210..d248fd3833 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -4419,6 +4419,84 @@ Y_UNIT_TEST_SUITE(THiveTest) { UNIT_ASSERT_LE(movedToFirstNode, TABLETS_PER_NODE / 2); } + Y_UNIT_TEST(TestHiveBalancerWithImmovableTablets) { + static constexpr ui64 TABLETS_PER_NODE = 10; + TTestBasicRuntime runtime(3, false); + Setup(runtime, true, 1, [](TAppPrepare& app) { + app.HiveConfig.SetTabletKickCooldownPeriod(0); + app.HiveConfig.SetResourceChangeReactionPeriod(0); + }); + const int nodeBase = runtime.GetNodeId(0); + TActorId senderA = runtime.AllocateEdgeActor(); + const ui64 hiveTablet = MakeDefaultHiveID(0); + const ui64 testerTablet = MakeDefaultHiveID(1); + + auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array<std::vector<ui64>, 3> { + std::array<std::vector<ui64>, 3> nodeTablets = {}; + { + runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo()); + TAutoPtr<IEventHandle> handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < 3), + "nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase); + nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID()); + } + } + return nodeTablets; + }; + + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + // wait for creation of nodes + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus, 2); + runtime.DispatchEvents(options); + } + + // every 3rd tablet is tied to the first node + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + for (size_t i = 0; i < 3 * TABLETS_PER_NODE; ++i) { + THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ev->Record.SetObjectId(i); + if (i % 3 == 0) { + ev->Record.AddAllowedNodeIDs(nodeBase); + } + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + MakeSureTabletIsUp(runtime, tabletId, 0); + } + + // Check initial distribution + auto initialDistribution = getDistribution(); + for (size_t i = 0; i < 3; ++i) { + UNIT_ASSERT_VALUES_EQUAL(initialDistribution[i].size(), TABLETS_PER_NODE); + } + + // report metrics for all tablets on first node, and two tablets on second node + std::vector<ui64> tabletsWithMetrics = initialDistribution[0]; + tabletsWithMetrics.push_back(initialDistribution[1][0]); + tabletsWithMetrics.push_back(initialDistribution[1][1]); + for (auto tabletId : tabletsWithMetrics) { + THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>(); + NKikimrHive::TTabletMetrics* cpu = metrics->Record.AddTabletMetrics(); + cpu->SetTabletID(tabletId); + cpu->MutableResourceUsage()->SetCPU(500'000); + + runtime.SendToPipe(hiveTablet, senderA, metrics.Release()); + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(NHive::TEvPrivate::EvRestartComplete); + runtime.DispatchEvents(options, TDuration::Seconds(10)); + } + + // Check that a tablet was moved from the second node to the third + auto newDistribution = getDistribution(); + UNIT_ASSERT_VALUES_EQUAL(newDistribution[0].size(), TABLETS_PER_NODE); + UNIT_ASSERT_VALUES_EQUAL(newDistribution[1].size(), TABLETS_PER_NODE - 1); + } Y_UNIT_TEST(TestUpdateTabletsObjectUpdatesMetrics) { TTestBasicRuntime runtime(1, false); |