diff options
author | xenoxeno <xeno@ydb.tech> | 2022-07-10 17:55:50 +0300 |
---|---|---|
committer | xenoxeno <xeno@ydb.tech> | 2022-07-10 17:55:50 +0300 |
commit | 9b4f67031921f90daa279b28361aad4af068637d (patch) | |
tree | 8bec2e4b47dcbeaa3306e520f640d26faa9ab685 | |
parent | d6bc1736fb9c0e5ccd815586513eb0ec8c869ee7 (diff) | |
download | ydb-9b4f67031921f90daa279b28361aad4af068637d.tar.gz |
dynamically adjust number of cross-dc followers
-rw-r--r-- | ydb/core/mind/hive/hive_impl.cpp | 89 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_impl.h | 9 | ||||
-rw-r--r-- | ydb/core/mind/hive/hive_ut.cpp | 218 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__create_tablet.cpp | 75 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__kill_node.cpp | 3 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__load_everything.cpp | 23 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__process_boot_queue.cpp | 5 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__status.cpp | 2 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__update_tablet_status.cpp | 1 |
9 files changed, 328 insertions, 97 deletions
diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp index 21a4c0c982..d2fc1cddd3 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -177,7 +177,7 @@ void THive::DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects& sideEffects.Send(SelfId(), new TEvTabletBase::TEvDeleteTabletResult(NKikimrProto::OK, tablet->Id)); } -void THive::ExecuteProcessBootQueue(TCompleteNotifications& notifications) { +void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects) { TInstant now = TActivationContext::Now(); BLOG_D("Handle ProcessBootQueue (size: " << BootQueue.BootQueue.size() << ")"); THPTimer bootQueueProcessingTimer; @@ -212,9 +212,13 @@ void THive::ExecuteProcessBootQueue(TCompleteNotifications& notifications) { break; } else { for (const TActorId actorToNotify : tablet->ActorsToNotifyOnRestart) { - notifications.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay")); + sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay")); } tablet->ActorsToNotifyOnRestart.clear(); + if (tablet->IsFollower()) { + TLeaderTabletInfo& leader = tablet->GetLeader(); + UpdateTabletFollowersNumber(leader, db, sideEffects); + } BootQueue.AddToWaitQueue(record); // waiting for new node continue; } @@ -237,7 +241,7 @@ void THive::ExecuteProcessBootQueue(TCompleteNotifications& notifications) { BootQueue.AddToBootQueue(record); } if (TabletCounters != nullptr) { - TabletCounters->Simple()[NHive::COUNTER_BOOTQUEUE_SIZE].Set(BootQueue.BootQueue.size()); + UpdateCounterBootQueueSize(BootQueue.BootQueue.size()); TabletCounters->Simple()[NHive::COUNTER_WAITQUEUE_SIZE].Set(BootQueue.WaitQueue.size()); TabletCounters->Cumulative()[NHive::COUNTER_BOOTQUEUE_PROCESSED].Increment(1); TabletCounters->Cumulative()[NHive::COUNTER_BOOTQUEUE_TIME].Increment(ui64(1000000. * bootQueueProcessingTimer.PassedReset())); @@ -413,9 +417,6 @@ void THive::Handle(TEvPrivate::TEvBootTablets::TPtr&) { BLOG_D("Handle BootTablets"); RequestPoolsInformation(); for (auto& [id, node] : Nodes) { - if (node.LocationAcquired) { - UpdateRegisteredDataCenters(node.Location.GetDataCenterId()); - } if (node.IsUnknown() && node.Local) { node.Ping(); } @@ -652,9 +653,6 @@ void THive::Handle(TEvInterconnect::TEvNodeInfo::TPtr &ev) { hiveNodeInfo->Location = nodeInfo.Location; hiveNodeInfo->LocationAcquired = true; BLOG_D("TEvInterconnect::TEvNodeInfo NodeId " << nodeInfo.NodeId << " Location " << GetLocationString(hiveNodeInfo->Location)); - if (hiveNodeInfo->IsRegistered()) { - UpdateRegisteredDataCenters(hiveNodeInfo->Location.GetDataCenterId()); - } } } } @@ -2217,19 +2215,78 @@ void THive::SendReconnect(const TActorId& local) { } ui32 THive::GetDataCenters() { - return DataCenters; + return DataCenters ? DataCenters : 1; } ui32 THive::GetRegisteredDataCenters() { - return RegisteredDataCenters; + return RegisteredDataCenters ? RegisteredDataCenters : 1; } -void THive::UpdateRegisteredDataCenters(TDataCenterId dataCenterId) { +void THive::UpdateRegisteredDataCenters() { + if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) { + BLOG_D("THive (UpdateRegisteredDC) DataCenters=" << DataCenters << " RegisteredDataCenters=" << RegisteredDataCenters << "->" << RegisteredDataCenterNodes.size()); + RegisteredDataCenters = RegisteredDataCenterNodes.size(); + } +} + +void THive::AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId) { if (dataCenterId != 0) { // ignore default data center id if exists - RegisteredDataCenterIds.insert(dataCenterId); - if (RegisteredDataCenters != RegisteredDataCenterIds.size()) { - RegisteredDataCenters = RegisteredDataCenterIds.size(); - BLOG_D("THive (UpdateRegisteredDC) DataCenters=" << DataCenters << " RegisteredDataCenters=" << RegisteredDataCenters); + if (RegisteredDataCenterNodes[dataCenterId].insert(nodeId).second) { + if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) { + UpdateRegisteredDataCenters(); + } + } + } +} + +void THive::RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId) { + if (dataCenterId != 0) { // ignore default data center id if exists + RegisteredDataCenterNodes[dataCenterId].erase(nodeId); + if (RegisteredDataCenterNodes[dataCenterId].size() == 0) { + RegisteredDataCenterNodes.erase(dataCenterId); + } + if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) { + UpdateRegisteredDataCenters(); + } + } +} + +void THive::UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects) { + BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " RegisteredDataCenters=" << GetRegisteredDataCenters()); + for (TFollowerGroup& group : tablet.FollowerGroups) { + ui32 followerCount = tablet.GetActualFollowerCount(group.Id); + ui32 requiredFollowerCount = group.GetComputedFollowerCount(GetRegisteredDataCenters()); + + while (followerCount < requiredFollowerCount) { + BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " is increasing number of followers (" << followerCount << "<" << requiredFollowerCount << ")"); + + TFollowerTabletInfo& follower = tablet.AddFollower(group); + follower.Statistics.SetLastAliveTimestamp(TlsActivationContext->Now().MilliSeconds()); + db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Update( + NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id), + NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0), + NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics)); + follower.InitTabletMetrics(); + follower.BecomeStopped(); + ++followerCount; + } + + while (followerCount > requiredFollowerCount) { + BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " is decreasing number of followers (" << followerCount << ">" << requiredFollowerCount << ")"); + + auto itFollower = tablet.Followers.rbegin(); + while (itFollower != tablet.Followers.rend() && itFollower->FollowerGroup.Id != group.Id) { + ++itFollower; + } + if (itFollower == tablet.Followers.rend()) { + break; + } + TFollowerTabletInfo& follower = *itFollower; + db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Delete(); + db.Table<Schema::Metrics>().Key(tablet.Id, follower.Id).Delete(); + follower.InitiateStop(sideEffects); + tablet.Followers.erase(std::prev(itFollower.base())); + --followerCount; } } } diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index f8b884adfb..85f6ea922d 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -401,7 +401,7 @@ protected: NKikimrConfig::THiveConfig DatabaseConfig; std::unordered_map<TTabletTypes::EType, NKikimrConfig::THiveTabletLimit> TabletLimit; // built from CurrentConfig std::unordered_map<TTabletTypes::EType, NKikimrHive::TDataCentersPreference> DefaultDataCentersPreference; - std::unordered_set<TDataCenterId> RegisteredDataCenterIds; + std::unordered_map<TDataCenterId, std::unordered_set<TNodeId>> RegisteredDataCenterNodes; std::unordered_set<TNodeId> ConnectedNodes; // to be removed later @@ -594,14 +594,17 @@ public: void ExecuteStartTablet(TFullTabletId tabletId, const TActorId& local, ui64 cookie, bool external); ui32 GetDataCenters(); ui32 GetRegisteredDataCenters(); - void UpdateRegisteredDataCenters(TDataCenterId dataCenterId); + void UpdateRegisteredDataCenters(); + void AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId); + void RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId); void SendPing(const TActorId& local, TNodeId id); void SendReconnect(const TActorId& local); static THolder<NKikimrBlobStorage::TEvControllerSelectGroups::TGroupParameters> BuildGroupParametersForChannel(const TLeaderTabletInfo& tablet, ui32 channelId); void KickTablet(const TTabletInfo& tablet); void StopTablet(const TActorId& local, const TTabletInfo& tablet); void StopTablet(const TActorId& local, TFullTabletId tabletId); - void ExecuteProcessBootQueue(TCompleteNotifications& notifications); + void ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects); + void UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects); TTabletMetricsAggregates DefaultResourceMetricsAggregates; ui64 MetricsWindowSize = TDuration::Minutes(1).MilliSeconds(); diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index 567993e9d7..c4686a90c7 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -8,6 +8,7 @@ #include <ydb/core/blobstorage/nodewarden/node_warden.h> #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.h> +#include <ydb/core/protos/counters_hive.pb.h> #include <ydb/core/mind/bscontroller/bsc.h> #include <ydb/core/mind/tenant_pool.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> @@ -795,6 +796,44 @@ Y_UNIT_TEST_SUITE(THiveTest) { runtime.DispatchEvents(options); } + NKikimrTabletBase::TEvGetCountersResponse GetCounters(TTestBasicRuntime& runtime, ui64 tabletId) { + const auto sender = runtime.AllocateEdgeActor(); + runtime.SendToPipe(tabletId, sender, new TEvTablet::TEvGetCounters); + auto ev = runtime.GrabEdgeEvent<TEvTablet::TEvGetCountersResponse>(sender); + + UNIT_ASSERT(ev); + return ev->Get()->Record; + } + + ui64 GetSimpleCounter(TTestBasicRuntime& runtime, ui64 tabletId, const TString& name) { + const auto counters = GetCounters(runtime, tabletId); + for (const auto& counter : counters.GetTabletCounters().GetAppCounters().GetSimpleCounters()) { + if (name != counter.GetName()) { + continue; + } + + return counter.GetValue(); + } + + UNIT_ASSERT_C(false, "Counter not found: " << name); + return 0; // unreachable + } + + void WaitForBootQueue(TTestBasicRuntime& runtime, ui64 hiveTabletId) { + for (;;) { + auto counters = GetCounters(runtime, hiveTabletId); + ui64 bootQueueSize = counters.GetTabletCounters().GetAppCounters().GetSimpleCounters(NHive::COUNTER_BOOTQUEUE_SIZE).GetValue(); + ui64 waitQueueSize = counters.GetTabletCounters().GetAppCounters().GetSimpleCounters(NHive::COUNTER_WAITQUEUE_SIZE).GetValue(); + Ctest << "Hive/BootQueueSize=" << bootQueueSize << Endl; + Ctest << "Hive/WaitQueueSize=" << bootQueueSize << Endl; + if (bootQueueSize == 0 && waitQueueSize == 0) { + break; + } + TDispatchOptions options; + runtime.DispatchEvents(options, TDuration::MilliSeconds(500)); + } + } + Y_UNIT_TEST(TestCreateTablet) { TTestBasicRuntime runtime(1, false); Setup(runtime, true); @@ -3174,6 +3213,162 @@ Y_UNIT_TEST_SUITE(THiveTest) { } } + Y_UNIT_TEST(TestHiveFollowersWithChangingDC) { + static const int NUM_NODES = 6; + static const int NUM_TABLETS = 1; + TTestBasicRuntime runtime(NUM_NODES, false); + + runtime.LocationCallback = GetLocation; + + Setup(runtime, false); + //const int nodeBase = runtime.GetNodeId(0); + CreateLocal(runtime, 0); + CreateLocal(runtime, 1); + TActorId senderA = runtime.AllocateEdgeActor(); + const ui64 hiveTablet = MakeDefaultHiveID(0); + const ui64 testerTablet = MakeDefaultHiveID(1); + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + // creating NUM_TABLETS tablets + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + TVector<ui64> tablets; + for (int i = 0; i < NUM_TABLETS; ++i) { + THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ev->Record.SetCrossDataCenterFollowerCount(1); + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + tablets.emplace_back(tabletId); + MakeSureTabletIsUp(runtime, tabletId, 0); + } + + // checking distribution, all leaders should be on the first node + { + int leaders = 0; + int tablets = 0; + { + THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>(); + request->Record.SetReturnFollowers(true); + runtime.SendToPipe(hiveTablet, senderA, request.Release()); + TAutoPtr<IEventHandle> handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + if (tablet.GetFollowerID() == 0) { + leaders++; + } + tablets++; + Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl; + } + } + UNIT_ASSERT_VALUES_EQUAL(leaders, 1); + UNIT_ASSERT_VALUES_EQUAL(tablets, 2); + } + + CreateLocal(runtime, 2); + CreateLocal(runtime, 3); + + // kill all tablets + for (ui64 tabletId : tablets) { + runtime.Register(CreateTabletKiller(tabletId)); + + // wait for tablet to stop and start back up again + TDispatchOptions options; + // leader (death, start) + new extra follower + options.FinalEvents.emplace_back(TDispatchOptions::TFinalEventCondition(TEvLocal::EvTabletStatus, 3)); + runtime.DispatchEvents(options); + } + + { + int leaders = 0; + int tablets = 0; + { + THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>(); + request->Record.SetReturnFollowers(true); + runtime.SendToPipe(hiveTablet, senderA, request.Release()); + TAutoPtr<IEventHandle> handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + if (tablet.GetFollowerID() == 0) { + leaders++; + } + tablets++; + Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl; + } + } + UNIT_ASSERT_VALUES_EQUAL(leaders, 1); + UNIT_ASSERT_VALUES_EQUAL(tablets, 3); + } + + CreateLocal(runtime, 4); + CreateLocal(runtime, 5); + + // kill all tablets + for (ui64 tabletId : tablets) { + runtime.Register(CreateTabletKiller(tabletId)); + + // wait for tablet to stop and start back up again + TDispatchOptions options; + // leader (death, start) + new extra follower + options.FinalEvents.emplace_back(TDispatchOptions::TFinalEventCondition(TEvLocal::EvTabletStatus, 3)); + runtime.DispatchEvents(options); + } + + { + int leaders = 0; + int tablets = 0; + { + THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>(); + request->Record.SetReturnFollowers(true); + runtime.SendToPipe(hiveTablet, senderA, request.Release()); + TAutoPtr<IEventHandle> handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + if (tablet.GetFollowerID() == 0) { + leaders++; + } + tablets++; + Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl; + } + } + UNIT_ASSERT_VALUES_EQUAL(leaders, 1); + UNIT_ASSERT_VALUES_EQUAL(tablets, 4); + } + + SendKillLocal(runtime, 2); + SendKillLocal(runtime, 3); + SendKillLocal(runtime, 4); + SendKillLocal(runtime, 5); + + WaitForBootQueue(runtime, hiveTablet); + + //TDispatchOptions options; + //runtime.DispatchEvents(options, TDuration::MilliSeconds(1000)); + + { + int leaders = 0; + int tablets = 0; + { + THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>(); + request->Record.SetReturnFollowers(true); + runtime.SendToPipe(hiveTablet, senderA, request.Release()); + TAutoPtr<IEventHandle> handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + if (tablet.GetFollowerID() == 0) { + leaders++; + } + tablets++; + Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl; + } + } + UNIT_ASSERT_VALUES_EQUAL(leaders, 1); + UNIT_ASSERT_VALUES_EQUAL(tablets, 2); + // if (tablets != 2) { + // TDispatchOptions options; + // runtime.DispatchEvents(options, TDuration::MilliSeconds(1000)); + // UNIT_ASSERT(false); + // } + } + } + Y_UNIT_TEST(TestHiveBalancerWithSystemTablets) { static const int NUM_NODES = 6; static const int NUM_TABLETS = 12; @@ -3779,7 +3974,10 @@ Y_UNIT_TEST_SUITE(THiveTest) { TTabletTypes::EType tabletType = TTabletTypes::Dummy; THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500, tabletType, BINDED_CHANNELS)); ev->Record.SetObjectId(1337); - ev->Record.SetCrossDataCenterFollowerCount(FOLLOWERS); + auto* followerGroup = ev->Record.AddFollowerGroups(); + followerGroup->SetFollowerCount(FOLLOWERS); + followerGroup->SetFollowerCountPerDataCenter(true); + followerGroup->SetRequireAllDataCenters(true); ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); ui32 leaderNode = 0; @@ -3827,9 +4025,19 @@ Y_UNIT_TEST_SUITE(THiveTest) { } SendKillLocal(runtime, followersNode); WaitForEvServerDisconnected(runtime); - WaitForTabletsBecomeActive(runtime, 1); // hive + //WaitForTabletsBecomeActive(runtime, 1); // hive CreateLocal(runtime, followersNode); - WaitForTabletsBecomeActive(runtime, 1); // follower + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvSyncTablets, NODES); + runtime.DispatchEvents(options); + } + runtime.Register(CreateTabletKiller(tabletId)); + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, 2); + runtime.DispatchEvents(options); + } { NTabletPipe::TClientConfig pipeConfig; @@ -3857,9 +4065,9 @@ Y_UNIT_TEST_SUITE(THiveTest) { } } } - UNIT_ASSERT_VALUES_EQUAL(followers, FOLLOWERS * DCS); + UNIT_ASSERT(followers >= (FOLLOWERS * DCS - 1)); UNIT_ASSERT_VALUES_EQUAL(leaders, 1); - UNIT_ASSERT_VALUES_EQUAL(total, 1 + FOLLOWERS * DCS); + UNIT_ASSERT(total >= FOLLOWERS * DCS); } } diff --git a/ydb/core/mind/hive/tx__create_tablet.cpp b/ydb/core/mind/hive/tx__create_tablet.cpp index 8e92d92e53..5606a10f22 100644 --- a/ydb/core/mind/hive/tx__create_tablet.cpp +++ b/ydb/core/mind/hive/tx__create_tablet.cpp @@ -82,11 +82,13 @@ public: compatibilityGroup.SetAllowLeaderPromotion(RequestData.GetAllowFollowerPromotion()); } if (RequestData.HasCrossDataCenterFollowers()) { - compatibilityGroup.SetFollowerCount(Self->GetDataCenters()); + compatibilityGroup.SetFollowerCount(1); + compatibilityGroup.SetFollowerCountPerDataCenter(true); compatibilityGroup.SetRequireAllDataCenters(true); } if (RequestData.HasCrossDataCenterFollowerCount()) { - compatibilityGroup.SetFollowerCount(RequestData.GetCrossDataCenterFollowerCount() * Self->GetDataCenters()); + compatibilityGroup.SetFollowerCount(RequestData.GetCrossDataCenterFollowerCount()); + compatibilityGroup.SetFollowerCountPerDataCenter(true); compatibilityGroup.SetRequireAllDataCenters(true); } if (RequestData.HasFollowerCount()) { @@ -273,50 +275,33 @@ public: auto itFollowerGroup = tablet->FollowerGroups.begin(); for (const auto& srcFollowerGroup : FollowerGroups) { - TFollowerGroup& followerGroup = itFollowerGroup != tablet->FollowerGroups.end() ? *itFollowerGroup : tablet->AddFollowerGroup(); - ui32 oldFollowerCount = tablet->GetActualFollowerCount(followerGroup.Id); - followerGroup = srcFollowerGroup; + TFollowerGroup* followerGroup; + if (itFollowerGroup != tablet->FollowerGroups.end()) { + followerGroup = &*itFollowerGroup; + ++itFollowerGroup; + } else { + followerGroup = &tablet->AddFollowerGroup(); + itFollowerGroup = tablet->FollowerGroups.end(); + } + *followerGroup = srcFollowerGroup; TVector<ui32> allowedDataCenters; - for (const TDataCenterId& dc : followerGroup.AllowedDataCenters) { + for (const TDataCenterId& dc : followerGroup->AllowedDataCenters) { allowedDataCenters.push_back(DataCenterFromString(dc)); } - db.Table<Schema::TabletFollowerGroup>().Key(TabletId, followerGroup.Id).Update( - NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCount>(followerGroup.GetRawFollowerCount()), - NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowLeaderPromotion>(followerGroup.AllowLeaderPromotion), - NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowClientRead>(followerGroup.AllowClientRead), - NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedNodes>(followerGroup.AllowedNodes), + db.Table<Schema::TabletFollowerGroup>().Key(TabletId, followerGroup->Id).Update( + NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCount>(followerGroup->GetRawFollowerCount()), + NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowLeaderPromotion>(followerGroup->AllowLeaderPromotion), + NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowClientRead>(followerGroup->AllowClientRead), + NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedNodes>(followerGroup->AllowedNodes), NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenters>(allowedDataCenters), - NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenterIds>(followerGroup.AllowedDataCenters), - NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireAllDataCenters>(followerGroup.RequireAllDataCenters), - NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>(followerGroup.FollowerCountPerDataCenter), - NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireDifferentNodes>(followerGroup.RequireDifferentNodes)); - - for (ui32 i = oldFollowerCount; i < followerGroup.GetComputedFollowerCount(Self->GetDataCenters()); ++i) { - TFollowerTabletInfo& follower = tablet->AddFollower(followerGroup); - db.Table<Schema::TabletFollowerTablet>().Key(TabletId, follower.Id).Update( - NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id), - NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0)); - follower.InitTabletMetrics(); - follower.BecomeStopped(); - } - - for (ui32 i = followerGroup.GetComputedFollowerCount(Self->GetDataCenters()); i < oldFollowerCount; ++i) { - auto itFollower = tablet->Followers.rbegin(); - while (itFollower != tablet->Followers.rend() && itFollower->FollowerGroup.Id != followerGroup.Id) { - ++itFollower; - } - if (itFollower == tablet->Followers.rend()) { - break; - } - TFollowerTabletInfo& follower = *itFollower; - db.Table<Schema::TabletFollowerTablet>().Key(TabletId, follower.Id).Delete(); - follower.InitiateStop(SideEffects); - tablet->Followers.erase(std::prev(itFollower.base())); - } - ++itFollowerGroup; + NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenterIds>(followerGroup->AllowedDataCenters), + NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireAllDataCenters>(followerGroup->RequireAllDataCenters), + NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>(followerGroup->FollowerCountPerDataCenter), + NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireDifferentNodes>(followerGroup->RequireDifferentNodes)); } + Self->UpdateTabletFollowersNumber(*tablet, db, SideEffects); ProcessTablet(*tablet); BLOG_D("THive::TTxCreateTablet::Execute Existing tablet " << tablet->ToString() << " has been successfully updated"); @@ -469,19 +454,9 @@ public: NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenterIds>(followerGroup.AllowedDataCenters), NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireAllDataCenters>(followerGroup.RequireAllDataCenters), NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>(followerGroup.FollowerCountPerDataCenter)); - - for (ui32 i = 0; i < followerGroup.GetComputedFollowerCount(Self->GetDataCenters()); ++i) { - TFollowerTabletInfo& follower = tablet.AddFollower(followerGroup); - follower.Statistics.SetLastAliveTimestamp(now.MilliSeconds()); - db.Table<Schema::TabletFollowerTablet>().Key(TabletId, follower.Id).Update( - NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id), - NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0), - NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics)); - follower.InitTabletMetrics(); - follower.BecomeStopped(); - } } + Self->UpdateTabletFollowersNumber(tablet, db, SideEffects); Self->OwnerToTablet.emplace(ownerIdx, TabletId); Self->ObjectToTabletMetrics[tablet.ObjectId].IncreaseCount(); Self->TabletTypeToTabletMetrics[tablet.Type].IncreaseCount(); diff --git a/ydb/core/mind/hive/tx__kill_node.cpp b/ydb/core/mind/hive/tx__kill_node.cpp index 9e21d6cddf..bcfba26feb 100644 --- a/ydb/core/mind/hive/tx__kill_node.cpp +++ b/ydb/core/mind/hive/tx__kill_node.cpp @@ -43,6 +43,9 @@ public: db.Table<Schema::Node>().Key(NodeId).Update<Schema::Node::Statistics>(node->Statistics); } node->BecomeDisconnected(); + if (node->LocationAcquired) { + Self->RemoveRegisteredDataCentersNode(node->Location.GetDataCenterId(), node->Id); + } for (const TActorId& pipeServer : node->PipeServers) { BLOG_TRACE("THive::TTxKillNode - killing pipe server " << pipeServer); SideEffects.Send(pipeServer, new TEvents::TEvPoisonPill()); diff --git a/ydb/core/mind/hive/tx__load_everything.cpp b/ydb/core/mind/hive/tx__load_everything.cpp index d611fcd606..9dfe745d5c 100644 --- a/ydb/core/mind/hive/tx__load_everything.cpp +++ b/ydb/core/mind/hive/tx__load_everything.cpp @@ -36,6 +36,7 @@ public: Self->Keeper.Clear(); Self->Domains.clear(); Self->BlockedOwners.clear(); + Self->RegisteredDataCenterNodes.clear(); Self->Domains[Self->RootDomainKey].Path = Self->RootDomainName; Self->Domains[Self->RootDomainKey].HiveId = rootHiveId; @@ -306,6 +307,8 @@ public: if (node.CanBeDeleted()) { db.Table<Schema::Node>().Key(nodeId).Delete(); Self->Nodes.erase(nodeId); + } else if (node.IsUnknown() && node.LocationAcquired) { + Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id); } if (!nodeRowset.Next()) return false; @@ -487,8 +490,6 @@ public: tabletInfo.AcquireAllocationUnits(); } - std::unordered_map<std::pair<TLeaderTabletInfo*, TFollowerGroup*>, ui32> followersPerGroup; - { auto tabletFollowerGroupRowset = db.Table<Schema::TabletFollowerGroup>().Select(); if (!tabletFollowerGroupRowset.IsReady()) @@ -518,7 +519,6 @@ public: followerGroup.LocalNodeOnly = tabletFollowerGroupRowset.GetValueOrDefault<Schema::TabletFollowerGroup::LocalNodeOnly>(); followerGroup.FollowerCountPerDataCenter = tabletFollowerGroupRowset.GetValueOrDefault<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>(); followerGroup.RequireDifferentNodes = tabletFollowerGroupRowset.GetValueOrDefault<Schema::TabletFollowerGroup::RequireDifferentNodes>(); - followersPerGroup[{tablet, &followerGroup}] = 0; } if (!tabletFollowerGroupRowset.Next()) return false; @@ -550,7 +550,6 @@ public: follower.BecomeStopped(); } } - followersPerGroup[{tablet, &followerGroup}]++; } if (!tabletFollowerRowset.Next()) return false; @@ -558,22 +557,6 @@ public: } { - for (auto& [id, count] : followersPerGroup) { - TFollowerGroup& followerGroup(*id.second); - while (followerGroup.GetComputedFollowerCount(Self->GetDataCenters()) > count) { - TFollowerTabletInfo& follower = id.first->AddFollower(followerGroup); - follower.InitTabletMetrics(); - follower.BecomeStopped(); - db.Table<Schema::TabletFollowerTablet>() - .Key(id.first->Id, follower.Id) - .Update(NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0), - NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(followerGroup.Id)); - ++count; - } - } - } - - { auto metricsRowset = db.Table<Schema::Metrics>().Select(); if (!metricsRowset.IsReady()) return false; diff --git a/ydb/core/mind/hive/tx__process_boot_queue.cpp b/ydb/core/mind/hive/tx__process_boot_queue.cpp index 0ca8a6d23a..56160d9113 100644 --- a/ydb/core/mind/hive/tx__process_boot_queue.cpp +++ b/ydb/core/mind/hive/tx__process_boot_queue.cpp @@ -14,10 +14,11 @@ public: TTxType GetTxType() const override { return NHive::TXTYPE_PROCESS_BOOT_QUEUE; } - bool Execute(TTransactionContext&, const TActorContext&) override { + bool Execute(TTransactionContext& txc, const TActorContext&) override { BLOG_D("THive::TTxProcessBootQueue()::Execute"); SideEffects.Reset(Self->SelfId()); - Self->ExecuteProcessBootQueue(SideEffects); + NIceDb::TNiceDb db(txc.DB); + Self->ExecuteProcessBootQueue(db, SideEffects); return true; } diff --git a/ydb/core/mind/hive/tx__status.cpp b/ydb/core/mind/hive/tx__status.cpp index ae940878a7..5803f7c577 100644 --- a/ydb/core/mind/hive/tx__status.cpp +++ b/ydb/core/mind/hive/tx__status.cpp @@ -33,7 +33,7 @@ public: NActorsInterconnect::TNodeLocation location; node.Location.Serialize(&location, false); db.Table<Schema::Node>().Key(nodeId).Update<Schema::Node::Location>(location); - Self->UpdateRegisteredDataCenters(node.Location.GetDataCenterId()); + Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id); } Self->ProcessWaitQueue(); // new node connected if (node.Drain && Self->BalancerNodes.count(nodeId) == 0) { diff --git a/ydb/core/mind/hive/tx__update_tablet_status.cpp b/ydb/core/mind/hive/tx__update_tablet_status.cpp index 3aae018458..c736be00aa 100644 --- a/ydb/core/mind/hive/tx__update_tablet_status.cpp +++ b/ydb/core/mind/hive/tx__update_tablet_status.cpp @@ -111,6 +111,7 @@ public: db.Table<Schema::Tablet>().Key(TabletId).Update(NIceDb::TUpdate<Schema::Tablet::LeaderNode>(tablet->NodeId), NIceDb::TUpdate<Schema::Tablet::KnownGeneration>(Generation), NIceDb::TUpdate<Schema::Tablet::Statistics>(tablet->Statistics)); + Self->UpdateTabletFollowersNumber(leader, db, SideEffects); } else { db.Table<Schema::TabletFollowerTablet>().Key(TabletId, FollowerId).Update( NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(tablet->AsFollower().FollowerGroup.Id), |