aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorxenoxeno <xeno@ydb.tech>2022-07-10 17:55:50 +0300
committerxenoxeno <xeno@ydb.tech>2022-07-10 17:55:50 +0300
commit9b4f67031921f90daa279b28361aad4af068637d (patch)
tree8bec2e4b47dcbeaa3306e520f640d26faa9ab685
parentd6bc1736fb9c0e5ccd815586513eb0ec8c869ee7 (diff)
downloadydb-9b4f67031921f90daa279b28361aad4af068637d.tar.gz
dynamically adjust number of cross-dc followers
-rw-r--r--ydb/core/mind/hive/hive_impl.cpp89
-rw-r--r--ydb/core/mind/hive/hive_impl.h9
-rw-r--r--ydb/core/mind/hive/hive_ut.cpp218
-rw-r--r--ydb/core/mind/hive/tx__create_tablet.cpp75
-rw-r--r--ydb/core/mind/hive/tx__kill_node.cpp3
-rw-r--r--ydb/core/mind/hive/tx__load_everything.cpp23
-rw-r--r--ydb/core/mind/hive/tx__process_boot_queue.cpp5
-rw-r--r--ydb/core/mind/hive/tx__status.cpp2
-rw-r--r--ydb/core/mind/hive/tx__update_tablet_status.cpp1
9 files changed, 328 insertions, 97 deletions
diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp
index 21a4c0c982..d2fc1cddd3 100644
--- a/ydb/core/mind/hive/hive_impl.cpp
+++ b/ydb/core/mind/hive/hive_impl.cpp
@@ -177,7 +177,7 @@ void THive::DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects&
sideEffects.Send(SelfId(), new TEvTabletBase::TEvDeleteTabletResult(NKikimrProto::OK, tablet->Id));
}
-void THive::ExecuteProcessBootQueue(TCompleteNotifications& notifications) {
+void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
TInstant now = TActivationContext::Now();
BLOG_D("Handle ProcessBootQueue (size: " << BootQueue.BootQueue.size() << ")");
THPTimer bootQueueProcessingTimer;
@@ -212,9 +212,13 @@ void THive::ExecuteProcessBootQueue(TCompleteNotifications& notifications) {
break;
} else {
for (const TActorId actorToNotify : tablet->ActorsToNotifyOnRestart) {
- notifications.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay"));
+ sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay"));
}
tablet->ActorsToNotifyOnRestart.clear();
+ if (tablet->IsFollower()) {
+ TLeaderTabletInfo& leader = tablet->GetLeader();
+ UpdateTabletFollowersNumber(leader, db, sideEffects);
+ }
BootQueue.AddToWaitQueue(record); // waiting for new node
continue;
}
@@ -237,7 +241,7 @@ void THive::ExecuteProcessBootQueue(TCompleteNotifications& notifications) {
BootQueue.AddToBootQueue(record);
}
if (TabletCounters != nullptr) {
- TabletCounters->Simple()[NHive::COUNTER_BOOTQUEUE_SIZE].Set(BootQueue.BootQueue.size());
+ UpdateCounterBootQueueSize(BootQueue.BootQueue.size());
TabletCounters->Simple()[NHive::COUNTER_WAITQUEUE_SIZE].Set(BootQueue.WaitQueue.size());
TabletCounters->Cumulative()[NHive::COUNTER_BOOTQUEUE_PROCESSED].Increment(1);
TabletCounters->Cumulative()[NHive::COUNTER_BOOTQUEUE_TIME].Increment(ui64(1000000. * bootQueueProcessingTimer.PassedReset()));
@@ -413,9 +417,6 @@ void THive::Handle(TEvPrivate::TEvBootTablets::TPtr&) {
BLOG_D("Handle BootTablets");
RequestPoolsInformation();
for (auto& [id, node] : Nodes) {
- if (node.LocationAcquired) {
- UpdateRegisteredDataCenters(node.Location.GetDataCenterId());
- }
if (node.IsUnknown() && node.Local) {
node.Ping();
}
@@ -652,9 +653,6 @@ void THive::Handle(TEvInterconnect::TEvNodeInfo::TPtr &ev) {
hiveNodeInfo->Location = nodeInfo.Location;
hiveNodeInfo->LocationAcquired = true;
BLOG_D("TEvInterconnect::TEvNodeInfo NodeId " << nodeInfo.NodeId << " Location " << GetLocationString(hiveNodeInfo->Location));
- if (hiveNodeInfo->IsRegistered()) {
- UpdateRegisteredDataCenters(hiveNodeInfo->Location.GetDataCenterId());
- }
}
}
}
@@ -2217,19 +2215,78 @@ void THive::SendReconnect(const TActorId& local) {
}
ui32 THive::GetDataCenters() {
- return DataCenters;
+ return DataCenters ? DataCenters : 1;
}
ui32 THive::GetRegisteredDataCenters() {
- return RegisteredDataCenters;
+ return RegisteredDataCenters ? RegisteredDataCenters : 1;
}
-void THive::UpdateRegisteredDataCenters(TDataCenterId dataCenterId) {
+void THive::UpdateRegisteredDataCenters() {
+ if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) {
+ BLOG_D("THive (UpdateRegisteredDC) DataCenters=" << DataCenters << " RegisteredDataCenters=" << RegisteredDataCenters << "->" << RegisteredDataCenterNodes.size());
+ RegisteredDataCenters = RegisteredDataCenterNodes.size();
+ }
+}
+
+void THive::AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId) {
if (dataCenterId != 0) { // ignore default data center id if exists
- RegisteredDataCenterIds.insert(dataCenterId);
- if (RegisteredDataCenters != RegisteredDataCenterIds.size()) {
- RegisteredDataCenters = RegisteredDataCenterIds.size();
- BLOG_D("THive (UpdateRegisteredDC) DataCenters=" << DataCenters << " RegisteredDataCenters=" << RegisteredDataCenters);
+ if (RegisteredDataCenterNodes[dataCenterId].insert(nodeId).second) {
+ if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) {
+ UpdateRegisteredDataCenters();
+ }
+ }
+ }
+}
+
+void THive::RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId) {
+ if (dataCenterId != 0) { // ignore default data center id if exists
+ RegisteredDataCenterNodes[dataCenterId].erase(nodeId);
+ if (RegisteredDataCenterNodes[dataCenterId].size() == 0) {
+ RegisteredDataCenterNodes.erase(dataCenterId);
+ }
+ if (RegisteredDataCenters != RegisteredDataCenterNodes.size()) {
+ UpdateRegisteredDataCenters();
+ }
+ }
+}
+
+void THive::UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
+ BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " RegisteredDataCenters=" << GetRegisteredDataCenters());
+ for (TFollowerGroup& group : tablet.FollowerGroups) {
+ ui32 followerCount = tablet.GetActualFollowerCount(group.Id);
+ ui32 requiredFollowerCount = group.GetComputedFollowerCount(GetRegisteredDataCenters());
+
+ while (followerCount < requiredFollowerCount) {
+ BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " is increasing number of followers (" << followerCount << "<" << requiredFollowerCount << ")");
+
+ TFollowerTabletInfo& follower = tablet.AddFollower(group);
+ follower.Statistics.SetLastAliveTimestamp(TlsActivationContext->Now().MilliSeconds());
+ db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Update(
+ NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
+ NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
+ NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics));
+ follower.InitTabletMetrics();
+ follower.BecomeStopped();
+ ++followerCount;
+ }
+
+ while (followerCount > requiredFollowerCount) {
+ BLOG_D("UpdateTabletFollowersNumber Tablet " << tablet.ToString() << " is decreasing number of followers (" << followerCount << ">" << requiredFollowerCount << ")");
+
+ auto itFollower = tablet.Followers.rbegin();
+ while (itFollower != tablet.Followers.rend() && itFollower->FollowerGroup.Id != group.Id) {
+ ++itFollower;
+ }
+ if (itFollower == tablet.Followers.rend()) {
+ break;
+ }
+ TFollowerTabletInfo& follower = *itFollower;
+ db.Table<Schema::TabletFollowerTablet>().Key(tablet.Id, follower.Id).Delete();
+ db.Table<Schema::Metrics>().Key(tablet.Id, follower.Id).Delete();
+ follower.InitiateStop(sideEffects);
+ tablet.Followers.erase(std::prev(itFollower.base()));
+ --followerCount;
}
}
}
diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h
index f8b884adfb..85f6ea922d 100644
--- a/ydb/core/mind/hive/hive_impl.h
+++ b/ydb/core/mind/hive/hive_impl.h
@@ -401,7 +401,7 @@ protected:
NKikimrConfig::THiveConfig DatabaseConfig;
std::unordered_map<TTabletTypes::EType, NKikimrConfig::THiveTabletLimit> TabletLimit; // built from CurrentConfig
std::unordered_map<TTabletTypes::EType, NKikimrHive::TDataCentersPreference> DefaultDataCentersPreference;
- std::unordered_set<TDataCenterId> RegisteredDataCenterIds;
+ std::unordered_map<TDataCenterId, std::unordered_set<TNodeId>> RegisteredDataCenterNodes;
std::unordered_set<TNodeId> ConnectedNodes;
// to be removed later
@@ -594,14 +594,17 @@ public:
void ExecuteStartTablet(TFullTabletId tabletId, const TActorId& local, ui64 cookie, bool external);
ui32 GetDataCenters();
ui32 GetRegisteredDataCenters();
- void UpdateRegisteredDataCenters(TDataCenterId dataCenterId);
+ void UpdateRegisteredDataCenters();
+ void AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId);
+ void RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId);
void SendPing(const TActorId& local, TNodeId id);
void SendReconnect(const TActorId& local);
static THolder<NKikimrBlobStorage::TEvControllerSelectGroups::TGroupParameters> BuildGroupParametersForChannel(const TLeaderTabletInfo& tablet, ui32 channelId);
void KickTablet(const TTabletInfo& tablet);
void StopTablet(const TActorId& local, const TTabletInfo& tablet);
void StopTablet(const TActorId& local, TFullTabletId tabletId);
- void ExecuteProcessBootQueue(TCompleteNotifications& notifications);
+ void ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects);
+ void UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNiceDb& db, TSideEffects& sideEffects);
TTabletMetricsAggregates DefaultResourceMetricsAggregates;
ui64 MetricsWindowSize = TDuration::Minutes(1).MilliSeconds();
diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp
index 567993e9d7..c4686a90c7 100644
--- a/ydb/core/mind/hive/hive_ut.cpp
+++ b/ydb/core/mind/hive/hive_ut.cpp
@@ -8,6 +8,7 @@
#include <ydb/core/blobstorage/nodewarden/node_warden.h>
#include <ydb/core/blobstorage/base/blobstorage_events.h>
#include <ydb/core/blobstorage/pdisk/blobstorage_pdisk_tools.h>
+#include <ydb/core/protos/counters_hive.pb.h>
#include <ydb/core/mind/bscontroller/bsc.h>
#include <ydb/core/mind/tenant_pool.h>
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
@@ -795,6 +796,44 @@ Y_UNIT_TEST_SUITE(THiveTest) {
runtime.DispatchEvents(options);
}
+ NKikimrTabletBase::TEvGetCountersResponse GetCounters(TTestBasicRuntime& runtime, ui64 tabletId) {
+ const auto sender = runtime.AllocateEdgeActor();
+ runtime.SendToPipe(tabletId, sender, new TEvTablet::TEvGetCounters);
+ auto ev = runtime.GrabEdgeEvent<TEvTablet::TEvGetCountersResponse>(sender);
+
+ UNIT_ASSERT(ev);
+ return ev->Get()->Record;
+ }
+
+ ui64 GetSimpleCounter(TTestBasicRuntime& runtime, ui64 tabletId, const TString& name) {
+ const auto counters = GetCounters(runtime, tabletId);
+ for (const auto& counter : counters.GetTabletCounters().GetAppCounters().GetSimpleCounters()) {
+ if (name != counter.GetName()) {
+ continue;
+ }
+
+ return counter.GetValue();
+ }
+
+ UNIT_ASSERT_C(false, "Counter not found: " << name);
+ return 0; // unreachable
+ }
+
+ void WaitForBootQueue(TTestBasicRuntime& runtime, ui64 hiveTabletId) {
+ for (;;) {
+ auto counters = GetCounters(runtime, hiveTabletId);
+ ui64 bootQueueSize = counters.GetTabletCounters().GetAppCounters().GetSimpleCounters(NHive::COUNTER_BOOTQUEUE_SIZE).GetValue();
+ ui64 waitQueueSize = counters.GetTabletCounters().GetAppCounters().GetSimpleCounters(NHive::COUNTER_WAITQUEUE_SIZE).GetValue();
+ Ctest << "Hive/BootQueueSize=" << bootQueueSize << Endl;
+ Ctest << "Hive/WaitQueueSize=" << bootQueueSize << Endl;
+ if (bootQueueSize == 0 && waitQueueSize == 0) {
+ break;
+ }
+ TDispatchOptions options;
+ runtime.DispatchEvents(options, TDuration::MilliSeconds(500));
+ }
+ }
+
Y_UNIT_TEST(TestCreateTablet) {
TTestBasicRuntime runtime(1, false);
Setup(runtime, true);
@@ -3174,6 +3213,162 @@ Y_UNIT_TEST_SUITE(THiveTest) {
}
}
+ Y_UNIT_TEST(TestHiveFollowersWithChangingDC) {
+ static const int NUM_NODES = 6;
+ static const int NUM_TABLETS = 1;
+ TTestBasicRuntime runtime(NUM_NODES, false);
+
+ runtime.LocationCallback = GetLocation;
+
+ Setup(runtime, false);
+ //const int nodeBase = runtime.GetNodeId(0);
+ CreateLocal(runtime, 0);
+ CreateLocal(runtime, 1);
+ TActorId senderA = runtime.AllocateEdgeActor();
+ const ui64 hiveTablet = MakeDefaultHiveID(0);
+ const ui64 testerTablet = MakeDefaultHiveID(1);
+ CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
+
+ // creating NUM_TABLETS tablets
+ TTabletTypes::EType tabletType = TTabletTypes::Dummy;
+ TVector<ui64> tablets;
+ for (int i = 0; i < NUM_TABLETS; ++i) {
+ THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS));
+ ev->Record.SetCrossDataCenterFollowerCount(1);
+ ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true);
+ tablets.emplace_back(tabletId);
+ MakeSureTabletIsUp(runtime, tabletId, 0);
+ }
+
+ // checking distribution, all leaders should be on the first node
+ {
+ int leaders = 0;
+ int tablets = 0;
+ {
+ THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>();
+ request->Record.SetReturnFollowers(true);
+ runtime.SendToPipe(hiveTablet, senderA, request.Release());
+ TAutoPtr<IEventHandle> handle;
+ TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
+ for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
+ if (tablet.GetFollowerID() == 0) {
+ leaders++;
+ }
+ tablets++;
+ Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl;
+ }
+ }
+ UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
+ UNIT_ASSERT_VALUES_EQUAL(tablets, 2);
+ }
+
+ CreateLocal(runtime, 2);
+ CreateLocal(runtime, 3);
+
+ // kill all tablets
+ for (ui64 tabletId : tablets) {
+ runtime.Register(CreateTabletKiller(tabletId));
+
+ // wait for tablet to stop and start back up again
+ TDispatchOptions options;
+ // leader (death, start) + new extra follower
+ options.FinalEvents.emplace_back(TDispatchOptions::TFinalEventCondition(TEvLocal::EvTabletStatus, 3));
+ runtime.DispatchEvents(options);
+ }
+
+ {
+ int leaders = 0;
+ int tablets = 0;
+ {
+ THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>();
+ request->Record.SetReturnFollowers(true);
+ runtime.SendToPipe(hiveTablet, senderA, request.Release());
+ TAutoPtr<IEventHandle> handle;
+ TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
+ for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
+ if (tablet.GetFollowerID() == 0) {
+ leaders++;
+ }
+ tablets++;
+ Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl;
+ }
+ }
+ UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
+ UNIT_ASSERT_VALUES_EQUAL(tablets, 3);
+ }
+
+ CreateLocal(runtime, 4);
+ CreateLocal(runtime, 5);
+
+ // kill all tablets
+ for (ui64 tabletId : tablets) {
+ runtime.Register(CreateTabletKiller(tabletId));
+
+ // wait for tablet to stop and start back up again
+ TDispatchOptions options;
+ // leader (death, start) + new extra follower
+ options.FinalEvents.emplace_back(TDispatchOptions::TFinalEventCondition(TEvLocal::EvTabletStatus, 3));
+ runtime.DispatchEvents(options);
+ }
+
+ {
+ int leaders = 0;
+ int tablets = 0;
+ {
+ THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>();
+ request->Record.SetReturnFollowers(true);
+ runtime.SendToPipe(hiveTablet, senderA, request.Release());
+ TAutoPtr<IEventHandle> handle;
+ TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
+ for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
+ if (tablet.GetFollowerID() == 0) {
+ leaders++;
+ }
+ tablets++;
+ Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl;
+ }
+ }
+ UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
+ UNIT_ASSERT_VALUES_EQUAL(tablets, 4);
+ }
+
+ SendKillLocal(runtime, 2);
+ SendKillLocal(runtime, 3);
+ SendKillLocal(runtime, 4);
+ SendKillLocal(runtime, 5);
+
+ WaitForBootQueue(runtime, hiveTablet);
+
+ //TDispatchOptions options;
+ //runtime.DispatchEvents(options, TDuration::MilliSeconds(1000));
+
+ {
+ int leaders = 0;
+ int tablets = 0;
+ {
+ THolder<TEvHive::TEvRequestHiveInfo> request = MakeHolder<TEvHive::TEvRequestHiveInfo>();
+ request->Record.SetReturnFollowers(true);
+ runtime.SendToPipe(hiveTablet, senderA, request.Release());
+ TAutoPtr<IEventHandle> handle;
+ TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
+ for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
+ if (tablet.GetFollowerID() == 0) {
+ leaders++;
+ }
+ tablets++;
+ Ctest << "tablet " << tablet.GetTabletID() << "." << tablet.GetFollowerID() << " on node " << tablet.GetNodeID() << Endl;
+ }
+ }
+ UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
+ UNIT_ASSERT_VALUES_EQUAL(tablets, 2);
+ // if (tablets != 2) {
+ // TDispatchOptions options;
+ // runtime.DispatchEvents(options, TDuration::MilliSeconds(1000));
+ // UNIT_ASSERT(false);
+ // }
+ }
+ }
+
Y_UNIT_TEST(TestHiveBalancerWithSystemTablets) {
static const int NUM_NODES = 6;
static const int NUM_TABLETS = 12;
@@ -3779,7 +3974,10 @@ Y_UNIT_TEST_SUITE(THiveTest) {
TTabletTypes::EType tabletType = TTabletTypes::Dummy;
THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500, tabletType, BINDED_CHANNELS));
ev->Record.SetObjectId(1337);
- ev->Record.SetCrossDataCenterFollowerCount(FOLLOWERS);
+ auto* followerGroup = ev->Record.AddFollowerGroups();
+ followerGroup->SetFollowerCount(FOLLOWERS);
+ followerGroup->SetFollowerCountPerDataCenter(true);
+ followerGroup->SetRequireAllDataCenters(true);
ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true);
ui32 leaderNode = 0;
@@ -3827,9 +4025,19 @@ Y_UNIT_TEST_SUITE(THiveTest) {
}
SendKillLocal(runtime, followersNode);
WaitForEvServerDisconnected(runtime);
- WaitForTabletsBecomeActive(runtime, 1); // hive
+ //WaitForTabletsBecomeActive(runtime, 1); // hive
CreateLocal(runtime, followersNode);
- WaitForTabletsBecomeActive(runtime, 1); // follower
+ {
+ TDispatchOptions options;
+ options.FinalEvents.emplace_back(TEvLocal::EvSyncTablets, NODES);
+ runtime.DispatchEvents(options);
+ }
+ runtime.Register(CreateTabletKiller(tabletId));
+ {
+ TDispatchOptions options;
+ options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, 2);
+ runtime.DispatchEvents(options);
+ }
{
NTabletPipe::TClientConfig pipeConfig;
@@ -3857,9 +4065,9 @@ Y_UNIT_TEST_SUITE(THiveTest) {
}
}
}
- UNIT_ASSERT_VALUES_EQUAL(followers, FOLLOWERS * DCS);
+ UNIT_ASSERT(followers >= (FOLLOWERS * DCS - 1));
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
- UNIT_ASSERT_VALUES_EQUAL(total, 1 + FOLLOWERS * DCS);
+ UNIT_ASSERT(total >= FOLLOWERS * DCS);
}
}
diff --git a/ydb/core/mind/hive/tx__create_tablet.cpp b/ydb/core/mind/hive/tx__create_tablet.cpp
index 8e92d92e53..5606a10f22 100644
--- a/ydb/core/mind/hive/tx__create_tablet.cpp
+++ b/ydb/core/mind/hive/tx__create_tablet.cpp
@@ -82,11 +82,13 @@ public:
compatibilityGroup.SetAllowLeaderPromotion(RequestData.GetAllowFollowerPromotion());
}
if (RequestData.HasCrossDataCenterFollowers()) {
- compatibilityGroup.SetFollowerCount(Self->GetDataCenters());
+ compatibilityGroup.SetFollowerCount(1);
+ compatibilityGroup.SetFollowerCountPerDataCenter(true);
compatibilityGroup.SetRequireAllDataCenters(true);
}
if (RequestData.HasCrossDataCenterFollowerCount()) {
- compatibilityGroup.SetFollowerCount(RequestData.GetCrossDataCenterFollowerCount() * Self->GetDataCenters());
+ compatibilityGroup.SetFollowerCount(RequestData.GetCrossDataCenterFollowerCount());
+ compatibilityGroup.SetFollowerCountPerDataCenter(true);
compatibilityGroup.SetRequireAllDataCenters(true);
}
if (RequestData.HasFollowerCount()) {
@@ -273,50 +275,33 @@ public:
auto itFollowerGroup = tablet->FollowerGroups.begin();
for (const auto& srcFollowerGroup : FollowerGroups) {
- TFollowerGroup& followerGroup = itFollowerGroup != tablet->FollowerGroups.end() ? *itFollowerGroup : tablet->AddFollowerGroup();
- ui32 oldFollowerCount = tablet->GetActualFollowerCount(followerGroup.Id);
- followerGroup = srcFollowerGroup;
+ TFollowerGroup* followerGroup;
+ if (itFollowerGroup != tablet->FollowerGroups.end()) {
+ followerGroup = &*itFollowerGroup;
+ ++itFollowerGroup;
+ } else {
+ followerGroup = &tablet->AddFollowerGroup();
+ itFollowerGroup = tablet->FollowerGroups.end();
+ }
+ *followerGroup = srcFollowerGroup;
TVector<ui32> allowedDataCenters;
- for (const TDataCenterId& dc : followerGroup.AllowedDataCenters) {
+ for (const TDataCenterId& dc : followerGroup->AllowedDataCenters) {
allowedDataCenters.push_back(DataCenterFromString(dc));
}
- db.Table<Schema::TabletFollowerGroup>().Key(TabletId, followerGroup.Id).Update(
- NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCount>(followerGroup.GetRawFollowerCount()),
- NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowLeaderPromotion>(followerGroup.AllowLeaderPromotion),
- NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowClientRead>(followerGroup.AllowClientRead),
- NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedNodes>(followerGroup.AllowedNodes),
+ db.Table<Schema::TabletFollowerGroup>().Key(TabletId, followerGroup->Id).Update(
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCount>(followerGroup->GetRawFollowerCount()),
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowLeaderPromotion>(followerGroup->AllowLeaderPromotion),
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowClientRead>(followerGroup->AllowClientRead),
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedNodes>(followerGroup->AllowedNodes),
NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenters>(allowedDataCenters),
- NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenterIds>(followerGroup.AllowedDataCenters),
- NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireAllDataCenters>(followerGroup.RequireAllDataCenters),
- NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>(followerGroup.FollowerCountPerDataCenter),
- NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireDifferentNodes>(followerGroup.RequireDifferentNodes));
-
- for (ui32 i = oldFollowerCount; i < followerGroup.GetComputedFollowerCount(Self->GetDataCenters()); ++i) {
- TFollowerTabletInfo& follower = tablet->AddFollower(followerGroup);
- db.Table<Schema::TabletFollowerTablet>().Key(TabletId, follower.Id).Update(
- NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
- NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0));
- follower.InitTabletMetrics();
- follower.BecomeStopped();
- }
-
- for (ui32 i = followerGroup.GetComputedFollowerCount(Self->GetDataCenters()); i < oldFollowerCount; ++i) {
- auto itFollower = tablet->Followers.rbegin();
- while (itFollower != tablet->Followers.rend() && itFollower->FollowerGroup.Id != followerGroup.Id) {
- ++itFollower;
- }
- if (itFollower == tablet->Followers.rend()) {
- break;
- }
- TFollowerTabletInfo& follower = *itFollower;
- db.Table<Schema::TabletFollowerTablet>().Key(TabletId, follower.Id).Delete();
- follower.InitiateStop(SideEffects);
- tablet->Followers.erase(std::prev(itFollower.base()));
- }
- ++itFollowerGroup;
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenterIds>(followerGroup->AllowedDataCenters),
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireAllDataCenters>(followerGroup->RequireAllDataCenters),
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>(followerGroup->FollowerCountPerDataCenter),
+ NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireDifferentNodes>(followerGroup->RequireDifferentNodes));
}
+ Self->UpdateTabletFollowersNumber(*tablet, db, SideEffects);
ProcessTablet(*tablet);
BLOG_D("THive::TTxCreateTablet::Execute Existing tablet " << tablet->ToString() << " has been successfully updated");
@@ -469,19 +454,9 @@ public:
NIceDb::TUpdate<Schema::TabletFollowerGroup::AllowedDataCenterIds>(followerGroup.AllowedDataCenters),
NIceDb::TUpdate<Schema::TabletFollowerGroup::RequireAllDataCenters>(followerGroup.RequireAllDataCenters),
NIceDb::TUpdate<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>(followerGroup.FollowerCountPerDataCenter));
-
- for (ui32 i = 0; i < followerGroup.GetComputedFollowerCount(Self->GetDataCenters()); ++i) {
- TFollowerTabletInfo& follower = tablet.AddFollower(followerGroup);
- follower.Statistics.SetLastAliveTimestamp(now.MilliSeconds());
- db.Table<Schema::TabletFollowerTablet>().Key(TabletId, follower.Id).Update(
- NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(follower.FollowerGroup.Id),
- NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
- NIceDb::TUpdate<Schema::TabletFollowerTablet::Statistics>(follower.Statistics));
- follower.InitTabletMetrics();
- follower.BecomeStopped();
- }
}
+ Self->UpdateTabletFollowersNumber(tablet, db, SideEffects);
Self->OwnerToTablet.emplace(ownerIdx, TabletId);
Self->ObjectToTabletMetrics[tablet.ObjectId].IncreaseCount();
Self->TabletTypeToTabletMetrics[tablet.Type].IncreaseCount();
diff --git a/ydb/core/mind/hive/tx__kill_node.cpp b/ydb/core/mind/hive/tx__kill_node.cpp
index 9e21d6cddf..bcfba26feb 100644
--- a/ydb/core/mind/hive/tx__kill_node.cpp
+++ b/ydb/core/mind/hive/tx__kill_node.cpp
@@ -43,6 +43,9 @@ public:
db.Table<Schema::Node>().Key(NodeId).Update<Schema::Node::Statistics>(node->Statistics);
}
node->BecomeDisconnected();
+ if (node->LocationAcquired) {
+ Self->RemoveRegisteredDataCentersNode(node->Location.GetDataCenterId(), node->Id);
+ }
for (const TActorId& pipeServer : node->PipeServers) {
BLOG_TRACE("THive::TTxKillNode - killing pipe server " << pipeServer);
SideEffects.Send(pipeServer, new TEvents::TEvPoisonPill());
diff --git a/ydb/core/mind/hive/tx__load_everything.cpp b/ydb/core/mind/hive/tx__load_everything.cpp
index d611fcd606..9dfe745d5c 100644
--- a/ydb/core/mind/hive/tx__load_everything.cpp
+++ b/ydb/core/mind/hive/tx__load_everything.cpp
@@ -36,6 +36,7 @@ public:
Self->Keeper.Clear();
Self->Domains.clear();
Self->BlockedOwners.clear();
+ Self->RegisteredDataCenterNodes.clear();
Self->Domains[Self->RootDomainKey].Path = Self->RootDomainName;
Self->Domains[Self->RootDomainKey].HiveId = rootHiveId;
@@ -306,6 +307,8 @@ public:
if (node.CanBeDeleted()) {
db.Table<Schema::Node>().Key(nodeId).Delete();
Self->Nodes.erase(nodeId);
+ } else if (node.IsUnknown() && node.LocationAcquired) {
+ Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id);
}
if (!nodeRowset.Next())
return false;
@@ -487,8 +490,6 @@ public:
tabletInfo.AcquireAllocationUnits();
}
- std::unordered_map<std::pair<TLeaderTabletInfo*, TFollowerGroup*>, ui32> followersPerGroup;
-
{
auto tabletFollowerGroupRowset = db.Table<Schema::TabletFollowerGroup>().Select();
if (!tabletFollowerGroupRowset.IsReady())
@@ -518,7 +519,6 @@ public:
followerGroup.LocalNodeOnly = tabletFollowerGroupRowset.GetValueOrDefault<Schema::TabletFollowerGroup::LocalNodeOnly>();
followerGroup.FollowerCountPerDataCenter = tabletFollowerGroupRowset.GetValueOrDefault<Schema::TabletFollowerGroup::FollowerCountPerDataCenter>();
followerGroup.RequireDifferentNodes = tabletFollowerGroupRowset.GetValueOrDefault<Schema::TabletFollowerGroup::RequireDifferentNodes>();
- followersPerGroup[{tablet, &followerGroup}] = 0;
}
if (!tabletFollowerGroupRowset.Next())
return false;
@@ -550,7 +550,6 @@ public:
follower.BecomeStopped();
}
}
- followersPerGroup[{tablet, &followerGroup}]++;
}
if (!tabletFollowerRowset.Next())
return false;
@@ -558,22 +557,6 @@ public:
}
{
- for (auto& [id, count] : followersPerGroup) {
- TFollowerGroup& followerGroup(*id.second);
- while (followerGroup.GetComputedFollowerCount(Self->GetDataCenters()) > count) {
- TFollowerTabletInfo& follower = id.first->AddFollower(followerGroup);
- follower.InitTabletMetrics();
- follower.BecomeStopped();
- db.Table<Schema::TabletFollowerTablet>()
- .Key(id.first->Id, follower.Id)
- .Update(NIceDb::TUpdate<Schema::TabletFollowerTablet::FollowerNode>(0),
- NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(followerGroup.Id));
- ++count;
- }
- }
- }
-
- {
auto metricsRowset = db.Table<Schema::Metrics>().Select();
if (!metricsRowset.IsReady())
return false;
diff --git a/ydb/core/mind/hive/tx__process_boot_queue.cpp b/ydb/core/mind/hive/tx__process_boot_queue.cpp
index 0ca8a6d23a..56160d9113 100644
--- a/ydb/core/mind/hive/tx__process_boot_queue.cpp
+++ b/ydb/core/mind/hive/tx__process_boot_queue.cpp
@@ -14,10 +14,11 @@ public:
TTxType GetTxType() const override { return NHive::TXTYPE_PROCESS_BOOT_QUEUE; }
- bool Execute(TTransactionContext&, const TActorContext&) override {
+ bool Execute(TTransactionContext& txc, const TActorContext&) override {
BLOG_D("THive::TTxProcessBootQueue()::Execute");
SideEffects.Reset(Self->SelfId());
- Self->ExecuteProcessBootQueue(SideEffects);
+ NIceDb::TNiceDb db(txc.DB);
+ Self->ExecuteProcessBootQueue(db, SideEffects);
return true;
}
diff --git a/ydb/core/mind/hive/tx__status.cpp b/ydb/core/mind/hive/tx__status.cpp
index ae940878a7..5803f7c577 100644
--- a/ydb/core/mind/hive/tx__status.cpp
+++ b/ydb/core/mind/hive/tx__status.cpp
@@ -33,7 +33,7 @@ public:
NActorsInterconnect::TNodeLocation location;
node.Location.Serialize(&location, false);
db.Table<Schema::Node>().Key(nodeId).Update<Schema::Node::Location>(location);
- Self->UpdateRegisteredDataCenters(node.Location.GetDataCenterId());
+ Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id);
}
Self->ProcessWaitQueue(); // new node connected
if (node.Drain && Self->BalancerNodes.count(nodeId) == 0) {
diff --git a/ydb/core/mind/hive/tx__update_tablet_status.cpp b/ydb/core/mind/hive/tx__update_tablet_status.cpp
index 3aae018458..c736be00aa 100644
--- a/ydb/core/mind/hive/tx__update_tablet_status.cpp
+++ b/ydb/core/mind/hive/tx__update_tablet_status.cpp
@@ -111,6 +111,7 @@ public:
db.Table<Schema::Tablet>().Key(TabletId).Update(NIceDb::TUpdate<Schema::Tablet::LeaderNode>(tablet->NodeId),
NIceDb::TUpdate<Schema::Tablet::KnownGeneration>(Generation),
NIceDb::TUpdate<Schema::Tablet::Statistics>(tablet->Statistics));
+ Self->UpdateTabletFollowersNumber(leader, db, SideEffects);
} else {
db.Table<Schema::TabletFollowerTablet>().Key(TabletId, FollowerId).Update(
NIceDb::TUpdate<Schema::TabletFollowerTablet::GroupID>(tablet->AsFollower().FollowerGroup.Id),