diff options
author | Alexander Rutkovsky <alexvru@mail.ru> | 2022-03-10 13:43:22 +0300 |
---|---|---|
committer | Alexander Rutkovsky <alexvru@mail.ru> | 2022-03-10 13:43:22 +0300 |
commit | bd789f93c8f22764332d81dec16b2a39ab32e274 (patch) | |
tree | f57c95b217cbb8e3e6f0d1bde51ca7a705ae31af | |
parent | 13ec41e2ad5944d88687beb484d24250d9f96d3f (diff) | |
download | ydb-bd789f93c8f22764332d81dec16b2a39ab32e274.tar.gz |
Fix NodeWarden pipe connect race KIKIMR-13568
ref:b7652cfd6c7d3752ae1533df9ec2140bd73dc9e9
-rw-r--r-- | ydb/core/mind/bscontroller/config.cpp | 2 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp | 2 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/impl.h | 2 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/register_node.cpp | 14 |
4 files changed, 11 insertions, 9 deletions
diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp index aa7d6efceea..47b522f24dc 100644 --- a/ydb/core/mind/bscontroller/config.cpp +++ b/ydb/core/mind/bscontroller/config.cpp @@ -23,7 +23,7 @@ namespace NKikimr::NBsController { for (auto &pair : Services) { const TNodeId &nodeId = pair.first; - if (TNodeInfo *node = Self->FindNode(nodeId); node && node->IsRegistered) { + if (TNodeInfo *node = Self->FindNode(nodeId); node && node->ConnectedCount) { auto event = MakeHolder<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(); auto& record = event->Record; pair.second.Swap(&record); diff --git a/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp b/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp index b3bf230de82..7c274224baf 100644 --- a/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp +++ b/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp @@ -46,7 +46,7 @@ public: db.Table<Schema::VSlot>().Key(id.GetKey()).Update<Schema::VSlot::Mood>(info->Mood); // Prepare results for nodes - if (TNodeInfo *node = Self->FindNode(id.NodeId); node && node->IsRegistered) { + if (TNodeInfo *node = Self->FindNode(id.NodeId); node && node->ConnectedCount) { auto& msg = ResultForNode[id.NodeId]; msg = MakeHolder<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, id.NodeId); Self->ReadVSlot(*info, msg.Get()); diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index ba311a254c3..2cf3989ca42 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -730,7 +730,7 @@ public: public: using Table = Schema::Node; - bool IsRegistered = false; + ui32 ConnectedCount = 0; Table::NextPDiskID::Type NextPDiskID; // in-mem only std::map<TString, NPDisk::TDriveData> KnownDrives; diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index 9e0df640a8e..2a88fec38e2 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -233,8 +233,6 @@ public: } } - TNodeInfo& nodeInfo = Self->GetNode(nodeId); - auto res = std::make_unique<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, nodeId); TSet<ui32> groupIDsToRead; @@ -285,8 +283,6 @@ public: Self->ReadGroups(groupsToDiscard, true, res.get()); - nodeInfo.IsRegistered = true; - for (auto it = Self->PDisks.lower_bound(minPDiskId); it != Self->PDisks.end() && it->first.NodeId == nodeId; ++it) { Self->ReadPDisk(it->first, *it->second, res.get(), NKikimrBlobStorage::INITIAL); } @@ -426,6 +422,9 @@ void TBlobStorageController::OnRegisterNode(const TActorId& serverId, TNodeId no } void TBlobStorageController::OnWardenConnected(TNodeId nodeId) { + TNodeInfo& node = GetNode(nodeId); + ++node.ConnectedCount; + for (auto it = PDisks.lower_bound(TPDiskId::MinForNode(nodeId)); it != PDisks.end() && it->first.NodeId == nodeId; ++it) { it->second->UpdateOperational(true); SysViewChangedPDisks.insert(it->first); @@ -433,6 +432,11 @@ void TBlobStorageController::OnWardenConnected(TNodeId nodeId) { } void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) { + TNodeInfo& node = GetNode(nodeId); + if (--node.ConnectedCount) { + return; // there are still some connections from this NW + } + const TInstant now = TActivationContext::Now(); std::vector<std::pair<TVSlotId, TInstant>> lastSeenReadyQ; for (auto it = PDisks.lower_bound(TPDiskId::MinForNode(nodeId)); it != PDisks.end() && it->first.NodeId == nodeId; ++it) { @@ -461,8 +465,6 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) { Send(SelfHealId, sh.Release()); } ScrubState.OnNodeDisconnected(nodeId); - TNodeInfo& node = GetNode(nodeId); - node.IsRegistered = false; EraseKnownDrivesOnDisconnected(&node); if (!lastSeenReadyQ.empty()) { Execute(CreateTxUpdateLastSeenReady(std::move(lastSeenReadyQ))); |