aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alexander Rutkovsky <alexvru@mail.ru> 2022-03-10 13:43:22 +0300
committer Alexander Rutkovsky <alexvru@mail.ru> 2022-03-10 13:43:22 +0300
commit bd789f93c8f22764332d81dec16b2a39ab32e274 (patch)
tree f57c95b217cbb8e3e6f0d1bde51ca7a705ae31af
parent 13ec41e2ad5944d88687beb484d24250d9f96d3f (diff)
download ydb-bd789f93c8f22764332d81dec16b2a39ab32e274.tar.gz
Fix NodeWarden pipe connect race KIKIMR-13568
ref:b7652cfd6c7d3752ae1533df9ec2140bd73dc9e9
-rw-r--r-- ydb/core/mind/bscontroller/config.cpp 2
-rw-r--r-- ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp 2
-rw-r--r-- ydb/core/mind/bscontroller/impl.h 2
-rw-r--r-- ydb/core/mind/bscontroller/register_node.cpp 14
4 files changed, 11 insertions, 9 deletions
diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp
index aa7d6efceea..47b522f24dc 100644
--- a/ydb/core/mind/bscontroller/config.cpp
+++ b/ydb/core/mind/bscontroller/config.cpp
@@ -23,7 +23,7 @@ namespace NKikimr::NBsController {
for (auto &pair : Services) {
const TNodeId &nodeId = pair.first;
- if (TNodeInfo *node = Self->FindNode(nodeId); node && node->IsRegistered) {
+ if (TNodeInfo *node = Self->FindNode(nodeId); node && node->ConnectedCount) {
auto event = MakeHolder<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>();
auto& record = event->Record;
pair.second.Swap(&record);
diff --git a/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp b/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp
index b3bf230de82..7c274224baf 100644
--- a/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp
+++ b/ydb/core/mind/bscontroller/group_reconfigure_wipe.cpp
@@ -46,7 +46,7 @@ public:
db.Table<Schema::VSlot>().Key(id.GetKey()).Update<Schema::VSlot::Mood>(info->Mood);
// Prepare results for nodes
- if (TNodeInfo *node = Self->FindNode(id.NodeId); node && node->IsRegistered) {
+ if (TNodeInfo *node = Self->FindNode(id.NodeId); node && node->ConnectedCount) {
auto& msg = ResultForNode[id.NodeId];
msg = MakeHolder<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, id.NodeId);
Self->ReadVSlot(*info, msg.Get());
diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h
index ba311a254c3..2cf3989ca42 100644
--- a/ydb/core/mind/bscontroller/impl.h
+++ b/ydb/core/mind/bscontroller/impl.h
@@ -730,7 +730,7 @@ public:
public:
using Table = Schema::Node;
- bool IsRegistered = false;
+ ui32 ConnectedCount = 0;
Table::NextPDiskID::Type NextPDiskID;
// in-mem only
std::map<TString, NPDisk::TDriveData> KnownDrives;
diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp
index 9e0df640a8e..2a88fec38e2 100644
--- a/ydb/core/mind/bscontroller/register_node.cpp
+++ b/ydb/core/mind/bscontroller/register_node.cpp
@@ -233,8 +233,6 @@ public:
}
}
- TNodeInfo& nodeInfo = Self->GetNode(nodeId);
-
auto res = std::make_unique<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, nodeId);
TSet<ui32> groupIDsToRead;
@@ -285,8 +283,6 @@ public:
Self->ReadGroups(groupsToDiscard, true, res.get());
- nodeInfo.IsRegistered = true;
-
for (auto it = Self->PDisks.lower_bound(minPDiskId); it != Self->PDisks.end() && it->first.NodeId == nodeId; ++it) {
Self->ReadPDisk(it->first, *it->second, res.get(), NKikimrBlobStorage::INITIAL);
}
@@ -426,6 +422,9 @@ void TBlobStorageController::OnRegisterNode(const TActorId& serverId, TNodeId no
}
void TBlobStorageController::OnWardenConnected(TNodeId nodeId) {
+ TNodeInfo& node = GetNode(nodeId);
+ ++node.ConnectedCount;
+
for (auto it = PDisks.lower_bound(TPDiskId::MinForNode(nodeId)); it != PDisks.end() && it->first.NodeId == nodeId; ++it) {
it->second->UpdateOperational(true);
SysViewChangedPDisks.insert(it->first);
@@ -433,6 +432,11 @@ void TBlobStorageController::OnWardenConnected(TNodeId nodeId) {
}
void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) {
+ TNodeInfo& node = GetNode(nodeId);
+ if (--node.ConnectedCount) {
+ return; // there are still some connections from this NW
+ }
+
const TInstant now = TActivationContext::Now();
std::vector<std::pair<TVSlotId, TInstant>> lastSeenReadyQ;
for (auto it = PDisks.lower_bound(TPDiskId::MinForNode(nodeId)); it != PDisks.end() && it->first.NodeId == nodeId; ++it) {
@@ -461,8 +465,6 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) {
Send(SelfHealId, sh.Release());
}
ScrubState.OnNodeDisconnected(nodeId);
- TNodeInfo& node = GetNode(nodeId);
- node.IsRegistered = false;
EraseKnownDrivesOnDisconnected(&node);
if (!lastSeenReadyQ.empty()) {
Execute(CreateTxUpdateLastSeenReady(std::move(lastSeenReadyQ)));