diff options
author | alexvru <alexvru@ydb.tech> | 2023-03-17 10:10:30 +0300 |
---|---|---|
committer | alexvru <alexvru@ydb.tech> | 2023-03-17 10:10:30 +0300 |
commit | 0223f0e221c12bd5eb3a532c2c12d3dc00ae77e8 (patch) | |
tree | 867ca35fabe91353788da54c7045933bcf527b39 | |
parent | b968d8bab7e871258ee24a0b36cbc52f54c57e93 (diff) | |
download | ydb-0223f0e221c12bd5eb3a532c2c12d3dc00ae77e8.tar.gz |
Fix slay bug
4 files changed, 31 insertions, 22 deletions
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp index 9a9c5fa811..05e2f3c32e 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp @@ -182,31 +182,38 @@ void TNodeWarden::Handle(TEvInterconnect::TEvNodeInfo::TPtr ev) { void TNodeWarden::Handle(NPDisk::TEvSlayResult::TPtr ev) { const NPDisk::TEvSlayResult &msg = *ev->Get(); const TVSlotId vslotId(LocalNodeId, msg.PDiskId, msg.VSlotId); - STLOG(PRI_INFO, BS_NODE, NW28, "Handle(NPDisk::TEvSlayResult)", (Msg, msg.ToString())); + const auto it = SlayInFlight.find(vslotId); + Y_VERIFY_DEBUG(it != SlayInFlight.end()); + STLOG(PRI_INFO, BS_NODE, NW28, "Handle(NPDisk::TEvSlayResult)", (Msg, msg.ToString()), + (ExpectedRound, it != SlayInFlight.end() ? std::make_optional(it->second) : std::nullopt)); + if (it == SlayInFlight.end() || it->second != msg.SlayOwnerRound) { + return; // outdated response + } switch (msg.Status) { - case NKikimrProto::NOTREADY: + case NKikimrProto::NOTREADY: { + const ui64 round = NextLocalPDiskInitOwnerRound(); TActivationContext::Schedule(TDuration::Seconds(1), new IEventHandleFat(MakeBlobStoragePDiskID(LocalNodeId, - msg.PDiskId), SelfId(), new NPDisk::TEvSlay(msg.VDiskId, msg.SlayOwnerRound, msg.PDiskId, msg.VSlotId))); + msg.PDiskId), SelfId(), new NPDisk::TEvSlay(msg.VDiskId, round, msg.PDiskId, msg.VSlotId))); + it->second = round; break; + } case NKikimrProto::OK: - case NKikimrProto::ALREADY: { + case NKikimrProto::ALREADY: + SlayInFlight.erase(it); if (const auto vdiskIt = LocalVDisks.find(vslotId); vdiskIt == LocalVDisks.end()) { SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::DESTROYED); } else { SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::WIPED); - TVDiskRecord& vdisk = vdiskIt->second; - Y_VERIFY(vdisk.SlayInFlight); - vdisk.SlayInFlight = false; StartLocalVDiskActor(vdisk, TDuration::Zero()); // restart actor after successful wiping SendDiskMetrics(false); } break; - } - case NKikimrProto::CORRUPTED: + case NKikimrProto::CORRUPTED: // this branch doesn't really work case NKikimrProto::ERROR: + SlayInFlight.erase(it); STLOG(PRI_ERROR, BS_NODE, NW29, "Handle(NPDisk::TEvSlayResult) error", (Msg, msg.ToString())); SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::OPERATION_ERROR); break; diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.h b/ydb/core/blobstorage/nodewarden/node_warden_impl.h index fed2d406c3..fb329fc516 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_impl.h +++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.h @@ -261,8 +261,6 @@ namespace NKikimr::NStorage { std::optional<TVDiskID> WhiteboardVDiskId; ui64 WhiteboardInstanceGuid; - bool SlayInFlight = false; - NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING; std::optional<NKikimrBlobStorage::EVDiskStatus> ReportedVDiskStatus; // last reported to BSC @@ -307,6 +305,7 @@ namespace NKikimr::NStorage { }; std::map<TVSlotId, TVDiskRecord> LocalVDisks; + std::map<TVSlotId, ui64> SlayInFlight; TIntrusiveList<TVDiskRecord, TUnreportedMetricTag> VDisksWithUnreportedMetrics; void DestroyLocalVDisk(TVDiskRecord& vdisk); diff --git a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp index c37c86421a..7a5dce1cfa 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp @@ -230,9 +230,11 @@ namespace NKikimr::NStorage { PoisonLocalVDisk(value); vdisks << (std::exchange(first, false) ? "" : ", ") << value.GetVDiskId().ToString(); - if (value.SlayInFlight) { - Send(MakeBlobStoragePDiskID(key.NodeId, key.PDiskId), new NPDisk::TEvSlay(value.GetVDiskId(), - NextLocalPDiskInitOwnerRound(), key.PDiskId, key.VDiskSlotId)); + if (const auto it = SlayInFlight.find(key); it != SlayInFlight.end()) { + const ui64 round = NextLocalPDiskInitOwnerRound(); + Send(MakeBlobStoragePDiskID(key.NodeId, key.PDiskId), new NPDisk::TEvSlay(value.GetVDiskId(), round, + key.PDiskId, key.VDiskSlotId)); + it->second = round; } else { StartLocalVDiskActor(value, TDuration::Zero()); } @@ -245,7 +247,7 @@ namespace NKikimr::NStorage { } else { for (auto it = LocalVDisks.lower_bound(from); it != LocalVDisks.end() && it->first <= to; ++it) { auto& [key, value] = *it; - if (!value.RuntimeData && !value.SlayInFlight) { + if (!value.RuntimeData && !SlayInFlight.contains(key)) { StartLocalVDiskActor(value, TDuration::Zero()); } } diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp index c76597d6be..04b02ed56e 100644 --- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp +++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp @@ -59,10 +59,10 @@ namespace NKikimr::NStorage { const bool donorMode = vdisk.Config.HasDonorMode(); STLOG(PRI_DEBUG, BS_NODE, NW23, "StartLocalVDiskActor", (RestartInFlight, restartInFlight), - (SlayInFlight, vdisk.SlayInFlight), (VDiskId, vdisk.GetVDiskId()), (VSlotId, vslotId), + (SlayInFlight, SlayInFlight.contains(vslotId)), (VDiskId, vdisk.GetVDiskId()), (VSlotId, vslotId), (PDiskGuid, pdiskGuid), (DonorMode, donorMode)); - if (restartInFlight || vdisk.SlayInFlight) { + if (restartInFlight || SlayInFlight.contains(vslotId)) { return; } @@ -268,15 +268,16 @@ namespace NKikimr::NStorage { } void TNodeWarden::Slay(TVDiskRecord& vdisk) { + const TVSlotId vslotId = vdisk.GetVSlotId(); STLOG(PRI_INFO, BS_NODE, NW33, "Slay", (VDiskId, vdisk.GetVDiskId()), (VSlotId, vdisk.GetVSlotId()), - (SlayInFlight, vdisk.SlayInFlight)); - if (!vdisk.SlayInFlight) { + (SlayInFlight, SlayInFlight.contains(vslotId))); + if (!SlayInFlight.contains(vslotId)) { PoisonLocalVDisk(vdisk); const TVSlotId vslotId = vdisk.GetVSlotId(); const TActorId pdiskServiceId = MakeBlobStoragePDiskID(vslotId.NodeId, vslotId.PDiskId); - Send(pdiskServiceId, new NPDisk::TEvSlay(vdisk.GetVDiskId(), NextLocalPDiskInitOwnerRound(), - vslotId.PDiskId, vslotId.VDiskSlotId)); - vdisk.SlayInFlight = true; + const ui64 round = NextLocalPDiskInitOwnerRound(); + Send(pdiskServiceId, new NPDisk::TEvSlay(vdisk.GetVDiskId(), round, vslotId.PDiskId, vslotId.VDiskSlotId)); + SlayInFlight.emplace(vslotId, round); } } |