about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: alexvru <alexvru@ydb.tech> 2023-03-17 10:10:30 +0300
committer: alexvru <alexvru@ydb.tech> 2023-03-17 10:10:30 +0300
commit: 0223f0e221c12bd5eb3a532c2c12d3dc00ae77e8 (patch)
tree: 867ca35fabe91353788da54c7045933bcf527b39
parent: b968d8bab7e871258ee24a0b36cbc52f54c57e93 (diff)
download: ydb-0223f0e221c12bd5eb3a532c2c12d3dc00ae77e8.tar.gz
Fix slay bug
-rw-r--r-- ydb/core/blobstorage/nodewarden/node_warden_impl.cpp 25
-rw-r--r-- ydb/core/blobstorage/nodewarden/node_warden_impl.h 3
-rw-r--r-- ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp 10
-rw-r--r-- ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp 15
4 files changed, 31 insertions, 22 deletions
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
index 9a9c5fa811..05e2f3c32e 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
@@ -182,31 +182,38 @@ void TNodeWarden::Handle(TEvInterconnect::TEvNodeInfo::TPtr ev) {
void TNodeWarden::Handle(NPDisk::TEvSlayResult::TPtr ev) {
const NPDisk::TEvSlayResult &msg = *ev->Get();
const TVSlotId vslotId(LocalNodeId, msg.PDiskId, msg.VSlotId);
- STLOG(PRI_INFO, BS_NODE, NW28, "Handle(NPDisk::TEvSlayResult)", (Msg, msg.ToString()));
+ const auto it = SlayInFlight.find(vslotId);
+ Y_VERIFY_DEBUG(it != SlayInFlight.end());
+ STLOG(PRI_INFO, BS_NODE, NW28, "Handle(NPDisk::TEvSlayResult)", (Msg, msg.ToString()),
+ (ExpectedRound, it != SlayInFlight.end() ? std::make_optional(it->second) : std::nullopt));
+ if (it == SlayInFlight.end() || it->second != msg.SlayOwnerRound) {
+ return; // outdated response
+ }
switch (msg.Status) {
- case NKikimrProto::NOTREADY:
+ case NKikimrProto::NOTREADY: {
+ const ui64 round = NextLocalPDiskInitOwnerRound();
TActivationContext::Schedule(TDuration::Seconds(1), new IEventHandleFat(MakeBlobStoragePDiskID(LocalNodeId,
- msg.PDiskId), SelfId(), new NPDisk::TEvSlay(msg.VDiskId, msg.SlayOwnerRound, msg.PDiskId, msg.VSlotId)));
+ msg.PDiskId), SelfId(), new NPDisk::TEvSlay(msg.VDiskId, round, msg.PDiskId, msg.VSlotId)));
+ it->second = round;
break;
+ }
case NKikimrProto::OK:
- case NKikimrProto::ALREADY: {
+ case NKikimrProto::ALREADY:
+ SlayInFlight.erase(it);
if (const auto vdiskIt = LocalVDisks.find(vslotId); vdiskIt == LocalVDisks.end()) {
SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::DESTROYED);
} else {
SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::WIPED);
-
TVDiskRecord& vdisk = vdiskIt->second;
- Y_VERIFY(vdisk.SlayInFlight);
- vdisk.SlayInFlight = false;
StartLocalVDiskActor(vdisk, TDuration::Zero()); // restart actor after successful wiping
SendDiskMetrics(false);
}
break;
- }
- case NKikimrProto::CORRUPTED:
+ case NKikimrProto::CORRUPTED: // this branch doesn't really work
case NKikimrProto::ERROR:
+ SlayInFlight.erase(it);
STLOG(PRI_ERROR, BS_NODE, NW29, "Handle(NPDisk::TEvSlayResult) error", (Msg, msg.ToString()));
SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::OPERATION_ERROR);
break;
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.h b/ydb/core/blobstorage/nodewarden/node_warden_impl.h
index fed2d406c3..fb329fc516 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_impl.h
+++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.h
@@ -261,8 +261,6 @@ namespace NKikimr::NStorage {
std::optional<TVDiskID> WhiteboardVDiskId;
ui64 WhiteboardInstanceGuid;
- bool SlayInFlight = false;
-
NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
std::optional<NKikimrBlobStorage::EVDiskStatus> ReportedVDiskStatus; // last reported to BSC
@@ -307,6 +305,7 @@ namespace NKikimr::NStorage {
};
std::map<TVSlotId, TVDiskRecord> LocalVDisks;
+ std::map<TVSlotId, ui64> SlayInFlight;
TIntrusiveList<TVDiskRecord, TUnreportedMetricTag> VDisksWithUnreportedMetrics;
void DestroyLocalVDisk(TVDiskRecord& vdisk);
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
index c37c86421a..7a5dce1cfa 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
@@ -230,9 +230,11 @@ namespace NKikimr::NStorage {
PoisonLocalVDisk(value);
vdisks << (std::exchange(first, false) ? "" : ", ") << value.GetVDiskId().ToString();
- if (value.SlayInFlight) {
- Send(MakeBlobStoragePDiskID(key.NodeId, key.PDiskId), new NPDisk::TEvSlay(value.GetVDiskId(),
- NextLocalPDiskInitOwnerRound(), key.PDiskId, key.VDiskSlotId));
+ if (const auto it = SlayInFlight.find(key); it != SlayInFlight.end()) {
+ const ui64 round = NextLocalPDiskInitOwnerRound();
+ Send(MakeBlobStoragePDiskID(key.NodeId, key.PDiskId), new NPDisk::TEvSlay(value.GetVDiskId(), round,
+ key.PDiskId, key.VDiskSlotId));
+ it->second = round;
} else {
StartLocalVDiskActor(value, TDuration::Zero());
}
@@ -245,7 +247,7 @@ namespace NKikimr::NStorage {
} else {
for (auto it = LocalVDisks.lower_bound(from); it != LocalVDisks.end() && it->first <= to; ++it) {
auto& [key, value] = *it;
- if (!value.RuntimeData && !value.SlayInFlight) {
+ if (!value.RuntimeData && !SlayInFlight.contains(key)) {
StartLocalVDiskActor(value, TDuration::Zero());
}
}
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
index c76597d6be..04b02ed56e 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
@@ -59,10 +59,10 @@ namespace NKikimr::NStorage {
const bool donorMode = vdisk.Config.HasDonorMode();
STLOG(PRI_DEBUG, BS_NODE, NW23, "StartLocalVDiskActor", (RestartInFlight, restartInFlight),
- (SlayInFlight, vdisk.SlayInFlight), (VDiskId, vdisk.GetVDiskId()), (VSlotId, vslotId),
+ (SlayInFlight, SlayInFlight.contains(vslotId)), (VDiskId, vdisk.GetVDiskId()), (VSlotId, vslotId),
(PDiskGuid, pdiskGuid), (DonorMode, donorMode));
- if (restartInFlight || vdisk.SlayInFlight) {
+ if (restartInFlight || SlayInFlight.contains(vslotId)) {
return;
}
@@ -268,15 +268,16 @@ namespace NKikimr::NStorage {
}
void TNodeWarden::Slay(TVDiskRecord& vdisk) {
+ const TVSlotId vslotId = vdisk.GetVSlotId();
STLOG(PRI_INFO, BS_NODE, NW33, "Slay", (VDiskId, vdisk.GetVDiskId()), (VSlotId, vdisk.GetVSlotId()),
- (SlayInFlight, vdisk.SlayInFlight));
- if (!vdisk.SlayInFlight) {
+ (SlayInFlight, SlayInFlight.contains(vslotId)));
+ if (!SlayInFlight.contains(vslotId)) {
PoisonLocalVDisk(vdisk);
const TVSlotId vslotId = vdisk.GetVSlotId();
const TActorId pdiskServiceId = MakeBlobStoragePDiskID(vslotId.NodeId, vslotId.PDiskId);
- Send(pdiskServiceId, new NPDisk::TEvSlay(vdisk.GetVDiskId(), NextLocalPDiskInitOwnerRound(),
- vslotId.PDiskId, vslotId.VDiskSlotId));
- vdisk.SlayInFlight = true;
+ const ui64 round = NextLocalPDiskInitOwnerRound();
+ Send(pdiskServiceId, new NPDisk::TEvSlay(vdisk.GetVDiskId(), round, vslotId.PDiskId, vslotId.VDiskSlotId));
+ SlayInFlight.emplace(vslotId, round);
}
}