diff options
author | cthulhu <cthulhu@ydb.tech> | 2023-05-24 10:33:26 +0300 |
---|---|---|
committer | cthulhu <cthulhu@ydb.tech> | 2023-05-24 10:33:26 +0300 |
commit | 3109099daf326009d12fe2b8e11b9bc17858b5be (patch) | |
tree | ae5304460824fb9ed4ec8233e6a8c94214c155a7 | |
parent | 7cf122beb332410caf13409c7e4cf1b08863ffa6 (diff) | |
download | ydb-3109099daf326009d12fe2b8e11b9bc17858b5be.tar.gz |
Fix PDisk log-kill owner race
-rw-r--r-- | ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp | 14 | ||||
-rw-r--r-- | ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp | 3 | ||||
-rw-r--r-- | ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h | 7 |
3 files changed, 18 insertions, 6 deletions
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp index 5c3027d21a..e2f726d266 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp @@ -2075,6 +2075,15 @@ void TPDisk::KillOwner(TOwner owner, TOwnerRound killOwnerRound, TCompletionEven } } } + if (!pushedOwnerIntoQuarantine && OwnerData[owner].HaveRequestsInFlight()) { + pushedOwnerIntoQuarantine = true; + ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(OwnerData[owner].OperationLog, "KillOwner(), Add owner to quarantine, " + << "HaveRequestsInFlight, OwnerId# " << owner); + QuarantineOwners.push_back(owner); + LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId + << " push ownerId# " << owner + << " into quarantine as there are requests in flight"); + } if (!pushedOwnerIntoQuarantine) { ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(OwnerData[owner].OperationLog, "KillOwner(), Remove owner without quarantine, OwnerId# " << owner); Keeper.RemoveOwner(owner); @@ -2326,10 +2335,13 @@ void TPDisk::ClearQuarantineChunks() { { const auto it = std::partition(QuarantineOwners.begin(), QuarantineOwners.end(), [&] (TOwner i) { - return Keeper.GetOwnerUsed(i); + return Keeper.GetOwnerUsed(i) || OwnerData[i].HaveRequestsInFlight(); }); for (auto delIt = it; delIt != QuarantineOwners.end(); ++delIt) { ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(OwnerData[*delIt].OperationLog, "Remove owner from quarantine, OwnerId# " << *delIt); + TOwnerRound ownerRound = OwnerData[*delIt].OwnerRound; + OwnerData[*delIt].Reset(false); + OwnerData[*delIt].OwnerRound = ownerRound; Keeper.RemoveOwner(*delIt); LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId << " removed ownerId# " << *delIt << " from chunks Keeper through QuarantineOwners"); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp 
b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp index 74951e1676..7e519ef2be 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp @@ -452,10 +452,9 @@ void TPDisk::ProcessLogReadQueue() { } } ui64 firstLsnToKeep = 0; - ui64 firstNonceToKeep = 0; + ui64 firstNonceToKeep = SysLogFirstNoncesToKeep.FirstNonceToKeep[logRead.Owner]; if (ownerData.VDiskId != TVDiskID::InvalidId) { firstLsnToKeep = ownerData.CurrentFirstLsnToKeep; - firstNonceToKeep = SysLogFirstNoncesToKeep.FirstNonceToKeep[logRead.Owner]; LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " PrepareToRead ownerId# %" PRIu32 " FirstLsnToKeep: %" PRIu64 " FirstNonceToKeep: %" PRIu64, (ui32)PDiskId, (ui32)logRead.Owner, (ui64)firstLsnToKeep, (ui64)firstNonceToKeep); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h index f6a836aa8d..a8ccc2d741 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h @@ -199,9 +199,10 @@ struct TOwnerData { WriteThroughput = NMetrics::TDecayingAverageValue<ui64, NMetrics::DurationPerMinute, NMetrics::DurationPerSecond>(); VDiskSlotId = 0; - LogReader.Reset(); - InFlight.Reset(TIntrusivePtr<TOwnerInflight>(new TOwnerInflight)); - + if (!quarantine) { + LogReader.Reset(); + InFlight.Reset(TIntrusivePtr<TOwnerInflight>(new TOwnerInflight)); + } OnQuarantine = quarantine; } }; |