about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author cthulhu <cthulhu@ydb.tech> 2023-05-24 10:33:26 +0300
committer cthulhu <cthulhu@ydb.tech> 2023-05-24 10:33:26 +0300
commit 3109099daf326009d12fe2b8e11b9bc17858b5be (patch)
tree ae5304460824fb9ed4ec8233e6a8c94214c155a7
parent 7cf122beb332410caf13409c7e4cf1b08863ffa6 (diff)
download ydb-3109099daf326009d12fe2b8e11b9bc17858b5be.tar.gz
Fix PDisk log-kill owner race
-rw-r--r-- ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp 14
-rw-r--r-- ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp 3
-rw-r--r-- ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h 7
3 files changed, 18 insertions, 6 deletions
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp
index 5c3027d21a..e2f726d266 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp
@@ -2075,6 +2075,15 @@ void TPDisk::KillOwner(TOwner owner, TOwnerRound killOwnerRound, TCompletionEven
}
}
}
+ if (!pushedOwnerIntoQuarantine && OwnerData[owner].HaveRequestsInFlight()) {
+ pushedOwnerIntoQuarantine = true;
+ ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(OwnerData[owner].OperationLog, "KillOwner(), Add owner to quarantine, "
+ << "HaveRequestsInFlight, OwnerId# " << owner);
+ QuarantineOwners.push_back(owner);
+ LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId
+ << " push ownerId# " << owner
+ << " into quarantine as there are requests in flight");
+ }
if (!pushedOwnerIntoQuarantine) {
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(OwnerData[owner].OperationLog, "KillOwner(), Remove owner without quarantine, OwnerId# " << owner);
Keeper.RemoveOwner(owner);
@@ -2326,10 +2335,13 @@ void TPDisk::ClearQuarantineChunks() {
{
const auto it = std::partition(QuarantineOwners.begin(), QuarantineOwners.end(), [&] (TOwner i) {
- return Keeper.GetOwnerUsed(i);
+ return Keeper.GetOwnerUsed(i) || OwnerData[i].HaveRequestsInFlight();
});
for (auto delIt = it; delIt != QuarantineOwners.end(); ++delIt) {
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(OwnerData[*delIt].OperationLog, "Remove owner from quarantine, OwnerId# " << *delIt);
+ TOwnerRound ownerRound = OwnerData[*delIt].OwnerRound;
+ OwnerData[*delIt].Reset(false);
+ OwnerData[*delIt].OwnerRound = ownerRound;
Keeper.RemoveOwner(*delIt);
LOG_NOTICE_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << PDiskId
<< " removed ownerId# " << *delIt << " from chunks Keeper through QuarantineOwners");
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp
index 74951e1676..7e519ef2be 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl_log.cpp
@@ -452,10 +452,9 @@ void TPDisk::ProcessLogReadQueue() {
}
}
ui64 firstLsnToKeep = 0;
- ui64 firstNonceToKeep = 0;
+ ui64 firstNonceToKeep = SysLogFirstNoncesToKeep.FirstNonceToKeep[logRead.Owner];
if (ownerData.VDiskId != TVDiskID::InvalidId) {
firstLsnToKeep = ownerData.CurrentFirstLsnToKeep;
- firstNonceToKeep = SysLogFirstNoncesToKeep.FirstNonceToKeep[logRead.Owner];
LOG_INFO(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# %" PRIu32 " PrepareToRead ownerId# %" PRIu32
" FirstLsnToKeep: %" PRIu64 " FirstNonceToKeep: %" PRIu64,
(ui32)PDiskId, (ui32)logRead.Owner, (ui64)firstLsnToKeep, (ui64)firstNonceToKeep);
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h
index f6a836aa8d..a8ccc2d741 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_state.h
@@ -199,9 +199,10 @@ struct TOwnerData {
WriteThroughput = NMetrics::TDecayingAverageValue<ui64, NMetrics::DurationPerMinute, NMetrics::DurationPerSecond>();
VDiskSlotId = 0;
- LogReader.Reset();
- InFlight.Reset(TIntrusivePtr<TOwnerInflight>(new TOwnerInflight));
-
+ if (!quarantine) {
+ LogReader.Reset();
+ InFlight.Reset(TIntrusivePtr<TOwnerInflight>(new TOwnerInflight));
+ }
OnQuarantine = quarantine;
}
};