about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alexander Rutkovsky <alexander.rutkovsky@gmail.com> 2022-04-25 11:50:29 +0300
committer Alexander Rutkovsky <alexander.rutkovsky@gmail.com> 2022-04-25 11:50:29 +0300
commit a908a67206e5372201544f3989eb15059b44a3d2 (patch)
tree 2b834af923a919beb69b77d3e0460adb8c7987c8
parent ec37b4bad95b11fec649eaa2ffa5c2d23576b633 (diff)
download ydb-a908a67206e5372201544f3989eb15059b44a3d2.tar.gz
Add defrag worker actor timeout support to prevent long snapshot holding KIKIMR-14651
ref:40f30199d9541b2affbf8c1d5d227df4627319c1
-rw-r--r-- ydb/core/base/blobstorage.h 1
-rw-r--r-- ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp 60
-rw-r--r-- ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp 7
-rw-r--r-- ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h 9
-rw-r--r-- ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp 11
-rw-r--r-- ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h 2
6 files changed, 64 insertions, 26 deletions
diff --git a/ydb/core/base/blobstorage.h b/ydb/core/base/blobstorage.h
index a2faee326e5..d7cd026c814 100644
--- a/ydb/core/base/blobstorage.h
+++ b/ydb/core/base/blobstorage.h
@@ -672,6 +672,7 @@ struct TEvBlobStorage {
EvHugeLockChunks,
EvHugeStat,
EvForwardToSkeleton,
+ EvHugeUnlockChunks,
EvYardInitResult = EvPut + 9 * 512, /// 268 636 672
EvLogResult,
diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp
index 2bc876deec9..c3bc4e8c9d2 100644
--- a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp
+++ b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp
@@ -24,6 +24,10 @@ namespace NKikimr {
EvResume = EventSpaceBegin(TEvents::ES_PRIVATE)
};
+ struct TExTimeout {}; // empty tag type caught as an exception (catch (const TExTimeout&)) in the quantum body; the throw site is not visible in this diff
+
+ static constexpr TDuration MaxSnapshotHoldDuration = TDuration::Seconds(30); // delay passed to Schedule() for the TEvWakeup armed just before the scan starts
+
public:
TDefragQuantum(const std::shared_ptr<TDefragCtx>& dctx, const TVDiskID& selfVDiskId,
std::optional<TChunksToDefrag> chunksToDefrag)
@@ -54,38 +58,41 @@ namespace NKikimr {
stat.FreedChunks = ChunksToDefrag->Chunks;
stat.Eof = stat.FoundChunksToDefrag < DCtx->MaxChunksToDefrag;
- LockChunks(*ChunksToDefrag);
+ auto lockedChunks = LockChunks(*ChunksToDefrag);
THPTimer timer;
TDefragQuantumFindRecords findRecords(GetSnapshot(), std::move(*ChunksToDefrag));
- findRecords.Scan(TDuration::MilliSeconds(10), std::bind(&TDefragQuantum::Yield, this));
- if (auto duration = TDuration::Seconds(timer.Passed()); duration >= TDuration::Seconds(30)) {
- STLOG(PRI_ERROR, BS_VDISK_DEFRAG, BSVDD06, VDISKP(DCtx->VCtx->VDiskLogPrefix, "scan too long"),
- (Duration, duration));
+ Schedule(MaxSnapshotHoldDuration, new TEvents::TEvWakeup);
+ try {
+ findRecords.Scan(TDuration::MilliSeconds(10), std::bind(&TDefragQuantum::Yield, this));
+
+ const TActorId rewriterActorId = Register(CreateDefragRewriter(DCtx, SelfVDiskId, SelfActorId,
+ findRecords.RetrieveSnapshot(), findRecords.GetRecordsToRewrite()));
+ THolder<TEvDefragRewritten::THandle> ev;
+ try {
+ ev = WaitForSpecificEvent<TEvDefragRewritten>();
+ } catch (const TPoisonPillException&) {
+ Send(new IEventHandle(TEvents::TSystem::Poison, 0, rewriterActorId, {}, nullptr, 0));
+ throw;
+ } catch (const TExTimeout&) {
+ Send(new IEventHandle(TEvents::TSystem::Poison, 0, rewriterActorId, {}, nullptr, 0));
+ throw;
+ }
+ stat.RewrittenRecs = ev->Get()->RewrittenRecs;
+ stat.RewrittenBytes = ev->Get()->RewrittenBytes;
+ } catch (const TExTimeout&) {
+ Send(DCtx->HugeKeeperId, new TEvHugeUnlockChunks(std::move(lockedChunks)));
+ STLOG(PRI_ERROR, BS_VDISK_DEFRAG, BSVDD06, VDISKP(DCtx->VCtx->VDiskLogPrefix, "defrag worker timed out"));
}
- const TActorId rewriterActorId = Register(CreateDefragRewriter(DCtx, SelfVDiskId, SelfActorId,
- findRecords.RetrieveSnapshot(), findRecords.GetRecordsToRewrite()));
- THolder<TEvDefragRewritten::THandle> ev;
try {
- ev = WaitForSpecificEvent<TEvDefragRewritten>();
- } catch (const TPoisonPillException& ex) {
- Send(new IEventHandle(TEvents::TSystem::Poison, 0, rewriterActorId, {}, nullptr, 0));
- throw;
- }
+ Compact();
- if (auto duration = TDuration::Seconds(timer.Passed()); duration >= TDuration::Seconds(30)) {
- STLOG(PRI_ERROR, BS_VDISK_DEFRAG, BSVDD07, VDISKP(DCtx->VCtx->VDiskLogPrefix, "scan + rewrite too long"),
- (Duration, duration));
+ auto hugeStat = GetHugeStat();
+ Y_VERIFY(hugeStat.LockedChunks.size() < 100);
+ } catch (const TExTimeout&) {
+ // ignore timeout
}
-
- stat.RewrittenRecs = ev->Get()->RewrittenRecs;
- stat.RewrittenBytes = ev->Get()->RewrittenBytes;
-
- Compact();
-
- auto hugeStat = GetHugeStat();
- Y_VERIFY(hugeStat.LockedChunks.size() < 100);
}
Send(ParentActorId, new TEvDefragQuantumResult(std::move(stat)));
@@ -101,9 +108,10 @@ namespace NKikimr {
WaitForSpecificEvent([](IEventHandle& ev) { return ev.Type == EvResume; });
}
- void LockChunks(const TChunksToDefrag& chunks) {
+ TDefragChunks LockChunks(const TChunksToDefrag& chunks) { // asks the huge keeper to lock chunks.Chunks and returns the LockedChunks list from its reply
Send(DCtx->HugeKeeperId, new TEvHugeLockChunks(chunks.Chunks));
- WaitForSpecificEvent<TEvHugeLockChunksResult>();
+ auto res = WaitForSpecificEvent<TEvHugeLockChunksResult>(); // waits for the result event answering the request sent above
+ return res->Get()->LockedChunks; // the caller keeps this list so it can send TEvHugeUnlockChunks when the quantum times out
}
void Compact() {
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp
index 30d2aeece40..989c5e920c5 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp
@@ -871,6 +871,12 @@ namespace NKikimr {
ctx.Send(ev->Sender, new TEvHugeLockChunksResult(std::move(lockedChunks)));
}
+ void Handle(TEvHugeUnlockChunks::TPtr& ev, const TActorContext& /*ctx*/) { // unlocks every chunk listed in the event; no reply is sent back to the sender
+ for (const auto& d : ev->Get()->Chunks) {
+ State.Pers->Heap->UnlockChunk(d.ChunkId, d.SlotSize); // SlotSize is what the heap uses to select the chain that holds the chunk
+ }
+ }
+
void Handle(TEvHugeStat::TPtr &ev, const TActorContext &ctx) {
LOG_DEBUG(ctx, BS_HULLHUGE,
VDISKP(HugeKeeperCtx->VCtx->VDiskLogPrefix,
@@ -929,6 +935,7 @@ namespace NKikimr {
HFunc(TEvHullHugeWritten, Handle)
HFunc(TEvHullHugeBlobLogged, Handle)
HFunc(TEvHugeLockChunks, Handle)
+ HFunc(TEvHugeUnlockChunks, Handle)
HFunc(TEvHugeStat, Handle)
HFunc(NPDisk::TEvCutLog, Handle)
HFunc(NMon::TEvHttpInfo, Handle)
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h
index 66f3c049594..ae6d7536eab 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h
@@ -158,6 +158,15 @@ namespace NKikimr {
}
};
+ class TEvHugeUnlockChunks : public TEventLocal<TEvHugeUnlockChunks, TEvBlobStorage::EvHugeUnlockChunks> { // local event asking the huge keeper to unlock the listed chunks
+ public:
+ TDefragChunks Chunks; // chunks to unlock; the handler reads ChunkId and SlotSize of each entry
+
+ TEvHugeUnlockChunks(TDefragChunks chunks) // takes the list by value and moves it into the event
+ : Chunks(std::move(chunks))
+ {}
+ };
+
////////////////////////////////////////////////////////////////////////////
// TEvHugeLockChunksResult
////////////////////////////////////////////////////////////////////////////
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp
index e60200de987..ba034bec71f 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp
@@ -180,6 +180,12 @@ namespace NKikimr {
}
}
+ void TChain::UnlockChunk(TChunkID chunkId) { // moves the entry for chunkId from LockedChunks into FreeSpace
+ if (auto it = LockedChunks.find(chunkId); it != LockedChunks.end()) { // a chunkId that is not present in LockedChunks is silently ignored
+ FreeSpace.insert(LockedChunks.extract(it)); // the entry extracted from LockedChunks is inserted into FreeSpace as-is
+ }
+ }
+
THeapStat TChain::GetStat() const {
// how many chunks are required to represent slotsNum
auto slotsToChunks = [] (ui32 slotsNum, ui32 slotsInChunk) {
@@ -773,6 +779,11 @@ namespace NKikimr {
return cd->ChainPtr->LockChunkForAllocation(chunkId);
}
+ void THeap::UnlockChunk(ui32 chunkId, ui32 slotSize) { // forwards the unlock request to the chain selected by slotSize
+ TChainDelegator *cd = Chains.GetChain(slotSize); // NOTE(review): cd is dereferenced below without a null check — confirm GetChain always returns a chain for this slotSize
+ cd->ChainPtr->UnlockChunk(chunkId);
+ }
+
THeapStat THeap::GetStat() const {
return Chains.GetStat();
}
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h
index 651117f2b94..0c0d3370def 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h
@@ -120,6 +120,7 @@ namespace NKikimr {
// returns freed ChunkID if any
TFreeRes Free(const NPrivate::TChunkSlot &id);
bool LockChunkForAllocation(TChunkID chunkId);
+ void UnlockChunk(TChunkID chunkId);
THeapStat GetStat() const;
// returns true is allocated, false otherwise
bool RecoveryModeAllocate(const NPrivate::TChunkSlot &id);
@@ -297,6 +298,7 @@ namespace NKikimr {
ui32 RemoveChunk();
// make chunk not available for allocations, it is used for heap defragmentation
bool LockChunkForAllocation(ui32 chunkId, ui32 slotSize);
+ void UnlockChunk(ui32 chunkId, ui32 slotSize);
THeapStat GetStat() const;
//////////////////////////////////////////////////////////////////////////////////////////