diff options
author | Alexander Rutkovsky <alexander.rutkovsky@gmail.com> | 2022-04-25 11:50:29 +0300 |
---|---|---|
committer | Alexander Rutkovsky <alexander.rutkovsky@gmail.com> | 2022-04-25 11:50:29 +0300 |
commit | a908a67206e5372201544f3989eb15059b44a3d2 (patch) | |
tree | 2b834af923a919beb69b77d3e0460adb8c7987c8 | |
parent | ec37b4bad95b11fec649eaa2ffa5c2d23576b633 (diff) | |
download | ydb-a908a67206e5372201544f3989eb15059b44a3d2.tar.gz |
Add defrag worker actor timeout support to prevent long snapshot holding KIKIMR-14651
ref:40f30199d9541b2affbf8c1d5d227df4627319c1
6 files changed, 64 insertions, 26 deletions
diff --git a/ydb/core/base/blobstorage.h b/ydb/core/base/blobstorage.h
index a2faee326e5..d7cd026c814 100644
--- a/ydb/core/base/blobstorage.h
+++ b/ydb/core/base/blobstorage.h
@@ -672,6 +672,7 @@ struct TEvBlobStorage {
         EvHugeLockChunks,
         EvHugeStat,
         EvForwardToSkeleton,
+        EvHugeUnlockChunks,
 
         EvYardInitResult = EvPut + 9 * 512, /// 268 636 672
         EvLogResult,
diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp
index 2bc876deec9..c3bc4e8c9d2 100644
--- a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp
+++ b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp
@@ -24,6 +24,10 @@ namespace NKikimr {
             EvResume = EventSpaceBegin(TEvents::ES_PRIVATE)
         };
 
+        struct TExTimeout {};
+
+        static constexpr TDuration MaxSnapshotHoldDuration = TDuration::Seconds(30);
+
     public:
         TDefragQuantum(const std::shared_ptr<TDefragCtx>& dctx, const TVDiskID& selfVDiskId,
                 std::optional<TChunksToDefrag> chunksToDefrag)
@@ -54,38 +58,41 @@ namespace NKikimr {
                 stat.FreedChunks = ChunksToDefrag->Chunks;
                 stat.Eof = stat.FoundChunksToDefrag < DCtx->MaxChunksToDefrag;
 
-                LockChunks(*ChunksToDefrag);
+                auto lockedChunks = LockChunks(*ChunksToDefrag);
 
                 THPTimer timer;
                 TDefragQuantumFindRecords findRecords(GetSnapshot(), std::move(*ChunksToDefrag));
-                findRecords.Scan(TDuration::MilliSeconds(10), std::bind(&TDefragQuantum::Yield, this));
-                if (auto duration = TDuration::Seconds(timer.Passed()); duration >= TDuration::Seconds(30)) {
-                    STLOG(PRI_ERROR, BS_VDISK_DEFRAG, BSVDD06, VDISKP(DCtx->VCtx->VDiskLogPrefix, "scan too long"),
-                        (Duration, duration));
+                Schedule(MaxSnapshotHoldDuration, new TEvents::TEvWakeup);
+                try {
+                    findRecords.Scan(TDuration::MilliSeconds(10), std::bind(&TDefragQuantum::Yield, this));
+
+                    const TActorId rewriterActorId = Register(CreateDefragRewriter(DCtx, SelfVDiskId, SelfActorId,
+                        findRecords.RetrieveSnapshot(), findRecords.GetRecordsToRewrite()));
+                    THolder<TEvDefragRewritten::THandle> ev;
+                    try {
+                        ev = WaitForSpecificEvent<TEvDefragRewritten>();
+                    } catch (const TPoisonPillException&) {
+                        Send(new IEventHandle(TEvents::TSystem::Poison, 0, rewriterActorId, {}, nullptr, 0));
+                        throw;
+                    } catch (const TExTimeout&) {
+                        Send(new IEventHandle(TEvents::TSystem::Poison, 0, rewriterActorId, {}, nullptr, 0));
+                        throw;
+                    }
+                    stat.RewrittenRecs = ev->Get()->RewrittenRecs;
+                    stat.RewrittenBytes = ev->Get()->RewrittenBytes;
+                } catch (const TExTimeout&) {
+                    Send(DCtx->HugeKeeperId, new TEvHugeUnlockChunks(std::move(lockedChunks)));
+                    STLOG(PRI_ERROR, BS_VDISK_DEFRAG, BSVDD06, VDISKP(DCtx->VCtx->VDiskLogPrefix, "defrag worker timed out"));
                 }
 
-                const TActorId rewriterActorId = Register(CreateDefragRewriter(DCtx, SelfVDiskId, SelfActorId,
-                    findRecords.RetrieveSnapshot(), findRecords.GetRecordsToRewrite()));
-                THolder<TEvDefragRewritten::THandle> ev;
                 try {
-                    ev = WaitForSpecificEvent<TEvDefragRewritten>();
-                } catch (const TPoisonPillException& ex) {
-                    Send(new IEventHandle(TEvents::TSystem::Poison, 0, rewriterActorId, {}, nullptr, 0));
-                    throw;
-                }
+                    Compact();
 
-                if (auto duration = TDuration::Seconds(timer.Passed()); duration >= TDuration::Seconds(30)) {
-                    STLOG(PRI_ERROR, BS_VDISK_DEFRAG, BSVDD07, VDISKP(DCtx->VCtx->VDiskLogPrefix, "scan + rewrite too long"),
-                        (Duration, duration));
+                    auto hugeStat = GetHugeStat();
+                    Y_VERIFY(hugeStat.LockedChunks.size() < 100);
+                } catch (const TExTimeout&) {
+                    // ignore timeout
                 }
-
-                stat.RewrittenRecs = ev->Get()->RewrittenRecs;
-                stat.RewrittenBytes = ev->Get()->RewrittenBytes;
-
-                Compact();
-
-                auto hugeStat = GetHugeStat();
-                Y_VERIFY(hugeStat.LockedChunks.size() < 100);
             }
 
             Send(ParentActorId, new TEvDefragQuantumResult(std::move(stat)));
@@ -101,9 +108,10 @@ namespace NKikimr {
             WaitForSpecificEvent([](IEventHandle& ev) { return ev.Type == EvResume; });
         }
 
-        void LockChunks(const TChunksToDefrag& chunks) {
+        TDefragChunks LockChunks(const TChunksToDefrag& chunks) {
             Send(DCtx->HugeKeeperId, new TEvHugeLockChunks(chunks.Chunks));
-            WaitForSpecificEvent<TEvHugeLockChunksResult>();
+            auto res = WaitForSpecificEvent<TEvHugeLockChunksResult>();
+            return res->Get()->LockedChunks;
         }
 
         void Compact() {
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp
index 30d2aeece40..989c5e920c5 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.cpp
@@ -871,6 +871,12 @@ namespace NKikimr {
             ctx.Send(ev->Sender, new TEvHugeLockChunksResult(std::move(lockedChunks)));
         }
 
+        void Handle(TEvHugeUnlockChunks::TPtr& ev, const TActorContext& /*ctx*/) {
+            for (const auto& d : ev->Get()->Chunks) {
+                State.Pers->Heap->UnlockChunk(d.ChunkId, d.SlotSize);
+            }
+        }
+
         void Handle(TEvHugeStat::TPtr &ev, const TActorContext &ctx) {
             LOG_DEBUG(ctx, BS_HULLHUGE,
                 VDISKP(HugeKeeperCtx->VCtx->VDiskLogPrefix,
@@ -929,6 +935,7 @@ namespace NKikimr {
             HFunc(TEvHullHugeWritten, Handle)
             HFunc(TEvHullHugeBlobLogged, Handle)
             HFunc(TEvHugeLockChunks, Handle)
+            HFunc(TEvHugeUnlockChunks, Handle)
             HFunc(TEvHugeStat, Handle)
             HFunc(NPDisk::TEvCutLog, Handle)
             HFunc(NMon::TEvHttpInfo, Handle)
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h
index 66f3c049594..ae6d7536eab 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhuge.h
@@ -158,6 +158,15 @@ namespace NKikimr {
         }
     };
 
+    class TEvHugeUnlockChunks : public TEventLocal<TEvHugeUnlockChunks, TEvBlobStorage::EvHugeUnlockChunks> {
+    public:
+        TDefragChunks Chunks;
+
+        TEvHugeUnlockChunks(TDefragChunks chunks)
+            : Chunks(std::move(chunks))
+        {}
+    };
+
     ////////////////////////////////////////////////////////////////////////////
     // TEvHugeLockChunksResult
     ////////////////////////////////////////////////////////////////////////////
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp
index e60200de987..ba034bec71f 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.cpp
@@ -180,6 +180,12 @@ namespace NKikimr {
             }
         }
 
+        void TChain::UnlockChunk(TChunkID chunkId) {
+            if (auto it = LockedChunks.find(chunkId); it != LockedChunks.end()) {
+                FreeSpace.insert(LockedChunks.extract(it));
+            }
+        }
+
         THeapStat TChain::GetStat() const {
             // how many chunks are required to represent slotsNum
             auto slotsToChunks = [] (ui32 slotsNum, ui32 slotsInChunk) {
@@ -773,6 +779,11 @@ namespace NKikimr {
             return cd->ChainPtr->LockChunkForAllocation(chunkId);
         }
 
+        void THeap::UnlockChunk(ui32 chunkId, ui32 slotSize) {
+            TChainDelegator *cd = Chains.GetChain(slotSize);
+            cd->ChainPtr->UnlockChunk(chunkId);
+        }
+
         THeapStat THeap::GetStat() const {
             return Chains.GetStat();
         }
diff --git a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h
index 651117f2b94..0c0d3370def 100644
--- a/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h
+++ b/ydb/core/blobstorage/vdisk/huge/blobstorage_hullhugeheap.h
@@ -120,6 +120,7 @@ namespace NKikimr {
             // returns freed ChunkID if any
             TFreeRes Free(const NPrivate::TChunkSlot &id);
             bool LockChunkForAllocation(TChunkID chunkId);
+            void UnlockChunk(TChunkID chunkId);
             THeapStat GetStat() const;
             // returns true is allocated, false otherwise
             bool RecoveryModeAllocate(const NPrivate::TChunkSlot &id);
@@ -297,6 +298,7 @@ namespace NKikimr {
             ui32 RemoveChunk();
             // make chunk not available for allocations, it is used for heap defragmentation
             bool LockChunkForAllocation(ui32 chunkId, ui32 slotSize);
+            void UnlockChunk(ui32 chunkId, ui32 slotSize);
             THeapStat GetStat() const;
 
             //////////////////////////////////////////////////////////////////////////////////////////