diff options
author | chertus <azuikov@ydb.tech> | 2023-04-07 19:07:22 +0300 |
---|---|---|
committer | chertus <azuikov@ydb.tech> | 2023-04-07 19:07:22 +0300 |
commit | e3de97527b32f1a34cad4916af34e6acaa0cefa2 (patch) | |
tree | 46779c5679412dd6dc864bfdecd3c6018c76e429 | |
parent | 262c15b53fe9cc3a5cf3a2b859ccf81e19d91f41 (diff) | |
download | ydb-e3de97527b32f1a34cad4916af34e6acaa0cefa2.tar.gz |
fix infinite loop in GC logic
-rw-r--r-- | ydb/core/tx/columnshard/blob_manager.cpp | 12 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/blob_manager.h | 4 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/blob_manager_txs.cpp | 13 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/columnshard_impl.cpp | 6 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/columnshard_impl.h | 2 |
5 files changed, 24 insertions, 13 deletions
diff --git a/ydb/core/tx/columnshard/blob_manager.cpp b/ydb/core/tx/columnshard/blob_manager.cpp index db33d71e5fc..4526fceb7a1 100644 --- a/ydb/core/tx/columnshard/blob_manager.cpp +++ b/ydb/core/tx/columnshard/blob_manager.cpp @@ -211,11 +211,13 @@ bool TBlobManager::LoadState(IBlobManagerDb& db) { return true; } -bool TBlobManager::CanCollectGarbage() const { +bool TBlobManager::CanCollectGarbage(bool cleanupOnly) const { if (KeepsToErase.size() || DeletesToErase.size()) { return true; } - + if (cleanupOnly) { + return false; + } return NeedStorageCG(); } @@ -367,12 +369,11 @@ THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> TBlobManager: return requests; } -size_t TBlobManager::CleanupFlaggedBlobs(IBlobManagerDb& db) { +size_t TBlobManager::CleanupFlaggedBlobs(IBlobManagerDb& db, size_t maxBlobsToCleanup) { if (KeepsToErase.empty() && DeletesToErase.empty()) { return 0; } - static constexpr size_t maxBlobsToCleanup = TLimits::MAX_BLOBS_TO_DELETE; size_t numBlobs = 0; for (; !KeepsToErase.empty() && numBlobs < maxBlobsToCleanup; ++numBlobs) { @@ -385,6 +386,7 @@ size_t TBlobManager::CleanupFlaggedBlobs(IBlobManagerDb& db) { DeletesToErase.pop_front(); } + Y_VERIFY(numBlobs <= maxBlobsToCleanup); return numBlobs; } @@ -405,7 +407,7 @@ void TBlobManager::OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev, // NOTE: It clears blobs of different groups. // It's expected to be safe cause we have GC result for the blobs or don't need such result. size_t maxBlobsToCleanup = TLimits::MAX_BLOBS_TO_DELETE; - maxBlobsToCleanup -= CleanupFlaggedBlobs(db); + maxBlobsToCleanup -= CleanupFlaggedBlobs(db, maxBlobsToCleanup); size_t blobsToForget = keepList.size() + dontKeepList.size(); diff --git a/ydb/core/tx/columnshard/blob_manager.h b/ydb/core/tx/columnshard/blob_manager.h index 3302097ff0c..f9922a928dc 100644 --- a/ydb/core/tx/columnshard/blob_manager.h +++ b/ydb/core/tx/columnshard/blob_manager.h @@ -210,14 +210,14 @@ public: // Loads the state at startup bool LoadState(IBlobManagerDb& db); - bool CanCollectGarbage() const; + bool CanCollectGarbage(bool cleanupOnly = false) const; bool NeedStorageCG() const; // Prepares Keep/DontKeep lists and GC barrier THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> PreparePerGroupGCRequests(); // Cleanup blobs that have correct flags (skipped or already marked with correct flags) - size_t CleanupFlaggedBlobs(IBlobManagerDb& db); + size_t CleanupFlaggedBlobs(IBlobManagerDb& db, size_t maxBlobsToCleanup); // Called with GC result received from Distributed Storage void OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev, IBlobManagerDb& db); diff --git a/ydb/core/tx/columnshard/blob_manager_txs.cpp b/ydb/core/tx/columnshard/blob_manager_txs.cpp index 68efa9ee1f2..8f1206d09c7 100644 --- a/ydb/core/tx/columnshard/blob_manager_txs.cpp +++ b/ydb/core/tx/columnshard/blob_manager_txs.cpp @@ -10,17 +10,21 @@ namespace NKikimr::NColumnShard { // Run GC related logic of the BlobManager class TTxRunGC : public NTabletFlatExecutor::TTransactionBase<TColumnShard> { THashMap<ui32, std::unique_ptr<TEvBlobStorage::TEvCollectGarbage>> Requests; + bool Cleanup = false; + public: TTxRunGC(TColumnShard* self) : TBase(self) {} bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { + LOG_S_TRACE("TTxRunGC.Execute at tablet " << Self->TabletID()); Y_UNUSED(ctx); // Cleanup delayed blobs before next GC TBlobManagerDb blobManagerDb(txc.DB); - if (Self->BlobManager->CleanupFlaggedBlobs(blobManagerDb)) { + if (Self->BlobManager->CleanupFlaggedBlobs(blobManagerDb, TLimits::MAX_BLOBS_TO_DELETE)) { + Cleanup = true; return true; } @@ -29,8 +33,11 @@ public: } void Complete(const TActorContext& ctx) override { - if (Requests.empty()) { - Self->ScheduleNextGC(ctx); + LOG_S_TRACE("TTxRunGC.Complete at tablet " << Self->TabletID()); + + /// @warning it's a loop Complete -> Execute. We must exit from it fo sure. + if (Cleanup) { + Self->ScheduleNextGC(ctx, true); } for (auto& r : Requests) { diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index 3acaa70ec87..1cb8d84cd4b 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -582,9 +582,11 @@ void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto, } } -void TColumnShard::ScheduleNextGC(const TActorContext& ctx) { +void TColumnShard::ScheduleNextGC(const TActorContext& ctx, bool cleanupOnly) { + LOG_S_DEBUG("Scheduling GC at tablet " << TabletID()); + UpdateBlobMangerCounters(); - if (BlobManager->CanCollectGarbage()) { + if (BlobManager->CanCollectGarbage(cleanupOnly)) { Execute(CreateTxRunGc(), ctx); } } diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index 1d31ed4879a..5122eb7a0bf 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -463,7 +463,7 @@ private: bool GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName, NOlap::TEvictedBlob&& evicted, std::vector<NOlap::TBlobRange>&& ranges); - void ScheduleNextGC(const TActorContext& ctx); + void ScheduleNextGC(const TActorContext& ctx, bool cleanupOnly = false); std::unique_ptr<TEvPrivate::TEvIndexing> SetupIndexation(); std::unique_ptr<TEvPrivate::TEvCompaction> SetupCompaction(); |