diff options
author | ivanmorozov333 <ivanmorozov@ydb.tech> | 2024-07-07 13:12:46 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-07 13:12:46 +0300 |
commit | bc77d99eea5d69402f7a0806f930529a8c3c7df8 (patch) | |
tree | be743d9465a1dd77c0d647ad52a5cd70f4e708a4 | |
parent | f088e97a0b02cf4f7c010b570335034841f4db34 (diff) | |
download | ydb-bc77d99eea5d69402f7a0806f930529a8c3c7df8.tar.gz |
fix huge blobs volume for start mode withno barrier moving (#6366)
-rw-r--r-- | ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h | 54 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp | 228 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h | 14 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/blobs_action/bs/gc.cpp | 6 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/blobs_action/bs/gc.h | 6 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/counters/blobs_manager.cpp | 85 | ||||
-rw-r--r-- | ydb/core/tx/columnshard/counters/blobs_manager.h | 121 |
7 files changed, 252 insertions, 262 deletions
diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h index d14d7b2dee..51db43f0f0 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h @@ -1,6 +1,7 @@ #pragma once #include <ydb/core/tx/columnshard/common/tablet_id.h> #include <ydb/core/tx/columnshard/blob.h> +#include <ydb/core/util/gen_step.h> #include <ydb/library/accessor/accessor.h> #include <ydb/library/services/services.pb.h> @@ -44,6 +45,59 @@ public: }; +class TBlobsByGenStep { +private: + struct Comparator { + bool operator()(const TLogoBlobID& l, const TLogoBlobID& r) const { + TGenStep gsl(l); + TGenStep gsr(r); + if (gsl == gsr) { + return l < r; + } else { + return gsl < gsr; + } + } + }; + std::set<TLogoBlobID, Comparator> Blobs; +public: + [[nodiscard]] bool Add(const TLogoBlobID& blobId) { + return Blobs.emplace(blobId).second; + } + [[nodiscard]] bool Remove(const TLogoBlobID& blobId) { + return Blobs.erase(blobId); + } + bool IsEmpty() const { + return Blobs.empty(); + } + ui32 GetSize() const { + return Blobs.size(); + } + + TGenStep GetMinGenStepVerified() const { + AFL_VERIFY(Blobs.size()); + return TGenStep(*Blobs.begin()); + } + + template <class TActor> + bool ExtractTo(const TGenStep includeBorder, const ui32 countLimit, const TActor& actor) { + ui32 idx = 0; + for (auto it = Blobs.begin(); it != Blobs.end(); ++it) { + TGenStep gs(*it); + if (includeBorder < gs) { + Blobs.erase(Blobs.begin(), it); + return true; + } + if (++idx > countLimit) { + Blobs.erase(Blobs.begin(), it); + return false; + } + actor(gs, *it); + } + Blobs.clear(); + return true; + } +}; + class TTabletsByBlob { private: THashMap<TUnifiedBlobId, THashSet<TTabletId>> Data; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp index 67e0deccb1..d034e05e18 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp @@ -88,7 +88,6 @@ void TBlobBatch::SendWriteBlobRequest(const TString& blobData, const TUnifiedBlo } void TBlobBatch::OnBlobWriteResult(const TLogoBlobID& blobId, const NKikimrProto::EReplyStatus status) { - BatchInfo->Counters.OnPutResult(blobId.BlobSize()); Y_ABORT_UNLESS(status == NKikimrProto::OK, "The caller must handle unsuccessful status"); Y_ABORT_UNLESS(BatchInfo); Y_ABORT_UNLESS(blobId.Cookie() < BatchInfo->InFlight.size()); @@ -129,6 +128,8 @@ TBlobManager::TBlobManager(TIntrusivePtr<TTabletStorageInfo> tabletInfo, ui32 ge , CurrentGen(gen) , CurrentStep(0) { + BlobsManagerCounters.CurrentGen->Set(CurrentGen); + BlobsManagerCounters.CurrentStep->Set(CurrentStep); } void TBlobManager::RegisterControls(NKikimr::TControlBoard& /*icb*/) { @@ -151,43 +152,19 @@ bool TBlobManager::LoadState(IBlobManagerDb& db, const TTabletId selfTabletId) { return false; } - for (auto it = BlobsToDelete.GetIterator(); it.IsValid(); ++it) { - BlobsManagerCounters.OnDeleteBlobMarker(it.GetBlobId().BlobSize()); - } - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); // Build the list of steps that cannot be garbage collected before Keep flag is set on the blobs - THashSet<TGenStep> genStepsWithBlobsToKeep; - std::map<TGenStep, std::set<TLogoBlobID>> blobsToKeepLocal; + TBlobsByGenStep blobsToKeepLocal; for (const auto& unifiedBlobId : blobsToKeep) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("add_blob_to_keep", unifiedBlobId.ToStringNew()); TLogoBlobID blobId = unifiedBlobId.GetLogoBlobId(); - TGenStep genStep(blobId); - Y_ABORT_UNLESS(LastCollectedGenStep < genStep); - - AFL_VERIFY(blobsToKeepLocal[genStep].emplace(blobId).second)("blob_to_keep_double", unifiedBlobId.ToStringNew()); - BlobsManagerCounters.OnKeepMarker(blobId.BlobSize()); - const ui64 groupId = dsGroupSelector.GetGroup(blobId); - // Keep + DontKeep (probably in different gen:steps) - // GC could go through it to a greater LastCollectedGenStep - if (BlobsToDelete.Contains(SelfTabletId, TUnifiedBlobId(groupId, blobId))) { - continue; - } + Y_ABORT_UNLESS(LastCollectedGenStep < TGenStep(blobId)); - genStepsWithBlobsToKeep.insert(genStep); + AFL_VERIFY(blobsToKeepLocal.Add(blobId))("blob_to_keep_double", unifiedBlobId.ToStringNew()); } std::swap(blobsToKeepLocal, BlobsToKeep); - BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - - AllocatedGenSteps.clear(); - for (const auto& gs : genStepsWithBlobsToKeep) { - AllocatedGenSteps.push_back(new TAllocatedGenStep(gs)); - } - AllocatedGenSteps.push_back(new TAllocatedGenStep({ CurrentGen, 0 })); - - Sort(AllocatedGenSteps.begin(), AllocatedGenSteps.end(), [](const TAllocatedGenStepConstPtr& a, const TAllocatedGenStepConstPtr& b) { - return a->GenStep < b->GenStep; - }); + BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep); return true; } @@ -201,7 +178,7 @@ void TBlobManager::PopGCBarriers(const TGenStep gs) { std::deque<TGenStep> TBlobManager::FindNewGCBarriers() { TGenStep newCollectGenStep = LastCollectedGenStep; std::deque<TGenStep> result; - if (AllocatedGenSteps.empty()) { + if (AllocatedGenSteps.empty() && LastCollectedGenStep < TGenStep(CurrentGen, CurrentStep)) { result.emplace_back(TGenStep(CurrentGen, CurrentStep)); } for (auto& allocated : AllocatedGenSteps) { @@ -212,9 +189,6 @@ std::deque<TGenStep> TBlobManager::FindNewGCBarriers() { result.emplace_back(allocated->GenStep); newCollectGenStep = allocated->GenStep; } - if (result.empty() || LastCollectedGenStep < result.front()) { - result.emplace_front(LastCollectedGenStep); - } return result; } @@ -226,6 +200,22 @@ private: YDB_ACCESSOR_DEF(std::deque<TUnifiedBlobId>, KeepsToErase); YDB_READONLY_DEF(std::shared_ptr<NDataSharing::TStorageSharedBlobsManager>, SharedBlobsManager); public: + ui64 GetKeepBytes() const { + ui64 size = 0; + for (auto&& i : KeepsToErase) { + size += i.BlobSize(); + } + return size; + } + + ui64 GetDeleteBytes() const { + ui64 size = 0; + for (TTabletsByBlob::TIterator it(ExtractedToRemoveFromDB); it.IsValid(); ++it) { + size += it.GetBlobId().BlobSize(); + } + return size; + } + TGCContext(const std::shared_ptr<NDataSharing::TStorageSharedBlobsManager>& sharedBlobsManager) : SharedBlobsManager(sharedBlobsManager) { @@ -237,7 +227,6 @@ public: // TODO: we need only actual channel history here for (ui32 channelIdx = 2; channelIdx < tabletInfo->Channels.size(); ++channelIdx) { const auto& channelHistory = tabletInfo->ChannelInfo(channelIdx)->History; - for (auto it = channelHistory.begin(); it != channelHistory.end(); ++it) { PerGroupGCListsInFlight[TBlobAddress(it->GroupID, channelIdx)]; } @@ -281,78 +270,61 @@ void TBlobManager::DrainDeleteTo(const TGenStep& dest, TGCContext& gcContext) { } } -bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext, const bool controlCapacity) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.size()); - for (; BlobsToKeep.size() && (!controlCapacity || !gcContext.IsFull()); BlobsToKeep.erase(BlobsToKeep.begin())) { - auto gsBlobs = BlobsToKeep.begin(); - TGenStep genStep = gsBlobs->first; +bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.GetSize()); + + const auto pred = [&](const TGenStep& genStep, const TLogoBlobID& logoBlobId) { AFL_VERIFY(LastCollectedGenStep < genStep)("last", LastCollectedGenStep.ToString())("gen", genStep.ToString()); - if (dest < genStep) { - return true; - } - for (auto&& keepBlobIt : gsBlobs->second) { - const ui32 blobGroup = TabletInfo->GroupFor(keepBlobIt.Channel(), keepBlobIt.Generation()); - TBlobAddress bAddress(blobGroup, keepBlobIt.Channel()); - const TUnifiedBlobId keepUnified(blobGroup, keepBlobIt); - gcContext.MutableKeepsToErase().emplace_back(keepUnified); - if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) { - if (keepBlobIt.Generation() == CurrentGen) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew()); - continue; - } - if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew()); - continue; - } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew()); - gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(keepBlobIt); - } else { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew()); - gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(keepBlobIt); + const ui32 blobGroup = TabletInfo->GroupFor(logoBlobId.Channel(), logoBlobId.Generation()); + TBlobAddress bAddress(blobGroup, logoBlobId.Channel()); + const TUnifiedBlobId keepUnified(blobGroup, logoBlobId); + gcContext.MutableKeepsToErase().emplace_back(keepUnified); + if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) { + if (logoBlobId.Generation() == CurrentGen) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew()); + return; } + if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew()); + return; + } + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew()); + gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(logoBlobId); + } else { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew()); + gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(logoBlobId); } - } - return BlobsToKeep.empty(); + }; + + return BlobsToKeep.ExtractTo(dest, gcContext.GetFreeSpace(), pred); } std::shared_ptr<NBlobOperations::NBlobStorage::TGCTask> TBlobManager::BuildGCTask(const TString& storageId, const std::shared_ptr<TBlobManager>& manager, const std::shared_ptr<NDataSharing::TStorageSharedBlobsManager>& sharedBlobsInfo, const std::shared_ptr<NBlobOperations::TRemoveGCCounters>& counters) noexcept { AFL_VERIFY(!CollectGenStepInFlight); - if (BlobsToKeep.empty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) { + if (BlobsToKeep.IsEmpty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) { + BlobsManagerCounters.GCCounters.SkipCollectionEmpty->Add(1); ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep); return nullptr; } if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod(TDuration::Seconds(GC_INTERVAL_SECONDS))) { + BlobsManagerCounters.GCCounters.SkipCollectionThrottling->Add(1); return nullptr; } PreviousGCTime = AppData()->TimeProvider->Now(); TGCContext gcContext(sharedBlobsInfo); - if (FirstGC) { - gcContext.InitializeFirst(TabletInfo); - FirstGC = false; - } - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("action_id", TGUID::CreateTimebased().AsGuidString()); const std::deque<TGenStep> newCollectGenSteps = FindNewGCBarriers(); - AFL_VERIFY(newCollectGenSteps.size()); - AFL_VERIFY(newCollectGenSteps.front() == LastCollectedGenStep); if (GCBarrierPreparation != LastCollectedGenStep) { - if (!GCBarrierPreparation.Generation()) { - for (auto&& newCollectGenStep : newCollectGenSteps) { - if (!DrainKeepTo(newCollectGenStep, gcContext)) { - break; - } - CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); - } - AFL_VERIFY(LastCollectedGenStep <= CollectGenStepInFlight)("last", LastCollectedGenStep)("collect", CollectGenStepInFlight); - } else { - AFL_VERIFY(GCBarrierPreparation.Generation() != CurrentGen); + if (GCBarrierPreparation.Generation()) { + AFL_VERIFY(GCBarrierPreparation.Generation() < CurrentGen); AFL_VERIFY(LastCollectedGenStep <= GCBarrierPreparation); - CollectGenStepInFlight = GCBarrierPreparation; - AFL_VERIFY(DrainKeepTo(*CollectGenStepInFlight, gcContext, false)); + if (DrainKeepTo(GCBarrierPreparation, gcContext)) { + CollectGenStepInFlight = GCBarrierPreparation; + } } } else { DrainDeleteTo(LastCollectedGenStep, gcContext); @@ -361,36 +333,45 @@ std::shared_ptr<NBlobOperations::NBlobStorage::TGCTask> TBlobManager::BuildGCTas if (!DrainKeepTo(newCollectGenStep, gcContext)) { break; } - CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); + if (newCollectGenStep.Generation() == CurrentGen) { + CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); + } } - if (!CollectGenStepInFlight) { - CollectGenStepInFlight = LastCollectedGenStep; + if (CollectGenStepInFlight) { + PopGCBarriers(*CollectGenStepInFlight); + if (FirstGC) { + gcContext.InitializeFirst(TabletInfo); + FirstGC = false; + } + if (!BlobsToKeep.IsEmpty()) { + AFL_VERIFY(*CollectGenStepInFlight < BlobsToKeep.GetMinGenStepVerified())("gs", *CollectGenStepInFlight)("first", BlobsToKeep.GetMinGenStepVerified()); + } + AFL_VERIFY(LastCollectedGenStep < *CollectGenStepInFlight); } - PopGCBarriers(*CollectGenStepInFlight); - AFL_VERIFY(LastCollectedGenStep <= *CollectGenStepInFlight); - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("notice", "collect_gen_step")("value", *CollectGenStepInFlight)("current_gen", CurrentGen); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("notice", "collect_gen_step")("value", CollectGenStepInFlight)("current_gen", CurrentGen); - const bool isFull = gcContext.IsFull(); + if (gcContext.IsFull()) { + PreviousGCTime = TInstant::Zero(); + } + BlobsManagerCounters.GCCounters.OnGCTask(gcContext.GetKeepsToErase().size(), gcContext.GetKeepBytes(), + gcContext.GetExtractedToRemoveFromDB().GetSize(), gcContext.GetDeleteBytes(), gcContext.IsFull(), !!CollectGenStepInFlight); auto removeCategories = sharedBlobsInfo->BuildRemoveCategories(std::move(gcContext.MutableExtractedToRemoveFromDB())); - - auto result = std::make_shared<NBlobOperations::NBlobStorage::TGCTask>(storageId, std::move(gcContext.MutablePerGroupGCListsInFlight()), *CollectGenStepInFlight, - std::move(gcContext.MutableKeepsToErase()), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen); + auto result = std::make_shared<NBlobOperations::NBlobStorage::TGCTask>(storageId, std::move(gcContext.MutablePerGroupGCListsInFlight()), + CollectGenStepInFlight, std::move(gcContext.MutableKeepsToErase()), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen); if (result->IsEmpty()) { + BlobsManagerCounters.GCCounters.OnEmptyGCTask(); CollectGenStepInFlight = {}; return nullptr; } - if (isFull) { - PreviousGCTime = TInstant::Zero(); - } - return result; } TBlobBatch TBlobManager::StartBlobBatch() { AFL_VERIFY(++CurrentStep < Max<ui32>() - 10); + BlobsManagerCounters.CurrentStep->Set(CurrentStep); AFL_VERIFY(TabletInfo->Channels.size() > 2); const auto& channel = TabletInfo->Channels[(CurrentStep % (TabletInfo->Channels.size() - 2)) + 2]; ++CountersUpdate.BatchesStarted; @@ -418,11 +399,9 @@ void TBlobManager::DoSaveBlobBatchOnComplete(TBlobBatch&& blobBatch) { AFL_VERIFY(genStep > edgeGenStep)("gen_step", genStep)("edge_gen_step", edgeGenStep)("blob_id", blobId.ToStringNew()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", logoBlobId.ToString()); - BlobsManagerCounters.OnKeepMarker(logoBlobId.BlobSize()); - AFL_VERIFY(BlobsToKeep[genStep].emplace(logoBlobId).second); + AFL_VERIFY(BlobsToKeep.Add(logoBlobId)); + BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep); } - BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - blobBatch.BatchInfo->GenStepRef.Reset(); } @@ -458,33 +437,42 @@ void TBlobManager::DeleteBlobOnComplete(const TTabletId tabletId, const TUnified if (!IsBlobInUsage(blobId)) { LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId); AFL_VERIFY(BlobsToDelete.Add(tabletId, blobId)); - BlobsManagerCounters.OnDeleteBlobMarker(blobId.BlobSize()); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); } else { - BlobsManagerCounters.OnDeleteBlobDelayedMarker(blobId.BlobSize()); LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId); - BlobsToDeleteDelayed.Add(tabletId, blobId); + AFL_VERIFY(BlobsToDeleteDelayed.Add(tabletId, blobId)); + BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed); } } -void TBlobManager::OnGCFinishedOnExecute(const TGenStep& genStep, IBlobManagerDb& db) { - db.SaveLastGcBarrier(genStep); +void TBlobManager::OnGCFinishedOnExecute(const std::optional<TGenStep>& genStep, IBlobManagerDb& db) { + if (genStep) { + db.SaveLastGcBarrier(*genStep); + } } -void TBlobManager::OnGCFinishedOnComplete(const TGenStep& genStep) { - LastCollectedGenStep = genStep; - AFL_VERIFY(GCBarrierPreparation == LastCollectedGenStep)("prepare", GCBarrierPreparation)("last", LastCollectedGenStep); - CollectGenStepInFlight.reset(); +void TBlobManager::OnGCFinishedOnComplete(const std::optional<TGenStep>& genStep) { + if (genStep) { + LastCollectedGenStep = *genStep; + AFL_VERIFY(GCBarrierPreparation == LastCollectedGenStep)("prepare", GCBarrierPreparation)("last", LastCollectedGenStep); + CollectGenStepInFlight.reset(); + } else { + AFL_VERIFY(!CollectGenStepInFlight); + } } -void TBlobManager::OnGCStartOnExecute(const TGenStep& genStep, IBlobManagerDb& db) { - AFL_VERIFY(LastCollectedGenStep <= genStep)("last", LastCollectedGenStep)("prepared", genStep); - db.SaveGCBarrierPreparation(genStep); +void TBlobManager::OnGCStartOnExecute(const std::optional<TGenStep>& genStep, IBlobManagerDb& db) { + if (genStep) { + AFL_VERIFY(LastCollectedGenStep < *genStep)("last", LastCollectedGenStep)("prepared", genStep); + db.SaveGCBarrierPreparation(*genStep); + } } -void TBlobManager::OnGCStartOnComplete(const TGenStep& genStep) { - AFL_VERIFY(GCBarrierPreparation <= genStep)("last", GCBarrierPreparation)("prepared", genStep); - GCBarrierPreparation = genStep; +void TBlobManager::OnGCStartOnComplete(const std::optional<TGenStep>& genStep) { + if (genStep) { + AFL_VERIFY(GCBarrierPreparation <= *genStep)("last", GCBarrierPreparation)("prepared", genStep); + GCBarrierPreparation = *genStep; + } } void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { @@ -492,8 +480,8 @@ void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { // Check if the blob is marked for delayed deletion if (BlobsToDeleteDelayed.ExtractBlobTo(blobId, BlobsToDelete)) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("blob_id", blobId)("event", "blob_delayed_deleted"); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); - BlobsManagerCounters.OnDeleteBlobMarker(blobId.GetLogoBlobId().BlobSize()); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed); } } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h index c7a507553c..90094e62b7 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h @@ -145,7 +145,7 @@ private: ui32 CurrentStep; std::optional<TGenStep> CollectGenStepInFlight; // Lists of blobs that need Keep flag to be set - std::map<TGenStep, std::set<TLogoBlobID>> BlobsToKeep; + TBlobsByGenStep BlobsToKeep; // Lists of blobs that need DoNotKeep flag to be set TTabletsByBlob BlobsToDelete; @@ -173,7 +173,7 @@ private: virtual void DoSaveBlobBatchOnExecute(const TBlobBatch& blobBatch, IBlobManagerDb& db) override; virtual void DoSaveBlobBatchOnComplete(TBlobBatch&& blobBatch) override; void DrainDeleteTo(const TGenStep& dest, TGCContext& gcContext); - [[nodiscard]] bool DrainKeepTo(const TGenStep& dest, TGCContext& gcContext, const bool controlCapacity = true); + [[nodiscard]] bool DrainKeepTo(const TGenStep& dest, TGCContext& gcContext); public: TBlobManager(TIntrusivePtr<TTabletStorageInfo> tabletInfo, const ui32 gen, const TTabletId selfTabletId); @@ -215,11 +215,11 @@ public: const std::shared_ptr<TBlobManager>& manager, const std::shared_ptr<NDataSharing::TStorageSharedBlobsManager>& sharedBlobsInfo, const std::shared_ptr<NBlobOperations::TRemoveGCCounters>& counters) noexcept; - void OnGCFinishedOnExecute(const TGenStep& genStep, IBlobManagerDb& db); - void OnGCFinishedOnComplete(const TGenStep& genStep); + void OnGCFinishedOnExecute(const std::optional<TGenStep>& genStep, IBlobManagerDb& db); + void OnGCFinishedOnComplete(const std::optional<TGenStep>& genStep); - void OnGCStartOnExecute(const TGenStep& genStep, IBlobManagerDb& db); - void OnGCStartOnComplete(const TGenStep& genStep); + void OnGCStartOnExecute(const std::optional<TGenStep>& genStep, IBlobManagerDb& db); + void OnGCStartOnComplete(const std::optional<TGenStep>& genStep); TBlobManagerCounters GetCountersUpdate() { TBlobManagerCounters res = CountersUpdate; @@ -239,7 +239,7 @@ private: bool ExtractEvicted(TEvictedBlob& evict, TEvictMetadata& meta, bool fromDropped = false); TGenStep EdgeGenStep() const { - return CollectGenStepInFlight ? *CollectGenStepInFlight : LastCollectedGenStep; + return CollectGenStepInFlight ? *CollectGenStepInFlight : std::max(GCBarrierPreparation, LastCollectedGenStep); } }; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp index ca22c12bd9..a72c6fb413 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp @@ -31,7 +31,7 @@ bool TGCTask::DoOnCompleteTxBeforeCleaning(NColumnShard::TColumnShard& /*self*/, return true; } -TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque<TUnifiedBlobId>&& keepsToErase, +TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const std::optional<TGenStep>& collectGenStepInFlight, std::deque<TUnifiedBlobId>&& keepsToErase, const std::shared_ptr<TBlobManager>& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr<TRemoveGCCounters>& counters, const ui64 tabletId, const ui64 currentGen) : TBase(storageId, std::move(blobsToRemove), counters) @@ -65,8 +65,8 @@ std::unique_ptr<TEvBlobStorage::TEvCollectGarbage> TGCTask::BuildRequest(const T ("count", it->second.RequestsCount); auto result = std::make_unique<TEvBlobStorage::TEvCollectGarbage>( TabletId, CurrentGen, PerGenerationCounter.Val(), - address.GetChannelId(), true, - CollectGenStepInFlight.Generation(), CollectGenStepInFlight.Step(), + address.GetChannelId(), !!CollectGenStepInFlight, + CollectGenStepInFlight ? CollectGenStepInFlight->Generation() : 0, CollectGenStepInFlight ? CollectGenStepInFlight->Step() : 0, new TVector<TLogoBlobID>(it->second.KeepList.begin(), it->second.KeepList.end()), new TVector<TLogoBlobID>(it->second.DontKeepList.begin(), it->second.DontKeepList.end()), TInstant::Max(), true); diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.h b/ydb/core/tx/columnshard/blobs_action/bs/gc.h index a8f3347804..5471fc04c0 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.h @@ -21,7 +21,7 @@ public: using TGCListsByGroup = THashMap<TBlobAddress, TGCLists>; private: TGCListsByGroup ListsByGroupId; - const TGenStep CollectGenStepInFlight; + const std::optional<TGenStep> CollectGenStepInFlight; const ui64 TabletId; const ui64 CurrentGen; std::deque<TUnifiedBlobId> KeepsToErase; @@ -35,11 +35,11 @@ protected: virtual bool DoOnCompleteTxBeforeCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr<IBlobsGCAction>& taskAction) override; virtual bool DoIsEmpty() const override { - return false; + return !CollectGenStepInFlight && KeepsToErase.empty(); } public: - TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque<TUnifiedBlobId>&& keepsToErase, + TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const std::optional<TGenStep>& collectGenStepInFlight, std::deque<TUnifiedBlobId>&& keepsToErase, const std::shared_ptr<TBlobManager>& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr<TRemoveGCCounters>& counters, const ui64 tabletId, const ui64 currentGen); const TGCListsByGroup& GetListsByGroupId() const { diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.cpp b/ydb/core/tx/columnshard/counters/blobs_manager.cpp index 1da1ac7ff8..edd9603d0d 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.cpp +++ b/ydb/core/tx/columnshard/counters/blobs_manager.cpp @@ -7,58 +7,51 @@ namespace NKikimr::NColumnShard { TBlobsManagerCounters::TBlobsManagerCounters(const TString& module) : TCommonCountersOwner(module) -{ - SkipCollection = TBase::GetDeriviative("GC/Skip/Count"); - StartCollection = TBase::GetDeriviative("GC/Start/Count"); - CollectDropExplicitBytes = TBase::GetDeriviative("GC/Drop/Explicit/Bytes"); - CollectDropExplicitCount = TBase::GetDeriviative("GC/Drop/Explicit/Count"); - CollectDropImplicitBytes = TBase::GetDeriviative("GC/Drop/Implicit/Bytes"); - CollectDropImplicitCount = TBase::GetDeriviative("GC/Drop/Implicit/Count"); - CollectKeepBytes = TBase::GetDeriviative("GC/Keep/Bytes"); - CollectKeepCount = TBase::GetDeriviative("GC/Keep/Count"); - PutBlobBytes = TBase::GetDeriviative("GC/PutBlob/Bytes"); - PutBlobCount = TBase::GetDeriviative("GC/PutBlob/Count"); - CollectGen = TBase::GetValue("GC/Gen"); - CollectStep = TBase::GetValue("GC/Step"); - - DeleteBlobMarkerBytes = TBase::GetDeriviative("GC/MarkerDeleteBlob/Bytes"); - DeleteBlobMarkerCount = TBase::GetDeriviative("GC/MarkerDeleteBlob/Count"); - DeleteBlobDelayedMarkerBytes = TBase::GetDeriviative("GC/MarkerDelayedDeleteBlob/Bytes"); - DeleteBlobDelayedMarkerCount = TBase::GetDeriviative("GC/MarkerDelayedDeleteBlob/Count"); - AddSmallBlobBytes = TBase::GetDeriviative("GC/AddSmallBlob/Bytes"); - AddSmallBlobCount = TBase::GetDeriviative("GC/AddSmallBlob/Count"); - DeleteSmallBlobBytes = TBase::GetDeriviative("GC/DeleteSmallBlob/Bytes"); - DeleteSmallBlobCount = TBase::GetDeriviative("GC/DeleteSmallBlob/Count"); - - BlobsKeepCount = TBase::GetValue("GC/BlobsKeep/Count"); - BlobsKeepBytes = TBase::GetValue("GC/BlobsKeep/Bytes"); - BlobsDeleteCount = TBase::GetValue("GC/BlobsDelete/Count"); - BlobsDeleteBytes = TBase::GetValue("GC/BlobsDelete/Bytes"); + , BlobsToDeleteCount(TBase::GetValue("BlobsToDelete/Count")) + , BlobsToDeleteDelayedCount(TBase::GetValue("BlobsToDeleteDelayed/Count")) + , BlobsToKeepCount(TBase::GetValue("BlobsToKeep/Count")) + , CurrentGen(TBase::GetValue("CurrentGen")) + , CurrentStep(TBase::GetValue("CurrentStep")) + , GCCounters(*this, "GC") - BrokenKeepCount = TBase::GetDeriviative("GC/BrokenKeep/Count"); - BrokenKeepBytes = TBase::GetDeriviative("GC/BrokenKeep/Bytes"); +{ - KeepMarkerCount = TBase::GetDeriviative("GC/KeepMarker/Count"); - KeepMarkerBytes = TBase::GetDeriviative("GC/KeepMarker/Bytes"); } -void TBlobsManagerCounters::OnBlobsKeep(const std::map<::NKikimr::TGenStep, std::set<TLogoBlobID>>& blobs) const { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnBlobsKeep")("count", blobs.size()); -// BlobsKeepCount->Set(blobs.size()); -// ui64 size = 0; -// for (auto&& i : blobs) { -// size += i.BlobSize(); -// } -// BlobsKeepBytes->Set(size); +TBlobsManagerGCCounters::TBlobsManagerGCCounters(const TCommonCountersOwner& sameAs, const TString& componentName) + : TBase(sameAs, componentName) + , SkipCollectionEmpty(TBase::GetDeriviative("Skip/Empty/Count")) + , SkipCollectionThrottling(TBase::GetDeriviative("Skip/Throttling/Count")) +{ + KeepsCountTasks = TBase::GetHistogram("Tasks/Keeps/Count", NMonitoring::ExponentialHistogram(16, 2, 100)); + KeepsCountBlobs = TBase::GetHistogram("Tasks/Keeps/Blobs", NMonitoring::ExponentialHistogram(16, 2, 100)); + KeepsCountBytes = TBase::GetHistogram("Tasks/Keeps/Bytes", NMonitoring::ExponentialHistogram(16, 2, 1024)); + DeletesCountBlobs = TBase::GetHistogram("Tasks/Deletes/Count", NMonitoring::ExponentialHistogram(16, 2, 100)); + DeletesCountTasks = TBase::GetHistogram("Tasks/Deletes/Blobs", NMonitoring::ExponentialHistogram(16, 2, 100)); + DeletesCountBytes = TBase::GetHistogram("Tasks/Deletes/Bytes", NMonitoring::ExponentialHistogram(16, 2, 1024)); + FullGCTasks = TBase::GetDeriviative("Tasks/Full/Count"); + MoveBarriers = TBase::GetDeriviative("Tasks/Barrier/Move"); + DontMoveBarriers = TBase::GetDeriviative("Tasks/Barrier/DontMove"); + GCTasks = TBase::GetDeriviative("Tasks/All/Count"); + EmptyGCTasks = TBase::GetDeriviative("Tasks/Empty/Count"); } -void TBlobsManagerCounters::OnBlobsDelete(const NOlap::TTabletsByBlob& /*blobs*/) const { - // BlobsDeleteCount->Set(blobs.size()); - // ui64 size = 0; - // for (auto&& i : blobs) { - // size += i.BlobSize(); - // } - // BlobsDeleteBytes->Set(size); +void TBlobsManagerGCCounters::OnGCTask(const ui32 keepsCount, const ui32 keepBytes, const ui32 deleteCount, const ui32 deleteBytes, const bool isFull, const bool moveBarrier) const { + GCTasks->Add(1); + if (isFull) { + FullGCTasks->Add(1); + } + KeepsCountTasks->Collect(keepsCount); + KeepsCountBlobs->Collect((i64)keepsCount, keepsCount); + KeepsCountBytes->Collect((i64)keepsCount, keepBytes); + DeletesCountTasks->Collect(deleteCount); + DeletesCountBlobs->Collect((i64)deleteCount, deleteCount); + DeletesCountBytes->Collect((i64)deleteCount, deleteBytes); + if (moveBarrier) { + MoveBarriers->Add(1); + } else { + DontMoveBarriers->Add(1); + } } } diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.h b/ydb/core/tx/columnshard/counters/blobs_manager.h index 2c555eaac9..8490ff3881 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.h +++ b/ydb/core/tx/columnshard/counters/blobs_manager.h @@ -2,6 +2,7 @@ #include "common/owner.h" #include <ydb/core/base/logoblob.h> +#include <ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h> #include <ydb/core/tx/columnshard/blobs_action/abstract/common.h> #include <library/cpp/monlib/dynamic_counters/counters.h> @@ -13,99 +14,53 @@ class TTabletsByBlob; namespace NKikimr::NColumnShard { -class TBlobsManagerCounters: public TCommonCountersOwner { +class TBlobsManagerGCCounters: public TCommonCountersOwner { private: using TBase = TCommonCountersOwner; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropExplicitBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropExplicitCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropImplicitBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropImplicitCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr PutBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr PutBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectGen; - NMonitoring::TDynamicCounters::TCounterPtr CollectStep; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobMarkerBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobDelayedMarkerBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobDelayedMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr AddSmallBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr AddSmallBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr DeleteSmallBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteSmallBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr BrokenKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr BrokenKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr BlobsKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr BlobsKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr BlobsDeleteCount; - NMonitoring::TDynamicCounters::TCounterPtr BlobsDeleteBytes; - NMonitoring::TDynamicCounters::TCounterPtr KeepMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr KeepMarkerBytes; - + NMonitoring::THistogramPtr KeepsCountBytes; + NMonitoring::THistogramPtr KeepsCountBlobs; + NMonitoring::THistogramPtr KeepsCountTasks; + NMonitoring::THistogramPtr DeletesCountBytes; + NMonitoring::THistogramPtr DeletesCountBlobs; + NMonitoring::THistogramPtr DeletesCountTasks; + NMonitoring::TDynamicCounters::TCounterPtr FullGCTasks; + NMonitoring::TDynamicCounters::TCounterPtr MoveBarriers; + NMonitoring::TDynamicCounters::TCounterPtr DontMoveBarriers; + NMonitoring::TDynamicCounters::TCounterPtr GCTasks; + NMonitoring::TDynamicCounters::TCounterPtr EmptyGCTasks; public: - NMonitoring::TDynamicCounters::TCounterPtr SkipCollection; - NMonitoring::TDynamicCounters::TCounterPtr StartCollection; - - TBlobsManagerCounters(const TString& module); - - void OnKeepMarker(const ui64 size) const { - KeepMarkerCount->Add(1); - KeepMarkerBytes->Add(size); - } - - void OnBlobsKeep(const std::map<::NKikimr::TGenStep, std::set<TLogoBlobID>>& blobs) const; - - void OnBlobsDelete(const NOlap::TTabletsByBlob& blobs) const; - - void OnAddSmallBlob(const ui32 bSize) const { - AddSmallBlobBytes->Add(bSize); - AddSmallBlobCount->Add(1); - } - - void OnDeleteBlobDelayedMarker(const ui32 bSize) const { - DeleteBlobDelayedMarkerBytes->Add(bSize); - DeleteBlobDelayedMarkerCount->Add(1); - } - - void OnDeleteBlobMarker(const ui32 bSize) const { - DeleteBlobMarkerBytes->Add(bSize); - DeleteBlobMarkerCount->Add(1); - } + const NMonitoring::TDynamicCounters::TCounterPtr SkipCollectionEmpty; + const NMonitoring::TDynamicCounters::TCounterPtr SkipCollectionThrottling; - void OnNewCollectStep(const ui32 gen, const ui32 step) const { - CollectGen->Set(gen); - CollectStep->Set(step); - } + TBlobsManagerGCCounters(const TCommonCountersOwner& sameAs, const TString& componentName); - void OnDeleteSmallBlob(const ui32 bSize) const { - DeleteSmallBlobBytes->Add(bSize); - DeleteSmallBlobCount->Add(1); - } + void OnGCTask(const ui32 keepsCount, const ui32 keepBytes, const ui32 deleteCount, const ui32 deleteBytes, + const bool isFull, const bool moveBarrier) const; - void OnPutResult(const ui32 bSize) const { - PutBlobBytes->Add(bSize); - PutBlobCount->Add(1); - } - - void OnCollectKeep(const ui32 bSize) const { - CollectKeepBytes->Add(bSize); - CollectKeepCount->Add(1); + void OnEmptyGCTask() const { + EmptyGCTasks->Add(1); } +}; - void OnBrokenKeep(const ui32 bSize) const { - BrokenKeepBytes->Add(bSize); - BrokenKeepCount->Add(1); +class TBlobsManagerCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToDeleteCount; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToDeleteDelayedCount; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToKeepCount; +public: + const NMonitoring::TDynamicCounters::TCounterPtr CurrentGen; + const NMonitoring::TDynamicCounters::TCounterPtr CurrentStep; + const TBlobsManagerGCCounters GCCounters; + TBlobsManagerCounters(const TString& module); + void OnBlobsToDelete(const NOlap::TTabletsByBlob& blobs) const { + BlobsToDeleteCount->Set(blobs.GetSize()); } - - void OnCollectDropExplicit(const ui32 bSize) const { - CollectDropExplicitBytes->Add(bSize); - CollectDropExplicitCount->Add(1); + void OnBlobsToKeep(const NOlap::TBlobsByGenStep& blobs) const { + BlobsToKeepCount->Set(blobs.GetSize()); } - - void OnCollectDropImplicit(const ui32 bSize) const { - CollectDropImplicitBytes->Add(bSize); - CollectDropImplicitCount->Add(1); + void OnBlobsToDeleteDelayed(const NOlap::TTabletsByBlob& blobs) const { + BlobsToDeleteDelayedCount->Set(blobs.GetSize()); } }; |