about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author chertus <azuikov@ydb.tech> 2023-02-10 21:46:17 +0300
committer chertus <azuikov@ydb.tech> 2023-02-10 21:46:17 +0300
commit df0b1817f194e6c9b6294c06c6d79b173fa71480 (patch)
tree a0769a93a46ad8b1be084d6457938d8580cda2b4
parent 000ba3aa252e9317251b1f7555b9262ed6f6526f (diff)
download ydb-df0b1817f194e6c9b6294c06c6d79b173fa71480.tar.gz
cleanup deleted S3 blobs & log export operations
-rw-r--r-- ydb/core/tx/columnshard/blob_manager.cpp 11
-rw-r--r-- ydb/core/tx/columnshard/blob_manager.h 2
-rw-r--r-- ydb/core/tx/columnshard/columnshard__export.cpp 34
-rw-r--r-- ydb/core/tx/columnshard/columnshard__forget.cpp 6
-rw-r--r-- ydb/core/tx/columnshard/columnshard__write_index.cpp 6
-rw-r--r-- ydb/core/tx/columnshard/columnshard_impl.cpp 45
-rw-r--r-- ydb/core/tx/columnshard/columnshard_impl.h 7
-rw-r--r-- ydb/core/tx/columnshard/columnshard_private_events.h 4
-rw-r--r-- ydb/core/tx/tiering/s3_actor.cpp 13
9 files changed, 88 insertions, 40 deletions
diff --git a/ydb/core/tx/columnshard/blob_manager.cpp b/ydb/core/tx/columnshard/blob_manager.cpp
index 14bcdcec570..b931b62de90 100644
--- a/ydb/core/tx/columnshard/blob_manager.cpp
+++ b/ydb/core/tx/columnshard/blob_manager.cpp
@@ -639,6 +639,17 @@ TEvictedBlob TBlobManager::GetDropped(const TUnifiedBlobId& blobId, TEvictMetada
return {};
}
+void TBlobManager::GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const {
+ TString strBlobs;
+ for (auto& [evict, _] : DroppedEvictedBlobs) {
+ strBlobs += "'" + evict.Blob.ToStringNew() + "' ";
+ cleanup.insert(evict);
+ }
+ if (!strBlobs.empty()) {
+ LOG_S_NOTICE("Cleanup evicted blobs " << strBlobs << "at tablet " << TabletInfo->TabletID);
+ }
+}
+
void TBlobManager::DeleteSmallBlob(const TUnifiedBlobId& blobId, IBlobManagerDb& db) {
LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Small Blob " << blobId);
db.EraseSmallBlob(blobId);
diff --git a/ydb/core/tx/columnshard/blob_manager.h b/ydb/core/tx/columnshard/blob_manager.h
index 8b6405cd80a..3eb80f5c28c 100644
--- a/ydb/core/tx/columnshard/blob_manager.h
+++ b/ydb/core/tx/columnshard/blob_manager.h
@@ -97,6 +97,7 @@ public:
virtual bool LoadOneToOneExport(IBlobManagerDb& db) = 0;
virtual TEvictedBlob GetEvicted(const TUnifiedBlobId& blob, TEvictMetadata& meta) = 0;
virtual TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) = 0;
+ virtual void GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const = 0;
virtual bool HasExternBlobs() const = 0;
};
@@ -240,6 +241,7 @@ public:
bool LoadOneToOneExport(IBlobManagerDb& db) override;
TEvictedBlob GetEvicted(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override;
TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) override;
+ void GetCleanupBlobs(THashSet<TEvictedBlob>& cleanup) const override;
bool HasExternBlobs() const override {
return EvictedBlobs.size() || DroppedEvictedBlobs.size();
diff --git a/ydb/core/tx/columnshard/columnshard__export.cpp b/ydb/core/tx/columnshard/columnshard__export.cpp
index cc7dce2d392..f2d7b8eb0a9 100644
--- a/ydb/core/tx/columnshard/columnshard__export.cpp
+++ b/ydb/core/tx/columnshard/columnshard__export.cpp
@@ -19,7 +19,7 @@ public:
private:
TEvPrivate::TEvExport::TPtr Ev;
- std::vector<NOlap::TEvictedBlob> BlobsToForget;
+ THashSet<NOlap::TEvictedBlob> BlobsToForget;
};
@@ -59,26 +59,20 @@ bool TTxExportFinish::Execute(TTransactionContext& txc, const TActorContext&) {
// Delayed erase of evicted blob. Blob could be already deleted.
if (present && !dropped) {
- LOG_S_DEBUG("Delete exported blob '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
+ LOG_S_NOTICE("Blob exported '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
Self->BlobManager->DeleteBlob(blobId, blobManagerDb);
Self->IncCounter(COUNTER_BLOBS_ERASED);
Self->IncCounter(COUNTER_BYTES_ERASED, blobId.BlobSize());
- } else if (present) {
- LOG_S_DEBUG("Stale exported blob '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
+ } else if (present && dropped) {
+ LOG_S_NOTICE("Stale blob exported '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
TEvictMetadata meta;
evict = Self->BlobManager->GetDropped(blobId, meta);
Y_VERIFY(evict.State == EEvictState::EXTERN);
- if (Self->DelayedForgetBlobs.count(blobId)) {
- Self->DelayedForgetBlobs.erase(blobId);
- BlobsToForget.emplace_back(std::move(evict));
- } else {
- LOG_S_ERROR("No delayed forget for stale exported blob '"
- << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
- }
+ BlobsToForget.emplace(std::move(evict));
} else {
- LOG_S_ERROR("Exported but unknown blob '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
+ LOG_S_ERROR("Unknown blob exported '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
}
// TODO: delete not present in S3 for sure (avoid race between export and forget)
@@ -97,10 +91,8 @@ void TTxExportFinish::Complete(const TActorContext& ctx) {
Y_VERIFY(Ev);
LOG_S_DEBUG("TTxExportFinish.Complete at tablet " << Self->TabletID());
- auto& msg = *Ev->Get();
- Y_VERIFY(!msg.TierName.empty());
if (!BlobsToForget.empty()) {
- Self->ForgetBlobs(ctx, msg.TierName, std::move(BlobsToForget));
+ Self->ForgetBlobs(ctx, BlobsToForget);
}
Y_VERIFY(Self->ActiveEvictions, "Unexpected active evictions count at tablet %lu", Self->TabletID());
@@ -109,17 +101,17 @@ void TTxExportFinish::Complete(const TActorContext& ctx) {
void TColumnShard::Handle(TEvPrivate::TEvExport::TPtr& ev, const TActorContext& ctx) {
- auto status = ev->Get()->Status;
+ auto& msg = *ev->Get();
+ auto status = msg.Status;
Y_VERIFY(ActiveEvictions, "Unexpected active evictions count at tablet %lu", TabletID());
- ui64 exportNo = ev->Get()->ExportNo;
- auto& tierName = ev->Get()->TierName;
+ ui64 exportNo = msg.ExportNo;
+ auto& tierName = msg.TierName;
+ ui64 pathId = msg.PathId;
if (status == NKikimrProto::UNKNOWN) {
LOG_S_DEBUG("Export (write): id " << exportNo << " tier '" << tierName << "' at tablet " << TabletID());
- auto& tierBlobs = ev->Get()->Blobs;
- Y_VERIFY(tierBlobs.size());
- ExportBlobs(ctx, exportNo, tierName, std::move(tierBlobs));
+ ExportBlobs(ctx, exportNo, tierName, pathId, std::move(msg.Blobs));
} else if (status == NKikimrProto::OK) {
LOG_S_DEBUG("Export (apply): id " << exportNo << " tier '" << tierName << "' at tablet " << TabletID());
Execute(new TTxExportFinish(this, ev), ctx);
diff --git a/ydb/core/tx/columnshard/columnshard__forget.cpp b/ydb/core/tx/columnshard/columnshard__forget.cpp
index 89c5838d37f..02cdbfb25b7 100644
--- a/ydb/core/tx/columnshard/columnshard__forget.cpp
+++ b/ydb/core/tx/columnshard/columnshard__forget.cpp
@@ -34,12 +34,16 @@ bool TTxForget::Execute(TTransactionContext& txc, const TActorContext&) {
if (status == NKikimrProto::OK) {
TBlobManagerDb blobManagerDb(txc.DB);
+ TString strBlobs;
for (auto& evict : msg.Evicted) {
bool erased = Self->BlobManager->EraseOneToOne(evict, blobManagerDb);
- if (!erased) {
+ if (erased) {
+ strBlobs += "'" + evict.Blob.ToStringNew() + "' ";
+ } else {
LOG_S_ERROR("Forget unknown blob " << evict.Blob << " at tablet " << Self->TabletID());
}
}
+ LOG_S_NOTICE("Forget evicted blobs " << strBlobs << "at tablet " << Self->TabletID());
Self->IncCounter(COUNTER_FORGET_SUCCESS);
} else {
diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp
index 3cc6d103223..d82680e73c3 100644
--- a/ydb/core/tx/columnshard/columnshard__write_index.cpp
+++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp
@@ -34,7 +34,7 @@ private:
TEvPrivate::TEvWriteIndex::TPtr Ev;
THashMap<TString, TPathIdBlobs> ExportTierBlobs;
- THashSet<TUnifiedBlobId> BlobsToForget;
+ THashSet<NOlap::TEvictedBlob> BlobsToForget;
ui64 ExportNo = 0;
};
@@ -193,7 +193,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx)
auto evict = Self->BlobManager->GetDropped(blobId, meta);
Y_VERIFY(evict.State != EEvictState::UNKNOWN);
- BlobsToForget.insert(blobId);
+ BlobsToForget.emplace(std::move(evict));
if (NOlap::IsDeleted(evict.State)) {
LOG_S_DEBUG("Skip delete blob '" << blobId.ToStringNew() << "' at tablet " << Self->TabletID());
@@ -270,6 +270,8 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx)
} else if (changes->IsCleanup()) {
Self->ActiveCleanup = false;
+ Self->BlobManager->GetCleanupBlobs(BlobsToForget);
+
Self->IncCounter(ok ? COUNTER_CLEANUP_SUCCESS : COUNTER_CLEANUP_FAIL);
} else if (changes->IsTtl()) {
Self->ActiveTtl = false;
diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp
index d45c08eec84..afcf4e31d62 100644
--- a/ydb/core/tx/columnshard/columnshard_impl.cpp
+++ b/ydb/core/tx/columnshard/columnshard_impl.cpp
@@ -712,6 +712,11 @@ void TColumnShard::EnqueueBackgroundActivities(bool periodic, bool insertOnly) {
if (auto event = SetupCleanup()) {
ctx.Send(SelfId(), event.release());
+ } else {
+ // Small cleanup (no index changes)
+ THashSet<NOlap::TEvictedBlob> blobsToForget;
+ BlobManager->GetCleanupBlobs(blobsToForget);
+ ForgetBlobs(ctx, blobsToForget);
}
if (auto event = SetupTtl()) {
@@ -1039,15 +1044,24 @@ void TColumnShard::MapExternBlobs(const TActorContext& /*ctx*/, NOlap::TReadMeta
}
}
-void TColumnShard::ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName,
- TEvPrivate::TEvExport::TBlobDataMap&& blobsInfo) const {
+void TColumnShard::ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName, ui64 pathId,
+ TEvPrivate::TEvExport::TBlobDataMap&& blobsInfo) const {
+ Y_VERIFY(blobsInfo.size());
+
+ TString strBlobs;
+ for (auto& [blobId, _] : blobsInfo) {
+ strBlobs += "'" + blobId.ToStringNew() + "' ";
+ }
+ LOG_S_NOTICE("Export blobs " << strBlobs << "at tablet " << TabletID());
+
if (auto s3 = GetS3ActorForTier(tierName)) {
- auto event = std::make_unique<TEvPrivate::TEvExport>(exportNo, tierName, s3, std::move(blobsInfo));
+ auto event = std::make_unique<TEvPrivate::TEvExport>(exportNo, tierName, pathId, s3, std::move(blobsInfo));
ctx.Register(CreateExportActor(TabletID(), ctx.SelfID, event.release()));
}
}
-void TColumnShard::ForgetBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const {
+// It should be called from ForgetBlobs() only to log all S3 activity
+void TColumnShard::ForgetTierBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const {
if (auto s3 = GetS3ActorForTier(tierName)) {
auto forget = std::make_unique<TEvPrivate::TEvForget>();
forget->Evicted = std::move(blobs);
@@ -1055,26 +1069,37 @@ void TColumnShard::ForgetBlobs(const TActorContext& ctx, const TString& tierName
}
}
-void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashSet<TUnifiedBlobId>& blobs) {
+void TColumnShard::ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::TEvictedBlob>& evictedBlobs) {
THashMap<TString, std::vector<NOlap::TEvictedBlob>> tierBlobs;
- for (const auto& blobId : blobs) {
+ TString strBlobs;
+ TString strBlobsDelayed;
+
+ for (const auto& ev : evictedBlobs) {
+ auto& blobId = ev.Blob;
TEvictMetadata meta;
auto evict = BlobManager->GetDropped(blobId, meta);
+ Y_VERIFY(evict.Blob == blobId);
if (evict.State == EEvictState::UNKNOWN) {
LOG_S_ERROR("Forget unknown blob '" << blobId.ToStringNew() << "' at tablet " << TabletID());
} else if (NOlap::IsExported(evict.State)) {
- LOG_S_DEBUG("Forget blob '" << blobId.ToStringNew() << "' at tablet " << TabletID());
+ strBlobs += "'" + blobId.ToStringNew() + "' ";
tierBlobs[meta.GetTierName()].emplace_back(std::move(evict));
} else {
- LOG_S_DEBUG("Forget blob (deleyed) '" << blobId.ToStringNew() << "' at tablet " << TabletID());
- DelayedForgetBlobs.insert(blobId);
+ strBlobsDelayed += "'" + blobId.ToStringNew() + "' ";
}
}
+ if (strBlobs.size()) {
+ LOG_S_NOTICE("Forget blobs " << strBlobs << "at tablet " << TabletID());
+ }
+ if (strBlobsDelayed.size()) {
+ LOG_S_NOTICE("Forget blobs (deleyed) " << strBlobsDelayed << "at tablet " << TabletID());
+ }
+
for (auto& [tierName, blobs] : tierBlobs) {
- ForgetBlobs(ctx, tierName, std::move(blobs));
+ ForgetTierBlobs(ctx, tierName, std::move(blobs));
}
}
diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h
index 930579664c6..b88e5fd0711 100644
--- a/ydb/core/tx/columnshard/columnshard_impl.h
+++ b/ydb/core/tx/columnshard/columnshard_impl.h
@@ -365,7 +365,6 @@ private:
std::unique_ptr<NOlap::TInsertTable> InsertTable;
std::unique_ptr<NOlap::IColumnEngine> PrimaryIndex;
TBatchCache BatchCache;
- THashSet<NOlap::TUnifiedBlobId> DelayedForgetBlobs;
TTtl Ttl;
THashMap<ui64, TBasicTxInfo> BasicTxInfo;
@@ -452,10 +451,10 @@ private:
NOlap::TIndexInfo ConvertSchema(const NKikimrSchemeOp::TColumnTableSchema& schema);
void MapExternBlobs(const TActorContext& ctx, NOlap::TReadMetadata& metadata);
TActorId GetS3ActorForTier(const TString& tierId) const;
- void ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName,
+ void ExportBlobs(const TActorContext& ctx, ui64 exportNo, const TString& tierName, ui64 pathId,
TEvPrivate::TEvExport::TBlobDataMap&& blobsInfo) const;
- void ForgetBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const;
- void ForgetBlobs(const TActorContext& ctx, const THashSet<TUnifiedBlobId>& blobs);
+ void ForgetTierBlobs(const TActorContext& ctx, const TString& tierName, std::vector<NOlap::TEvictedBlob>&& blobs) const;
+ void ForgetBlobs(const TActorContext& ctx, const THashSet<NOlap::TEvictedBlob>& blobs);
bool GetExportedBlob(const TActorContext& ctx, TActorId dst, ui64 cookie, const TString& tierName,
NOlap::TEvictedBlob&& evicted, std::vector<NOlap::TBlobRange>&& ranges);
diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h
index b91ab112fe7..b00f8e04410 100644
--- a/ydb/core/tx/columnshard/columnshard_private_events.h
+++ b/ydb/core/tx/columnshard/columnshard_private_events.h
@@ -147,14 +147,16 @@ struct TEvPrivate {
Y_VERIFY(!Blobs.empty());
}
- TEvExport(ui64 exportNo, const TString& tierName, TActorId dstActor, TBlobDataMap&& blobs)
+ TEvExport(ui64 exportNo, const TString& tierName, ui64 pathId, TActorId dstActor, TBlobDataMap&& blobs)
: ExportNo(exportNo)
, TierName(tierName)
+ , PathId(pathId)
, DstActor(dstActor)
, Blobs(std::move(blobs))
{
Y_VERIFY(ExportNo);
Y_VERIFY(!TierName.empty());
+ Y_VERIFY(PathId);
Y_VERIFY(DstActor);
Y_VERIFY(!Blobs.empty());
}
diff --git a/ydb/core/tx/tiering/s3_actor.cpp b/ydb/core/tx/tiering/s3_actor.cpp
index b0893c72fa8..5caaa56f454 100644
--- a/ydb/core/tx/tiering/s3_actor.cpp
+++ b/ydb/core/tx/tiering/s3_actor.cpp
@@ -134,6 +134,17 @@ public:
}
void Handle(TEvPrivate::TEvForget::TPtr& ev) {
+ // It's possible to get several forgets for the same blob (remove + cleanup)
+ for (auto& evict : ev->Get()->Evicted) {
+ if (evict.ExternBlob.IsS3Blob()) {
+ const TString& key = evict.ExternBlob.GetS3Key();
+ if (ForgettingKeys.count(key)) {
+ LOG_S_NOTICE("[S3] Ignore forget '" << evict.Blob.ToStringNew() << "' at tablet " << TabletId);
+ return; // TODO: return an error?
+ }
+ }
+ }
+
ui64 forgetNo = ++ForgetNo;
Forgets[forgetNo] = TS3Forget(ev->Release());
@@ -146,7 +157,7 @@ public:
}
const TString& key = evict.ExternBlob.GetS3Key();
- Y_VERIFY(!ForgettingKeys.count(key)); // TODO
+ Y_VERIFY(!ForgettingKeys.count(key));
forget.KeysToDelete.emplace(key);
ForgettingKeys[key] = forgetNo;