summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoralexvru <[email protected]>2023-06-13 14:53:43 +0300
committeralexvru <[email protected]>2023-06-13 14:53:43 +0300
commit386bf4e770cafacd9d3feab250fbb6fe5a54c82f (patch)
treebcff073d6ed6f1dab5afbdd5c46b6cca4e5248cc
parentaffae615a0d8ac632d677d05957254aa3868ec35 (diff)
Fix scrub garbage keeping
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp79
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/ya.make1
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/blob_recovery_process.cpp14
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/blob_recovery_queue.cpp4
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/blob_recovery_request.cpp5
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/restore_corrupted_blob_actor.cpp2
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp8
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h6
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/scrub_actor_snapshot.cpp2
-rw-r--r--ydb/core/blobstorage/vdisk/scrub/scrub_actor_unreadable.cpp33
14 files changed, 133 insertions, 25 deletions
diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin-x86_64.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin-x86_64.txt
index b9e04e45c05..844cc787c3c 100644
--- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin-x86_64.txt
@@ -49,6 +49,7 @@ target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/main.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/mirror3of4.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/snapshots.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/space_check.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sync.cpp
diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-aarch64.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-aarch64.txt
index 288764a4856..7134c089845 100644
--- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-aarch64.txt
@@ -52,6 +52,7 @@ target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/main.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/mirror3of4.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/snapshots.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/space_check.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sync.cpp
diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-x86_64.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-x86_64.txt
index 6d77d0beca0..f3c7bbe3d6b 100644
--- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux-x86_64.txt
@@ -53,6 +53,7 @@ target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/main.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/mirror3of4.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/snapshots.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/space_check.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sync.cpp
diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.windows-x86_64.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.windows-x86_64.txt
index 81435da3081..2537ba3d23e 100644
--- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.windows-x86_64.txt
@@ -42,6 +42,7 @@ target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/main.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/mirror3of4.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/snapshots.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/space_check.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/sync.cpp
diff --git a/ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp b/ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp
new file mode 100644
index 00000000000..b0e49b75fdc
--- /dev/null
+++ b/ydb/core/blobstorage/ut_blobstorage/scrub_fast.cpp
@@ -0,0 +1,79 @@
+#include <ydb/core/blobstorage/ut_blobstorage/lib/env.h>
+#include <ydb/core/blobstorage/vdisk/scrub/scrub_actor.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+void Test() {
+ SetRandomSeed(1);
+ TEnvironmentSetup env{{
+ .Erasure = TBlobStorageGroupType::Erasure4Plus2Block
+ }};
+ auto& runtime = env.Runtime;
+ env.CreateBoxAndPool();
+ env.SetScrubPeriodicity(TDuration::Seconds(60));
+ env.Sim(TDuration::Minutes(1));
+ auto groups = env.GetGroups();
+ auto info = env.GetGroupInfo(groups[0]);
+
+ TString data = TString::Uninitialized(8_MB);
+ memset(data.Detach(), 'X', data.size());
+ TLogoBlobID id(1, 1, 1, 0, data.size(), 0);
+
+ { // write data to group
+ TActorId sender = runtime->AllocateEdgeActor(1);
+ runtime->WrapInActorContext(sender, [&] {
+ SendToBSProxy(sender, info->GroupID, new TEvBlobStorage::TEvPut(id, data, TInstant::Max()));
+ });
+ auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvPutResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::OK);
+ }
+
+ auto checkReadable = [&](NKikimrProto::EReplyStatus status) {
+ TActorId sender = runtime->AllocateEdgeActor(1);
+ runtime->WrapInActorContext(sender, [&] {
+ SendToBSProxy(sender, info->GroupID, new TEvBlobStorage::TEvGet(id, 0, 0, TInstant::Max(),
+ NKikimrBlobStorage::EGetHandleClass::FastRead));
+ });
+ auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvGetResult>(sender);
+ UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::OK);
+ UNIT_ASSERT_VALUES_EQUAL(res->Get()->ResponseSz, 1);
+ auto& r = res->Get()->Responses[0];
+ UNIT_ASSERT_VALUES_EQUAL(r.Status, status);
+ if (status == NKikimrProto::OK) {
+ UNIT_ASSERT_VALUES_EQUAL(r.Buffer, data);
+ }
+ };
+
+ checkReadable(NKikimrProto::OK);
+
+ for (ui32 i = 0; i < info->GetTotalVDisksNum(); ++i) {
+ const TActorId vdiskActorId = info->GetActorId(i);
+
+ ui32 nodeId, pdiskId;
+ std::tie(nodeId, pdiskId, std::ignore) = DecomposeVDiskServiceId(vdiskActorId);
+ auto it = env.PDiskMockStates.find(std::make_pair(nodeId, pdiskId));
+ Y_VERIFY(it != env.PDiskMockStates.end());
+
+ const TActorId sender = runtime->AllocateEdgeActor(vdiskActorId.NodeId());
+ env.Runtime->Send(new IEventHandle(vdiskActorId, sender, new TEvBlobStorage::TEvCaptureVDiskLayout), sender.NodeId());
+ auto res = env.WaitForEdgeActorEvent<TEvBlobStorage::TEvCaptureVDiskLayoutResult>(sender);
+
+ for (auto& item : res->Get()->Layout) {
+ using T = TEvBlobStorage::TEvCaptureVDiskLayoutResult;
+ if (item.Database == T::EDatabase::LogoBlobs && item.RecordType == T::ERecordType::HugeBlob) {
+ const TDiskPart& part = item.Location;
+ it->second->SetCorruptedArea(part.ChunkIdx, part.Offset, part.Offset + part.Size, true);
+ break;
+ }
+ }
+
+ checkReadable(NKikimrProto::OK);
+ }
+
+ env.Sim(TDuration::Seconds(60));
+}
+
+Y_UNIT_TEST_SUITE(ScrubFast) {
+ Y_UNIT_TEST(SingleBlob) {
+ Test();
+ }
+}
diff --git a/ydb/core/blobstorage/ut_blobstorage/ya.make b/ydb/core/blobstorage/ut_blobstorage/ya.make
index 0ae92e59249..9fd17327c12 100644
--- a/ydb/core/blobstorage/ut_blobstorage/ya.make
+++ b/ydb/core/blobstorage/ut_blobstorage/ya.make
@@ -25,6 +25,7 @@ SRCS(
main.cpp
mirror3of4.cpp
sanitize_groups.cpp
+ scrub_fast.cpp
snapshots.cpp
space_check.cpp
sync.cpp
diff --git a/ydb/core/blobstorage/vdisk/scrub/blob_recovery_process.cpp b/ydb/core/blobstorage/vdisk/scrub/blob_recovery_process.cpp
index d78757e78ac..43a2f3f8633 100644
--- a/ydb/core/blobstorage/vdisk/scrub/blob_recovery_process.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/blob_recovery_process.cpp
@@ -18,16 +18,16 @@ namespace NKikimr {
}
switch (TIngress::IngressMode(gtype)) {
case TIngress::EMode::GENERIC:
- if (i < gtype.TotalPartCount() && needed.Get(i)) { // only main replica
- const TLogoBlobID partId(id, i + 1);
- AddExtremeQuery(vdiskId, partId, deadline, gtype.PartSize(partId));
+ ui32 maxSize;
+ maxSize = 0;
+ if (gtype.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc) {
+ maxSize += gtype.PartSize(TLogoBlobID(id, i % 3 + 1));
} else {
- ui32 maxSize = 0;
- for (ui32 i = 0; i < gtype.TotalPartCount(); ++i) {
- maxSize += gtype.PartSize(TLogoBlobID(id, i + 1));
+ for (ui32 k = 0; k < gtype.TotalPartCount(); ++k) {
+ maxSize += i >= gtype.TotalPartCount() || k == i ? gtype.PartSize(TLogoBlobID(id, k + 1)) : 0;
}
- AddExtremeQuery(vdiskId, id, deadline, maxSize);
}
+ AddExtremeQuery(vdiskId, id, deadline, maxSize);
break;
case TIngress::EMode::MIRROR3OF4:
diff --git a/ydb/core/blobstorage/vdisk/scrub/blob_recovery_queue.cpp b/ydb/core/blobstorage/vdisk/scrub/blob_recovery_queue.cpp
index 6e4c2b66758..d07f17c0641 100644
--- a/ydb/core/blobstorage/vdisk/scrub/blob_recovery_queue.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/blob_recovery_queue.cpp
@@ -38,9 +38,7 @@ namespace NKikimr {
it->second.IsConnected = ev->Get()->IsConnected;
STLOG(PRI_INFO, BS_VDISK_SCRUB, VDS29, VDISKP(LogPrefix, "BS_QUEUE state update"), (SelfId, SelfId()),
(VDiskId, it->first), (IsConnected, it->second.IsConnected));
- if (it->second.IsConnected) {
- EvaluateConnectionQuorum();
- }
+ EvaluateConnectionQuorum();
}
void TBlobRecoveryActor::EvaluateConnectionQuorum() {
diff --git a/ydb/core/blobstorage/vdisk/scrub/blob_recovery_request.cpp b/ydb/core/blobstorage/vdisk/scrub/blob_recovery_request.cpp
index ec2e24ff440..fcfbd6ed9f3 100644
--- a/ydb/core/blobstorage/vdisk/scrub/blob_recovery_request.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/blob_recovery_request.cpp
@@ -29,8 +29,7 @@ namespace NKikimr {
// create timer to process deadlines if not yet created
if (!WakeupScheduled) {
- const TInstant now = TActivationContext::Now();
- Schedule(msg->Deadline - now, new TEvents::TEvWakeup);
+ Schedule(msg->Deadline, new TEvents::TEvWakeup);
WakeupScheduled = true;
}
}
@@ -57,7 +56,7 @@ namespace NKikimr {
// reschedule timer
if (deadline != TInstant::Max()) {
- Schedule(deadline - now, new TEvents::TEvWakeup);
+ Schedule(deadline, new TEvents::TEvWakeup);
} else {
WakeupScheduled = false;
}
diff --git a/ydb/core/blobstorage/vdisk/scrub/restore_corrupted_blob_actor.cpp b/ydb/core/blobstorage/vdisk/scrub/restore_corrupted_blob_actor.cpp
index c6e4a658339..af7e9b8282c 100644
--- a/ydb/core/blobstorage/vdisk/scrub/restore_corrupted_blob_actor.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/restore_corrupted_blob_actor.cpp
@@ -235,7 +235,7 @@ namespace NKikimr {
}
for (const auto& item : Items) {
if (item.Status == NKikimrProto::UNKNOWN) {
- return Schedule(TDuration::Seconds(1), new TEvIssueQuery);
+ return Schedule(TDuration::Seconds(5), new TEvIssueQuery);
}
}
IssueQuery();
diff --git a/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp b/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp
index d00f8d139a5..3d1e6988d8f 100644
--- a/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp
@@ -35,7 +35,6 @@ namespace NKikimr {
fFunc(TEvBlobStorage::EvRecoverBlob, ForwardToBlobRecoveryActor);
hFunc(TEvRestoreCorruptedBlobResult, Handle);
hFunc(TEvNonrestoredCorruptedBlobNotify, Handle);
- cFunc(EvGenerateRestoreCorruptedBlobQuery, HandleGenerateRestoreCorruptedBlobQuery);
hFunc(NPDisk::TEvLogResult, Handle);
hFunc(NPDisk::TEvCutLog, Handle);
@@ -76,9 +75,9 @@ namespace NKikimr {
const TInstant end = start + TDuration::Seconds(30);
do {
Quantum();
- Send(ScrubCtx->SkeletonId, new TEvReportScrubStatus(UnreadableBlobs.size()));
+ Send(ScrubCtx->SkeletonId, new TEvReportScrubStatus(!UnreadableBlobs.empty()));
} while (State && TActorCoroImpl::Now() < end);
- Send(ScrubCtx->SkeletonId, new TEvReportScrubStatus(UnreadableBlobs.size()));
+ Send(ScrubCtx->SkeletonId, new TEvReportScrubStatus(!UnreadableBlobs.empty()));
CommitStateUpdate();
}
} catch (const TExDie&) {
@@ -154,6 +153,9 @@ namespace NKikimr {
}
}
+ // validate currently held blobs
+ FilterUnreadableBlobs(*Snap, *GetBarriersEssence());
+
ReleaseSnapshot();
if (const ui64 cookie = GenerateRestoreCorruptedBlobQuery()) {
diff --git a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h
index 60b2bb8adaa..0563dfb8186 100644
--- a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h
+++ b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h
@@ -52,10 +52,6 @@ namespace NKikimr {
bool Success = false;
- enum {
- EvGenerateRestoreCorruptedBlobQuery = EventSpaceBegin(TEvents::ES_PRIVATE),
- };
-
private:
struct TUnreadableBlobState {
NMatrix::TVectorType UnreadableParts; // parts we're going to recover from peer disks
@@ -79,6 +75,8 @@ namespace NKikimr {
void AddUnreadableParts(const TLogoBlobID& fullId, NMatrix::TVectorType corrupted, TDiskPart corruptedPart);
void UpdateUnreadableParts(const TLogoBlobID& fullId, NMatrix::TVectorType corrupted, TDiskPart corruptedPart);
void UpdateReadableParts(const TLogoBlobID& fullId, NMatrix::TVectorType readable);
+ void Handle(TEvTakeHullSnapshotResult::TPtr ev);
+ void FilterUnreadableBlobs(THullDsSnap& snap, TBarriersSnapshot::TBarriersEssence& barriers);
ui64 GenerateRestoreCorruptedBlobQuery();
void Handle(TAutoPtr<TEventHandle<TEvRestoreCorruptedBlobResult>> ev);
diff --git a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_snapshot.cpp b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_snapshot.cpp
index daa5181cc60..dd26dc98808 100644
--- a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_snapshot.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_snapshot.cpp
@@ -7,7 +7,7 @@ namespace NKikimr {
CurrentState = TStringBuilder() << "waiting for Hull snapshot";
auto res = WaitForSpecificEvent<TEvTakeHullSnapshotResult>(&TScrubCoroImpl::ProcessUnexpectedEvent);
Snap.emplace(std::move(res->Get()->Snap));
- Snap->BarriersSnap.Destroy(); // barriers are not needed for operation
+ Snap->BlocksSnap.Destroy(); // blocks are not needed for operation
}
void TScrubCoroImpl::ReleaseSnapshot() {
diff --git a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_unreadable.cpp b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_unreadable.cpp
index 7d717019269..200796b53c0 100644
--- a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_unreadable.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_unreadable.cpp
@@ -135,8 +135,8 @@ namespace NKikimr {
}
}
if (when != TInstant::Max()) {
- GetActorSystem()->Schedule(when, new IEventHandle(EvGenerateRestoreCorruptedBlobQuery, 0, SelfActorId,
- {}, nullptr, 0));
+ GetActorSystem()->Schedule(when, new IEventHandle(ScrubCtx->SkeletonId, SelfActorId,
+ new TEvTakeHullSnapshot(false)));
GenerateRestoreCorruptedBlobQueryScheduled = true;
}
}
@@ -151,10 +151,37 @@ namespace NKikimr {
GenerateRestoreCorruptedBlobQuery();
}
- void TScrubCoroImpl::HandleGenerateRestoreCorruptedBlobQuery() {
+ void TScrubCoroImpl::Handle(TEvTakeHullSnapshotResult::TPtr ev) {
Y_VERIFY(GenerateRestoreCorruptedBlobQueryScheduled);
GenerateRestoreCorruptedBlobQueryScheduled = false;
+
+ auto& snap = ev->Get()->Snap;
+ auto barriers = snap.BarriersSnap.CreateEssence(snap.HullCtx);
+ FilterUnreadableBlobs(snap, *barriers);
+
GenerateRestoreCorruptedBlobQuery();
}
+ void TScrubCoroImpl::FilterUnreadableBlobs(THullDsSnap& snap, TBarriersSnapshot::TBarriersEssence& barriers) {
+ TLevelIndexSnapshot::TForwardIterator iter(snap.HullCtx, &snap.LogoBlobsSnap);
+ TIndexRecordMerger merger(Info->Type);
+
+ for (auto it = UnreadableBlobs.begin(); it != UnreadableBlobs.end(); ) {
+ const TLogoBlobID id = it->first;
+ ++it;
+
+ bool keepData = false;
+ if (iter.Seek(id); iter.Valid() && iter.GetCurKey().LogoBlobID() == id) {
+ iter.PutToMerger(&merger);
+ merger.Finish();
+ keepData = barriers.Keep(id, merger.GetMemRec(), merger.GetMemRecsMerged(), snap.HullCtx->AllowKeepFlags).KeepData;
+ merger.Clear();
+ }
+
+ if (!keepData) {
+ DropGarbageBlob(id);
+ }
+ }
+ }
+
} // NKikimr