about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sergey Belyakov <serg-belyakov@ydb.tech> 2025-07-30 11:31:19 +0300
committer GitHub <noreply@github.com> 2025-07-30 11:31:19 +0300
commit 48b4a0295db7d06dd98d284d3fdc1fce50938512 (patch)
tree ee35607d17e42617b3ee3cbcc696d8a1ba439bdf
parent 53e0418cb3697140dceb9c0d0b1d7aa570988785 (diff)
download ydb-48b4a0295db7d06dd98d284d3fdc1fce50938512.tar.gz
More detailed metrics for DeepScrubbing and print reports in a single line (#21888)
-rw-r--r-- ydb/core/blobstorage/ut_blobstorage/lib/env.h 25
-rw-r--r-- ydb/core/blobstorage/ut_blobstorage/scrub.cpp 34
-rw-r--r-- ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h 61
-rw-r--r-- ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp 65
-rw-r--r-- ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h 2
5 files changed, 140 insertions, 47 deletions
diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/env.h b/ydb/core/blobstorage/ut_blobstorage/lib/env.h
index 04bf43c7890..6847c643e82 100644
--- a/ydb/core/blobstorage/ut_blobstorage/lib/env.h
+++ b/ydb/core/blobstorage/ut_blobstorage/lib/env.h
@@ -1051,6 +1051,31 @@ struct TEnvironmentSetup {
return ctr;
}
+ template <class TCallback> // Sums callback(counter subgroup) over every node and every slot of the group.
+ ui64 AggregateVDiskCountersWithCallback(TString storagePool, ui32 nodesCount, ui32 groupSize, ui32 groupId,
+ const std::vector<ui32>& pdiskLayout, TCallback callback) { // NOTE(review): pdiskLayout[i] is read for every i < groupSize with no bounds check
+ ui64 ctr = 0;
+
+ for (ui32 nodeId = 1; nodeId <= nodesCount; ++nodeId) { // node ids run from 1 to nodesCount inclusive
+ auto* appData = Runtime->GetNode(nodeId)->AppData.get();
+ for (ui32 i = 0; i < groupSize; ++i) {
+ TStringStream ss;
+ ss << LeftPad(i, 2, '0'); // slot index padded with '0' to width 2, used as the "orderNumber" label
+ TString orderNumber = ss.Str();
+ ss.Clear();
+ ss << LeftPad(pdiskLayout[i], 9, '0'); // pdiskLayout[i] padded with '0' to width 9, used as the "pdisk" label
+ TString pdisk = ss.Str();
+ ctr += callback(GetServiceCounters(appData->Counters, "vdisks")->
+ GetSubgroup("storagePool", storagePool)->
+ GetSubgroup("group", std::to_string(groupId))->
+ GetSubgroup("orderNumber", orderNumber)->
+ GetSubgroup("pdisk", pdisk)->
+ GetSubgroup("media", "rot")); // the media label is hard-coded to "rot"
+ }
+ }
+ return ctr;
+ }
+
ui64 AggregateVDiskCounters(TString storagePool, ui32 nodesCount, ui32 groupSize, ui32 groupId,
const std::vector<ui32>& pdiskLayout, TString subsystem, TString counter, bool derivative = false) {
return AggregateVDiskCountersBase(storagePool, nodesCount, groupSize, groupId, pdiskLayout,
diff --git a/ydb/core/blobstorage/ut_blobstorage/scrub.cpp b/ydb/core/blobstorage/ut_blobstorage/scrub.cpp
index dff6342a047..ea08f6a45a4 100644
--- a/ydb/core/blobstorage/ut_blobstorage/scrub.cpp
+++ b/ydb/core/blobstorage/ut_blobstorage/scrub.cpp
@@ -433,6 +433,25 @@ Y_UNIT_TEST_SUITE(DeepScrubbing) {
, PartCorruptionMask(partCorruptionMask)
{}
+ struct TAggregateScrubMetrics { // Functor reading one deepScrubbing counter for a fixed blob-size class and erasure species.
+ TAggregateScrubMetrics(TString counterName, bool isHuge, TErasureType::EErasureSpecies erasure)
+ : CounterName(counterName)
+ , IsHuge(isHuge)
+ , Erasure(erasure)
+ {}
+
+ ui64 operator()(TIntrusivePtr<NMonitoring::TDynamicCounters> counters) const { // Returns Val() of CounterName found under subsystem/blobSize/erasure of the given counters.
+ return counters->GetSubgroup("subsystem", "deepScrubbing")
+ ->GetSubgroup("blobSize", IsHuge ? "huge" : "small")
+ ->GetSubgroup("erasure", TErasureType::ErasureSpeciesName(Erasure))
+ ->GetCounter(CounterName, true)->Val(); // NOTE(review): second GetCounter argument is presumably the derivative flag — confirm
+ }
+
+ TString CounterName; // name passed to GetCounter
+ bool IsHuge; // selects the blobSize label: "huge" when true, "small" otherwise
+ TErasureType::EErasureSpecies Erasure; // converted to the erasure label via ErasureSpeciesName
+ };
+
void RunTest() {
Initialize();
AllocateEdgeActor(true);
@@ -506,19 +525,16 @@ Y_UNIT_TEST_SUITE(DeepScrubbing) {
}
}
+ bool isHuge = (BlobSize == EBlobSize::Val_HugeBlob);
+
std::vector<ui32> pdiskLayout = MakePDiskLayout(BaseConfig, groupInfo->GetTopology(), GroupId);
ui64 blobsScrubbed =
- Env->AggregateVDiskCounters(Env->StoragePoolName, NodeCount, Erasure.BlobSubgroupSize(),
- GroupId, pdiskLayout, "deepScrubbing", "SmallBlobsChecked", false) +
- Env->AggregateVDiskCounters(Env->StoragePoolName, NodeCount, Erasure.BlobSubgroupSize(),
- GroupId, pdiskLayout, "deepScrubbing", "HugeBlobsChecked", false);
-
+ Env->AggregateVDiskCountersWithCallback(Env->StoragePoolName, NodeCount, Erasure.BlobSubgroupSize(),
+ GroupId, pdiskLayout, TAggregateScrubMetrics("BlobsChecked", isHuge, Erasure.GetErasure()));
ui64 dataIssues =
- Env->AggregateVDiskCounters(Env->StoragePoolName, NodeCount, Erasure.BlobSubgroupSize(),
- GroupId, pdiskLayout, "deepScrubbing", "DataIssuesSmallBlobs", false) +
- Env->AggregateVDiskCounters(Env->StoragePoolName, NodeCount, Erasure.BlobSubgroupSize(),
- GroupId, pdiskLayout, "deepScrubbing", "DataIssuesHugeBlobs", false);
+ Env->AggregateVDiskCountersWithCallback(Env->StoragePoolName, NodeCount, Erasure.BlobSubgroupSize(),
+ GroupId, pdiskLayout, TAggregateScrubMetrics("DataIssues", isHuge, Erasure.GetErasure()));
UNIT_ASSERT_VALUES_UNEQUAL_C(blobsScrubbed, 0, makePrefix());
UNIT_ASSERT_VALUES_UNEQUAL_C(dataIssues, 0, makePrefix()
diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h b/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h
index 01f80990785..72b2495789c 100644
--- a/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h
+++ b/ydb/core/blobstorage/vdisk/common/vdisk_mongroups.h
@@ -933,33 +933,60 @@ public:
COUNTER_DEF(BarriersBalance);
};
+ ///////////////////////////////////////////////////////////////////////////////////
+ // TDeepScrubbingGroup
+ ///////////////////////////////////////////////////////////////////////////////////
class TDeepScrubbingGroup : public TBase {
public:
GROUP_CONSTRUCTOR(TDeepScrubbingGroup)
{
- COUNTER_INIT(SmallBlobsChecked, true);
- COUNTER_INIT(HugeBlobsChecked, true);
- COUNTER_INIT(CheckIntegritySuccesses, false);
- COUNTER_INIT(CheckIntegrityErrors, false);
-
- COUNTER_INIT(PlacementIssuesSmallBlobs, false);
- COUNTER_INIT(DataIssuesSmallBlobs, false);
-
- COUNTER_INIT(PlacementIssuesHugeBlobs, false);
- COUNTER_INIT(DataIssuesHugeBlobs, false);
+ COUNTER_INIT(BlobsChecked, true); // NOTE(review): all seven counters now pass true as the second COUNTER_INIT argument — presumably the derivative flag; confirm against the macro
+ COUNTER_INIT(CheckIntegritySuccesses, true);
+ COUNTER_INIT(CheckIntegrityErrors, true);
+ COUNTER_INIT(UnknownDataStatus, true);
+ COUNTER_INIT(UnknownPlacementStatus, true);
+ COUNTER_INIT(DataIssues, true);
+ COUNTER_INIT(PlacementIssues, true);
}
- COUNTER_DEF(SmallBlobsChecked);
- COUNTER_DEF(HugeBlobsChecked);
-
+ COUNTER_DEF(BlobsChecked); // single counter in place of the removed SmallBlobsChecked/HugeBlobsChecked pair
COUNTER_DEF(CheckIntegritySuccesses);
COUNTER_DEF(CheckIntegrityErrors);
+ COUNTER_DEF(UnknownDataStatus);
+ COUNTER_DEF(UnknownPlacementStatus);
+ COUNTER_DEF(DataIssues); // single counter in place of the removed DataIssuesSmallBlobs/DataIssuesHugeBlobs pair
+ COUNTER_DEF(PlacementIssues); // single counter in place of the removed PlacementIssuesSmallBlobs/PlacementIssuesHugeBlobs pair
+ };
- COUNTER_DEF(PlacementIssuesSmallBlobs);
- COUNTER_DEF(DataIssuesSmallBlobs);
+ class TDeepScrubbingSubgroups { // Holds one TDeepScrubbingGroup per (blobSize, erasure) label pair, looked up by a packed key.
+ public:
+ TDeepScrubbingSubgroups(TIntrusivePtr<NMonitoring::TDynamicCounters> counters) { // Creates every subgroup up front under the given counters.
+ for (bool isHuge : {true, false}) {
+ for (TErasureType::EErasureSpecies erasure :
+ {TErasureType::ErasureNone, TErasureType::Erasure4Plus2Block,
+ TErasureType::ErasureMirror3of4, TErasureType::ErasureMirror3dc}) { // only these four erasure species get a counter group
+ ::NMonitoring::TDynamicCounterPtr subgroup = counters
+ ->GetSubgroup("blobSize", isHuge ? "huge" : "small")
+ ->GetSubgroup("erasure", TErasureType::ErasureSpeciesName(erasure));
+ Subgroups.insert({GetKey(isHuge, erasure), TDeepScrubbingGroup(subgroup)});
+ }
+ }
+ }
+
+ TDeepScrubbingGroup* GetCounters(bool isHuge, TErasureType::EErasureSpecies erasure) { // Returns the group for this pair, or nullptr if the constructor did not create one.
+ auto it = Subgroups.find(GetKey(isHuge, erasure));
+ if (it == Subgroups.end()) {
+ return nullptr;
+ }
+ return &it->second;
+ }
- COUNTER_DEF(PlacementIssuesHugeBlobs);
- COUNTER_DEF(DataIssuesHugeBlobs);
+ private:
+ std::unordered_map<ui64, TDeepScrubbingGroup> Subgroups; // keyed by GetKey(isHuge, erasure)
+
+ ui64 GetKey(bool isHuge, TErasureType::EErasureSpecies erasure) { // Packs the pair into one integer key.
+ return ((ui64)isHuge << 32) + (ui64)erasure; // isHuge lands in bit 32; NOTE(review): keys stay distinct only while the erasure value is below 2^32
+ }
};
///////////////////////////////////////////////////////////////////////////////////
diff --git a/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp b/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp
index a0500be9aa2..b437e823847 100644
--- a/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp
+++ b/ydb/core/blobstorage/vdisk/scrub/scrub_actor.cpp
@@ -14,7 +14,7 @@ namespace NKikimr {
, LogPrefix(VCtx->VDiskLogPrefix)
, Counters(VCtx->VDiskCounters->GetSubgroup("subsystem", "scrub"))
, MonGroup(Counters)
- , DeepScrubbingGroup(VCtx->VDiskCounters->GetSubgroup("subsystem", "deepScrubbing"))
+ , DeepScrubbingSubgroups(VCtx->VDiskCounters->GetSubgroup("subsystem", "deepScrubbing"))
, Arena(&TScrubCoroImpl::AllocateRopeArenaChunk)
, ScrubEntrypoint(std::move(scrubEntrypoint))
, ScrubEntrypointLsn(scrubEntrypointLsn)
@@ -239,38 +239,63 @@ namespace NKikimr {
void TScrubCoroImpl::CheckIntegrity(const TLogoBlobID& blobId, bool isHuge) {
SendToBSProxy(SelfActorId, Info->GroupID, new TEvBlobStorage::TEvCheckIntegrity(blobId, TInstant::Max(),
- NKikimrBlobStorage::EGetHandleClass::LowRead));
+ NKikimrBlobStorage::EGetHandleClass::LowRead, true)); // NOTE(review): the meaning of the new trailing true is not visible here — confirm against TEvCheckIntegrity
auto res = WaitForPDiskEvent<TEvBlobStorage::TEvCheckIntegrityResult>();
- if (isHuge) {
- ++DeepScrubbingGroup.HugeBlobsChecked();
- } else {
- ++DeepScrubbingGroup.SmallBlobsChecked();
+ TErasureType::EErasureSpecies erasure = Info->Type.GetErasure(); // erasure species read from Info->Type
+
+ NMonGroup::TDeepScrubbingGroup* counters = DeepScrubbingSubgroups.GetCounters(isHuge, erasure); // nullptr when no group exists for this (isHuge, erasure) pair
+ if (counters) { // every counter update in this function is skipped when counters is nullptr
+ ++counters->BlobsChecked();
}
if (res->Get()->Status != NKikimrProto::OK) {
STLOGX(GetActorContext(), PRI_WARN, BS_VDISK_SCRUB, VDS97, VDISKP(LogPrefix, "TEvCheckIntegrity request failed"),
(BlobId, blobId), (ErrorReason, res->Get()->ErrorReason));
- ++DeepScrubbingGroup.CheckIntegrityErrors();
+ if (counters) {
+ ++counters->CheckIntegrityErrors();
+ }
} else {
- ++DeepScrubbingGroup.CheckIntegritySuccesses();
- if (res->Get()->PlacementStatus != TEvBlobStorage::TEvCheckIntegrityResult::PS_OK) {
+ if (counters) {
+ ++counters->CheckIntegritySuccesses();
+ }
+
+ switch (res->Get()->PlacementStatus) {
+ case TEvBlobStorage::TEvCheckIntegrityResult::PS_UNKNOWN:
+ case TEvBlobStorage::TEvCheckIntegrityResult::PS_REPLICATION_IN_PROGRESS: // both statuses are counted as unknown placement and not logged
+ if (counters) {
+ ++counters->UnknownPlacementStatus();
+ }
+ break;
+ case TEvBlobStorage::TEvCheckIntegrityResult::PS_BLOB_IS_LOST:
+ case TEvBlobStorage::TEvCheckIntegrityResult::PS_BLOB_IS_RECOVERABLE: // lost and recoverable are both logged at PRI_CRIT and counted as placement issues
STLOGX(GetActorContext(), PRI_CRIT, BS_VDISK_SCRUB, VDS98, VDISKP(LogPrefix, "TEvCheckIntegrity discovered placement issue"),
- (BlobId, blobId), (CheckIntegrityResult, res->Get()->ToString()));
- if (isHuge) {
- ++DeepScrubbingGroup.PlacementIssuesHugeBlobs();
- } else {
- ++DeepScrubbingGroup.PlacementIssuesSmallBlobs();
+ (BlobId, blobId), (Erasure, TErasureType::ErasureSpeciesName(erasure)), (CheckIntegrityResult, res->Get()->ToString()));
+ if (counters) {
+ ++counters->PlacementIssues();
}
+ break;
+ case TEvBlobStorage::TEvCheckIntegrityResult::PS_OK:
+ default: // PS_OK and any status not listed above touch no counter
+ break; // nothing to do
}
- if (res->Get()->DataStatus != TEvBlobStorage::TEvCheckIntegrityResult::DS_OK) {
+
+ switch (res->Get()->DataStatus) {
+ case TEvBlobStorage::TEvCheckIntegrityResult::DS_UNKNOWN: // unknown data status is counted but not logged
+ if (counters) {
+ ++counters->UnknownDataStatus();
+ }
+ break;
+ case TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR: // data errors are logged at PRI_CRIT and counted
STLOGX(GetActorContext(), PRI_CRIT, BS_VDISK_SCRUB, VDS99, VDISKP(LogPrefix, "TEvCheckIntegrity discovered data issue"),
- (BlobId, blobId), (CheckIntegrityResult, res->Get()->ToString()));
- if (isHuge) {
- ++DeepScrubbingGroup.DataIssuesHugeBlobs();
- } else {
- ++DeepScrubbingGroup.DataIssuesSmallBlobs();
+ (BlobId, blobId), (Erasure, TErasureType::ErasureSpeciesName(erasure)), (CheckIntegrityResult, res->Get()->ToString()));
+ if (counters) {
+ ++counters->DataIssues();
}
+ break;
+ case TEvBlobStorage::TEvCheckIntegrityResult::DS_OK:
+ default: // DS_OK and any status not listed above touch no counter
+ break; // nothing to do
}
}
}
diff --git a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h
index 5a74a2505c1..cbc9baeeadd 100644
--- a/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h
+++ b/ydb/core/blobstorage/vdisk/scrub/scrub_actor_impl.h
@@ -25,7 +25,7 @@ namespace NKikimr {
::NMonitoring::TDynamicCounterPtr Counters;
NMonGroup::TScrubGroup MonGroup;
- NMonGroup::TDeepScrubbingGroup DeepScrubbingGroup;
+ NMonGroup::TDeepScrubbingSubgroups DeepScrubbingSubgroups;
TRopeArena Arena;