diff options
| author | Aleksandr Dmitriev <[email protected]> | 2025-05-26 20:20:01 +0300 |
|---|---|---|
| committer | GitHub <[email protected]> | 2025-05-26 20:20:01 +0300 |
| commit | 8687edcceb6023d97f2c054dc349d222689b9613 (patch) | |
| tree | cea826d44fd1f3aa8005443fd95534f1779c93d8 | |
| parent | 9b92253b68c58fba01919cadf28a47bb460b90dd (diff) | |
check data integrity of the blob parts; more tests (#18702)
| -rw-r--r-- | ydb/core/base/blobstorage.h | 3 | ||||
| -rw-r--r-- | ydb/core/blobstorage/dsproxy/dsproxy_check_integrity_get.cpp | 50 | ||||
| -rw-r--r-- | ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp | 39 | ||||
| -rw-r--r-- | ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h | 25 | ||||
| -rw-r--r-- | ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_data_check.h | 292 | ||||
| -rw-r--r-- | ydb/core/blobstorage/groupinfo/ya.make | 1 | ||||
| -rw-r--r-- | ydb/core/blobstorage/ut_blobstorage/check_integrity.cpp | 330 |
7 files changed, 713 insertions, 27 deletions
diff --git a/ydb/core/base/blobstorage.h b/ydb/core/base/blobstorage.h index 36b476658ce..9d0590171f9 100644 --- a/ydb/core/base/blobstorage.h +++ b/ydb/core/base/blobstorage.h @@ -1473,13 +1473,13 @@ struct TEvBlobStorage { }; EPlacementStatus PlacementStatus; - // TODO: calculate data status enum EDataStatus { DS_OK = 1, // all data parts contain valid data DS_ERROR = 2, // some parts definitely contain invalid data DS_UNKNOWN = 3, // status is unknown because of missing disks or network problems }; EDataStatus DataStatus; + TString DataErrorInfo; // textual info about errors in blob data std::shared_ptr<TExecutionRelay> ExecutionRelay; @@ -1495,6 +1495,7 @@ struct TEvBlobStorage { << " ErrorReason# " << ErrorReason << " PlacementStatus# " << (int)PlacementStatus << " DataStatus# " << (int)DataStatus + << " DataErrorInfo# " << DataErrorInfo << " }"; return str.Str(); } diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_check_integrity_get.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_check_integrity_get.cpp index 19348bd1b26..b4587b8e0d9 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_check_integrity_get.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_check_integrity_get.cpp @@ -1,7 +1,9 @@ #include "dsproxy.h" #include "dsproxy_mon.h" #include "dsproxy_quorum_tracker.h" -#include "dsproxy_blob_tracker.h" + +#include <ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_data_check.h> +#include <ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_partlayout.h> #include <ydb/core/blobstorage/vdisk/common/vdisk_events.h> namespace NKikimr { @@ -16,6 +18,8 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc TSubgroupPartLayout PartLayout; TSubgroupPartLayout PartLayoutWithNotYet; + TBlobStorageGroupInfo::IDataIntegrityChecker::TPartsData PartsData; + bool HasErrorDisks = false; ui32 VGetsInFlight = 0; @@ -41,7 +45,8 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc return str.Str(); } - void UpdateFromResponseData(const NKikimrBlobStorage::TQueryResult& result, const TVDiskID& vDiskId) { + void UpdateFromResponseData(TEvBlobStorage::TEvVGetResult* ev, + const NKikimrBlobStorage::TQueryResult& result, const TVDiskID& vDiskId) { if (!result.HasBlobID()) { return; } @@ -54,7 +59,7 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc } const NKikimrProto::EReplyStatus status = result.GetStatus(); - ui32 nodeId = Info->GetTopology().GetIdxInSubgroup(vDiskId, Id.Hash()); + ui32 diskIdx = Info->GetTopology().GetIdxInSubgroup(vDiskId, Id.Hash()); const ui32 partId = id.PartId(); if (!partId) { @@ -63,12 +68,13 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc switch (status) { case NKikimrProto::OK: - PartLayout.AddItem(nodeId, partId - 1, Info->Type); - PartLayoutWithNotYet.AddItem(nodeId, partId - 1, Info->Type); + PartLayout.AddItem(diskIdx, partId - 1, Info->Type); + PartLayoutWithNotYet.AddItem(diskIdx, partId - 1, Info->Type); + PartsData.Parts[partId - 1].push_back(std::make_pair(diskIdx, ev->GetBlobData(result))); break; case NKikimrProto::NOT_YET: - PartLayoutWithNotYet.AddItem(nodeId, partId - 1, Info->Type); + PartLayoutWithNotYet.AddItem(diskIdx, partId - 1, Info->Type); break; default: @@ -114,7 +120,7 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc if (status == NKikimrProto::OK) { for (size_t i = 0; i < record.ResultSize(); ++i) { - UpdateFromResponseData(record.GetResult(i), vDiskId); + UpdateFromResponseData(ev->Get(), record.GetResult(i), vDiskId); } } else { HasErrorDisks = true; @@ -128,12 +134,12 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc void Analyze() { PendingResult.reset(new TEvCheckIntegrityResult(NKikimrProto::OK)); PendingResult->Id = Id; - PendingResult->DataStatus = TEvCheckIntegrityResult::DS_UNKNOWN; // TODO + PendingResult->DataStatus = TEvCheckIntegrityResult::DS_UNKNOWN; TBlobStorageGroupInfo::TSubgroupVDisks faultyDisks(&Info->GetTopology()); // empty set - const auto& checker = Info->GetQuorumChecker(); - TBlobStorageGroupInfo::EBlobState state = checker.GetBlobStateWithoutLayoutCheck( + const auto& quorumChecker = Info->GetQuorumChecker(); + TBlobStorageGroupInfo::EBlobState state = quorumChecker.GetBlobStateWithoutLayoutCheck( PartLayout, faultyDisks); switch (state) { @@ -149,7 +155,7 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc case TBlobStorageGroupInfo::EBS_RECOVERABLE_FRAGMENTARY: case TBlobStorageGroupInfo::EBS_UNRECOVERABLE_FRAGMENTARY: case TBlobStorageGroupInfo::EBS_RECOVERABLE_DOUBTED: { - TBlobStorageGroupInfo::EBlobState stateNotYet = checker.GetBlobStateWithoutLayoutCheck( + TBlobStorageGroupInfo::EBlobState stateNotYet = quorumChecker.GetBlobStateWithoutLayoutCheck( PartLayoutWithNotYet, faultyDisks); if (stateNotYet == TBlobStorageGroupInfo::EBS_FULL) { @@ -165,6 +171,26 @@ class TBlobStorageGroupCheckIntegrityRequest : public TBlobStorageGroupRequestAc } } + const auto& dataChecker = Info->GetTopology().GetDataIntegrityChecker(); + auto partsState = dataChecker.GetDataState(Id, PartsData); + + if (partsState.IsOk) { + PendingResult->DataStatus = (PendingResult->PlacementStatus == TEvCheckIntegrityResult::PS_UNKNOWN) ? + TEvCheckIntegrityResult::DS_UNKNOWN : TEvCheckIntegrityResult::DS_OK; + } else { + PendingResult->DataStatus = TEvCheckIntegrityResult::DS_ERROR; + } + + TStringStream str; + str << "Disks:" << Endl; + for (ui32 diskIdx = 0; diskIdx < Info->Type.BlobSubgroupSize(); ++diskIdx) { + auto vDiskIdShort = Info->GetTopology().GetVDiskInSubgroup(diskIdx, Id.Hash()); + str << diskIdx << ": " << Info->CreateVDiskID(vDiskIdShort) << Endl; + } + + PendingResult->DataErrorInfo = str.Str(); + PendingResult->DataErrorInfo += partsState.DataErrorInfo; + ReplyAndDie(NKikimrProto::OK); } @@ -195,6 +221,8 @@ public: {} void Bootstrap() override { + PartsData.Parts.resize(Info->Type.TotalPartCount()); + for (const auto& vdisk : Info->GetVDisks()) { auto vDiskId = Info->GetVDiskId(vdisk.OrderNumber); diff --git a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp index 861dcf53a18..12a120e4f03 100644 --- a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp +++ b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.cpp @@ -3,6 +3,7 @@ #include "blobstorage_groupinfo_iter.h" #include "blobstorage_groupinfo_sets.h" #include "blobstorage_groupinfo_partlayout.h" +#include "blobstorage_groupinfo_data_check.h" #include <ydb/core/base/services/blobstorage_service_id.h> #include <ydb/core/blobstorage/vdisk/ingress/blobstorage_ingress.h> #include <ydb/core/protos/blobstorage.pb.h> @@ -386,6 +387,8 @@ void TBlobStorageGroupInfo::TTopology::FinalizeConstruction() { BlobMapper.reset(CreateMapper(GType, this)); // create quorum checker QuorumChecker.reset(CreateQuorumChecker(this)); + // create data integrity checker + DataIntegrityChecker.reset(CreateDataIntegrityChecker(this)); } bool TBlobStorageGroupInfo::TTopology::IsValidId(const TVDiskID& vdisk) const { @@ -552,6 +555,42 @@ TBlobStorageGroupInfo::IQuorumChecker *TBlobStorageGroupInfo::TTopology::CreateQ Y_ABORT(); } +TBlobStorageGroupInfo::IDataIntegrityChecker* +TBlobStorageGroupInfo::TTopology::CreateDataIntegrityChecker(const TTopology* topology) { + switch (topology->GType.GetErasure()) { + case TBlobStorageGroupType::ErasureNone: + case TBlobStorageGroupType::ErasureMirror3: + case TBlobStorageGroupType::Erasure3Plus1Block: + case TBlobStorageGroupType::Erasure3Plus1Stripe: + case TBlobStorageGroupType::Erasure3Plus2Block: + case TBlobStorageGroupType::Erasure4Plus2Stripe: + case TBlobStorageGroupType::Erasure3Plus2Stripe: + case TBlobStorageGroupType::ErasureMirror3Plus2: + case TBlobStorageGroupType::Erasure4Plus3Block: + case TBlobStorageGroupType::Erasure4Plus3Stripe: + case TBlobStorageGroupType::Erasure3Plus3Block: + case TBlobStorageGroupType::Erasure3Plus3Stripe: + case TBlobStorageGroupType::Erasure2Plus3Block: + case TBlobStorageGroupType::Erasure2Plus3Stripe: + case TBlobStorageGroupType::Erasure2Plus2Block: + case TBlobStorageGroupType::Erasure2Plus2Stripe: + return new TDataIntegrityCheckerTrivial(topology); + + case TBlobStorageGroupType::Erasure4Plus2Block: + return new TDataIntegrityCheckerBlock42(topology); + + case TBlobStorageGroupType::ErasureMirror3dc: + return new TDataIntegrityCheckerMirror3dc(topology); + + case TBlobStorageGroupType::ErasureMirror3of4: + return new TDataIntegrityCheckerMirror3of4(topology); + + default: + Y_ABORT("unexpected erasure type 0x%08" PRIx32, + static_cast<ui32>(topology->GType.GetErasure())); + } +} + TString TBlobStorageGroupInfo::TTopology::ToString() const { TStringStream str; str << "{GType# " << GType.ToString(); diff --git a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h index 44069cc1c2c..bfbcbb66528 100644 --- a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h +++ b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo.h @@ -156,6 +156,26 @@ public: }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // BLOB PART DATA INTEGRITY CHECKER + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct IDataIntegrityChecker { + virtual ~IDataIntegrityChecker() = default; + + using TPart = std::pair<ui32, TRope>; + + struct TPartsData { + std::vector<std::vector<TPart>> Parts; // partId - 1 -> [ {diskIdx; data} ] + }; + + struct TPartsState { + bool IsOk = true; + TString DataErrorInfo; + }; + + virtual TPartsState GetDataState(const TLogoBlobID& id, const TPartsData& partsData) const = 0; + }; + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // TOPOLOGY //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct TVDiskInfo { @@ -198,6 +218,8 @@ public: std::unique_ptr<IQuorumChecker> QuorumChecker; // map to quickly get (Short)VDisk id from its order number inside the group TVector<TVDiskIdShort> VDiskIdForOrderNumber; + // data integrity checker + std::unique_ptr<IDataIntegrityChecker> DataIntegrityChecker; TTopology(TBlobStorageGroupType gtype); TTopology(TBlobStorageGroupType gtype, ui32 numFailRealms, ui32 numFailDomainsPerFailRealm, ui32 numVDisksPerFailDomain, @@ -236,6 +258,8 @@ public: ui32 GetNumFailDomainsPerFailRealm() const { return FailRealms[0].FailDomains.size(); } // get quorum checker const IQuorumChecker& GetQuorumChecker() const { return *QuorumChecker; } + // get data integrity checker + const IDataIntegrityChecker& GetDataIntegrityChecker() const { return *DataIntegrityChecker; } ////////////////////////////////////////////////////////////////////////////////////// // IBlobToDiskMapper interface @@ -277,6 +301,7 @@ public: private: static IBlobToDiskMapper *CreateMapper(TBlobStorageGroupType gtype, const TTopology *topology); static IQuorumChecker *CreateQuorumChecker(const TTopology *topology); + static IDataIntegrityChecker *CreateDataIntegrityChecker(const TTopology *topology); }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_data_check.h b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_data_check.h new file mode 100644 index 00000000000..e30457e0a37 --- /dev/null +++ b/ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_data_check.h @@ -0,0 +1,292 @@ +#include "blobstorage_groupinfo.h" + +namespace NKikimr { + +class TDataIntegrityCheckerBase : public TBlobStorageGroupInfo::IDataIntegrityChecker { +protected: + const TBlobStorageGroupInfo::TTopology *Top; + +public: + explicit TDataIntegrityCheckerBase(const TBlobStorageGroupInfo::TTopology *top) + : Top(top) + {} +}; + +class TDataIntegrityCheckerTrivial : public TDataIntegrityCheckerBase { +public: + using TDataIntegrityCheckerBase::TDataIntegrityCheckerBase; + + TPartsState GetDataState(const TLogoBlobID& id, const TPartsData& partsData) const override { + Y_UNUSED(id); + Y_UNUSED(partsData); + return {}; + } +}; + +class TDataIntegrityCheckerBlock42 : public TDataIntegrityCheckerBase { +public: + using TDataIntegrityCheckerBase::TDataIntegrityCheckerBase; + + TPartsState GetDataState(const TLogoBlobID& id, const TPartsData& partsData) const override { + Y_ABORT_UNLESS(partsData.Parts.size() == 6); + + TPartsState partsState; + + struct TSeenPart { + TRope Data; + std::vector<ui32> DiskIdxs; + }; + std::array<std::vector<TSeenPart>, 6> seenParts; + + // find all distinct copies of each part + for (ui32 partId = 0; partId < 6; ++partId) { + auto& seen = seenParts[partId]; + for (const auto& [diskIdx, data] : partsData.Parts[partId]) { + bool isNew = true; + for (auto& seenPart : seen) { + if (!TRope::Compare(data, seenPart.Data)) { + seenPart.DiskIdxs.push_back(diskIdx); + isNew = false; + break; + } + } + if (isNew) { + seen.push_back({data, {diskIdx}}); + } + } + } + + // checking layout + TStringStream layoutReport; + layoutReport << "Layout info:" << Endl; + + TStringStream str; + bool hasUnequalParts = false; + for (ui32 partId = 0; partId < 6; ++partId) { + const auto& seen = seenParts[partId]; + if (seen.size() > 1) { + hasUnequalParts = true; + } + str << "part " << partId + 1 << ": "; + ui32 ver = 0; + for (const auto& seenPart : seen) { + if (ver > 0) { + str << ", "; + } + str << "ver" << ver << " disks [ "; + for (const auto& diskIdx : seenPart.DiskIdxs) { + str << diskIdx << " "; + } + str << "]"; + ++ver; + } + str << Endl; + } + + layoutReport << str.Str(); + if (hasUnequalParts) { + partsState.IsOk = false; + layoutReport << "ERROR: There are unequal parts" << Endl; + } + partsState.DataErrorInfo = layoutReport.Str(); + + // checking erasure + TStringStream erasureReport; + erasureReport << "Erasure info:" << Endl; + + std::vector<ui32> partIds; + partIds.reserve(6); + bool erasureError = false; + + TErasureType::ECrcMode crcMode = (TErasureType::ECrcMode)id.CrcMode(); + + auto checkCombination = [&]() { + // iterate over combinations of part versions + for (const auto& seen0 : seenParts[partIds[0]]) { + for (const auto& seen1 : seenParts[partIds[1]]) { + for (const auto& seen2 : seenParts[partIds[2]]) { + for (const auto& seen3 : seenParts[partIds[3]]) { + std::array<TRope, 6> data; + data[partIds[0]] = seen0.Data; + data[partIds[1]] = seen1.Data; + data[partIds[2]] = seen2.Data; + data[partIds[3]] = seen3.Data; + + ui32 restoreMask = 0; + restoreMask |= (1 << partIds[4]); + if (partIds.size() == 6) { + restoreMask |= (1 << partIds[5]); + } + + ErasureRestore(crcMode, TErasureType::Erasure4Plus2Block, id.BlobSize(), nullptr, data, restoreMask); + + std::array<std::vector<ui32>, 4> diskIdxs{ + seen0.DiskIdxs, seen1.DiskIdxs, seen2.DiskIdxs, seen3.DiskIdxs}; + + auto checkOnePart = [&](ui32 partId) { + for (const auto& seen : seenParts[partId]) { + TStringStream str; + str << "{ "; + for (ui32 part = 0; part < 4; ++part) { + str << "part " << partIds[part] + 1 << " disks [ "; + for (const auto& diskIdx : diskIdxs[part]) { + str << diskIdx << " "; + } + str << "]; "; + } + str << "} CHECK part " << partId + 1 << " disks [ "; + for (const auto& diskIdx : seen.DiskIdxs) { + str << diskIdx << " "; + } + str << "] -> "; + + int cmp = TRope::Compare(seen.Data, data[partId]); + if (cmp) { + erasureError = true; + } else { + str << "OK" << Endl; + erasureReport << str.Str(); // report only succesful restore + } + } + }; + + checkOnePart(partIds[4]); + if (partIds.size() == 6) { + checkOnePart(partIds[5]); + } + }}}} + }; + + for (ui32 partId = 0; partId < 6; ++partId) { + if (!seenParts[partId].empty()) { + partIds.push_back(partId); + } + } + if (partIds.size() <= 4) { // 4 or less parts total, nothing to check + return partsState; + } + + // fast path: there's no unequal parts; if simple check is ok, return + if (!hasUnequalParts) { + checkCombination(); + if (!erasureError) { + partsState.DataErrorInfo += erasureReport.Str(); + return partsState; + } + } + + if (partIds.size() == 5) { + checkCombination(); + } else { // partIds.size() == 6 + // iterate over different combinations to find good parts + for (ui8 gap1 = 0; gap1 < 5; ++gap1) { + for (ui8 gap2 = gap1 + 1; gap2 < 6; ++gap2) { + ui8 idx = 0; + for (ui8 p = 0; p < 6; ++p) { + if (p == gap1) { + partIds[4] = p; + } else if (p == gap2) { + partIds[5] = p; + } else { + partIds[idx] = p; + ++idx; + } + } + checkCombination(); + } + } + } + + if (erasureError) { + partsState.IsOk = false; + erasureReport << "ERROR: There are erasure restore fails" << Endl; + } + + partsState.DataErrorInfo += erasureReport.Str(); + return partsState; + } +}; + +class TDataIntegrityCheckerMirror : public TDataIntegrityCheckerBase { +private: + virtual ui32 DataPartsCount() const = 0; + +public: + using TDataIntegrityCheckerBase::TDataIntegrityCheckerBase; + + TPartsState GetDataState(const TLogoBlobID& id, const TPartsData& partsData) const override { + Y_UNUSED(id); + Y_ABORT_UNLESS(partsData.Parts.size() == 3); + + TPartsState partsState; + + struct TSeenPart { + TRope Data; + std::vector<ui32> DiskIdxs; + }; + std::vector<TSeenPart> seenParts; + + // find all distinct copies of the blob + for (ui32 partId = 0; partId < DataPartsCount(); ++partId) { + for (const auto& [diskIdx, data] : partsData.Parts[partId]) { + bool isNew = true; + for (auto& seenPart : seenParts) { + if (!TRope::Compare(data, seenPart.Data)) { + seenPart.DiskIdxs.push_back(diskIdx); + isNew = false; + break; + } + } + if (isNew) { + seenParts.push_back({data, {diskIdx}}); + } + } + } + + TStringStream layoutReport; + layoutReport << "Layout info:" << Endl; + + TStringStream str; + bool hasUnequalParts = (seenParts.size() > 1); + ui32 ver = 0; + for (const auto& seenPart : seenParts) { + if (ver > 0) { + str << ", "; + } + str << "ver" << ver << " disks [ "; + for (const auto& diskIdx : seenPart.DiskIdxs) { + str << diskIdx << " "; + } + str << "]"; + ++ver; + } + str << Endl; + layoutReport << str.Str(); + + if (hasUnequalParts) { + partsState.IsOk = false; + layoutReport << "ERROR: There are unequal parts" << Endl; + } + partsState.DataErrorInfo = layoutReport.Str(); + + return partsState; + } +}; + +class TDataIntegrityCheckerMirror3dc : public TDataIntegrityCheckerMirror { +private: + ui32 DataPartsCount() const override { return 3; } + +public: + using TDataIntegrityCheckerMirror::TDataIntegrityCheckerMirror; +}; + +class TDataIntegrityCheckerMirror3of4 : public TDataIntegrityCheckerMirror { +private: + ui32 DataPartsCount() const override { return 2; } + +public: + using TDataIntegrityCheckerMirror::TDataIntegrityCheckerMirror; +}; + +} // NKikimr diff --git a/ydb/core/blobstorage/groupinfo/ya.make b/ydb/core/blobstorage/groupinfo/ya.make index 0851c464dc0..cdc07a6d7a3 100644 --- a/ydb/core/blobstorage/groupinfo/ya.make +++ b/ydb/core/blobstorage/groupinfo/ya.make @@ -15,6 +15,7 @@ SRCS( blobstorage_groupinfo_blobmap.h blobstorage_groupinfo.cpp blobstorage_groupinfo.h + blobstorage_groupinfo_data_check.h blobstorage_groupinfo_iter.h blobstorage_groupinfo_partlayout.cpp blobstorage_groupinfo_partlayout.h diff --git a/ydb/core/blobstorage/ut_blobstorage/check_integrity.cpp b/ydb/core/blobstorage/ut_blobstorage/check_integrity.cpp index 705cd2b82ba..2c00675ab03 100644 --- a/ydb/core/blobstorage/ut_blobstorage/check_integrity.cpp +++ b/ydb/core/blobstorage/ut_blobstorage/check_integrity.cpp @@ -1,11 +1,16 @@ #include <ydb/core/blobstorage/ut_blobstorage/lib/env.h> +#include <util/random/random.h> + struct TCheckIntegrityEnvBase { TEnvironmentSetup Env; TIntrusivePtr<TBlobStorageGroupInfo> Info; TLogoBlobID Id; std::vector<TVDiskID> VDisks; + TString Data; + TString ErrorData; + std::unique_ptr<IEventHandle> Result; TCheckIntegrityEnvBase(TEnvironmentSetup::TSettings&& settings) @@ -19,8 +24,23 @@ struct TCheckIntegrityEnvBase { Info = Env.GetGroupInfo(groups.front()); TString error; - const bool success = TLogoBlobID::Parse(Id, "[72075186270680851:57:3905:6:786432:4194304:0]", error); + const bool success = TLogoBlobID::Parse(Id, "[72075186270680851:57:3905:6:786432:1024:0]", error); UNIT_ASSERT(success); + + auto size = Id.BlobSize(); + Data.resize(size); + for (ui32 i = 0; i < size; ++i) { + Data[i] = RandomNumber<ui8>(); + } + ErrorData.resize(size); + for (ui32 i = 0; i < size; ++i) { + ErrorData[i] = RandomNumber<ui8>(); + } + + for (ui32 i = 0; i < Info->Type.BlobSubgroupSize(); ++i) { + auto vDiskIdShort = Info->GetTopology().GetVDiskInSubgroup(i, Id.Hash()); + VDisks.push_back(Info->CreateVDiskID(vDiskIdShort)); + } } TEvBlobStorage::TEvCheckIntegrityResult* Request() { @@ -65,6 +85,7 @@ struct TCheckIntegrityEnvBase { struct TCheckIntegrityEnvBlock42 : public TCheckIntegrityEnvBase { std::vector<TString> Parts; + std::vector<TString> ErrorParts; TCheckIntegrityEnvBlock42() : TCheckIntegrityEnvBase(TEnvironmentSetup::TSettings{ @@ -72,37 +93,39 @@ struct TCheckIntegrityEnvBlock42 : public TCheckIntegrityEnvBase { .Erasure = TBlobStorageGroupType::Erasure4Plus2Block, }) { - TString data = TString(Id.BlobSize(), 'X'); + auto crcMode = (TErasureType::ECrcMode)Id.CrcMode(); TDataPartSet partSet; - Info->Type.SplitData((TErasureType::ECrcMode)Id.CrcMode(), data, partSet); + Info->Type.SplitData(crcMode, Data, partSet); for (ui32 i = 0; i < partSet.Parts.size(); ++i) { Parts.push_back(partSet.Parts[i].OwnedString.ConvertToString()); } - for (ui32 i = 0; i < 8; ++i) { - auto vDiskIdShort = Info->GetTopology().GetVDiskInSubgroup(i, Id.Hash()); - VDisks.push_back(Info->CreateVDiskID(vDiskIdShort)); + TDataPartSet errorPartSet; + Info->Type.SplitData(crcMode, ErrorData, errorPartSet); + for (ui32 i = 0; i < errorPartSet.Parts.size(); ++i) { + ErrorParts.push_back(errorPartSet.Parts[i].OwnedString.ConvertToString()); } } }; struct TCheckIntegrityEnvMirror3dc : public TCheckIntegrityEnvBase { - TString Data; - TCheckIntegrityEnvMirror3dc() : TCheckIntegrityEnvBase(TEnvironmentSetup::TSettings{ .NodeCount = 9, .Erasure = TBlobStorageGroupType::ErasureMirror3dc, }) - { - Data = TString(Id.BlobSize(), 'X'); + {} +}; - for (ui32 i = 0; i < 9; ++i) { - auto vDiskIdShort = Info->GetTopology().GetVDiskInSubgroup(i, Id.Hash()); - VDisks.push_back(Info->CreateVDiskID(vDiskIdShort)); - } - } +struct TCheckIntegrityEnvMirror3of4 : public TCheckIntegrityEnvBase { + + TCheckIntegrityEnvMirror3of4() + : TCheckIntegrityEnvBase(TEnvironmentSetup::TSettings{ + .NodeCount = 8, + .Erasure = TBlobStorageGroupType::ErasureMirror3of4, + }) + {} }; Y_UNIT_TEST_SUITE(CheckIntegrityBlock42) { @@ -244,6 +267,179 @@ Y_UNIT_TEST_SUITE(CheckIntegrityBlock42) { UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_RECOVERABLE); } + Y_UNIT_TEST(PlacementStatusUnknown) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 5; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + + THashSet<TVDiskID> errorDisks; + errorDisks.insert(check.VDisks[0]); + errorDisks.insert(check.VDisks[1]); + + check.Env.Runtime->FilterFunction = [&](ui32, std::unique_ptr<IEventHandle>& ev) { + return check.InjectError(NKikimrProto::ERROR, errorDisks, ev); + }; + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_UNKNOWN); + } + + Y_UNIT_TEST(DataOk) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 6; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_OK); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataOkAdditionalEqualParts) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 6; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + check.Env.PutBlob(check.VDisks[6], TLogoBlobID(check.Id, 1), check.Parts[0]); + check.Env.PutBlob(check.VDisks[7], TLogoBlobID(check.Id, 2), check.Parts[1]); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_OK); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataErrorAdditionalUnequalParts) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 6; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + check.Env.PutBlob(check.VDisks[6], TLogoBlobID(check.Id, 1), check.ErrorParts[0]); + check.Env.PutBlob(check.VDisks[7], TLogoBlobID(check.Id, 1), check.ErrorParts[1]); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataErrorSixPartsOneBroken) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 5; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + check.Env.PutBlob(check.VDisks[5], TLogoBlobID(check.Id, 6), check.ErrorParts[5]); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataErrorSixPartsTwoBroken) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 4; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + check.Env.PutBlob(check.VDisks[4], TLogoBlobID(check.Id, 5), check.ErrorParts[4]); + check.Env.PutBlob(check.VDisks[5], TLogoBlobID(check.Id, 6), check.ErrorParts[5]); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataOkErasureFiveParts) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 5; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_RECOVERABLE); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_OK); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataErrorFivePartsOneBroken) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 4; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + check.Env.PutBlob(check.VDisks[4], TLogoBlobID(check.Id, 5), check.ErrorParts[4]); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_RECOVERABLE); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataErrorHeavySixPartsWithManyBroken) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 6; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + for (ui32 i = 0; i < 6; ++i) { + check.Env.PutBlob(check.VDisks[6], TLogoBlobID(check.Id, i + 1), check.ErrorParts[i]); + } + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataStatusUnknown) { + TCheckIntegrityEnvBlock42 check; + + for (ui32 i = 0; i < 5; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Parts[i]); + } + + THashSet<TVDiskID> errorDisks; + errorDisks.insert(check.VDisks[0]); + errorDisks.insert(check.VDisks[1]); + + check.Env.Runtime->FilterFunction = [&](ui32, std::unique_ptr<IEventHandle>& ev) { + return check.InjectError(NKikimrProto::ERROR, errorDisks, ev); + }; + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_UNKNOWN); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_UNKNOWN); + + Cerr << result->DataErrorInfo << Endl; + } } Y_UNIT_TEST_SUITE(CheckIntegrityMirror3dc) { @@ -351,4 +547,108 @@ Y_UNIT_TEST_SUITE(CheckIntegrityMirror3dc) { UNIT_ASSERT(result->Status == NKikimrProto::OK); UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_RECOVERABLE); } + + Y_UNIT_TEST(DataOk) { + TCheckIntegrityEnvMirror3dc check; + + for (ui32 i = 0; i < 3; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Data); + } + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_OK); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataErrorOneCopy) { + TCheckIntegrityEnvMirror3dc check; + + for (ui32 i = 0; i < 2; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Data); + } + check.Env.PutBlob(check.VDisks[2], TLogoBlobID(check.Id, 3), check.ErrorData); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR); + + Cerr << result->DataErrorInfo << Endl; + } + + Y_UNIT_TEST(DataErrorManyCopies) { + TCheckIntegrityEnvMirror3dc check; + + for (ui32 i = 0; i < 3; ++i) { + check.Env.PutBlob(check.VDisks[i], TLogoBlobID(check.Id, i + 1), check.Data); + } + for (ui32 i = 0; i < 3; ++i) { + check.Env.PutBlob(check.VDisks[i + 3], TLogoBlobID(check.Id, i + 1), check.ErrorData); + } + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + UNIT_ASSERT(result->DataStatus == TEvBlobStorage::TEvCheckIntegrityResult::DS_ERROR); + + Cerr << result->DataErrorInfo << Endl; + } +} + +Y_UNIT_TEST_SUITE(CheckIntegrityMirror3of4) { + + Y_UNIT_TEST(PlacementOk) { + TCheckIntegrityEnvMirror3of4 check; + + check.Env.PutBlob(check.VDisks[0], TLogoBlobID(check.Id, 1), check.Data); + check.Env.PutBlob(check.VDisks[1], TLogoBlobID(check.Id, 2), check.Data); + check.Env.PutBlob(check.VDisks[2], TLogoBlobID(check.Id, 1), check.Data); + check.Env.PutBlob(check.VDisks[4], TLogoBlobID(check.Id, 3), {}); + check.Env.PutBlob(check.VDisks[5], TLogoBlobID(check.Id, 3), {}); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_OK); + } + + Y_UNIT_TEST(PlacementMissingParts) { + TCheckIntegrityEnvMirror3of4 check; + + check.Env.PutBlob(check.VDisks[0], TLogoBlobID(check.Id, 1), check.Data); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_RECOVERABLE); + } + + Y_UNIT_TEST(PlacementBlobIsLost) { + TCheckIntegrityEnvMirror3of4 check; + + check.Env.PutBlob(check.VDisks[4], TLogoBlobID(check.Id, 3), {}); + check.Env.PutBlob(check.VDisks[5], TLogoBlobID(check.Id, 3), {}); + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::OK); + UNIT_ASSERT(result->PlacementStatus == TEvBlobStorage::TEvCheckIntegrityResult::PS_ERROR); + } + + Y_UNIT_TEST(PlacementDisintegrated) { + TCheckIntegrityEnvMirror3of4 check; + + THashSet<TVDiskID> errorDisks; + for (ui32 i = 5; i < 8; ++i) { + errorDisks.insert(check.VDisks[i]); + } + + check.Env.Runtime->FilterFunction = [&](ui32, std::unique_ptr<IEventHandle>& ev) { + return check.InjectError(NKikimrProto::ERROR, errorDisks, ev); + }; + + auto result = check.Request(); + UNIT_ASSERT(result->Status == NKikimrProto::ERROR); + Cerr << result->ErrorReason << Endl; + } } |
