diff options
author | alexvru <alexvru@ydb.tech> | 2023-07-04 21:28:22 +0300 |
---|---|---|
committer | alexvru <alexvru@ydb.tech> | 2023-07-04 21:28:22 +0300 |
commit | a0221c687e0fbaf81ce4114dfa86473f6b38df10 (patch) | |
tree | 00ba733c6577bd3524e537f1305d101233b23dee | |
parent | c256b7af4712afd39574801143bec23240865aef (diff) | |
download | ydb-a0221c687e0fbaf81ce4114dfa86473f6b38df10.tar.gz |
Improve block-4-2 restoration
22 files changed, 934 insertions, 471 deletions
diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.cpp index 00cdea5ab4..6114d155b7 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.cpp @@ -12,13 +12,10 @@ void TBlobState::TState::AddResponseData(ui32 fullSize, ui32 shift, TRope&& data Y_VERIFY(size); Y_VERIFY(shift < fullSize && size <= fullSize - shift); Data.Write(shift, std::move(data)); - // Mark the interval as present in the Data buffer - Here.Add(shift, shift + size); } void TBlobState::TState::AddPartToPut(TRope&& partData) { Y_VERIFY(partData); - Here.Assign(0, partData.size()); Data.SetMonolith(std::move(partData)); } @@ -42,7 +39,6 @@ void TBlobState::Init(const TLogoBlobID &id, const TBlobStorageGroupInfo &info) void TBlobState::AddNeeded(ui64 begin, ui64 size) { Y_VERIFY(bool(Id)); Whole.Needed.Add(begin, begin + size); - Whole.NotHere.Add(begin, begin + size); IsChanged = true; } @@ -61,44 +57,35 @@ void TBlobState::MarkBlobReadyToPut(ui8 blobIdx) { } bool TBlobState::Restore(const TBlobStorageGroupInfo &info) { - TIntervalVec<i32> fullBlobInterval(0, Id.BlobSize()); - if (fullBlobInterval.IsSubsetOf(Whole.Here)) { + const TIntervalVec<i32> fullBlobInterval(0, Id.BlobSize()); + const TIntervalSet<i32> here = Whole.Here(); + Y_VERIFY_DEBUG((here - fullBlobInterval).IsEmpty()); // ensure no excessive data outsize blob's boundaries + if (fullBlobInterval.IsSubsetOf(here)) { // we already have 'whole' part, no need for restoration return true; } - const ui32 parts = info.Type.TotalPartCount(); + TStackVec<TRope, TypicalPartsInBlob> parts(info.Type.TotalPartCount()); ui32 partsPresent = 0; - for (ui32 i = 0; i < parts; ++i) { + for (ui32 i = 0; i < parts.size(); ++i) { if (const ui32 partSize = info.Type.PartSize(TLogoBlobID(Id, i + 1))) { - if (TIntervalVec<i32>(0, partSize).IsSubsetOf(Parts[i].Here)) { - ++partsPresent; - } - } - } - if (partsPresent < info.Type.MinimalRestorablePartCount()) { - return false; - } - - TDataPartSet partSet; - partSet.Parts.resize(parts); - for (ui32 i = 0; i < parts; ++i) { - if (const ui32 partSize = info.Type.PartSize(TLogoBlobID(Id, i + 1))) { - if (TIntervalVec<i32>(0, partSize).IsSubsetOf(Parts[i].Here)) { - partSet.PartsMask |= (1 << i); - TRope data(MakeIntrusive<TRopeSharedDataBackend>(TSharedData::Uninitialized(partSize))); - Parts[i].Data.Read(0, data.UnsafeGetContiguousSpanMut().data(), partSize); - partSet.Parts[i].ReferenceTo(data); + const TIntervalVec<i32> fullPartInterval(0, partSize); + const TIntervalSet<i32> partHere = Parts[i].Here(); + Y_VERIFY_DEBUG((partHere - fullPartInterval).IsEmpty()); // ensure no excessive part data outside boundaries + if (fullPartInterval.IsSubsetOf(partHere)) { + parts[i] = Parts[i].Data.Read(0, partSize); + if (++partsPresent >= info.Type.MinimalRestorablePartCount()) { + TRope whole; + ErasureRestore((TErasureType::ECrcMode)Id.CrcMode(), info.Type, Id.BlobSize(), &whole, parts, 0, 0, false); + Y_VERIFY(whole.size() == Id.BlobSize()); + Whole.Data.SetMonolith(std::move(whole)); + Y_VERIFY_DEBUG(Whole.Here() == fullBlobInterval); + return true; + } } } } - partSet.FullDataSize = Id.BlobSize(); - TRope whole; - info.Type.RestoreData((TErasureType::ECrcMode)Id.CrcMode(), partSet, whole, false, true, false); - Whole.Data.Write(0, whole.GetContiguousSpan().data(), Id.BlobSize()); - Whole.Here.Add(fullBlobInterval); - Whole.NotHere.Subtract(fullBlobInterval); - return true; + return false; } void TBlobState::AddResponseData(const TBlobStorageGroupInfo &info, const TLogoBlobID &id, ui32 orderNumber, @@ -315,7 +302,7 @@ TString TBlobState::TDiskPart::ToString() const { TString TBlobState::TState::ToString() const { TStringStream str; str << "{Data# " << Data.Print(); - str << " Here# " << Here.ToString(); + str << " Here# " << Here().ToString(); str << "}"; return str.Str(); } @@ -323,9 +310,9 @@ TString TBlobState::TState::ToString() const { TString TBlobState::TWholeState::ToString() const { TStringStream str; str << "{Data# " << Data.Print(); - str << " Here# " << Here.ToString(); + str << " Here# " << Here().ToString(); str << " Needed# " << Needed.ToString(); - str << " NotHere# " << NotHere.ToString(); + str << " NotHere# " << NotHere().ToString(); str << "}"; return str.Str(); } diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.h b/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.h index 09f8093864..d61e250f32 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_blackboard.h @@ -48,16 +48,17 @@ struct TBlobState { }; struct TState { TFragmentedBuffer Data; - TIntervalSet<i32> Here; // Present in the Data buffer void AddResponseData(ui32 fullSize, ui32 shift, TRope&& data); void AddPartToPut(TRope&& partData); TString ToString() const; + TIntervalSet<i32> Here() const { return Data.GetIntervalSet(); } }; struct TWholeState : TState { TIntervalSet<i32> Needed; // Requested by the external user - TIntervalSet<i32> NotHere; // Requested by the external user, but not present in the Data buffer yet + TString ToString() const; + TIntervalSet<i32> NotHere() const { return Needed - Here(); } }; struct TDiskPart { TIntervalSet<i32> Requested; @@ -232,4 +233,11 @@ struct TBlackboard { TBlobState& operator [](const TLogoBlobID& id); }; +inline bool RestoreWholeFromMirror(TBlobState& state) { + for (const TBlobState::TState& part : state.Parts) { + state.Whole.Data.CopyFrom(part.Data, part.Here() & state.Whole.NotHere()); + } + return !state.Whole.NotHere(); +} + }//NKikimr diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp index 373fe52984..ca277ffb8b 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_get_impl.cpp @@ -206,10 +206,12 @@ TString TGetImpl::DumpFullState() const { str << Endl; str << " ReportDetailedPartMap# " << ReportDetailedPartMap; str << Endl; - str << " ForceBlockTabletId# " << ForceBlockTabletData->Id; - str << Endl; - str << " ForceBlockTabletGeneration# " << ForceBlockTabletData->Generation; - str << Endl; + if (ForceBlockTabletData) { + str << " ForceBlockTabletId# " << ForceBlockTabletData->Id; + str << Endl; + str << " ForceBlockTabletGeneration# " << ForceBlockTabletData->Generation; + str << Endl; + } str << " ReplyBytes# " << ReplyBytes; str << Endl; diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp index 6edf7d4d83..1b7d434371 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_put.cpp @@ -38,7 +38,7 @@ class TBlobStorageGroupPutRequest : public TBlobStorageGroupRequestActor<TBlobSt ui32 CurrentEncryptionOffset = 0; ui32 BlobsSplit = 0; TErasureSplitContext ErasureSplitContext = TErasureSplitContext::Init(MaxBytesToSplitAtOnce); - TBatchedVec<TStackVec<TRope, 8>> PartSets; + TBatchedVec<TStackVec<TRope, TypicalPartsInBlob>> PartSets; TStackVec<ui64, TypicalDisksInGroup> WaitingVDiskResponseCount; ui64 WaitingVDiskCount = 0; diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h b/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h index 9e6392ac15..e3259263bf 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_put_impl.h @@ -171,7 +171,7 @@ public: } template <typename TVPutEvent> - void GenerateInitialRequests(TLogContext &logCtx, TBatchedVec<TStackVec<TRope, 8>>& partSets, + void GenerateInitialRequests(TLogContext &logCtx, TBatchedVec<TStackVec<TRope, TypicalPartsInBlob>>& partSets, TDeque<std::unique_ptr<TVPutEvent>> &outVPuts) { Y_UNUSED(logCtx); Y_VERIFY_S(partSets.size() == Blobs.size(), "partSets.size# " << partSets.size() diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_base.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_base.cpp index 0a134b1d57..223ae27d90 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_base.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_base.cpp @@ -281,7 +281,6 @@ void TStrategyBase::PreparePutsForPartPlacement(TLogContext &logCtx, TBlobState const ui32 partIdx = record.PartIdx; Y_VERIFY_DEBUG(partIdx < state.Parts.size()); auto& part = state.Parts[partIdx]; - Y_VERIFY_DEBUG(part.Data.GetIntervalSet() == part.Here); if (!part.Data.IsMonolith() || part.Data.GetMonolith().size() != info.Type.PartSize(TLogoBlobID(state.Id, partIdx + 1))) { isPartsAvailable = false; break; @@ -291,10 +290,9 @@ void TStrategyBase::PreparePutsForPartPlacement(TLogContext &logCtx, TBlobState if (!isPartsAvailable) { // Prepare new put request set TIntervalSet<i32> fullInterval(0, state.Id.BlobSize()); - Y_VERIFY_DEBUG(state.Whole.Data.GetIntervalSet() == state.Whole.Here); - Y_VERIFY(fullInterval == state.Whole.Here, "Can't put unrestored blob! Unexpected blob state# %s", state.ToString().c_str()); + Y_VERIFY(fullInterval == state.Whole.Here(), "Can't put unrestored blob! Unexpected blob state# %s", state.ToString().c_str()); - TStackVec<TRope, 8> partData(info.Type.TotalPartCount()); + TStackVec<TRope, TypicalPartsInBlob> partData(info.Type.TotalPartCount()); ErasureSplit((TErasureType::ECrcMode)state.Id.CrcMode(), info.Type, state.Whole.Data.Read(0, state.Id.BlobSize()), partData); @@ -305,7 +303,6 @@ void TStrategyBase::PreparePutsForPartPlacement(TLogContext &logCtx, TBlobState state.AddPartToPut(partIdx, std::move(partData[partIdx])); Y_VERIFY(part.Data.IsMonolith()); Y_VERIFY(part.Data.GetMonolith().size() == partSize); - Y_VERIFY(part.Here == TIntervalSet<i32>(0, partSize)); } } } diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_bold.h b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_bold.h index 0ea411ef72..5788df96cd 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_bold.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_bold.h @@ -58,7 +58,7 @@ public: if (partSituation == TBlobState::ESituation::Unknown || partSituation == TBlobState::ESituation::Present) { TIntervalSet<i32> fullPartInterval(0, partSize); - fullPartInterval.Subtract(state.Parts[partIdx].Here); + fullPartInterval.Subtract(state.Parts[partIdx].Here()); fullPartInterval.Subtract(disk.DiskParts[partIdx].Requested); if (!fullPartInterval.IsEmpty()) { // TODO(cthulhu): Consider the case when we just need to know that there is a copy diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_basic.h b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_basic.h index c6476c941f..3d7d7f9b8b 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_basic.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_basic.h @@ -28,17 +28,13 @@ namespace NKikimr { switch (diskPart.Situation) { case TBlobState::ESituation::Unknown: { // get the request -- all the needed parts except already got and already requested - TIntervalSet<i32> request(state.Whole.Needed); - request.Subtract(state.Whole.Here); - // remove parts that were already requested, but not yet answered - request.Subtract(diskPart.Requested); - if (!request.IsEmpty()) { + if (const TIntervalSet<i32> request = state.Whole.NotHere() - diskPart.Requested) { TLogoBlobID id(state.Id, partIdx + 1); groupDiskRequests.AddGet(disk.OrderNumber, id, request); diskPart.Requested.Add(request); } else { // ensure that we are waiting for some data to come - Y_VERIFY(!diskPart.Requested.IsEmpty()); + Y_VERIFY(diskPart.Requested); } // return true indicating that we have a request that is not yet satisfied return true; @@ -64,34 +60,8 @@ namespace NKikimr { return EStrategyOutcome::DONE; } - const ui32 totalPartCount = info.Type.TotalPartCount(); - // merge found data parts in our blob - for (ui32 partIdx = 0; partIdx < totalPartCount; ++partIdx) { - const TBlobState::TState& part = state.Parts[partIdx]; - - // check if we can obtain some _new_ data from the part - if (!part.Here.IsSubsetOf(state.Whole.Here)) { - // scan through all the intervals - for (const auto& range : part.Here) { - ui64 begin = range.first; - const ui64 end = range.second; - TIntervalVec<i32> interval(begin, end); - // check if this interval contains some data which is not in state.Whole.Here - if (!interval.IsSubsetOf(state.Whole.Here)) { - char buffer[4096]; - while (begin != end) { - const ui64 len = Min<ui64>(sizeof(buffer), end - begin); - part.Data.Read(begin, buffer, len); - state.Whole.Data.Write(begin, buffer, len); - begin += len; - } - state.Whole.Here.Add(interval); - } - } - } - } - if (state.Whole.Needed.IsSubsetOf(state.Whole.Here)) { + if (RestoreWholeFromMirror(state)) { // we are not going to restore this blob and we have all required data read, so we can exit now state.WholeSituation = TBlobState::ESituation::Present; return EStrategyOutcome::DONE; @@ -131,7 +101,7 @@ namespace NKikimr { } // create an array defining order in which we traverse the disks - TStackVec<ui32, 32> diskIdxList; + TStackVec<ui32, TypicalDisksInGroup> diskIdxList; for (ui32 i = 0; i < state.Disks.size(); ++i) { diskIdxList.push_back(i); } @@ -201,7 +171,7 @@ namespace NKikimr { // we can't finish request now, because the VGet was just issued or still being executed, so we // drop status to UNKNOWN return EStrategyOutcome::IN_PROGRESS; - } else if (!state.Whole.Needed.IsSubsetOf(state.Whole.Here)) { + } else if (!state.Whole.Needed.IsSubsetOf(state.Whole.Here())) { // we haven't requested anything, but there is no required data in buffer, so blob is lost R_LOG_WARN_SX(logCtx, "BPG48", "missing blob# " << state.Id.ToString() << " state# " << state.ToString()); state.WholeSituation = TBlobState::ESituation::Absent; diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_restore.h b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_restore.h index 161a1b3f9f..35556793bd 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_restore.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3dc_restore.h @@ -31,7 +31,8 @@ namespace NKikimr { if (BlobIsPresentAtMainOr2x2(state)) { // everything is in place -- all data is present; generate output blob content and signal success for // this one - CreateOutputBlob(state); + const bool success = RestoreWholeFromMirror(state); + Y_VERIFY(success); state.WholeSituation = TBlobState::ESituation::Present; return EStrategyOutcome::DONE; } @@ -42,9 +43,7 @@ namespace NKikimr { return EStrategyOutcome::IN_PROGRESS; } // Remaining disks may never answer, start the restoration - CreateOutputBlob(state); - // Wait for at least one DATA response - if (!state.Whole.Needed.IsSubsetOf(state.Whole.Here)) { + if (!RestoreWholeFromMirror(state)) { return EStrategyOutcome::IN_PROGRESS; } @@ -108,27 +107,6 @@ namespace NKikimr { return checker.CheckFailModelForSubgroup(beingWaitedFor); } - - static void CreateOutputBlob(TBlobState& state) { - for (const TBlobState::TState& part : state.Parts) { - // calculate the interval of the part buffer we have to copy into the output buffer - TIntervalSet<i32> dataToAdd(part.Here); - dataToAdd.Subtract(state.Whole.Here); - state.Whole.Here.Add(dataToAdd); - for (const auto& range : dataToAdd) { - ui32 begin = range.first; - const ui32 end = range.second; - char buffer[4096]; - while (begin != end) { - const ui64 len = Min<ui64>(sizeof(buffer), end - begin); - part.Data.Read(begin, buffer, len); - state.Whole.Data.Write(begin, buffer, len); - begin += len; - } - } - } - } - static bool BlobIsPresentAtMainOr2x2(const TBlobState& state) { ui32 numPresentRings = 0; ui32 numPresentX2Rings = 0; diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3of4.h b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3of4.h index 30abbb2687..818b4ed062 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3of4.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_m3of4.h @@ -15,7 +15,7 @@ namespace NKikimr { } // check if the blob is already restored and can be returned to caller - if (state.WholeSituation == TBlobState::ESituation::Present || Restore(state, info)) { + if (state.WholeSituation == TBlobState::ESituation::Present || RestoreWholeFromMirror(state)) { state.WholeSituation = TBlobState::ESituation::Present; Y_VERIFY(state.Whole.Data && state.Whole.Data.GetTotalSize(), "%s", state.ToString().data()); return EStrategyOutcome::DONE; @@ -105,8 +105,8 @@ namespace NKikimr { auto& part = disk.DiskParts[group.PartIdx]; Y_VERIFY(part.Requested.IsEmpty()); // ensure we haven't requested any data yet const TLogoBlobID id(state.Id, group.PartIdx + 1); - groupDiskRequests.AddGet(disk.OrderNumber, id, state.Whole.NotHere); - part.Requested.Add(state.Whole.NotHere); + groupDiskRequests.AddGet(disk.OrderNumber, id, state.Whole.NotHere()); + part.Requested.Add(state.Whole.NotHere()); } } else if (!numRequestedMetadata) { // no metadata was requested, but we need it to make decision -- issue queries to all disks for (auto& disk : state.Disks) { @@ -123,21 +123,6 @@ namespace NKikimr { } private: - bool Restore(TBlobState& state, const TBlobStorageGroupInfo& info) { - const ui32 totalParts = info.Type.TotalPartCount(); - for (ui32 i = 0; i < totalParts; ++i) { - if (const ui32 partSize = info.Type.PartSize(TLogoBlobID(state.Id, i + 1))) { - TBlobState::TState& part = state.Parts[i]; - if (const TIntervalSet<i32> pending = part.Here & state.Whole.NotHere) { - state.Whole.Data.CopyFrom(part.Data, pending); - state.Whole.Here |= pending; - state.Whole.NotHere -= pending; - } - } - } - return !state.Whole.NotHere; - } - bool CouldHaveBeenWritten(TBlobState& state, const TBlobStorageGroupInfo& info) { TBlobStorageGroupInfo::TSubgroupVDisks data(&info.GetTopology()); TBlobStorageGroupInfo::TSubgroupVDisks any(&info.GetTopology()); diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_block.h b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_block.h index 9297db7a80..5f4d42f033 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_block.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_block.h @@ -9,246 +9,127 @@ class TMinIopsBlockStrategy : public TStrategyBase { public: std::optional<EStrategyOutcome> RestoreWholeFromDataParts(TLogContext& /*logCtx*/, TBlobState &state, const TBlobStorageGroupInfo &info) { - TIntervalSet<i32> missing(state.Whole.NotHere); - TString tmp; - for (auto [begin, end] : missing) { - TBlockSplitRange range; - info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), begin, - end, &range); - for (ui32 partIdx = range.BeginPartIdx; partIdx < range.EndPartIdx; ++partIdx) { - TPartOffsetRange &partRange = range.PartRanges[partIdx]; - if (partRange.Begin != partRange.End && partRange.WholeBegin < partRange.WholeEnd) { - if (!state.Parts[partIdx].Here.IsEmpty()) { - TIntervalVec<i32> partInterval(partRange.Begin, partRange.End); - if (partInterval.IsSubsetOf(state.Parts[partIdx].Here)) { - i64 sizeToCopy = partRange.End - partRange.Begin; - if (tmp.size() < (ui64)sizeToCopy) { - tmp = TString::Uninitialized(sizeToCopy); - } - state.Parts[partIdx].Data.Read(partRange.Begin, const_cast<char*>(tmp.data()), sizeToCopy); - Y_VERIFY(partRange.WholeEnd - partRange.WholeBegin == (ui64)sizeToCopy); - state.Whole.Data.Write(partRange.WholeBegin, tmp.data(), sizeToCopy); - state.Whole.Here.Add(partRange.WholeBegin, partRange.WholeEnd); - state.Whole.NotHere.Subtract(partRange.WholeBegin, partRange.WholeEnd); - } - } - } else { - // This is actually possible, for example for 33 byte blob we get: - // part 0: xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - // part 1: 00000000 00000000 00000000 00000000 - // part 2: 00000000 00000000 00000000 00000000 - // part 3: x0000000 00000000 00000000 00000000 + for (auto [wbegin, wend] : state.Whole.NotHere()) { + TBlockSplitRange r; + info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), wbegin, wend, &r); + for (ui32 i = r.BeginPartIdx; i < r.EndPartIdx; ++i) { + Y_VERIFY(i < info.Type.DataParts()); + TPartOffsetRange& pr = r.PartRanges[i]; + if (pr.Begin == pr.End) { + continue; // this part has no data } + Y_VERIFY_DEBUG(pr.AlignedBegin <= pr.Begin && pr.End <= pr.AlignedEnd); + const TIntervalSet<i32> partRange(pr.Begin, pr.End); + state.Whole.Data.CopyFrom(state.Parts[i].Data, state.Parts[i].Here() & partRange, pr.WholeBegin - pr.Begin); } } - if (state.Whole.NotHere.IsEmpty()) { - state.WholeSituation = TBlobState::ESituation::Present; - return EStrategyOutcome::DONE; + + if (state.Whole.NotHere()) { + return {}; } - return std::nullopt; + + state.WholeSituation = TBlobState::ESituation::Present; + return EStrategyOutcome::DONE; } - std::optional<EStrategyOutcome> RestoreWholeWithErasure(TLogContext& /*logCtx*/, TBlobState &state, - const TBlobStorageGroupInfo &info) { + void RestoreWhateverDataPartsPossible(TBlobState& state, const TBlobStorageGroupInfo& info) { const ui32 totalPartCount = info.Type.TotalPartCount(); - const i32 handoff = info.Type.Handoff(); - ui32 partsMissing = 0; - ui32 responsesPending = 0; - for (ui32 partIdx = 0; partIdx < totalPartCount; ++partIdx) { - bool isMissing = true; - for (i32 niche = -1; niche < handoff; ++niche) { - ui32 diskIdx = (niche < 0 ? partIdx : totalPartCount + niche); - TBlobState::TDisk &disk = state.Disks[diskIdx]; - TBlobState::ESituation partSituation = disk.DiskParts[partIdx].Situation; - TIntervalSet<i32> &requested = disk.DiskParts[partIdx].Requested; - if (partSituation == TBlobState::ESituation::Present) { - isMissing = false; - } - if (!requested.IsEmpty()) { - responsesPending++; - } - } - if (isMissing) { - partsMissing++; - } - } - if (partsMissing > info.Type.ParityParts() && responsesPending > 0) { - return std::nullopt; - } + struct THeapItem { + ui32 PartIdx; + TIntervalSet<i32> Interval; - // get intervals needed to restore the requested full data - TIntervalSet<i32> toRestore; - for (auto it = state.Whole.NotHere.begin(); it != state.Whole.NotHere.end(); ++it) { - auto [begin, end] = *it; - TBlockSplitRange range; - info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), begin, end, &range); - for (ui32 partIdx = range.BeginPartIdx; partIdx < range.EndPartIdx; ++partIdx) { - TPartOffsetRange &partRange = range.PartRanges[partIdx]; - if (partRange.Begin != partRange.End) { - toRestore.Add(partRange.AlignedBegin, partRange.AlignedEnd); - } + std::tuple<ui32, ui32> GetRange() const { + Y_VERIFY_DEBUG(Interval); + return *Interval.begin(); } - } - ui32 partsWithEnoughData = 0; - for (ui32 partIdx = 0; partIdx < totalPartCount; ++partIdx) { - if (toRestore.IsSubsetOf(state.Parts[partIdx].Here)) { - partsWithEnoughData++; + bool operator <(const THeapItem& x) const { return x.GetRange() < GetRange(); } + }; + + TStackVec<THeapItem, TypicalPartsInBlob> heap; + for (ui32 i = 0; i < totalPartCount; ++i) { + if (TIntervalSet<i32> here = state.Parts[i].Here()) { + heap.push_back(THeapItem{i, std::move(here)}); } } - if (partsWithEnoughData < info.Type.MinimalRestorablePartCount()) { - return std::nullopt; - } - - // We have enough parts for each interval needed and we can restore all the missing whole intervals + std::make_heap(heap.begin(), heap.end()); + while (heap) { + ui32 alignedBegin; + ui32 alignedEnd; - const ui32 partSize = info.Type.PartSize(state.Id); - - // Gather part ranges that need to be restored - TIntervalSet<i32> partIntervals; - for (auto it = state.Whole.NotHere.begin(); it != state.Whole.NotHere.end(); ++it) { - auto [begin, end] = *it; // missing - TBlockSplitRange range; - info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), begin, end, &range); - for (ui32 partIdx = range.BeginPartIdx; partIdx < range.EndPartIdx; ++partIdx) { - TPartOffsetRange &partRange = range.PartRanges[partIdx]; - if (partRange.Begin != partRange.End) { - partIntervals.Add(partRange.AlignedBegin, partRange.AlignedEnd); + // collect all intervals with this common beginning aligned offset + auto iter = heap.end(); + while (iter != heap.begin()) { + auto [iterBegin, iterEnd] = heap.front().GetRange(); + const ui32 minBlockSize = info.Type.MinimalBlockSize() / info.Type.DataParts(); + // align beginning offset up and ending offset down + iterBegin += minBlockSize - 1; + iterBegin -= iterBegin % minBlockSize; + iterEnd -= iterEnd % minBlockSize; + if (iter != heap.end() && alignedBegin != iterBegin) { + alignedEnd = Min(alignedEnd, iterBegin); + break; } + std::tie(alignedBegin, alignedEnd) = std::make_tuple(iterBegin, + Min(iter != heap.end() ? alignedEnd : Max<ui32>(), iterEnd)); + std::pop_heap(heap.begin(), iter--); } - } - //Cerr << "partIntervals intersected# " << partIntervals.ToString() << Endl; - // Obtain part offsets - TBlockSplitRange wholeRange; - // Cerr << "Missing begin# " << missingBegin << " end# " << missingEnd << Endl; - info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), - 0, state.Id.BlobSize(), &wholeRange); + // [iter, heap.end()) range now contains all parts that we may possibly include in restoration; do it + if (heap.end() - iter >= info.Type.MinimalRestorablePartCount() && alignedBegin != alignedEnd) { + ui32 restoreMask = (1 << info.Type.DataParts()) - 1; - // Loop through intervals needed to restore the requested full data - bool isFallback = false; - { - TDataPartSet partSet; - partSet.Parts.resize(totalPartCount); - i64 maxSize = 0; - for (auto it = partIntervals.begin(); it != partIntervals.end(); ++it) { - auto [begin, end] = *it; - i64 size = end - begin; - maxSize = Max(size, maxSize); - } - for (ui32 i = 0; i < totalPartCount; ++i) { - TString tmp = TString::Uninitialized(maxSize); - partSet.Parts[i].ReferenceTo(tmp, 0, maxSize, partSize); - } - partSet.FullDataSize = state.Id.BlobSize(); - partSet.IsFragment = true; - for (auto it = partIntervals.begin(); it != partIntervals.end(); ++it) { - auto [alignedBegin, alignedEnd] = *it; - TIntervalVec<i32> needRange(alignedBegin, alignedEnd); - i32 needSize = alignedEnd - alignedBegin; - if (info.Type.ErasureFamily() == TErasureType::ErasureParityBlock) { - partSet.PartsMask = 0; - for (ui32 i = 0; i < totalPartCount; ++i) { - partSet.Parts[i].ReferenceTo(partSet.Parts[i].OwnedString, alignedBegin, needSize, partSize); - if (needRange.IsSubsetOf(state.Parts[i].Here)) { - partSet.PartsMask |= (1 << i); - // Read part pieces to those strings - state.Parts[i].Data.Read(alignedBegin, partSet.Parts[i].GetDataAt(alignedBegin), needSize); - } - } - // Cerr << Endl << "Restore block data parts" << Endl; - // Restore missing part piece - info.Type.RestoreData((TErasureType::ECrcMode)state.Id.CrcMode(), partSet, true, false, false); - // Write it back to the present set + // fetch some data parts + const TIntervalSet<i32> alignedInterval(alignedBegin, alignedEnd); + TStackVec<TRope, TypicalPartsInBlob> parts(totalPartCount); + for (auto i = iter; i != heap.end(); ++i) { + Y_VERIFY(alignedInterval.IsSubsetOf(state.Parts[i->PartIdx].Here())); + parts[i->PartIdx] = state.Parts[i->PartIdx].Data.Read(alignedBegin, alignedEnd - alignedBegin); + restoreMask &= ~(1 << i->PartIdx); // don't restore part we already have + } - for (ui32 partIdx = wholeRange.BeginPartIdx; partIdx < wholeRange.EndPartIdx; ++partIdx) { - TPartOffsetRange &partRange = wholeRange.PartRanges[partIdx]; - i32 begin = Max<i32>(partRange.Begin, alignedBegin); - i32 end = Min<i32>(partRange.End, alignedEnd); - if (begin < end) { - i32 offset = partRange.WholeBegin + begin - partRange.Begin; - TIntervalVec<i32> x(offset, offset + end - begin); - if (!x.IsSubsetOf(state.Whole.Here)) { - // Cerr << "copy from# " << partRange.WholeBegin << " to# " << partRange.WholeEnd << Endl; - state.Whole.Data.Write(offset, partSet.Parts[partIdx].GetDataAt(begin), end - begin); - state.Whole.Here.Add(offset, offset + end - begin); - state.Whole.NotHere.Subtract(offset, offset + end - begin); - } - } - } - } else { - /* - for (ui32 i = 0; i < totalPartCount; ++i) { - if (needRange.IsSubsetOf(state.Parts[i].Here)) { - partSet.PartsMask |= (1 << i); - TString tmp = TString::Uninitialized(needSize); - // Read part pieces to those strings - state.Parts[i].Data.Read(partRange.AlignedBegin, const_cast<char*>(tmp.Data()), needSize); - partSet.Parts[i].ReferenceTo(tmp, partRange.AlignedBegin, needSize, partSize); + if (restoreMask) { + // restore missing data parts + ErasureRestore((TErasureType::ECrcMode)state.Id.CrcMode(), info.Type, state.Id.BlobSize(), + nullptr, parts, restoreMask, alignedBegin, true); + + // put them back + for (ui32 i = 0; i < info.Type.DataParts(); ++i) { + if (restoreMask >> i & 1) { + Y_VERIFY_DEBUG_S(alignedBegin + parts[i].size() == alignedEnd, "alignedBegin# " << alignedBegin + << " alignedEnd# " << alignedEnd << " partSize# " << parts[i].size()); + state.Parts[i].Data.Write(alignedBegin, std::move(parts[i])); } } - partSet.FullDataSize = state.Id.BlobSize(); - ui64 wholePieceAlignedSize = partRange.WholeEnd - partRange.AlignedWholeBegin; - TString wholePiece = TString::Uninitialized(wholePieceAlignedSize); - partSet.FullDataFragment.ReferenceTo(wholePiece, partRange.AlignedWholeBegin, - wholePieceAlignedSize, state.Id.BlobSize()); - */ - // Restore whole blob piece - // Write it back to the present set - //state.Whole.Data.Write(partRange.AlignedWholeBegin, wholePiece.Data(), wholePieceAlignedSize); - //state.Whole.Here.Add(partRange.AlignedWholeBegin, partRange.WholeEnd); - //state.Whole.NotHere.Subtract(partRange.AlignedWholeBegin, partRange.WholeEnd); - isFallback = true; } } - } - if (isFallback) { - // Cerr << "Fallback" << Endl; - TDataPartSet partSet; - partSet.Parts.resize(totalPartCount); - for (ui32 i = 0; i < totalPartCount; ++i) { - if (toRestore.IsSubsetOf(state.Parts[i].Here)) { - partSet.PartsMask |= (1 << i); - TString tmp = TString::Uninitialized(partSize); - for (auto it = toRestore.begin(); it != toRestore.end(); ++it) { - auto [begin, end] = *it; - i32 offset = begin; - i32 size = end - begin; - // Cerr << "part# " << i << " partSize# " << partSize << " offset# " << offset << " size# " << size << Endl; - state.Parts[i].Data.Read(offset, const_cast<char*>(tmp.data()) + offset, size); - } - partSet.Parts[i].ReferenceTo(tmp); + // return items back to heap + TIntervalSet<i32> interval(0, alignedEnd); + while (iter != heap.end()) { + iter->Interval -= interval; + if (iter->Interval) { + std::push_heap(heap.begin(), ++iter); + } else { + std::swap(*iter, heap.back()); + heap.pop_back(); } } - partSet.FullDataSize = state.Id.BlobSize(); - TRope whole; - info.Type.RestoreData((TErasureType::ECrcMode)state.Id.CrcMode(), partSet, whole, false, true, false); - - TIntervalSet<i32> missing(state.Whole.NotHere); - for (auto it = missing.begin(); it != missing.end(); ++it) { - auto [begin, end] = *it; - i32 offset = begin; - i32 size = end - begin; - const char *source = whole.GetContiguousSpan().data() + offset; - // Cerr << "LINE " << __LINE__ << " copy whole [" << offset << ", " << (offset + size) << ")" << Endl; - state.Whole.Data.Write(offset, source, size); - state.Whole.Here.Add(offset, offset + size); - state.Whole.NotHere.Subtract(offset, offset + size); - } } - Y_VERIFY(state.Whole.NotHere.IsEmpty()); - state.WholeSituation = TBlobState::ESituation::Present; - return EStrategyOutcome::DONE; + } + + std::optional<EStrategyOutcome> RestoreWholeWithErasure(TLogContext& logCtx, TBlobState &state, + const TBlobStorageGroupInfo &info) { + RestoreWhateverDataPartsPossible(state, info); + return RestoreWholeFromDataParts(logCtx, state, info); } void IssueGetRequestsForMinimal(TLogContext &logCtx, TBlobState &state, const TBlobStorageGroupInfo &info, bool considerSlowAsError, TGroupDiskRequests &groupDiskRequests) { const ui32 totalPartCount = info.Type.TotalPartCount(); const i32 handoff = info.Type.Handoff(); - for (auto it = state.Whole.NotHere.begin(); it != state.Whole.NotHere.end(); ++it) { - auto [begin, end] = *it; + for (auto [begin, end] : state.Whole.NotHere()) { TBlockSplitRange range; info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), begin, end, &range); @@ -256,7 +137,7 @@ public: TPartOffsetRange &partRange = range.PartRanges[partIdx]; if (partRange.Begin != partRange.End) { TIntervalSet<i32> partInterval(partRange.AlignedBegin, partRange.AlignedEnd); - partInterval.Subtract(state.Parts[partIdx].Here); + partInterval.Subtract(state.Parts[partIdx].Here()); if (!partInterval.IsEmpty()) { for (i32 niche = -1; niche < handoff; ++niche) { ui32 diskIdx = (niche < 0 ? partIdx : totalPartCount + niche); @@ -284,8 +165,7 @@ public: const ui32 totalPartCount = info.Type.TotalPartCount(); const i32 handoff = info.Type.Handoff(); bool isMinimalPossible = true; - for (auto it = state.Whole.NotHere.begin(); it != state.Whole.NotHere.end(); ++it) { - auto [begin, end] = *it; + for (auto [begin, end] : state.Whole.NotHere()) { TBlockSplitRange range; info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), begin, end, &range); @@ -293,7 +173,7 @@ public: TPartOffsetRange &partRange = range.PartRanges[partIdx]; if (partRange.Begin != partRange.End) { TIntervalVec<i32> partInterval(partRange.AlignedBegin, partRange.AlignedEnd); - if (!partInterval.IsSubsetOf(state.Parts[partIdx].Here)) { + if (!partInterval.IsSubsetOf(state.Parts[partIdx].Here())) { bool isThereAGoodPart = false; for (i32 niche = -1; niche < handoff; ++niche) { ui32 diskIdx = (niche < 0 ? partIdx : totalPartCount + niche); @@ -319,8 +199,7 @@ public: void FillIntervalsToRestoreRequestedFullData(TBlobState &state, const TBlobStorageGroupInfo &info, TIntervalSet<i32> &outToRestore) { // get intervals needed to restore the requested full data - for (auto it = state.Whole.NotHere.begin(); it != state.Whole.NotHere.end(); ++it) { - auto [begin, end] = *it; + for (auto [begin, end] : state.Whole.NotHere()) { TBlockSplitRange range; info.Type.BlockSplitRange((TErasureType::ECrcMode)state.Id.CrcMode(), state.Id.BlobSize(), begin, end, &range); @@ -380,7 +259,7 @@ public: if (partSituation == TBlobState::ESituation::Unknown || partSituation == TBlobState::ESituation::Present) { TIntervalSet<i32> partIntervals(toRestore); - partIntervals.Subtract(state.Parts[partIdx].Here); + partIntervals.Subtract(state.Parts[partIdx].Here()); partIntervals.Subtract(disk.DiskParts[partIdx].Requested); if (!partIntervals.IsEmpty()) { // TODO(cthulhu): consider the case when we just need to know that there is a copy diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_mirror.h b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_mirror.h index 4ec3edaf93..37197acd92 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_mirror.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_get_min_iops_mirror.h @@ -8,28 +8,13 @@ namespace NKikimr { class TMinIopsMirrorStrategy : public TStrategyBase { public: std::optional<EStrategyOutcome> RestoreWholeFromDataParts(TLogContext& /*logCtx*/, TBlobState &state, - const TBlobStorageGroupInfo &info) { - TIntervalSet<i32> missing(state.Whole.NotHere); - const ui32 totalPartCount = info.Type.TotalPartCount(); - for (auto it = missing.begin(); it != missing.end(); ++it) { - auto [begin, end] = *it; - for (ui32 partIdx = 0; partIdx < totalPartCount; ++partIdx) { - TIntervalVec<i32> partInterval(begin, end); - if (partInterval.IsSubsetOf(state.Parts[partIdx].Here)) { - TString tmp = TString::Uninitialized(end - begin); - Y_VERIFY(tmp.size()); - state.Parts[partIdx].Data.Read(begin, const_cast<char*>(tmp.data()), tmp.size()); - state.Whole.Data.Write(begin, tmp.data(), end - begin); - state.Whole.Here.Add(begin, end); - state.Whole.NotHere.Subtract(begin, end); - } - } - } - if (state.Whole.NotHere.IsEmpty()) { + const TBlobStorageGroupInfo& /*info*/) { + if (RestoreWholeFromMirror(state)) { state.WholeSituation = TBlobState::ESituation::Present; return EStrategyOutcome::DONE; + } else { + return {}; } - return std::nullopt; } EStrategyOutcome Process(TLogContext &logCtx, TBlobState &state, const TBlobStorageGroupInfo &info, @@ -55,12 +40,11 @@ public: const ui32 totalPartCount = info.Type.TotalPartCount(); const i32 handoff = info.Type.Handoff(); bool isMinimalPossible = true; - for (auto it = state.Whole.NotHere.begin(); it != state.Whole.NotHere.end(); ++it) { - auto [begin, end] = *it; + for (auto [begin, end] : state.Whole.NotHere()) { bool isThereAGoodPart = false; for (ui32 partIdx = 0; partIdx < totalPartCount; ++partIdx) { TIntervalSet<i32> partInterval(begin, end); - partInterval.Subtract(state.Parts[partIdx].Here); + partInterval.Subtract(state.Parts[partIdx].Here()); if (!partInterval.IsEmpty()) { for (i32 niche = -1; niche < handoff; ++niche) { ui32 diskIdx = (niche < 0 ? partIdx : totalPartCount + niche); @@ -78,11 +62,10 @@ public: } } if (isMinimalPossible) { - for (auto it = state.Whole.NotHere.begin(); it != state.Whole.NotHere.end(); ++it) { - auto [begin, end] = *it; + for (auto [begin, end] : state.Whole.NotHere()) { for (ui32 partIdx = 0; partIdx < totalPartCount; ++partIdx) { TIntervalSet<i32> partInterval(begin, end); - partInterval.Subtract(state.Parts[partIdx].Here); + partInterval.Subtract(state.Parts[partIdx].Here()); if (!partInterval.IsEmpty()) { for (i32 niche = -1; niche < handoff; ++niche) { ui32 diskIdx = (niche < 0 ? partIdx : totalPartCount + niche); diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_put_m3of4.h b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_put_m3of4.h index 4a48e10224..a93b4cb11d 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_strategy_put_m3of4.h +++ b/ydb/core/blobstorage/dsproxy/dsproxy_strategy_put_m3of4.h @@ -46,7 +46,7 @@ protected: } } const TIntervalVec<i32> interval(0, state.Id.BlobSize()); - Y_VERIFY(interval.IsSubsetOf(state.Whole.Here), "missing blob data State# %s", state.ToString().data()); + Y_VERIFY(interval.IsSubsetOf(state.Whole.Here()), "missing blob data State# %s", state.ToString().data()); std::array<TRope, 3> parts; ErasureSplit((TErasureType::ECrcMode)state.Id.CrcMode(), info.Type, state.Whole.Data.Read(0, state.Id.BlobSize()), parts); diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp b/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp index 85e16c2aef..59f0c116c3 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_get_ut.cpp @@ -132,9 +132,9 @@ Y_UNIT_TEST(TestBlock42GetIntervalsAllOk) { TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, false, false); } -Y_UNIT_TEST(TestBlock42GetIntervalsAllOkVerbose) { - TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, false); -} +//Y_UNIT_TEST(TestBlock42GetIntervalsAllOkVerbose) { +// TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, false); +//} Y_UNIT_TEST(TestMirror32GetIntervalsAllOk) { TestIntervalsAndCrcAllOk(TErasureType::ErasureMirror3Plus2, false, false); @@ -145,9 +145,9 @@ Y_UNIT_TEST(TestBlock42GetBlobCrcCheck) { TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, false, true); } -Y_UNIT_TEST(TestBlock42GetBlobCrcCheckVerbose) { - TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, true); -} +//Y_UNIT_TEST(TestBlock42GetBlobCrcCheckVerbose) { +// TestIntervalsAndCrcAllOk(TErasureType::Erasure4Plus2Block, true, true); +//} Y_UNIT_TEST(TestMirror32GetBlobCrcCheck) { TestIntervalsAndCrcAllOk(TErasureType::ErasureMirror3Plus2, false, true); @@ -456,39 +456,7 @@ private: } }; -void TestIntervalsWipedAllOk(TErasureType::EErasureSpecies erasureSpecies, bool isVerboseNoDataEnabled = false) { - TActorSystemStub actorSystemStub; - - const ui32 groupId = 0; - TBlobStorageGroupType groupType(erasureSpecies); - const ui32 domainCount = groupType.BlobSubgroupSize(); - - TVector<ui64> queryCounts = {1, 2, 3, 13, 34}; - - for (bool isRestore : {false, true}) { - for (ui32 generateMode = 0; generateMode < 2; ++generateMode) { - for (ui64 wiped1 = 0; wiped1 < domainCount; ++wiped1) { - for (ui64 wiped2 = 0; wiped2 <= wiped1; ++wiped2) { - ui64 maxErrorMask = (wiped1 == wiped2 ? 4 : 24); - for (ui64 errorMask = 0; errorMask <= maxErrorMask; ++errorMask) { - ui64 error1 = errorMask % 5; - ui64 error2 = errorMask / 5; - TTestWipedAllOkStep testStep( - groupId, erasureSpecies, domainCount, queryCounts, - isVerboseNoDataEnabled, isRestore); - testStep.SetGenerateBlobsMode(generateMode); - testStep.Init(); - testStep.AddWipedVDisk(wiped1, error1); - testStep.AddWipedVDisk(wiped2, error2); - testStep.Run(false); - } - } - } - } - } -} - -void TestIntervalsWipedAllOkVMultiPut(TErasureType::EErasureSpecies erasureSpecies, bool isVerboseNoDataEnabled = false) { +void TestIntervalsWipedAllOk(TErasureType::EErasureSpecies erasureSpecies, bool isVerboseNoDataEnabled, bool multiput) { TActorSystemStub actorSystemStub; const ui32 groupId = 0; @@ -512,7 +480,7 @@ void TestIntervalsWipedAllOkVMultiPut(TErasureType::EErasureSpecies erasureSpeci testStep.Init(); testStep.AddWipedVDisk(wiped1, error1); testStep.AddWipedVDisk(wiped2, error2); - testStep.Run(true); + testStep.Run(multiput); } } } @@ -1233,29 +1201,17 @@ void TestWipedErrorWithTwoBlobs(TErasureType::EErasureSpecies erasureSpecies, bo } Y_UNIT_TEST(TestBlock42GetIntervalsWipedAllOk) { - TestIntervalsWipedAllOk(TErasureType::Erasure4Plus2Block); -} - -Y_UNIT_TEST(TestBlock42GetIntervalsWipedAllOkVerbose) { - TestIntervalsWipedAllOk(TErasureType::Erasure4Plus2Block, true); + TestIntervalsWipedAllOk(TErasureType::Erasure4Plus2Block, false, false); } Y_UNIT_TEST(TestBlock42GetIntervalsWipedAllOkVMultiPut) { - TestIntervalsWipedAllOkVMultiPut(TErasureType::Erasure4Plus2Block); -} - -Y_UNIT_TEST(TestBlock42GetIntervalsWipedAllOkVerboseVMultiPut) { - TestIntervalsWipedAllOkVMultiPut(TErasureType::Erasure4Plus2Block, true); + TestIntervalsWipedAllOk(TErasureType::Erasure4Plus2Block, false, true); } Y_UNIT_TEST(TestBlock42GetIntervalsWipedAllOkComparisonVMultiPutAndVPut) { TestIntervalsWipedAllOkComparisonVMultiPutAndVPut(TErasureType::Erasure4Plus2Block); } -Y_UNIT_TEST(TestBlock42GetIntervalsWipedAllOkVerboseComparisonVMultiPutAndVPut) { - TestIntervalsWipedAllOkComparisonVMultiPutAndVPut(TErasureType::Erasure4Plus2Block, true); -} - Y_UNIT_TEST(TestBlock42GetIntervalsWipedError) { TestIntervalsWipedError(TErasureType::Erasure4Plus2Block); } @@ -1265,11 +1221,11 @@ Y_UNIT_TEST(TestBlock42WipedErrorWithTwoBlobs) { } Y_UNIT_TEST(TestMirror32GetIntervalsWipedAllOk) { - TestIntervalsWipedAllOk(TErasureType::ErasureMirror3Plus2); + TestIntervalsWipedAllOk(TErasureType::ErasureMirror3Plus2, false, false); } Y_UNIT_TEST(TestMirror32GetIntervalsWipedAllOkVMultiPut) { - TestIntervalsWipedAllOkVMultiPut(TErasureType::ErasureMirror3Plus2); + TestIntervalsWipedAllOk(TErasureType::ErasureMirror3Plus2, false, true); } Y_UNIT_TEST(TestMirror32GetIntervalsWipedAllOkComparisonVMultiPutAndVPut) { diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp b/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp index 79e36d52d6..80b6465237 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_put_ut.cpp @@ -59,7 +59,7 @@ void TestPutMaxPartCountOnHandoff(TErasureType::EErasureSpecies erasureSpecies) char *dataBytes = encryptedData.Detach(); Encrypt(dataBytes, dataBytes, 0, encryptedData.size(), blobId, *group.GetInfo()); - TBatchedVec<TStackVec<TRope, 8>> partSetSingleton(1); + TBatchedVec<TStackVec<TRope, TypicalPartsInBlob>> partSetSingleton(1); partSetSingleton[0].resize(totalParts); ErasureSplit((TErasureType::ECrcMode)blobId.CrcMode(), group.GetInfo()->Type, TRope(encryptedData), partSetSingleton[0]); @@ -165,7 +165,7 @@ struct TTestPutAllOk { TLogContext LogCtx; - TBatchedVec<TStackVec<TRope, 8>> PartSets; + TBatchedVec<TStackVec<TRope, TypicalPartsInBlob>> PartSets; TStackVec<ui32, 16> CheckStack; @@ -418,7 +418,7 @@ Y_UNIT_TEST(TestMirror3dcWith3x3MinLatencyMod) { logCtx.LogAcc.IsLogEnabled = false; const ui32 totalParts = env.Info->Type.TotalPartCount(); - TBatchedVec<TStackVec<TRope, 8>> partSetSingleton(1); + TBatchedVec<TStackVec<TRope, TypicalPartsInBlob>> partSetSingleton(1); partSetSingleton[0].resize(totalParts); TString encryptedData = data; diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_vdisk_mock_ut.h b/ydb/core/blobstorage/dsproxy/ut/dsproxy_vdisk_mock_ut.h index 0675c05952..2f19d39e2c 100644 --- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_vdisk_mock_ut.h +++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_vdisk_mock_ut.h @@ -144,7 +144,7 @@ public: if (NotYetBlobs.find(it->first) != NotYetBlobs.end()) { outVGetResult.AddResult(NKikimrProto::NOT_YET, it->first, shift, static_cast<ui32>(resultSize), cookie); } else { - auto buffer = TRcBuf::Copy((char*)it->second.data() + rShift, resultSize); + auto buffer = TRcBuf::Copy(it->second.data() + rShift, resultSize); outVGetResult.AddResult(NKikimrProto::OK, it->first, shift, TRope(std::move(buffer)), cookie); } } @@ -180,7 +180,7 @@ private: TString AlphaData(ui32 size) { TString data = TString::Uninitialized(size); - ui8 *p = (ui8*)(void*)data.Detach(); + char *p = data.Detach(); for (ui32 offset = 0; offset < size; ++offset) { p[offset] = (ui8)offset; } @@ -257,9 +257,8 @@ public: shift = Min(shift, id.BlobSize()); ui32 rSize = size ? size : id.BlobSize(); rSize = Min(rSize, id.BlobSize() - shift); - TString result; - result.resize(rSize); - memcpy((void*)result.data(), (char*)(void*)data.data() + shift, rSize); + TString result = TString::Uninitialized(rSize); + memcpy(result.Detach(), data.data() + shift, rSize); return result; } @@ -269,14 +268,15 @@ public: if (size != buffer.size()) { UNIT_ASSERT_VALUES_EQUAL(size, buffer.size()); } - const ui8 *a = (const ui8*)(void*)blob.Data.data(); - const ui8 *b = (const ui8*)(void*)buffer.data(); + const char *a = blob.Data.data(); + const char *b = buffer.data(); + UNIT_ASSERT(shift < blob.Data.size()); + UNIT_ASSERT(shift + size <= blob.Data.size()); + UNIT_ASSERT(size <= buffer.size()); if (memcmp(a + shift, b, size) != 0) { for (ui32 offset = 0; offset < size; ++offset) { - UNIT_ASSERT_VALUES_EQUAL_C((ui32)a[shift + offset], (ui32)b[offset], - "Id# " << id.ToString() << " offset# " << offset - << " shift# " << shift << " s+o# " << (shift + offset) << " size# " << size - << " canonic a# " << (ui32)a[shift+offset] << " actual b# " << (ui32)b[offset]); + UNIT_ASSERT_VALUES_EQUAL_C((ui8)a[shift + offset], (ui8)b[offset], + "Id# " << id.ToString() << " offset# " << offset << " shift# " << shift << " size# " << size); } } } diff --git a/ydb/core/erasure/erasure.h b/ydb/core/erasure/erasure.h index 37b8c31b83..829603c012 100644 --- a/ydb/core/erasure/erasure.h +++ b/ydb/core/erasure/erasure.h @@ -391,6 +391,6 @@ bool ErasureSplit(TErasureType::ECrcMode crcMode, TErasureType erasure, const TR TErasureSplitContext *context = nullptr); void ErasureRestore(TErasureType::ECrcMode crcMode, TErasureType erasure, ui32 fullSize, TRope *whole, - std::span<TRope> parts, ui32 restoreMask); + std::span<TRope> parts, ui32 restoreMask, ui32 offset = 0, bool isFragment = false); } diff --git a/ydb/core/erasure/erasure_new_ut.cpp b/ydb/core/erasure/erasure_new_ut.cpp index 727008b92f..07fda34d9a 100644 --- a/ydb/core/erasure/erasure_new_ut.cpp +++ b/ydb/core/erasure/erasure_new_ut.cpp @@ -92,4 +92,183 @@ Y_UNIT_TEST_SUITE(ErasureBrandNew) { } } + Y_UNIT_TEST(Block42_restore) { + TErasureType erasure(TErasureType::Erasure4Plus2Block); + + for (ui32 length = 1; length < 4096; ++length) { + TString buffer = FastGenDataForLZ4(length, length); + + std::array<TRope, 6> parts; + ErasureSplit(TErasureType::CrcModeNone, TErasureType::Erasure4Plus2Block, TRope(buffer), parts); + + for (i32 i = -1; i < 6; ++i) { + for (i32 j = -1; j < 6; ++j) { + if (i == j) { + continue; + } + std::vector<bool> opts1(1, false); + if (i != -1) { + opts1.push_back(true); + } + std::vector<bool> opts2(1, false); + if (j != -1) { + opts2.push_back(true); + } + + for (bool restoreI : opts1) { + for (bool restoreJ : opts2) { + for (bool needWhole : {true, false}) { + const ui32 clearMask = (i != -1 ? 1 << i : 0) | (j != -1 ? 1 << j : 0); + const ui32 restoreMask = (restoreI ? 1 << i : 0) | (restoreJ ? 1 << j : 0); + if (!needWhole && !restoreMask) { + continue; + } + + TString iter = TStringBuilder() + << "i# " << i + << " j# " << j + << " restoreI# " << restoreI + << " restoreJ# " << restoreJ + << " needWhole# " << needWhole + << " clearMask# " << clearMask + << " restoreMask# " << restoreMask; + + std::array<TRope, 6> copy; + for (ui32 k = 0; k < 6; ++k) { + if (~clearMask >> k & 1) { + copy[k] = parts[k]; + } + } + + TRope whole; + ErasureRestore(TErasureType::CrcModeNone, TErasureType::Erasure4Plus2Block, buffer.size(), + needWhole ? &whole : nullptr, copy, restoreMask, 0, false); + for (ui32 k = 0; k < 6; ++k) { + TString restored = copy[k].ConvertToString(); + TString reference = parts[k].ConvertToString(); + if ((clearMask & ~restoreMask) >> k & 1) { + UNIT_ASSERT(copy[k].size() == parts[k].size() || !copy[k]); + if (copy[k].size() == parts[k].size()) { + UNIT_ASSERT_EQUAL(copy[k], parts[k]); + } + continue; + } + UNIT_ASSERT_VALUES_EQUAL_C(restored.size(), reference.size(), iter << " k# " << k); + for (ui32 z = 0; z < restored.size(); ++z) { + if (restored[z] != reference[z]) { + Cerr << "difference at " << z << Endl; + break; + } + } + UNIT_ASSERT_EQUAL_C(restored, reference, iter << " k# " << k); + } + if (needWhole) { + UNIT_ASSERT_EQUAL_C(whole.ConvertToString(), buffer, iter); + } + + ui32 partLen = parts[0].size(); + + if (!needWhole) { + auto doTry = [&](ui32 offset, ui32 len) { + if (offset < partLen && len <= partLen - offset) { + TString xiter = TStringBuilder() << iter + << " offset# " << offset << " len# " << len; + std::array<TRope, 6> copy; + for (ui32 k = 0; k < 6; ++k) { + if (~clearMask >> k & 1) { + copy[k] = {parts[k].Position(offset), parts[k].Position(offset + len)}; + UNIT_ASSERT_VALUES_EQUAL_C(copy[k].size(), len, xiter << " k# " << k); + } + } + ErasureRestore(TErasureType::CrcModeNone, TErasureType::Erasure4Plus2Block, + buffer.size(), nullptr, copy, restoreMask, offset, true); + for (ui32 k = 0; k < 6; ++k) { + TString restored = copy[k].ConvertToString(); + TString reference = parts[k].ConvertToString().substr(offset, len); + if ((clearMask & ~restoreMask) >> k & 1) { + UNIT_ASSERT(copy[k].size() == parts[k].size() || !copy[k]); + if (copy[k].size() == parts[k].size()) { + UNIT_ASSERT_EQUAL(copy[k], parts[k]); + } + continue; + } + UNIT_ASSERT_VALUES_EQUAL_C(restored.size(), reference.size(), xiter << " k# " << k); + UNIT_ASSERT_EQUAL_C(restored, reference, xiter << " k# " << k); + } + } + }; + + doTry(0, partLen); + doTry(32, partLen - 32); + doTry(64, partLen - 64); + doTry(0, 32); + doTry(32, 32); + } + } + } + } + } + } + } + } + + Y_UNIT_TEST(Block42_restore_benchmark) { + TErasureType erasure(TErasureType::Erasure4Plus2Block); + + ui64 totalSize = 0; + std::vector<std::tuple<TRope, std::array<TRope, 6>, ui32, ui32>> parts; + std::vector<TDataPartSet> restored1; + std::vector<std::array<TRope, 6>> restored2; + + for (ui32 iter = 0; iter < 10000; ++iter) { + const ui32 length = 1 + RandomNumber(100000u); + TString buffer = FastGenDataForLZ4(length, iter); + std::array<TRope, 6> p; + ErasureSplit(TErasureType::CrcModeNone, TErasureType::Erasure4Plus2Block, TRope(buffer), p); + for (TRope& r : p) { + r.Compact(); + } + parts.emplace_back(TRope(buffer), std::move(p), RandomNumber(6u), RandomNumber(6u)); + totalSize += buffer.size(); + } + + restored1.reserve(parts.size()); + restored2.reserve(parts.size()); + + THPTimer timer; + for (const auto& [rope, p, i, j] : parts) { + TDataPartSet& parts = restored1.emplace_back(); + parts.PartsMask = 63 & ~(1 << i) & ~(1 << j); + parts.FullDataSize = rope.size(); + parts.Parts.resize(6); + for (ui32 k = 0; k < 6; ++k) { + if (k != i && k != j) { + parts.Parts[k].ReferenceTo(p[k]); + } + } + erasure.RestoreData(TErasureType::CrcModeNone, parts, true, false, true); + } + TDuration period1 = TDuration::Seconds(timer.PassedReset()); + + for (const auto& [rope, p, i, j] : parts) { + std::array<TRope, 6>& copy = restored2.emplace_back(); + for (ui32 k = 0; k < 6; ++k) { + if (k != i && k != j) { + copy[k] = p[k]; + } + } + ErasureRestore(TErasureType::CrcModeNone, erasure, rope.size(), nullptr, copy, 1 << i | 1 << j, 0, false); + UNIT_ASSERT_EQUAL(copy, p); + } + TDuration period2 = TDuration::Seconds(timer.PassedReset()); + + Cerr << "totalSize# " << totalSize + << " period1# " << period1 + << " period2# " << period2 + << " MB/s1# " << (1.0 * totalSize / period1.MicroSeconds()) + << " MB/s2# " << (1.0 * totalSize / period2.MicroSeconds()) + << " factor# " << (1.0 * period1.MicroSeconds() / period2.MicroSeconds()) + << Endl; + } + } diff --git a/ydb/core/erasure/erasure_restore.cpp b/ydb/core/erasure/erasure_restore.cpp index 0244b33789..676e04a6e5 100644 --- a/ydb/core/erasure/erasure_restore.cpp +++ b/ydb/core/erasure/erasure_restore.cpp @@ -1,28 +1,564 @@ #include "erasure.h" +#include <library/cpp/pop_count/popcount.h> +#include <util/string/printf.h> namespace NKikimr { - void ErasureRestore(TErasureType::ECrcMode crcMode, TErasureType erasure, ui32 fullSize, TRope *whole, - std::span<TRope> parts, ui32 restoreMask) { +#define RESTORE_BLOCK(RestoreMask) /* all other parts are assumed available */ \ + if (RestoreMask == 5) { \ + const ui64 z0 = a1[0] ^ a1[1] ^ a1[2] ^ a1[3] ^ a3[0] ^ a3[1] ^ a3[2] ^ a4[0] ^ a4[2] ^ a5[2]; \ + const ui64 z1 = a1[0] ^ a3[3] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[2] ^ a5[3]; \ + const ui64 z2 = a1[2] ^ a1[3] ^ a3[1] ^ a3[2] ^ a4[0] ^ a4[1] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z3 = a1[0] ^ a1[1] ^ a1[2] ^ a3[0] ^ a3[1] ^ a3[3] ^ a4[1] ^ a5[1] ^ a5[3]; \ + const ui64 z4 = a1[1] ^ a1[2] ^ a1[3] ^ a3[1] ^ a3[2] ^ a4[2] ^ a5[2]; \ + const ui64 z5 = a1[0] ^ a1[1] ^ a3[1] ^ a3[3] ^ a4[0] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[2] ^ a5[3]; \ + const ui64 z6 = a1[3] ^ a3[1] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z7 = a1[0] ^ a1[1] ^ a1[2] ^ a1[3] ^ a3[0] ^ a3[1] ^ a4[1] ^ a4[3] ^ a5[1] ^ a5[3]; \ + out0[0] = z0; \ + out0[1] = z1; \ + out0[2] = z2; \ + out0[3] = z3; \ + out2[0] = z4; \ + out2[1] = z5; \ + out2[2] = z6; \ + out2[3] = z7; \ + } \ + if (RestoreMask == 1 || RestoreMask == 33) { \ + const ui64 z0 = a1[0] ^ a2[0] ^ a3[0] ^ a4[0]; \ + const ui64 z1 = a1[1] ^ a2[1] ^ a3[1] ^ a4[1]; \ + const ui64 z2 = a1[2] ^ a2[2] ^ a3[2] ^ a4[2]; \ + const ui64 z3 = a1[3] ^ a2[3] ^ a3[3] ^ a4[3]; \ + out0[0] = z0; \ + out0[1] = z1; \ + out0[2] = z2; \ + out0[3] = z3; \ + } \ + if (RestoreMask == 9) { \ + const ui64 z0 = a1[1] ^ a1[2] ^ a2[0] ^ a2[2] ^ a2[3] ^ a4[2] ^ a5[0] ^ a5[2]; \ + const ui64 z1 = a1[1] ^ a1[3] ^ a2[1] ^ a2[2] ^ a4[0] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z2 = a1[1] ^ a2[0] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[3]; \ + const ui64 z3 = a1[0] ^ a1[1] ^ a2[1] ^ a2[2] ^ a2[3] ^ a4[1] ^ a4[3] ^ a5[1]; \ + const ui64 z4 = a1[0] ^ a1[1] ^ a1[2] ^ a2[2] ^ a2[3] ^ a4[0] ^ a4[2] ^ a5[0] ^ a5[2]; \ + const ui64 z5 = a1[3] ^ a2[2] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z6 = a1[1] ^ a1[2] ^ a2[0] ^ a2[2] ^ a4[0] ^ a4[1] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[3]; \ + const ui64 z7 = a1[0] ^ a1[1] ^ a1[3] ^ a2[1] ^ a2[2] ^ a4[1] ^ a5[1]; \ + out0[0] = z0; \ + out0[1] = z1; \ + out0[2] = z2; \ + out0[3] = z3; \ + out3[0] = z4; \ + out3[1] = z5; \ + out3[2] = z6; \ + out3[3] = z7; \ + } \ + if (RestoreMask == 3) { \ + const ui64 z0 = a2[3] ^ a3[2] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z1 = a2[0] ^ a2[3] ^ a3[0] ^ a3[2] ^ a3[3] ^ a4[0] ^ a5[0] ^ a5[1]; \ + const ui64 z2 = a2[1] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[2] ^ a3[3] ^ a4[2] ^ a4[3] ^ a5[3]; \ + const ui64 z3 = a2[2] ^ a2[3] ^ a3[1] ^ a3[3] ^ a4[0] ^ a4[1] ^ a4[2] ^ a5[0] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z4 = a2[0] ^ a2[3] ^ a3[0] ^ a3[2] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z5 = a2[0] ^ a2[1] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[2] ^ a3[3] ^ a4[0] ^ a4[1] ^ a5[0] ^ a5[1]; \ + const ui64 z6 = a2[1] ^ a2[2] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[3] ^ a4[3] ^ a5[3]; \ + const ui64 z7 = a2[2] ^ a3[1] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[2] ^ a5[3]; \ + out0[0] = z0; \ + out0[1] = z1; \ + out0[2] = z2; \ + out0[3] = z3; \ + out1[0] = z4; \ + out1[1] = z5; \ + out1[2] = z6; \ + out1[3] = z7; \ + } \ + if (RestoreMask == 17) { \ + const ui64 z0 = a3[2] ^ a2[3] ^ a3[1] ^ a2[2] ^ a1[3] ^ a5[0]; \ + const ui64 z1 = a1[0] ^ a3[3] ^ a3[1] ^ a2[2] ^ a1[3] ^ a5[1]; \ + const ui64 z2 = a2[0] ^ a1[1] ^ a3[1] ^ a2[2] ^ a1[3] ^ a5[2]; \ + const ui64 z3 = a3[0] ^ a2[1] ^ a1[2] ^ a3[1] ^ a2[2] ^ a1[3] ^ a5[3]; \ + const ui64 z4 = a1[0] ^ a1[3] ^ a2[0] ^ a2[2] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[2] ^ a5[0]; \ + const ui64 z5 = a1[0] ^ a1[1] ^ a1[3] ^ a2[1] ^ a2[2] ^ a3[3] ^ a5[1]; \ + const ui64 z6 = a1[1] ^ a1[2] ^ a1[3] ^ a2[0] ^ a3[1] ^ a3[2] ^ a5[2]; \ + const ui64 z7 = a1[2] ^ a2[1] ^ a2[2] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[3] ^ a5[3]; \ + out0[0] = z0; \ + out0[1] = z1; \ + out0[2] = z2; \ + out0[3] = z3; \ + out4[0] = z4; \ + out4[1] = z5; \ + out4[2] = z6; \ + out4[3] = z7; \ + } \ + if (RestoreMask == 18) { \ + const ui64 z0 = a0[0] ^ a0[1] ^ a2[3] ^ a3[2] ^ a3[3] ^ a5[0] ^ a5[1]; \ + const ui64 z1 = a0[0] ^ a0[2] ^ a2[0] ^ a2[3] ^ a3[2] ^ a5[0] ^ a5[2]; \ + const ui64 z2 = a0[0] ^ a0[3] ^ a2[1] ^ a2[3] ^ a3[0] ^ a3[2] ^ a5[0] ^ a5[3]; \ + const ui64 z3 = a0[0] ^ a3[2] ^ a2[3] ^ a3[1] ^ a2[2] ^ a5[0]; \ + const ui64 z4 = a0[1] ^ a2[0] ^ a2[3] ^ a3[0] ^ a3[2] ^ a3[3] ^ a5[0] ^ a5[1]; \ + const ui64 z5 = a0[0] ^ a0[1] ^ a0[2] ^ a2[0] ^ a2[1] ^ a2[3] ^ a3[1] ^ a3[2] ^ a5[0] ^ a5[2]; \ + const ui64 z6 = a0[0] ^ a0[2] ^ a0[3] ^ a2[1] ^ a2[2] ^ a2[3] ^ a3[0] ^ a5[0] ^ a5[3]; \ + const ui64 z7 = a0[0] ^ a0[3] ^ a2[2] ^ a3[1] ^ a3[2] ^ a3[3] ^ a5[0]; \ + out1[0] = z0; \ + out1[1] = z1; \ + out1[2] = z2; \ + out1[3] = z3; \ + out4[0] = z4; \ + out4[1] = z5; \ + out4[2] = z6; \ + out4[3] = z7; \ + } \ + if (RestoreMask == 2 || RestoreMask == 34) { \ + const ui64 z0 = a0[0] ^ a2[0] ^ a3[0] ^ a4[0]; \ + const ui64 z1 = a0[1] ^ a2[1] ^ a3[1] ^ a4[1]; \ + const ui64 z2 = a0[2] ^ a2[2] ^ a3[2] ^ a4[2]; \ + const ui64 z3 = a0[3] ^ a2[3] ^ a3[3] ^ a4[3]; \ + out1[0] = z0; \ + out1[1] = z1; \ + out1[2] = z2; \ + out1[3] = z3; \ + } \ + if (RestoreMask == 6) { \ + const ui64 z0 = a0[1] ^ a3[3] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[2] ^ a5[3]; \ + const ui64 z1 = a0[0] ^ a0[1] ^ a0[2] ^ a3[0] ^ a3[3] ^ a4[0] ^ a5[1] ^ a5[2]; \ + const ui64 z2 = a0[0] ^ a0[2] ^ a0[3] ^ a3[1] ^ a3[3] ^ a4[2] ^ a4[3] ^ a5[0]; \ + const ui64 z3 = a0[0] ^ a0[3] ^ a3[2] ^ a3[3] ^ a4[0] ^ a4[1] ^ a4[2] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z4 = a0[0] ^ a0[1] ^ a3[0] ^ a3[3] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[2] ^ a5[3]; \ + const ui64 z5 = a0[0] ^ a0[2] ^ a3[0] ^ a3[1] ^ a3[3] ^ a4[0] ^ a4[1] ^ a5[1] ^ a5[2]; \ + const ui64 z6 = a0[0] ^ a0[3] ^ a3[1] ^ a3[2] ^ a3[3] ^ a4[3] ^ a5[0]; \ + const ui64 z7 = a0[0] ^ a3[2] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[1] ^ a5[2] ^ a5[3]; \ + out1[0] = z0; \ + out1[1] = z1; \ + out1[2] = z2; \ + out1[3] = z3; \ + out2[0] = z4; \ + out2[1] = z5; \ + out2[2] = z6; \ + out2[3] = z7; \ + } \ + if (RestoreMask == 10) { \ + const ui64 z0 = a0[2] ^ a0[3] ^ a2[0] ^ a2[1] ^ a2[2] ^ a2[3] ^ a4[0] ^ a4[2] ^ a5[0] ^ a5[3]; \ + const ui64 z1 = a0[2] ^ a2[0] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[3]; \ + const ui64 z2 = a0[0] ^ a0[2] ^ a2[2] ^ a2[3] ^ a4[0] ^ a4[1] ^ a4[3] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z3 = a0[1] ^ a0[2] ^ a2[0] ^ a2[1] ^ a2[2] ^ a4[1] ^ a5[2]; \ + const ui64 z4 = a0[0] ^ a0[2] ^ a0[3] ^ a2[1] ^ a2[2] ^ a2[3] ^ a4[2] ^ a5[0] ^ a5[3]; \ + const ui64 z5 = a0[1] ^ a0[2] ^ a2[0] ^ a2[1] ^ a4[0] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[3]; \ + const ui64 z6 = a0[0] ^ a2[3] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[1] ^ a5[2] ^ a5[3]; \ + const ui64 z7 = a0[1] ^ a0[2] ^ a0[3] ^ a2[0] ^ a2[1] ^ a2[2] ^ a2[3] ^ a4[1] ^ a4[3] ^ a5[2]; \ + out1[0] = z0; \ + out1[1] = z1; \ + out1[2] = z2; \ + out1[3] = z3; \ + out3[0] = z4; \ + out3[1] = z5; \ + out3[2] = z6; \ + out3[3] = z7; \ + } \ + if (RestoreMask == 4 || RestoreMask == 36) { \ + const ui64 z0 = a0[0] ^ a1[0] ^ a3[0] ^ a4[0]; \ + const ui64 z1 = a0[1] ^ a1[1] ^ a3[1] ^ a4[1]; \ + const ui64 z2 = a0[2] ^ a1[2] ^ a3[2] ^ a4[2]; \ + const ui64 z3 = a0[3] ^ a1[3] ^ a3[3] ^ a4[3]; \ + out2[0] = z0; \ + out2[1] = z1; \ + out2[2] = z2; \ + out2[3] = z3; \ + } \ + if (RestoreMask == 20) { \ + const ui64 z0 = a0[1] ^ a0[2] ^ a1[0] ^ a1[1] ^ a3[3] ^ a5[1] ^ a5[2]; \ + const ui64 z1 = a0[1] ^ a0[3] ^ a1[0] ^ a1[2] ^ a3[0] ^ a3[3] ^ a5[1] ^ a5[3]; \ + const ui64 z2 = a1[0] ^ a0[1] ^ a3[3] ^ a3[1] ^ a1[3] ^ a5[1]; \ + const ui64 z3 = a0[0] ^ a0[1] ^ a1[0] ^ a3[2] ^ a3[3] ^ a5[0] ^ a5[1]; \ + const ui64 z4 = a0[0] ^ a0[1] ^ a0[2] ^ a1[1] ^ a3[0] ^ a3[3] ^ a5[1] ^ a5[2]; \ + const ui64 z5 = a0[3] ^ a1[0] ^ a1[1] ^ a1[2] ^ a3[0] ^ a3[1] ^ a3[3] ^ a5[1] ^ a5[3]; \ + const ui64 z6 = a0[1] ^ a0[2] ^ a1[0] ^ a1[2] ^ a1[3] ^ a3[1] ^ a3[2] ^ a3[3] ^ a5[1]; \ + const ui64 z7 = a0[0] ^ a0[1] ^ a0[3] ^ a1[0] ^ a1[3] ^ a3[2] ^ a5[0] ^ a5[1]; \ + out2[0] = z0; \ + out2[1] = z1; \ + out2[2] = z2; \ + out2[3] = z3; \ + out4[0] = z4; \ + out4[1] = z5; \ + out4[2] = z6; \ + out4[3] = z7; \ + } \ + if (RestoreMask == 12) { \ + const ui64 z0 = a0[2] ^ a1[1] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[3]; \ + const ui64 z1 = a0[0] ^ a0[2] ^ a0[3] ^ a1[0] ^ a1[1] ^ a1[2] ^ a4[0] ^ a5[2] ^ a5[3]; \ + const ui64 z2 = a0[0] ^ a0[1] ^ a0[2] ^ a0[3] ^ a1[0] ^ a1[2] ^ a1[3] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1]; \ + const ui64 z3 = a0[1] ^ a0[3] ^ a1[0] ^ a1[3] ^ a4[0] ^ a4[1] ^ a4[2] ^ a5[0] ^ a5[2] ^ a5[3]; \ + const ui64 z4 = a0[0] ^ a0[2] ^ a1[0] ^ a1[1] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[1] ^ a5[3]; \ + const ui64 z5 = a0[0] ^ a0[1] ^ a0[2] ^ a0[3] ^ a1[0] ^ a1[2] ^ a4[0] ^ a4[1] ^ a5[2] ^ a5[3]; \ + const ui64 z6 = a0[0] ^ a0[1] ^ a0[3] ^ a1[0] ^ a1[3] ^ a4[3] ^ a5[0] ^ a5[1]; \ + const ui64 z7 = a0[1] ^ a1[0] ^ a4[0] ^ a4[1] ^ a4[2] ^ a4[3] ^ a5[0] ^ a5[2] ^ a5[3]; \ + out2[0] = z0; \ + out2[1] = z1; \ + out2[2] = z2; \ + out2[3] = z3; \ + out3[0] = z4; \ + out3[1] = z5; \ + out3[2] = z6; \ + out3[3] = z7; \ + } \ + if (RestoreMask == 8 || RestoreMask == 40) { \ + const ui64 z0 = a0[0] ^ a1[0] ^ a2[0] ^ a4[0]; \ + const ui64 z1 = a0[1] ^ a1[1] ^ a2[1] ^ a4[1]; \ + const ui64 z2 = a0[2] ^ a1[2] ^ a2[2] ^ a4[2]; \ + const ui64 z3 = a0[3] ^ a1[3] ^ a2[3] ^ a4[3]; \ + out3[0] = z0; \ + out3[1] = z1; \ + out3[2] = z2; \ + out3[3] = z3; \ + } \ + if (RestoreMask == 24) { \ + const ui64 z0 = a0[2] ^ a0[3] ^ a1[1] ^ a1[2] ^ a2[0] ^ a2[1] ^ a5[2] ^ a5[3]; \ + const ui64 z1 = a2[0] ^ a1[1] ^ a0[2] ^ a2[2] ^ a1[3] ^ a5[2]; \ + const ui64 z2 = a0[0] ^ a0[2] ^ a1[1] ^ a2[0] ^ a2[3] ^ a5[0] ^ a5[2]; \ + const ui64 z3 = a0[1] ^ a0[2] ^ a1[0] ^ a1[1] ^ a2[0] ^ a5[1] ^ a5[2]; \ + const ui64 z4 = a0[0] ^ a0[2] ^ a0[3] ^ a1[0] ^ a1[1] ^ a1[2] ^ a2[1] ^ a5[2] ^ a5[3]; \ + const ui64 z5 = a0[1] ^ a0[2] ^ a1[3] ^ a2[0] ^ a2[1] ^ a2[2] ^ a5[2]; \ + const ui64 z6 = a0[0] ^ a1[1] ^ a1[2] ^ a2[0] ^ a2[2] ^ a2[3] ^ a5[0] ^ a5[2]; \ + const ui64 z7 = a0[1] ^ a0[2] ^ a0[3] ^ a1[0] ^ a1[1] ^ a1[3] ^ a2[0] ^ a2[3] ^ a5[1] ^ a5[2]; \ + out3[0] = z0; \ + out3[1] = z1; \ + out3[2] = z2; \ + out3[3] = z3; \ + out4[0] = z4; \ + out4[1] = z5; \ + out4[2] = z6; \ + out4[3] = z7; \ + } \ + if (RestoreMask == 16 || RestoreMask == 48) { \ + const ui64 z0 = a0[0] ^ a1[0] ^ a2[0] ^ a3[0]; \ + const ui64 z1 = a0[1] ^ a1[1] ^ a2[1] ^ a3[1]; \ + const ui64 z2 = a0[2] ^ a1[2] ^ a2[2] ^ a3[2]; \ + const ui64 z3 = a0[3] ^ a1[3] ^ a2[3] ^ a3[3]; \ + out4[0] = z0; \ + out4[1] = z1; \ + out4[2] = z2; \ + out4[3] = z3; \ + } \ + if (RestoreMask == 40) { \ + const ui64 z0 = a0[0] ^ a0[1] ^ a0[2] ^ a1[1] ^ a1[2] ^ a1[3] ^ a2[1] ^ a2[3] ^ a4[1] ^ a4[2]; \ + const ui64 z1 = a0[3] ^ a1[0] ^ a1[1] ^ a2[1] ^ a2[2] ^ a2[3] ^ a4[1] ^ a4[3]; \ + const ui64 z2 = a0[1] ^ a0[2] ^ a1[3] ^ a2[0] ^ a2[1] ^ a2[2] ^ a4[1]; \ + const ui64 z3 = a0[0] ^ a0[1] ^ a0[3] ^ a1[0] ^ a1[1] ^ a1[2] ^ a1[3] ^ a2[0] ^ a2[2] ^ a4[0] ^ a4[1]; \ + out5[0] = z0; \ + out5[1] = z1; \ + out5[2] = z2; \ + out5[3] = z3; \ + } \ + if (RestoreMask == 36) { \ + const ui64 z0 = a0[0] ^ a0[2] ^ a0[3] ^ a1[2] ^ a3[1] ^ a3[3] ^ a4[2] ^ a4[3]; \ + const ui64 z1 = a0[1] ^ a0[2] ^ a1[0] ^ a1[2] ^ a1[3] ^ a3[1] ^ a3[2] ^ a3[3] ^ a4[2]; \ + const ui64 z2 = a0[0] ^ a1[0] ^ a1[1] ^ a1[2] ^ a1[3] ^ a3[0] ^ a3[1] ^ a3[2] ^ a4[0] ^ a4[2]; \ + const ui64 z3 = a0[1] ^ a0[2] ^ a0[3] ^ a1[1] ^ a1[3] ^ a3[0] ^ a3[2] ^ a4[1] ^ a4[2]; \ + out5[0] = z0; \ + out5[1] = z1; \ + out5[2] = z2; \ + out5[3] = z3; \ + } \ + if (RestoreMask == 34) { \ + const ui64 z0 = a0[0] ^ a0[3] ^ a2[2] ^ a3[1] ^ a3[2] ^ a3[3] ^ a4[3]; \ + const ui64 z1 = a0[0] ^ a0[1] ^ a0[3] ^ a2[0] ^ a2[2] ^ a2[3] ^ a3[0] ^ a3[1] ^ a4[0] ^ a4[3]; \ + const ui64 z2 = a0[1] ^ a0[2] ^ a0[3] ^ a2[0] ^ a2[1] ^ a2[2] ^ a2[3] ^ a3[3] ^ a4[1] ^ a4[3]; \ + const ui64 z3 = a0[2] ^ a2[1] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[2] ^ a3[3] ^ a4[2] ^ a4[3]; \ + out5[0] = z0; \ + out5[1] = z1; \ + out5[2] = z2; \ + out5[3] = z3; \ + } \ + if (RestoreMask == 32 || RestoreMask == 48) { \ + const ui64 z0 = a0[0] ^ a3[2] ^ a2[3] ^ a3[1] ^ a2[2] ^ a1[3]; \ + const ui64 z1 = a1[0] ^ a0[1] ^ a3[3] ^ a3[1] ^ a2[2] ^ a1[3]; \ + const ui64 z2 = a2[0] ^ a1[1] ^ a0[2] ^ a3[1] ^ a2[2] ^ a1[3]; \ + const ui64 z3 = a3[0] ^ a2[1] ^ a1[2] ^ a0[3] ^ a3[1] ^ a2[2] ^ a1[3]; \ + out5[0] = z0; \ + out5[1] = z1; \ + out5[2] = z2; \ + out5[3] = z3; \ + } \ + if (RestoreMask == 33) { \ + const ui64 z0 = a1[0] ^ a1[3] ^ a2[0] ^ a2[2] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[2] ^ a4[0]; \ + const ui64 z1 = a1[0] ^ a1[1] ^ a1[3] ^ a2[1] ^ a2[2] ^ a3[3] ^ a4[1]; \ + const ui64 z2 = a1[1] ^ a1[2] ^ a1[3] ^ a2[0] ^ a3[1] ^ a3[2] ^ a4[2]; \ + const ui64 z3 = a1[2] ^ a2[1] ^ a2[2] ^ a2[3] ^ a3[0] ^ a3[1] ^ a3[3] ^ a4[3]; \ + out5[0] = z0; \ + out5[1] = z1; \ + out5[2] = z2; \ + out5[3] = z3; \ + } + + template<ui32 RestoreMask> + void ErasureRestoreBlock42Do(std::span<TRope> parts) { + const ui32 blockSize = 32; + + auto iter0 = parts[0].Begin(); + auto iter1 = parts[1].Begin(); + auto iter2 = parts[2].Begin(); + auto iter3 = parts[3].Begin(); + auto iter4 = parts[4].Begin(); + auto iter5 = parts[5].Begin(); + + alignas(32) ui64 temp[4 * 6]; // temporary storage to fetch data into + alignas(32) ui64 restore[4]; // temporary holder for unused output block + + const ui64 *a0 = nullptr; + const ui64 *a1 = nullptr; + const ui64 *a2 = nullptr; + const ui64 *a3 = nullptr; + const ui64 *a4 = nullptr; + const ui64 *a5 = nullptr; + + ui64 *out0 = nullptr; + ui64 *out1 = nullptr; + ui64 *out2 = nullptr; + ui64 *out3 = nullptr; + ui64 *out4 = nullptr; + ui64 *out5 = nullptr; + + ui32 offset = 0; + + for (;;) { + size_t common = Max<size_t>(); +#define COMMON_SIZE(I) \ + const size_t size##I = iter##I.ContiguousSize(); \ + if constexpr (~RestoreMask >> I & 1) { \ + common = Min(common, size##I); \ + } + COMMON_SIZE(0) + COMMON_SIZE(1) + COMMON_SIZE(2) + COMMON_SIZE(3) + COMMON_SIZE(4) + COMMON_SIZE(5) + if (!common) { + break; + } + + size_t numBlocks = Min<size_t>(10, Max<size_t>(1, common / blockSize)); + const size_t numBytes = numBlocks * blockSize; + +#define FETCH_BLOCK(I) \ + if constexpr (~RestoreMask >> I & 1) { /* input */ \ + if (size##I < blockSize) { \ + a##I = temp + I * 4; \ + iter##I.ExtractPlainDataAndAdvance(temp + I * 4, blockSize); \ + Y_VERIFY_DEBUG(numBytes == blockSize && numBlocks == 1); \ + } else { \ + a##I = reinterpret_cast<const ui64*>(iter##I.ContiguousData()); \ + iter##I += numBytes; \ + } \ + } else if (parts[I]) { /* expected output */ \ + Y_VERIFY(parts[I].IsContiguous()); \ + auto span = parts[I].GetContiguousSpanMut(); \ + out##I = reinterpret_cast<ui64*>(span.data() + offset); \ + Y_VERIFY_DEBUG(offset < span.size()); \ + Y_VERIFY_DEBUG(numBytes <= span.size() - offset); \ + } else { \ + out##I = restore; /* temporary storage */ \ + } + + FETCH_BLOCK(0) + FETCH_BLOCK(1) + FETCH_BLOCK(2) + FETCH_BLOCK(3) + FETCH_BLOCK(4) + FETCH_BLOCK(5) + + offset += numBytes; + + while (numBlocks--) { + // restore all but available parts; there can be one part that goes to "restore" local storage and + // further discarded + RESTORE_BLOCK(RestoreMask) + +#define NEXT(I) \ + if constexpr (~RestoreMask >> I & 1) { \ + a##I += 4; \ + } else if (parts[I]) { \ + out##I += 4; \ + } + NEXT(0) + NEXT(1) + NEXT(2) + NEXT(3) + NEXT(4) + NEXT(5) + } + } + } + + void ErasureRestoreBlock42(ui32 fullSize, TRope *whole, std::span<TRope> parts, ui32 restoreMask) { + const ui32 blockSize = 32; + const ui32 fullBlockSize = 4 * blockSize; + const ui32 fullBlocks = fullSize / fullBlockSize; + + Y_VERIFY(parts.size() == 6); + + ui32 availableMask = 0; + ui32 size = 0; // actual size of a part + + for (ui32 i = 0; i < parts.size(); ++i) { + if (parts[i]) { + availableMask |= 1 << i; + if (!size) { + size = parts[i].size(); + } else { + Y_VERIFY(size == parts[i].size()); + } + } + } + + Y_VERIFY(PopCount(availableMask) >= 4); + Y_VERIFY(size % 32 == 0); + + if (whole) { + restoreMask |= (1 << 4) - 1; // we have to restore all the data parts + } + + restoreMask &= ~availableMask; + + Y_VERIFY(PopCount(restoreMask) <= 2); + + for (ui32 part = 0; part < 6; ++part) { + if (restoreMask >> part & 1) { + Y_VERIFY(!parts[part]); + parts[part] = TRcBuf::Uninitialized(size); + } + } + + switch (restoreMask) { + case 0: break; + +#define SINGLE(I) \ + case 1 << I: \ + switch (63 & ~availableMask &~ restoreMask) { /* we may have 4 parts available and need only one to restore */ \ + case 0 : ErasureRestoreBlock42Do<1 << I >(parts); break; \ + case 1 << 0: ErasureRestoreBlock42Do<1 << I | 1 << 0>(parts); break; \ + case 1 << 1: ErasureRestoreBlock42Do<1 << I | 1 << 1>(parts); break; \ + case 1 << 2: ErasureRestoreBlock42Do<1 << I | 1 << 2>(parts); break; \ + case 1 << 3: ErasureRestoreBlock42Do<1 << I | 1 << 3>(parts); break; \ + case 1 << 4: ErasureRestoreBlock42Do<1 << I | 1 << 4>(parts); break; \ + case 1 << 5: ErasureRestoreBlock42Do<1 << I | 1 << 5>(parts); break; \ + } \ + break; + + SINGLE(0) + SINGLE(1) + SINGLE(2) + SINGLE(3) + SINGLE(4) + SINGLE(5) + +#define DOUBLE(I, J) \ + case 1 << I | 1 << J: \ + ErasureRestoreBlock42Do<1 << I | 1 << J>(parts); \ + break; + + DOUBLE(0, 1) + DOUBLE(0, 2) + DOUBLE(0, 3) + DOUBLE(0, 4) + DOUBLE(0, 5) + DOUBLE(1, 2) + DOUBLE(1, 3) + DOUBLE(1, 4) + DOUBLE(1, 5) + DOUBLE(2, 3) + DOUBLE(2, 4) + DOUBLE(2, 5) + DOUBLE(3, 4) + DOUBLE(3, 5) + DOUBLE(4, 5) + + default: + Y_FAIL(); + } + + if (whole) { + *whole = {}; + + ui32 remains = fullSize % fullBlockSize; + + for (ui32 part = 0; part < 4; ++part) { + ui32 partLen = fullBlocks * blockSize; + if (remains >= blockSize || part == 3) { + const ui32 len = Min(remains, blockSize); + partLen += len; + remains -= len; + } + + auto& p = parts[part]; + Y_VERIFY(partLen <= p.size()); + whole->Insert(whole->End(), TRope(p.Begin(), p.Position(partLen))); + } + } + } + + void ErasureRestoreFallback(TErasureType::ECrcMode crcMode, TErasureType erasure, ui32 fullSize, TRope *whole, + std::span<TRope> parts, ui32 restoreMask, ui32 offset, bool isFragment) { Y_VERIFY(parts.size() == erasure.TotalPartCount()); + Y_VERIFY(!isFragment || !whole); + + const ui32 partSize = erasure.PartSize(crcMode, fullSize); + + // ensure all the set parts have the same size + ui32 commonSize = 0; + for (const TRope& part : parts) { + if (part) { + Y_VERIFY(!commonSize || part.size() == commonSize); + commonSize = part.size(); + } + } + Y_VERIFY(commonSize); + Y_VERIFY(isFragment ? commonSize <= partSize - offset : commonSize == partSize); + + ui32 inputMask = 0; TDataPartSet p; p.FullDataSize = fullSize; + p.IsFragment = isFragment; + Y_VERIFY(isFragment || !offset); for (ui32 i = 0; i < parts.size(); ++i) { TPartFragment fragment; - fragment.ResetToWhole(parts[i]); - p.Parts.push_back(std::move(fragment)); if (parts[i]) { - p.PartsMask |= 1 << i; + inputMask |= 1 << i; + parts[i].Compact(); // this is the price of using TErasureType::RestoreData + if (isFragment) { + fragment.ReferenceTo(parts[i], offset, parts[i].size(), partSize); + } else { + fragment.ReferenceTo(parts[i]); + } + } else { + fragment.ReferenceTo(TRcBuf::Uninitialized(commonSize), offset, commonSize, partSize); } + p.Parts.push_back(std::move(fragment)); } + p.PartsMask = inputMask; - TRope temp; - erasure.RestoreData(crcMode, p, whole ? *whole : temp, restoreMask & ((1 << erasure.DataParts()) - 1), - whole != nullptr, restoreMask >> erasure.DataParts()); + const bool restoreParts = restoreMask & ((1 << erasure.DataParts()) - 1); + const bool restoreParityParts = restoreMask >> erasure.DataParts(); + if (whole) { + whole->Compact(); + erasure.RestoreData(crcMode, p, *whole, restoreParts, true, restoreParityParts); + } else { + erasure.RestoreData(crcMode, p, restoreParts, false, restoreParityParts); + } for (ui32 i = 0; i < parts.size(); ++i) { - parts[i] = std::move(p.Parts[i].OwnedString); + if (restoreMask >> i & 1) { + parts[i] = std::move(p.Parts[i].OwnedString); + } + } + } + + void ErasureRestore(TErasureType::ECrcMode crcMode, TErasureType erasure, ui32 fullSize, TRope *whole, + std::span<TRope> parts, ui32 restoreMask, ui32 offset, bool isFragment) { + if (crcMode == TErasureType::CrcModeNone && erasure.GetErasure() == TErasureType::Erasure4Plus2Block) { + Y_VERIFY(parts.size() == erasure.TotalPartCount()); + Y_VERIFY(!isFragment || !whole); + Y_VERIFY(offset % 32 == 0); + ErasureRestoreBlock42(fullSize, whole, parts, restoreMask); + } else { + ErasureRestoreFallback(crcMode, erasure, fullSize, whole, parts, restoreMask, offset, isFragment); } } diff --git a/ydb/core/erasure/erasure_split.cpp b/ydb/core/erasure/erasure_split.cpp index 7073d2a0c5..ebcf04fbce 100644 --- a/ydb/core/erasure/erasure_split.cpp +++ b/ydb/core/erasure/erasure_split.cpp @@ -1,21 +1,27 @@ #include "erasure.h" -namespace NKikimr { +static const char ZeroData[4096] = {0}; - class TZeroBuffer { - TRcBuf Buffer; +namespace NKikimr { + class TZeroBuffer : public IContiguousChunk { public: - TZeroBuffer() - : Buffer(TRcBuf::Uninitialized(4096)) - { - memset(Buffer.GetDataMut(), 0, Buffer.size()); + TContiguousSpan GetData() const override { + return {ZeroData, sizeof(ZeroData)}; + } + + TMutableContiguousSpan GetDataMut() override { + return {const_cast<char*>(ZeroData), sizeof(ZeroData)}; + } + + TMutableContiguousSpan UnsafeGetDataMut() override { + return {const_cast<char*>(ZeroData), sizeof(ZeroData)}; } - TRcBuf GetBuffer() const { - return Buffer; + size_t GetOccupiedMemorySize() const override { + return sizeof(ZeroData); } - } ZeroBuffer; + }; void ErasureSplitBlock42Prepare(const TRope& whole, std::span<TRope> parts) { const ui32 blockSize = 32; @@ -29,8 +35,9 @@ namespace NKikimr { for (ui32 part = 0; part < 4; ++part) { ui32 partLen = fullBlocks * blockSize; if (remains >= blockSize || part == 3) { - partLen += std::min(remains, blockSize); - remains -= blockSize; + const ui32 len = Min(remains, blockSize); + partLen += len; + remains -= len; } auto nextIter = iter + partLen; @@ -38,7 +45,7 @@ namespace NKikimr { r = {iter, nextIter}; Y_VERIFY_DEBUG(r.size() == partLen); if (const ui32 padding = partSize - r.size()) { - auto buffer = ZeroBuffer.GetBuffer(); + TRcBuf buffer(MakeIntrusive<TZeroBuffer>()); r.Insert(r.End(), TRcBuf(TRcBuf::Piece, buffer.data(), padding, buffer)); } } @@ -46,13 +53,11 @@ namespace NKikimr { } if (!parts[4]) { - TRcBuf xorPart = TRcBuf::Uninitialized(partSize); - parts[4] = TRope(std::move(xorPart)); + parts[4] = TRcBuf::Uninitialized(partSize); } if (!parts[5]) { - TRcBuf diagPart = TRcBuf::Uninitialized(partSize); - parts[5] = TRope(std::move(diagPart)); + parts[5] = TRcBuf::Uninitialized(partSize); } } diff --git a/ydb/core/util/fragmented_buffer.cpp b/ydb/core/util/fragmented_buffer.cpp index d4c8c86071..c6b47c7ae3 100644 --- a/ydb/core/util/fragmented_buffer.cpp +++ b/ydb/core/util/fragmented_buffer.cpp @@ -89,11 +89,9 @@ TString TFragmentedBuffer::Print() const { return str.Str(); } -void TFragmentedBuffer::CopyFrom(const TFragmentedBuffer& from, const TIntervalSet<i32>& range) { - Y_VERIFY(range); - for (auto it = range.begin(); it != range.end(); ++it) { - auto [begin, end] = *it; - Write(begin, from.Read(begin, end - begin)); +void TFragmentedBuffer::CopyFrom(const TFragmentedBuffer& from, const TIntervalSet<i32>& range, i32 offset) { + for (auto [begin, end] : range) { + Write(begin + offset, from.Read(begin, end - begin)); } } diff --git a/ydb/core/util/fragmented_buffer.h b/ydb/core/util/fragmented_buffer.h index d6c5c78c5d..6aae52eda1 100644 --- a/ydb/core/util/fragmented_buffer.h +++ b/ydb/core/util/fragmented_buffer.h @@ -21,7 +21,7 @@ public: TRope Read(ui32 begin, ui32 size) const; TString Print() const; - void CopyFrom(const TFragmentedBuffer& from, const TIntervalSet<i32>& range); + void CopyFrom(const TFragmentedBuffer& from, const TIntervalSet<i32>& range, i32 offset = 0); TIntervalSet<i32> GetIntervalSet() const; explicit operator bool() const { |