diff options
author | kruall <kruall@ydb.tech> | 2022-10-12 11:27:06 +0300 |
---|---|---|
committer | kruall <kruall@ydb.tech> | 2022-10-12 11:27:06 +0300 |
commit | 7650b5f2c69fecde07b39ac32e7f8f549daa0d55 (patch) | |
tree | fa328a4630e7b23e05818ae8a5e1d9bb43d916fb | |
parent | f74c83a786f508710fd5970e07d9a19e72300af7 (diff) | |
download | ydb-7650b5f2c69fecde07b39ac32e7f8f549daa0d55.tar.gz |
Add more logs for patch request,
-rw-r--r-- | ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp | 190 |
1 files changed, 111 insertions, 79 deletions
diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp index 8bdd1d8bc5a..706b5a77971 100644 --- a/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp +++ b/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp @@ -3,6 +3,7 @@ #include "root_cause.h" #include <ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h> #include <ydb/core/blobstorage/groupinfo/blobstorage_groupinfo_partlayout.h> +#include <ydb/core/util/stlog.h> #include <util/generic/ymath.h> #include <util/system/datetime.h> @@ -70,6 +71,20 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob TBlobStorageGroupInfo::TVDiskIds VDisks; bool UseVPatch = false; + bool IsGoodPatchedBlobId = false; + bool IsAllowedErasure = false; + +#define PATCH_LOG(priority, service, marker, msg, ...) \ + STLOG(priority, service, marker, msg, \ + (ActorId, SelfId()), \ + (Group, Info->GroupID), \ + (DiffCount, DiffCount), \ + (OriginalBlob, OriginalId), \ + (PatchedBlob, PatchedId), \ + (Deadline, Deadline), \ + (RestartCounter, RestartCounter), \ + __VA_ARGS__) \ +// PATCH_LOG public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { @@ -107,6 +122,10 @@ public: } void ReplyAndDie(NKikimrProto::EReplyStatus status) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA02, "ReplyAndDie", + (Status, status), + (ErrorReason, ErrorReason)); + std::unique_ptr<TEvBlobStorage::TEvPatchResult> result = std::make_unique<TEvBlobStorage::TEvPatchResult>(status, PatchedId, StatusFlags, Info->GroupID, ApproximateFreeSpaceShare); result->ErrorReason = ErrorReason; @@ -137,6 +156,10 @@ public: TEvBlobStorage::TEvGetResult *result = ev->Get(); Orbit = std::move(result->Orbit); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA30, "Received TEvGetResult", + (Status, result->Status), + (ErrorReason, result->ErrorReason)); + ui32 patchedIdHash = PatchedId.Hash(); bool incorrectCookie = ev->Cookie != patchedIdHash; bool fail = incorrectCookie @@ -159,7 +182,6 @@ public: << getResponseStatus << " GetErrorReason# " << result->ErrorReason; } - R_LOG_ERROR_S("BPPA04", ErrorReason); ReplyAndDie(NKikimrProto::ERROR); return; } @@ -177,6 +199,10 @@ public: TEvBlobStorage::TEvPutResult *result = ev->Get(); Orbit = std::move(result->Orbit); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA29, "Received TEvPutResult", + (Status, result->Status), + (ErrorReason, result->ErrorReason)); + StatusFlags = result->StatusFlags; ApproximateFreeSpaceShare = result->ApproximateFreeSpaceShare; @@ -192,7 +218,6 @@ public: << " PutStatus# " << NKikimrProto::EReplyStatus_Name(result->Status) << " PutErrorReason# " << result->ErrorReason; } - R_LOG_ERROR_S("BPPA03", ErrorReason); ReplyAndDie(NKikimrProto::ERROR); return; } @@ -215,9 +240,11 @@ public: void Handle(TEvBlobStorage::TEvVMovedPatchResult::TPtr &ev) { TEvBlobStorage::TEvVMovedPatchResult *result = ev->Get(); - A_LOG_DEBUG_S("BPPA02", "received " << ev->Get()->ToString() - << " from# " << VDiskIDFromVDiskID(ev->Get()->Record.GetVDiskID())); NKikimrBlobStorage::TEvVMovedPatchResult &record = result->Record; + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA28, "Received TEvVMovedPatchResult", + (Status, record.GetStatus()), + (ErrorReason, record.GetErrorReason()), + (VDiskId, VDiskIDFromVDiskID(record.GetVDiskID()))); PullOutStatusFlagsAndFressSpace(record); Orbit = std::move(result->Orbit); @@ -238,10 +265,9 @@ public: << " VMovedPatchStatus# " << NKikimrProto::EReplyStatus_Name(record.GetStatus()) << subErrorReason; } - A_LOG_INFO_S("BPPA05", "VMovedPatch failed, NaivePatch started;" - << " OriginalId# " << OriginalId - << " PatchedId# " << PatchedId - << " ErrorReason# " << ErrorReason); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA27, "Start Naive strategy from hadling TEvVMovedPatchResult", + (Status, record.GetStatus()), + (ErrorReason, ErrorReason)); StartNaivePatch(); return; } @@ -282,26 +308,22 @@ public: OkVDisksWithParts.push_back(subgroupIdx); } - A_LOG_INFO_S("BPPA07", "received VPatchFoundParts" - << " Status# " << status - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " Deadline# " << Deadline - << " SubgroupIdx# " << (ui32)subgroupIdx - << " PartsCount# " << record.OriginalPartsSize() - << " ReceivedFoundParts# " << ReceivedFoundParts << '/' << SendedStarts - << " ErrorReason# " << errorReason); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA26, "Received VPatchFoundParts", + (Status, status), + (SubgroupIdx, (ui32)subgroupIdx), + (ReceivedResults, static_cast<TString>(TStringBuilder() << ReceivedResults << '/' << Info->Type.TotalPartCount())), + (ErrorReason, errorReason)); if (ReceivedFoundParts == SendedStarts) { bool continueVPatch = VerifyPartPlacement(); if (continueVPatch) { continueVPatch = ContinueVPatch(); } else { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA27, "Failed VerifyPartPlacement"); Mon->VPatchPartPlacementVerifyFailed->Inc(); } if (!continueVPatch) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA27, "Start Fallback strategy from hadling TEvVPatchFoundParts"); StopVPatch(); StartFallback(); } @@ -319,28 +341,27 @@ public: errorReason = record.GetErrorReason(); } - A_LOG_INFO_S("BPPA06", "received VPatchResult" - << " Status# " << status - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " Deadline# " << Deadline - << " ReceivedResults# " << ReceivedResults << '/' << Info->Type.TotalPartCount() - << " ErrorReason# " << errorReason); - Y_VERIFY(record.HasCookie()); ui8 subgroupIdx = record.GetCookie(); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA23, "Received VPatchResult", + (Status, status), + (SubgroupIdx, (ui32)subgroupIdx), + (ReceivedResults, static_cast<TString>(TStringBuilder() << ReceivedResults << '/' << Info->Type.TotalPartCount())), + (ErrorReason, errorReason)); + bool wasReceived = std::exchange(ReceivedResponseFlags[subgroupIdx], true); Y_VERIFY(!wasReceived); if (status != NKikimrProto::OK) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA24, "Start Fallback strategy from handling VPatchResult", + (ReceivedResults, TStringBuilder() << ReceivedResults << '/' << Info->Type.TotalPartCount())); StartFallback(); return; } if (ReceivedResults == Info->Type.TotalPartCount()) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA25, "Got all succesful responses, make own success response"); ReplyAndDie(NKikimrProto::OK); } } @@ -355,6 +376,7 @@ public: } if (countByDC[0] && countByDC[1] && countByDC[2]) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA22, "VerifyPartPlacement {mirror-3-dc} found all 3 disks"); return true; } @@ -364,6 +386,8 @@ public: x2Count++; } } + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA23, "VerifyPartPlacement {mirror-3-dc}", + (X2Count, x2Count)); return x2Count >= 2; } @@ -372,15 +396,21 @@ public: return VerifyPartPlacementForMirror3dc(); } else { TSubgroupPartLayout layout; - - for (auto &[subgroupIdx, partId] : FoundParts) { - layout.AddItem(subgroupIdx, partId - 1, Info->Type); + for (auto &placement : FoundParts) { + PATCH_LOG(PRI_TRACE, BS_PROXY_PATCH, BPPA31, "Get part", + (SubgroupIdx, (ui32)placement.VDiskIdxInSubgroup), + (PartId, (ui32)placement.PartId)); + layout.AddItem(placement.VDiskIdxInSubgroup, placement.PartId - 1, Info->Type); } + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA21, "VerifyPartPlacement", + (EffectiveReplicas, layout.CountEffectiveReplicas(Info->Type)), + (TotalPartount, Info->Type.TotalPartCount())); return layout.CountEffectiveReplicas(Info->Type) == Info->Type.TotalPartCount(); } } void SendStopDiffs() { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA18, "Send stop diffs"); TDeque<std::unique_ptr<TEvBlobStorage::TEvVPatchDiff>> events; for (ui32 vdiskIdx = 0; vdiskIdx < VDisks.size(); ++vdiskIdx) { if (!ErrorResponseFlags[vdiskIdx] && !EmptyResponseFlags[vdiskIdx] && ReceivedResponseFlags[vdiskIdx]) { @@ -388,6 +418,9 @@ public: OriginalId, PatchedId, VDisks[vdiskIdx], 0, Deadline, vdiskIdx); ev->SetForceEnd(); events.emplace_back(std::move(ev)); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA19, "Send stop message", + (VDiskIdxInSubgroup, vdiskIdx), + (VDiskId, VDisks[vdiskIdx])); } } SendToQueues(events, false); @@ -454,7 +487,12 @@ public: for (const TPartPlacement &parity : parityPlacements) { ev->AddXorReceiver(VDisks[parity.VDiskIdxInSubgroup], parity.PartId); } - + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA20, "Send TEvVPatchDiff", + (VDiskIdxInSubgroup, idxInSubgroup), + (PatchedVDiskIdxInSubgroup, patchedIdxInSubgroup), + (DiffsForPart, diffsForPart.size()), + (ParityPlacements, parityPlacements.size()), + (WaitedXorDiffs, waitedXorDiffs)); events.push_back(std::move(ev)); } SendToQueues(events, false); @@ -485,12 +523,8 @@ public: } void StartMovedPatch() { - A_LOG_DEBUG_S("BPPA12", "StartMovedPatch" - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " Deadline# " << Deadline); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA09, "Start Moved strategy", + (SendedStarts, SendedStarts)); Become(&TThis::MovedPatchState); ui32 subgroupIdx = 0; @@ -515,12 +549,7 @@ public: } void StartNaivePatch() { - A_LOG_DEBUG_S("BPPA11", "StartNaivePatch" - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " Deadline# " << Deadline); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA07, "Start Naive strategy"); Become(&TThis::NaiveState); auto get = std::make_unique<TEvBlobStorage::TEvGet>(OriginalId, 0, OriginalId.BlobSize(), Deadline, NKikimrBlobStorage::AsyncRead); @@ -535,8 +564,12 @@ public: void StartFallback() { Mon->PatchesWithFallback->Inc(); if (WithMovingPatchRequestToStaticNode && UseVPatch) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA05, "Start Moved strategy from fallback"); StartMovedPatch(); } else { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA06, "Start Naive strategy from fallback", + (WithMovingPatchRequestToStaticNode, WithMovingPatchRequestToStaticNode), + (UseVPatch, UseVPatch)); StartNaivePatch(); } } @@ -558,13 +591,8 @@ public: SendedStarts++; } - A_LOG_DEBUG_S("BPPA08", "StartVPatcn" - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " Deadline# " << Deadline - << " SendedStarts# " << SendedStarts); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA08, "Start VPatch strategy", + (SendedStarts, SendedStarts)); SendToQueues(events, false); } @@ -609,6 +637,7 @@ public: } bool ContinueVPatchForMirror3dc() { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA10, "Continue VPatch {mirror-3-dc}"); constexpr ui32 DCCount = 3; constexpr ui32 VDiskByDC = 3; ui32 countByDC[DCCount] = {0, 0, 0}; @@ -622,9 +651,14 @@ public: } if (countByDC[0] && countByDC[1] && countByDC[2]) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA11, "Found disks {mirror-3-dc} on each dc", + (DiskFromFirstDC, diskByDC[0][0].ToString()), + (DiskFromSecondDC, diskByDC[0][0].ToString()), + (DiskFromThirdDC, diskByDC[2][0].ToString())); SendDiffs({diskByDC[0][0], diskByDC[1][0], diskByDC[2][0]}); return true; } + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA12, "Didn't find disks {mirror-3-dc} on each dc"); ui32 x2Count = 0; for (ui32 dcIdx = 0; dcIdx < DCCount; ++dcIdx) { @@ -633,6 +667,7 @@ public: } } if (x2Count < 2) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA13, "Didn't find disks {mirror-3-dc}"); return false; } TStackVec<TPartPlacement, TypicalPartsInBlob> placements; @@ -643,17 +678,17 @@ public: } } SendDiffs(placements); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA14, "Found disks {mirror-3-dc} x2 mode", + (FirstDiskFromFirstDC, placements[0]), + (SecondDiskFromFirstDC, placements[1]), + (FirstDiskFromSecondDC, placements[2]), + (SecondDiskFromSecondDC, placements[3])); return true; } bool ContinueVPatch() { - A_LOG_DEBUG_S("BPPA09", "ContinueVPatch" - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " FoundParts# " << ConvertFoundPartsToString() - << " Deadline# " << Deadline); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA15, "Continue VPatch strategy", + (FoundParts, ConvertFoundPartsToString())); if (Info->Type.GetErasure() == TErasureType::ErasureMirror3dc) { return ContinueVPatchForMirror3dc(); @@ -683,10 +718,11 @@ public: TStackVec<ui32, TypicalHandoffCount> choosenHandoffForParts(handoffParts.size()); if (handoffParts.size()) { bool find = FindHandoffs(handoffForParts, handoffParts, &choosenHandoffForParts); - Y_VERIFY_DEBUG_S(find, "handoffParts# " << FormatList(handoffParts) - << " FoundParts# " << ConvertFoundPartsToString() - << " choosenHandoffForParts# " << FormatList(choosenHandoffForParts) - << " inPrimary# " << FormatList(inPrimary)); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA16, "Find handoff parts", + (HandoffParts, FormatList(handoffParts)), + (FoundParts, ConvertFoundPartsToString()), + (choosenHandoffForParts, FormatList(choosenHandoffForParts)), + (IsPrimary, FormatList(inPrimary))); if (!find) { Mon->VPatchContinueFailed->Inc(); return false; @@ -698,12 +734,7 @@ public: } void StopVPatch() { - A_LOG_DEBUG_S("BPPA10", "StopVPatch" - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " Deadline# " << Deadline); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA17, "Stop VPatch strategy"); SendStopDiffs(); ReceivedResponseFlags.assign(VDisks.size(), false); } @@ -739,14 +770,8 @@ public: } void Bootstrap() { - A_LOG_INFO_S("BPPA01", "bootstrap" - << " ActorId# " << SelfId() - << " Group# " << Info->GroupID - << " DiffCount# " << DiffCount - << " OriginalBlob# " << OriginalId - << " PatchedBlob# " << PatchedId - << " Deadline# " << Deadline - << " RestartCounter# " << RestartCounter); + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA01, "Actor bootstrapped"); + TLogoBlobID truePatchedBlobId = PatchedId; bool result = true; if (Info->Type.ErasureFamily() == TErasureType::ErasureParityBlock) { @@ -768,13 +793,20 @@ public: return; } - bool isAllowedErasure = Info->Type.ErasureFamily() == TErasureType::ErasureParityBlock + IsGoodPatchedBlobId = result; + IsAllowedErasure = Info->Type.ErasureFamily() == TErasureType::ErasureParityBlock || Info->Type.GetErasure() == TErasureType::ErasureNone || Info->Type.GetErasure() == TErasureType::ErasureMirror3 || Info->Type.GetErasure() == TErasureType::ErasureMirror3dc; - if (result && isAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID) { + if (IsGoodPatchedBlobId && IsAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID) { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA03, "Start VPatch strategy from bootstrap"); StartVPatch(); } else { + PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA04, "Start Fallback strategy from bootstrap", + (IsGoodPatchedBlobId, IsGoodPatchedBlobId), + (IsAllowedErasure, IsAllowedErasure), + (UseVPatch, UseVPatch), + (IsSameGroup, OriginalGroupId == Info->GroupID)); StartFallback(); } } |