diff options
| author | ilnaz <[email protected]> | 2023-11-29 14:14:36 +0300 | 
|---|---|---|
| committer | ilnaz <[email protected]> | 2023-11-29 16:35:35 +0300 | 
| commit | 07d2ecc7f0770d65dc3d853051d526ae27f72976 (patch) | |
| tree | 6c1bba8e2459f2b4b7d7a6ab0b0ed77768f84280 | |
| parent | 867eee3708c11202a5137248004d04060b37b4da (diff) | |
Return verbose reason KIKIMR-20318
| -rw-r--r-- | ydb/core/cms/erasure_checkers.cpp | 27 | ||||
| -rw-r--r-- | ydb/core/cms/erasure_checkers.h | 7 | 
2 files changed, 19 insertions, 15 deletions
diff --git a/ydb/core/cms/erasure_checkers.cpp b/ydb/core/cms/erasure_checkers.cpp index 3e948898951..8d1b257ec1a 100644 --- a/ydb/core/cms/erasure_checkers.cpp +++ b/ydb/core/cms/erasure_checkers.cpp @@ -9,9 +9,9 @@ bool TErasureCounterBase::IsDown(const TVDiskInfo &vdisk, TClusterInfoPtr info,      // Check we received info for PDisk.      if (!pdisk.NodeId) { -        Down.insert(vdisk.VDiskId);          error.Code = TStatus::DISALLOW_TEMP; -        error.Reason = TStringBuilder() << "Missing info for " << pdisk.PrettyItemName(); +        error.Reason = TStringBuilder() << vdisk.PrettyItemName() << " has missing info for " << pdisk.PrettyItemName(); +        Down.emplace(vdisk.VDiskId, error.Reason);          return false;      } @@ -28,9 +28,9 @@ bool TErasureCounterBase::IsLocked(const TVDiskInfo &vdisk, TClusterInfoPtr info      // Check we received info for VDisk.      if (!vdisk.NodeId || !vdisk.PDiskId) { -        Down.insert(vdisk.VDiskId);          error.Code = TStatus::DISALLOW_TEMP; -        error.Reason = TStringBuilder() << "Missing info for " << vdisk.PrettyItemName(); +        error.Reason = TStringBuilder() << vdisk.PrettyItemName() << " has missing info"; +        Down.emplace(vdisk.VDiskId, error.Reason);          return false;      } @@ -43,7 +43,7 @@ bool TErasureCounterBase::GroupAlreadyHasLockedDisks() const {      return HasAlreadyLockedDisks;  } -static TString DumpVDisksInfo(const TSet<TVDiskID>& vdisks, TClusterInfoPtr info) { +static TString DumpVDisksInfo(const THashMap<TVDiskID, TString>& vdisks, TClusterInfoPtr info) {      if (vdisks.empty()) {          return "<empty>";      } @@ -51,12 +51,17 @@ static TString DumpVDisksInfo(const TSet<TVDiskID>& vdisks, TClusterInfoPtr info      TStringBuilder dump;      bool comma = false; -    for (const auto& vdisk : vdisks) { +    for (const auto& [vdisk, reason] : vdisks) {          if (comma) {              dump << ", ";          } -        dump << info->VDisk(vdisk).PrettyItemName();          comma = true; + +        if (reason) { +            dump << reason; +        } else { +            dump << info->VDisk(vdisk).PrettyItemName(); +        }      }      return dump; @@ -91,7 +96,7 @@ bool TErasureCounterBase::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr in      // Check locks.      TErrorInfo err;      if (IsLocked(vdisk, info, retryTime, duration, err)) { -        Locked.insert(vdisk.VDiskId); +        Locked.emplace(vdisk.VDiskId, err.Reason);          error.Code = err.Code;          error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'"              << ": " << err.Reason; @@ -101,7 +106,7 @@ bool TErasureCounterBase::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr in      // Check if disk is down.      if (IsDown(vdisk, info, retryTime, err)) { -        Down.insert(vdisk.VDiskId); +        Down.emplace(vdisk.VDiskId, err.Reason);          error.Code = err.Code;          error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'"              << ": " << err.Reason; @@ -122,7 +127,7 @@ void TErasureCounterBase::CountGroupState(TClusterInfoPtr info, TDuration retryT          HasAlreadyLockedDisks = true;      } -    Locked.insert(VDisk.VDiskId); +    Locked.emplace(VDisk.VDiskId, TStringBuilder() << VDisk.PrettyItemName() << " is locked by this request");  }  bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error, @@ -159,8 +164,6 @@ bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErr  bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,          TInstant &defaultDeadline, bool allowPartial) const  { -    Y_UNUSED(info); -      if (HasAlreadyLockedDisks && allowPartial) {          error.Code = TStatus::DISALLOW_TEMP;          error.Reason = "You cannot get two or more disks from the same group at the same time" diff --git a/ydb/core/cms/erasure_checkers.h b/ydb/core/cms/erasure_checkers.h index c02fd1da8bf..517e473e3ce 100644 --- a/ydb/core/cms/erasure_checkers.h +++ b/ydb/core/cms/erasure_checkers.h @@ -6,7 +6,7 @@  #include <ydb/core/erasure/erasure.h>  #include <ydb/core/protos/cms.pb.h> -#include <util/generic/set.h> +#include <util/generic/hash.h>  namespace NKikimr::NCms { @@ -27,8 +27,9 @@ public:  class TErasureCounterBase: public IErasureCounter {  protected: -    TSet<TVDiskID> Down; -    TSet<TVDiskID> Locked; +    // id & reason +    THashMap<TVDiskID, TString> Down; +    THashMap<TVDiskID, TString> Locked;      const TVDiskInfo& VDisk;      const ui32 GroupId;      bool HasAlreadyLockedDisks;  | 
