aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: serg-belyakov <serg-belyakov@yandex-team.com> 2023-05-16 10:19:12 +0300
committer: serg-belyakov <serg-belyakov@yandex-team.com> 2023-05-16 10:19:12 +0300
commit: b0758245d516b1c90e1af1aa867d7886d46693a6 (patch)
tree: 41c5c49ca19b00aeccc773185316f3b2e758e9f4
parent: 70134716182107a07f3f5d70e0dcc1bf63964859 (diff)
download: ydb-b0758245d516b1c90e1af1aa867d7886d46693a6.tar.gz
Block attempts to sanitize a group when it is one step from degraded.
Reject when some disks are not fully operational. Add error reason to reassigner actor. Add operation log to HTML page. Add TOperationLog for GroupLayoutSanitizer to SelfHeal actor.
-rw-r--r-- ydb/core/mind/bscontroller/config_fit_groups.cpp 27
-rw-r--r-- ydb/core/mind/bscontroller/self_heal.cpp 2
2 files changed, 15 insertions, 14 deletions
diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp
index 53c34106aec..a70b83d0662 100644
--- a/ydb/core/mind/bscontroller/config_fit_groups.cpp
+++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp
@@ -189,7 +189,16 @@ namespace NKikimr {
////////////////////////////////////////////////////////////////////////////////////////////////////////
// scan through all VSlots and find matching PDisks
////////////////////////////////////////////////////////////////////////////////////////////////////////
+ // create topology for group
+ auto& topology = *groupInfo->Topology;
+ // fill in vector of failed disks (that are not fully operational)
+ TBlobStorageGroupInfo::TGroupVDisks failed(&topology);
+ auto& checker = *topology.QuorumChecker;
for (const TVSlotInfo *vslot : groupInfo->VDisksInGroup) {
+ if (!vslot->IsOperational()) {
+ failed |= {&topology, vslot->GetShortVDiskId()};
+ }
+
const auto it = State.ExplicitReconfigureMap.find(vslot->VSlotId);
bool replace = it != State.ExplicitReconfigureMap.end();
const TPDiskId targetPDiskId = replace ? it->second : TPDiskId();
@@ -238,9 +247,11 @@ namespace NKikimr {
}
if (sanitizingRequest) {
+ if (checker.OneStepFromDegradedOrWorse(failed)) {
+ throw TExFitGroupError() << "Sanitizing requst was blocked, group is one step from DEGRADED or worse";
+ }
if (groupInfo->VDisksInGroup.empty()) {
- throw TExFitGroupError() << "Group has been decommitted and cannot be sanitized"
- << " GroupId# " << groupId;
+ throw TExFitGroupError() << "Group has been decommitted and cannot be sanitized";
}
getGroup();
}
@@ -342,17 +353,7 @@ namespace NKikimr {
if (replacedSlots) {
if (!IgnoreGroupFailModelChecks) {
- // process only groups with changed content; create topology for group
- auto& topology = *groupInfo->Topology;
- // fill in vector of failed disks (that are not fully operational)
- TBlobStorageGroupInfo::TGroupVDisks failed(&topology);
- for (const TVSlotInfo *slot : groupInfo->VDisksInGroup) {
- if (!slot->IsOperational()) {
- failed |= {&topology, slot->GetShortVDiskId()};
- }
- }
- // check the failure model
- auto& checker = *topology.QuorumChecker;
+ // process only groups with changed content; check the failure model
if (!checker.CheckFailModelForGroup(failed)) {
throw TExMayLoseData(groupId);
} else if (!IgnoreDegradedGroupsChecks && checker.IsDegraded(failed)) {
diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp
index dd2a3aa6c29..dbdd4966fe1 100644
--- a/ydb/core/mind/bscontroller/self_heal.cpp
+++ b/ydb/core/mind/bscontroller/self_heal.cpp
@@ -678,7 +678,7 @@ namespace NKikimr::NBsController {
}
}
}
- out << "<div/>";
+ out << "</div>";
}
}