summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alexander Rutkovsky <[email protected]> 2024-09-06 13:14:01 +0300
committer GitHub <[email protected]> 2024-09-06 13:14:01 +0300
commit 7770159299115ebf8b2e6883c1c2f62b652ea3a4 (patch)
tree 0bc1f09671281e62a54b956e6f68785100bf69dc
parent 0b897b9860e012c9e6209e9d3ecd55c1d84876bd (diff)
Allow SelfHeal operation while in DEGRADED state (#8734)
-rw-r--r-- ydb/core/mind/bscontroller/bsc.cpp 7
-rw-r--r-- ydb/core/mind/bscontroller/impl.h 1
-rw-r--r-- ydb/core/mind/bscontroller/self_heal.cpp 29
-rw-r--r-- ydb/core/protos/config.proto 9
4 files changed, 38 insertions, 8 deletions
diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp
index d9ae407390b..be5c664ea07 100644
--- a/ydb/core/mind/bscontroller/bsc.cpp
+++ b/ydb/core/mind/bscontroller/bsc.cpp
@@ -270,6 +270,13 @@ void TBlobStorageController::Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) {
const bool initial = !HostRecords;
HostRecords = std::make_shared<THostRecordMap::element_type>(ev->Get());
if (initial) {
+ if (auto *appData = AppData()) {
+ if (appData->Icb) {
+ EnableSelfHealWithDegraded = std::make_shared<TControlWrapper>(0, 0, 1);
+ appData->Icb->RegisterSharedControl(*EnableSelfHealWithDegraded,
+ "BlobStorageControllerControls.EnableSelfHealWithDegraded");
+ }
+ }
SelfHealId = Register(CreateSelfHealActor());
PushStaticGroupsToSelfHeal();
if (StorageConfigObtained) {
diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h
index f8a3c9a3670..d6cd2b30f2f 100644
--- a/ydb/core/mind/bscontroller/impl.h
+++ b/ydb/core/mind/bscontroller/impl.h
@@ -1516,6 +1516,7 @@ private:
bool AllowMultipleRealmsOccupation = true;
bool StorageConfigObtained = false;
bool Loaded = false;
+ std::shared_ptr<TControlWrapper> EnableSelfHealWithDegraded;
std::set<std::tuple<TGroupId, TNodeId>> GroupToNode;
diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp
index 1421ba62dcd..d2ff2877b55 100644
--- a/ydb/core/mind/bscontroller/self_heal.cpp
+++ b/ydb/core/mind/bscontroller/self_heal.cpp
@@ -43,6 +43,7 @@ namespace NKikimr::NBsController {
std::shared_ptr<TBlobStorageGroupInfo::TTopology> Topology;
TBlobStorageGroupInfo::TGroupVDisks FailedGroupDisks;
const bool IsSelfHealReasonDecommit;
+ const bool IgnoreDegradedGroupsChecks;
const bool DonorMode;
THashSet<TVDiskID> PendingVDisks;
THashMap<TActorId, TVDiskID> ActorToDiskMap;
@@ -51,7 +52,7 @@ namespace NKikimr::NBsController {
public:
TReassignerActor(TActorId controllerId, TGroupId groupId, TEvControllerUpdateSelfHealInfo::TGroupContent group,
std::optional<TVDiskID> vdiskToReplace, std::shared_ptr<TBlobStorageGroupInfo::TTopology> topology,
- bool isSelfHealReasonDecommit, bool donorMode)
+ bool isSelfHealReasonDecommit, bool ignoreDegradedGroupsChecks, bool donorMode)
: ControllerId(controllerId)
, GroupId(groupId)
, Group(std::move(group))
@@ -59,6 +60,7 @@ namespace NKikimr::NBsController {
, Topology(std::move(topology))
, FailedGroupDisks(Topology.get())
, IsSelfHealReasonDecommit(isSelfHealReasonDecommit)
+ , IgnoreDegradedGroupsChecks(ignoreDegradedGroupsChecks)
, DonorMode(donorMode)
{}
@@ -166,6 +168,9 @@ namespace NKikimr::NBsController {
request->SetIgnoreGroupReserve(true);
request->SetSettleOnlyOnOperationalDisks(true);
request->SetIsSelfHealReasonDecommit(IsSelfHealReasonDecommit);
+ if (IgnoreDegradedGroupsChecks) {
+ request->SetIgnoreDegradedGroupsChecks(IgnoreDegradedGroupsChecks);
+ }
request->SetAllowUnusableDisks(true);
if (VDiskToReplace) {
ev->SelfHeal = true;
@@ -278,6 +283,7 @@ namespace NKikimr::NBsController {
bool AllowMultipleRealmsOccupation;
bool DonorMode;
THostRecordMap HostRecords;
+ std::shared_ptr<TControlWrapper> EnableSelfHealWithDegraded;
using TTopologyDescr = std::tuple<TBlobStorageGroupType::EErasureSpecies, ui32, ui32, ui32>;
THashMap<TTopologyDescr, std::shared_ptr<TBlobStorageGroupInfo::TTopology>> Topologies;
@@ -289,13 +295,15 @@ namespace NKikimr::NBsController {
public:
TSelfHealActor(ui64 tabletId, std::shared_ptr<std::atomic_uint64_t> unreassignableGroups, THostRecordMap hostRecords,
- bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode)
+ bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode,
+ std::shared_ptr<TControlWrapper> enableSelfHealWithDegraded)
: TabletId(tabletId)
, UnreassignableGroups(std::move(unreassignableGroups))
, GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled)
, AllowMultipleRealmsOccupation(allowMultipleRealmsOccupation)
, DonorMode(donorMode)
, HostRecords(std::move(hostRecords))
+ , EnableSelfHealWithDegraded(std::move(enableSelfHealWithDegraded))
{}
void Bootstrap(const TActorId& parentId) {
@@ -427,9 +435,11 @@ namespace NKikimr::NBsController {
// check if it is possible to move anything out
bool isSelfHealReasonDecommit;
- if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit)) {
+ bool ignoreDegradedGroupsChecks;
+ if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit,
+ &ignoreDegradedGroupsChecks)) {
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
- *v, group.Topology, isSelfHealReasonDecommit, DonorMode));
+ *v, group.Topology, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode));
} else {
++counter; // this group can't be reassigned right now
@@ -484,7 +494,8 @@ namespace NKikimr::NBsController {
ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(GroupLayoutSanitizerOperationLog,
"Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content.Generation);
group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content,
- std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/, DonorMode));
+ std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/,
+ false /*ignoreDegradedGroupsChecks*/, DonorMode));
}
}
}
@@ -534,7 +545,8 @@ namespace NKikimr::NBsController {
}
std::optional<TVDiskID> FindVDiskToReplace(const TEvControllerUpdateSelfHealInfo::TGroupContent& content,
- TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit) {
+ TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit,
+ bool *ignoreDegradedGroupsChecks) {
// main idea of selfhealing is step-by-step healing of bad group; we can allow healing of group with more
// than one disk missing, but we should not move next faulty disk until previous one is replicated, at least
// partially (meaning only phantoms left)
@@ -553,7 +565,7 @@ namespace NKikimr::NBsController {
}
[[fallthrough]];
case NKikimrBlobStorage::EVDiskStatus::INIT_PENDING:
- return std::nullopt; // don't touch group with replicating disks
+ return std::nullopt; // don't touch group with replicating or starting disks
default:
break;
@@ -579,6 +591,7 @@ namespace NKikimr::NBsController {
continue; // this group will become degraded when applying self-heal logic, skip disk
}
*isSelfHealReasonDecommit = vdisk.IsSelfHealReasonDecommit;
+ *ignoreDegradedGroupsChecks = checker.IsDegraded(failedByReadiness) && *EnableSelfHealWithDegraded;
return vdiskId;
}
}
@@ -886,7 +899,7 @@ namespace NKikimr::NBsController {
IActor *TBlobStorageController::CreateSelfHealActor() {
Y_ABORT_UNLESS(HostRecords);
return new TSelfHealActor(TabletID(), SelfHealUnreassignableGroups, HostRecords, GroupLayoutSanitizerEnabled,
- AllowMultipleRealmsOccupation, DonorMode);
+ AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded);
}
void TBlobStorageController::InitializeSelfHealState() {
diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto
index 5dac682fbd8..d6404c0bc64 100644
--- a/ydb/core/protos/config.proto
+++ b/ydb/core/protos/config.proto
@@ -1343,6 +1343,14 @@ message TImmediateControlsConfig {
DefaultValue: 200 }];
}
+ message TBlobStorageControllerControls {
+ optional uint64 EnableSelfHealWithDegraded = 1 [(ControlOptions) = {
+ Description: "Should SelfHeal automatically process groups that are in DEGRADED status (one step from nonworking)",
+ MinValue: 0,
+ MaxValue: 1,
+ DefaultValue: 0 }];
+ }
+
optional TDataShardControls DataShardControls = 1;
optional TTxLimitControls TxLimitControls = 2;
optional TCoordinatorControls CoordinatorControls = 3;
@@ -1353,6 +1361,7 @@ message TImmediateControlsConfig {
optional TTabletControls TabletControls = 8;
optional TDSProxyControls DSProxyControls = 9;
optional TPDiskControls PDiskControls = 10;
+ optional TBlobStorageControllerControls BlobStorageControllerControls = 11;
};
message TMeteringConfig {