summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvgenik2 <[email protected]>2025-08-05 11:47:42 +0300
committerGitHub <[email protected]>2025-08-05 11:47:42 +0300
commit072968a77b476e4dc08a19c939f8642540468036 (patch)
tree9af8eac5ff8d75113163d921a67127b768c42d80
parente4501a8f37bdc5826001547e68daac136a9fe55c (diff)
Fix reassign node logic in self-heal (#22310) [tag: meta-1.0.2]
-rw-r--r--ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp66
-rw-r--r--ydb/core/blobstorage/nodewarden/distconf_selfheal.cpp79
-rw-r--r--ydb/core/blobstorage/nodewarden/distconf_selfheal.h31
-rw-r--r--ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp31
-rw-r--r--ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h3
-rw-r--r--ydb/core/protos/blobstorage_distributed_config.proto1
-rw-r--r--ydb/tests/functional/config/test_distconf_sentinel_node_status.py22
7 files changed, 187 insertions, 46 deletions
diff --git a/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp b/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp
index 34a61988d12..25db25107c8 100644
--- a/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp
+++ b/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp
@@ -103,7 +103,7 @@ namespace NKikimr::NStorage {
RunCommonChecks();
NKikimrBlobStorage::TStateStorageConfig targetConfig;
if (!GetRecommendedStateStorageConfig(&targetConfig) && !forceHeal) {
- throw TExError() << " Recommended configuration has faulty nodes and can not be applyed";
+ throw TExError() << "Recommended configuration has faulty nodes and can not be applyed";
}
NKikimrBlobStorage::TStateStorageConfig currentConfig;
@@ -118,10 +118,11 @@ namespace NKikimr::NStorage {
ONE_NODE,
FULL
};
- std::unordered_map<ui32, ui32> nodesToReplace;
+ std::unordered_map<ui32, std::tuple<ui32, ui32, ui32>> nodesToReplace;
auto needReconfig = [&](auto clearFunc, auto ssMutableFunc, auto buildFunc) {
auto copyCurrentConfig = currentConfig;
auto ss = *(copyCurrentConfig.*ssMutableFunc)();
+ auto targetSS = *(targetConfig.*ssMutableFunc)();
if (ss.RingGroupsSize() == 0) {
ss.MutableRing()->ClearRingGroupActorIdOffset();
} else {
@@ -132,9 +133,11 @@ namespace NKikimr::NStorage {
TIntrusivePtr<TStateStorageInfo> newSSInfo;
TIntrusivePtr<TStateStorageInfo> oldSSInfo;
oldSSInfo = (*buildFunc)(ss);
- newSSInfo = (*buildFunc)(*(targetConfig.*ssMutableFunc)());
+ newSSInfo = (*buildFunc)(targetSS);
if (oldSSInfo->RingGroups == newSSInfo->RingGroups) {
(targetConfig.*clearFunc)();
+ STLOG(PRI_DEBUG, BS_NODE, NW104, "needReconfig clear config"
+ , (CurrentConfig, ss), (TargetConfig, targetSS), (oldSSInfo, oldSSInfo->ToString()), (newSSInfo, newSSInfo->ToString()));
return ReconfigType::NONE;
}
@@ -193,9 +196,9 @@ namespace NKikimr::NStorage {
}
}
}
- for (ui32 j : xrange(oldRg.Rings.size())) {
- auto& oldRing = oldRg.Rings[j];
- auto& newRing = newRg.Rings[j];
+ for (ui32 ringIdx : xrange(oldRg.Rings.size())) {
+ auto& oldRing = oldRg.Rings[ringIdx];
+ auto& newRing = newRg.Rings[ringIdx];
if (oldRing == newRing) {
continue;
}
@@ -215,15 +218,29 @@ namespace NKikimr::NStorage {
if (oldRep == newRep) {
continue;
}
- if (auto it = nodesToReplace.find(oldRep); it != nodesToReplace.end() && it->second != newRep) {
+ std::tuple<ui32, ui32, ui32> placement{ ringGroupIdx, ringIdx, newRep };
+ if (auto it = nodesToReplace.find(oldRep); it != nodesToReplace.end() && it->second != placement) {
return ReconfigType::FULL;
}
- nodesToReplace[oldRep] = newRep;
+ nodesToReplace[oldRep] = placement;
}
}
}
if (nodesToReplace.size() == 1) {
- return ReconfigType::ONE_NODE;
+ auto placement = nodesToReplace.begin();
+ auto oldRg = oldSSInfo->RingGroups[std::get<0>(placement->second)];
+ std::unordered_set<ui32> badRings;
+ for (ui32 ringIdx : xrange(oldRg.Rings.size())) {
+ auto& oldRing = oldRg.Rings[ringIdx];
+ for (auto& rep : oldRing.Replicas) {
+ auto oldRep = rep.NodeId();
+ if (Self->SelfHealNodesState[oldRep]) {
+ badRings.insert(ringIdx);
+ }
+ }
+ }
+ ui32 majority = oldRg.NToSelect / 2 + 1;
+ return badRings.size() <= oldRg.NToSelect - majority ? ReconfigType::ONE_NODE : ReconfigType::FULL;
}
return nodesToReplace.empty() ? ReconfigType::NONE : ReconfigType::FULL;
};
@@ -237,16 +254,14 @@ namespace NKikimr::NStorage {
throw TExError() << "Current configuration is recommended. Nothing to self-heal.";
}
if (nodesToReplace.size() == 1 && needReconfigSS != ReconfigType::FULL && needReconfigSSB != ReconfigType::FULL && needReconfigSB != ReconfigType::FULL) {
- STLOG(PRI_DEBUG, BS_NODE, NW100, "Need to reconfig one node " << nodesToReplace.begin()->first << " to " << nodesToReplace.begin()->second
+ STLOG(PRI_DEBUG, BS_NODE, NW100, "Need to reconfig one node " << nodesToReplace.begin()->first << " to " << std::get<2>(nodesToReplace.begin()->second)
, (CurrentConfig, currentConfig), (TargetConfig, targetConfig));
-
- TQuery::TReassignStateStorageNode cmd;
- cmd.SetFrom(nodesToReplace.begin()->first);
- cmd.SetTo(nodesToReplace.begin()->second);
- cmd.SetStateStorage(needReconfigSS == ReconfigType::ONE_NODE);
- cmd.SetStateStorageBoard(needReconfigSSB == ReconfigType::ONE_NODE);
- cmd.SetSchemeBoard(needReconfigSB == ReconfigType::ONE_NODE);
- ReassignStateStorageNode(cmd);
+ auto *op = std::get_if<TInvokeExternalOperation>(&Query);
+ Y_ABORT_UNLESS(op);
+ Self->StateStorageSelfHealActor = Register(new TStateStorageReassignNodeSelfhealActor(op->Sender, op->Cookie
+ , TDuration::Seconds(waitForConfigStep), nodesToReplace.begin()->first, std::get<2>(nodesToReplace.begin()->second)
+ , needReconfigSS == ReconfigType::ONE_NODE, needReconfigSSB == ReconfigType::ONE_NODE, needReconfigSB == ReconfigType::ONE_NODE));
+ Finish(TResult::OK, std::nullopt);
return;
}
@@ -256,8 +271,11 @@ namespace NKikimr::NStorage {
auto *op = std::get_if<TInvokeExternalOperation>(&Query);
Y_ABORT_UNLESS(op);
+ ui32 pilesCnt = Self->BridgePileNameMap.size();
+ if (pilesCnt == 0)
+ pilesCnt = 1;
Self->StateStorageSelfHealActor = Register(new TStateStorageSelfhealActor(op->Sender, op->Cookie,
- TDuration::Seconds(waitForConfigStep), std::move(currentConfig), std::move(targetConfig)));
+ TDuration::Seconds(waitForConfigStep), std::move(currentConfig), std::move(targetConfig), pilesCnt));
Finish(TResult::OK, std::nullopt);
}
@@ -392,6 +410,8 @@ namespace NKikimr::NStorage {
NKikimrBlobStorage::TStorageConfig config = *Self->StorageConfig;
+ STLOG(PRI_DEBUG, BS_NODE, NW67, "TInvokeRequestHandlerActor::ReassignStateStorageNode",
+ (config, config));
auto process = [&](const char *name, auto hasFunc, auto mutableFunc) {
if (!(config.*hasFunc)()) {
throw TExError() << name << " configuration is not filled in";
@@ -414,7 +434,10 @@ namespace NKikimr::NStorage {
throw TExError() << name << " ambiguous From node";
} else {
found = true;
- ring->MutableNode()->Set(i, cmd.GetTo());
+ if (!cmd.GetDisableRing()) {
+ ring->MutableNode()->Set(i, cmd.GetTo());
+ }
+ ring->SetIsDisabled(cmd.GetDisableRing());
}
}
};
@@ -452,7 +475,8 @@ namespace NKikimr::NStorage {
F(StateStorageBoard)
F(SchemeBoard)
#undef F
-
+ STLOG(PRI_DEBUG, BS_NODE, NW67, "TInvokeRequestHandlerActor::ReassignStateStorageNode new config ",
+ (config, config));
StartProposition(&config);
}
diff --git a/ydb/core/blobstorage/nodewarden/distconf_selfheal.cpp b/ydb/core/blobstorage/nodewarden/distconf_selfheal.cpp
index 97dd23d27fb..482234d8e94 100644
--- a/ydb/core/blobstorage/nodewarden/distconf_selfheal.cpp
+++ b/ydb/core/blobstorage/nodewarden/distconf_selfheal.cpp
@@ -6,13 +6,14 @@ namespace NKikimr::NStorage {
static constexpr TDuration MaxWaitForConfigStep = TDuration::Minutes(10);
TStateStorageSelfhealActor::TStateStorageSelfhealActor(TActorId sender, ui64 cookie, TDuration waitForConfigStep
- , NKikimrBlobStorage::TStateStorageConfig&& currentConfig, NKikimrBlobStorage::TStateStorageConfig&& targetConfig)
+ , NKikimrBlobStorage::TStateStorageConfig&& currentConfig, NKikimrBlobStorage::TStateStorageConfig&& targetConfig, ui32 pilesCount)
: WaitForConfigStep(waitForConfigStep > TDuration::Seconds(0) && waitForConfigStep < MaxWaitForConfigStep ? waitForConfigStep : DefaultWaitForConfigStep)
, StateStorageReconfigurationStep(NONE)
, Sender(sender)
, Cookie(cookie)
, CurrentConfig(currentConfig)
, TargetConfig(targetConfig)
+ , PilesCount(pilesCount)
{}
void TStateStorageSelfhealActor::RequestChangeStateStorage() {
@@ -28,7 +29,7 @@ namespace NKikimr::NStorage {
}
auto *ringGroup = cfg->AddRingGroups();
ringGroup->CopyFrom(rg);
- ringGroup->SetWriteOnly(StateStorageReconfigurationStep == MAKE_PREVIOUS_GROUP_WRITEONLY);
+ ringGroup->SetWriteOnly(StateStorageReconfigurationStep == MAKE_PREVIOUS_GROUP_WRITEONLY || i > PilesCount);
}
} else {
auto *ringGroup = cfg->AddRingGroups();
@@ -137,4 +138,78 @@ namespace NKikimr::NStorage {
hFunc(NStorage::TEvNodeConfigInvokeOnRootResult, HandleResult);
)
}
+
+ TStateStorageReassignNodeSelfhealActor::TStateStorageReassignNodeSelfhealActor(TActorId sender, ui64 cookie, TDuration waitForConfigStep
+ , ui32 nodeFrom, ui32 nodeTo, bool needReconfigSS, bool needReconfigSSB, bool needReconfigSB)
+ : WaitForConfigStep(waitForConfigStep > TDuration::Seconds(0) && waitForConfigStep < MaxWaitForConfigStep ? waitForConfigStep : DefaultWaitForConfigStep)
+ , Sender(sender)
+ , Cookie(cookie)
+ , NodeFrom(nodeFrom)
+ , NodeTo(nodeTo)
+ , NeedReconfigSS(needReconfigSS)
+ , NeedReconfigSSB(needReconfigSSB)
+ , NeedReconfigSB(needReconfigSB)
+ {}
+
+ void TStateStorageReassignNodeSelfhealActor::Bootstrap(TActorId /*parentId*/) {
+ RequestChangeStateStorage(true);
+ Schedule(WaitForConfigStep, new TEvents::TEvWakeup());
+ Become(&TThis::StateFunc);
+ }
+
+ void TStateStorageReassignNodeSelfhealActor::RequestChangeStateStorage(bool disable) {
+ auto request = std::make_unique<TEvNodeConfigInvokeOnRoot>();
+ auto *cmd = request->Record.MutableReassignStateStorageNode();
+ cmd->SetFrom(NodeFrom);
+ cmd->SetTo(NodeTo);
+ cmd->SetStateStorage(NeedReconfigSS);
+ cmd->SetStateStorageBoard(NeedReconfigSSB);
+ cmd->SetSchemeBoard(NeedReconfigSB);
+ cmd->SetDisableRing(disable);
+ AllowNextStep = false;
+ Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), request.release());
+ STLOG(PRI_ERROR, BS_NODE, NW72, "StateStorageReassignNodeSelfhealActor::RequestChangeStateStorage", (cmd, cmd));
+ }
+
+ void TStateStorageReassignNodeSelfhealActor::Finish(TResult::EStatus result, const TString& errorReason) {
+ auto ev = std::make_unique<TEvNodeConfigInvokeOnRootResult>();
+ auto *record = &ev->Record;
+ record->SetStatus(result);
+ if (!errorReason.empty()) {
+ record->SetErrorReason(errorReason);
+ }
+ TActivationContext::Send(new IEventHandle(Sender, SelfId(), ev.release(), 0, Cookie));
+ PassAway();
+ }
+
+ void TStateStorageReassignNodeSelfhealActor::HandleResult(NStorage::TEvNodeConfigInvokeOnRootResult::TPtr& ev) {
+ if (ev->Get()->Record.GetStatus() != TResult::OK) {
+ STLOG(PRI_ERROR, BS_NODE, NW72, "TStateStorageReassignNodeSelfhealActor::HandleResult aborted. ", (Reason, ev->Get()->Record.GetErrorReason()));
+ Finish(TResult::ERROR, ev->Get()->Record.GetErrorReason());
+ } else {
+ AllowNextStep = true;
+ }
+ }
+
+ void TStateStorageReassignNodeSelfhealActor::HandleWakeup() {
+ if (!AllowNextStep) {
+ STLOG(PRI_ERROR, BS_NODE, NW78, "TStateStorageReassignNodeSelfhealActor::HandleWakeup aborted. Previous reconfiguration step not finished yet.");
+ Finish(TResult::ERROR, "Previous reconfiguration step not finished yet.");
+ return;
+ }
+ if (FinishReassign) {
+ Finish(TResult::OK);
+ return;
+ }
+ FinishReassign = true;
+ RequestChangeStateStorage(false);
+ Schedule(WaitForConfigStep, new TEvents::TEvWakeup());
+ }
+
+ STFUNC(TStateStorageReassignNodeSelfhealActor::StateFunc) {
+ STRICT_STFUNC_BODY(
+ cFunc(TEvents::TSystem::Wakeup, HandleWakeup);
+ hFunc(NStorage::TEvNodeConfigInvokeOnRootResult, HandleResult);
+ )
+ }
}
diff --git a/ydb/core/blobstorage/nodewarden/distconf_selfheal.h b/ydb/core/blobstorage/nodewarden/distconf_selfheal.h
index 17a6f427a99..d63e0e5c1fd 100644
--- a/ydb/core/blobstorage/nodewarden/distconf_selfheal.h
+++ b/ydb/core/blobstorage/nodewarden/distconf_selfheal.h
@@ -21,6 +21,7 @@ namespace NKikimr::NStorage {
NKikimrBlobStorage::TStateStorageConfig CurrentConfig;
NKikimrBlobStorage::TStateStorageConfig TargetConfig;
bool AllowNextStep = true;
+ ui32 PilesCount;
using TResult = NKikimrBlobStorage::TEvNodeConfigInvokeOnRootResult;
@@ -33,7 +34,35 @@ namespace NKikimr::NStorage {
public:
TStateStorageSelfhealActor(TActorId sender, ui64 cookie, TDuration waitForConfigStep
- , NKikimrBlobStorage::TStateStorageConfig&& currentConfig, NKikimrBlobStorage::TStateStorageConfig&& targetConfig);
+ , NKikimrBlobStorage::TStateStorageConfig&& currentConfig, NKikimrBlobStorage::TStateStorageConfig&& targetConfig, ui32 pilesCount);
+
+ void Bootstrap(TActorId parentId);
+
+ STFUNC(StateFunc);
+ };
+
+ class TStateStorageReassignNodeSelfhealActor : public TActorBootstrapped<TStateStorageReassignNodeSelfhealActor> {
+ const TDuration WaitForConfigStep;
+ const TActorId Sender;
+ const ui64 Cookie;
+ bool AllowNextStep = false;
+ bool FinishReassign = false;
+ ui32 NodeFrom;
+ ui32 NodeTo;
+ bool NeedReconfigSS;
+ bool NeedReconfigSSB;
+ bool NeedReconfigSB;
+
+ using TResult = NKikimrBlobStorage::TEvNodeConfigInvokeOnRootResult;
+
+ void HandleWakeup();
+ void Finish(TResult::EStatus result, const TString& errorReason = "");
+ void RequestChangeStateStorage(bool disable);
+ void HandleResult(NStorage::TEvNodeConfigInvokeOnRootResult::TPtr& ev);
+
+ public:
+ TStateStorageReassignNodeSelfhealActor(TActorId sender, ui64 cookie, TDuration waitForConfigStep
+ , ui32 nodeFrom, ui32 nodeTo, bool needReconfigSS, bool needReconfigSSB, bool needReconfigSB);
void Bootstrap(TActorId parentId);
diff --git a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp
index a47d7d80402..18bf228ce54 100644
--- a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp
+++ b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp
@@ -30,7 +30,7 @@ namespace NKikimr::NStorage {
for (auto& n : dc) {
NodeGroups[0].Nodes.emplace_back(n);
ui32 nodeId = std::get<0>(n);
- ui32 state = CalcNodeState(nodeId);
+ ui32 state = CalcNodeState(nodeId, false);
NodeGroups[0].State[state]++;
}
@@ -44,6 +44,9 @@ namespace NKikimr::NStorage {
});
}
Y_ABORT_UNLESS(NodeGroups.size() > 0 && NodeGroups[0].Nodes.size() > 0);
+ for (auto& ng : NodeGroups) {
+ ng.Disconnected = ng.State[0] + ng.State[1] < ng.Nodes.size() / 2;
+ }
}
void TStateStoragePerPileGenerator::CalculateRingsParameters() {
@@ -82,6 +85,14 @@ namespace NKikimr::NStorage {
}
rg->SetNToSelect(NToSelect);
for (auto &nodes : Rings) {
+ std::ranges::sort(nodes, [&](const auto& x, const auto& y) {
+ return x < y;
+ });
+ }
+ std::ranges::sort(Rings, [&](const auto& x, const auto& y) {
+ return x[0] < y[0];
+ });
+ for (auto &nodes : Rings) {
auto *ring = rg->AddRing();
for(auto nodeId : nodes) {
ring->AddNode(nodeId);
@@ -90,8 +101,8 @@ namespace NKikimr::NStorage {
}
}
- ui32 TStateStoragePerPileGenerator::CalcNodeState(ui32 nodeId) {
- ui32 state = SelfHealNodesState.contains(nodeId) ? SelfHealNodesState.at(nodeId) : (NodeStatesSize - 1);
+ ui32 TStateStoragePerPileGenerator::CalcNodeState(ui32 nodeId, bool disconnected) {
+ ui32 state = disconnected ? 0 : (SelfHealNodesState.contains(nodeId) ? SelfHealNodesState.at(nodeId) : (NodeStatesSize - 1));
Y_ABORT_UNLESS(state < NodeStatesSize);
Y_ABORT_UNLESS(state != NCms::NSentinel::TNodeStatusComputer::ENodeState::PRETTY_GOOD);
if (state == 0 && UsedNodes.contains(nodeId)) {
@@ -117,7 +128,7 @@ namespace NKikimr::NStorage {
}
ui32 nodeId = std::get<0>(*iter);
location = std::get<1>(*iter);
- if (CalcNodeState(nodeId) <= stateLimit) {
+ if (CalcNodeState(nodeId, group.Disconnected) <= stateLimit) {
ring.push_back(nodeId);
}
iter++;
@@ -140,17 +151,17 @@ namespace NKikimr::NStorage {
auto rack = std::get<1>(n).GetRackId();
auto nodeId = std::get<0>(n);
auto& rackState = rackStates[rack];
- rackState[CalcNodeState(nodeId)]++;
+ rackState[CalcNodeState(nodeId, group.Disconnected)]++;
}
- auto compByRack = [&](const auto& x, const auto& y) {
+ auto compByState = [&](const auto& x, const auto& y) {
auto rackX = std::get<1>(x).GetRackId();
auto rackY = std::get<1>(y).GetRackId();
if (rackX == rackY) {
auto nodeX = std::get<0>(x);
auto nodeY = std::get<0>(y);
- ui32 state1 = CalcNodeState(nodeX);
- ui32 state2 = CalcNodeState(nodeY);
+ ui32 state1 = CalcNodeState(nodeX, group.Disconnected);
+ ui32 state2 = CalcNodeState(nodeY, group.Disconnected);
return state1 < state2 || (state1 == state2 && nodeX < nodeY);
}
auto& rackStateX = rackStates[rackX];
@@ -163,7 +174,7 @@ namespace NKikimr::NStorage {
return rackX < rackY;
};
- std::ranges::sort(group.Nodes, compByRack);
+ std::ranges::sort(group.Nodes, compByState);
for (ui32 stateLimit : xrange(NodeStatesSize)) {
if (PickNodesSimpleStrategy(group, stateLimit, rackStates.size() < RingsInGroupCount)) {
GoodConfig &= stateLimit <= 1;
@@ -171,7 +182,7 @@ namespace NKikimr::NStorage {
}
}
GoodConfig = false;
+ STLOG(PRI_DEBUG, BS_NODE, NW103, "TStateStoragePerPileGenerator::PickNodesByState without limits");
Y_ABORT_UNLESS(PickNodesSimpleStrategy(group, NodeStatesSize, true));
}
-
}
diff --git a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h
index 29bdac48da5..d5e1c72bfd6 100644
--- a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h
+++ b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h
@@ -20,13 +20,14 @@ namespace NKikimr::NStorage {
struct TNodeGroup {
std::vector<std::tuple<ui32, TNodeLocation>> Nodes;
std::array<ui32, NodeStatesSize> State;
+ bool Disconnected;
};
void FillNodeGroups(THashMap<TString, std::vector<std::tuple<ui32, TNodeLocation>>>& nodes);
void CalculateRingsParameters();
bool PickNodesSimpleStrategy(TNodeGroup& group, ui32 stateLimit, bool ignoreRacks);
void PickNodes(TNodeGroup& group);
- ui32 CalcNodeState(ui32 nodeId);
+ ui32 CalcNodeState(ui32 nodeId, bool disconnected);
const std::optional<TBridgePileId> PileId;
const std::unordered_map<ui32, ui32>& SelfHealNodesState;
diff --git a/ydb/core/protos/blobstorage_distributed_config.proto b/ydb/core/protos/blobstorage_distributed_config.proto
index cd16c2797f0..02cce893729 100644
--- a/ydb/core/protos/blobstorage_distributed_config.proto
+++ b/ydb/core/protos/blobstorage_distributed_config.proto
@@ -229,6 +229,7 @@ message TEvNodeConfigInvokeOnRoot {
bool StateStorage = 3;
bool StateStorageBoard = 4;
bool SchemeBoard = 5;
+ optional bool DisableRing = 6;
}
message TGetStateStorageConfig {
diff --git a/ydb/tests/functional/config/test_distconf_sentinel_node_status.py b/ydb/tests/functional/config/test_distconf_sentinel_node_status.py
index 1e6c94b545c..b335e4b7bde 100644
--- a/ydb/tests/functional/config/test_distconf_sentinel_node_status.py
+++ b/ydb/tests/functional/config/test_distconf_sentinel_node_status.py
@@ -136,28 +136,28 @@ class KiKiMRDistConfNodeStatusTest(object):
class TestKiKiMRDistConfSelfHealNodeDisconnected(KiKiMRDistConfNodeStatusTest):
erasure = Erasure.MIRROR_3_DC
- nodes_count = 12
+ nodes_count = 10
def do_test(self, configName):
rg = get_ring_group(self.do_request_config(), configName)
assert_eq(rg["NToSelect"], 9)
assert_eq(len(rg["Ring"]), 9)
- self.validate_contains_nodes(rg, [3])
- self.cluster.nodes[3].stop()
- for i in range(15):
- time.sleep(2)
- cfg = self.do_request_config()[f"{configName}Config"]
- assert_eq(len(cfg["RingGroups"]), 1)
+ self.validate_contains_nodes(rg, [4])
+ self.cluster.nodes[4].stop()
+ time.sleep(25)
rg2 = get_ring_group(self.do_request_config(), configName)
assert_eq(rg["NToSelect"], 9)
assert_eq(len(rg["Ring"]), 9)
- self.validate_not_contains_nodes(rg2, [3])
+ self.validate_not_contains_nodes(rg2, [4])
+ assert_that("RingGroupActorIdOffset" not in rg2) # reassign node api used instead adding new ring groups test
+ for ring in rg2:
+ assert_that("IsDisabled" not in rg)
assert_that(rg != rg2)
- self.cluster.nodes[3].start()
+ self.cluster.nodes[4].start()
time.sleep(25)
rg3 = get_ring_group(self.do_request_config(), configName)
- assert_that(rg3 == rg2) # Current config has no bad nodes and should not run self-heal
+ assert_eq(rg3, rg2) # Current config has no bad nodes and should not run self-heal
class TestKiKiMRDistConfSelfHeal2NodesDisconnected(KiKiMRDistConfNodeStatusTest):
@@ -199,4 +199,4 @@ class TestKiKiMRDistConfSelfHealDCDisconnected(KiKiMRDistConfNodeStatusTest):
rg2 = get_ring_group(self.do_request_config(), configName)
assert_eq(rg["NToSelect"], 9)
assert_eq(len(rg["Ring"]), 9)
- assert_that(rg == rg2)
+ assert_eq(rg2, rg)