summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authort1mursadykov <[email protected]>2023-04-21 13:59:10 +0300
committert1mursadykov <[email protected]>2023-04-21 13:59:10 +0300
commit22d5a1e0a9d3a4aac4d5b5092406d63f2f4d2c4f (patch)
treea348d43385f66f30851b6811fe9dceddebcd84c0
parent5d90e2a102f93840f3beb44fef64144c89abbc0a (diff)
Remove o(n^3) while checking sys tablet nodes
-rw-r--r--ydb/core/cms/cluster_info.cpp65
-rw-r--r--ydb/core/cms/cluster_info.h31
-rw-r--r--ydb/core/cms/cms.cpp73
-rw-r--r--ydb/core/cms/cms_impl.h3
-rw-r--r--ydb/core/cms/info_collector.cpp8
-rw-r--r--ydb/core/cms/node_checkers.cpp57
-rw-r--r--ydb/core/cms/node_checkers.h142
7 files changed, 234 insertions, 145 deletions
diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp
index c31c138ce96..28e6baeddbf 100644
--- a/ydb/core/cms/cluster_info.cpp
+++ b/ydb/core/cms/cluster_info.cpp
@@ -376,7 +376,9 @@ void TClusterInfo::AddNode(const TEvInterconnect::TNodeInfo &info, const TActorC
break;
}
}
- ClusterNodes->AddNode(node->NodeId);
+
+ node->AddNodeGroup(ClusterNodes);
+
HostNameToNodeId.emplace(node->Host, node->NodeId);
LockableItems[node->ItemName()] = node;
}
@@ -391,8 +393,6 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi
node.StartTime = TInstant::MilliSeconds(info.GetStartTime());
node.Version = info.GetVersion();
- ClusterNodes->UpdateNode(nodeId, state);
-
node.Services = TServices();
for (const auto& role : info.GetRoles()) {
EService value;
@@ -400,6 +400,8 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi
node.Services |= value;
}
}
+
+ node.UpdateNodeState();
}
void TClusterInfo::ClearNode(ui32 nodeId)
@@ -411,10 +413,9 @@ void TClusterInfo::ClearNode(ui32 nodeId)
for (auto tablet : node.Tablets)
Tablets.erase(tablet);
node.Tablets.clear();
- node.State = NKikimrCms::DOWN;
node.HasTenantInfo = false;
-
- ClusterNodes->UpdateNode(node.NodeId, NKikimrCms::DOWN);
+ node.State = NKikimrCms::DOWN;
+ node.UpdateNodeState();
}
void TClusterInfo::ApplyInitialNodeTenants(const TActorContext& ctx, const THashMap<ui32, TString>& nodeTenants)
@@ -647,10 +648,8 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action)
case TAction::SHUTDOWN_HOST:
if (auto nodes = NodePtrs(action.GetHost(), MakeServices(action))) {
for (const auto node : nodes) {
- ClusterNodes->LockNode(node->NodeId);
- if (node->Tenant) {
- TenantNodesChecker[node->Tenant]->LockNode(node->NodeId);
- }
+ for (auto &nodeGroup: node->NodeGroups)
+ nodeGroup->LockNode(node->NodeId);
}
}
break;
@@ -658,10 +657,12 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action)
for (const auto &device : action.GetDevices()) {
if (HasPDisk(device)) {
auto pdisk = &PDiskRef(device);
- ClusterNodes->LockNode(pdisk->NodeId);
+ for (auto &nodeGroup: NodeRef(pdisk->NodeId).NodeGroups)
+ nodeGroup->LockNode(pdisk->NodeId);
} else if (HasVDisk(device)) {
auto vdisk = &VDiskRef(device);
- ClusterNodes->LockNode(vdisk->NodeId);
+ for (auto &nodeGroup: NodeRef(vdisk->NodeId).NodeGroups)
+ nodeGroup->LockNode(vdisk->NodeId);
}
}
break;
@@ -884,18 +885,6 @@ void TClusterInfo::MigrateOldInfo(TClusterInfoPtr old)
}
}
-void TClusterInfo::ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config) {
- for (ui32 i = 0; i < config.TabletSize(); ++i) {
- const auto &tablet = config.GetTablet(i);
-
- for (ui32 j = 0; j < tablet.NodeSize(); ++j) {
- ui32 nodeId = tablet.GetNode(j);
- TabletTypeToNodes[tablet.GetType()].push_back(nodeId);
- NodeToTabletTypes[nodeId].push_back(tablet.GetType());
- }
- }
-}
-
void TClusterInfo::ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info) {
StateStorageInfoReceived = true;
for (ui32 ringId = 0; ringId < info->Rings.size(); ++ringId) {
@@ -920,9 +909,20 @@ void TClusterInfo::GenerateTenantNodesCheckers() {
for (auto &[nodeId, nodeInfo] : Nodes) {
if (nodeInfo->Tenant) {
if (!TenantNodesChecker.contains(nodeInfo->Tenant))
- TenantNodesChecker[nodeInfo->Tenant] = TSimpleSharedPtr<TNodesStateBase>(new TTenantState(nodeInfo->Tenant, 0, 0));
+ TenantNodesChecker[nodeInfo->Tenant] = TSimpleSharedPtr<TNodesLimitsCounterBase>(new TTenantLimitsCounter(nodeInfo->Tenant, 0, 0));
+
+ nodeInfo->AddNodeGroup(TenantNodesChecker[nodeInfo->Tenant]);
+ }
+ }
+}
+
+void TClusterInfo::GenerateSysTabletsNodesCheckers() {
+ for (auto tablet : BootstrapConfig.GetTablet()) {
+ SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType()));
- TenantNodesChecker[nodeInfo->Tenant]->UpdateNode(nodeId, nodeInfo->State);
+ for (auto nodeId : tablet.GetNode()) {
+ NodeToTabletTypes[nodeId].push_back(tablet.GetType());
+ NodeRef(nodeId).AddNodeGroup(SysNodesCheckers[tablet.GetType()]);
}
}
}
@@ -997,11 +997,8 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action,
case NKikimrCms::TAction::SHUTDOWN_HOST:
if (auto nodes = clusterState->NodePtrs(action.GetHost(), MakeServices(action))) {
for (const auto node : nodes) {
- AddNodeLockOperation(node->NodeId, clusterState->ClusterNodes);
-
- if (node->Tenant) {
- AddNodeLockOperation(node->NodeId, clusterState->TenantNodesChecker[node->Tenant]);
- }
+ for (auto &nodeGroup: node->NodeGroups)
+ AddNodeLockOperation(node->NodeId, nodeGroup);
}
}
break;
@@ -1009,11 +1006,13 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action,
for (const auto &device : action.GetDevices()) {
if (clusterState->HasPDisk(device)) {
auto pdisk = &clusterState->PDisk(device);
- AddNodeLockOperation(pdisk->NodeId, clusterState->ClusterNodes);
+ for (auto &nodeGroup: clusterState->NodeRef(pdisk->NodeId).NodeGroups)
+ AddNodeLockOperation(pdisk->NodeId, nodeGroup);
} else if (clusterState->HasVDisk(device)) {
auto vdisk = &clusterState->VDisk(device);
- AddNodeLockOperation(vdisk->NodeId, clusterState->ClusterNodes);
+ for (auto &nodeGroup: clusterState->NodeRef(vdisk->NodeId).NodeGroups)
+ AddNodeLockOperation(vdisk->NodeId, nodeGroup);
}
}
break;
diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h
index ff66d8c91a9..85cd007ca4c 100644
--- a/ydb/core/cms/cluster_info.h
+++ b/ydb/core/cms/cluster_info.h
@@ -12,6 +12,7 @@
#include <ydb/core/mind/tenant_pool.h>
#include <ydb/core/node_whiteboard/node_whiteboard.h>
#include <ydb/core/protos/cms.pb.h>
+#include <ydb/core/protos/config.pb.h>
#include <ydb/core/protos/console.pb.h>
#include <library/cpp/actors/core/actor.h>
@@ -317,6 +318,17 @@ public:
return Sprintf("Host %s:%" PRIu16 " (%" PRIu32 ")", Host.data(), IcPort, NodeId);
}
+ void AddNodeGroup(TSimpleSharedPtr<INodesChecker> group) {
+ NodeGroups.push_back(group);
+ group->UpdateNode(NodeId, State);
+ }
+
+ void UpdateNodeState() {
+ for (auto &group : NodeGroups) {
+ group->UpdateNode(NodeId, State);
+ }
+ }
+
void MigrateOldInfo(const TLockableItem &old) override;
ui32 NodeId = 0;
@@ -333,6 +345,8 @@ public:
TString PreviousTenant;
TServices Services;
TInstant StartTime;
+
+ TVector<TSimpleSharedPtr<INodesChecker>> NodeGroups;
};
using TNodeInfoPtr = TIntrusivePtr<TNodeInfo>;
@@ -561,10 +575,10 @@ public:
const ui32 NodeId;
private:
- TSimpleSharedPtr<TNodesStateBase> NodesState;
+ TSimpleSharedPtr<INodesChecker> NodesState;
public:
- TLockNodeOperation(ui32 nodeId, TSimpleSharedPtr<TNodesStateBase> nodesState)
+ TLockNodeOperation(ui32 nodeId, TSimpleSharedPtr<INodesChecker> nodesState)
: TOperationBase(OPERATION_TYPE_LOCK_NODE)
, NodeId(nodeId)
, NodesState(nodesState)
@@ -606,7 +620,7 @@ public:
Log.emplace_back(new TLogRollbackPoint());
}
- void AddNodeLockOperation(ui32 nodeId, TSimpleSharedPtr<TNodesStateBase> nodesState) {
+ void AddNodeLockOperation(ui32 nodeId, TSimpleSharedPtr<INodesChecker> nodesState) {
Log.emplace_back(new TLockNodeOperation(nodeId, nodesState))->Do();
}
@@ -642,12 +656,12 @@ public:
using TVDisks = THashMap<TVDiskID, TVDiskInfoPtr>;
using TBSGroups = THashMap<ui32, TBSGroupInfo>;
- using TenantNodesCheckers = THashMap<TString, TSimpleSharedPtr<TNodesStateBase>>;
+ using TenantNodesCheckers = THashMap<TString, TSimpleSharedPtr<TNodesLimitsCounterBase>>;
friend TOperationLogManager;
TenantNodesCheckers TenantNodesChecker;
- TSimpleSharedPtr<TClusterNodesState> ClusterNodes = MakeSimpleShared<TClusterNodesState>(0u, 0u);
+ TSimpleSharedPtr<TClusterLimitsCounter> ClusterNodes = MakeSimpleShared<TClusterLimitsCounter>(0u, 0u);
TOperationLogManager LogManager;
TOperationLogManager ScheduledLogManager;
@@ -666,6 +680,7 @@ public:
void ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info);
void GenerateTenantNodesCheckers();
+ void GenerateSysTabletsNodesCheckers();
bool IsStateStorageReplicaNode(ui32 nodeId) {
return StateStorageReplicas.contains(nodeId);
@@ -904,8 +919,6 @@ public:
bool IsOutdated() const { return Outdated; }
void SetOutdated(bool val) { Outdated = val; }
- void ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config);
-
static EGroupConfigurationType VDiskConfigurationType(const TVDiskID &vdId) {
return TGroupID(vdId.GroupID).ConfigurationType();
}
@@ -1010,9 +1023,11 @@ private:
public:
bool IsLocalBootConfDiffersFromConsole = false;
- THashMap<NKikimrConfig::TBootstrap::ETabletType, TVector<ui32>> TabletTypeToNodes;
+ NKikimrConfig::TBootstrap BootstrapConfig;
THashMap<ui32, TVector<NKikimrConfig::TBootstrap::ETabletType>> NodeToTabletTypes;
+ THashMap<NKikimrConfig::TBootstrap::ETabletType, TSimpleSharedPtr<TSysTabletsNodesCounter>> SysNodesCheckers;
+
TIntrusiveConstPtr<TStateStorageInfo> StateStorageInfo;
TVector<TStateStorageRingInfoPtr> StateStorageRings;
};
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index f663a37f08d..211a7e0c622 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -373,7 +373,7 @@ bool TCms::CheckActionShutdownNode(const NKikimrCms::TAction &action,
}
if (!AppData(ctx)->DisableCheckingSysNodesCms &&
- !CheckSysTabletsNode(action, opts, node, error)) {
+ !CheckSysTabletsNode(opts, node, error)) {
return false;
}
@@ -533,8 +533,7 @@ bool TCms::TryToLockStateStorageReplica(const TAction& action,
return true;
}
-bool TCms::CheckSysTabletsNode(const TAction &action,
- const TActionOptions &opts,
+bool TCms::CheckSysTabletsNode(const TActionOptions &opts,
const TNodeInfo &node,
TErrorInfo &error) const
{
@@ -543,55 +542,12 @@ bool TCms::CheckSysTabletsNode(const TAction &action,
}
for (auto &tabletType : ClusterInfo->NodeToTabletTypes[node.NodeId]) {
- ui32 disabledNodesCnt = 1; // сounting including this node
- TErrorInfo err;
- TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration;
- TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
-
- for (auto &nodeId : ClusterInfo->TabletTypeToNodes[tabletType]) {
- if (nodeId == node.NodeId) {
- continue;
- }
- if (ClusterInfo->Node(nodeId).IsLocked(err, State->Config.DefaultRetryTime,
- TActivationContext::Now(), duration) ||
- ClusterInfo->Node(nodeId).IsDown(err, defaultDeadline))
- {
- ++disabledNodesCnt;
- }
- }
-
- ui32 tabletNodes = ClusterInfo->TabletTypeToNodes[tabletType].size();
- switch (opts.AvailabilityMode) {
- case MODE_MAX_AVAILABILITY:
- if (tabletNodes > 1 && disabledNodesCnt * 2 > tabletNodes){
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType)
- << " has too many locked nodes: " << disabledNodesCnt
- << " limit: " << tabletNodes / 2 << " (50%)";
- error.Deadline = defaultDeadline;
- return false;
- }
- break;
- case MODE_KEEP_AVAILABLE:
- if (tabletNodes > 1 && disabledNodesCnt > tabletNodes - 1) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType)
- << " has too many locked nodes: " << disabledNodesCnt
- << ". At least one node must be available";
- error.Deadline = defaultDeadline;
- return false;
- }
- break;
- case MODE_FORCE_RESTART:
- break;
- default:
- error.Code = TStatus::WRONG_REQUEST;
- error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")",
- EAvailabilityMode_Name(opts.AvailabilityMode).data(),
- static_cast<ui32>(opts.AvailabilityMode));
- error.Deadline = defaultDeadline;
- return false;
- }
+ if (!ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode)) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode);
+ error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
+ return false;
+ }
}
return true;
@@ -605,12 +561,10 @@ bool TCms::TryToLockNode(const TAction& action,
TDuration duration = TDuration::MicroSeconds(action.GetDuration());
duration += opts.PermissionDuration;
- bool isForceRestart = opts.AvailabilityMode == NKikimrCms::MODE_FORCE_RESTART;
-
- if (!ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, isForceRestart))
+ if (!ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode))
{
error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->ClusterNodes->ReadableReason();
+ error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode);
error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
return false;
@@ -618,10 +572,10 @@ bool TCms::TryToLockNode(const TAction& action,
if (node.Tenant
&& opts.TenantPolicy != NONE
- && !ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, isForceRestart))
+ && !ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode))
{
error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason();
+ error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode);
error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
return false;
@@ -1427,6 +1381,9 @@ void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx
// all the information about the tenants on the disconnected nodes
info->GenerateTenantNodesCheckers();
+ if (!AppData(ctx)->DisableCheckingSysNodesCms)
+ info->GenerateSysTabletsNodesCheckers();
+
AdjustInfo(info, ctx);
State->ClusterInfo = info;
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index e3c3bfb66da..9cb688b125a 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -301,8 +301,7 @@ private:
bool CheckActionReplaceDevices(const NKikimrCms::TAction &action,
const TActionOptions &options,
TErrorInfo &error) const;
- bool CheckSysTabletsNode(const NKikimrCms::TAction &action,
- const TActionOptions &opts,
+ bool CheckSysTabletsNode(const TActionOptions &opts,
const TNodeInfo &node,
TErrorInfo &error) const;
bool TryToLockNode(const NKikimrCms::TAction &action,
diff --git a/ydb/core/cms/info_collector.cpp b/ydb/core/cms/info_collector.cpp
index 278202797bb..8f4b021e1a2 100644
--- a/ydb/core/cms/info_collector.cpp
+++ b/ydb/core/cms/info_collector.cpp
@@ -215,12 +215,11 @@ void TInfoCollector::RequestBootstrapConfig() {
void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr& ev) {
const auto& config = ev->Get()->Config;
const auto& initialBootstrapConfig = AppData()->BootstrapConfig;
- const NKikimrConfig::TBootstrap* bootstrapConfig = nullptr;
BootstrapConfigReceived = true;
if (!config->HasBootstrapConfig()) {
LOG_I("Couldn't collect bootstrap config from Console. Taking the local config");
- bootstrapConfig = &initialBootstrapConfig;
+ Info->BootstrapConfig.CopyFrom(initialBootstrapConfig);
} else {
const auto& currentBootstrapConfig = config->GetBootstrapConfig();
@@ -232,12 +231,9 @@ void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr& ev
Info->IsLocalBootConfDiffersFromConsole = true;
}
- bootstrapConfig = &currentBootstrapConfig;
+ Info->BootstrapConfig.CopyFrom(currentBootstrapConfig);
}
- Y_VERIFY(bootstrapConfig);
- Info->ApplySysTabletsInfo(*bootstrapConfig);
-
MaybeReplyAndDie();
}
diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp
index 5ebb1646ca8..8a857b0a63f 100644
--- a/ydb/core/cms/node_checkers.cpp
+++ b/ydb/core/cms/node_checkers.cpp
@@ -6,10 +6,10 @@
namespace NKikimr::NCms {
-#define NCH_LOG_D(stream) LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::CMS, "[Checker] " << stream)
-#define NCH_LOG_T(stream) LOG_TRACE_S (*TlsActivationContext, NKikimrServices::CMS, "[Checker] " << stream)
+#define NCH_LOG_D(stream) LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream)
+#define NCH_LOG_T(stream) LOG_TRACE_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream)
-TNodesStateBase::ENodeState TNodesStateBase::NodeState(NKikimrCms::EState state) {
+TNodesLimitsCounterBase::ENodeState INodesChecker::NodeState(NKikimrCms::EState state) {
switch (state) {
case NKikimrCms::UP:
return NODE_STATE_UP;
@@ -24,14 +24,14 @@ TNodesStateBase::ENodeState TNodesStateBase::NodeState(NKikimrCms::EState state)
}
}
-void TNodesStateBase::AddNode(ui32 nodeId) {
+void TNodesCounterBase::AddNode(ui32 nodeId) {
if (NodeToState.contains(nodeId)) {
return;
}
NodeToState[nodeId] = NODE_STATE_UNSPECIFIED;
}
-void TNodesStateBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) {
+void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) {
if (!NodeToState.contains(nodeId)) {
AddNode(nodeId);
}
@@ -57,7 +57,7 @@ void TNodesStateBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) {
}
}
-void TNodesStateBase::LockNode(ui32 nodeId) {
+void TNodesCounterBase::LockNode(ui32 nodeId) {
Y_VERIFY(NodeToState.contains(nodeId));
++LockedNodesCount;
@@ -69,7 +69,7 @@ void TNodesStateBase::LockNode(ui32 nodeId) {
}
}
-void TNodesStateBase::UnlockNode(ui32 nodeId) {
+void TNodesCounterBase::UnlockNode(ui32 nodeId) {
Y_VERIFY(NodeToState.contains(nodeId));
--LockedNodesCount;
@@ -81,10 +81,11 @@ void TNodesStateBase::UnlockNode(ui32 nodeId) {
}
}
-bool TNodesStateBase::TryToLockNode(ui32 nodeId, bool isForceRestart) {
+bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const {
Y_VERIFY(NodeToState.contains(nodeId));
+ auto nodeState = NodeToState.at(nodeId);
- auto nodeState = NodeToState[nodeId];
+ bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART;
NCH_LOG_D("Checking Node: "
<< nodeId << ", with state: " << ToString(nodeState)
@@ -128,4 +129,42 @@ bool TNodesStateBase::TryToLockNode(ui32 nodeId, bool isForceRestart) {
return true;
}
+bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const {
+ Y_VERIFY(NodeToState.contains(nodeId));
+ auto nodeState = NodeToState.at(nodeId);
+
+ NCH_LOG_D("Checking limits for sys tablet: " << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
+ << ", on node: " << nodeId
+ << ", with state: " << ToString(nodeState)
+ << ", locked nodes: " << LockedNodesCount
+ << ", down nodes: " << DownNodesCount);
+
+ if (nodeState == NODE_STATE_RESTART ||
+ nodeState == NODE_STATE_LOCKED ||
+ nodeState == NODE_STATE_UNSPECIFIED) {
+
+ return false;
+ }
+
+ ui32 tabletNodes = NodeToState.size();
+ switch (mode) {
+ case NKikimrCms::MODE_MAX_AVAILABILITY:
+ if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) * 2 > tabletNodes){
+ return false;
+ }
+ break;
+ case NKikimrCms::MODE_KEEP_AVAILABLE:
+ if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) > tabletNodes - 1) {
+ return false;
+ }
+ break;
+ case NKikimrCms::MODE_FORCE_RESTART:
+ break;
+ default:
+ Y_FAIL("Unknown availability mode");
+ }
+
+ return true;
+}
+
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h
index fd3544b6ad5..72362c84dba 100644
--- a/ydb/core/cms/node_checkers.h
+++ b/ydb/core/cms/node_checkers.h
@@ -5,6 +5,7 @@
#include <ydb/core/blobstorage/base/blobstorage_vdiskid.h>
#include <ydb/core/erasure/erasure.h>
#include <ydb/core/protos/cms.pb.h>
+#include <ydb/core/protos/config.pb.h>
#include <library/cpp/actors/core/log.h>
@@ -20,7 +21,12 @@
namespace NKikimr::NCms {
-class TNodesStateBase {
+/**
+ * A base class for storing the state of some group of nodes. For example, tenant nodes, state storage nodes, etc.
+ *
+ * Different groups of nodes may have their own failure model, so the checks for node permissions may be different.
+ */
+class INodesChecker {
public:
enum ENodeState : ui32 {
NODE_STATE_UNSPECIFIED /* "Unspecified" */,
@@ -30,59 +36,93 @@ public:
NODE_STATE_DOWN /* "Down" */
};
-private:
+protected:
static ENodeState NodeState(NKikimrCms::EState state);
-protected:
- ui32 DisabledNodesLimit;
- ui32 DisabledNodesRatioLimit;
+public:
+ virtual ~INodesChecker() = default;
+
+ virtual void AddNode(ui32 nodeId) = 0;
+ virtual void UpdateNode(ui32 nodeId, NKikimrCms::EState) = 0;
+
+ virtual void LockNode(ui32 nodeId) = 0;
+ virtual void UnlockNode(ui32 nodeId) = 0;
+
+ virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0;
+
+ virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0;
+};
+/**
+ * Base class for a simple nodes counter: holds per-node state and the locked/down node counters; limit checks live in subclasses
+ */
+class TNodesCounterBase : public INodesChecker {
+protected:
THashMap<ui32, ENodeState> NodeToState;
ui32 LockedNodesCount;
ui32 DownNodesCount;
public:
- TNodesStateBase(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit)
+ TNodesCounterBase()
+ : LockedNodesCount(0)
+ , DownNodesCount(0)
+ {}
+
+ virtual ~TNodesCounterBase() = default;
+
+ void AddNode(ui32 nodeId) override;
+ void UpdateNode(ui32 nodeId, NKikimrCms::EState) override;
+
+ void LockNode(ui32 nodeId) override;
+ void UnlockNode(ui32 nodeId) override;
+};
+
+/**
+ * Base class for counting groups of nodes with a limit on the number of locked and disabled nodes.
+ *
+ * Each such group of nodes has parameters, which are set in CmsConfigItem:
+ * DisabledNodesLimit - the maximum number of unavailable nodes
+ * DisabledNodesRatioLimit - the maximum percentage of unavailable nodes
+ */
+class TNodesLimitsCounterBase : public TNodesCounterBase {
+protected:
+ ui32 DisabledNodesLimit;
+ ui32 DisabledNodesRatioLimit;
+
+public:
+ TNodesLimitsCounterBase(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit)
: DisabledNodesLimit(disabledNodesLimit)
, DisabledNodesRatioLimit(disabledNodesRatioLimit)
- , LockedNodesCount(0)
- , DownNodesCount(0)
{
}
- virtual ~TNodesStateBase() = default;
+ virtual ~TNodesLimitsCounterBase() = default;
void ApplyLimits(ui32 nodesLimit, ui32 ratioLimit) {
DisabledNodesLimit = nodesLimit;
DisabledNodesRatioLimit = ratioLimit;
}
- void AddNode(ui32 nodeId);
- void UpdateNode(ui32 nodeId, NKikimrCms::EState);
-
- void LockNode(ui32 nodeId);
- void UnlockNode(ui32 nodeId);
-
- bool TryToLockNode(ui32 nodeId, bool isForceRestart = false);
-
- virtual std::string ReadableReason() const = 0;
-
+ bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final;
};
-class TTenantState : public TNodesStateBase {
+class TTenantLimitsCounter : public TNodesLimitsCounterBase {
private:
const std::string TenantName;
public:
- TTenantState(const std::string &tenantName, ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit)
- : TNodesStateBase(disabledNodesLimit, disabledNodesRatioLimit)
+ TTenantLimitsCounter(const std::string &tenantName, ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit)
+ : TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit)
, TenantName(tenantName)
{
}
- std::string ReadableReason() const override final {
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
+ Y_UNUSED(mode);
+
std::stringstream reason;
- reason << "Too many locked nodes for tenant " << TenantName
+ reason << "Cannot lock node: " << nodeId
+ << ". Too many locked nodes for tenant " << TenantName
<< "; locked: " << LockedNodesCount
<< "; down: " << DownNodesCount
<< "; total: " << NodeToState.size()
@@ -93,16 +133,19 @@ public:
}
};
-class TClusterNodesState : public TNodesStateBase {
+class TClusterLimitsCounter : public TNodesLimitsCounterBase {
public:
- TClusterNodesState(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit)
- : TNodesStateBase(disabledNodesLimit, disabledNodesRatioLimit)
+ TClusterLimitsCounter(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit)
+ : TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit)
{
}
- std::string ReadableReason() const override final {
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
+ Y_UNUSED(mode);
+
std::stringstream reason;
- reason << "Too many locked nodes in cluster"
+ reason << "Cannot lock node: " << nodeId
+ <<". Too many locked nodes in cluster"
<< "; locked: " << LockedNodesCount
<< "; down: " << DownNodesCount
<< "; total: " << NodeToState.size()
@@ -113,4 +156,45 @@ public:
}
};
+/**
+ * Class to hold information about the nodes on which a given system tablet can start. Those nodes are
+ * described in the bootstrap config.
+ *
+ * At least one node from the bootstrap list must always be available
+ */
+class TSysTabletsNodesCounter : public TNodesCounterBase {
+private:
+ NKikimrConfig::TBootstrap::ETabletType TabletType;
+
+public:
+ explicit TSysTabletsNodesCounter(NKikimrConfig::TBootstrap::ETabletType tabletType)
+ : TabletType(tabletType)
+ {}
+
+ bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final;
+
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
+ std::stringstream reason;
+
+ if (mode == NKikimrCms::MODE_FORCE_RESTART) {
+ return reason.str();
+ }
+
+ reason << "Cannot lock node: " << nodeId
+ << ". Tablet "
+ << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
+ << " has too many unavailable nodes. Locked: " << LockedNodesCount
+ << ". Down: " << DownNodesCount;
+ if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
+ reason << ". Limit: " << NodeToState.size() / 2 << " (50%)";
+ }
+
+ if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
+ reason << ". Limit: " << NodeToState.size() - 1;
+ }
+
+ return reason.str();
+ }
+};
+
} // namespace NKikimr::NCms