diff options
| author | t1mursadykov <[email protected]> | 2023-04-21 13:59:10 +0300 |
|---|---|---|
| committer | t1mursadykov <[email protected]> | 2023-04-21 13:59:10 +0300 |
| commit | 22d5a1e0a9d3a4aac4d5b5092406d63f2f4d2c4f (patch) | |
| tree | a348d43385f66f30851b6811fe9dceddebcd84c0 | |
| parent | 5d90e2a102f93840f3beb44fef64144c89abbc0a (diff) | |
Remove o(n^3) while checking sys tablet nodes
| -rw-r--r-- | ydb/core/cms/cluster_info.cpp | 65 | ||||
| -rw-r--r-- | ydb/core/cms/cluster_info.h | 31 | ||||
| -rw-r--r-- | ydb/core/cms/cms.cpp | 73 | ||||
| -rw-r--r-- | ydb/core/cms/cms_impl.h | 3 | ||||
| -rw-r--r-- | ydb/core/cms/info_collector.cpp | 8 | ||||
| -rw-r--r-- | ydb/core/cms/node_checkers.cpp | 57 | ||||
| -rw-r--r-- | ydb/core/cms/node_checkers.h | 142 |
7 files changed, 234 insertions, 145 deletions
diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp index c31c138ce96..28e6baeddbf 100644 --- a/ydb/core/cms/cluster_info.cpp +++ b/ydb/core/cms/cluster_info.cpp @@ -376,7 +376,9 @@ void TClusterInfo::AddNode(const TEvInterconnect::TNodeInfo &info, const TActorC break; } } - ClusterNodes->AddNode(node->NodeId); + + node->AddNodeGroup(ClusterNodes); + HostNameToNodeId.emplace(node->Host, node->NodeId); LockableItems[node->ItemName()] = node; } @@ -391,8 +393,6 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi node.StartTime = TInstant::MilliSeconds(info.GetStartTime()); node.Version = info.GetVersion(); - ClusterNodes->UpdateNode(nodeId, state); - node.Services = TServices(); for (const auto& role : info.GetRoles()) { EService value; @@ -400,6 +400,8 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi node.Services |= value; } } + + node.UpdateNodeState(); } void TClusterInfo::ClearNode(ui32 nodeId) @@ -411,10 +413,9 @@ void TClusterInfo::ClearNode(ui32 nodeId) for (auto tablet : node.Tablets) Tablets.erase(tablet); node.Tablets.clear(); - node.State = NKikimrCms::DOWN; node.HasTenantInfo = false; - - ClusterNodes->UpdateNode(node.NodeId, NKikimrCms::DOWN); + node.State = NKikimrCms::DOWN; + node.UpdateNodeState(); } void TClusterInfo::ApplyInitialNodeTenants(const TActorContext& ctx, const THashMap<ui32, TString>& nodeTenants) @@ -647,10 +648,8 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action) case TAction::SHUTDOWN_HOST: if (auto nodes = NodePtrs(action.GetHost(), MakeServices(action))) { for (const auto node : nodes) { - ClusterNodes->LockNode(node->NodeId); - if (node->Tenant) { - TenantNodesChecker[node->Tenant]->LockNode(node->NodeId); - } + for (auto &nodeGroup: node->NodeGroups) + nodeGroup->LockNode(node->NodeId); } } break; @@ -658,10 +657,12 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action) for (const auto 
&device : action.GetDevices()) { if (HasPDisk(device)) { auto pdisk = &PDiskRef(device); - ClusterNodes->LockNode(pdisk->NodeId); + for (auto &nodeGroup: NodeRef(pdisk->NodeId).NodeGroups) + nodeGroup->LockNode(pdisk->NodeId); } else if (HasVDisk(device)) { auto vdisk = &VDiskRef(device); - ClusterNodes->LockNode(vdisk->NodeId); + for (auto &nodeGroup: NodeRef(vdisk->NodeId).NodeGroups) + nodeGroup->LockNode(vdisk->NodeId); } } break; @@ -884,18 +885,6 @@ void TClusterInfo::MigrateOldInfo(TClusterInfoPtr old) } } -void TClusterInfo::ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config) { - for (ui32 i = 0; i < config.TabletSize(); ++i) { - const auto &tablet = config.GetTablet(i); - - for (ui32 j = 0; j < tablet.NodeSize(); ++j) { - ui32 nodeId = tablet.GetNode(j); - TabletTypeToNodes[tablet.GetType()].push_back(nodeId); - NodeToTabletTypes[nodeId].push_back(tablet.GetType()); - } - } -} - void TClusterInfo::ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info) { StateStorageInfoReceived = true; for (ui32 ringId = 0; ringId < info->Rings.size(); ++ringId) { @@ -920,9 +909,20 @@ void TClusterInfo::GenerateTenantNodesCheckers() { for (auto &[nodeId, nodeInfo] : Nodes) { if (nodeInfo->Tenant) { if (!TenantNodesChecker.contains(nodeInfo->Tenant)) - TenantNodesChecker[nodeInfo->Tenant] = TSimpleSharedPtr<TNodesStateBase>(new TTenantState(nodeInfo->Tenant, 0, 0)); + TenantNodesChecker[nodeInfo->Tenant] = TSimpleSharedPtr<TNodesLimitsCounterBase>(new TTenantLimitsCounter(nodeInfo->Tenant, 0, 0)); + + nodeInfo->AddNodeGroup(TenantNodesChecker[nodeInfo->Tenant]); + } + } +} + +void TClusterInfo::GenerateSysTabletsNodesCheckers() { + for (auto tablet : BootstrapConfig.GetTablet()) { + SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType())); - TenantNodesChecker[nodeInfo->Tenant]->UpdateNode(nodeId, nodeInfo->State); + for (auto nodeId : tablet.GetNode()) { + 
NodeToTabletTypes[nodeId].push_back(tablet.GetType()); + NodeRef(nodeId).AddNodeGroup(SysNodesCheckers[tablet.GetType()]); } } } @@ -997,11 +997,8 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action, case NKikimrCms::TAction::SHUTDOWN_HOST: if (auto nodes = clusterState->NodePtrs(action.GetHost(), MakeServices(action))) { for (const auto node : nodes) { - AddNodeLockOperation(node->NodeId, clusterState->ClusterNodes); - - if (node->Tenant) { - AddNodeLockOperation(node->NodeId, clusterState->TenantNodesChecker[node->Tenant]); - } + for (auto &nodeGroup: node->NodeGroups) + AddNodeLockOperation(node->NodeId, nodeGroup); } } break; @@ -1009,11 +1006,13 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action, for (const auto &device : action.GetDevices()) { if (clusterState->HasPDisk(device)) { auto pdisk = &clusterState->PDisk(device); - AddNodeLockOperation(pdisk->NodeId, clusterState->ClusterNodes); + for (auto &nodeGroup: clusterState->NodeRef(pdisk->NodeId).NodeGroups) + AddNodeLockOperation(pdisk->NodeId, nodeGroup); } else if (clusterState->HasVDisk(device)) { auto vdisk = &clusterState->VDisk(device); - AddNodeLockOperation(vdisk->NodeId, clusterState->ClusterNodes); + for (auto &nodeGroup: clusterState->NodeRef(vdisk->NodeId).NodeGroups) + AddNodeLockOperation(vdisk->NodeId, nodeGroup); } } break; diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h index ff66d8c91a9..85cd007ca4c 100644 --- a/ydb/core/cms/cluster_info.h +++ b/ydb/core/cms/cluster_info.h @@ -12,6 +12,7 @@ #include <ydb/core/mind/tenant_pool.h> #include <ydb/core/node_whiteboard/node_whiteboard.h> #include <ydb/core/protos/cms.pb.h> +#include <ydb/core/protos/config.pb.h> #include <ydb/core/protos/console.pb.h> #include <library/cpp/actors/core/actor.h> @@ -317,6 +318,17 @@ public: return Sprintf("Host %s:%" PRIu16 " (%" PRIu32 ")", Host.data(), IcPort, NodeId); } + void AddNodeGroup(TSimpleSharedPtr<INodesChecker> group) { + 
NodeGroups.push_back(group); + group->UpdateNode(NodeId, State); + } + + void UpdateNodeState() { + for (auto &group : NodeGroups) { + group->UpdateNode(NodeId, State); + } + } + void MigrateOldInfo(const TLockableItem &old) override; ui32 NodeId = 0; @@ -333,6 +345,8 @@ public: TString PreviousTenant; TServices Services; TInstant StartTime; + + TVector<TSimpleSharedPtr<INodesChecker>> NodeGroups; }; using TNodeInfoPtr = TIntrusivePtr<TNodeInfo>; @@ -561,10 +575,10 @@ public: const ui32 NodeId; private: - TSimpleSharedPtr<TNodesStateBase> NodesState; + TSimpleSharedPtr<INodesChecker> NodesState; public: - TLockNodeOperation(ui32 nodeId, TSimpleSharedPtr<TNodesStateBase> nodesState) + TLockNodeOperation(ui32 nodeId, TSimpleSharedPtr<INodesChecker> nodesState) : TOperationBase(OPERATION_TYPE_LOCK_NODE) , NodeId(nodeId) , NodesState(nodesState) @@ -606,7 +620,7 @@ public: Log.emplace_back(new TLogRollbackPoint()); } - void AddNodeLockOperation(ui32 nodeId, TSimpleSharedPtr<TNodesStateBase> nodesState) { + void AddNodeLockOperation(ui32 nodeId, TSimpleSharedPtr<INodesChecker> nodesState) { Log.emplace_back(new TLockNodeOperation(nodeId, nodesState))->Do(); } @@ -642,12 +656,12 @@ public: using TVDisks = THashMap<TVDiskID, TVDiskInfoPtr>; using TBSGroups = THashMap<ui32, TBSGroupInfo>; - using TenantNodesCheckers = THashMap<TString, TSimpleSharedPtr<TNodesStateBase>>; + using TenantNodesCheckers = THashMap<TString, TSimpleSharedPtr<TNodesLimitsCounterBase>>; friend TOperationLogManager; TenantNodesCheckers TenantNodesChecker; - TSimpleSharedPtr<TClusterNodesState> ClusterNodes = MakeSimpleShared<TClusterNodesState>(0u, 0u); + TSimpleSharedPtr<TClusterLimitsCounter> ClusterNodes = MakeSimpleShared<TClusterLimitsCounter>(0u, 0u); TOperationLogManager LogManager; TOperationLogManager ScheduledLogManager; @@ -666,6 +680,7 @@ public: void ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info); void GenerateTenantNodesCheckers(); + void 
GenerateSysTabletsNodesCheckers(); bool IsStateStorageReplicaNode(ui32 nodeId) { return StateStorageReplicas.contains(nodeId); @@ -904,8 +919,6 @@ public: bool IsOutdated() const { return Outdated; } void SetOutdated(bool val) { Outdated = val; } - void ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config); - static EGroupConfigurationType VDiskConfigurationType(const TVDiskID &vdId) { return TGroupID(vdId.GroupID).ConfigurationType(); } @@ -1010,9 +1023,11 @@ private: public: bool IsLocalBootConfDiffersFromConsole = false; - THashMap<NKikimrConfig::TBootstrap::ETabletType, TVector<ui32>> TabletTypeToNodes; + NKikimrConfig::TBootstrap BootstrapConfig; THashMap<ui32, TVector<NKikimrConfig::TBootstrap::ETabletType>> NodeToTabletTypes; + THashMap<NKikimrConfig::TBootstrap::ETabletType, TSimpleSharedPtr<TSysTabletsNodesCounter>> SysNodesCheckers; + TIntrusiveConstPtr<TStateStorageInfo> StateStorageInfo; TVector<TStateStorageRingInfoPtr> StateStorageRings; }; diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index f663a37f08d..211a7e0c622 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -373,7 +373,7 @@ bool TCms::CheckActionShutdownNode(const NKikimrCms::TAction &action, } if (!AppData(ctx)->DisableCheckingSysNodesCms && - !CheckSysTabletsNode(action, opts, node, error)) { + !CheckSysTabletsNode(opts, node, error)) { return false; } @@ -533,8 +533,7 @@ bool TCms::TryToLockStateStorageReplica(const TAction& action, return true; } -bool TCms::CheckSysTabletsNode(const TAction &action, - const TActionOptions &opts, +bool TCms::CheckSysTabletsNode(const TActionOptions &opts, const TNodeInfo &node, TErrorInfo &error) const { @@ -543,55 +542,12 @@ bool TCms::CheckSysTabletsNode(const TAction &action, } for (auto &tabletType : ClusterInfo->NodeToTabletTypes[node.NodeId]) { - ui32 disabledNodesCnt = 1; // сounting including this node - TErrorInfo err; - TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration; - 
TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime; - - for (auto &nodeId : ClusterInfo->TabletTypeToNodes[tabletType]) { - if (nodeId == node.NodeId) { - continue; - } - if (ClusterInfo->Node(nodeId).IsLocked(err, State->Config.DefaultRetryTime, - TActivationContext::Now(), duration) || - ClusterInfo->Node(nodeId).IsDown(err, defaultDeadline)) - { - ++disabledNodesCnt; - } - } - - ui32 tabletNodes = ClusterInfo->TabletTypeToNodes[tabletType].size(); - switch (opts.AvailabilityMode) { - case MODE_MAX_AVAILABILITY: - if (tabletNodes > 1 && disabledNodesCnt * 2 > tabletNodes){ - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType) - << " has too many locked nodes: " << disabledNodesCnt - << " limit: " << tabletNodes / 2 << " (50%)"; - error.Deadline = defaultDeadline; - return false; - } - break; - case MODE_KEEP_AVAILABLE: - if (tabletNodes > 1 && disabledNodesCnt > tabletNodes - 1) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType) - << " has too many locked nodes: " << disabledNodesCnt - << ". 
At least one node must be available"; - error.Deadline = defaultDeadline; - return false; - } - break; - case MODE_FORCE_RESTART: - break; - default: - error.Code = TStatus::WRONG_REQUEST; - error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")", - EAvailabilityMode_Name(opts.AvailabilityMode).data(), - static_cast<ui32>(opts.AvailabilityMode)); - error.Deadline = defaultDeadline; - return false; - } + if (!ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode)) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode); + error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime; + return false; + } } return true; @@ -605,12 +561,10 @@ bool TCms::TryToLockNode(const TAction& action, TDuration duration = TDuration::MicroSeconds(action.GetDuration()); duration += opts.PermissionDuration; - bool isForceRestart = opts.AvailabilityMode == NKikimrCms::MODE_FORCE_RESTART; - - if (!ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, isForceRestart)) + if (!ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode)) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = ClusterInfo->ClusterNodes->ReadableReason(); + error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode); error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime; return false; @@ -618,10 +572,10 @@ bool TCms::TryToLockNode(const TAction& action, if (node.Tenant && opts.TenantPolicy != NONE - && !ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, isForceRestart)) + && !ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode)) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(); + error.Reason = 
ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode); error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime; return false; @@ -1427,6 +1381,9 @@ void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx // all the information about the tenants on the disconnected nodes info->GenerateTenantNodesCheckers(); + if (!AppData(ctx)->DisableCheckingSysNodesCms) + info->GenerateSysTabletsNodesCheckers(); + AdjustInfo(info, ctx); State->ClusterInfo = info; diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index e3c3bfb66da..9cb688b125a 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -301,8 +301,7 @@ private: bool CheckActionReplaceDevices(const NKikimrCms::TAction &action, const TActionOptions &options, TErrorInfo &error) const; - bool CheckSysTabletsNode(const NKikimrCms::TAction &action, - const TActionOptions &opts, + bool CheckSysTabletsNode(const TActionOptions &opts, const TNodeInfo &node, TErrorInfo &error) const; bool TryToLockNode(const NKikimrCms::TAction &action, diff --git a/ydb/core/cms/info_collector.cpp b/ydb/core/cms/info_collector.cpp index 278202797bb..8f4b021e1a2 100644 --- a/ydb/core/cms/info_collector.cpp +++ b/ydb/core/cms/info_collector.cpp @@ -215,12 +215,11 @@ void TInfoCollector::RequestBootstrapConfig() { void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr& ev) { const auto& config = ev->Get()->Config; const auto& initialBootstrapConfig = AppData()->BootstrapConfig; - const NKikimrConfig::TBootstrap* bootstrapConfig = nullptr; BootstrapConfigReceived = true; if (!config->HasBootstrapConfig()) { LOG_I("Couldn't collect bootstrap config from Console. 
Taking the local config"); - bootstrapConfig = &initialBootstrapConfig; + Info->BootstrapConfig.CopyFrom(initialBootstrapConfig); } else { const auto& currentBootstrapConfig = config->GetBootstrapConfig(); @@ -232,12 +231,9 @@ void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr& ev Info->IsLocalBootConfDiffersFromConsole = true; } - bootstrapConfig = &currentBootstrapConfig; + Info->BootstrapConfig.CopyFrom(currentBootstrapConfig); } - Y_VERIFY(bootstrapConfig); - Info->ApplySysTabletsInfo(*bootstrapConfig); - MaybeReplyAndDie(); } diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp index 5ebb1646ca8..8a857b0a63f 100644 --- a/ydb/core/cms/node_checkers.cpp +++ b/ydb/core/cms/node_checkers.cpp @@ -6,10 +6,10 @@ namespace NKikimr::NCms { -#define NCH_LOG_D(stream) LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::CMS, "[Checker] " << stream) -#define NCH_LOG_T(stream) LOG_TRACE_S (*TlsActivationContext, NKikimrServices::CMS, "[Checker] " << stream) +#define NCH_LOG_D(stream) LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream) +#define NCH_LOG_T(stream) LOG_TRACE_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream) -TNodesStateBase::ENodeState TNodesStateBase::NodeState(NKikimrCms::EState state) { +TNodesLimitsCounterBase::ENodeState INodesChecker::NodeState(NKikimrCms::EState state) { switch (state) { case NKikimrCms::UP: return NODE_STATE_UP; @@ -24,14 +24,14 @@ TNodesStateBase::ENodeState TNodesStateBase::NodeState(NKikimrCms::EState state) } } -void TNodesStateBase::AddNode(ui32 nodeId) { +void TNodesCounterBase::AddNode(ui32 nodeId) { if (NodeToState.contains(nodeId)) { return; } NodeToState[nodeId] = NODE_STATE_UNSPECIFIED; } -void TNodesStateBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) { +void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) { if (!NodeToState.contains(nodeId)) { AddNode(nodeId); } @@ -57,7 +57,7 @@ void 
TNodesStateBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) { } } -void TNodesStateBase::LockNode(ui32 nodeId) { +void TNodesCounterBase::LockNode(ui32 nodeId) { Y_VERIFY(NodeToState.contains(nodeId)); ++LockedNodesCount; @@ -69,7 +69,7 @@ void TNodesStateBase::LockNode(ui32 nodeId) { } } -void TNodesStateBase::UnlockNode(ui32 nodeId) { +void TNodesCounterBase::UnlockNode(ui32 nodeId) { Y_VERIFY(NodeToState.contains(nodeId)); --LockedNodesCount; @@ -81,10 +81,11 @@ void TNodesStateBase::UnlockNode(ui32 nodeId) { } } -bool TNodesStateBase::TryToLockNode(ui32 nodeId, bool isForceRestart) { +bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const { Y_VERIFY(NodeToState.contains(nodeId)); + auto nodeState = NodeToState.at(nodeId); - auto nodeState = NodeToState[nodeId]; + bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART; NCH_LOG_D("Checking Node: " << nodeId << ", with state: " << ToString(nodeState) @@ -128,4 +129,42 @@ bool TNodesStateBase::TryToLockNode(ui32 nodeId, bool isForceRestart) { return true; } +bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const { + Y_VERIFY(NodeToState.contains(nodeId)); + auto nodeState = NodeToState.at(nodeId); + + NCH_LOG_D("Checking limits for sys tablet: " << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) + << ", on node: " << nodeId + << ", with state: " << ToString(nodeState) + << ", locked nodes: " << LockedNodesCount + << ", down nodes: " << DownNodesCount); + + if (nodeState == NODE_STATE_RESTART || + nodeState == NODE_STATE_LOCKED || + nodeState == NODE_STATE_UNSPECIFIED) { + + return false; + } + + ui32 tabletNodes = NodeToState.size(); + switch (mode) { + case NKikimrCms::MODE_MAX_AVAILABILITY: + if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) * 2 > tabletNodes){ + return false; + } + break; + case NKikimrCms::MODE_KEEP_AVAILABLE: + if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) > 
tabletNodes - 1) { + return false; + } + break; + case NKikimrCms::MODE_FORCE_RESTART: + break; + default: + Y_FAIL("Unknown availability mode"); + } + + return true; +} + } // namespace NKikimr::NCms diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h index fd3544b6ad5..72362c84dba 100644 --- a/ydb/core/cms/node_checkers.h +++ b/ydb/core/cms/node_checkers.h @@ -5,6 +5,7 @@ #include <ydb/core/blobstorage/base/blobstorage_vdiskid.h> #include <ydb/core/erasure/erasure.h> #include <ydb/core/protos/cms.pb.h> +#include <ydb/core/protos/config.pb.h> #include <library/cpp/actors/core/log.h> @@ -20,7 +21,12 @@ namespace NKikimr::NCms { -class TNodesStateBase { +/** + * A base class for storing the state of some group of nodes. For example, tenant nodes, state storage nodes, etc. + * + * Different groups of nodes may have their own failure model, so the checks for node permissions may be different. + */ +class INodesChecker { public: enum ENodeState : ui32 { NODE_STATE_UNSPECIFIED /* "Unspecified" */, @@ -30,59 +36,93 @@ public: NODE_STATE_DOWN /* "Down" */ }; -private: +protected: static ENodeState NodeState(NKikimrCms::EState state); -protected: - ui32 DisabledNodesLimit; - ui32 DisabledNodesRatioLimit; +public: + virtual ~INodesChecker() = default; + + virtual void AddNode(ui32 nodeId) = 0; + virtual void UpdateNode(ui32 nodeId, NKikimrCms::EState) = 0; + + virtual void LockNode(ui32 nodeId) = 0; + virtual void UnlockNode(ui32 nodeId) = 0; + + virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0; + + virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0; +}; +/** + * Base class for simple nodes counter with some limits + */ +class TNodesCounterBase : public INodesChecker { +protected: THashMap<ui32, ENodeState> NodeToState; ui32 LockedNodesCount; ui32 DownNodesCount; public: - TNodesStateBase(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) + TNodesCounterBase() + : 
LockedNodesCount(0) + , DownNodesCount(0) + {} + + virtual ~TNodesCounterBase() = default; + + void AddNode(ui32 nodeId) override; + void UpdateNode(ui32 nodeId, NKikimrCms::EState) override; + + void LockNode(ui32 nodeId) override; + void UnlockNode(ui32 nodeId) override; +}; + +/** + * Base class for counting groups of nodes with a limit on the number of locked and disabled nodes. + * + * Each such group of nodes has parameters, which are set in CmsConfigItem + * DisabledNodesLimit - the maximum number of unavailable nodes + * DisabledNodesRatioLimit - the maximum percentage of unavailable nodes + */ +class TNodesLimitsCounterBase : public TNodesCounterBase { +protected: + ui32 DisabledNodesLimit; + ui32 DisabledNodesRatioLimit; + +public: + TNodesLimitsCounterBase(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) : DisabledNodesLimit(disabledNodesLimit) , DisabledNodesRatioLimit(disabledNodesRatioLimit) - , LockedNodesCount(0) - , DownNodesCount(0) { } - virtual ~TNodesStateBase() = default; + virtual ~TNodesLimitsCounterBase() = default; void ApplyLimits(ui32 nodesLimit, ui32 ratioLimit) { DisabledNodesLimit = nodesLimit; DisabledNodesRatioLimit = ratioLimit; } - void AddNode(ui32 nodeId); - void UpdateNode(ui32 nodeId, NKikimrCms::EState); - - void LockNode(ui32 nodeId); - void UnlockNode(ui32 nodeId); - - bool TryToLockNode(ui32 nodeId, bool isForceRestart = false); - - virtual std::string ReadableReason() const = 0; - + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final; }; -class TTenantState : public TNodesStateBase { +class TTenantLimitsCounter : public TNodesLimitsCounterBase { private: const std::string TenantName; public: - TTenantState(const std::string &tenantName, ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) - : TNodesStateBase(disabledNodesLimit, disabledNodesRatioLimit) + TTenantLimitsCounter(const std::string &tenantName, ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) + : 
TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit) , TenantName(tenantName) { } - std::string ReadableReason() const override final { + std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final { + Y_UNUSED(mode); + std::stringstream reason; - reason << "Too many locked nodes for tenant " << TenantName + reason << "Cannot lock node: " << nodeId + << ". Too many locked nodes for tenant " << TenantName << "; locked: " << LockedNodesCount << "; down: " << DownNodesCount << "; total: " << NodeToState.size() @@ -93,16 +133,19 @@ public: } }; -class TClusterNodesState : public TNodesStateBase { +class TClusterLimitsCounter : public TNodesLimitsCounterBase { public: - TClusterNodesState(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) - : TNodesStateBase(disabledNodesLimit, disabledNodesRatioLimit) + TClusterLimitsCounter(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) + : TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit) { } - std::string ReadableReason() const override final { + std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final { + Y_UNUSED(mode); + std::stringstream reason; - reason << "Too many locked nodes in cluster" + reason << "Cannot lock node: " << nodeId + <<". Too many locked nodes in cluster" << "; locked: " << LockedNodesCount << "; down: " << DownNodesCount << "; total: " << NodeToState.size() @@ -113,4 +156,45 @@ public: } }; +/** + * Class to hold information about nodes where can start some system tablet. Those nodes are + * described in bootstrap config. 
+ * + * At least one node from the bootstrap list must always be available + */ +class TSysTabletsNodesCounter : public TNodesCounterBase { +private: + NKikimrConfig::TBootstrap::ETabletType TabletType; + +public: + explicit TSysTabletsNodesCounter(NKikimrConfig::TBootstrap::ETabletType tabletType) + : TabletType(tabletType) + {} + + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final; + + std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final { + std::stringstream reason; + + if (mode == NKikimrCms::MODE_FORCE_RESTART) { + return reason.str(); + } + + reason << "Cannot lock node: " << nodeId + << ". Tablet " + << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) + << " has too many unavailable nodes. Locked: " << LockedNodesCount + << ". Down: " << DownNodesCount; + if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) { + reason << ". Limit: " << NodeToState.size() / 2 << " (50%)"; + } + + if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) { + reason << ". Limit: " << NodeToState.size() - 1; + } + + return reason.str(); + } +}; + } // namespace NKikimr::NCms |
