diff options
author | t1mursadykov <t1mursadykov@ydb.tech> | 2022-11-02 22:42:50 +0300 |
---|---|---|
committer | t1mursadykov <t1mursadykov@ydb.tech> | 2022-11-02 22:42:50 +0300 |
commit | 30f08903e52a60c9459d38dfcbddd6f3794b0c9c (patch) | |
tree | 0038d341981115f68f66d0027618bd96dc69c280 | |
parent | 0b74f5913a3605197aea520fb0663f00172a8ddb (diff) | |
download | ydb-30f08903e52a60c9459d38dfcbddd6f3794b0c9c.tar.gz |
State Storage counting improvement in CMS
-rw-r--r-- | ydb/core/cms/cluster_info.cpp | 56 | ||||
-rw-r--r-- | ydb/core/cms/cluster_info.h | 102 | ||||
-rw-r--r-- | ydb/core/cms/cms.cpp | 156 | ||||
-rw-r--r-- | ydb/core/cms/cms_impl.h | 12 | ||||
-rw-r--r-- | ydb/core/cms/cms_tenants_ut.cpp | 8 | ||||
-rw-r--r-- | ydb/core/cms/cms_tx_load_state.cpp | 1 | ||||
-rw-r--r-- | ydb/core/cms/cms_ut.cpp | 77 | ||||
-rw-r--r-- | ydb/core/cms/cms_ut_common.cpp | 5 | ||||
-rw-r--r-- | ydb/core/cms/cms_ut_common.h | 8 | ||||
-rw-r--r-- | ydb/core/cms/info_collector.cpp | 49 |
10 files changed, 355 insertions, 119 deletions
diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp index e30d9811df..733f3fc702 100644 --- a/ydb/core/cms/cluster_info.cpp +++ b/ydb/core/cms/cluster_info.cpp @@ -3,6 +3,7 @@ #include <util/string/builder.h> #include <util/system/hostname.h> +#include <util/datetime/base.h> #if defined BLOG_D || defined BLOG_I || defined BLOG_ERROR #error log macro definition clash @@ -298,6 +299,40 @@ void TVDiskInfo::MigrateOldInfo(const TLockableItem &old) } } +TStateStorageRingInfo::RingState TStateStorageRingInfo::CountState(TInstant now, + TDuration retryTime, + TDuration duration) const +{ + if (IsDisabled) { + return Disabled; + } + + ui32 unavailableReplicas = 0; + bool hasTimeout = false; + TErrorInfo error; + for (auto &node : Replicas) { + if (node->IsDown(error, now + retryTime) + || node->IsLocked(error, retryTime, now, duration)) { + ++unavailableReplicas; + continue; + } + + if (now <= node->StartTime + Timeout) { + hasTimeout = true; + } + } + + if (unavailableReplicas > 0) { + return Restart; + } + + if (hasTimeout) { + return Locked; + } + + return Ok; +} + void TClusterInfo::SetTimestamp(TInstant timestamp) { Timestamp = timestamp; @@ -347,6 +382,7 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi auto &node = NodeRef(nodeId); node.State = state; + node.StartTime = TInstant::MilliSeconds(info.GetStartTime()); node.Version = info.GetVersion(); node.Services = TServices(); @@ -798,6 +834,26 @@ void TClusterInfo::ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config) } } +void TClusterInfo::ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info) { + StateStorageInfoReceived = true; + for (ui32 ringId = 0; ringId < info->Rings.size(); ++ringId) { + auto &ring = info->Rings[ringId]; + TStateStorageRingInfoPtr ringInfo = MakeIntrusive<TStateStorageRingInfo>(); + ringInfo->RingId = ringId; + if (ring.IsDisabled) + ringInfo->SetDisabled(); + + for(auto replica : ring.Replicas) { + Y_VERIFY(HasNode(replica.NodeId())); + ringInfo->AddNode(Nodes[replica.NodeId()]); + StateStorageReplicas.insert(replica.NodeId()); + StateStorageNodeToRingId[replica.NodeId()] = ringId; + } + + StateStorageRings.push_back(ringInfo); + } +} + void TClusterInfo::DebugDump(const TActorContext &ctx) const { LOG_DEBUG_S(ctx, NKikimrServices::CMS, diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h index 16baceed49..38c8aef06f 100644 --- a/ydb/core/cms/cluster_info.h +++ b/ydb/core/cms/cluster_info.h @@ -6,7 +6,9 @@ #include "services.h" #include <library/cpp/actors/interconnect/interconnect.h> +#include <library/cpp/actors/core/actor.h> #include <ydb/core/base/blobstorage.h> +#include <ydb/core/base/statestorage.h> #include <ydb/core/node_whiteboard/node_whiteboard.h> #include <ydb/core/blobstorage/base/blobstorage_vdiskid.h> #include <ydb/core/mind/tenant_pool.h> @@ -16,6 +18,7 @@ #include <util/generic/hash.h> #include <util/generic/maybe.h> #include <util/generic/set.h> +#include "util/generic/ptr.h" namespace NKikimr { namespace NCms { @@ -328,6 +331,7 @@ public: TString Tenant; TString PreviousTenant; TServices Services; + TInstant StartTime; }; using TNodeInfoPtr = TIntrusivePtr<TNodeInfo>; @@ -452,6 +456,83 @@ struct TBSGroupInfo { }; /** + * Structure to hold info and state for a state storage. It helps to + * avoid the situation when we quickly unlock one state stotage node and + * immediately lock another node from different ring + */ +class TStateStorageRingInfo : public TThrRefBase { +public: + + /** + * Ok: we can allow to restart nodes; + * + * Locked: all nodes are up. We restarted some nodes before and waiting + * some timeout to allow restart nodes from other ring. + * But, we still can restart nodes from this ring; + * + * Disabled: Disabled ring (see state storage config). The ring + * affects permissions of other rings, but this ring + * can be disabled without considering the others; + * + * Restart: has some restarting or down nodes. We can still restart + * nodes from this ring; + */ + enum RingState : ui8 { + Unknown = 0, + Ok, + Locked, + Disabled, + Restart, + }; + + TStateStorageRingInfo() = default; + TStateStorageRingInfo(const TStateStorageRingInfo &other) = default; + TStateStorageRingInfo(TStateStorageRingInfo &&other) = default; + + TStateStorageRingInfo &operator=(const TStateStorageRingInfo &other) = default; + TStateStorageRingInfo &operator=(TStateStorageRingInfo &&other) = default; + + static TString RingStateToString(RingState state) { + switch (state) { + case Unknown: + return "Unknown"; + break; + case Ok: + return "Ok"; + break; + case Locked: + return "Locked"; + break; + case Restart: + return "Restart"; + break; + default: + return "Unknown ring state"; + break; + } + } + + void AddNode(TNodeInfoPtr &node) { + Replicas.push_back(node); + } + + void SetDisabled() { + IsDisabled = true; + } + + RingState CountState(TInstant now, + TDuration retryTime, + TDuration duration) const; + + ui32 RingId = 0; + bool IsDisabled = false; + const TDuration Timeout = TDuration::Minutes(2); + + TVector<TNodeInfoPtr> Replicas; +}; +using TStateStorageRingInfoPtr = TIntrusivePtr<TStateStorageRingInfo>; + +/** * Main class to hold current cluster state. * * State is built by merging pieces of information from NodeWhiteboard through @@ -475,6 +556,21 @@ public: TClusterInfo &operator=(const TClusterInfo &other) = default; TClusterInfo &operator=(TClusterInfo &&other) = default; + void ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info); + + bool IsStateStorageReplicaNode(ui32 nodeId) { + return StateStorageReplicas.contains(nodeId); + } + + bool IsStateStorageinfoReceived() { + return StateStorageInfoReceived; + } + + ui32 GetRingId(ui32 nodeId) { + Y_VERIFY(IsStateStorageReplicaNode(nodeId)); + return StateStorageNodeToRingId[nodeId]; + } + bool HasNode(ui32 nodeId) const { return Nodes.contains(nodeId); @@ -823,16 +919,22 @@ private: ui64 RollbackPoint = 0; bool HasTenantsInfo = false; bool Outdated = false; + bool StateStorageInfoReceived; // Fast access structures. TMultiMap<TString, ui32> HostNameToNodeId; TMultiMap<TString, ui32> TenantToNodeId; THashMap<TString, TLockableItemPtr> LockableItems; + THashSet<ui32> StateStorageReplicas; + THashMap<ui32, ui32> StateStorageNodeToRingId; public: bool IsLocalBootConfDiffersFromConsole = false; THashMap<NKikimrConfig::TBootstrap::ETabletType, TVector<ui32>> TabletTypeToNodes; THashMap<ui32, TVector<NKikimrConfig::TBootstrap::ETabletType>> NodeToTabletTypes; + + TIntrusiveConstPtr<TStateStorageInfo> StateStorageInfo; + TVector<TStateStorageRingInfoPtr> StateStorageRings; }; inline bool ActionRequiresHost(NKikimrCms::TAction::EType type) diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index 6698252bdb..050bb3e417 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -122,16 +122,6 @@ void TCms::ProcessInitQueue(const TActorContext &ctx) } } -void TCms::RequestStateStorageConfig(const TActorContext &ctx) { - const auto& domains = *AppData(ctx)->DomainsInfo; - ui32 domainUid = domains.Domains.begin()->second->DomainUid; - const ui32 stateStorageGroup = domains.GetDefaultStateStorageGroup(domainUid); - - const TActorId proxy = MakeStateStorageProxyID(stateStorageGroup); - - ctx.Send(proxy, new TEvStateStorage::TEvListStateStorage(), IEventHandle::FlagTrackDelivery); -} - void TCms::SubscribeForConfig(const TActorContext &ctx) { ctx.Register(NConsole::CreateConfigSubscriber(TabletID(), @@ -421,7 +411,7 @@ bool TCms::CheckActionShutdownNode(const NKikimrCms::TAction &action, } // node is not locked - if (!CheckActionShutdownStateStorage(action, opts, node, error)) { + if (!TryToLockStateStorageReplica(action, opts, node, error, ctx)) { return false; } @@ -484,70 +474,103 @@ bool TCms::CheckActionShutdownHost(const TAction &action, return true; } -bool TCms::CheckActionShutdownStateStorage( - const TAction& action, - const TActionOptions& opts, - const TNodeInfo& node, - TErrorInfo& error) const +bool TCms::TryToLockStateStorageReplica(const TAction& action, + const TActionOptions& opts, + const TNodeInfo& node, + TErrorInfo& error, + const TActorContext &ctx) const { - // TODO (t1mursadykov): отслеживание времени отключенных стейт стораджей - if (opts.AvailabilityMode == MODE_FORCE_RESTART) { - return true; - } - TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime; - if (!StateStorageInfo) { + if (!ClusterInfo->IsStateStorageinfoReceived()) { error.Code = TStatus::DISALLOW_TEMP; error.Reason = "Did not received state storage configuration"; error.Deadline = defaultDeadline; return false; } - if (!StateStorageNodes.contains(node.NodeId)) { + if (!ClusterInfo->IsStateStorageReplicaNode(node.NodeId)) { return true; } - THashSet<ui32> injuredRings; + const ui32 nToSelect = ClusterInfo->StateStorageInfo->NToSelect; + const ui32 currentRing = ClusterInfo->GetRingId(node.NodeId); + ui8 currentRingState = TStateStorageRingInfo::Unknown; + ui32 restartRings = 0; + ui32 lockedRings = 0; + ui32 disabledRings = 0; + auto now = AppData(ctx)->TimeProvider->Now(); TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration; - TErrorInfo err; - TStringStream brokenNodesMsg; - for (auto& i : StateStorageNodes) { - if (node.NodeId == i) { + for (auto ringInfo : ClusterInfo->StateStorageRings) { + auto state = ringInfo->CountState(now, State->Config.DefaultRetryTime, duration); + LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::CMS, "Ring: " << ringInfo->RingId + << "; State: " << TStateStorageRingInfo::RingStateToString(state)); + + if (ringInfo->RingId == currentRing) { + if (state == TStateStorageRingInfo::Disabled) { + return true; + } + currentRingState = state; continue; } - if (ClusterInfo->Node(i).IsLocked(err, State->Config.DefaultRetryTime, - TActivationContext::Now(), duration) || - ClusterInfo->Node(i).IsDown(err, defaultDeadline)) { - - injuredRings.insert(NodeToRing.at(i)); - brokenNodesMsg << " " << i; + switch (state) { + case TStateStorageRingInfo::Ok: + break; + case TStateStorageRingInfo::Locked: + ++lockedRings; + break; + case TStateStorageRingInfo::Restart: + ++restartRings; + break; + case TStateStorageRingInfo::Disabled: + ++disabledRings; + break; + default: + break; } } + Y_VERIFY(currentRingState != TStateStorageRingInfo::Unknown); - if (injuredRings.size() == 0) { - return true; - } - - if ((opts.AvailabilityMode == MODE_MAX_AVAILABILITY && injuredRings.size() > 1) || - (opts.AvailabilityMode == MODE_KEEP_AVAILABLE && injuredRings.size() > 2)) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Too many broken state storage rings: " << injuredRings.size() << - " Down state storage nodes:" << brokenNodesMsg.Str(); - error.Deadline = defaultDeadline; - return false; - } - - if (injuredRings.contains(NodeToRing.at(node.NodeId))) { - return true; - } + // Add current ring to restart rings + ++restartRings; - if (opts.AvailabilityMode == MODE_MAX_AVAILABILITY || injuredRings.size() > 2) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "There are down state storage nodes in other rings. " - "Down state storage nodes: " << brokenNodesMsg.Str(); - error.Deadline = defaultDeadline; - return false; + switch (opts.AvailabilityMode) { + case MODE_MAX_AVAILABILITY: + if (restartRings + lockedRings > 1) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << "Too many unavailable state storage rings" + << ". Restarting rings: " + << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) + << ". Temporary (for a 2 minutes) locked rings: " + << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) + << ". Maximum allowed number of unavailable rings for this mode: " << 1; + error.Deadline = defaultDeadline; + return false; + } + break; + case MODE_KEEP_AVAILABLE: + if (restartRings + lockedRings + disabledRings > (nToSelect - 1) / 2) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << "Too many unavailable state storage rings" + << ". Restarting rings: " + << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) + << ". Temporary (for a 2 minutes) locked rings: " + << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) + << ". Disabled rings: " << disabledRings + << ". Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1) / 2; + error.Deadline = defaultDeadline; + return false; + } + break; + case MODE_FORCE_RESTART: + break; + default: + error.Code = TStatus::WRONG_REQUEST; + error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")", + EAvailabilityMode_Name(opts.AvailabilityMode).data(), + static_cast<ui32>(opts.AvailabilityMode)); + error.Deadline = defaultDeadline; + return false; } return true; @@ -1450,27 +1473,6 @@ void TCms::Handle(TEvCms::TEvGetClusterInfoRequest::TPtr &ev, const TActorContex ctx.Send(ev->Sender, resp.Release()); } -void TCms::Handle(TEvStateStorage::TEvListStateStorageResult::TPtr& ev, const TActorContext &ctx) { - auto& info = ev->Get()->Info; - if (!info) { - LOG_NOTICE_S(ctx, NKikimrServices::CMS, - "Couldn't collect group info"); - return; - } - - StateStorageInfo = info; - - // index in array will be used as ring id for simplicity - for (ui32 ring = 0; ring < info->Rings.size(); ++ring) { - for (auto& replica : info->Rings[ring].Replicas) { - ui32 nodeId = replica.NodeId(); - - NodeToRing[nodeId] = ring; - StateStorageNodes.insert(nodeId); - } - } -} - void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx) { if (!ev->Get()->Success) { diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index a4e69f0b8f..4a6e4d3e9f 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -213,7 +213,6 @@ private: STFUNC(StateWork) { switch (ev->GetTypeRewrite()) { - HFunc(TEvStateStorage::TEvListStateStorageResult, Handle); HFunc(TEvPrivate::TEvClusterInfo, Handle); HFunc(TEvPrivate::TEvLogAndSend, Handle); FFunc(TEvPrivate::EvUpdateClusterInfo, EnqueueRequest); @@ -266,7 +265,6 @@ private: const TActorContext &ctx) override; void ProcessInitQueue(const TActorContext &ctx); - void RequestStateStorageConfig(const TActorContext &ctx); void SubscribeForConfig(const TActorContext &ctx); void AdjustInfo(TClusterInfoPtr &info, const TActorContext &ctx) const; bool CheckPermissionRequest(const NKikimrCms::TPermissionRequest &request, @@ -319,10 +317,11 @@ private: const TVDiskInfo &vdisk, TDuration duration, TErrorInfo &error) const; - bool CheckActionShutdownStateStorage(const NKikimrCms::TAction &action, - const TActionOptions &options, - const TNodeInfo &node, - TErrorInfo& error) const; + bool TryToLockStateStorageReplica(const NKikimrCms::TAction &action, + const TActionOptions &options, + const TNodeInfo &node, + TErrorInfo& error, + const TActorContext &ctx) const; void AcceptPermissions(NKikimrCms::TPermissionResponse &resp, const TString &requestId, const TString &owner, const TActorContext &ctx, bool check = false); void ScheduleUpdateClusterInfo(const TActorContext &ctx, bool now = false); @@ -377,7 +376,6 @@ private: void OnBSCPipeDestroyed(const TActorContext &ctx); - void Handle(TEvStateStorage::TEvListStateStorageResult::TPtr& ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvLogAndSend::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvUpdateClusterInfo::TPtr &ev, const TActorContext &ctx); diff --git a/ydb/core/cms/cms_tenants_ut.cpp b/ydb/core/cms/cms_tenants_ut.cpp index e14fe4dd57..d987374cb4 100644 --- a/ydb/core/cms/cms_tenants_ut.cpp +++ b/ydb/core/cms/cms_tenants_ut.cpp @@ -289,19 +289,19 @@ Y_UNIT_TEST_SUITE(TCmsTenatsTest) { env.SetLimits(1, 10, 1, 10); // Mark node as down and try to lock it. - TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false; + TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; env.CheckPermissionRequest("user", false, false, false, false, TStatus::ALLOW, - MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000)); + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000)); // Now node is locked and we cannot lock this or another node. env.CheckPermissionRequest("user", false, false, false, true, TStatus::DISALLOW_TEMP, - MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000)); + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000)); env.CheckPermissionRequest("user", false, false, false, true, TStatus::DISALLOW_TEMP, - MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000)); + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(2), 60000000)); } void TestScheduledPermission(bool defaultPolicy) diff --git a/ydb/core/cms/cms_tx_load_state.cpp b/ydb/core/cms/cms_tx_load_state.cpp index d34efc2b42..cee378a860 100644 --- a/ydb/core/cms/cms_tx_load_state.cpp +++ b/ydb/core/cms/cms_tx_load_state.cpp @@ -197,7 +197,6 @@ public: Self->ScheduleLogCleanup(ctx); Self->ScheduleUpdateClusterInfo(ctx, true); Self->ProcessInitQueue(ctx); - Self->RequestStateStorageConfig(ctx); } }; diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp index 740181c370..b7a44c1021 100644 --- a/ydb/core/cms/cms_ut.cpp +++ b/ydb/core/cms/cms_ut.cpp @@ -855,10 +855,10 @@ Y_UNIT_TEST_SUITE(TCmsTest) { { TCmsTestEnv env(8); - TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false; + TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; env.CheckPermissionRequest("user", false, false, false, true, TStatus::ALLOW, - MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 60000000, "storage")); + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 60000000, "storage")); } void TestAvailabilityMode(EAvailabilityMode mode, bool disconnectNodes) @@ -867,6 +867,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { || mode == MODE_FORCE_RESTART); TCmsTestEnv env(8); + env.AdvanceCurrentTime(TDuration::Minutes(3)); auto res1 = env.ExtractPermissions (env.CheckPermissionRequest("user", false, false, false, @@ -932,6 +933,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { || mode == MODE_FORCE_RESTART); TCmsTestEnv env(8); + env.AdvanceCurrentTime(TDuration::Minutes(3)); auto res1 = env.ExtractPermissions (env.CheckPermissionRequest("user", true, false, true, @@ -1072,7 +1074,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { } Y_UNIT_TEST(StateStorageTwoRings) - { + { TTestEnvOpts options(5); options.VDisks = 0; options.NRings = 2; @@ -1080,11 +1082,12 @@ Y_UNIT_TEST_SUITE(TCmsTest) { options.NToSelect = 2; TCmsTestEnv env(options); - - env.CheckPermissionRequest("user", false, false, false, true, TStatus::ALLOW, + env.AdvanceCurrentTime(TDuration::Minutes(3)); + + env.CheckPermissionRequest("user", false, false, false, true, MODE_MAX_AVAILABILITY, TStatus::ALLOW, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 60000000, "storage")); - env.CheckPermissionRequest("user", false, false, false, true, TStatus::DISALLOW_TEMP, + env.CheckPermissionRequest("user", false, false, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage")); } @@ -1097,6 +1100,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { options.NToSelect = 2; TCmsTestEnv env(options); + env.AdvanceCurrentTime(TDuration::Minutes(3)); env.CheckPermissionRequest("user", false, false, false, true, TStatus::ALLOW, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 60000000, "storage")); @@ -1106,45 +1110,45 @@ Y_UNIT_TEST_SUITE(TCmsTest) { } Y_UNIT_TEST(StateStorageAvailabilityMode) - { - TTestEnvOpts options(5); + { + TTestEnvOpts options(10); options.VDisks = 0; - options.NRings = 2; + options.NRings = 5; options.RingSize = 2; - options.NToSelect = 2; + options.NToSelect = 5; TCmsTestEnv env(options); - + TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; env.RestartCms(); env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage")); - + env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage")); } Y_UNIT_TEST(StateStorageTwoBrokenRings) { - TTestEnvOpts options(7); + TTestEnvOpts options(10); options.VDisks = 0; - options.NRings = 3; + options.NRings = 5; options.RingSize = 2; - options.NToSelect = 2; + options.NToSelect = 5; TCmsTestEnv env(options); - TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false; + TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; TFakeNodeWhiteboardService::Info[env.GetNodeId(2)].Connected = false; env.RestartCms(); env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW, - MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 60000000, "storage")); + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage")); env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP, - MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 60000000, "storage")); + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage")); } Y_UNIT_TEST(StateStorageRollingRestart) @@ -1153,7 +1157,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { options.VDisks = 0; options.NRings = 6; options.RingSize = 3; - options.NToSelect = 5; + options.NToSelect = 6; TCmsTestEnv env(options); @@ -1223,6 +1227,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(WalleCleanupTest) { TCmsTestEnv env(8); + env.RestartCms(); TAutoPtr<NCms::TEvCms::TEvPermissionRequest> event = new NCms::TEvCms::TEvPermissionRequest; event->Record.SetUser(WALLE_CMS_USER); @@ -1230,7 +1235,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { event->Record.SetDryRun(false); event->Record.SetSchedule(false); - AddActions(event, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 600000000, "storage")); + AddActions(event, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 6000000000, "storage")); NKikimrCms::TPermissionResponse res; res = env.CheckPermissionRequest(event, TStatus::ALLOW); @@ -1291,16 +1296,12 @@ Y_UNIT_TEST_SUITE(TCmsTest) { MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage")); } - Y_UNIT_TEST(Mirror3dcPermissions) { TTestEnvOpts options(18); options.UseMirror3dcErasure = true; options.VDisks = 9; - options.NRings = 3; options.DataCenterCount = 3; - options.RingSize = 2; - options.NToSelect = 2; TCmsTestEnv env(options); @@ -1338,6 +1339,34 @@ Y_UNIT_TEST_SUITE(TCmsTest) { env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage")); } + + Y_UNIT_TEST(StateStorageLockedNodes) + { + TTestEnvOpts options(10); + options.VDisks = 0; + options.NRings = 9; + options.RingSize = 1; + options.NToSelect = 9; + + TCmsTestEnv env(options); + + TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; + TFakeNodeWhiteboardService::Info[env.GetNodeId(2)].Connected = false; + + // Node downtime simulation + env.UpdateNodeStartTime(3, env.GetTimeProvider()->Now() + TDuration::Minutes(1)); + env.UpdateNodeStartTime(4, env.GetTimeProvider()->Now() + TDuration::Minutes(1)); + + env.AdvanceCurrentTime(TDuration::Seconds(90)); + env.RestartCms(); + + // Cant allow to restart ring when 2 restart 2 locked + env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP, + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage")); + // Can get node from locked ring + env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW, + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage")); + } } } // NCmsTest } // NKikimr diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp index fc254a89d6..98b6eb3bbe 100644 --- a/ydb/core/cms/cms_ut_common.cpp +++ b/ydb/core/cms/cms_ut_common.cpp @@ -244,7 +244,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC else numGroups = numNodes * numNodeGroups; - auto now = Now(); + auto now = runtime.GetTimeProvider()->Now(); for (ui32 groupId = 0; groupId < numGroups; ++groupId) { auto &group = *config->AddGroup(); group.SetGroupId(groupId); @@ -562,6 +562,9 @@ TCmsTestEnv::TCmsTestEnv(const TTestEnvOpts &options) cmsConfig.MutableTenantLimits()->SetDisabledNodesRatioLimit(0); cmsConfig.MutableClusterLimits()->SetDisabledNodesRatioLimit(0); SetCmsConfig(cmsConfig); + + // Need to allow restart state storage nodes + AdvanceCurrentTime(TDuration::Minutes(2)); } diff --git a/ydb/core/cms/cms_ut_common.h b/ydb/core/cms/cms_ut_common.h index 831b29407c..5ec1fdaec2 100644 --- a/ydb/core/cms/cms_ut_common.h +++ b/ydb/core/cms/cms_ut_common.h @@ -12,6 +12,7 @@ #include <ydb/core/testlib/basics/runtime.h> #include <util/system/mutex.h> +#include <util/datetime/base.h> namespace NKikimr { namespace NCmsTest { @@ -82,7 +83,7 @@ struct TTestEnvOpts { ui32 DataCenterCount; TNodeTenantsMap Tenants; bool UseMirror3dcErasure; - + bool AdvanceCurrentTime; TTestEnvOpts() = default; @@ -97,6 +98,7 @@ struct TTestEnvOpts { , DataCenterCount(1) , Tenants(tenants) , UseMirror3dcErasure(false) + , AdvanceCurrentTime(false) { } }; @@ -129,6 +131,10 @@ public: void EnableSysNodeChecking(); TIntrusiveConstPtr<NKikimr::TStateStorageInfo> GetStateStorageInfo(); + + void UpdateNodeStartTime(ui32 nodeIndex, TInstant startTime) { + TFakeNodeWhiteboardService::Info[GetNodeId(nodeIndex)].SystemStateInfo.SetStartTime(startTime.GetValue()); + } NKikimrCms::TClusterState RequestState(const NKikimrCms::TClusterStateRequest &request = {}, diff --git a/ydb/core/cms/info_collector.cpp b/ydb/core/cms/info_collector.cpp index 23ce5e3702..c138942874 100644 --- a/ydb/core/cms/info_collector.cpp +++ b/ydb/core/cms/info_collector.cpp @@ -38,6 +38,7 @@ public: , Info(new TClusterInfo) , BootstrapConfigReceived(false) , BaseConfigReceived(false) + , StateStorageInfoReceived(false) { } @@ -49,6 +50,9 @@ private: sFunc(TEvents::TEvWakeup, ReplyAndDie); hFunc(TEvConfigsDispatcher::TEvGetConfigResponse, Handle); + // State Storage Config + hFunc(TEvStateStorage::TEvListStateStorageResult, Handle); + // Nodes hFunc(TEvInterconnect::TEvNodesInfo, Handle); @@ -84,6 +88,10 @@ private: //Configs void RequestBootstrapConfig(); void Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev); + + // State Storage + void RequestStateStorageConfig(); + void Handle(TEvStateStorage::TEvListStateStorageResult::TPtr &ev); // BSC void RequestBaseConfig(); @@ -113,6 +121,7 @@ private: TActorId BscPipe; bool BootstrapConfigReceived; bool BaseConfigReceived; + bool StateStorageInfoReceived; THashMap<ui32, TSet<ui32>> NodeEvents; // nodeId -> expected events THashMap<TPDiskID, TPDiskStateInfo, TPDiskIDHash> PDiskInfo; THashMap<TVDiskID, TVDiskStateInfo> VDiskInfo; @@ -121,7 +130,9 @@ private: void TInfoCollector::ReplyAndDie() { auto ev = MakeHolder<TCms::TEvPrivate::TEvClusterInfo>(); - ev->Success = BaseConfigReceived && BootstrapConfigReceived; + ev->Success = BaseConfigReceived + && BootstrapConfigReceived + && StateStorageInfoReceived; if (BaseConfigReceived) { for (const auto& [id, info] : PDiskInfo) { @@ -136,12 +147,18 @@ void TInfoCollector::ReplyAndDie() { ev->Info->SetTimestamp(TlsActivationContext->Now()); } + if (StateStorageInfoReceived) { + Info->ApplyStateStorageInfo(Info->StateStorageInfo); + } + Send(Client, std::move(ev)); PassAway(); } void TInfoCollector::MaybeReplyAndDie() { - if (!BaseConfigReceived || !BootstrapConfigReceived) { + if (!BaseConfigReceived + || !BootstrapConfigReceived + || !StateStorageInfoReceived) { return; } @@ -170,6 +187,7 @@ void TInfoCollector::Bootstrap() { Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); Schedule(Timeout, new TEvents::TEvWakeup()); RequestBootstrapConfig(); + RequestStateStorageConfig(); Become(&TThis::StateWork); } @@ -196,7 +214,6 @@ void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev if (!config->HasBootstrapConfig()){ LOG_I("Couldn't collect bootstrap config from Console. Taking the local config"); bootstrap.CopyFrom(AppData()->BootstrapConfig); - return; } else { LOG_D("Got Bootstrap config" << ": record# " << config->ShortDebugString()); @@ -207,11 +224,35 @@ void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev } bootstrap = config->GetBootstrapConfig(); } - + Info->ApplySysTabletsInfo(bootstrap); MaybeReplyAndDie(); } +void TInfoCollector::RequestStateStorageConfig() { + const auto& domains = *AppData()->DomainsInfo; + ui32 domainUid = domains.Domains.begin()->second->DomainUid; + const ui32 stateStorageGroup = domains.GetDefaultStateStorageGroup(domainUid); + + const TActorId proxy = MakeStateStorageProxyID(stateStorageGroup); + + Send(proxy, new TEvStateStorage::TEvListStateStorage()); +} + +void TInfoCollector::Handle(TEvStateStorage::TEvListStateStorageResult::TPtr& ev) { + auto& info = ev->Get()->Info; + if (!info) { + LOG_E("Couldn't collect state storage config"); + ReplyAndDie(); + return; + } + + StateStorageInfoReceived = true; + Info->StateStorageInfo = info; + + MaybeReplyAndDie(); +} + void TInfoCollector::RequestBaseConfig() { using namespace NTabletPipe; |