aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authort1mursadykov <t1mursadykov@ydb.tech>2022-11-02 22:42:50 +0300
committert1mursadykov <t1mursadykov@ydb.tech>2022-11-02 22:42:50 +0300
commit30f08903e52a60c9459d38dfcbddd6f3794b0c9c (patch)
tree0038d341981115f68f66d0027618bd96dc69c280
parent0b74f5913a3605197aea520fb0663f00172a8ddb (diff)
downloadydb-30f08903e52a60c9459d38dfcbddd6f3794b0c9c.tar.gz
State Storage counting improvement in CMS
-rw-r--r--ydb/core/cms/cluster_info.cpp56
-rw-r--r--ydb/core/cms/cluster_info.h102
-rw-r--r--ydb/core/cms/cms.cpp156
-rw-r--r--ydb/core/cms/cms_impl.h12
-rw-r--r--ydb/core/cms/cms_tenants_ut.cpp8
-rw-r--r--ydb/core/cms/cms_tx_load_state.cpp1
-rw-r--r--ydb/core/cms/cms_ut.cpp77
-rw-r--r--ydb/core/cms/cms_ut_common.cpp5
-rw-r--r--ydb/core/cms/cms_ut_common.h8
-rw-r--r--ydb/core/cms/info_collector.cpp49
10 files changed, 355 insertions, 119 deletions
diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp
index e30d9811df..733f3fc702 100644
--- a/ydb/core/cms/cluster_info.cpp
+++ b/ydb/core/cms/cluster_info.cpp
@@ -3,6 +3,7 @@
#include <util/string/builder.h>
#include <util/system/hostname.h>
+#include <util/datetime/base.h>
#if defined BLOG_D || defined BLOG_I || defined BLOG_ERROR
#error log macro definition clash
@@ -298,6 +299,40 @@ void TVDiskInfo::MigrateOldInfo(const TLockableItem &old)
}
}
+TStateStorageRingInfo::RingState TStateStorageRingInfo::CountState(TInstant now,
+ TDuration retryTime,
+ TDuration duration) const
+{
+ if (IsDisabled) {
+ return Disabled;
+ }
+
+ ui32 unavailableReplicas = 0;
+ bool hasTimeout = false;
+ TErrorInfo error;
+ for (auto &node : Replicas) {
+ if (node->IsDown(error, now + retryTime)
+ || node->IsLocked(error, retryTime, now, duration)) {
+ ++unavailableReplicas;
+ continue;
+ }
+
+ if (now <= node->StartTime + Timeout) {
+ hasTimeout = true;
+ }
+ }
+
+ if (unavailableReplicas > 0) {
+ return Restart;
+ }
+
+ if (hasTimeout) {
+ return Locked;
+ }
+
+ return Ok;
+}
+
void TClusterInfo::SetTimestamp(TInstant timestamp)
{
Timestamp = timestamp;
@@ -347,6 +382,7 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi
auto &node = NodeRef(nodeId);
node.State = state;
+ node.StartTime = TInstant::MilliSeconds(info.GetStartTime());
node.Version = info.GetVersion();
node.Services = TServices();
@@ -798,6 +834,26 @@ void TClusterInfo::ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config)
}
}
+void TClusterInfo::ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info) {
+ StateStorageInfoReceived = true;
+ for (ui32 ringId = 0; ringId < info->Rings.size(); ++ringId) {
+ auto &ring = info->Rings[ringId];
+ TStateStorageRingInfoPtr ringInfo = MakeIntrusive<TStateStorageRingInfo>();
+ ringInfo->RingId = ringId;
+ if (ring.IsDisabled)
+ ringInfo->SetDisabled();
+
+ for(auto replica : ring.Replicas) {
+ Y_VERIFY(HasNode(replica.NodeId()));
+ ringInfo->AddNode(Nodes[replica.NodeId()]);
+ StateStorageReplicas.insert(replica.NodeId());
+ StateStorageNodeToRingId[replica.NodeId()] = ringId;
+ }
+
+ StateStorageRings.push_back(ringInfo);
+ }
+}
+
void TClusterInfo::DebugDump(const TActorContext &ctx) const
{
LOG_DEBUG_S(ctx, NKikimrServices::CMS,
diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h
index 16baceed49..38c8aef06f 100644
--- a/ydb/core/cms/cluster_info.h
+++ b/ydb/core/cms/cluster_info.h
@@ -6,7 +6,9 @@
#include "services.h"
#include <library/cpp/actors/interconnect/interconnect.h>
+#include <library/cpp/actors/core/actor.h>
#include <ydb/core/base/blobstorage.h>
+#include <ydb/core/base/statestorage.h>
#include <ydb/core/node_whiteboard/node_whiteboard.h>
#include <ydb/core/blobstorage/base/blobstorage_vdiskid.h>
#include <ydb/core/mind/tenant_pool.h>
@@ -16,6 +18,7 @@
#include <util/generic/hash.h>
#include <util/generic/maybe.h>
#include <util/generic/set.h>
+#include "util/generic/ptr.h"
namespace NKikimr {
namespace NCms {
@@ -328,6 +331,7 @@ public:
TString Tenant;
TString PreviousTenant;
TServices Services;
+ TInstant StartTime;
};
using TNodeInfoPtr = TIntrusivePtr<TNodeInfo>;
@@ -452,6 +456,83 @@ struct TBSGroupInfo {
};
/**
+ * Structure to hold info and state for a state storage. It helps to
+ * avoid the situation when we quickly unlock one state stotage node and
+ * immediately lock another node from different ring
+ */
+class TStateStorageRingInfo : public TThrRefBase {
+public:
+
+ /**
+ * Ok: we can allow to restart nodes;
+ *
+ * Locked: all nodes are up. We restarted some nodes before and waiting
+ * some timeout to allow restart nodes from other ring.
+ * But, we still can restart nodes from this ring;
+ *
+ * Disabled: Disabled ring (see state storage config). The ring
+ * affects permissions of other rings, but this ring
+ * can be disabled without considering the others;
+ *
+ * Restart: has some restarting or down nodes. We can still restart
+ * nodes from this ring;
+ */
+ enum RingState : ui8 {
+ Unknown = 0,
+ Ok,
+ Locked,
+ Disabled,
+ Restart,
+ };
+
+ TStateStorageRingInfo() = default;
+ TStateStorageRingInfo(const TStateStorageRingInfo &other) = default;
+ TStateStorageRingInfo(TStateStorageRingInfo &&other) = default;
+
+ TStateStorageRingInfo &operator=(const TStateStorageRingInfo &other) = default;
+ TStateStorageRingInfo &operator=(TStateStorageRingInfo &&other) = default;
+
+ static TString RingStateToString(RingState state) {
+ switch (state) {
+ case Unknown:
+ return "Unknown";
+ break;
+ case Ok:
+ return "Ok";
+ break;
+ case Locked:
+ return "Locked";
+ break;
+ case Restart:
+ return "Restart";
+ break;
+ default:
+ return "Unknown ring state";
+ break;
+ }
+ }
+
+ void AddNode(TNodeInfoPtr &node) {
+ Replicas.push_back(node);
+ }
+
+ void SetDisabled() {
+ IsDisabled = true;
+ }
+
+ RingState CountState(TInstant now,
+ TDuration retryTime,
+ TDuration duration) const;
+
+ ui32 RingId = 0;
+ bool IsDisabled = false;
+ const TDuration Timeout = TDuration::Minutes(2);
+
+ TVector<TNodeInfoPtr> Replicas;
+};
+using TStateStorageRingInfoPtr = TIntrusivePtr<TStateStorageRingInfo>;
+
+/**
* Main class to hold current cluster state.
*
* State is built by merging pieces of information from NodeWhiteboard through
@@ -475,6 +556,21 @@ public:
TClusterInfo &operator=(const TClusterInfo &other) = default;
TClusterInfo &operator=(TClusterInfo &&other) = default;
+ void ApplyStateStorageInfo(TIntrusiveConstPtr<TStateStorageInfo> info);
+
+ bool IsStateStorageReplicaNode(ui32 nodeId) {
+ return StateStorageReplicas.contains(nodeId);
+ }
+
+ bool IsStateStorageinfoReceived() {
+ return StateStorageInfoReceived;
+ }
+
+ ui32 GetRingId(ui32 nodeId) {
+ Y_VERIFY(IsStateStorageReplicaNode(nodeId));
+ return StateStorageNodeToRingId[nodeId];
+ }
+
bool HasNode(ui32 nodeId) const
{
return Nodes.contains(nodeId);
@@ -823,16 +919,22 @@ private:
ui64 RollbackPoint = 0;
bool HasTenantsInfo = false;
bool Outdated = false;
+ bool StateStorageInfoReceived;
// Fast access structures.
TMultiMap<TString, ui32> HostNameToNodeId;
TMultiMap<TString, ui32> TenantToNodeId;
THashMap<TString, TLockableItemPtr> LockableItems;
+ THashSet<ui32> StateStorageReplicas;
+ THashMap<ui32, ui32> StateStorageNodeToRingId;
public:
bool IsLocalBootConfDiffersFromConsole = false;
THashMap<NKikimrConfig::TBootstrap::ETabletType, TVector<ui32>> TabletTypeToNodes;
THashMap<ui32, TVector<NKikimrConfig::TBootstrap::ETabletType>> NodeToTabletTypes;
+
+ TIntrusiveConstPtr<TStateStorageInfo> StateStorageInfo;
+ TVector<TStateStorageRingInfoPtr> StateStorageRings;
};
inline bool ActionRequiresHost(NKikimrCms::TAction::EType type)
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index 6698252bdb..050bb3e417 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -122,16 +122,6 @@ void TCms::ProcessInitQueue(const TActorContext &ctx)
}
}
-void TCms::RequestStateStorageConfig(const TActorContext &ctx) {
- const auto& domains = *AppData(ctx)->DomainsInfo;
- ui32 domainUid = domains.Domains.begin()->second->DomainUid;
- const ui32 stateStorageGroup = domains.GetDefaultStateStorageGroup(domainUid);
-
- const TActorId proxy = MakeStateStorageProxyID(stateStorageGroup);
-
- ctx.Send(proxy, new TEvStateStorage::TEvListStateStorage(), IEventHandle::FlagTrackDelivery);
-}
-
void TCms::SubscribeForConfig(const TActorContext &ctx)
{
ctx.Register(NConsole::CreateConfigSubscriber(TabletID(),
@@ -421,7 +411,7 @@ bool TCms::CheckActionShutdownNode(const NKikimrCms::TAction &action,
}
// node is not locked
- if (!CheckActionShutdownStateStorage(action, opts, node, error)) {
+ if (!TryToLockStateStorageReplica(action, opts, node, error, ctx)) {
return false;
}
@@ -484,70 +474,103 @@ bool TCms::CheckActionShutdownHost(const TAction &action,
return true;
}
-bool TCms::CheckActionShutdownStateStorage(
- const TAction& action,
- const TActionOptions& opts,
- const TNodeInfo& node,
- TErrorInfo& error) const
+bool TCms::TryToLockStateStorageReplica(const TAction& action,
+ const TActionOptions& opts,
+ const TNodeInfo& node,
+ TErrorInfo& error,
+ const TActorContext &ctx) const
{
- // TODO (t1mursadykov): отслеживание времени отключенных стейт стораджей
- if (opts.AvailabilityMode == MODE_FORCE_RESTART) {
- return true;
- }
-
TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
- if (!StateStorageInfo) {
+ if (!ClusterInfo->IsStateStorageinfoReceived()) {
error.Code = TStatus::DISALLOW_TEMP;
error.Reason = "Did not received state storage configuration";
error.Deadline = defaultDeadline;
return false;
}
- if (!StateStorageNodes.contains(node.NodeId)) {
+ if (!ClusterInfo->IsStateStorageReplicaNode(node.NodeId)) {
return true;
}
- THashSet<ui32> injuredRings;
+ const ui32 nToSelect = ClusterInfo->StateStorageInfo->NToSelect;
+ const ui32 currentRing = ClusterInfo->GetRingId(node.NodeId);
+ ui8 currentRingState = TStateStorageRingInfo::Unknown;
+ ui32 restartRings = 0;
+ ui32 lockedRings = 0;
+ ui32 disabledRings = 0;
+ auto now = AppData(ctx)->TimeProvider->Now();
TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration;
- TErrorInfo err;
- TStringStream brokenNodesMsg;
- for (auto& i : StateStorageNodes) {
- if (node.NodeId == i) {
+ for (auto ringInfo : ClusterInfo->StateStorageRings) {
+ auto state = ringInfo->CountState(now, State->Config.DefaultRetryTime, duration);
+ LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::CMS, "Ring: " << ringInfo->RingId
+ << "; State: " << TStateStorageRingInfo::RingStateToString(state));
+
+ if (ringInfo->RingId == currentRing) {
+ if (state == TStateStorageRingInfo::Disabled) {
+ return true;
+ }
+ currentRingState = state;
continue;
}
- if (ClusterInfo->Node(i).IsLocked(err, State->Config.DefaultRetryTime,
- TActivationContext::Now(), duration) ||
- ClusterInfo->Node(i).IsDown(err, defaultDeadline)) {
-
- injuredRings.insert(NodeToRing.at(i));
- brokenNodesMsg << " " << i;
+ switch (state) {
+ case TStateStorageRingInfo::Ok:
+ break;
+ case TStateStorageRingInfo::Locked:
+ ++lockedRings;
+ break;
+ case TStateStorageRingInfo::Restart:
+ ++restartRings;
+ break;
+ case TStateStorageRingInfo::Disabled:
+ ++disabledRings;
+ break;
+ default:
+ break;
}
}
+ Y_VERIFY(currentRingState != TStateStorageRingInfo::Unknown);
- if (injuredRings.size() == 0) {
- return true;
- }
-
- if ((opts.AvailabilityMode == MODE_MAX_AVAILABILITY && injuredRings.size() > 1) ||
- (opts.AvailabilityMode == MODE_KEEP_AVAILABLE && injuredRings.size() > 2)) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Too many broken state storage rings: " << injuredRings.size() <<
- " Down state storage nodes:" << brokenNodesMsg.Str();
- error.Deadline = defaultDeadline;
- return false;
- }
-
- if (injuredRings.contains(NodeToRing.at(node.NodeId))) {
- return true;
- }
+ // Add current ring to restart rings
+ ++restartRings;
- if (opts.AvailabilityMode == MODE_MAX_AVAILABILITY || injuredRings.size() > 2) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "There are down state storage nodes in other rings. "
- "Down state storage nodes: " << brokenNodesMsg.Str();
- error.Deadline = defaultDeadline;
- return false;
+ switch (opts.AvailabilityMode) {
+ case MODE_MAX_AVAILABILITY:
+ if (restartRings + lockedRings > 1) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << "Too many unavailable state storage rings"
+ << ". Restarting rings: "
+ << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1)
+ << ". Temporary (for a 2 minutes) locked rings: "
+ << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings)
+ << ". Maximum allowed number of unavailable rings for this mode: " << 1;
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+ break;
+ case MODE_KEEP_AVAILABLE:
+ if (restartRings + lockedRings + disabledRings > (nToSelect - 1) / 2) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << "Too many unavailable state storage rings"
+ << ". Restarting rings: "
+ << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1)
+ << ". Temporary (for a 2 minutes) locked rings: "
+ << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings)
+ << ". Disabled rings: " << disabledRings
+ << ". Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1) / 2;
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+ break;
+ case MODE_FORCE_RESTART:
+ break;
+ default:
+ error.Code = TStatus::WRONG_REQUEST;
+ error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")",
+ EAvailabilityMode_Name(opts.AvailabilityMode).data(),
+ static_cast<ui32>(opts.AvailabilityMode));
+ error.Deadline = defaultDeadline;
+ return false;
}
return true;
@@ -1450,27 +1473,6 @@ void TCms::Handle(TEvCms::TEvGetClusterInfoRequest::TPtr &ev, const TActorContex
ctx.Send(ev->Sender, resp.Release());
}
-void TCms::Handle(TEvStateStorage::TEvListStateStorageResult::TPtr& ev, const TActorContext &ctx) {
- auto& info = ev->Get()->Info;
- if (!info) {
- LOG_NOTICE_S(ctx, NKikimrServices::CMS,
- "Couldn't collect group info");
- return;
- }
-
- StateStorageInfo = info;
-
- // index in array will be used as ring id for simplicity
- for (ui32 ring = 0; ring < info->Rings.size(); ++ring) {
- for (auto& replica : info->Rings[ring].Replicas) {
- ui32 nodeId = replica.NodeId();
-
- NodeToRing[nodeId] = ring;
- StateStorageNodes.insert(nodeId);
- }
- }
-}
-
void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx)
{
if (!ev->Get()->Success) {
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index a4e69f0b8f..4a6e4d3e9f 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -213,7 +213,6 @@ private:
STFUNC(StateWork)
{
switch (ev->GetTypeRewrite()) {
- HFunc(TEvStateStorage::TEvListStateStorageResult, Handle);
HFunc(TEvPrivate::TEvClusterInfo, Handle);
HFunc(TEvPrivate::TEvLogAndSend, Handle);
FFunc(TEvPrivate::EvUpdateClusterInfo, EnqueueRequest);
@@ -266,7 +265,6 @@ private:
const TActorContext &ctx) override;
void ProcessInitQueue(const TActorContext &ctx);
- void RequestStateStorageConfig(const TActorContext &ctx);
void SubscribeForConfig(const TActorContext &ctx);
void AdjustInfo(TClusterInfoPtr &info, const TActorContext &ctx) const;
bool CheckPermissionRequest(const NKikimrCms::TPermissionRequest &request,
@@ -319,10 +317,11 @@ private:
const TVDiskInfo &vdisk,
TDuration duration,
TErrorInfo &error) const;
- bool CheckActionShutdownStateStorage(const NKikimrCms::TAction &action,
- const TActionOptions &options,
- const TNodeInfo &node,
- TErrorInfo& error) const;
+ bool TryToLockStateStorageReplica(const NKikimrCms::TAction &action,
+ const TActionOptions &options,
+ const TNodeInfo &node,
+ TErrorInfo& error,
+ const TActorContext &ctx) const;
void AcceptPermissions(NKikimrCms::TPermissionResponse &resp, const TString &requestId,
const TString &owner, const TActorContext &ctx, bool check = false);
void ScheduleUpdateClusterInfo(const TActorContext &ctx, bool now = false);
@@ -377,7 +376,6 @@ private:
void OnBSCPipeDestroyed(const TActorContext &ctx);
- void Handle(TEvStateStorage::TEvListStateStorageResult::TPtr& ev, const TActorContext &ctx);
void Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx);
void Handle(TEvPrivate::TEvLogAndSend::TPtr &ev, const TActorContext &ctx);
void Handle(TEvPrivate::TEvUpdateClusterInfo::TPtr &ev, const TActorContext &ctx);
diff --git a/ydb/core/cms/cms_tenants_ut.cpp b/ydb/core/cms/cms_tenants_ut.cpp
index e14fe4dd57..d987374cb4 100644
--- a/ydb/core/cms/cms_tenants_ut.cpp
+++ b/ydb/core/cms/cms_tenants_ut.cpp
@@ -289,19 +289,19 @@ Y_UNIT_TEST_SUITE(TCmsTenatsTest) {
env.SetLimits(1, 10, 1, 10);
// Mark node as down and try to lock it.
- TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false;
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
env.CheckPermissionRequest("user", false, false, false,
false, TStatus::ALLOW,
- MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000));
+ MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000));
// Now node is locked and we cannot lock this or another node.
env.CheckPermissionRequest("user", false, false, false,
true, TStatus::DISALLOW_TEMP,
- MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000));
+ MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000));
env.CheckPermissionRequest("user", false, false, false,
true, TStatus::DISALLOW_TEMP,
- MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000));
+ MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(2), 60000000));
}
void TestScheduledPermission(bool defaultPolicy)
diff --git a/ydb/core/cms/cms_tx_load_state.cpp b/ydb/core/cms/cms_tx_load_state.cpp
index d34efc2b42..cee378a860 100644
--- a/ydb/core/cms/cms_tx_load_state.cpp
+++ b/ydb/core/cms/cms_tx_load_state.cpp
@@ -197,7 +197,6 @@ public:
Self->ScheduleLogCleanup(ctx);
Self->ScheduleUpdateClusterInfo(ctx, true);
Self->ProcessInitQueue(ctx);
- Self->RequestStateStorageConfig(ctx);
}
};
diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp
index 740181c370..b7a44c1021 100644
--- a/ydb/core/cms/cms_ut.cpp
+++ b/ydb/core/cms/cms_ut.cpp
@@ -855,10 +855,10 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
{
TCmsTestEnv env(8);
- TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false;
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
env.CheckPermissionRequest("user", false, false, false, true, TStatus::ALLOW,
- MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 60000000, "storage"));
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 60000000, "storage"));
}
void TestAvailabilityMode(EAvailabilityMode mode, bool disconnectNodes)
@@ -867,6 +867,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
|| mode == MODE_FORCE_RESTART);
TCmsTestEnv env(8);
+ env.AdvanceCurrentTime(TDuration::Minutes(3));
auto res1 = env.ExtractPermissions
(env.CheckPermissionRequest("user", false, false, false,
@@ -932,6 +933,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
|| mode == MODE_FORCE_RESTART);
TCmsTestEnv env(8);
+ env.AdvanceCurrentTime(TDuration::Minutes(3));
auto res1 = env.ExtractPermissions
(env.CheckPermissionRequest("user", true, false, true,
@@ -1072,7 +1074,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
}
Y_UNIT_TEST(StateStorageTwoRings)
- {
+ {
TTestEnvOpts options(5);
options.VDisks = 0;
options.NRings = 2;
@@ -1080,11 +1082,12 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
options.NToSelect = 2;
TCmsTestEnv env(options);
-
- env.CheckPermissionRequest("user", false, false, false, true, TStatus::ALLOW,
+ env.AdvanceCurrentTime(TDuration::Minutes(3));
+
+ env.CheckPermissionRequest("user", false, false, false, true, MODE_MAX_AVAILABILITY, TStatus::ALLOW,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 60000000, "storage"));
- env.CheckPermissionRequest("user", false, false, false, true, TStatus::DISALLOW_TEMP,
+ env.CheckPermissionRequest("user", false, false, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
}
@@ -1097,6 +1100,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
options.NToSelect = 2;
TCmsTestEnv env(options);
+ env.AdvanceCurrentTime(TDuration::Minutes(3));
env.CheckPermissionRequest("user", false, false, false, true, TStatus::ALLOW,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 60000000, "storage"));
@@ -1106,45 +1110,45 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
}
Y_UNIT_TEST(StateStorageAvailabilityMode)
- {
- TTestEnvOpts options(5);
+ {
+ TTestEnvOpts options(10);
options.VDisks = 0;
- options.NRings = 2;
+ options.NRings = 5;
options.RingSize = 2;
- options.NToSelect = 2;
+ options.NToSelect = 5;
TCmsTestEnv env(options);
-
+
TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage"));
-
+
env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage"));
}
Y_UNIT_TEST(StateStorageTwoBrokenRings)
{
- TTestEnvOpts options(7);
+ TTestEnvOpts options(10);
options.VDisks = 0;
- options.NRings = 3;
+ options.NRings = 5;
options.RingSize = 2;
- options.NToSelect = 2;
+ options.NToSelect = 5;
TCmsTestEnv env(options);
- TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false;
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
TFakeNodeWhiteboardService::Info[env.GetNodeId(2)].Connected = false;
env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW,
- MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 60000000, "storage"));
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage"));
env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP,
- MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 60000000, "storage"));
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage"));
}
Y_UNIT_TEST(StateStorageRollingRestart)
@@ -1153,7 +1157,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
options.VDisks = 0;
options.NRings = 6;
options.RingSize = 3;
- options.NToSelect = 5;
+ options.NToSelect = 6;
TCmsTestEnv env(options);
@@ -1223,6 +1227,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
Y_UNIT_TEST(WalleCleanupTest)
{
TCmsTestEnv env(8);
+ env.RestartCms();
TAutoPtr<NCms::TEvCms::TEvPermissionRequest> event = new NCms::TEvCms::TEvPermissionRequest;
event->Record.SetUser(WALLE_CMS_USER);
@@ -1230,7 +1235,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
event->Record.SetDryRun(false);
event->Record.SetSchedule(false);
- AddActions(event, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 600000000, "storage"));
+ AddActions(event, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 6000000000, "storage"));
NKikimrCms::TPermissionResponse res;
res = env.CheckPermissionRequest(event, TStatus::ALLOW);
@@ -1291,16 +1296,12 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage"));
}
-
Y_UNIT_TEST(Mirror3dcPermissions)
{
TTestEnvOpts options(18);
options.UseMirror3dcErasure = true;
options.VDisks = 9;
- options.NRings = 3;
options.DataCenterCount = 3;
- options.RingSize = 2;
- options.NToSelect = 2;
TCmsTestEnv env(options);
@@ -1338,6 +1339,34 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
}
+
+ Y_UNIT_TEST(StateStorageLockedNodes)
+ {
+ TTestEnvOpts options(10);
+ options.VDisks = 0;
+ options.NRings = 9;
+ options.RingSize = 1;
+ options.NToSelect = 9;
+
+ TCmsTestEnv env(options);
+
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(2)].Connected = false;
+
+ // Node downtime simulation
+ env.UpdateNodeStartTime(3, env.GetTimeProvider()->Now() + TDuration::Minutes(1));
+ env.UpdateNodeStartTime(4, env.GetTimeProvider()->Now() + TDuration::Minutes(1));
+
+ env.AdvanceCurrentTime(TDuration::Seconds(90));
+ env.RestartCms();
+
+ // Cant allow to restart ring when 2 restart 2 locked
+ env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage"));
+ // Can get node from locked ring
+ env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW,
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage"));
+ }
}
} // NCmsTest
} // NKikimr
diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp
index fc254a89d6..98b6eb3bbe 100644
--- a/ydb/core/cms/cms_ut_common.cpp
+++ b/ydb/core/cms/cms_ut_common.cpp
@@ -244,7 +244,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
else
numGroups = numNodes * numNodeGroups;
- auto now = Now();
+ auto now = runtime.GetTimeProvider()->Now();
for (ui32 groupId = 0; groupId < numGroups; ++groupId) {
auto &group = *config->AddGroup();
group.SetGroupId(groupId);
@@ -562,6 +562,9 @@ TCmsTestEnv::TCmsTestEnv(const TTestEnvOpts &options)
cmsConfig.MutableTenantLimits()->SetDisabledNodesRatioLimit(0);
cmsConfig.MutableClusterLimits()->SetDisabledNodesRatioLimit(0);
SetCmsConfig(cmsConfig);
+
+ // Need to allow restart state storage nodes
+ AdvanceCurrentTime(TDuration::Minutes(2));
}
diff --git a/ydb/core/cms/cms_ut_common.h b/ydb/core/cms/cms_ut_common.h
index 831b29407c..5ec1fdaec2 100644
--- a/ydb/core/cms/cms_ut_common.h
+++ b/ydb/core/cms/cms_ut_common.h
@@ -12,6 +12,7 @@
#include <ydb/core/testlib/basics/runtime.h>
#include <util/system/mutex.h>
+#include <util/datetime/base.h>
namespace NKikimr {
namespace NCmsTest {
@@ -82,7 +83,7 @@ struct TTestEnvOpts {
ui32 DataCenterCount;
TNodeTenantsMap Tenants;
bool UseMirror3dcErasure;
-
+ bool AdvanceCurrentTime;
TTestEnvOpts() = default;
@@ -97,6 +98,7 @@ struct TTestEnvOpts {
, DataCenterCount(1)
, Tenants(tenants)
, UseMirror3dcErasure(false)
+ , AdvanceCurrentTime(false)
{
}
};
@@ -129,6 +131,10 @@ public:
void EnableSysNodeChecking();
TIntrusiveConstPtr<NKikimr::TStateStorageInfo> GetStateStorageInfo();
+
+ void UpdateNodeStartTime(ui32 nodeIndex, TInstant startTime) {
+ TFakeNodeWhiteboardService::Info[GetNodeId(nodeIndex)].SystemStateInfo.SetStartTime(startTime.GetValue());
+ }
NKikimrCms::TClusterState
RequestState(const NKikimrCms::TClusterStateRequest &request = {},
diff --git a/ydb/core/cms/info_collector.cpp b/ydb/core/cms/info_collector.cpp
index 23ce5e3702..c138942874 100644
--- a/ydb/core/cms/info_collector.cpp
+++ b/ydb/core/cms/info_collector.cpp
@@ -38,6 +38,7 @@ public:
, Info(new TClusterInfo)
, BootstrapConfigReceived(false)
, BaseConfigReceived(false)
+ , StateStorageInfoReceived(false)
{
}
@@ -49,6 +50,9 @@ private:
sFunc(TEvents::TEvWakeup, ReplyAndDie);
hFunc(TEvConfigsDispatcher::TEvGetConfigResponse, Handle);
+ // State Storage Config
+ hFunc(TEvStateStorage::TEvListStateStorageResult, Handle);
+
// Nodes
hFunc(TEvInterconnect::TEvNodesInfo, Handle);
@@ -84,6 +88,10 @@ private:
//Configs
void RequestBootstrapConfig();
void Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev);
+
+ // State Storage
+ void RequestStateStorageConfig();
+ void Handle(TEvStateStorage::TEvListStateStorageResult::TPtr &ev);
// BSC
void RequestBaseConfig();
@@ -113,6 +121,7 @@ private:
TActorId BscPipe;
bool BootstrapConfigReceived;
bool BaseConfigReceived;
+ bool StateStorageInfoReceived;
THashMap<ui32, TSet<ui32>> NodeEvents; // nodeId -> expected events
THashMap<TPDiskID, TPDiskStateInfo, TPDiskIDHash> PDiskInfo;
THashMap<TVDiskID, TVDiskStateInfo> VDiskInfo;
@@ -121,7 +130,9 @@ private:
void TInfoCollector::ReplyAndDie() {
auto ev = MakeHolder<TCms::TEvPrivate::TEvClusterInfo>();
- ev->Success = BaseConfigReceived && BootstrapConfigReceived;
+ ev->Success = BaseConfigReceived
+ && BootstrapConfigReceived
+ && StateStorageInfoReceived;
if (BaseConfigReceived) {
for (const auto& [id, info] : PDiskInfo) {
@@ -136,12 +147,18 @@ void TInfoCollector::ReplyAndDie() {
ev->Info->SetTimestamp(TlsActivationContext->Now());
}
+ if (StateStorageInfoReceived) {
+ Info->ApplyStateStorageInfo(Info->StateStorageInfo);
+ }
+
Send(Client, std::move(ev));
PassAway();
}
void TInfoCollector::MaybeReplyAndDie() {
- if (!BaseConfigReceived || !BootstrapConfigReceived) {
+ if (!BaseConfigReceived
+ || !BootstrapConfigReceived
+ || !StateStorageInfoReceived) {
return;
}
@@ -170,6 +187,7 @@ void TInfoCollector::Bootstrap() {
Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes());
Schedule(Timeout, new TEvents::TEvWakeup());
RequestBootstrapConfig();
+ RequestStateStorageConfig();
Become(&TThis::StateWork);
}
@@ -196,7 +214,6 @@ void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev
if (!config->HasBootstrapConfig()){
LOG_I("Couldn't collect bootstrap config from Console. Taking the local config");
bootstrap.CopyFrom(AppData()->BootstrapConfig);
- return;
} else {
LOG_D("Got Bootstrap config"
<< ": record# " << config->ShortDebugString());
@@ -207,11 +224,35 @@ void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev
}
bootstrap = config->GetBootstrapConfig();
}
-
+
Info->ApplySysTabletsInfo(bootstrap);
MaybeReplyAndDie();
}
+void TInfoCollector::RequestStateStorageConfig() {
+ const auto& domains = *AppData()->DomainsInfo;
+ ui32 domainUid = domains.Domains.begin()->second->DomainUid;
+ const ui32 stateStorageGroup = domains.GetDefaultStateStorageGroup(domainUid);
+
+ const TActorId proxy = MakeStateStorageProxyID(stateStorageGroup);
+
+ Send(proxy, new TEvStateStorage::TEvListStateStorage());
+}
+
+void TInfoCollector::Handle(TEvStateStorage::TEvListStateStorageResult::TPtr& ev) {
+ auto& info = ev->Get()->Info;
+ if (!info) {
+ LOG_E("Couldn't collect state storage config");
+ ReplyAndDie();
+ return;
+ }
+
+ StateStorageInfoReceived = true;
+ Info->StateStorageInfo = info;
+
+ MaybeReplyAndDie();
+}
+
void TInfoCollector::RequestBaseConfig() {
using namespace NTabletPipe;