aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author t1mursadykov <t1mursadykov@ydb.tech> 2022-09-29 20:57:51 +0300
committer t1mursadykov <t1mursadykov@ydb.tech> 2022-09-29 20:57:51 +0300
commit 4401b017554fb2087f25bb1380201f2a0ac043d3 (patch)
tree 1607cf501ffd6f36221c54e53d622dfcf08cef3d
parent f556e448fbb43b5dc92d8a36ba1aace052cddade (diff)
download ydb-4401b017554fb2087f25bb1380201f2a0ac043d3.tar.gz
Bootstrap config checking in CMS
-rw-r--r--ydb/core/base/appdata.h1
-rw-r--r--ydb/core/cms/cluster_info.cpp12
-rw-r--r--ydb/core/cms/cluster_info.h23
-rw-r--r--ydb/core/cms/cms.cpp95
-rw-r--r--ydb/core/cms/cms_impl.h13
-rw-r--r--ydb/core/cms/cms_tx_get_log_tail.cpp2
-rw-r--r--ydb/core/cms/cms_tx_init_scheme.cpp2
-rw-r--r--ydb/core/cms/cms_tx_load_state.cpp2
-rw-r--r--ydb/core/cms/cms_tx_log_and_send.cpp2
-rw-r--r--ydb/core/cms/cms_tx_log_cleanup.cpp2
-rw-r--r--ydb/core/cms/cms_tx_process_notification.cpp2
-rw-r--r--ydb/core/cms/cms_tx_reject_notification.cpp2
-rw-r--r--ydb/core/cms/cms_tx_remove_expired_notifications.cpp2
-rw-r--r--ydb/core/cms/cms_tx_remove_permissions.cpp2
-rw-r--r--ydb/core/cms/cms_tx_remove_request.cpp2
-rw-r--r--ydb/core/cms/cms_tx_remove_walle_task.cpp2
-rw-r--r--ydb/core/cms/cms_tx_store_permissions.cpp2
-rw-r--r--ydb/core/cms/cms_tx_store_walle_task.cpp2
-rw-r--r--ydb/core/cms/cms_tx_update_config.cpp2
-rw-r--r--ydb/core/cms/cms_tx_update_downtimes.cpp2
-rw-r--r--ydb/core/cms/cms_ut.cpp25
-rw-r--r--ydb/core/cms/cms_ut_common.cpp72
-rw-r--r--ydb/core/cms/cms_ut_common.h6
-rw-r--r--ydb/core/cms/info_collector.cpp45
-rw-r--r--ydb/core/driver_lib/run/run.cpp4
-rw-r--r--ydb/core/protos/CMakeLists.txt1
-rw-r--r--ydb/core/protos/counters_cms.proto47
27 files changed, 307 insertions, 67 deletions
diff --git a/ydb/core/base/appdata.h b/ydb/core/base/appdata.h
index 3c8fc84652f..6118b309a9d 100644
--- a/ydb/core/base/appdata.h
+++ b/ydb/core/base/appdata.h
@@ -139,6 +139,7 @@ struct TAppData {
NKikimrConfig::TMeteringConfig MeteringConfig;
NKikimrConfig::TCompactionConfig CompactionConfig;
NKikimrConfig::TDomainsConfig DomainsConfig;
+ NKikimrConfig::TBootstrap BootstrapConfig;
bool EnforceUserTokenRequirement = false;
bool AllowHugeKeyValueDeletes = true; // delete when all clients limit deletes per request
bool EnableKqpSpilling = false;
diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp
index a1dc8a0b95f..e30d9811df8 100644
--- a/ydb/core/cms/cluster_info.cpp
+++ b/ydb/core/cms/cluster_info.cpp
@@ -786,6 +786,18 @@ void TClusterInfo::MigrateOldInfo(TClusterInfoPtr old)
}
}
+// Fills the two lookup tables from the bootstrap config: for every tablet
+// entry, each listed node is appended under the entry's tablet type, and the
+// tablet type is appended under that node. Entries without nodes add nothing.
+void TClusterInfo::ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config) {
+    for (const auto &tabletEntry : config.GetTablet()) {
+        const auto type = tabletEntry.GetType();
+
+        for (const ui32 hostNodeId : tabletEntry.GetNode()) {
+            TabletTypeToNodes[type].push_back(hostNodeId);
+            NodeToTabletTypes[hostNodeId].push_back(type);
+        }
+    }
+}
+
void TClusterInfo::DebugDump(const TActorContext &ctx) const
{
LOG_DEBUG_S(ctx, NKikimrServices::CMS,
diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h
index 0ef5de32e68..16baceed49d 100644
--- a/ydb/core/cms/cluster_info.h
+++ b/ydb/core/cms/cluster_info.h
@@ -11,6 +11,7 @@
#include <ydb/core/blobstorage/base/blobstorage_vdiskid.h>
#include <ydb/core/mind/tenant_pool.h>
#include <ydb/core/protos/cms.pb.h>
+#include <ydb/core/protos/console.pb.h>
#include <util/generic/hash.h>
#include <util/generic/maybe.h>
@@ -528,19 +529,6 @@ public:
return nodes;
}
- void ChooseSysNodes() {
- for (auto &[nodeId, node] : Nodes) {
- if (!node->PDisks.size()) {
- SysNodes.push_back(node.Get());
- }
- }
- }
-
- TVector<const TNodeInfo *> GetSysTabletNodes() const
- {
- return SysNodes;
- }
-
size_t NodesCount() const
{
return Nodes.size();
@@ -734,6 +722,8 @@ public:
bool IsOutdated() const { return Outdated; }
void SetOutdated(bool val) { Outdated = val; }
+ void ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config);
+
static EGroupConfigurationType VDiskConfigurationType(const TVDiskID &vdId) {
return TGroupID(vdId.GroupID).ConfigurationType();
}
@@ -833,13 +823,16 @@ private:
ui64 RollbackPoint = 0;
bool HasTenantsInfo = false;
bool Outdated = false;
-
- TVector<const TNodeInfo *> SysNodes; // nodes with sys tablets
// Fast access structures.
TMultiMap<TString, ui32> HostNameToNodeId;
TMultiMap<TString, ui32> TenantToNodeId;
THashMap<TString, TLockableItemPtr> LockableItems;
+public:
+
+ bool IsLocalBootConfDiffersFromConsole = false;
+ THashMap<NKikimrConfig::TBootstrap::ETabletType, TVector<ui32>> TabletTypeToNodes;
+ THashMap<ui32, TVector<NKikimrConfig::TBootstrap::ETabletType>> NodeToTabletTypes;
};
inline bool ActionRequiresHost(NKikimrCms::TAction::EType type)
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index 907edd8046c..ceeb61bef7d 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -4,9 +4,11 @@
#include "scheme.h"
#include "sentinel.h"
#include "erasure_checkers.h"
+#include "ydb/core/protos/config_units.pb.h"
#include <ydb/core/actorlib_impl/long_timer.h>
#include <ydb/core/base/appdata.h>
+#include <ydb/core/base/counters.h>
#include <ydb/core/base/statestorage.h>
#include <ydb/core/base/statestorage_impl.h>
#include <ydb/core/cms/console/config_helpers.h>
@@ -78,6 +80,8 @@ void TCms::OnActivateExecutor(const TActorContext &ctx)
return;
}
+ Executor()->RegisterExternalTabletCounters(TabletCountersPtr.Release());
+
State->CmsTabletId = TabletID();
State->CmsActorId = SelfId();
@@ -556,49 +560,58 @@ bool TCms::CheckSysTabletsNode(const TAction &action,
if (node.Services & EService::DynamicNode || node.PDisks.size()) {
return true;
}
-
- auto nodes = ClusterInfo->GetSysTabletNodes();
-
- ui32 disabledNodesCnt = 0;
- TErrorInfo err;
- TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration;
- TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
- for (auto node : nodes) {
- if (node->IsLocked(err, State->Config.DefaultRetryTime,
- TActivationContext::Now(), duration) ||
- node->IsDown(err, defaultDeadline))
- {
- ++disabledNodesCnt;
- }
- }
-
- switch (opts.AvailabilityMode) {
- case MODE_MAX_AVAILABILITY:
- if (disabledNodesCnt > 0) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Too many locked sys nodes: " << disabledNodesCnt;
- error.Deadline = defaultDeadline;
- return false;
- }
- break;
- case MODE_KEEP_AVAILABLE:
- if (disabledNodesCnt * 8 >= nodes.size()) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Too many locked sys nodes: " << disabledNodesCnt;
- error.Deadline = defaultDeadline;
- return false;
+
+ for (auto &tabletType : ClusterInfo->NodeToTabletTypes[node.NodeId]) {
+ ui32 disabledNodesCnt = 1; // counting including this node
+ TErrorInfo err;
+ TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration;
+ TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
+
+ for (auto &nodeId : ClusterInfo->TabletTypeToNodes[tabletType]) {
+ if (nodeId == node.NodeId) {
+ continue;
+ }
+ if (ClusterInfo->Node(nodeId).IsLocked(err, State->Config.DefaultRetryTime,
+ TActivationContext::Now(), duration) ||
+ ClusterInfo->Node(nodeId).IsDown(err, defaultDeadline))
+ {
+ ++disabledNodesCnt;
+ }
}
- break;
- case MODE_FORCE_RESTART:
- break;
- default:
- error.Code = TStatus::WRONG_REQUEST;
- error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")",
+
+ ui32 tabletNodes = ClusterInfo->TabletTypeToNodes[tabletType].size();
+ switch (opts.AvailabilityMode) {
+ case MODE_MAX_AVAILABILITY:
+ if (tabletNodes > 1 && disabledNodesCnt * 2 > tabletNodes){
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType)
+ << " has too many locked nodes: " << disabledNodesCnt
+ << " limit: " << tabletNodes / 2 << " (50%)";
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+ break;
+ case MODE_KEEP_AVAILABLE:
+ if (tabletNodes > 1 && disabledNodesCnt > tabletNodes - 1) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType)
+ << " has too many locked nodes: " << disabledNodesCnt
+ << ". At least one node must be available";
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+ break;
+ case MODE_FORCE_RESTART:
+ break;
+ default:
+ error.Code = TStatus::WRONG_REQUEST;
+ error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")",
EAvailabilityMode_Name(opts.AvailabilityMode).data(),
static_cast<ui32>(opts.AvailabilityMode));
- error.Deadline = defaultDeadline;
- return false;
- }
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+ }
return true;
}
@@ -1479,6 +1492,8 @@ void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx
info->DebugDump(ctx);
+ TabletCounters->Simple()[COUNTER_BOOTSTRAP_DIFFERS].Set(ClusterInfo->IsLocalBootConfDiffersFromConsole);
+
ProcessQueue(ctx);
}
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index af4136c204f..2a908067daa 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -12,9 +12,12 @@
#include <ydb/core/base/tablet_pipe.h>
#include <ydb/core/base/statestorage_impl.h>
#include <ydb/core/cms/console/console.h>
+#include <ydb/core/protos/counters_cms.pb.h>
+#include <ydb/core/tablet/tablet_counters_protobuf.h>
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
#include <ydb/core/engine/minikql/flat_local_tx_factory.h>
+
#include <util/generic/stack.h>
#include <util/generic/queue.h>
@@ -416,6 +419,10 @@ private:
THashMap<ui32, ui32> NodeToRing;
THashSet<ui32> StateStorageNodes;
+ // Monitoring
+ THolder<class NKikimr::TTabletCountersBase> TabletCountersPtr;
+ TTabletCountersBase* TabletCounters;
+
public:
TCms(const TActorId &tablet, TTabletStorageInfo *info)
: TActor(&TThis::StateInit)
@@ -424,6 +431,12 @@ public:
, Logger(State)
, ConfigSubscriptionId(0)
{
+ TabletCountersPtr.Reset(new TProtobufTabletCounters<
+ ESimpleCounters_descriptor,
+ ECumulativeCounters_descriptor,
+ EPercentileCounters_descriptor,
+ ETxTypes_descriptor>());
+ TabletCounters = TabletCountersPtr.Get();
}
static constexpr NKikimrServices::TActivity::EType ActorActivityType()
diff --git a/ydb/core/cms/cms_tx_get_log_tail.cpp b/ydb/core/cms/cms_tx_get_log_tail.cpp
index 8477e1d21ac..51951270def 100644
--- a/ydb/core/cms/cms_tx_get_log_tail.cpp
+++ b/ydb/core/cms/cms_tx_get_log_tail.cpp
@@ -15,6 +15,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_GET_LOG_TAIL; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
auto &req = Request->Get()->Record;
diff --git a/ydb/core/cms/cms_tx_init_scheme.cpp b/ydb/core/cms/cms_tx_init_scheme.cpp
index 796b6063962..51074f1b1ed 100644
--- a/ydb/core/cms/cms_tx_init_scheme.cpp
+++ b/ydb/core/cms/cms_tx_init_scheme.cpp
@@ -11,6 +11,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_INIT_SCHEMA; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxInitScheme Execute");
diff --git a/ydb/core/cms/cms_tx_load_state.cpp b/ydb/core/cms/cms_tx_load_state.cpp
index 441c5d52972..d34efc2b42c 100644
--- a/ydb/core/cms/cms_tx_load_state.cpp
+++ b/ydb/core/cms/cms_tx_load_state.cpp
@@ -17,6 +17,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_LOAD_STATE; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxLoadState Execute");
diff --git a/ydb/core/cms/cms_tx_log_and_send.cpp b/ydb/core/cms/cms_tx_log_and_send.cpp
index f713d28755e..228dcee5670 100644
--- a/ydb/core/cms/cms_tx_log_and_send.cpp
+++ b/ydb/core/cms/cms_tx_log_and_send.cpp
@@ -13,6 +13,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_LOG_AND_SEND; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG_S(ctx, NKikimrServices::CMS,
diff --git a/ydb/core/cms/cms_tx_log_cleanup.cpp b/ydb/core/cms/cms_tx_log_cleanup.cpp
index 933b67a93f1..0ffcceeb6bd 100644
--- a/ydb/core/cms/cms_tx_log_cleanup.cpp
+++ b/ydb/core/cms/cms_tx_log_cleanup.cpp
@@ -11,6 +11,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_LOG_CLEANUP; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG_S(ctx, NKikimrServices::CMS,
diff --git a/ydb/core/cms/cms_tx_process_notification.cpp b/ydb/core/cms/cms_tx_process_notification.cpp
index 2174a82f171..e1f4e7e1dbc 100644
--- a/ydb/core/cms/cms_tx_process_notification.cpp
+++ b/ydb/core/cms/cms_tx_process_notification.cpp
@@ -14,6 +14,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_PROCESS_NOTIFICATION; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxProcessNotification Execute");
diff --git a/ydb/core/cms/cms_tx_reject_notification.cpp b/ydb/core/cms/cms_tx_reject_notification.cpp
index 5612dd9fbb1..5794e06388a 100644
--- a/ydb/core/cms/cms_tx_reject_notification.cpp
+++ b/ydb/core/cms/cms_tx_reject_notification.cpp
@@ -14,6 +14,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_REJECT_NOTIFICATION; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRejectNotification Execute");
diff --git a/ydb/core/cms/cms_tx_remove_expired_notifications.cpp b/ydb/core/cms/cms_tx_remove_expired_notifications.cpp
index 6235b7f8f0f..1eff9849f5d 100644
--- a/ydb/core/cms/cms_tx_remove_expired_notifications.cpp
+++ b/ydb/core/cms/cms_tx_remove_expired_notifications.cpp
@@ -13,6 +13,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_REMOVE_EXPIRED_NOTIFICATION; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemoveExpiredNotifications Execute");
diff --git a/ydb/core/cms/cms_tx_remove_permissions.cpp b/ydb/core/cms/cms_tx_remove_permissions.cpp
index 6e638c72ffb..4492dee128e 100644
--- a/ydb/core/cms/cms_tx_remove_permissions.cpp
+++ b/ydb/core/cms/cms_tx_remove_permissions.cpp
@@ -17,6 +17,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_REMOVE_PERMISSIONS; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemovePermissions Execute");
diff --git a/ydb/core/cms/cms_tx_remove_request.cpp b/ydb/core/cms/cms_tx_remove_request.cpp
index fab8144cfe3..65be4207c69 100644
--- a/ydb/core/cms/cms_tx_remove_request.cpp
+++ b/ydb/core/cms/cms_tx_remove_request.cpp
@@ -16,6 +16,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_REMOVE_REQUEST; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemoveRequest Execute");
diff --git a/ydb/core/cms/cms_tx_remove_walle_task.cpp b/ydb/core/cms/cms_tx_remove_walle_task.cpp
index d9e099f0db7..dc6a082b283 100644
--- a/ydb/core/cms/cms_tx_remove_walle_task.cpp
+++ b/ydb/core/cms/cms_tx_remove_walle_task.cpp
@@ -14,6 +14,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_REMOVE_WALLE_TASK; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemoveWalleTask Execute");
diff --git a/ydb/core/cms/cms_tx_store_permissions.cpp b/ydb/core/cms/cms_tx_store_permissions.cpp
index fc5bc54e3e6..28cb4b18797 100644
--- a/ydb/core/cms/cms_tx_store_permissions.cpp
+++ b/ydb/core/cms/cms_tx_store_permissions.cpp
@@ -20,6 +20,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_STORE_PERMISSIONS ; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStorePermissions Execute");
diff --git a/ydb/core/cms/cms_tx_store_walle_task.cpp b/ydb/core/cms/cms_tx_store_walle_task.cpp
index d6ca017f564..774d9116624 100644
--- a/ydb/core/cms/cms_tx_store_walle_task.cpp
+++ b/ydb/core/cms/cms_tx_store_walle_task.cpp
@@ -18,6 +18,8 @@ public:
Y_VERIFY(Response);
}
+ TTxType GetTxType() const override { return TXTYPE_STORE_WALLE_TASK; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStoreWalleTask Execute");
diff --git a/ydb/core/cms/cms_tx_update_config.cpp b/ydb/core/cms/cms_tx_update_config.cpp
index 59f962b3e2c..f1f2b56d868 100644
--- a/ydb/core/cms/cms_tx_update_config.cpp
+++ b/ydb/core/cms/cms_tx_update_config.cpp
@@ -19,6 +19,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_UPDATE_CONFIG; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG_S(ctx, NKikimrServices::CMS,
diff --git a/ydb/core/cms/cms_tx_update_downtimes.cpp b/ydb/core/cms/cms_tx_update_downtimes.cpp
index d4641d8c263..10c4d44819a 100644
--- a/ydb/core/cms/cms_tx_update_downtimes.cpp
+++ b/ydb/core/cms/cms_tx_update_downtimes.cpp
@@ -11,6 +11,8 @@ public:
{
}
+ TTxType GetTxType() const override { return TXTYPE_UPDATE_DOWNTIMES; }
+
bool Execute(TTransactionContext &txc, const TActorContext &ctx) override
{
LOG_DEBUG_S(ctx, NKikimrServices::CMS,
diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp
index 01b303f4fe8..8bfabe928cf 100644
--- a/ydb/core/cms/cms_ut.cpp
+++ b/ydb/core/cms/cms_ut.cpp
@@ -1270,29 +1270,40 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
Y_UNIT_TEST(SysTabletsNode)
{
- TTestEnvOpts opt(16);
+ TTestEnvOpts opt(6);
opt.VDisks = 0;
TCmsTestEnv env(opt);
-
+
env.EnableSysNodeChecking();
env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::ALLOW,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
-
+
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false;
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
+ env.RestartCms();
+
+ env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::ALLOW,
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
+
TFakeNodeWhiteboardService::Info[env.GetNodeId(2)].Connected = false;
env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP,
- MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage"));
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage"));
+
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(3)].Connected = false;
+ env.RestartCms();
+
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage"));
- TFakeNodeWhiteboardService::Info[env.GetNodeId(3)].Connected = false;
+ TFakeNodeWhiteboardService::Info[env.GetNodeId(4)].Connected = false;
env.RestartCms();
-
+
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
- MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage"));
+ MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage"));
}
diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp
index 5183d29ac5e..3e200e0de57 100644
--- a/ydb/core/cms/cms_ut_common.cpp
+++ b/ydb/core/cms/cms_ut_common.cpp
@@ -17,6 +17,8 @@
#include <util/string/subst.h>
+#include <memory>
+
const bool STRAND_PDISK = true;
#ifndef NDEBUG
@@ -29,11 +31,23 @@ namespace NKikimr {
namespace NCmsTest {
using namespace NCms;
+using namespace NConsole;
using namespace NNodeWhiteboard;
using namespace NKikimrWhiteboard;
using namespace NKikimrCms;
using namespace NKikimrBlobStorage;
+// Test stand-in for the configs dispatcher: replies to a config request with
+// an app config that contains only the fake BootstrapConfig. The reply goes to
+// the original sender and echoes the request cookie.
+void TFakeNodeWhiteboardService::Handle(TEvConfigsDispatcher::TEvGetConfigRequest::TPtr &ev,
+                                        const TActorContext &ctx)
+{
+    // BootstrapConfig is a static member; hold the service mutex while reading it.
+    TGuard<TMutex> guard(Mutex);
+
+    // Build the payload directly inside the shared_ptr rather than filling a
+    // stack message and deep-copying it afterwards.
+    auto appConfig = std::make_shared<NKikimrConfig::TAppConfig>();
+    appConfig->MutableBootstrapConfig()->CopyFrom(BootstrapConfig);
+
+    auto resp = MakeHolder<TEvConfigsDispatcher::TEvGetConfigResponse>();
+    resp->Config = appConfig;
+    ctx.Send(ev->Sender, resp.Release(), 0, ev->Cookie);
+}
void TFakeNodeWhiteboardService::Handle(TEvBlobStorage::TEvControllerConfigRequest::TPtr &ev,
const TActorContext &ctx)
{
@@ -155,6 +169,7 @@ void TFakeNodeWhiteboardService::Handle(TEvWhiteboard::TEvSystemStateRequest::TP
NKikimrBlobStorage::TEvControllerConfigResponse TFakeNodeWhiteboardService::Config;
THashMap<ui32, TFakeNodeInfo> TFakeNodeWhiteboardService::Info;
TMutex TFakeNodeWhiteboardService::Mutex;
+NKikimrConfig::TBootstrap TFakeNodeWhiteboardService::BootstrapConfig;
namespace {
@@ -332,6 +347,44 @@ static bool IsTabletActiveEvent(IEventHandle& ev) {
return false;
}
+
+// Appends one tablet entry of the given type to `config` and lists every id
+// from `nodes` in that entry, in order.
+inline void AddTablet(NKikimrConfig::TBootstrap::ETabletType type,
+                      const TVector<ui32> &nodes,
+                      NKikimrConfig::TBootstrap &config)
+{
+    auto *entry = config.AddTablet();
+    entry->SetType(type);
+    for (const ui32 nodeId : nodes) {
+        entry->AddNode(nodeId);
+    }
+}
+
+// Builds the bootstrap config used by the CMS tests. Node ids of the first
+// `nodesCount` runtime nodes that are not keys of `tenants` are collected
+// once, and that same list is assigned to every tablet type in the table below.
+static NKikimrConfig::TBootstrap GenerateBootstrapConfig(TTestActorRuntime &runtime,
+                                                         const ui32 nodesCount,
+                                                         const TNodeTenantsMap &tenants) {
+    // Entries are appended to the config in exactly this order.
+    static const NKikimrConfig::TBootstrap::ETabletType sysTabletTypes[] = {
+        NKikimrConfig::TBootstrap::FLAT_BS_CONTROLLER,
+        NKikimrConfig::TBootstrap::FLAT_SCHEMESHARD,
+        NKikimrConfig::TBootstrap::FLAT_TX_COORDINATOR,
+        NKikimrConfig::TBootstrap::TX_MEDIATOR,
+        NKikimrConfig::TBootstrap::TX_ALLOCATOR,
+        NKikimrConfig::TBootstrap::CONSOLE,
+        NKikimrConfig::TBootstrap::CMS,
+        NKikimrConfig::TBootstrap::NODE_BROKER,
+        NKikimrConfig::TBootstrap::TENANT_SLOT_BROKER,
+    };
+
+    TVector<ui32> hosts;
+    hosts.reserve(nodesCount);
+    for (ui32 idx = 0; idx < nodesCount; ++idx) {
+        const ui32 id = runtime.GetNodeId(idx);
+        if (!tenants.contains(id)) {
+            hosts.push_back(id);
+        }
+    }
+
+    NKikimrConfig::TBootstrap bootstrap;
+    for (const auto tabletType : sysTabletTypes) {
+        AddTablet(tabletType, hosts, bootstrap);
+    }
+
+    return bootstrap;
+}
+
static void SetupServices(TTestActorRuntime &runtime,
const TNodeTenantsMap &tenants)
{
@@ -416,6 +469,9 @@ static void SetupServices(TTestActorRuntime &runtime,
SetupBSNodeWarden(runtime, nodeIndex, nodeWardenConfig);
SetupTabletResolver(runtime, nodeIndex);
+ NKikimrConfig::TAppConfig appConfig;
+ appConfig.MutableBootstrapConfig()->CopyFrom(TFakeNodeWhiteboardService::BootstrapConfig);
+
// fake NodeWhiteBoard
runtime.AddLocalService(NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(nodeIndex)),
TActorSetupCmd(CreateFakeNodeWhiteboardService(), TMailboxType::Simple, 0), nodeIndex);
@@ -426,13 +482,18 @@ static void SetupServices(TTestActorRuntime &runtime,
TActorSetupCmd(new TFakeTenantPool(nodeTenants), TMailboxType::Simple, 0), nodeIndex);
}
- runtime.Initialize(app.Unwrap());
+ NKikimrConfig::TAppConfig appConfig;
+ appConfig.MutableBootstrapConfig()->CopyFrom(TFakeNodeWhiteboardService::BootstrapConfig);
+ runtime.AddLocalService(MakeConfigsDispatcherID(runtime.GetNodeId(0)),
+ TActorSetupCmd(CreateConfigsDispatcher(appConfig), TMailboxType::Simple, 0), 0);
+ runtime.Initialize(app.Unwrap());
auto dnsConfig = new TDynamicNameserviceConfig();
dnsConfig->MaxStaticNodeId = 1000;
dnsConfig->MaxDynamicNodeId = 2000;
runtime.GetAppData().DynamicNameserviceConfig = dnsConfig;
runtime.GetAppData().DisableCheckingSysNodesCms = true;
+ runtime.GetAppData().BootstrapConfig = TFakeNodeWhiteboardService::BootstrapConfig;
if (!runtime.IsRealThreads()) {
TDispatchOptions options;
@@ -441,11 +502,14 @@ static void SetupServices(TTestActorRuntime &runtime,
runtime.DispatchEvents(options);
}
+ auto cid = CreateTestBootstrapper(runtime, CreateTestTabletInfo(MakeConsoleID(0), TTabletTypes::Console),
+ &NConsole::CreateConsole);
CreateTestBootstrapper(runtime, CreateTestTabletInfo(MakeBSControllerID(0), TTabletTypes::BSController),
&CreateFlatBsController);
auto aid = CreateTestBootstrapper(runtime, CreateTestTabletInfo(MakeCmsID(0), TTabletTypes::Cms), &CreateCms);
runtime.EnableScheduleForActor(aid, true);
+ runtime.EnableScheduleForActor(cid, true);
}
} // anonymous namespace
@@ -460,12 +524,16 @@ TCmsTestEnv::TCmsTestEnv(const TTestEnvOpts &options)
status.SetSuccess(true);
auto *config = status.MutableBaseConfig();
+ TFakeNodeWhiteboardService::BootstrapConfig = GenerateBootstrapConfig(*this, options.NodeCount, options.Tenants);
+
GenerateExtendedInfo(*this, config, options.VDisks, 4, options.Tenants, options.UseMirror3dcErasure);
// Set observer to pass fake base blobstorage config.
auto redirectConfigRequest = [](TTestActorRuntimeBase&,
TAutoPtr<IEventHandle> &event) -> auto {
- if (event->GetTypeRewrite() == TEvBlobStorage::EvControllerConfigRequest) {
+ if (event->GetTypeRewrite() == TEvBlobStorage::EvControllerConfigRequest
+ || event->GetTypeRewrite() == TEvConfigsDispatcher::EvGetConfigRequest
+ ) {
auto fakeId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(event->Recipient.NodeId());
if (event->Recipient != fakeId)
event = event->Forward(fakeId);
diff --git a/ydb/core/cms/cms_ut_common.h b/ydb/core/cms/cms_ut_common.h
index 9556e78a553..0e1759cda6b 100644
--- a/ydb/core/cms/cms_ut_common.h
+++ b/ydb/core/cms/cms_ut_common.h
@@ -4,6 +4,8 @@
#include <ydb/core/base/counters.h>
#include <ydb/core/base/statestorage.h>
+#include <ydb/core/cms/console/console.h>
+#include <ydb/core/cms/console/configs_dispatcher.h>
#include <ydb/core/node_whiteboard/node_whiteboard.h>
#include <ydb/core/mind/tenant_pool.h>
#include <ydb/core/testlib/basics/helpers.h>
@@ -37,6 +39,7 @@ public:
using TEvWhiteboard = NNodeWhiteboard::TEvWhiteboard;
static NKikimrBlobStorage::TEvControllerConfigResponse Config;
+ static NKikimrConfig::TBootstrap BootstrapConfig;
static THashMap<ui32, TFakeNodeInfo> Info;
static TMutex Mutex;
@@ -55,9 +58,12 @@ public:
HFunc(TEvWhiteboard::TEvPDiskStateRequest, Handle);
HFunc(TEvWhiteboard::TEvVDiskStateRequest, Handle);
HFunc(TEvWhiteboard::TEvSystemStateRequest, Handle);
+ HFunc(NConsole::TEvConfigsDispatcher::TEvGetConfigRequest, Handle);
}
}
+
+ void Handle(NConsole::TEvConfigsDispatcher::TEvGetConfigRequest::TPtr &ev, const TActorContext &ctx);
void Handle(TEvBlobStorage::TEvControllerConfigRequest::TPtr &ev, const TActorContext &ctx);
void Handle(TEvWhiteboard::TEvTabletStateRequest::TPtr &ev, const TActorContext &ctx);
void Handle(TEvWhiteboard::TEvNodeStateRequest::TPtr &ev, const TActorContext &ctx);
diff --git a/ydb/core/cms/info_collector.cpp b/ydb/core/cms/info_collector.cpp
index afa2b64e7f9..23ce5e3702a 100644
--- a/ydb/core/cms/info_collector.cpp
+++ b/ydb/core/cms/info_collector.cpp
@@ -2,6 +2,7 @@
#include "info_collector.h"
#include <ydb/core/base/tablet_pipe.h>
+#include <ydb/core/cms/console/configs_dispatcher.h>
#include <ydb/core/blobstorage/base/blobstorage_events.h>
#include <ydb/core/mind/tenant_pool.h>
#include <ydb/core/node_whiteboard/node_whiteboard.h>
@@ -23,6 +24,7 @@ namespace NCms {
using namespace NNodeWhiteboard;
using namespace NKikimrWhiteboard;
+using namespace NConsole;
class TInfoCollector: public TActorBootstrapped<TInfoCollector> {
public:
@@ -34,6 +36,7 @@ public:
: Client(client)
, Timeout(timeout)
, Info(new TClusterInfo)
+ , BootstrapConfigReceived(false)
, BaseConfigReceived(false)
{
}
@@ -44,6 +47,7 @@ private:
STATEFN(StateWork) {
switch (ev->GetTypeRewrite()) {
sFunc(TEvents::TEvWakeup, ReplyAndDie);
+ hFunc(TEvConfigsDispatcher::TEvGetConfigResponse, Handle);
// Nodes
hFunc(TEvInterconnect::TEvNodesInfo, Handle);
@@ -76,6 +80,10 @@ private:
// Nodes
void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev);
+
+ //Configs
+ void RequestBootstrapConfig();
+ void Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev);
// BSC
void RequestBaseConfig();
@@ -103,6 +111,7 @@ private:
TClusterInfoPtr Info;
TActorId BscPipe;
+ bool BootstrapConfigReceived;
bool BaseConfigReceived;
THashMap<ui32, TSet<ui32>> NodeEvents; // nodeId -> expected events
THashMap<TPDiskID, TPDiskStateInfo, TPDiskIDHash> PDiskInfo;
@@ -112,7 +121,7 @@ private:
void TInfoCollector::ReplyAndDie() {
auto ev = MakeHolder<TCms::TEvPrivate::TEvClusterInfo>();
- ev->Success = BaseConfigReceived;
+ ev->Success = BaseConfigReceived && BootstrapConfigReceived;
if (BaseConfigReceived) {
for (const auto& [id, info] : PDiskInfo) {
@@ -132,7 +141,7 @@ void TInfoCollector::ReplyAndDie() {
}
void TInfoCollector::MaybeReplyAndDie() {
- if (!BaseConfigReceived) {
+ if (!BaseConfigReceived || !BootstrapConfigReceived) {
return;
}
@@ -160,6 +169,7 @@ void TInfoCollector::PassAway() {
void TInfoCollector::Bootstrap() {
Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes());
Schedule(Timeout, new TEvents::TEvWakeup());
+ RequestBootstrapConfig();
Become(&TThis::StateWork);
}
@@ -172,6 +182,36 @@ void TInfoCollector::Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) {
}
}
+// Sends a get-config request for the bootstrap config item to the configs
+// dispatcher addressed by this actor's own node id.
+void TInfoCollector::RequestBootstrapConfig() {
+    const auto dispatcherId = MakeConfigsDispatcherID(SelfId().NodeId());
+    const auto kind = static_cast<ui32>(NKikimrConsole::TConfigItem::BootstrapConfigItem);
+    Send(dispatcherId, new TEvConfigsDispatcher::TEvGetConfigRequest(kind));
+}
+
+// Handles the configs dispatcher reply to the bootstrap config request.
+//
+// If the reply carries a bootstrap config, it is compared with the local one
+// from AppData (a mismatch is recorded in Info->IsLocalBootConfDiffersFromConsole)
+// and the received config is used to fill the sys-tablet maps. If the reply
+// has no bootstrap config, the local AppData config is used instead.
+// Either way the collector then checks whether it can reply.
+void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev) {
+    const auto &config = ev->Get()->Config;
+    const auto &localBootstrap = AppData()->BootstrapConfig;
+
+    BootstrapConfigReceived = true;
+
+    if (!config || !config->HasBootstrapConfig()) {
+        // Fallback path: the local config must actually be applied and the
+        // completion check must run, exactly as on the Console path below;
+        // otherwise the sys-tablet maps stay empty.
+        LOG_I("Couldn't collect bootstrap config from Console. Taking the local config");
+        Info->ApplySysTabletsInfo(localBootstrap);
+        MaybeReplyAndDie();
+        return;
+    }
+
+    const auto &consoleBootstrap = config->GetBootstrapConfig();
+    LOG_D("Got Bootstrap config"
+        << ": record# " << config->ShortDebugString());
+
+    if (!::google::protobuf::util::MessageDifferencer::Equals(localBootstrap, consoleBootstrap)) {
+        LOG_D("Local Bootstrap config is different from the config from the console");
+        Info->IsLocalBootConfDiffersFromConsole = true;
+    }
+
+    Info->ApplySysTabletsInfo(consoleBootstrap);
+    MaybeReplyAndDie();
+}
+
void TInfoCollector::RequestBaseConfig() {
using namespace NTabletPipe;
@@ -211,7 +251,6 @@ void TInfoCollector::Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& e
Info->AddBSGroup(group);
}
- Info->ChooseSysNodes();
MaybeReplyAndDie();
}
}
diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp
index d57e11631d6..f6dd469b8a6 100644
--- a/ydb/core/driver_lib/run/run.cpp
+++ b/ydb/core/driver_lib/run/run.cpp
@@ -1040,6 +1040,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig)
AppData->MeteringConfig = runConfig.AppConfig.GetMeteringConfig();
}
+ if (runConfig.AppConfig.HasBootstrapConfig()) {
+ AppData->BootstrapConfig = runConfig.AppConfig.GetBootstrapConfig();
+ }
+
// setup resource profiles
AppData->ResourceProfiles = new TResourceProfiles;
if (runConfig.AppConfig.GetBootstrapConfig().ResourceProfilesSize())
diff --git a/ydb/core/protos/CMakeLists.txt b/ydb/core/protos/CMakeLists.txt
index 4ab0d4b49e4..d8e4f567463 100644
--- a/ydb/core/protos/CMakeLists.txt
+++ b/ydb/core/protos/CMakeLists.txt
@@ -57,6 +57,7 @@ target_proto_messages(ydb-core-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/protos/console_tenant.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_tx_allocator.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_bs_controller.proto
+ ${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_cms.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_coordinator.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_columnshard.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_datashard.proto
diff --git a/ydb/core/protos/counters_cms.proto b/ydb/core/protos/counters_cms.proto
new file mode 100644
index 00000000000..b55d8ef37ab
--- /dev/null
+++ b/ydb/core/protos/counters_cms.proto
@@ -0,0 +1,47 @@
+import "ydb/core/protos/counters.proto";
+
+package NKikimr.NCms;
+
+option java_package = "ru.yandex.kikimr.proto";
+
+option (TabletTypeName) = "Cms";
+
+// Simple counters of the CMS tablet.
+enum ESimpleCounters {
+ COUNTER_SIMPLE_IGNORE = 0;
+
+ // 1 when local config differs from console
+ // (exported under the name "BootstrapConfigDiffersFromConsole").
+ COUNTER_BOOTSTRAP_DIFFERS = 1 [(CounterOpts) = {Name: "BootstrapConfigDiffersFromConsole"}];
+}
+
+// Cumulative counters of the CMS tablet; only the placeholder entry is declared.
+enum ECumulativeCounters {
+ COUNTER_CUMULATIVE_IGNORE = 0;
+}
+
+// Percentile counters of the CMS tablet; only the placeholder entry is
+// declared. The enum-wide options define two ranges, "0 ms" and "1 ms".
+enum EPercentileCounters {
+ option (GlobalCounterOpts) = {
+ Ranges { Value: 0 Name: "0 ms" }
+ Ranges { Value: 1 Name: "1 ms" }
+ };
+
+ COUNTER_PERCENTILE_IGNORE = 0;
+}
+
+// Transaction type ids of the CMS tablet. Each value is what the matching
+// transaction's GetTxType() override returns; Name is the label attached to it.
+// NOTE(review): no GetTxType() override in this change returns TXTYPE_INIT —
+// confirm whether it is still needed.
+enum ETxTypes {
+ TXTYPE_INIT_SCHEMA = 0 [(TxTypeOpts) = {Name: "TxInitSchema"}];
+ TXTYPE_INIT = 1 [(TxTypeOpts) = {Name: "TxInit"}];
+ TXTYPE_GET_LOG_TAIL = 2 [(TxTypeOpts) = {Name: "TxGetLogTail"}];
+ TXTYPE_LOAD_STATE = 3 [(TxTypeOpts) = {Name: "TxLoadState"}];
+ TXTYPE_LOG_AND_SEND = 4 [(TxTypeOpts) = {Name: "TxLogAndSend"}];
+ TXTYPE_LOG_CLEANUP = 5 [(TxTypeOpts) = {Name: "TxLogCleanup"}];
+ TXTYPE_PROCESS_NOTIFICATION = 6 [(TxTypeOpts) = {Name: "TxProcessNotification"}];
+ TXTYPE_REJECT_NOTIFICATION = 7 [(TxTypeOpts) = {Name: "TxRejectNotification"}];
+ TXTYPE_REMOVE_EXPIRED_NOTIFICATION = 8 [(TxTypeOpts) = {Name: "TxRemoveExpiredNotification"}];
+ TXTYPE_STORE_PERMISSIONS = 9 [(TxTypeOpts) = {Name: "TxStorePermissions"}];
+ TXTYPE_REMOVE_PERMISSIONS = 10 [(TxTypeOpts) = {Name: "TxRemovePermissions"}];
+ TXTYPE_REMOVE_REQUEST = 11 [(TxTypeOpts) = {Name: "TxRemoveRequest"}];
+ TXTYPE_REMOVE_WALLE_TASK = 12 [(TxTypeOpts) = {Name: "TxRemoveWalleTask"}];
+ TXTYPE_STORE_WALLE_TASK = 13 [(TxTypeOpts) = {Name: "TxStoreWalleTask"}];
+ TXTYPE_UPDATE_CONFIG = 14 [(TxTypeOpts) = {Name: "TxUpdateConfig"}];
+ TXTYPE_UPDATE_DOWNTIMES = 15 [(TxTypeOpts) = {Name: "TxUpdateDowntimes"}];
+}
+