diff options
author | t1mursadykov <t1mursadykov@ydb.tech> | 2022-09-29 20:57:51 +0300 |
---|---|---|
committer | t1mursadykov <t1mursadykov@ydb.tech> | 2022-09-29 20:57:51 +0300 |
commit | 4401b017554fb2087f25bb1380201f2a0ac043d3 (patch) | |
tree | 1607cf501ffd6f36221c54e53d622dfcf08cef3d | |
parent | f556e448fbb43b5dc92d8a36ba1aace052cddade (diff) | |
download | ydb-4401b017554fb2087f25bb1380201f2a0ac043d3.tar.gz |
Bootstrap config checking in CMS
27 files changed, 307 insertions, 67 deletions
diff --git a/ydb/core/base/appdata.h b/ydb/core/base/appdata.h index 3c8fc84652f..6118b309a9d 100644 --- a/ydb/core/base/appdata.h +++ b/ydb/core/base/appdata.h @@ -139,6 +139,7 @@ struct TAppData { NKikimrConfig::TMeteringConfig MeteringConfig; NKikimrConfig::TCompactionConfig CompactionConfig; NKikimrConfig::TDomainsConfig DomainsConfig; + NKikimrConfig::TBootstrap BootstrapConfig; bool EnforceUserTokenRequirement = false; bool AllowHugeKeyValueDeletes = true; // delete when all clients limit deletes per request bool EnableKqpSpilling = false; diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp index a1dc8a0b95f..e30d9811df8 100644 --- a/ydb/core/cms/cluster_info.cpp +++ b/ydb/core/cms/cluster_info.cpp @@ -786,6 +786,18 @@ void TClusterInfo::MigrateOldInfo(TClusterInfoPtr old) } } +void TClusterInfo::ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config) { + for (ui32 i = 0; i < config.TabletSize(); ++i) { + const auto &tablet = config.GetTablet(i); + + for (ui32 j = 0; j < tablet.NodeSize(); ++j) { + ui32 nodeId = tablet.GetNode(j); + TabletTypeToNodes[tablet.GetType()].push_back(nodeId); + NodeToTabletTypes[nodeId].push_back(tablet.GetType()); + } + } +} + void TClusterInfo::DebugDump(const TActorContext &ctx) const { LOG_DEBUG_S(ctx, NKikimrServices::CMS, diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h index 0ef5de32e68..16baceed49d 100644 --- a/ydb/core/cms/cluster_info.h +++ b/ydb/core/cms/cluster_info.h @@ -11,6 +11,7 @@ #include <ydb/core/blobstorage/base/blobstorage_vdiskid.h> #include <ydb/core/mind/tenant_pool.h> #include <ydb/core/protos/cms.pb.h> +#include <ydb/core/protos/console.pb.h> #include <util/generic/hash.h> #include <util/generic/maybe.h> @@ -528,19 +529,6 @@ public: return nodes; } - void ChooseSysNodes() { - for (auto &[nodeId, node] : Nodes) { - if (!node->PDisks.size()) { - SysNodes.push_back(node.Get()); - } - } - } - - TVector<const TNodeInfo *> GetSysTabletNodes() const - { - return 
SysNodes; - } - size_t NodesCount() const { return Nodes.size(); @@ -734,6 +722,8 @@ public: bool IsOutdated() const { return Outdated; } void SetOutdated(bool val) { Outdated = val; } + void ApplySysTabletsInfo(const NKikimrConfig::TBootstrap& config); + static EGroupConfigurationType VDiskConfigurationType(const TVDiskID &vdId) { return TGroupID(vdId.GroupID).ConfigurationType(); } @@ -833,13 +823,16 @@ private: ui64 RollbackPoint = 0; bool HasTenantsInfo = false; bool Outdated = false; - - TVector<const TNodeInfo *> SysNodes; // nodes with sys tablets // Fast access structures. TMultiMap<TString, ui32> HostNameToNodeId; TMultiMap<TString, ui32> TenantToNodeId; THashMap<TString, TLockableItemPtr> LockableItems; +public: + + bool IsLocalBootConfDiffersFromConsole = false; + THashMap<NKikimrConfig::TBootstrap::ETabletType, TVector<ui32>> TabletTypeToNodes; + THashMap<ui32, TVector<NKikimrConfig::TBootstrap::ETabletType>> NodeToTabletTypes; }; inline bool ActionRequiresHost(NKikimrCms::TAction::EType type) diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index 907edd8046c..ceeb61bef7d 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -4,9 +4,11 @@ #include "scheme.h" #include "sentinel.h" #include "erasure_checkers.h" +#include "ydb/core/protos/config_units.pb.h" #include <ydb/core/actorlib_impl/long_timer.h> #include <ydb/core/base/appdata.h> +#include <ydb/core/base/counters.h> #include <ydb/core/base/statestorage.h> #include <ydb/core/base/statestorage_impl.h> #include <ydb/core/cms/console/config_helpers.h> @@ -78,6 +80,8 @@ void TCms::OnActivateExecutor(const TActorContext &ctx) return; } + Executor()->RegisterExternalTabletCounters(TabletCountersPtr.Release()); + State->CmsTabletId = TabletID(); State->CmsActorId = SelfId(); @@ -556,49 +560,58 @@ bool TCms::CheckSysTabletsNode(const TAction &action, if (node.Services & EService::DynamicNode || node.PDisks.size()) { return true; } - - auto nodes = ClusterInfo->GetSysTabletNodes(); - - ui32 
disabledNodesCnt = 0; - TErrorInfo err; - TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration; - TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime; - for (auto node : nodes) { - if (node->IsLocked(err, State->Config.DefaultRetryTime, - TActivationContext::Now(), duration) || - node->IsDown(err, defaultDeadline)) - { - ++disabledNodesCnt; - } - } - - switch (opts.AvailabilityMode) { - case MODE_MAX_AVAILABILITY: - if (disabledNodesCnt > 0) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Too many locked sys nodes: " << disabledNodesCnt; - error.Deadline = defaultDeadline; - return false; - } - break; - case MODE_KEEP_AVAILABLE: - if (disabledNodesCnt * 8 >= nodes.size()) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Too many locked sys nodes: " << disabledNodesCnt; - error.Deadline = defaultDeadline; - return false; + + for (auto &tabletType : ClusterInfo->NodeToTabletTypes[node.NodeId]) { + ui32 disabledNodesCnt = 1; // counting including this node + TErrorInfo err; + TDuration duration = TDuration::MicroSeconds(action.GetDuration()) + opts.PermissionDuration; + TInstant defaultDeadline = TActivationContext::Now() + State->Config.DefaultRetryTime; + + for (auto &nodeId : ClusterInfo->TabletTypeToNodes[tabletType]) { + if (nodeId == node.NodeId) { + continue; + } + if (ClusterInfo->Node(nodeId).IsLocked(err, State->Config.DefaultRetryTime, + TActivationContext::Now(), duration) || + ClusterInfo->Node(nodeId).IsDown(err, defaultDeadline)) + { + ++disabledNodesCnt; + } } - break; - case MODE_FORCE_RESTART: - break; - default: - error.Code = TStatus::WRONG_REQUEST; - error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")", + + ui32 tabletNodes = ClusterInfo->TabletTypeToNodes[tabletType].size(); + switch (opts.AvailabilityMode) { + case MODE_MAX_AVAILABILITY: + if (tabletNodes > 1 && disabledNodesCnt * 2 >
tabletNodes){ + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType) + << " has too many locked nodes: " << disabledNodesCnt + << " limit: " << tabletNodes / 2 << " (50%)"; + error.Deadline = defaultDeadline; + return false; + } + break; + case MODE_KEEP_AVAILABLE: + if (tabletNodes > 1 && disabledNodesCnt > tabletNodes - 1) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << NKikimrConfig::TBootstrap_ETabletType_Name(tabletType) + << " has too many locked nodes: " << disabledNodesCnt + << ". At least one node must be available"; + error.Deadline = defaultDeadline; + return false; + } + break; + case MODE_FORCE_RESTART: + break; + default: + error.Code = TStatus::WRONG_REQUEST; + error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")", EAvailabilityMode_Name(opts.AvailabilityMode).data(), static_cast<ui32>(opts.AvailabilityMode)); - error.Deadline = defaultDeadline; - return false; - } + error.Deadline = defaultDeadline; + return false; + } + } return true; } @@ -1479,6 +1492,8 @@ void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx info->DebugDump(ctx); + TabletCounters->Simple()[COUNTER_BOOTSTRAP_DIFFERS].Set(ClusterInfo->IsLocalBootConfDiffersFromConsole); + ProcessQueue(ctx); } diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index af4136c204f..2a908067daa 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -12,9 +12,12 @@ #include <ydb/core/base/tablet_pipe.h> #include <ydb/core/base/statestorage_impl.h> #include <ydb/core/cms/console/console.h> +#include <ydb/core/protos/counters_cms.pb.h> +#include <ydb/core/tablet/tablet_counters_protobuf.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/engine/minikql/flat_local_tx_factory.h> + #include <util/generic/stack.h> #include <util/generic/queue.h> @@ -416,6 +419,10 @@ private: THashMap<ui32, ui32> NodeToRing; 
THashSet<ui32> StateStorageNodes; + // Monitoring + THolder<class NKikimr::TTabletCountersBase> TabletCountersPtr; + TTabletCountersBase* TabletCounters; + public: TCms(const TActorId &tablet, TTabletStorageInfo *info) : TActor(&TThis::StateInit) @@ -424,6 +431,12 @@ public: , Logger(State) , ConfigSubscriptionId(0) { + TabletCountersPtr.Reset(new TProtobufTabletCounters< + ESimpleCounters_descriptor, + ECumulativeCounters_descriptor, + EPercentileCounters_descriptor, + ETxTypes_descriptor>()); + TabletCounters = TabletCountersPtr.Get(); } static constexpr NKikimrServices::TActivity::EType ActorActivityType() diff --git a/ydb/core/cms/cms_tx_get_log_tail.cpp b/ydb/core/cms/cms_tx_get_log_tail.cpp index 8477e1d21ac..51951270def 100644 --- a/ydb/core/cms/cms_tx_get_log_tail.cpp +++ b/ydb/core/cms/cms_tx_get_log_tail.cpp @@ -15,6 +15,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_GET_LOG_TAIL; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { auto &req = Request->Get()->Record; diff --git a/ydb/core/cms/cms_tx_init_scheme.cpp b/ydb/core/cms/cms_tx_init_scheme.cpp index 796b6063962..51074f1b1ed 100644 --- a/ydb/core/cms/cms_tx_init_scheme.cpp +++ b/ydb/core/cms/cms_tx_init_scheme.cpp @@ -11,6 +11,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_INIT_SCHEMA; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxInitScheme Execute"); diff --git a/ydb/core/cms/cms_tx_load_state.cpp b/ydb/core/cms/cms_tx_load_state.cpp index 441c5d52972..d34efc2b42c 100644 --- a/ydb/core/cms/cms_tx_load_state.cpp +++ b/ydb/core/cms/cms_tx_load_state.cpp @@ -17,6 +17,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_LOAD_STATE; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxLoadState Execute"); diff --git a/ydb/core/cms/cms_tx_log_and_send.cpp 
b/ydb/core/cms/cms_tx_log_and_send.cpp index f713d28755e..228dcee5670 100644 --- a/ydb/core/cms/cms_tx_log_and_send.cpp +++ b/ydb/core/cms/cms_tx_log_and_send.cpp @@ -13,6 +13,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_LOG_AND_SEND; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::CMS, diff --git a/ydb/core/cms/cms_tx_log_cleanup.cpp b/ydb/core/cms/cms_tx_log_cleanup.cpp index 933b67a93f1..0ffcceeb6bd 100644 --- a/ydb/core/cms/cms_tx_log_cleanup.cpp +++ b/ydb/core/cms/cms_tx_log_cleanup.cpp @@ -11,6 +11,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_LOG_CLEANUP; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::CMS, diff --git a/ydb/core/cms/cms_tx_process_notification.cpp b/ydb/core/cms/cms_tx_process_notification.cpp index 2174a82f171..e1f4e7e1dbc 100644 --- a/ydb/core/cms/cms_tx_process_notification.cpp +++ b/ydb/core/cms/cms_tx_process_notification.cpp @@ -14,6 +14,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_PROCESS_NOTIFICATION; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxProcessNotification Execute"); diff --git a/ydb/core/cms/cms_tx_reject_notification.cpp b/ydb/core/cms/cms_tx_reject_notification.cpp index 5612dd9fbb1..5794e06388a 100644 --- a/ydb/core/cms/cms_tx_reject_notification.cpp +++ b/ydb/core/cms/cms_tx_reject_notification.cpp @@ -14,6 +14,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_REJECT_NOTIFICATION; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRejectNotification Execute"); diff --git a/ydb/core/cms/cms_tx_remove_expired_notifications.cpp b/ydb/core/cms/cms_tx_remove_expired_notifications.cpp index 6235b7f8f0f..1eff9849f5d 100644 --- 
a/ydb/core/cms/cms_tx_remove_expired_notifications.cpp +++ b/ydb/core/cms/cms_tx_remove_expired_notifications.cpp @@ -13,6 +13,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_REMOVE_EXPIRED_NOTIFICATION; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemoveExpiredNotifications Execute"); diff --git a/ydb/core/cms/cms_tx_remove_permissions.cpp b/ydb/core/cms/cms_tx_remove_permissions.cpp index 6e638c72ffb..4492dee128e 100644 --- a/ydb/core/cms/cms_tx_remove_permissions.cpp +++ b/ydb/core/cms/cms_tx_remove_permissions.cpp @@ -17,6 +17,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_REMOVE_PERMISSIONS; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemovePermissions Execute"); diff --git a/ydb/core/cms/cms_tx_remove_request.cpp b/ydb/core/cms/cms_tx_remove_request.cpp index fab8144cfe3..65be4207c69 100644 --- a/ydb/core/cms/cms_tx_remove_request.cpp +++ b/ydb/core/cms/cms_tx_remove_request.cpp @@ -16,6 +16,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_REMOVE_REQUEST; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemoveRequest Execute"); diff --git a/ydb/core/cms/cms_tx_remove_walle_task.cpp b/ydb/core/cms/cms_tx_remove_walle_task.cpp index d9e099f0db7..dc6a082b283 100644 --- a/ydb/core/cms/cms_tx_remove_walle_task.cpp +++ b/ydb/core/cms/cms_tx_remove_walle_task.cpp @@ -14,6 +14,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_REMOVE_WALLE_TASK; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxRemoveWalleTask Execute"); diff --git a/ydb/core/cms/cms_tx_store_permissions.cpp b/ydb/core/cms/cms_tx_store_permissions.cpp index fc5bc54e3e6..28cb4b18797 100644 --- 
a/ydb/core/cms/cms_tx_store_permissions.cpp +++ b/ydb/core/cms/cms_tx_store_permissions.cpp @@ -20,6 +20,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_STORE_PERMISSIONS ; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStorePermissions Execute"); diff --git a/ydb/core/cms/cms_tx_store_walle_task.cpp b/ydb/core/cms/cms_tx_store_walle_task.cpp index d6ca017f564..774d9116624 100644 --- a/ydb/core/cms/cms_tx_store_walle_task.cpp +++ b/ydb/core/cms/cms_tx_store_walle_task.cpp @@ -18,6 +18,8 @@ public: Y_VERIFY(Response); } + TTxType GetTxType() const override { return TXTYPE_STORE_WALLE_TASK; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::CMS, "TTxStoreWalleTask Execute"); diff --git a/ydb/core/cms/cms_tx_update_config.cpp b/ydb/core/cms/cms_tx_update_config.cpp index 59f962b3e2c..f1f2b56d868 100644 --- a/ydb/core/cms/cms_tx_update_config.cpp +++ b/ydb/core/cms/cms_tx_update_config.cpp @@ -19,6 +19,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_UPDATE_CONFIG; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::CMS, diff --git a/ydb/core/cms/cms_tx_update_downtimes.cpp b/ydb/core/cms/cms_tx_update_downtimes.cpp index d4641d8c263..10c4d44819a 100644 --- a/ydb/core/cms/cms_tx_update_downtimes.cpp +++ b/ydb/core/cms/cms_tx_update_downtimes.cpp @@ -11,6 +11,8 @@ public: { } + TTxType GetTxType() const override { return TXTYPE_UPDATE_DOWNTIMES; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::CMS, diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp index 01b303f4fe8..8bfabe928cf 100644 --- a/ydb/core/cms/cms_ut.cpp +++ b/ydb/core/cms/cms_ut.cpp @@ -1270,29 +1270,40 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(SysTabletsNode) { - TTestEnvOpts opt(16); + 
TTestEnvOpts opt(6); opt.VDisks = 0; TCmsTestEnv env(opt); - + env.EnableSysNodeChecking(); env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::ALLOW, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage")); - + + TFakeNodeWhiteboardService::Info[env.GetNodeId(0)].Connected = false; + TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; + env.RestartCms(); + + env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::ALLOW, + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage")); + TFakeNodeWhiteboardService::Info[env.GetNodeId(2)].Connected = false; env.RestartCms(); env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP, - MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage")); + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(3), 60000000, "storage")); + + TFakeNodeWhiteboardService::Info[env.GetNodeId(3)].Connected = false; + env.RestartCms(); + env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::ALLOW, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage")); - TFakeNodeWhiteboardService::Info[env.GetNodeId(3)].Connected = false; + TFakeNodeWhiteboardService::Info[env.GetNodeId(4)].Connected = false; env.RestartCms(); - + env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP, - MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(4), 60000000, "storage")); + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage")); } diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp index 5183d29ac5e..3e200e0de57 100644 --- a/ydb/core/cms/cms_ut_common.cpp +++ b/ydb/core/cms/cms_ut_common.cpp @@ -17,6 +17,8 @@ #include <util/string/subst.h> +#include <memory> + const bool STRAND_PDISK = true; #ifndef NDEBUG 
@@ -29,11 +31,23 @@ namespace NKikimr { namespace NCmsTest { using namespace NCms; +using namespace NConsole; using namespace NNodeWhiteboard; using namespace NKikimrWhiteboard; using namespace NKikimrCms; using namespace NKikimrBlobStorage; +void TFakeNodeWhiteboardService::Handle(TEvConfigsDispatcher::TEvGetConfigRequest::TPtr &ev, + const TActorContext &ctx) +{ + TGuard<TMutex> guard(Mutex); + Y_UNUSED(ev); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableBootstrapConfig()->CopyFrom(BootstrapConfig); + auto resp = MakeHolder<TEvConfigsDispatcher::TEvGetConfigResponse>(); + resp->Config = std::make_shared<NKikimrConfig::TAppConfig>(appConfig); + ctx.Send(ev->Sender, resp.Release(), 0, ev->Cookie); +} void TFakeNodeWhiteboardService::Handle(TEvBlobStorage::TEvControllerConfigRequest::TPtr &ev, const TActorContext &ctx) { @@ -155,6 +169,7 @@ void TFakeNodeWhiteboardService::Handle(TEvWhiteboard::TEvSystemStateRequest::TP NKikimrBlobStorage::TEvControllerConfigResponse TFakeNodeWhiteboardService::Config; THashMap<ui32, TFakeNodeInfo> TFakeNodeWhiteboardService::Info; TMutex TFakeNodeWhiteboardService::Mutex; +NKikimrConfig::TBootstrap TFakeNodeWhiteboardService::BootstrapConfig; namespace { @@ -332,6 +347,44 @@ static bool IsTabletActiveEvent(IEventHandle& ev) { return false; } + +inline void AddTablet(NKikimrConfig::TBootstrap::ETabletType type, + const TVector<ui32> &nodes, + NKikimrConfig::TBootstrap &config) +{ + auto &tablet = *config.AddTablet(); + tablet.SetType(type); + for (ui32 node : nodes) + tablet.AddNode(node); +} + +static NKikimrConfig::TBootstrap GenerateBootstrapConfig(TTestActorRuntime &runtime, + const ui32 nodesCount, + const TNodeTenantsMap &tenants) { + NKikimrConfig::TBootstrap res; + + TVector<ui32> nodes; + nodes.reserve(nodesCount); + for (ui32 nodeIndex = 0; nodeIndex < nodesCount; ++nodeIndex) { + ui32 nodeId = runtime.GetNodeId(nodeIndex); + if (tenants.contains(nodeId)) + continue; + nodes.push_back(nodeId); + } + + 
AddTablet(NKikimrConfig::TBootstrap::FLAT_BS_CONTROLLER, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::FLAT_SCHEMESHARD, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::FLAT_TX_COORDINATOR, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::TX_MEDIATOR, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::TX_ALLOCATOR, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::CONSOLE, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::CMS, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::NODE_BROKER, nodes, res); + AddTablet(NKikimrConfig::TBootstrap::TENANT_SLOT_BROKER, nodes, res); + + return res; +} + static void SetupServices(TTestActorRuntime &runtime, const TNodeTenantsMap &tenants) { @@ -416,6 +469,9 @@ static void SetupServices(TTestActorRuntime &runtime, SetupBSNodeWarden(runtime, nodeIndex, nodeWardenConfig); SetupTabletResolver(runtime, nodeIndex); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableBootstrapConfig()->CopyFrom(TFakeNodeWhiteboardService::BootstrapConfig); + // fake NodeWhiteBoard runtime.AddLocalService(NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(nodeIndex)), TActorSetupCmd(CreateFakeNodeWhiteboardService(), TMailboxType::Simple, 0), nodeIndex); @@ -426,13 +482,18 @@ static void SetupServices(TTestActorRuntime &runtime, TActorSetupCmd(new TFakeTenantPool(nodeTenants), TMailboxType::Simple, 0), nodeIndex); } - runtime.Initialize(app.Unwrap()); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableBootstrapConfig()->CopyFrom(TFakeNodeWhiteboardService::BootstrapConfig); + runtime.AddLocalService(MakeConfigsDispatcherID(runtime.GetNodeId(0)), + TActorSetupCmd(CreateConfigsDispatcher(appConfig), TMailboxType::Simple, 0), 0); + runtime.Initialize(app.Unwrap()); auto dnsConfig = new TDynamicNameserviceConfig(); dnsConfig->MaxStaticNodeId = 1000; dnsConfig->MaxDynamicNodeId = 2000; runtime.GetAppData().DynamicNameserviceConfig = dnsConfig; runtime.GetAppData().DisableCheckingSysNodesCms = true; + 
runtime.GetAppData().BootstrapConfig = TFakeNodeWhiteboardService::BootstrapConfig; if (!runtime.IsRealThreads()) { TDispatchOptions options; @@ -441,11 +502,14 @@ static void SetupServices(TTestActorRuntime &runtime, runtime.DispatchEvents(options); } + auto cid = CreateTestBootstrapper(runtime, CreateTestTabletInfo(MakeConsoleID(0), TTabletTypes::Console), + &NConsole::CreateConsole); CreateTestBootstrapper(runtime, CreateTestTabletInfo(MakeBSControllerID(0), TTabletTypes::BSController), &CreateFlatBsController); auto aid = CreateTestBootstrapper(runtime, CreateTestTabletInfo(MakeCmsID(0), TTabletTypes::Cms), &CreateCms); runtime.EnableScheduleForActor(aid, true); + runtime.EnableScheduleForActor(cid, true); } } // anonymous namespace @@ -460,12 +524,16 @@ TCmsTestEnv::TCmsTestEnv(const TTestEnvOpts &options) status.SetSuccess(true); auto *config = status.MutableBaseConfig(); + TFakeNodeWhiteboardService::BootstrapConfig = GenerateBootstrapConfig(*this, options.NodeCount, options.Tenants); + GenerateExtendedInfo(*this, config, options.VDisks, 4, options.Tenants, options.UseMirror3dcErasure); // Set observer to pass fake base blobstorage config. 
auto redirectConfigRequest = [](TTestActorRuntimeBase&, TAutoPtr<IEventHandle> &event) -> auto { - if (event->GetTypeRewrite() == TEvBlobStorage::EvControllerConfigRequest) { + if (event->GetTypeRewrite() == TEvBlobStorage::EvControllerConfigRequest + || event->GetTypeRewrite() == TEvConfigsDispatcher::EvGetConfigRequest + ) { auto fakeId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(event->Recipient.NodeId()); if (event->Recipient != fakeId) event = event->Forward(fakeId); diff --git a/ydb/core/cms/cms_ut_common.h b/ydb/core/cms/cms_ut_common.h index 9556e78a553..0e1759cda6b 100644 --- a/ydb/core/cms/cms_ut_common.h +++ b/ydb/core/cms/cms_ut_common.h @@ -4,6 +4,8 @@ #include <ydb/core/base/counters.h> #include <ydb/core/base/statestorage.h> +#include <ydb/core/cms/console/console.h> +#include <ydb/core/cms/console/configs_dispatcher.h> #include <ydb/core/node_whiteboard/node_whiteboard.h> #include <ydb/core/mind/tenant_pool.h> #include <ydb/core/testlib/basics/helpers.h> @@ -37,6 +39,7 @@ public: using TEvWhiteboard = NNodeWhiteboard::TEvWhiteboard; static NKikimrBlobStorage::TEvControllerConfigResponse Config; + static NKikimrConfig::TBootstrap BootstrapConfig; static THashMap<ui32, TFakeNodeInfo> Info; static TMutex Mutex; @@ -55,9 +58,12 @@ public: HFunc(TEvWhiteboard::TEvPDiskStateRequest, Handle); HFunc(TEvWhiteboard::TEvVDiskStateRequest, Handle); HFunc(TEvWhiteboard::TEvSystemStateRequest, Handle); + HFunc(NConsole::TEvConfigsDispatcher::TEvGetConfigRequest, Handle); } } + + void Handle(NConsole::TEvConfigsDispatcher::TEvGetConfigRequest::TPtr &ev, const TActorContext &ctx); void Handle(TEvBlobStorage::TEvControllerConfigRequest::TPtr &ev, const TActorContext &ctx); void Handle(TEvWhiteboard::TEvTabletStateRequest::TPtr &ev, const TActorContext &ctx); void Handle(TEvWhiteboard::TEvNodeStateRequest::TPtr &ev, const TActorContext &ctx); diff --git a/ydb/core/cms/info_collector.cpp b/ydb/core/cms/info_collector.cpp index afa2b64e7f9..23ce5e3702a 100644 --- 
a/ydb/core/cms/info_collector.cpp +++ b/ydb/core/cms/info_collector.cpp @@ -2,6 +2,7 @@ #include "info_collector.h" #include <ydb/core/base/tablet_pipe.h> +#include <ydb/core/cms/console/configs_dispatcher.h> #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <ydb/core/mind/tenant_pool.h> #include <ydb/core/node_whiteboard/node_whiteboard.h> @@ -23,6 +24,7 @@ namespace NCms { using namespace NNodeWhiteboard; using namespace NKikimrWhiteboard; +using namespace NConsole; class TInfoCollector: public TActorBootstrapped<TInfoCollector> { public: @@ -34,6 +36,7 @@ public: : Client(client) , Timeout(timeout) , Info(new TClusterInfo) + , BootstrapConfigReceived(false) , BaseConfigReceived(false) { } @@ -44,6 +47,7 @@ private: STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { sFunc(TEvents::TEvWakeup, ReplyAndDie); + hFunc(TEvConfigsDispatcher::TEvGetConfigResponse, Handle); // Nodes hFunc(TEvInterconnect::TEvNodesInfo, Handle); @@ -76,6 +80,10 @@ private: // Nodes void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev); + + //Configs + void RequestBootstrapConfig(); + void Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev); // BSC void RequestBaseConfig(); @@ -103,6 +111,7 @@ private: TClusterInfoPtr Info; TActorId BscPipe; + bool BootstrapConfigReceived; bool BaseConfigReceived; THashMap<ui32, TSet<ui32>> NodeEvents; // nodeId -> expected events THashMap<TPDiskID, TPDiskStateInfo, TPDiskIDHash> PDiskInfo; @@ -112,7 +121,7 @@ private: void TInfoCollector::ReplyAndDie() { auto ev = MakeHolder<TCms::TEvPrivate::TEvClusterInfo>(); - ev->Success = BaseConfigReceived; + ev->Success = BaseConfigReceived && BootstrapConfigReceived; if (BaseConfigReceived) { for (const auto& [id, info] : PDiskInfo) { @@ -132,7 +141,7 @@ void TInfoCollector::ReplyAndDie() { } void TInfoCollector::MaybeReplyAndDie() { - if (!BaseConfigReceived) { + if (!BaseConfigReceived || !BootstrapConfigReceived) { return; } @@ -160,6 +169,7 @@ void TInfoCollector::PassAway() { void 
TInfoCollector::Bootstrap() { Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); Schedule(Timeout, new TEvents::TEvWakeup()); + RequestBootstrapConfig(); Become(&TThis::StateWork); } @@ -172,6 +182,36 @@ void TInfoCollector::Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { } } +void TInfoCollector::RequestBootstrapConfig() { + ui32 configKind = (ui32)NKikimrConsole::TConfigItem::BootstrapConfigItem; + Send(MakeConfigsDispatcherID(SelfId().NodeId()), + new TEvConfigsDispatcher::TEvGetConfigRequest(configKind)); +} + +void TInfoCollector::Handle(TEvConfigsDispatcher::TEvGetConfigResponse::TPtr &ev) { + auto &config = ev->Get()->Config; + NKikimrConfig::TBootstrap bootstrap; + + BootstrapConfigReceived = true; + if (!config->HasBootstrapConfig()){ + LOG_I("Couldn't collect bootstrap config from Console. Taking the local config"); + bootstrap.CopyFrom(AppData()->BootstrapConfig); + /* No early return: the local config copied above must still be applied and MaybeReplyAndDie() must run below. */ + } else { + LOG_D("Got Bootstrap config" + << ": record# " << config->ShortDebugString()); + + if (!::google::protobuf::util::MessageDifferencer::Equals(AppData()->BootstrapConfig, config->GetBootstrapConfig())) { + LOG_D("Local Bootstrap config is different from the config from the console"); + Info->IsLocalBootConfDiffersFromConsole = true; + } + bootstrap = config->GetBootstrapConfig(); + } + + Info->ApplySysTabletsInfo(bootstrap); + MaybeReplyAndDie(); +} + void TInfoCollector::RequestBaseConfig() { using namespace NTabletPipe; @@ -211,7 +251,6 @@ void TInfoCollector::Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& e Info->AddBSGroup(group); } - Info->ChooseSysNodes(); MaybeReplyAndDie(); } } diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index d57e11631d6..f6dd469b8a6 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -1040,6 +1040,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig) AppData->MeteringConfig = runConfig.AppConfig.GetMeteringConfig(); } + if
(runConfig.AppConfig.HasBootstrapConfig()) { + AppData->BootstrapConfig = runConfig.AppConfig.GetBootstrapConfig(); + } + // setup resource profiles AppData->ResourceProfiles = new TResourceProfiles; if (runConfig.AppConfig.GetBootstrapConfig().ResourceProfilesSize()) diff --git a/ydb/core/protos/CMakeLists.txt b/ydb/core/protos/CMakeLists.txt index 4ab0d4b49e4..d8e4f567463 100644 --- a/ydb/core/protos/CMakeLists.txt +++ b/ydb/core/protos/CMakeLists.txt @@ -57,6 +57,7 @@ target_proto_messages(ydb-core-protos PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/protos/console_tenant.proto ${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_tx_allocator.proto ${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_bs_controller.proto + ${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_cms.proto ${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_coordinator.proto ${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_columnshard.proto ${CMAKE_SOURCE_DIR}/ydb/core/protos/counters_datashard.proto diff --git a/ydb/core/protos/counters_cms.proto b/ydb/core/protos/counters_cms.proto new file mode 100644 index 00000000000..b55d8ef37ab --- /dev/null +++ b/ydb/core/protos/counters_cms.proto @@ -0,0 +1,47 @@ +import "ydb/core/protos/counters.proto"; + +package NKikimr.NCms; + +option java_package = "ru.yandex.kikimr.proto"; + +option (TabletTypeName) = "Cms"; + +enum ESimpleCounters { + COUNTER_SIMPLE_IGNORE = 0; + + // 1 when local config differs from console + COUNTER_BOOTSTRAP_DIFFERS = 1 [(CounterOpts) = {Name: "BootstrapConfigDiffersFromConsole"}]; +} + +enum ECumulativeCounters { + COUNTER_CUMULATIVE_IGNORE = 0; +} + +enum EPercentileCounters { + option (GlobalCounterOpts) = { + Ranges { Value: 0 Name: "0 ms" } + Ranges { Value: 1 Name: "1 ms" } + }; + + COUNTER_PERCENTILE_IGNORE = 0; +} + +enum ETxTypes { + TXTYPE_INIT_SCHEMA = 0 [(TxTypeOpts) = {Name: "TxInitSchema"}]; + TXTYPE_INIT = 1 [(TxTypeOpts) = {Name: "TxInit"}]; + TXTYPE_GET_LOG_TAIL = 2 [(TxTypeOpts) = {Name: "TxGetLogTail"}]; + TXTYPE_LOAD_STATE = 3 
[(TxTypeOpts) = {Name: "TxLoadState"}]; + TXTYPE_LOG_AND_SEND = 4 [(TxTypeOpts) = {Name: "TxLogAndSend"}]; + TXTYPE_LOG_CLEANUP = 5 [(TxTypeOpts) = {Name: "TxLogCleanup"}]; + TXTYPE_PROCESS_NOTIFICATION = 6 [(TxTypeOpts) = {Name: "TxProcessNotification"}]; + TXTYPE_REJECT_NOTIFICATION = 7 [(TxTypeOpts) = {Name: "TxRejectNotification"}]; + TXTYPE_REMOVE_EXPIRED_NOTIFICATION = 8 [(TxTypeOpts) = {Name: "TxRemoveExpiredNotification"}]; + TXTYPE_STORE_PERMISSIONS = 9 [(TxTypeOpts) = {Name: "TxStorePermissions"}]; + TXTYPE_REMOVE_PERMISSIONS = 10 [(TxTypeOpts) = {Name: "TxRemovePermissions"}]; + TXTYPE_REMOVE_REQUEST = 11 [(TxTypeOpts) = {Name: "TxRemoveRequest"}]; + TXTYPE_REMOVE_WALLE_TASK = 12 [(TxTypeOpts) = {Name: "TxRemoveWalleTask"}]; + TXTYPE_STORE_WALLE_TASK = 13 [(TxTypeOpts) = {Name: "TxStoreWalleTask"}]; + TXTYPE_UPDATE_CONFIG = 14 [(TxTypeOpts) = {Name: "TxUpdateConfig"}]; + TXTYPE_UPDATE_DOWNTIMES = 15 [(TxTypeOpts) = {Name: "TxUpdateDowntimes"}]; +} + |