diff options
author | shmel1k <shmel1k@ydb.tech> | 2023-10-23 17:35:36 +0300 |
---|---|---|
committer | shmel1k <shmel1k@ydb.tech> | 2023-10-23 18:12:52 +0300 |
commit | dac2309e55d9e559c65b13c3a30a88d786805e2e (patch) | |
tree | 818afee3737c3ac5bedfde14aa7b9824afd885e0 | |
parent | a3f3a35aaee5b3cac5a9d2143fa754a41a82cfa5 (diff) | |
download | ydb-dac2309e55d9e559c65b13c3a30a88d786805e2e.tar.gz |
PR from branch users/shmel1k/YDBOPS-8547_add_cms_viewer_for_locks
add first version of CMS viewer
-rw-r--r-- | ydb/core/cms/cms.cpp | 153 | ||||
-rw-r--r-- | ydb/core/cms/cms_impl.h | 6 | ||||
-rw-r--r-- | ydb/core/cms/node_checkers.cpp | 8 | ||||
-rw-r--r-- | ydb/core/cms/node_checkers.h | 2 |
4 files changed, 167 insertions, 2 deletions
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index 2e5620c91a..ffca58da2c 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -21,6 +21,7 @@ #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/hfunc.h> #include <library/cpp/actors/interconnect/interconnect.h> +#include <library/cpp/monlib/service/pages/templates.h> #include <util/datetime/base.h> #include <util/generic/serialized_enum.h> @@ -73,6 +74,158 @@ void TCms::OnTabletDead(TEvTablet::TEvTabletDead::TPtr &ev, const TActorContext Die(ctx); } +namespace { + struct TNodeVDisksStatus { + ui32 Up; + ui32 Down; + ui32 Restart; + }; + + void CalculateNodeVDisksStatus(const TClusterInfoPtr clusterInfo, const TNodeInfoPtr node, + THashMap<ui32, TNodeVDisksStatus>& nodeVDisksStatus) { + ui32 up = 0; + ui32 down = 0; + ui32 restart = 0; + for (const auto& vdiskID : node->VDisks) { + const auto& vdisk = clusterInfo->VDisk(vdiskID); + switch (vdisk.State) { + case NKikimrCms::EState::UNKNOWN: + break; + case NKikimrCms::EState::DOWN: + ++down; + break; + case NKikimrCms::EState::RESTART: + ++restart; + break; + case NKikimrCms::EState::UP: + ++up; + break; + } + } + nodeVDisksStatus[node->NodeId].Up = up; + nodeVDisksStatus[node->NodeId].Down = down; + nodeVDisksStatus[node->NodeId].Restart = restart; + } +} // namespace + +void TCms::GenerateNodeState(IOutputStream& out) +{ + THashMap<ui32, TNodeVDisksStatus> nodeVDisksStatusMap; + + ui32 totalVDisksUp = 0; + ui32 totalVDisksRestart = 0; + ui32 totalVDisksDown = 0; + + for (const auto& node: ClusterInfo->AllNodes()) { + CalculateNodeVDisksStatus(ClusterInfo, node.second, nodeVDisksStatusMap); + totalVDisksUp += nodeVDisksStatusMap[node.first].Up; + totalVDisksDown += nodeVDisksStatusMap[node.first].Down; + totalVDisksRestart += nodeVDisksStatusMap[node.first].Restart; + } + + const auto& nodeState = ClusterInfo->ClusterNodes->GetNodeToState(); + HTML(out) { + TAG(TH3) { + out << "Nodes with state"; + } + TAG(TH4) { + out << "ClusterInfo last update timestamp: " << ClusterInfo->GetTimestamp(); + } + TAG(TH4) { + out << "Total VDisks State. UP: " << totalVDisksUp << ", Restart = " << totalVDisksRestart << ", Down = " << totalVDisksDown; + } + TABLE_SORTABLE() { + TABLEHEAD() { + TABLER() { + TABLED() { + out << "NodeID"; + } + TABLED() { + out << "Host"; + } + TABLED() { + out << "State"; + } + TABLED() { + out << "InMemoryState"; + } + TABLED() { + out << "Tenant"; + } + TABLED() { + out << "VDisksUp"; + } + TABLED() { + out << "VDisksDown"; + } + TABLED() { + out << "VDisksRestart"; + } + } + } + TABLEBODY() { + for (const auto& node : ClusterInfo->AllNodes()) { + auto currentInMemoryState = INodesChecker::NODE_STATE_UNSPECIFIED; + if (nodeState.contains(node.first)) { + currentInMemoryState = nodeState.at(node.first); + } + TABLER() { + TABLED() { + out << node.first; + } + TABLED() { + out << node.second->Host; + } + TABLED() { + out << node.second->State; + } + TABLED() { + out << currentInMemoryState; + } + TABLED() { + out << node.second->Tenant; + } + if (node.second->VDisks) { + TABLED() { + out << nodeVDisksStatusMap[node.first].Up; + } + TABLED() { + out << nodeVDisksStatusMap[node.first].Down; + } + TABLED() { + out << nodeVDisksStatusMap[node.first].Restart; + } + } + } + } + } + } + } +} + +TString TCms::GenerateStat() +{ + TStringStream str; + HTML(str) { + TAG(TH2) { str << "Cluster management system tablet";} + GenerateNodeState(str); + } + return str.Str(); +} + +bool TCms::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) +{ + if (!ev) { + return true; + } + + ScheduleUpdateClusterInfo(ctx, true); + + TString str = GenerateStat(); + ctx.Send(ev->Sender, new NMon::TEvRemoteHttpInfoRes(std::move(str))); + return true; +} + void TCms::Enqueue(TAutoPtr<IEventHandle> &ev) { InitQueue.push(ev); diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index e662f382c4..10c1cc0b27 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -425,6 +425,8 @@ private: void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr &ev, const TActorContext &ctx); void Handle(TEvTabletPipe::TEvClientConnected::TPtr &ev, const TActorContext &ctx); + bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) override; + private: TStack<TInstant> ScheduledCleanups; TString NotSupportedReason; @@ -452,6 +454,10 @@ private: TInstant InfoCollectorStartTime; +private: + TString GenerateStat(); + void GenerateNodeState(IOutputStream&); + public: TCms(const TActorId &tablet, TTabletStorageInfo *info) : TActor(&TThis::StateInit) diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp index 33c09077f5..32d4c3af16 100644 --- a/ydb/core/cms/node_checkers.cpp +++ b/ydb/core/cms/node_checkers.cpp @@ -79,6 +79,10 @@ void TNodesCounterBase::UnlockNode(ui32 nodeId) { } } +const THashMap<ui32, INodesChecker::ENodeState>& TNodesCounterBase::GetNodeToState() const { + return NodeToState; +} + bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const { Y_ABORT_UNLESS(NodeToState.contains(nodeId)); auto nodeState = NodeToState.at(nodeId); @@ -86,7 +90,7 @@ bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART; NCH_LOG_D("Checking Node: " - << nodeId << ", with state: " << nodeState + << nodeId << ", with state: " << nodeState << ", with limit: " << DisabledNodesLimit << ", with ratio limit: " << DisabledNodesRatioLimit << ", locked nodes: " << LockedNodesCount @@ -145,7 +149,7 @@ bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili NCH_LOG_D("Checking limits for sys tablet: " << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) << ", on node: " << nodeId - << ", with state: " << nodeState + << ", with state: " << nodeState << ", locked nodes: " << LockedNodesCount << ", down nodes: " << DownNodesCount); diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h index 0b0b856301..bca0b96b39 100644 --- a/ydb/core/cms/node_checkers.h +++ b/ydb/core/cms/node_checkers.h @@ -61,6 +61,8 @@ public: void LockNode(ui32 nodeId) override; void UnlockNode(ui32 nodeId) override; + + const THashMap<ui32, ENodeState>& GetNodeToState() const; }; /** |