aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author shmel1k <shmel1k@ydb.tech> 2023-10-23 17:35:36 +0300
committer shmel1k <shmel1k@ydb.tech> 2023-10-23 18:12:52 +0300
commit dac2309e55d9e559c65b13c3a30a88d786805e2e (patch)
tree 818afee3737c3ac5bedfde14aa7b9824afd885e0
parent a3f3a35aaee5b3cac5a9d2143fa754a41a82cfa5 (diff)
download ydb-dac2309e55d9e559c65b13c3a30a88d786805e2e.tar.gz
PR from branch users/shmel1k/YDBOPS-8547_add_cms_viewer_for_locks
add first version of CMS viewer
-rw-r--r-- ydb/core/cms/cms.cpp 153
-rw-r--r-- ydb/core/cms/cms_impl.h 6
-rw-r--r-- ydb/core/cms/node_checkers.cpp 8
-rw-r--r-- ydb/core/cms/node_checkers.h 2
4 files changed, 167 insertions, 2 deletions
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index 2e5620c91a..ffca58da2c 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -21,6 +21,7 @@
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/core/hfunc.h>
#include <library/cpp/actors/interconnect/interconnect.h>
+#include <library/cpp/monlib/service/pages/templates.h>
#include <util/datetime/base.h>
#include <util/generic/serialized_enum.h>
@@ -73,6 +74,158 @@ void TCms::OnTabletDead(TEvTablet::TEvTabletDead::TPtr &ev, const TActorContext
Die(ctx);
}
+namespace {
+    // Per-node tally of VDisks grouped by the CMS state reported for them.
+    // Members default to zero so a default-constructed value is a valid empty tally.
+    struct TNodeVDisksStatus {
+        ui32 Up = 0;
+        ui32 Down = 0;
+        ui32 Restart = 0;
+    };
+
+    // Counts the VDisks listed in node->VDisks by their state (looked up through
+    // clusterInfo->VDisk) and stores the tally in `nodeVDisksStatus` under
+    // node->NodeId, overwriting all three counters of any existing entry.
+    // VDisks in the UNKNOWN state are not added to any counter.
+    //
+    // Both pointer handles are taken by const reference so that calling this
+    // helper does not copy them; existing by-value call sites keep working.
+    void CalculateNodeVDisksStatus(const TClusterInfoPtr& clusterInfo, const TNodeInfoPtr& node,
+                                   THashMap<ui32, TNodeVDisksStatus>& nodeVDisksStatus) {
+        TNodeVDisksStatus status;
+        for (const auto& vdiskID : node->VDisks) {
+            switch (clusterInfo->VDisk(vdiskID).State) {
+                case NKikimrCms::EState::UNKNOWN:
+                    break;
+                case NKikimrCms::EState::DOWN:
+                    ++status.Down;
+                    break;
+                case NKikimrCms::EState::RESTART:
+                    ++status.Restart;
+                    break;
+                case NKikimrCms::EState::UP:
+                    ++status.Up;
+                    break;
+            }
+        }
+        // One lookup/insert instead of one per counter.
+        nodeVDisksStatus[node->NodeId] = status;
+    }
+} // namespace
+
+// Writes the "Nodes with state" section of the monitoring page to `out`:
+// the ClusterInfo timestamp, cluster-wide VDisk totals, and a sortable table
+// with one row per entry of ClusterInfo->AllNodes().
+//
+// Every row emits all eight cells declared in the table header. Previously the
+// three VDisk cells were skipped whenever `node.second->VDisks` evaluated to
+// false, which left such rows with five cells under an eight-column header.
+// The per-node tally is computed for every node, so those rows now show the
+// stored counters instead.
+void TCms::GenerateNodeState(IOutputStream& out)
+{
+    THashMap<ui32, TNodeVDisksStatus> nodeVDisksStatusMap;
+
+    ui32 totalVDisksUp = 0;
+    ui32 totalVDisksRestart = 0;
+    ui32 totalVDisksDown = 0;
+
+    // First pass: tally VDisk states per node and accumulate cluster-wide totals.
+    for (const auto& node : ClusterInfo->AllNodes()) {
+        CalculateNodeVDisksStatus(ClusterInfo, node.second, nodeVDisksStatusMap);
+        const auto& status = nodeVDisksStatusMap[node.first];
+        totalVDisksUp += status.Up;
+        totalVDisksDown += status.Down;
+        totalVDisksRestart += status.Restart;
+    }
+
+    const auto& nodeState = ClusterInfo->ClusterNodes->GetNodeToState();
+    HTML(out) {
+        TAG(TH3) {
+            out << "Nodes with state";
+        }
+        TAG(TH4) {
+            out << "ClusterInfo last update timestamp: " << ClusterInfo->GetTimestamp();
+        }
+        TAG(TH4) {
+            out << "Total VDisks State. UP: " << totalVDisksUp << ", Restart = " << totalVDisksRestart << ", Down = " << totalVDisksDown;
+        }
+        TABLE_SORTABLE() {
+            TABLEHEAD() {
+                TABLER() {
+                    TABLED() {
+                        out << "NodeID";
+                    }
+                    TABLED() {
+                        out << "Host";
+                    }
+                    TABLED() {
+                        out << "State";
+                    }
+                    TABLED() {
+                        out << "InMemoryState";
+                    }
+                    TABLED() {
+                        out << "Tenant";
+                    }
+                    TABLED() {
+                        out << "VDisksUp";
+                    }
+                    TABLED() {
+                        out << "VDisksDown";
+                    }
+                    TABLED() {
+                        out << "VDisksRestart";
+                    }
+                }
+            }
+            TABLEBODY() {
+                for (const auto& node : ClusterInfo->AllNodes()) {
+                    // Nodes missing from the node-to-state map are shown as unspecified.
+                    const auto stateIt = nodeState.find(node.first);
+                    const auto currentInMemoryState = stateIt != nodeState.end()
+                        ? stateIt->second
+                        : INodesChecker::NODE_STATE_UNSPECIFIED;
+                    const auto& vdisksStatus = nodeVDisksStatusMap[node.first];
+                    TABLER() {
+                        TABLED() {
+                            out << node.first;
+                        }
+                        TABLED() {
+                            out << node.second->Host;
+                        }
+                        TABLED() {
+                            out << node.second->State;
+                        }
+                        TABLED() {
+                            out << currentInMemoryState;
+                        }
+                        TABLED() {
+                            out << node.second->Tenant;
+                        }
+                        // Always emitted so the row width matches the header.
+                        TABLED() {
+                            out << vdisksStatus.Up;
+                        }
+                        TABLED() {
+                            out << vdisksStatus.Down;
+                        }
+                        TABLED() {
+                            out << vdisksStatus.Restart;
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+// Builds the monitoring page body: a title heading followed by the node-state
+// section produced by GenerateNodeState(). Returns the rendered text.
+TString TCms::GenerateStat()
+{
+    TStringStream page;
+    HTML(page) {
+        TAG(TH2) {
+            page << "Cluster management system tablet";
+        }
+        GenerateNodeState(page);
+    }
+    return page.Str();
+}
+
+// Handles a remote HTTP info request for this tablet's monitoring page.
+// For a non-null event it calls ScheduleUpdateClusterInfo(ctx, true), renders
+// the page with GenerateStat() and sends it back to ev->Sender.
+// Returns true in every case, including a null event (where nothing is sent).
+// NOTE(review): the page is rendered immediately after the update is
+// scheduled, so it presumably reflects the ClusterInfo held at that moment --
+// confirm against ScheduleUpdateClusterInfo.
+bool TCms::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx)
+{
+    if (ev) {
+        ScheduleUpdateClusterInfo(ctx, true);
+        ctx.Send(ev->Sender, new NMon::TEvRemoteHttpInfoRes(GenerateStat()));
+    }
+    return true;
+}
+
void TCms::Enqueue(TAutoPtr<IEventHandle> &ev)
{
InitQueue.push(ev);
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index e662f382c4..10c1cc0b27 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -425,6 +425,8 @@ private:
void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr &ev, const TActorContext &ctx);
void Handle(TEvTabletPipe::TEvClientConnected::TPtr &ev, const TActorContext &ctx);
+ bool OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr ev, const TActorContext& ctx) override;
+
private:
TStack<TInstant> ScheduledCleanups;
TString NotSupportedReason;
@@ -452,6 +454,10 @@ private:
TInstant InfoCollectorStartTime;
+private:
+ TString GenerateStat();
+ void GenerateNodeState(IOutputStream&);
+
public:
TCms(const TActorId &tablet, TTabletStorageInfo *info)
: TActor(&TThis::StateInit)
diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp
index 33c09077f5..32d4c3af16 100644
--- a/ydb/core/cms/node_checkers.cpp
+++ b/ydb/core/cms/node_checkers.cpp
@@ -79,6 +79,10 @@ void TNodesCounterBase::UnlockNode(ui32 nodeId) {
}
}
+const THashMap<ui32, INodesChecker::ENodeState>& TNodesCounterBase::GetNodeToState() const { // Read-only accessor for the NodeToState map.
+ return NodeToState; // Returned by const reference; the map is not copied here.
+}
+
bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const {
Y_ABORT_UNLESS(NodeToState.contains(nodeId));
auto nodeState = NodeToState.at(nodeId);
@@ -86,7 +90,7 @@ bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili
bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART;
NCH_LOG_D("Checking Node: "
- << nodeId << ", with state: " << nodeState
+ << nodeId << ", with state: " << nodeState
<< ", with limit: " << DisabledNodesLimit
<< ", with ratio limit: " << DisabledNodesRatioLimit
<< ", locked nodes: " << LockedNodesCount
@@ -145,7 +149,7 @@ bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili
NCH_LOG_D("Checking limits for sys tablet: " << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
<< ", on node: " << nodeId
- << ", with state: " << nodeState
+ << ", with state: " << nodeState
<< ", locked nodes: " << LockedNodesCount
<< ", down nodes: " << DownNodesCount);
diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h
index 0b0b856301..bca0b96b39 100644
--- a/ydb/core/cms/node_checkers.h
+++ b/ydb/core/cms/node_checkers.h
@@ -61,6 +61,8 @@ public:
void LockNode(ui32 nodeId) override;
void UnlockNode(ui32 nodeId) override;
+
+ const THashMap<ui32, ENodeState>& GetNodeToState() const;
};
/**