aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorandrew-rykov <arykov@ydb.tech>2023-01-27 08:48:54 +0300
committerandrew-rykov <arykov@ydb.tech>2023-01-27 08:48:54 +0300
commit9c98c7691d3bce195fb06c49997951bfa855deb3 (patch)
treeb3496c7aa490707999a40ff1de7ba80892564839
parentc0df21eadad350aaceb26d82dd2f11384178a45f (diff)
downloadydb-9c98c7691d3bce195fb06c49997951bfa855deb3.tar.gz
PR from branch users/andrew-rykov//prometheus
working version, fix Prometheus response
-rw-r--r--ydb/core/viewer/healthcheck.h169
-rw-r--r--ydb/core/viewer/healthcheck_record.h44
-rw-r--r--ydb/core/viewer/json_handlers_viewer.cpp2
-rw-r--r--ydb/core/viewer/json_healthcheck.h52
-rw-r--r--ydb/core/viewer/viewer.cpp11
5 files changed, 229 insertions, 49 deletions
diff --git a/ydb/core/viewer/healthcheck.h b/ydb/core/viewer/healthcheck.h
new file mode 100644
index 00000000000..d22c36df2fe
--- /dev/null
+++ b/ydb/core/viewer/healthcheck.h
@@ -0,0 +1,169 @@
+#pragma once
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/interconnect.h>
+#include <library/cpp/actors/core/mon.h>
+#include <ydb/core/blobstorage/base/blobstorage_events.h>
+#include <ydb/core/base/tablet_pipe.h>
+#include <ydb/core/protos/services.pb.h>
+#include "viewer.h"
+#include <library/cpp/monlib/encode/prometheus/prometheus.h>
+#include <ydb/core/health_check/health_check.h>
+#include <ydb/core/util/proto_duration.h>
+#include <util/string/split.h>
+#include "healthcheck_record.h"
+#include <vector>
+
+namespace NKikimr {
+namespace NViewer {
+
+using namespace NActors;
+using namespace NMonitoring;
+
+// Output format of the healthcheck HTTP response.
+enum HealthCheckResponseFormat {
+    JSON = 0,       // self-check result serialized as JSON
+    PROMETHEUS = 1  // issue counters in the Prometheus text format
+};
+
+// Viewer actor behind the "healthcheck" page. It forwards a self-check request
+// to the health check service and renders the result either as JSON or in the
+// Prometheus text format, depending on the Accept header of the HTTP request.
+class THealthCheck : public TActorBootstrapped<THealthCheck> {
+    static const bool WithRetry = false;
+    NMon::TEvHttpInfo::TPtr Event; // HTTP request being served
+    TJsonSettings JsonSettings;
+    ui32 Timeout = 0; // value of the "timeout" parameter, in milliseconds
+    HealthCheckResponseFormat Format = HealthCheckResponseFormat::JSON; // JSON unless Accept asks for text/plain
+
+public:
+    static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
+        return NKikimrServices::TActivity::VIEWER_HANDLER;
+    }
+
+    THealthCheck(IViewer*, NMon::TEvHttpInfo::TPtr& ev)
+        : Event(ev)
+    {}
+
+    // Picks the response format from the Accept header, builds the self-check
+    // request from the CGI parameters, sends it and arms the timeout wakeup.
+    // Replies with a bad-request response and stops when the Accept header
+    // names no supported type or "min_status" cannot be parsed.
+    void Bootstrap(const TActorContext& ctx) {
+        if (const auto* header = Event->Get()->Request.GetHeaders().FindHeader("Accept")) {
+            THashSet<TString> accept;
+            StringSplitter(header->Value()).SplitBySet(", ").SkipEmpty().Collect(&accept);
+            if (accept.contains("*/*") || accept.contains("application/json")) {
+                Format = HealthCheckResponseFormat::JSON;
+            } else if (accept.contains("text/plain")) {
+                Format = HealthCheckResponseFormat::PROMETHEUS;
+            } else {
+                Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPBADREQUEST_HEADERS, 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+                Die(ctx);
+                // The request has been answered and the actor is gone: do not
+                // fall through and query the health check service.
+                return;
+            }
+        }
+
+        const auto& params(Event->Get()->Request.GetParams());
+        if (Format == HealthCheckResponseFormat::JSON) {
+            JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true);
+            JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false);
+        }
+        Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000);
+
+        THolder<NHealthCheck::TEvSelfCheckRequest> request = MakeHolder<NHealthCheck::TEvSelfCheckRequest>();
+        request->Database = params.Get("tenant");
+        request->Request.set_return_verbose_status(FromStringWithDefault<bool>(params.Get("verbose"), false));
+        request->Request.set_maximum_level(FromStringWithDefault<ui32>(params.Get("max_level"), 0));
+        SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout());
+        if (params.Has("min_status")) {
+            Ydb::Monitoring::StatusFlag::Status minStatus;
+            if (Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &minStatus)) {
+                request->Request.set_minimum_status(minStatus);
+            } else {
+                Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPBADREQUEST, 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+                return PassAway();
+            }
+        }
+        Send(NHealthCheck::MakeHealthCheckID(), request.Release());
+
+        // we prefer to wait for more (+20%) verbose timeout status from HC;
+        // the sum is computed in 64 bits so a large "timeout" cannot wrap around
+        const TDuration wakeupAfter = TDuration::MilliSeconds(static_cast<ui64>(Timeout) * 120 / 100);
+        ctx.Schedule(wakeupAfter, new TEvents::TEvWakeup());
+        Become(&TThis::StateRequestedInfo);
+    }
+
+    // State after the request was sent: waits for the result or the wakeup.
+    STFUNC(StateRequestedInfo) {
+        switch (ev->GetTypeRewrite()) {
+            HFunc(NHealthCheck::TEvSelfCheckResult, Handle);
+            CFunc(TEvents::TSystem::Wakeup, HandleTimeout);
+        }
+    }
+
+    // Weight of one issue log entry: its count field, or 1 when the field is 0.
+    static int GetIssueCount(const Ydb::Monitoring::IssueLog& issueLog) {
+        return issueLog.count() == 0 ? 1 : issueLog.count();
+    }
+
+    // Groups the issue log of the result by (database, message, status, type)
+    // and sums the issue counts of every group.
+    THolder<THashMap<TMetricRecord, ui32>> GetRecordCounters(NHealthCheck::TEvSelfCheckResult::TPtr& ev) {
+        const auto* descriptor = Ydb::Monitoring::StatusFlag_Status_descriptor();
+        auto recordCounters = MakeHolder<THashMap<TMetricRecord, ui32>>();
+        for (const auto& log : ev->Get()->Result.issue_log()) {
+            // FindValueByNumber() yields nullptr for a number the descriptor
+            // does not know; report such a status by its numeric value.
+            TString status;
+            if (const auto* statusValue = descriptor->FindValueByNumber(log.status())) {
+                status = statusValue->name();
+            } else {
+                status = ToString(static_cast<int>(log.status()));
+            }
+
+            TMetricRecord record {
+                .Database = log.location().database().name(),
+                .Message = log.message(),
+                .Status = status,
+                .Type = log.type()
+            };
+
+            // operator[] value-initializes a missing counter to 0
+            (*recordCounters)[record] += GetIssueCount(log);
+        }
+
+        return recordCounters;
+    }
+
+    // Replies with the self-check result serialized as JSON and stops.
+    void HandleJSON(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext& ctx) {
+        TStringStream json;
+        TProtoToJson::ProtoToJson(json, ev->Get()->Result, JsonSettings);
+        ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPOKJSON + json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+        Die(ctx);
+    }
+
+    // Replies with one Prometheus gauge per group of issues and stops.
+    void HandlePrometheus(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext& ctx) {
+        const auto recordCounters = GetRecordCounters(ev);
+
+        TStringStream ss;
+        IMetricEncoderPtr encoder = EncoderPrometheus(&ss);
+        IMetricEncoder* e = encoder.Get();
+        e->OnStreamBegin();
+        for (const auto& [record, count] : *recordCounters) {
+            e->OnMetricBegin(EMetricType::IGAUGE);
+            {
+                e->OnLabelsBegin();
+                e->OnLabel("sensor", "Hc_ydb_ru");
+                // the DATABASE label is emitted only when the issue names a database
+                if (record.Database) {
+                    e->OnLabel("DATABASE", record.Database);
+                }
+                e->OnLabel("MESSAGE", record.Message);
+                e->OnLabel("STATUS", record.Status);
+                e->OnLabel("TYPE", record.Type);
+                e->OnLabelsEnd();
+            }
+            e->OnInt64(TInstant::Zero(), count);
+            e->OnMetricEnd();
+        }
+
+        e->OnStreamEnd();
+
+        ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPOKTEXT + ss.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+        Die(ctx);
+    }
+
+    // Renders the self-check result in the format chosen in Bootstrap.
+    void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext& ctx) {
+        if (Format == HealthCheckResponseFormat::JSON) {
+            HandleJSON(ev, ctx);
+        } else {
+            HandlePrometheus(ev, ctx);
+        }
+    }
+
+    // No result arrived before the wakeup: reply with a gateway timeout and stop.
+    void HandleTimeout(const TActorContext& ctx) {
+        Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPGATEWAYTIMEOUT, 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+        Die(ctx);
+    }
+};
+
+}
+}
diff --git a/ydb/core/viewer/healthcheck_record.h b/ydb/core/viewer/healthcheck_record.h
new file mode 100644
index 00000000000..39c94e28877
--- /dev/null
+++ b/ydb/core/viewer/healthcheck_record.h
@@ -0,0 +1,44 @@
+#pragma once
+
+namespace NKikimr::NViewer {
+
+using namespace NActors;
+using namespace NMonitoring;
+
+// Key under which healthcheck issues are grouped: two records are equal when
+// all four fields match. The field order is relied upon by designated initializers.
+struct TMetricRecord {
+    TString Database;
+    TString Message;
+    TString Status;
+    TString Type;
+
+    bool operator==(const TMetricRecord& other) const noexcept {
+        if (Database != other.Database) {
+            return false;
+        }
+        if (Message != other.Message) {
+            return false;
+        }
+        if (Status != other.Status) {
+            return false;
+        }
+        return Type == other.Type;
+    }
+
+    bool operator!=(const TMetricRecord& other) const noexcept {
+        return !(*this == other);
+    }
+
+    // Folds the hashes of the four fields together, in declaration order.
+    ui64 Hash() const noexcept {
+        const std::hash<TString> fieldHash;
+        const ui64 database = fieldHash(Database);
+        const ui64 withMessage = CombineHashes<ui64>(database, fieldHash(Message));
+        const ui64 withStatus = CombineHashes<ui64>(withMessage, fieldHash(Status));
+        return CombineHashes<ui64>(withStatus, fieldHash(Type));
+    }
+
+    // Hash functor for containers that take the hasher as a template argument.
+    struct THash {
+        ui64 operator()(const TMetricRecord& key) const noexcept {
+            return key.Hash();
+        }
+    };
+};
+
+}
+
+// Lets THashMap / THashSet use TMetricRecord as a key with the default hasher.
+template <>
+struct THash<NKikimr::NViewer::TMetricRecord> {
+    ui64 operator()(const NKikimr::NViewer::TMetricRecord& record) const noexcept {
+        return record.Hash();
+    }
+};
diff --git a/ydb/core/viewer/json_handlers_viewer.cpp b/ydb/core/viewer/json_handlers_viewer.cpp
index b3c7954d712..daa64a165e2 100644
--- a/ydb/core/viewer/json_handlers_viewer.cpp
+++ b/ydb/core/viewer/json_handlers_viewer.cpp
@@ -30,12 +30,10 @@
#include "json_query.h"
#include "json_netinfo.h"
#include "json_compute.h"
-#include "counters_hosts.h"
#include "json_healthcheck.h"
#include "json_nodes.h"
#include "json_acl.h"
-
namespace NKikimr::NViewer {
template <>
diff --git a/ydb/core/viewer/json_healthcheck.h b/ydb/core/viewer/json_healthcheck.h
index 2f0e5efa736..3bff8c4a466 100644
--- a/ydb/core/viewer/json_healthcheck.h
+++ b/ydb/core/viewer/json_healthcheck.h
@@ -16,63 +16,21 @@ namespace NViewer {
using namespace NActors;
class TJsonHealthCheck : public TActorBootstrapped<TJsonHealthCheck> {
- static const bool WithRetry = false;
- using TBase = TActorBootstrapped<TJsonHealthCheck>;
- IViewer* Viewer;
NMon::TEvHttpInfo::TPtr Event;
- TJsonSettings JsonSettings;
- ui32 Timeout = 0;
public:
static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
return NKikimrServices::TActivity::VIEWER_HANDLER;
}
- TJsonHealthCheck(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev)
- : Viewer(viewer)
- , Event(ev)
+ TJsonHealthCheck(IViewer*, NMon::TEvHttpInfo::TPtr& ev)
+// : Viewer(viewer)
+ : Event(ev)
{}
void Bootstrap() {
- const auto& params(Event->Get()->Request.GetParams());
- JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true);
- JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false);
- Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000);
- THolder<NHealthCheck::TEvSelfCheckRequest> request = MakeHolder<NHealthCheck::TEvSelfCheckRequest>();
- request->Database = params.Get("tenant");
- request->Request.set_return_verbose_status(FromStringWithDefault<bool>(params.Get("verbose"), false));
- request->Request.set_maximum_level(FromStringWithDefault<ui32>(params.Get("max_level"), 0));
- SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout());
- if (params.Has("min_status")) {
- Ydb::Monitoring::StatusFlag::Status minStatus;
- if (Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &minStatus)) {
- request->Request.set_minimum_status(minStatus);
- } else {
- Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPBADREQUEST, 0, NMon::IEvHttpInfoRes::EContentType::Custom));
- return PassAway();
- }
- }
- Send(NHealthCheck::MakeHealthCheckID(), request.Release());
- Timeout += Timeout * 20 / 100; // we prefer to wait for more (+20%) verbose timeout status from HC
- Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup());
- }
-
- STATEFN(StateRequestedInfo) {
- switch (ev->GetTypeRewrite()) {
- hFunc(NHealthCheck::TEvSelfCheckResult, Handle);
- cFunc(TEvents::TSystem::Wakeup, HandleTimeout);
- }
- }
-
- void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev) {
- TStringStream json;
- TProtoToJson::ProtoToJson(json, ev->Get()->Result, JsonSettings);
- Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get()) + json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
- PassAway();
- }
-
- void HandleTimeout() {
- Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
+ auto queryString = Event->Get()->Request.GetParams().Print();
+ Send(Event->Sender, new NMon::TEvHttpInfoRes("HTTP/1.1 302 Found\r\nLocation: /healthcheck?" + queryString + "\r\n\r\n", 0, NMon::IEvHttpInfoRes::EContentType::Custom));
PassAway();
}
};
diff --git a/ydb/core/viewer/viewer.cpp b/ydb/core/viewer/viewer.cpp
index 0f87a6e0e9e..60804758cad 100644
--- a/ydb/core/viewer/viewer.cpp
+++ b/ydb/core/viewer/viewer.cpp
@@ -20,6 +20,7 @@
#include "browse_pq.h"
#include "browse_db.h"
#include "counters_hosts.h"
+#include "healthcheck.h"
#include "json_handlers.h"
@@ -114,6 +115,12 @@ public:
.UseAuth = false,
});
mon->RegisterActorPage({
+ .RelPath = "healthcheck",
+ .ActorSystem = ctx.ExecutorThread.ActorSystem,
+ .ActorId = ctx.SelfID,
+ .UseAuth = false,
+ });
+ mon->RegisterActorPage({
.Title = "VDisk",
.RelPath = "vdisk",
.ActorSystem = ctx.ExecutorThread.ActorSystem,
@@ -345,6 +352,10 @@ private:
ctx.ExecutorThread.RegisterActor(new TCountersHostsList(this, ev));
return;
}
+ if (filename.StartsWith("healthcheck")) {
+ ctx.ExecutorThread.RegisterActor(new THealthCheck(this, ev));
+ return;
+ }
// TODO: check path validity
// TODO: cache
if (msg->Request.GetPathInfo().StartsWith('/')) {