diff options
author | andrew-rykov <arykov@ydb.tech> | 2023-01-27 08:48:54 +0300 |
---|---|---|
committer | andrew-rykov <arykov@ydb.tech> | 2023-01-27 08:48:54 +0300 |
commit | 9c98c7691d3bce195fb06c49997951bfa855deb3 (patch) | |
tree | b3496c7aa490707999a40ff1de7ba80892564839 | |
parent | c0df21eadad350aaceb26d82dd2f11384178a45f (diff) | |
download | ydb-9c98c7691d3bce195fb06c49997951bfa855deb3.tar.gz |
PR from branch users/andrew-rykov//prometheus
working version
fix respons
prometheus
-rw-r--r-- | ydb/core/viewer/healthcheck.h | 169 | ||||
-rw-r--r-- | ydb/core/viewer/healthcheck_record.h | 44 | ||||
-rw-r--r-- | ydb/core/viewer/json_handlers_viewer.cpp | 2 | ||||
-rw-r--r-- | ydb/core/viewer/json_healthcheck.h | 52 | ||||
-rw-r--r-- | ydb/core/viewer/viewer.cpp | 11 |
5 files changed, 229 insertions, 49 deletions
diff --git a/ydb/core/viewer/healthcheck.h b/ydb/core/viewer/healthcheck.h new file mode 100644 index 00000000000..d22c36df2fe --- /dev/null +++ b/ydb/core/viewer/healthcheck.h @@ -0,0 +1,169 @@ +#pragma once + +#include <library/cpp/actors/core/actor_bootstrapped.h> +#include <library/cpp/actors/core/interconnect.h> +#include <library/cpp/actors/core/mon.h> +#include <ydb/core/blobstorage/base/blobstorage_events.h> +#include <ydb/core/base/tablet_pipe.h> +#include <ydb/core/protos/services.pb.h> +#include "viewer.h" +#include <library/cpp/monlib/encode/prometheus/prometheus.h> +#include <ydb/core/health_check/health_check.h> +#include <ydb/core/util/proto_duration.h> +#include <util/string/split.h> +#include "healthcheck_record.h" +#include <vector> + +namespace NKikimr { +namespace NViewer { + +using namespace NActors; +using namespace NMonitoring; + +enum HealthCheckResponseFormat { + JSON, + PROMETHEUS +}; + +class THealthCheck : public TActorBootstrapped<THealthCheck> { + static const bool WithRetry = false; + NMon::TEvHttpInfo::TPtr Event; + TJsonSettings JsonSettings; + ui32 Timeout = 0; + HealthCheckResponseFormat Format; + +public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { + return NKikimrServices::TActivity::VIEWER_HANDLER; + } + + THealthCheck(IViewer*, NMon::TEvHttpInfo::TPtr& ev) + : Event(ev) + {} + + void Bootstrap(const TActorContext& ctx) { + Format = HealthCheckResponseFormat::JSON; + if (const auto *header = Event->Get()->Request.GetHeaders().FindHeader("Accept")) { + THashSet<TString> accept; + StringSplitter(header->Value()).SplitBySet(", ").SkipEmpty().Collect(&accept); + if (accept.contains("*/*") || accept.contains("application/json")) { + Format = HealthCheckResponseFormat::JSON; + } else if (accept.contains("text/plain")) { + Format = HealthCheckResponseFormat::PROMETHEUS; + } else { + Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPBADREQUEST_HEADERS, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + Die(ctx); + } + } + const auto& params(Event->Get()->Request.GetParams()); + if (Format == HealthCheckResponseFormat::JSON) { + JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true); + JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); + } + Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); + THolder<NHealthCheck::TEvSelfCheckRequest> request = MakeHolder<NHealthCheck::TEvSelfCheckRequest>(); + request->Database = params.Get("tenant"); + request->Request.set_return_verbose_status(FromStringWithDefault<bool>(params.Get("verbose"), false)); + request->Request.set_maximum_level(FromStringWithDefault<ui32>(params.Get("max_level"), 0)); + SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout()); + if (params.Has("min_status")) { + Ydb::Monitoring::StatusFlag::Status minStatus; + if (Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &minStatus)) { + request->Request.set_minimum_status(minStatus); + } else { + Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPBADREQUEST, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + return PassAway(); + } + } + Send(NHealthCheck::MakeHealthCheckID(), request.Release()); + Timeout += Timeout * 20 / 100; // we prefer to wait for more (+20%) verbose timeout status from HC + ctx.Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup()); + Become(&TThis::StateRequestedInfo); + } + + STFUNC(StateRequestedInfo) { + switch (ev->GetTypeRewrite()) { + HFunc(NHealthCheck::TEvSelfCheckResult, Handle); + CFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + int GetIssueCount(const Ydb::Monitoring::IssueLog& issueLog) { + return issueLog.count() == 0 ? 1 : issueLog.count(); + } + + THolder<THashMap<TMetricRecord, ui32>> GetRecordCounters(NHealthCheck::TEvSelfCheckResult::TPtr& ev) { + const auto *descriptor = Ydb::Monitoring::StatusFlag_Status_descriptor(); + THashMap<TMetricRecord, ui32> recordCounters; + for (auto& log : ev->Get()->Result.issue_log()) { + TMetricRecord record { + .Database = log.location().database().name(), + .Message = log.message(), + .Status = descriptor->FindValueByNumber(log.status())->name(), + .Type = log.type() + }; + + auto it = recordCounters.find(record); + if (it != recordCounters.end()) { + it->second += GetIssueCount(log); + } else { + recordCounters[record] = GetIssueCount(log); + } + } + + return MakeHolder<THashMap<TMetricRecord, ui32>>(recordCounters); + } + + void HandleJSON(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext &ctx) { + TStringStream json; + TProtoToJson::ProtoToJson(json, ev->Get()->Result, JsonSettings); + ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPOKJSON + json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + Die(ctx); + } + + void HandlePrometheus(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext &ctx) { + auto recordCounters = GetRecordCounters(ev); + + TStringStream ss; + IMetricEncoderPtr encoder = EncoderPrometheus(&ss); + IMetricEncoder* e = encoder.Get(); + e->OnStreamBegin(); + for (auto& recordCounter : *recordCounters) { + e->OnMetricBegin(EMetricType::IGAUGE); + { + e->OnLabelsBegin(); + e->OnLabel("sensor", "Hc_ydb_ru"); + if (recordCounter.first.Database) { + e->OnLabel("DATABASE", recordCounter.first.Database); + } + e->OnLabel("MESSAGE", recordCounter.first.Message); + e->OnLabel("STATUS", recordCounter.first.Status); + e->OnLabel("TYPE", recordCounter.first.Type); + e->OnLabelsEnd(); + } + e->OnInt64(TInstant::Zero(), recordCounter.second); + e->OnMetricEnd(); + } + + e->OnStreamEnd(); + + ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPOKTEXT + ss.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + Die(ctx); + } + + void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev, const TActorContext &ctx) { + if (Format == HealthCheckResponseFormat::JSON) { + HandleJSON(ev, ctx); + } else { + HandlePrometheus(ev, ctx); + } + } + + void HandleTimeout(const TActorContext &ctx) { + Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPGATEWAYTIMEOUT, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + Die(ctx); + } +}; + +} +} diff --git a/ydb/core/viewer/healthcheck_record.h b/ydb/core/viewer/healthcheck_record.h new file mode 100644 index 00000000000..39c94e28877 --- /dev/null +++ b/ydb/core/viewer/healthcheck_record.h @@ -0,0 +1,44 @@ +#pragma once + +namespace NKikimr::NViewer { + +using namespace NActors; +using namespace NMonitoring; + +struct TMetricRecord { + TString Database; + TString Message; + TString Status; + TString Type; + + bool operator!=(const TMetricRecord& x) const noexcept { + return !(x == *this); + } + + bool operator==(const TMetricRecord& x) const noexcept { + return this->Database == x.Database && this->Message == x.Message && this->Status == x.Status && this->Type == x.Type; + } + + ui64 Hash() const noexcept { + ui64 hash = std::hash<TString>()(Database); + hash = CombineHashes<ui64>(hash, std::hash<TString>()(Message)); + hash = CombineHashes<ui64>(hash, std::hash<TString>()(Status)); + hash = CombineHashes<ui64>(hash, std::hash<TString>()(Type)); + return hash; + } + + struct THash { + ui64 operator()(const TMetricRecord& record) const noexcept { + return record.Hash(); + } + }; +}; + +} + +template<> +struct THash<NKikimr::NViewer::TMetricRecord> { + inline ui64 operator()(const NKikimr::NViewer::TMetricRecord& x) const noexcept { + return x.Hash(); + } +}; diff --git a/ydb/core/viewer/json_handlers_viewer.cpp b/ydb/core/viewer/json_handlers_viewer.cpp index b3c7954d712..daa64a165e2 100644 --- a/ydb/core/viewer/json_handlers_viewer.cpp +++ b/ydb/core/viewer/json_handlers_viewer.cpp @@ -30,12 +30,10 @@ #include "json_query.h" #include "json_netinfo.h" #include "json_compute.h" -#include "counters_hosts.h" #include "json_healthcheck.h" #include "json_nodes.h" #include "json_acl.h" - namespace NKikimr::NViewer { template <> diff --git a/ydb/core/viewer/json_healthcheck.h b/ydb/core/viewer/json_healthcheck.h index 2f0e5efa736..3bff8c4a466 100644 --- a/ydb/core/viewer/json_healthcheck.h +++ b/ydb/core/viewer/json_healthcheck.h @@ -16,63 +16,21 @@ namespace NViewer { using namespace NActors; class TJsonHealthCheck : public TActorBootstrapped<TJsonHealthCheck> { - static const bool WithRetry = false; - using TBase = TActorBootstrapped<TJsonHealthCheck>; - IViewer* Viewer; NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::VIEWER_HANDLER; } - TJsonHealthCheck(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) + TJsonHealthCheck(IViewer*, NMon::TEvHttpInfo::TPtr& ev) +// : Viewer(viewer) + : Event(ev) {} void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false); - Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000); - THolder<NHealthCheck::TEvSelfCheckRequest> request = MakeHolder<NHealthCheck::TEvSelfCheckRequest>(); - request->Database = params.Get("tenant"); - request->Request.set_return_verbose_status(FromStringWithDefault<bool>(params.Get("verbose"), false)); - request->Request.set_maximum_level(FromStringWithDefault<ui32>(params.Get("max_level"), 0)); - SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout()); - if (params.Has("min_status")) { - Ydb::Monitoring::StatusFlag::Status minStatus; - if (Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &minStatus)) { - request->Request.set_minimum_status(minStatus); - } else { - Send(Event->Sender, new NMon::TEvHttpInfoRes(HTTPBADREQUEST, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - } - Send(NHealthCheck::MakeHealthCheckID(), request.Release()); - Timeout += Timeout * 20 / 100; // we prefer to wait for more (+20%) verbose timeout status from HC - Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(NHealthCheck::TEvSelfCheckResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev) { - TStringStream json; - TProtoToJson::ProtoToJson(json, ev->Get()->Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get()) + json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + auto queryString = Event->Get()->Request.GetParams().Print(); + Send(Event->Sender, new NMon::TEvHttpInfoRes("HTTP/1.1 302 Found\r\nLocation: /healthcheck?" + queryString + "\r\n\r\n", 0, NMon::IEvHttpInfoRes::EContentType::Custom)); PassAway(); } }; diff --git a/ydb/core/viewer/viewer.cpp b/ydb/core/viewer/viewer.cpp index 0f87a6e0e9e..60804758cad 100644 --- a/ydb/core/viewer/viewer.cpp +++ b/ydb/core/viewer/viewer.cpp @@ -20,6 +20,7 @@ #include "browse_pq.h" #include "browse_db.h" #include "counters_hosts.h" +#include "healthcheck.h" #include "json_handlers.h" @@ -114,6 +115,12 @@ public: .UseAuth = false, }); mon->RegisterActorPage({ + .RelPath = "healthcheck", + .ActorSystem = ctx.ExecutorThread.ActorSystem, + .ActorId = ctx.SelfID, + .UseAuth = false, + }); + mon->RegisterActorPage({ .Title = "VDisk", .RelPath = "vdisk", .ActorSystem = ctx.ExecutorThread.ActorSystem, @@ -345,6 +352,10 @@ private: ctx.ExecutorThread.RegisterActor(new TCountersHostsList(this, ev)); return; } + if (filename.StartsWith("healthcheck")) { + ctx.ExecutorThread.RegisterActor(new THealthCheck(this, ev)); + return; + } // TODO: check path validity // TODO: cache if (msg->Request.GetPathInfo().StartsWith('/')) { |