diff options
author | Alexey Efimov <[email protected]> | 2022-06-23 22:49:33 +0300 |
---|---|---|
committer | Alexey Efimov <[email protected]> | 2022-06-23 22:49:33 +0300 |
commit | 90ad4334410c5c893687416baf8cfb4598b8c803 (patch) | |
tree | 0479e40e599e3031c0342bc26ce4a608f8383e45 | |
parent | 78180bbdae2be1b31595e56185f5ccfd3aa08a55 (diff) |
add http and grpc handlers for checking node aliveness KIKIMR-12596
ref:b8d4c88f069ede7586f00f87ea1157c896a0253a
-rw-r--r-- | CMakeLists.darwin.txt | 2 | ||||
-rw-r--r-- | CMakeLists.linux.txt | 2 | ||||
-rw-r--r-- | ydb/core/grpc_services/base/base.h | 3 | ||||
-rw-r--r-- | ydb/core/grpc_services/grpc_request_proxy.cpp | 1 | ||||
-rw-r--r-- | ydb/core/grpc_services/grpc_request_proxy.h | 1 | ||||
-rw-r--r-- | ydb/core/grpc_services/rpc_calls.h | 2 | ||||
-rw-r--r-- | ydb/core/grpc_services/rpc_monitoring.cpp | 52 | ||||
-rw-r--r-- | ydb/core/grpc_services/service_monitoring.h | 1 | ||||
-rw-r--r-- | ydb/core/health_check/CMakeLists.txt | 1 | ||||
-rw-r--r-- | ydb/core/health_check/health_check.cpp | 227 | ||||
-rw-r--r-- | ydb/core/health_check/health_check.h | 5 | ||||
-rw-r--r-- | ydb/public/api/grpc/ydb_monitoring_v1.proto | 2 | ||||
-rw-r--r-- | ydb/public/api/protos/ydb_monitoring.proto | 9 | ||||
-rw-r--r-- | ydb/services/monitoring/grpc_service.cpp | 22 |
14 files changed, 321 insertions, 9 deletions
diff --git a/CMakeLists.darwin.txt b/CMakeLists.darwin.txt index 590f1c8bfeb..e70ca42a069 100644 --- a/CMakeLists.darwin.txt +++ b/CMakeLists.darwin.txt @@ -534,6 +534,7 @@ add_subdirectory(ydb/core/grpc_services/base) add_subdirectory(ydb/core/grpc_streaming) add_subdirectory(ydb/public/sdk/cpp/client/resources) add_subdirectory(ydb/core/health_check) +add_subdirectory(ydb/public/api/grpc) add_subdirectory(ydb/core/io_formats) add_subdirectory(ydb/core/kesus/tablet) add_subdirectory(ydb/core/metering) @@ -577,7 +578,6 @@ add_subdirectory(library/cpp/xml/init) add_subdirectory(contrib/libs/libxml) add_subdirectory(library/cpp/string_utils/ztstrbuf) add_subdirectory(ydb/public/lib/deprecated/kicli) -add_subdirectory(ydb/public/api/grpc) add_subdirectory(ydb/public/lib/deprecated/client) add_subdirectory(ydb/public/lib/value) add_subdirectory(ydb/library/yql/dq/actors/compute) diff --git a/CMakeLists.linux.txt b/CMakeLists.linux.txt index 7f67be46cbe..e2fe7e6fda7 100644 --- a/CMakeLists.linux.txt +++ b/CMakeLists.linux.txt @@ -614,6 +614,7 @@ add_subdirectory(ydb/core/grpc_services/base) add_subdirectory(ydb/core/grpc_streaming) add_subdirectory(ydb/public/sdk/cpp/client/resources) add_subdirectory(ydb/core/health_check) +add_subdirectory(ydb/public/api/grpc) add_subdirectory(ydb/core/io_formats) add_subdirectory(ydb/core/kesus/tablet) add_subdirectory(ydb/core/metering) @@ -657,7 +658,6 @@ add_subdirectory(library/cpp/xml/init) add_subdirectory(contrib/libs/libxml) add_subdirectory(library/cpp/string_utils/ztstrbuf) add_subdirectory(ydb/public/lib/deprecated/kicli) -add_subdirectory(ydb/public/api/grpc) add_subdirectory(ydb/public/lib/deprecated/client) add_subdirectory(ydb/public/lib/value) add_subdirectory(ydb/library/yql/dq/actors/compute) diff --git a/ydb/core/grpc_services/base/base.h b/ydb/core/grpc_services/base/base.h index 9882c40783e..f1c6721b389 100644 --- a/ydb/core/grpc_services/base/base.h +++ b/ydb/core/grpc_services/base/base.h @@ -215,7 +215,8 @@ struct TRpcServices { EvListYndxRateLimiterResources, EvDescribeYndxRateLimiterResource, EvAcquireYndxRateLimiterResource, - EvGrpcRuntimeRequest // !!! DO NOT ADD NEW REQUEST !!! + EvGrpcRuntimeRequest, + EvNodeCheckRequest // !!! DO NOT ADD NEW REQUEST !!! }; struct TEvGrpcNextReply : public TEventLocal<TEvGrpcNextReply, TRpcServices::EvGrpcStreamIsReady> { diff --git a/ydb/core/grpc_services/grpc_request_proxy.cpp b/ydb/core/grpc_services/grpc_request_proxy.cpp index 5a5d852eb50..36b00c5f246 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.cpp +++ b/ydb/core/grpc_services/grpc_request_proxy.cpp @@ -594,6 +594,7 @@ void TGRpcRequestProxyImpl::StateFunc(TAutoPtr<IEventHandle>& ev, const TActorCo HFunc(TEvCreateTopicRequest, PreHandle); HFunc(TEvAlterTopicRequest, PreHandle); HFunc(TEvDescribeTopicRequest, PreHandle); + HFunc(TEvNodeCheckRequest, PreHandle); HFunc(TEvProxyRuntimeEvent, PreHandle); diff --git a/ydb/core/grpc_services/grpc_request_proxy.h b/ydb/core/grpc_services/grpc_request_proxy.h index a28b109aedf..cdeb0d3dd38 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.h +++ b/ydb/core/grpc_services/grpc_request_proxy.h @@ -65,6 +65,7 @@ protected: void Handle(TEvPQDescribeTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvDiscoverPQClustersRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvLoginRequest::TPtr& ev, const TActorContext& ctx); + void Handle(TEvNodeCheckRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvCoordinationSessionRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvDropTopicRequest::TPtr& ev, const TActorContext& ctx); void Handle(TEvCreateTopicRequest::TPtr& ev, const TActorContext& ctx); diff --git a/ydb/core/grpc_services/rpc_calls.h b/ydb/core/grpc_services/rpc_calls.h index 20d18efd6b2..52696e627f1 100644 --- a/ydb/core/grpc_services/rpc_calls.h +++ b/ydb/core/grpc_services/rpc_calls.h @@ -11,6 +11,7 @@ #include <ydb/public/api/protos/ydb_coordination.pb.h> #include <ydb/public/api/protos/ydb_discovery.pb.h> #include <ydb/public/api/protos/ydb_experimental.pb.h> +#include <ydb/public/api/protos/ydb_monitoring.pb.h> #include <ydb/public/api/protos/ydb_status_codes.pb.h> #include <ydb/public/api/protos/ydb_table.pb.h> #include <ydb/public/api/protos/ydb_s3_internal.pb.h> @@ -75,6 +76,7 @@ using TEvDescribeTopicRequest = TGRpcRequestValidationWrapper<TRpcServices::EvDe using TEvDiscoverPQClustersRequest = TGRpcRequestWrapper<TRpcServices::EvDiscoverPQClusters, Ydb::PersQueue::ClusterDiscovery::DiscoverClustersRequest, Ydb::PersQueue::ClusterDiscovery::DiscoverClustersResponse, true>; using TEvLoginRequest = TGRpcRequestWrapperNoAuth<TRpcServices::EvLogin, Ydb::Auth::LoginRequest, Ydb::Auth::LoginResponse>; +using TEvNodeCheckRequest = TGRpcRequestWrapperNoAuth<TRpcServices::EvNodeCheckRequest, Ydb::Monitoring::NodeCheckRequest, Ydb::Monitoring::NodeCheckResponse>; using TEvCoordinationSessionRequest = TGRpcRequestBiStreamWrapper<TRpcServices::EvCoordinationSession, Ydb::Coordination::SessionRequest, Ydb::Coordination::SessionResponse>; diff --git a/ydb/core/grpc_services/rpc_monitoring.cpp b/ydb/core/grpc_services/rpc_monitoring.cpp index d551da2fb76..fb79c647970 100644 --- a/ydb/core/grpc_services/rpc_monitoring.cpp +++ b/ydb/core/grpc_services/rpc_monitoring.cpp @@ -76,5 +76,57 @@ void DoSelfCheckRequest(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvide TActivationContext::AsActorContext().Register(new TSelfCheckRPC(p.release())); } +class TNodeCheckRPC : public TRpcRequestActor<TNodeCheckRPC, TEvNodeCheckRequest, true> { +public: + using TRpcRequestActor::TRpcRequestActor; + + THolder<NHealthCheck::TEvSelfCheckResult> Result; + Ydb::StatusIds_StatusCode Status = Ydb::StatusIds::SUCCESS; + + void Bootstrap() { + THolder<NHealthCheck::TEvNodeCheckRequest> request = MakeHolder<NHealthCheck::TEvNodeCheckRequest>(); + request->Request = *GetProtoRequest(); + Send(NHealthCheck::MakeHealthCheckID(), request.Release()); + Become(&TThis::StateWait); + } + + STATEFN(StateWait) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvents::TEvUndelivered, Handle); + hFunc(NHealthCheck::TEvSelfCheckResult, Handle); + } + } + + void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev) { + Status = Ydb::StatusIds::SUCCESS; + Result = ev->Release(); + ReplyAndPassAway(); + } + + void Handle(TEvents::TEvUndelivered::TPtr&) { + Status = Ydb::StatusIds::UNAVAILABLE; + ReplyAndPassAway(); + } + + void ReplyAndPassAway() { + TResponse response; + Ydb::Operations::Operation& operation = *response.mutable_operation(); + operation.set_ready(true); + operation.set_status(Status); + if (Result) { + operation.mutable_result()->PackFrom(Result->Result); + } + return Reply(response); + } +}; + +// void DoNodeCheckRequest(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&) { +// TActivationContext::AsActorContext().Register(new TNodeCheckRPC(p.release())); +// } + +void TGRpcRequestProxy::Handle(TEvNodeCheckRequest::TPtr& ev, const TActorContext& ctx) { + ctx.Register(new TNodeCheckRPC(ev->Release().Release())); +} + } // namespace NGRpcService } // namespace NKikimr diff --git a/ydb/core/grpc_services/service_monitoring.h b/ydb/core/grpc_services/service_monitoring.h index 6ac2928d1af..ef7aa0bfe0a 100644 --- a/ydb/core/grpc_services/service_monitoring.h +++ b/ydb/core/grpc_services/service_monitoring.h @@ -9,6 +9,7 @@ class IRequestOpCtx; class IFacilityProvider; void DoSelfCheckRequest(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); +//void DoNodeCheckRequest(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider&); } } diff --git a/ydb/core/health_check/CMakeLists.txt b/ydb/core/health_check/CMakeLists.txt index e40df8f5b25..70b680e345d 100644 --- a/ydb/core/health_check/CMakeLists.txt +++ b/ydb/core/health_check/CMakeLists.txt @@ -16,6 +16,7 @@ target_link_libraries(ydb-core-health_check PUBLIC core-blobstorage-base ydb-library-aclib api-protos + api-grpc public-issue-protos ) target_sources(ydb-core-health_check PRIVATE diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index 5afb541558d..7914e9681f2 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -7,12 +7,15 @@ #include <library/cpp/actors/core/hfunc.h> #include <library/cpp/actors/interconnect/interconnect.h> #include <library/cpp/digest/old_crc/crc.h> +#include <library/cpp/protobuf/json/proto2json.h> +#include <library/cpp/grpc/client/grpc_client_low.h> #include <util/random/shuffle.h> #include <ydb/core/base/hive.h> #include <ydb/core/base/path.h> #include <ydb/core/base/tablet_pipe.h> +#include <ydb/core/mon/mon.h> #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <ydb/core/cms/console/console.h> #include <ydb/core/mind/tenant_slot_broker.h> @@ -22,6 +25,8 @@ #include <ydb/core/util/proto_duration.h> #include <ydb/core/util/tuples.h> +#include <ydb/public/api/grpc/ydb_monitoring_v1.grpc.pb.h> + static decltype(auto) make_vslot_tuple(const NKikimrBlobStorage::TVSlotId& id) { return std::make_tuple(id.GetNodeId(), id.GetPDiskId(), id.GetVSlotId()); } @@ -1932,22 +1937,238 @@ public: } }; -class THealthCheckService : public TActor<THealthCheckService> { +template<typename RequestType> +class TNodeCheckRequest : public TActorBootstrapped<TNodeCheckRequest<RequestType>> { +public: + using TBase = TActorBootstrapped<TNodeCheckRequest<RequestType>>; + using TThis = TNodeCheckRequest<RequestType>; + + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::MONITORING_REQUEST; } + + struct TEvPrivate { + enum EEv { + EvResult = EventSpaceBegin(TEvents::ES_PRIVATE), + EvError, + EvEnd + }; + + static_assert(EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE), "expected EvEnd < EventSpaceEnd"); + + struct TEvResult : TEventLocal<TEvResult, EvResult> { + Ydb::Monitoring::NodeCheckResponse Response; + + TEvResult(Ydb::Monitoring::NodeCheckResponse&& response) + : Response(std::move(response)) + {} + }; + + struct TEvError : TEventLocal<TEvError, EvError> { + NGrpc::TGrpcStatus Status; + + TEvError(NGrpc::TGrpcStatus&& status) + : Status(std::move(status)) + {} + }; + }; + + TDuration Timeout = TDuration::MilliSeconds(10000); + std::shared_ptr<NGrpc::TGRpcClientLow> GRpcClientLow; + TActorId Sender; + THolder<RequestType> Request; + ui64 Cookie; + Ydb::Monitoring::SelfCheckResult Result; + + TNodeCheckRequest(std::shared_ptr<NGrpc::TGRpcClientLow> grpcClient, const TActorId& sender, THolder<RequestType> request, ui64 cookie) + : GRpcClientLow(grpcClient) + , Sender(sender) + , Request(std::move(request)) + , Cookie(cookie) + { + Result.set_self_check_result(Ydb::Monitoring::SelfCheck_Result::SelfCheck_Result_UNSPECIFIED); + } + + void Bootstrap(); + + void AddIssue(Ydb::Monitoring::StatusFlag::Status status, const TString& message) { + auto* issue = Result.add_issue_log(); + issue->set_id(std::to_string(Result.issue_log_size())); + issue->set_status(status); + issue->set_message(message); + } + + void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { + NGrpc::TGRpcClientConfig config; + for (const auto& systemStateInfo : ev->Get()->Record.GetSystemStateInfo()) { + for (const auto& endpoint : systemStateInfo.GetEndpoints()) { + if (endpoint.GetName() == "grpc") { + config.Locator = "localhost" + endpoint.GetAddress(); + break; + } else if (endpoint.GetName() == "grpcs") { + config.Locator = "localhost" + endpoint.GetAddress(); + config.EnableSsl = true; + break; + } + } + break; + } + if (!config.Locator) { + AddIssue(Ydb::Monitoring::StatusFlag::RED, "Couldn't find local gRPC endpoint"); + ReplyAndPassAway(); + } + NActors::TActorSystem* actorSystem = TlsActivationContext->ActorSystem(); + NActors::TActorId actorId = TBase::SelfId(); + Ydb::Monitoring::NodeCheckRequest request; + NGrpc::TResponseCallback<Ydb::Monitoring::NodeCheckResponse> responseCb = + [actorId, actorSystem, context = GRpcClientLow->CreateContext()](NGrpc::TGrpcStatus&& status, Ydb::Monitoring::NodeCheckResponse&& response) -> void { + if (status.Ok()) { + actorSystem->Send(actorId, new typename TEvPrivate::TEvResult(std::move(response))); + } else { + actorSystem->Send(actorId, new typename TEvPrivate::TEvError(std::move(status))); + } + }; + NGrpc::TCallMeta meta; + meta.Timeout = Timeout; + auto service = GRpcClientLow->CreateGRpcServiceConnection<::Ydb::Monitoring::V1::MonitoringService>(config); + service->DoRequest(request, std::move(responseCb), &Ydb::Monitoring::V1::MonitoringService::Stub::AsyncNodeCheck, meta); + } + + void Handle(typename TEvPrivate::TEvResult::TPtr& ev) { + auto& operation(ev->Get()->Response.operation()); + if (operation.ready() && operation.status() == Ydb::StatusIds::SUCCESS) { + operation.result().UnpackTo(&Result); + } else { + Result.set_self_check_result(Ydb::Monitoring::SelfCheck_Result::SelfCheck_Result_MAINTENANCE_REQUIRED); + AddIssue(Ydb::Monitoring::StatusFlag::RED, "Local gRPC returned error"); + } + ReplyAndPassAway(); + } + + void Handle(typename TEvPrivate::TEvError::TPtr& ev) { + Result.set_self_check_result(Ydb::Monitoring::SelfCheck_Result::SelfCheck_Result_MAINTENANCE_REQUIRED); + AddIssue(Ydb::Monitoring::StatusFlag::RED, "Local gRPC request failed"); + Y_UNUSED(ev); + ReplyAndPassAway(); + } + + void HandleTimeout() { + Result.set_self_check_result(Ydb::Monitoring::SelfCheck_Result::SelfCheck_Result_MAINTENANCE_REQUIRED); + AddIssue(Ydb::Monitoring::StatusFlag::RED, "Timeout"); + ReplyAndPassAway(); + } + + void StateWork(TAutoPtr<NActors::IEventHandle>& ev, const TActorContext&) { + switch (ev->GetTypeRewrite()) { + hFunc(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse, Handle); + hFunc(TEvPrivate::TEvResult, Handle); + hFunc(TEvPrivate::TEvError, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleTimeout); + } + } + + void FillResult(Ydb::Monitoring::SelfCheckResult& result) { + result = std::move(Result); + } + + void ReplyAndPassAway(); +}; + +template<> +void TNodeCheckRequest<TEvNodeCheckRequest>::ReplyAndPassAway() { + THolder<TEvSelfCheckResult> response = MakeHolder<TEvSelfCheckResult>(); + Ydb::Monitoring::SelfCheckResult& result = response->Result; + FillResult(result); + Send(Sender, response.Release(), 0, Cookie); + PassAway(); +} + +template<> +void TNodeCheckRequest<NMon::TEvHttpInfo>::ReplyAndPassAway() { + static const char HTTPJSON_GOOD[] = "HTTP/1.1 200 Ok\r\nContent-Type: application/json\r\n\r\n"; + static const char HTTPJSON_NOT_GOOD[] = "HTTP/1.1 500 Failed\r\nContent-Type: application/json\r\n\r\n"; + + Ydb::Monitoring::SelfCheckResult result; + FillResult(result); + auto config = NProtobufJson::TProto2JsonConfig() + .SetFormatOutput(false) + .SetEnumMode(NProtobufJson::TProto2JsonConfig::EnumName); + TStringStream json; + if (result.self_check_result() == Ydb::Monitoring::SelfCheck_Result::SelfCheck_Result_GOOD) { + json << HTTPJSON_GOOD; + } else { + json << HTTPJSON_NOT_GOOD; + } + NProtobufJson::Proto2Json(result, json, config); + Send(Sender, new NMon::TEvHttpInfoRes(json.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom), 0, Cookie); + PassAway(); +} + +template<> +void TNodeCheckRequest<TEvNodeCheckRequest>::Bootstrap() { + if (Request->Request.operation_params().has_operation_timeout()) { + Timeout = GetDuration(Request->Request.operation_params().operation_timeout()); + } + Result.set_self_check_result(Ydb::Monitoring::SelfCheck_Result::SelfCheck_Result_GOOD); + ReplyAndPassAway(); +} + +template<> +void TNodeCheckRequest<NMon::TEvHttpInfo>::Bootstrap() { + TActorId whiteboardServiceId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(TBase::SelfId().NodeId()); + TBase::Send(whiteboardServiceId, new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest()); + const auto& params(Request->Request.GetParams()); + Timeout = TDuration::MilliSeconds(FromStringWithDefault<ui32>(params.Get("timeout"), Timeout.MilliSeconds())); + TBase::Become(&TThis::StateWork, Timeout, new TEvents::TEvWakeup()); +} + +class THealthCheckService : public TActorBootstrapped<THealthCheckService> { public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::MONITORING_SERVICE; } THealthCheckService() - : TActor<THealthCheckService>(&THealthCheckService::StateWork) { } + void Bootstrap() { + TMon* mon = AppData()->Mon; + if (mon) { + mon->RegisterActorPage({ + .RelPath = "status", + .ActorSystem = TlsActivationContext->ExecutorThread.ActorSystem, + .ActorId = SelfId(), + }); + } + Become(&THealthCheckService::StateWork); + } + void Handle(TEvSelfCheckRequest::TPtr& ev) { - RegisterWithSameMailbox(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie)); + Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie)); + } + + std::shared_ptr<NGrpc::TGRpcClientLow> GRpcClientLow; + + void Handle(TEvNodeCheckRequest::TPtr& ev) { + if (!GRpcClientLow) { + GRpcClientLow = std::make_shared<NGrpc::TGRpcClientLow>(); + } + Register(new TNodeCheckRequest<TEvNodeCheckRequest>(GRpcClientLow, ev->Sender, ev.Get()->Release(), ev->Cookie)); + } + + void Handle(NMon::TEvHttpInfo::TPtr& ev) { + if (ev->Get()->Request.GetPath() == "/status") { + if (!GRpcClientLow) { + GRpcClientLow = std::make_shared<NGrpc::TGRpcClientLow>(); + } + Register(new TNodeCheckRequest<NMon::TEvHttpInfo>(GRpcClientLow, ev->Sender, ev.Get()->Release(), ev->Cookie)); + } else { + Send(ev->Sender, new NMon::TEvHttpInfoRes(NMonitoring::HTTPNOTFOUND, 0, NMon::IEvHttpInfoRes::EContentType::Custom), 0, ev->Cookie); + } } void StateWork(TAutoPtr<NActors::IEventHandle>& ev, const TActorContext&) { switch (ev->GetTypeRewrite()) { hFunc(TEvSelfCheckRequest, Handle); + hFunc(TEvNodeCheckRequest, Handle); + hFunc(NMon::TEvHttpInfo, Handle); cFunc(TEvents::TSystem::PoisonPill, PassAway); } } diff --git a/ydb/core/health_check/health_check.h b/ydb/core/health_check/health_check.h index 5708a4e974a..4f6e566566b 100644 --- a/ydb/core/health_check/health_check.h +++ b/ydb/core/health_check/health_check.h @@ -9,6 +9,7 @@ namespace NHealthCheck { enum EEv { // requests EvSelfCheckRequest = EventSpaceBegin(TKikimrEvents::ES_HEALTH_CHECK), + EvNodeCheckRequest, // replies EvSelfCheckResult = EvSelfCheckRequest + 512, @@ -23,6 +24,10 @@ struct TEvSelfCheckRequest : TEventLocal<TEvSelfCheckRequest, EvSelfCheckRequest TString Database; }; +struct TEvNodeCheckRequest : TEventLocal<TEvNodeCheckRequest, EvNodeCheckRequest> { + Ydb::Monitoring::NodeCheckRequest Request; +}; + struct TEvSelfCheckResult : TEventLocal<TEvSelfCheckResult, EvSelfCheckResult> { Ydb::Monitoring::SelfCheckResult Result; }; diff --git a/ydb/public/api/grpc/ydb_monitoring_v1.proto b/ydb/public/api/grpc/ydb_monitoring_v1.proto index 36f739c8a26..02580b91afe 100644 --- a/ydb/public/api/grpc/ydb_monitoring_v1.proto +++ b/ydb/public/api/grpc/ydb_monitoring_v1.proto @@ -8,4 +8,6 @@ import "ydb/public/api/protos/ydb_monitoring.proto"; service MonitoringService { // Gets the health status of the database. rpc SelfCheck(Monitoring.SelfCheckRequest) returns (Monitoring.SelfCheckResponse); + // Checks current node health + rpc NodeCheck(Monitoring.NodeCheckRequest) returns (Monitoring.NodeCheckResponse); } diff --git a/ydb/public/api/protos/ydb_monitoring.proto b/ydb/public/api/protos/ydb_monitoring.proto index 85917cc0cf3..d279e415d4a 100644 --- a/ydb/public/api/protos/ydb_monitoring.proto +++ b/ydb/public/api/protos/ydb_monitoring.proto @@ -34,6 +34,15 @@ message SelfCheckResponse { Ydb.Operations.Operation operation = 1; } +message NodeCheckRequest { + Ydb.Operations.OperationParams operation_params = 1; // basic operation params, including timeout +} + +message NodeCheckResponse { + // After successfull completion must contain SelfCheckResult. + Ydb.Operations.Operation operation = 1; +} + message SelfCheck { // Describes the result of self-check performed. enum Result { diff --git a/ydb/services/monitoring/grpc_service.cpp b/ydb/services/monitoring/grpc_service.cpp index c7fe67c7415..0c92b9f0fb3 100644 --- a/ydb/services/monitoring/grpc_service.cpp +++ b/ydb/services/monitoring/grpc_service.cpp @@ -4,6 +4,8 @@ #include <ydb/core/grpc_services/service_monitoring.h> #include <ydb/core/grpc_services/base/base.h> +#include <ydb/core/grpc_services/rpc_calls.h> + namespace NKikimr { namespace NGRpcService { @@ -49,7 +51,7 @@ void TGRpcMonitoringService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) { #ifdef ADD_REQUEST #error ADD_REQUEST macro already defined #endif -#define ADD_REQUEST(NAME, CB) \ +#define ADD_REQUEST_NEW(NAME, CB) \ MakeIntrusive<TGRpcRequest<Monitoring::NAME##Request, Monitoring::NAME##Response, TGRpcMonitoringService>> \ (this, &Service_, CQ_, \ [this](NGrpc::IRequestContextBase *ctx) { \ @@ -60,9 +62,23 @@ void TGRpcMonitoringService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) { }, &Ydb::Monitoring::V1::MonitoringService::AsyncService::Request ## NAME, \ #NAME, logger, getCounterBlock("monitoring", #NAME))->Run(); - ADD_REQUEST(SelfCheck, DoSelfCheckRequest) + ADD_REQUEST_NEW(SelfCheck, DoSelfCheckRequest); + +#define ADD_REQUEST_OLD(NAME, IN, OUT, ACTION) \ + MakeIntrusive<TGRpcRequest<Ydb::Monitoring::IN, Ydb::Monitoring::OUT, TGRpcMonitoringService>>(this, &Service_, CQ_, \ + [this](NGrpc::IRequestContextBase* reqCtx) { \ + NGRpcService::ReportGrpcReqToMon(*ActorSystem_, reqCtx->GetPeer(), GetSdkBuildInfo(reqCtx)); \ + ACTION; \ + }, &Ydb::Monitoring::V1::MonitoringService::AsyncService::Request ## NAME, \ + #NAME, logger, getCounterBlock("monitoring", #NAME))->Run(); + + ADD_REQUEST_OLD(NodeCheck, NodeCheckRequest, NodeCheckResponse, { + ActorSystem_->Send(GRpcRequestProxyId_, new TEvNodeCheckRequest(reqCtx)); + }); + -#undef ADD_REQUEST +#undef ADD_REQUEST_NEW +#undef ADD_REQUEST_OLD } } // namespace NGRpcService |