author     ilnaz <[email protected]>  2023-06-14 18:49:37 +0300
committer  ilnaz <[email protected]>  2023-06-14 18:49:37 +0300
commit     3f3f15f2e06a1f183d72c229cacf1d0f8949aa57 (patch)
tree       48bdaa3b6f1ce0a3a21f38cf5ad4afab4b2c121c
parent     c35c89f6e630781a97a2384c5783d2cbbe10d588 (diff)
Maintenance service API implementation
-rw-r--r--  ydb/core/cms/CMakeLists.darwin-x86_64.txt  1
-rw-r--r--  ydb/core/cms/CMakeLists.linux-aarch64.txt  1
-rw-r--r--  ydb/core/cms/CMakeLists.linux-x86_64.txt  1
-rw-r--r--  ydb/core/cms/CMakeLists.windows-x86_64.txt  1
-rw-r--r--  ydb/core/cms/api_adapters.cpp  773
-rw-r--r--  ydb/core/cms/cms.cpp  19
-rw-r--r--  ydb/core/cms/cms.h  84
-rw-r--r--  ydb/core/cms/cms_impl.h  22
-rw-r--r--  ydb/core/cms/cms_state.h  5
-rw-r--r--  ydb/core/cms/cms_tx_load_state.cpp  40
-rw-r--r--  ydb/core/cms/cms_tx_remove_permissions.cpp  5
-rw-r--r--  ydb/core/cms/cms_tx_remove_task.cpp  4
-rw-r--r--  ydb/core/cms/cms_tx_store_permissions.cpp  30
-rw-r--r--  ydb/core/cms/scheme.h  13
-rw-r--r--  ydb/core/cms/ya.make  1
-rw-r--r--  ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt  1
-rw-r--r--  ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt  1
-rw-r--r--  ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt  1
-rw-r--r--  ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt  1
-rw-r--r--  ydb/core/driver_lib/run/run.cpp  8
-rw-r--r--  ydb/core/driver_lib/run/ya.make  1
-rw-r--r--  ydb/core/grpc_services/CMakeLists.darwin-x86_64.txt  1
-rw-r--r--  ydb/core/grpc_services/CMakeLists.linux-aarch64.txt  1
-rw-r--r--  ydb/core/grpc_services/CMakeLists.linux-x86_64.txt  1
-rw-r--r--  ydb/core/grpc_services/CMakeLists.windows-x86_64.txt  1
-rw-r--r--  ydb/core/grpc_services/rpc_maintenance.cpp  168
-rw-r--r--  ydb/core/grpc_services/service_maintenance.h  18
-rw-r--r--  ydb/core/grpc_services/ya.make  1
-rw-r--r--  ydb/core/protos/CMakeLists.darwin-x86_64.txt  13
-rw-r--r--  ydb/core/protos/CMakeLists.linux-aarch64.txt  13
-rw-r--r--  ydb/core/protos/CMakeLists.linux-x86_64.txt  13
-rw-r--r--  ydb/core/protos/CMakeLists.windows-x86_64.txt  13
-rw-r--r--  ydb/core/protos/cms.proto  1
-rw-r--r--  ydb/core/protos/maintenance.proto  70
-rw-r--r--  ydb/core/protos/services.proto  1
-rw-r--r--  ydb/core/protos/ya.make  1
-rw-r--r--  ydb/public/api/grpc/draft/ydb_maintenance_v1.proto  31
-rw-r--r--  ydb/public/api/protos/draft/ydb_maintenance.proto  382
-rw-r--r--  ydb/services/CMakeLists.txt  1
-rw-r--r--  ydb/services/maintenance/CMakeLists.darwin-x86_64.txt  21
-rw-r--r--  ydb/services/maintenance/CMakeLists.linux-aarch64.txt  22
-rw-r--r--  ydb/services/maintenance/CMakeLists.linux-x86_64.txt  22
-rw-r--r--  ydb/services/maintenance/CMakeLists.txt  17
-rw-r--r--  ydb/services/maintenance/CMakeLists.windows-x86_64.txt  21
-rw-r--r--  ydb/services/maintenance/grpc_service.cpp  43
-rw-r--r--  ydb/services/maintenance/grpc_service.h  18
-rw-r--r--  ydb/services/maintenance/ya.make  14
-rw-r--r--  ydb/services/ya.make  1
48 files changed, 1665 insertions, 257 deletions
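
Note: the new ydb/core/cms/api_adapters.cpp (below) translates the public Ydb::Maintenance API into the internal NKikimrCms messages. One of the simplest pieces of that translation is the availability-mode mapping; the following standalone sketch is not part of the commit — plain enums stand in for the generated protobuf types Ydb::Maintenance::AvailabilityMode and NKikimrCms::EAvailabilityMode — and only illustrates the correspondence implemented by ConvertAvailabilityMode().

    // Standalone sketch of the availability-mode mapping (illustrative types only).
    #include <cassert>

    enum class PublicMode { Strong, Weak, Force };                             // AVAILABILITY_MODE_*
    enum class InternalMode { MaxAvailability, KeepAvailable, ForceRestart };  // MODE_*

    InternalMode ToInternal(PublicMode mode) {
        switch (mode) {
            case PublicMode::Strong: return InternalMode::MaxAvailability;
            case PublicMode::Weak:   return InternalMode::KeepAvailable;
            case PublicMode::Force:  return InternalMode::ForceRestart;
        }
        assert(false);                        // unreachable, mirrors Y_FAIL("unreachable")
        return InternalMode::MaxAvailability;
    }

    int main() {
        assert(ToInternal(PublicMode::Weak) == InternalMode::KeepAvailable);
        return 0;
    }
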
diff --git a/ydb/core/cms/CMakeLists.darwin-x86_64.txt b/ydb/core/cms/CMakeLists.darwin-x86_64.txt
index 6467f87857e..5af4c829e07 100644
--- a/ydb/core/cms/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.darwin-x86_64.txt
@@ -51,6 +51,7 @@ target_link_libraries(ydb-core-cms PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(ydb-core-cms PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/api_adapters.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/audit_log.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms.cpp
diff --git a/ydb/core/cms/CMakeLists.linux-aarch64.txt b/ydb/core/cms/CMakeLists.linux-aarch64.txt
index 94b7c7387b3..3b403d7d220 100644
--- a/ydb/core/cms/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/cms/CMakeLists.linux-aarch64.txt
@@ -52,6 +52,7 @@ target_link_libraries(ydb-core-cms PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(ydb-core-cms PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/api_adapters.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/audit_log.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms.cpp
diff --git a/ydb/core/cms/CMakeLists.linux-x86_64.txt b/ydb/core/cms/CMakeLists.linux-x86_64.txt
index 94b7c7387b3..3b403d7d220 100644
--- a/ydb/core/cms/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.linux-x86_64.txt
@@ -52,6 +52,7 @@ target_link_libraries(ydb-core-cms PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(ydb-core-cms PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/api_adapters.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/audit_log.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms.cpp
diff --git a/ydb/core/cms/CMakeLists.windows-x86_64.txt b/ydb/core/cms/CMakeLists.windows-x86_64.txt
index 6467f87857e..5af4c829e07 100644
--- a/ydb/core/cms/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.windows-x86_64.txt
@@ -51,6 +51,7 @@ target_link_libraries(ydb-core-cms PUBLIC
tools-enum_parser-enum_serialization_runtime
)
target_sources(ydb-core-cms PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/api_adapters.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/audit_log.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms.cpp
diff --git a/ydb/core/cms/api_adapters.cpp b/ydb/core/cms/api_adapters.cpp
new file mode 100644
index 00000000000..3833d2426fe
--- /dev/null
+++ b/ydb/core/cms/api_adapters.cpp
@@ -0,0 +1,773 @@
+#include "cms_impl.h"
+
+#include <ydb/core/util/yverify_stream.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
+#include <ydb/library/yql/public/issue/protos/issue_severity.pb.h>
+
+#include <library/cpp/actors/core/actor_bootstrapped.h>
+#include <library/cpp/actors/core/hfunc.h>
+
+#include <google/protobuf/util/time_util.h>
+
+#include <util/string/cast.h>
+
+namespace NKikimr::NCms {
+
+using TimeUtil = ::google::protobuf::util::TimeUtil;
+
+namespace {
+
+ NKikimrCms::EAvailabilityMode ConvertAvailabilityMode(Ydb::Maintenance::AvailabilityMode mode) {
+ switch (mode) {
+ case Ydb::Maintenance::AVAILABILITY_MODE_STRONG:
+ return NKikimrCms::MODE_MAX_AVAILABILITY;
+ case Ydb::Maintenance::AVAILABILITY_MODE_WEAK:
+ return NKikimrCms::MODE_KEEP_AVAILABLE;
+ case Ydb::Maintenance::AVAILABILITY_MODE_FORCE:
+ return NKikimrCms::MODE_FORCE_RESTART;
+ default:
+ Y_FAIL("unreachable");
+ }
+ }
+
+ Ydb::Maintenance::AvailabilityMode ConvertAvailabilityMode(NKikimrCms::EAvailabilityMode mode) {
+ switch (mode) {
+ case NKikimrCms::MODE_MAX_AVAILABILITY:
+ return Ydb::Maintenance::AVAILABILITY_MODE_STRONG;
+ case NKikimrCms::MODE_KEEP_AVAILABLE:
+ return Ydb::Maintenance::AVAILABILITY_MODE_WEAK;
+ case NKikimrCms::MODE_FORCE_RESTART:
+ return Ydb::Maintenance::AVAILABILITY_MODE_FORCE;
+ default:
+ Y_FAIL("unreachable");
+ }
+ }
+
+ void ConvertAction(const NKikimrCms::TAction& cmsAction, Ydb::Maintenance::LockAction& action) {
+ *action.mutable_duration() = TimeUtil::MicrosecondsToDuration(cmsAction.GetDuration());
+
+ ui32 nodeId;
+ if (TryFromString(cmsAction.GetHost(), nodeId)) {
+ action.mutable_scope()->set_node_id(nodeId);
+ } else {
+ action.mutable_scope()->set_host(cmsAction.GetHost());
+ }
+ }
+
+ void ConvertAction(const NKikimrCms::TAction& cmsAction, Ydb::Maintenance::ActionState& actionState) {
+ ConvertAction(cmsAction, *actionState.mutable_action()->mutable_lock_action());
+ // FIXME: specify action_uid
+ actionState.set_status(Ydb::Maintenance::ActionState::ACTION_STATUS_PENDING);
+ actionState.set_reason(Ydb::Maintenance::ActionState::ACTION_REASON_UNSPECIFIED); // FIXME: specify
+ }
+
+ void ConvertActionUid(const TString& taskUid, const TString& permissionId,
+ Ydb::Maintenance::ActionUid& actionUid)
+ {
+ actionUid.set_task_uid(taskUid);
+ actionUid.set_action_id(permissionId);
+ }
+
+ void ConvertPermission(const TString& taskUid, const NKikimrCms::TPermission& permission,
+ Ydb::Maintenance::ActionState& actionState)
+ {
+ ConvertAction(permission.GetAction(), *actionState.mutable_action()->mutable_lock_action());
+ ConvertActionUid(taskUid, permission.GetId(), *actionState.mutable_action_uid());
+
+ actionState.set_status(Ydb::Maintenance::ActionState::ACTION_STATUS_PERFORMED);
+ actionState.set_reason(Ydb::Maintenance::ActionState::ACTION_REASON_OK);
+ *actionState.mutable_deadline() = TimeUtil::MicrosecondsToTimestamp(permission.GetDeadline());
+ }
+
+ void ConvertPermission(const TString& taskUid, const TPermissionInfo& permission,
+ Ydb::Maintenance::ActionState& actionState)
+ {
+ NKikimrCms::TPermission protoPermission;
+ permission.CopyTo(protoPermission);
+ ConvertPermission(taskUid, protoPermission, actionState);
+ }
+
+} // anonymous
+
+template <typename TDerived, typename TEvRequest, typename TEvResponse>
+class TAdapterActor: public TActorBootstrapped<TDerived> {
+protected:
+ using TBase = TAdapterActor<TDerived, TEvRequest, TEvResponse>;
+
+ TCmsStatePtr GetCmsState() const {
+ Y_VERIFY(CmsState);
+ return CmsState;
+ }
+
+ void Reply(THolder<TEvResponse>&& ev) {
+ this->Send(Request->Sender, std::move(ev));
+ this->PassAway();
+ }
+
+ void Reply(Ydb::StatusIds::StatusCode code, const TString& error = {}) {
+ auto ev = MakeHolder<TEvResponse>();
+ ev->Record.SetStatus(code);
+
+ auto& issue = *ev->Record.AddIssues();
+ issue.set_severity(NYql::TSeverityIds::S_ERROR);
+ issue.set_message(error);
+
+ Reply(std::move(ev));
+ }
+
+public:
+ static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
+ return NKikimrServices::TActivity::CMS_API_ADAPTER;
+ }
+
+ explicit TAdapterActor(typename TEvRequest::TPtr& ev, const TActorId& cmsActorId, TCmsStatePtr cmsState = nullptr)
+ : Request(ev)
+ , CmsActorId(cmsActorId)
+ , CmsState(cmsState)
+ {
+ }
+
+protected:
+ typename TEvRequest::TPtr Request;
+ const TActorId CmsActorId;
+
+private:
+ const TCmsStatePtr CmsState;
+
+}; // TAdapterActor
+
+class TListClusterNodes: public TAdapterActor<
+ TListClusterNodes,
+ TEvCms::TEvListClusterNodesRequest,
+ TEvCms::TEvListClusterNodesResponse>
+{
+ static Ydb::Maintenance::ItemState ConvertNodeState(NKikimrCms::EState state) {
+ switch (state) {
+ case NKikimrCms::UNKNOWN:
+ return Ydb::Maintenance::ITEM_STATE_UNSPECIFIED;
+ case NKikimrCms::UP:
+ return Ydb::Maintenance::ITEM_STATE_UP;
+ case NKikimrCms::RESTART:
+ return Ydb::Maintenance::ITEM_STATE_MAINTENANCE;
+ case NKikimrCms::DOWN:
+ return Ydb::Maintenance::ITEM_STATE_DOWN;
+ default:
+ Y_FAIL("unreachable");
+ }
+ }
+
+ static void ConvertNode(const TNodeInfo& in, Ydb::Maintenance::Node& out) {
+ out.set_node_id(in.NodeId);
+ out.set_host(in.Host);
+ out.set_port(in.IcPort);
+ out.set_state(ConvertNodeState(in.State));
+
+ auto& location = *out.mutable_location();
+ location.set_data_center(in.Location.GetDataCenterId());
+ location.set_module(in.Location.GetModuleId());
+ location.set_rack(in.Location.GetRackId());
+ location.set_unit(in.Location.GetUnitId());
+
+ if (in.Services & EService::DynamicNode) {
+ out.mutable_dynamic()->set_tenant(in.Tenant);
+ } else {
+ out.mutable_storage();
+ }
+ }
+
+public:
+ using TBase::TBase;
+
+ void Bootstrap() {
+ Send(CmsActorId, new TEvCms::TEvGetClusterInfoRequest());
+ Become(&TThis::StateWork);
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvCms::TEvGetClusterInfoResponse, Handle);
+ }
+ }
+
+ void Handle(TEvCms::TEvGetClusterInfoResponse::TPtr& ev) {
+ auto clusterInfo = ev->Get()->Info;
+
+ if (clusterInfo->IsOutdated()) {
+ return Reply(Ydb::StatusIds::UNAVAILABLE, "Cannot collect cluster info");
+ }
+
+ auto response = MakeHolder<TEvCms::TEvListClusterNodesResponse>();
+ response->Record.SetStatus(Ydb::StatusIds::SUCCESS);
+
+ for (const auto& [_, node] : clusterInfo->AllNodes()) {
+ ConvertNode(*node, *response->Record.MutableResult()->add_nodes());
+ }
+
+ Reply(std::move(response));
+ }
+
+}; // TListClusterNodes
+
+template <typename TDerived, typename TEvRequest>
+class TPermissionResponseProcessor: public TAdapterActor<TDerived, TEvRequest, TEvCms::TEvMaintenanceTaskResponse> {
+protected:
+ using TBase = TPermissionResponseProcessor<TDerived, TEvRequest>;
+
+public:
+ using TAdapterActor<TDerived, TEvRequest, TEvCms::TEvMaintenanceTaskResponse>::TAdapterActor;
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvCms::TEvPermissionResponse, Handle);
+ }
+ }
+
+ void Handle(TEvCms::TEvPermissionResponse::TPtr& ev) {
+ const auto& record = ev->Get()->Record;
+
+ switch (record.GetStatus().GetCode()) {
+ case NKikimrCms::TStatus::ALLOW:
+ case NKikimrCms::TStatus::ALLOW_PARTIAL:
+ case NKikimrCms::TStatus::DISALLOW:
+ case NKikimrCms::TStatus::DISALLOW_TEMP:
+ break;
+ case NKikimrCms::TStatus::ERROR_TEMP:
+ return this->Reply(Ydb::StatusIds::UNAVAILABLE, record.GetStatus().GetReason());
+ case NKikimrCms::TStatus::WRONG_REQUEST:
+ case NKikimrCms::TStatus::ERROR:
+ case NKikimrCms::TStatus::NO_SUCH_HOST:
+ case NKikimrCms::TStatus::NO_SUCH_DEVICE:
+ case NKikimrCms::TStatus::NO_SUCH_SERVICE:
+ return this->Reply(Ydb::StatusIds::BAD_REQUEST, record.GetStatus().GetReason());
+ case NKikimrCms::TStatus::UNAUTHORIZED:
+ return this->Reply(Ydb::StatusIds::UNAUTHORIZED, record.GetStatus().GetReason());
+ case NKikimrCms::TStatus::OK:
+ case NKikimrCms::TStatus::UNKNOWN:
+ return this->Reply(Ydb::StatusIds::INTERNAL_ERROR, record.GetStatus().GetReason());
+ }
+
+ const auto& taskUid = static_cast<const TDerived*>(this)->GetTaskUid();
+
+ auto response = MakeHolder<TEvCms::TEvMaintenanceTaskResponse>();
+ response->Record.SetStatus(Ydb::StatusIds::SUCCESS);
+
+ auto& result = *response->Record.MutableResult();
+ result.set_task_uid(taskUid);
+
+ if (record.GetDeadline()) {
+ *result.mutable_retry_after() = TimeUtil::MicrosecondsToTimestamp(record.GetDeadline());
+ }
+
+ THashSet<TString> permissionsSeen;
+ // performed actions: new permissions
+ for (const auto& permission : record.GetPermissions()) {
+ permissionsSeen.insert(permission.GetId());
+ ConvertPermission(taskUid, permission, *result.add_action_group_states()->add_action_states());
+ }
+
+ auto cmsState = this->GetCmsState();
+ // performed actions: existing permissions
+ if (cmsState->MaintenanceTasks.contains(taskUid)) {
+ const auto& task = cmsState->MaintenanceTasks.at(taskUid);
+ for (const auto& id : task.Permissions) {
+ if (!cmsState->Permissions.contains(id) || permissionsSeen.contains(id)) {
+ continue;
+ }
+
+ ConvertPermission(taskUid, cmsState->Permissions.at(id),
+ *result.add_action_group_states()->add_action_states());
+ }
+ }
+
+ // pending actions
+ if (cmsState->ScheduledRequests.contains(record.GetRequestId())) {
+ const auto& request = cmsState->ScheduledRequests.at(record.GetRequestId());
+ for (const auto& action : request.Request.GetActions()) {
+ ConvertAction(action, *result.add_action_group_states()->add_action_states());
+ }
+ }
+
+ this->Reply(std::move(response));
+ }
+
+}; // TPermissionResponseProcessor
+
+class TCreateMaintenanceTask: public TPermissionResponseProcessor<
+ TCreateMaintenanceTask,
+ TEvCms::TEvCreateMaintenanceTaskRequest>
+{
+ bool ValidateScope(const Ydb::Maintenance::ActionScope& scope) {
+ switch (scope.scope_case()) {
+ case Ydb::Maintenance::ActionScope::kNodeId:
+ case Ydb::Maintenance::ActionScope::kHost:
+ return true;
+ default:
+ Reply(Ydb::StatusIds::BAD_REQUEST, "Unknown scope");
+ return false;
+ }
+ }
+
+ bool ValidateAction(const Ydb::Maintenance::Action& action) {
+ switch (action.action_case()) {
+ case Ydb::Maintenance::Action::kLockAction:
+ return ValidateScope(action.lock_action().scope());
+ default:
+ Reply(Ydb::StatusIds::BAD_REQUEST, "Unknown action");
+ return false;
+ }
+ }
+
+ bool ValidateRequest(const Ydb::Maintenance::CreateMaintenanceTaskRequest& request) {
+ switch (request.task_options().availability_mode()) {
+ case Ydb::Maintenance::AVAILABILITY_MODE_STRONG:
+ case Ydb::Maintenance::AVAILABILITY_MODE_WEAK:
+ case Ydb::Maintenance::AVAILABILITY_MODE_FORCE:
+ break;
+ default:
+ Reply(Ydb::StatusIds::BAD_REQUEST, "Unknown availability mode");
+ return false;
+ }
+
+ for (const auto& group : request.action_groups()) {
+ if (group.actions().size() < 1) {
+ Reply(Ydb::StatusIds::BAD_REQUEST, "Empty actions");
+ return false;
+ } else if (group.actions().size() > 1) {
+ Reply(Ydb::StatusIds::UNSUPPORTED, "Composite action groups are not supported");
+ return false;
+ }
+
+ for (const auto& action : group.actions()) {
+ if (!ValidateAction(action)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+
+ static void ConvertAction(const Ydb::Maintenance::LockAction& action, NKikimrCms::TAction& cmsAction) {
+ cmsAction.SetType(NKikimrCms::TAction::SHUTDOWN_HOST);
+ cmsAction.SetDuration(TimeUtil::DurationToMicroseconds(action.duration()));
+
+ const auto& scope = action.scope();
+ switch (scope.scope_case()) {
+ case Ydb::Maintenance::ActionScope::kNodeId:
+ cmsAction.SetHost(ToString(scope.node_id()));
+ break;
+ case Ydb::Maintenance::ActionScope::kHost:
+ cmsAction.SetHost(scope.host());
+ break;
+ default:
+ Y_FAIL("unreachable");
+ }
+ }
+
+ static void ConvertRequest(const TString& user, const Ydb::Maintenance::CreateMaintenanceTaskRequest& request,
+ NKikimrCms::TPermissionRequest& cmsRequest)
+ {
+ const auto& opts = request.task_options();
+
+ cmsRequest.SetMaintenanceTaskId(opts.task_uid());
+ cmsRequest.SetUser(user);
+ cmsRequest.SetDryRun(opts.dry_run());
+ cmsRequest.SetReason(opts.description());
+ cmsRequest.SetAvailabilityMode(ConvertAvailabilityMode(opts.availability_mode()));
+ cmsRequest.SetPartialPermissionAllowed(true);
+ cmsRequest.SetSchedule(true);
+
+ for (const auto& group : request.action_groups()) {
+ Y_VERIFY(group.actions().size() == 1);
+ for (const auto& action : group.actions()) {
+ if (action.has_lock_action()) {
+ ConvertAction(action.lock_action(), *cmsRequest.AddActions());
+ } else {
+ Y_FAIL("unreachable");
+ }
+ }
+ }
+ }
+
+public:
+ using TBase::TBase;
+
+ void Bootstrap() {
+ const auto& user = Request->Get()->Record.GetUserSID();
+ const auto& request = Request->Get()->Record.GetRequest();
+
+ if (!ValidateRequest(request)) {
+ return;
+ }
+
+ auto cmsRequest = MakeHolder<TEvCms::TEvPermissionRequest>();
+ ConvertRequest(user, request, cmsRequest->Record);
+
+ Send(CmsActorId, std::move(cmsRequest));
+ Become(&TThis::StateWork);
+ }
+
+ const TString& GetTaskUid() const {
+ return Request->Get()->Record.GetRequest().task_options().task_uid();
+ }
+
+ // using processor's handler
+
+}; // TCreateMaintenanceTask
+
+class TRefreshMaintenanceTask: public TPermissionResponseProcessor<
+ TRefreshMaintenanceTask,
+ TEvCms::TEvRefreshMaintenanceTaskRequest>
+{
+public:
+ using TBase::TBase;
+
+ void Bootstrap() {
+ const auto& taskUid = GetTaskUid();
+ auto cmsState = GetCmsState();
+
+ auto tit = cmsState->MaintenanceTasks.find(taskUid);
+ if (tit == cmsState->MaintenanceTasks.end()) {
+ return Reply(Ydb::StatusIds::BAD_REQUEST, "Task not found");
+ }
+
+ const auto& task = tit->second;
+
+ auto rit = cmsState->ScheduledRequests.find(task.RequestId);
+ if (rit == cmsState->ScheduledRequests.end()) {
+ auto response = MakeHolder<TEvCms::TEvMaintenanceTaskResponse>();
+ response->Record.SetStatus(Ydb::StatusIds::SUCCESS);
+
+ auto& result = *response->Record.MutableResult();
+ result.set_task_uid(taskUid);
+
+ // performed actions
+ for (const auto& id : task.Permissions) {
+ if (!cmsState->Permissions.contains(id)) {
+ continue;
+ }
+
+ ConvertPermission(taskUid, cmsState->Permissions.at(id),
+ *result.add_action_group_states()->add_action_states());
+ }
+
+ if (result.action_group_states().empty()) {
+ return Reply(Ydb::StatusIds::BAD_REQUEST, "Task has no active actions");
+ }
+
+ return Reply(std::move(response));
+ }
+
+ const auto& request = rit->second;
+
+ auto cmsRequest = MakeHolder<TEvCms::TEvCheckRequest>();
+ cmsRequest->Record.SetUser(task.Owner);
+ cmsRequest->Record.SetRequestId(task.RequestId);
+ cmsRequest->Record.SetAvailabilityMode(request.Request.GetAvailabilityMode());
+
+ Send(CmsActorId, std::move(cmsRequest));
+ Become(&TThis::StateWork);
+ }
+
+ const TString& GetTaskUid() const {
+ return Request->Get()->Record.GetRequest().task_uid();
+ }
+
+ // using processor's handler
+
+}; // TRefreshMaintenanceTask
+
+class TGetMaintenanceTask: public TAdapterActor<
+ TGetMaintenanceTask,
+ TEvCms::TEvGetMaintenanceTaskRequest,
+ TEvCms::TEvGetMaintenanceTaskResponse>
+{
+public:
+ using TBase::TBase;
+
+ void Bootstrap() {
+ const auto& taskUid = GetTaskUid();
+ auto cmsState = GetCmsState();
+
+ auto it = cmsState->MaintenanceTasks.find(taskUid);
+ if (it == cmsState->MaintenanceTasks.end()) {
+ return Reply(Ydb::StatusIds::BAD_REQUEST, "Task not found");
+ }
+
+ const auto& task = it->second;
+
+ if (!cmsState->ScheduledRequests.contains(task.RequestId)) {
+ auto response = MakeHolder<TEvCms::TEvGetMaintenanceTaskResponse>();
+ response->Record.SetStatus(Ydb::StatusIds::SUCCESS);
+
+ auto& result = *response->Record.MutableResult();
+ result.mutable_task_options()->set_task_uid(taskUid);
+
+ // performed actions
+ for (const auto& id : task.Permissions) {
+ if (!cmsState->Permissions.contains(id)) {
+ continue;
+ }
+
+ ConvertPermission(taskUid, cmsState->Permissions.at(id),
+ *result.add_action_group_states()->add_action_states());
+ }
+
+ return Reply(std::move(response));
+ }
+
+ auto cmsRequest = MakeHolder<TEvCms::TEvManageRequestRequest>();
+ cmsRequest->Record.SetUser(task.Owner);
+ cmsRequest->Record.SetRequestId(task.RequestId);
+ cmsRequest->Record.SetCommand(NKikimrCms::TManageRequestRequest::GET);
+
+ Send(CmsActorId, std::move(cmsRequest));
+ Become(&TThis::StateWork);
+ }
+
+ const TString& GetTaskUid() const {
+ return Request->Get()->Record.GetRequest().task_uid();
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvCms::TEvManageRequestResponse, Handle);
+ }
+ }
+
+ void Handle(TEvCms::TEvManageRequestResponse::TPtr& ev) {
+ const auto& taskUid = GetTaskUid();
+ const auto& record = ev->Get()->Record;
+
+ switch (record.GetStatus().GetCode()) {
+ case NKikimrCms::TStatus::OK:
+ break;
+ case NKikimrCms::TStatus::WRONG_REQUEST:
+ return Reply(Ydb::StatusIds::BAD_REQUEST, record.GetStatus().GetReason());
+ default:
+ return Reply(Ydb::StatusIds::INTERNAL_ERROR, record.GetStatus().GetReason());
+ }
+
+ auto response = MakeHolder<TEvCms::TEvGetMaintenanceTaskResponse>();
+ response->Record.SetStatus(Ydb::StatusIds::SUCCESS);
+
+ auto& result = *response->Record.MutableResult();
+ for (const auto& request : record.GetRequests()) {
+ auto& opts = *result.mutable_task_options();
+ opts.set_task_uid(taskUid);
+ opts.set_description(request.GetReason());
+ opts.set_availability_mode(ConvertAvailabilityMode(request.GetAvailabilityMode()));
+
+ // pending actions
+ for (const auto& action : request.GetActions()) {
+ ConvertAction(action, *result.add_action_group_states()->add_action_states());
+ }
+ }
+
+ auto cmsState = GetCmsState();
+ // performed actions
+ if (cmsState->MaintenanceTasks.contains(taskUid)) {
+ const auto& task = cmsState->MaintenanceTasks.at(taskUid);
+ for (const auto& id : task.Permissions) {
+ if (!cmsState->Permissions.contains(id)) {
+ continue;
+ }
+
+ ConvertPermission(taskUid, cmsState->Permissions.at(id),
+ *result.add_action_group_states()->add_action_states());
+ }
+ }
+
+ Reply(std::move(response));
+ }
+
+}; // TGetMaintenanceTask
+
+class TListMaintenanceTasks: public TAdapterActor<
+ TListMaintenanceTasks,
+ TEvCms::TEvListMaintenanceTasksRequest,
+ TEvCms::TEvListMaintenanceTasksResponse>
+{
+public:
+ using TBase::TBase;
+
+ void Bootstrap() {
+ const auto& user = Request->Get()->Record.GetRequest().user();
+
+ auto response = MakeHolder<TEvCms::TEvListMaintenanceTasksResponse>();
+ response->Record.SetStatus(Ydb::StatusIds::SUCCESS);
+
+ auto cmsState = GetCmsState();
+ for (const auto& [taskUid, task] : cmsState->MaintenanceTasks) {
+ if (!user || user == task.Owner) {
+ response->Record.MutableResult()->add_tasks_uids(taskUid);
+ }
+ }
+
+ Reply(std::move(response));
+ }
+
+}; // TListMaintenanceTasks
+
+class TDropMaintenanceTask: public TAdapterActor<
+ TDropMaintenanceTask,
+ TEvCms::TEvDropMaintenanceTaskRequest,
+ TEvCms::TEvManageMaintenanceTaskResponse>
+{
+public:
+ using TBase::TBase;
+
+ void Bootstrap() {
+ auto cmsState = GetCmsState();
+
+ auto it = cmsState->MaintenanceTasks.find(Request->Get()->Record.GetRequest().task_uid());
+ if (it == cmsState->MaintenanceTasks.end()) {
+ return Reply(Ydb::StatusIds::BAD_REQUEST, "Task not found");
+ }
+
+ const auto& task = it->second;
+ if (cmsState->ScheduledRequests.contains(task.RequestId)) {
+ auto cmsRequest = MakeHolder<TEvCms::TEvManageRequestRequest>();
+ cmsRequest->Record.SetUser(task.Owner);
+ cmsRequest->Record.SetRequestId(task.RequestId);
+ cmsRequest->Record.SetCommand(NKikimrCms::TManageRequestRequest::REJECT);
+
+ Send(CmsActorId, std::move(cmsRequest));
+ } else {
+ auto cmsRequest = MakeHolder<TEvCms::TEvManagePermissionRequest>();
+ cmsRequest->Record.SetUser(task.Owner);
+ cmsRequest->Record.SetCommand(NKikimrCms::TManagePermissionRequest::REJECT);
+
+ for (const auto& id : task.Permissions) {
+ cmsRequest->Record.AddPermissions(id);
+ }
+
+ Send(CmsActorId, std::move(cmsRequest));
+ }
+
+ Become(&TThis::StateWork);
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvCms::TEvManageRequestResponse, Handle<TEvCms::TEvManageRequestResponse>);
+ hFunc(TEvCms::TEvManagePermissionResponse, Handle<TEvCms::TEvManagePermissionResponse>);
+ }
+ }
+
+ template <typename TEvResponse>
+ void Handle(typename TEvResponse::TPtr& ev) {
+ const auto& record = ev->Get()->Record;
+
+ switch (record.GetStatus().GetCode()) {
+ case NKikimrCms::TStatus::OK:
+ return Reply(Ydb::StatusIds::SUCCESS, record.GetStatus().GetReason());
+ case NKikimrCms::TStatus::WRONG_REQUEST:
+ return Reply(Ydb::StatusIds::BAD_REQUEST, record.GetStatus().GetReason());
+ default:
+ return Reply(Ydb::StatusIds::INTERNAL_ERROR, record.GetStatus().GetReason());
+ }
+ }
+
+}; // TDropMaintenanceTask
+
+class TCompleteAction: public TAdapterActor<
+ TCompleteAction,
+ TEvCms::TEvCompleteActionRequest,
+ TEvCms::TEvManageActionResponse>
+{
+public:
+ using TBase::TBase;
+
+ void Bootstrap() {
+ auto cmsRequest = MakeHolder<TEvCms::TEvManagePermissionRequest>();
+ cmsRequest->Record.SetCommand(NKikimrCms::TManagePermissionRequest::DONE);
+
+ auto cmsState = GetCmsState();
+ for (const auto& actionUid : Request->Get()->Record.GetRequest().action_uids()) {
+ auto it = cmsState->MaintenanceTasks.find(actionUid.task_uid());
+ if (it == cmsState->MaintenanceTasks.end()) {
+ return Reply(Ydb::StatusIds::BAD_REQUEST, "Task not found");
+ }
+
+ const auto& task = it->second;
+ if (!cmsState->ScheduledRequests.contains(task.RequestId)) {
+ if (!task.Permissions.contains(actionUid.action_id())) {
+ return Reply(Ydb::StatusIds::BAD_REQUEST, "Action not found");
+ }
+ }
+
+ if (cmsRequest->Record.HasUser() && cmsRequest->Record.GetUser() != task.Owner) {
+ return Reply(Ydb::StatusIds::UNSUPPORTED, "Cannot complete actions of multiple owners at once");
+ } else {
+ cmsRequest->Record.SetUser(task.Owner);
+ }
+
+ cmsRequest->Record.AddPermissions(actionUid.action_id());
+ }
+
+ Send(CmsActorId, std::move(cmsRequest));
+ Become(&TThis::StateWork);
+ }
+
+ STFUNC(StateWork) {
+ switch (ev->GetTypeRewrite()) {
+ hFunc(TEvCms::TEvManagePermissionResponse, Handle);
+ }
+ }
+
+ void Handle(TEvCms::TEvManagePermissionResponse::TPtr& ev) {
+ const auto& record = ev->Get()->Record;
+
+ switch (record.GetStatus().GetCode()) {
+ case NKikimrCms::TStatus::OK:
+ break;
+ case NKikimrCms::TStatus::WRONG_REQUEST:
+ return Reply(Ydb::StatusIds::BAD_REQUEST, record.GetStatus().GetReason());
+ default:
+ return Reply(Ydb::StatusIds::INTERNAL_ERROR, record.GetStatus().GetReason());
+ }
+
+ auto response = MakeHolder<TEvCms::TEvManageActionResponse>();
+ response->Record.SetStatus(Ydb::StatusIds::SUCCESS);
+
+ for (const auto& actionUid : Request->Get()->Record.GetRequest().action_uids()) {
+ auto& actionStatus = *response->Record.MutableResult()->add_action_statuses();
+ *actionStatus.mutable_action_uid() = actionUid;
+ actionStatus.set_status(Ydb::StatusIds::SUCCESS);
+ }
+
+ Reply(std::move(response));
+ }
+
+}; // TCompleteAction
+
+void TCms::Handle(TEvCms::TEvListClusterNodesRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.RegisterWithSameMailbox(new TListClusterNodes(ev, SelfId()));
+}
+
+void TCms::Handle(TEvCms::TEvCreateMaintenanceTaskRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.RegisterWithSameMailbox(new TCreateMaintenanceTask(ev, SelfId(), State));
+}
+
+void TCms::Handle(TEvCms::TEvRefreshMaintenanceTaskRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.RegisterWithSameMailbox(new TRefreshMaintenanceTask(ev, SelfId(), State));
+}
+
+void TCms::Handle(TEvCms::TEvGetMaintenanceTaskRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.RegisterWithSameMailbox(new TGetMaintenanceTask(ev, SelfId(), State));
+}
+
+void TCms::Handle(TEvCms::TEvListMaintenanceTasksRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.RegisterWithSameMailbox(new TListMaintenanceTasks(ev, SelfId(), State));
+}
+
+void TCms::Handle(TEvCms::TEvDropMaintenanceTaskRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.RegisterWithSameMailbox(new TDropMaintenanceTask(ev, SelfId(), State));
+}
+
+void TCms::Handle(TEvCms::TEvCompleteActionRequest::TPtr& ev, const TActorContext& ctx) {
+ ctx.RegisterWithSameMailbox(new TCompleteAction(ev, SelfId(), State));
+}
+
+}
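
Note: in the adapter above, ConvertAction() reuses the single CMS TAction.Host field for both node ids and host names: when converting back to the public API it tries to parse the string as a number and fills node_id on success, host otherwise. The minimal standalone round-trip below is only an illustration (std::from_chars replaces util's TryFromString, and Scope is a stand-in for Ydb::Maintenance::ActionScope, not the real type).

    #include <cassert>
    #include <charconv>
    #include <cstdint>
    #include <string>

    // Illustrative stand-in for Ydb::Maintenance::ActionScope (oneof node_id | host).
    struct Scope {
        uint32_t NodeId = 0;
        std::string Host;    // set only when the value is not a numeric node id
    };

    Scope ScopeFromCmsHost(const std::string& host) {
        Scope scope;
        uint32_t nodeId = 0;
        const auto res = std::from_chars(host.data(), host.data() + host.size(), nodeId);
        if (res.ec == std::errc() && res.ptr == host.data() + host.size()) {
            scope.NodeId = nodeId;   // e.g. "42" -> node_id(42)
        } else {
            scope.Host = host;       // e.g. "host-1.example" -> host("host-1.example")
        }
        return scope;
    }

    int main() {
        assert(ScopeFromCmsHost("42").NodeId == 42);
        assert(ScopeFromCmsHost("host-1").Host == "host-1");
        return 0;
    }
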
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index 44ee2d5f340..e71a37aa33e 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -772,6 +772,7 @@ void TCms::AcceptPermissions(TPermissionResponse &resp, const TString &requestId
}
acceptTaskPermission(State->WalleTasks, State->WalleRequests, requestId, permission.GetId());
+ acceptTaskPermission(State->MaintenanceTasks, State->MaintenanceRequests, requestId, permission.GetId());
}
}
@@ -930,6 +931,8 @@ void TCms::RemoveEmptyTasks(const TActorContext &ctx)
{
for (auto &id : FindEmptyTasks(State->WalleTasks, ctx))
Execute(CreateTxRemoveWalleTask(id), ctx);
+ for (auto &id : FindEmptyTasks(State->MaintenanceTasks, ctx))
+ Execute(CreateTxRemoveMaintenanceTask(id), ctx);
}
void TCms::Cleanup(const TActorContext &ctx)
@@ -1237,6 +1240,13 @@ void TCms::CheckAndEnqueueRequest(TEvCms::TEvPermissionRequest::TPtr &ev, const
ev, TStatus::WRONG_REQUEST, "Missing user in request", ctx);
}
+ if (rec.HasMaintenanceTaskId()) {
+ if (State->MaintenanceTasks.contains(rec.GetMaintenanceTaskId())) {
+ return ReplyWithError<TEvCms::TEvPermissionResponse>(
+ ev, TStatus::WRONG_REQUEST, "Maintenance task already exists", ctx);
+ }
+ }
+
EnqueueRequest(ev.Release(), ctx);
}
@@ -1581,7 +1591,7 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev,
copy = new TRequestInfo(scheduled);
State->ScheduledRequests.emplace(reqId, std::move(scheduled));
- } else if (user == WALLE_CMS_USER) {
+ } else if (user == WALLE_CMS_USER || rec.HasMaintenanceTaskId()) {
scheduled.Owner = user;
scheduled.RequestId = reqId;
@@ -1591,8 +1601,13 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev,
if (ok)
AcceptPermissions(resp->Record, reqId, user, ctx);
+ TMaybe<TString> maintenanceTaskId;
+ if (rec.HasMaintenanceTaskId()) {
+ maintenanceTaskId.ConstructInPlace(rec.GetMaintenanceTaskId());
+ }
+
auto handle = new IEventHandle(ev->Sender, SelfId(), resp.Release(), 0, ev->Cookie);
- Execute(CreateTxStorePermissions(std::move(ev->Release()), handle, user, std::move(copy)), ctx);
+ Execute(CreateTxStorePermissions(std::move(ev->Release()), handle, user, std::move(copy), maintenanceTaskId), ctx);
}
TabletCounters->Percentile()[COUNTER_LATENCY_PERMISSION_REQUEST].IncrementFor((TInstant::Now() - requestStartTime).MilliSeconds());
diff --git a/ydb/core/cms/cms.h b/ydb/core/cms/cms.h
index 6b64b952cf0..b947dacd04e 100644
--- a/ydb/core/cms/cms.h
+++ b/ydb/core/cms/cms.h
@@ -5,6 +5,7 @@
#include "cms_state.h"
#include <ydb/core/protos/cms.pb.h>
+#include <ydb/core/protos/maintenance.pb.h>
#include <library/cpp/actors/interconnect/events_local.h>
#include <library/cpp/actors/core/actor.h>
@@ -57,6 +58,20 @@ struct TEvCms {
EvGetSentinelStateRequest,
EvGetSentinelStateResponse,
+ EvListClusterNodesRequest,
+ EvListClusterNodesResponse,
+ EvCreateMaintenanceTaskRequest,
+ EvRefreshMaintenanceTaskRequest,
+ EvMaintenanceTaskResponse,
+ EvGetMaintenanceTaskRequest,
+ EvGetMaintenanceTaskResponse,
+ EvListMaintenanceTasksRequest,
+ EvListMaintenanceTasksResponse,
+ EvDropMaintenanceTaskRequest,
+ EvManageMaintenanceTaskResponse,
+ EvCompleteActionRequest,
+ EvManageActionResponse,
+
EvWalleCreateTaskRequest = EvClusterStateRequest + 512,
EvWalleCreateTaskResponse,
EvWalleListTasksRequest,
@@ -251,7 +266,7 @@ struct TEvCms {
return Sprintf("%s { TaskId: %s }", ToStringHeader().data(), TaskId.data());
}
};
-
+
struct TEvGetClusterInfoRequest : public TEventLocal<TEvGetClusterInfoRequest, EvGetClusterInfoRequest> {
TString ToString() const override {
return "Get Cluster Info Request";
@@ -259,7 +274,7 @@ struct TEvCms {
};
struct TEvGetClusterInfoResponse : public TEventLocal<TEvGetClusterInfoResponse, EvGetClusterInfoResponse> {
- TClusterInfoPtr Info;
+ TClusterInfoPtr Info;
TString ToString() const override {
return "Get Cluster Info Response";
@@ -325,6 +340,71 @@ struct TEvCms {
NKikimrCms::TGetSentinelStateResponse,
EvGetSentinelStateResponse> {
};
+
+ struct TEvListClusterNodesRequest : public TEventPB<TEvListClusterNodesRequest,
+ NKikimrMaintenance::TListClusterNodesRequest,
+ EvListClusterNodesRequest> {
+ };
+
+ struct TEvListClusterNodesResponse : public TEventPB<TEvListClusterNodesResponse,
+ NKikimrMaintenance::TListClusterNodesResponse,
+ EvListClusterNodesResponse> {
+ };
+
+ struct TEvCreateMaintenanceTaskRequest : public TEventPB<TEvCreateMaintenanceTaskRequest,
+ NKikimrMaintenance::TCreateMaintenanceTaskRequest,
+ EvCreateMaintenanceTaskRequest> {
+ };
+
+ struct TEvRefreshMaintenanceTaskRequest : public TEventPB<TEvRefreshMaintenanceTaskRequest,
+ NKikimrMaintenance::TRefreshMaintenanceTaskRequest,
+ EvRefreshMaintenanceTaskRequest> {
+ };
+
+ struct TEvMaintenanceTaskResponse : public TEventPB<TEvMaintenanceTaskResponse,
+ NKikimrMaintenance::TMaintenanceTaskResponse,
+ EvMaintenanceTaskResponse> {
+ };
+
+ struct TEvGetMaintenanceTaskRequest : public TEventPB<TEvGetMaintenanceTaskRequest,
+ NKikimrMaintenance::TGetMaintenanceTaskRequest,
+ EvGetMaintenanceTaskRequest> {
+ };
+
+ struct TEvGetMaintenanceTaskResponse : public TEventPB<TEvGetMaintenanceTaskResponse,
+ NKikimrMaintenance::TGetMaintenanceTaskResponse,
+ EvGetMaintenanceTaskResponse> {
+ };
+
+ struct TEvListMaintenanceTasksRequest : public TEventPB<TEvListMaintenanceTasksRequest,
+ NKikimrMaintenance::TListMaintenanceTasksRequest,
+ EvListMaintenanceTasksRequest> {
+ };
+
+ struct TEvListMaintenanceTasksResponse : public TEventPB<TEvListMaintenanceTasksResponse,
+ NKikimrMaintenance::TListMaintenanceTasksResponse,
+ EvListMaintenanceTasksResponse> {
+ };
+
+ struct TEvDropMaintenanceTaskRequest : public TEventPB<TEvDropMaintenanceTaskRequest,
+ NKikimrMaintenance::TDropMaintenanceTaskRequest,
+ EvDropMaintenanceTaskRequest> {
+ };
+
+ struct TEvManageMaintenanceTaskResponse : public TEventPB<TEvManageMaintenanceTaskResponse,
+ NKikimrMaintenance::TManageMaintenanceTaskResponse,
+ EvManageMaintenanceTaskResponse> {
+ };
+
+ struct TEvCompleteActionRequest : public TEventPB<TEvCompleteActionRequest,
+ NKikimrMaintenance::TCompleteActionRequest,
+ EvCompleteActionRequest> {
+ };
+
+ struct TEvManageActionResponse : public TEventPB<TEvManageActionResponse,
+ NKikimrMaintenance::TManageActionResponse,
+ EvManageActionResponse> {
+ };
};
IActor *CreateCms(const TActorId &tablet, TTabletStorageInfo *info);
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index a448846a9bb..f8daaada1ca 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -135,8 +135,10 @@ private:
ITransaction *CreateTxRemoveRequest(const TString &id, THolder<IEventBase> req, TAutoPtr<IEventHandle> resp);
ITransaction *CreateTxRemovePermissions(TVector<TString> ids, THolder<IEventBase> req, TAutoPtr<IEventHandle> resp, bool expired = false);
ITransaction *CreateTxRemoveWalleTask(const TString &id);
+ ITransaction *CreateTxRemoveMaintenanceTask(const TString &id);
ITransaction *CreateTxStorePermissions(THolder<IEventBase> req, TAutoPtr<IEventHandle> resp,
- const TString &owner, TAutoPtr<TRequestInfo> scheduled);
+ const TString &owner, TAutoPtr<TRequestInfo> scheduled,
+ const TMaybe<TString> &maintenanceTaskId = {});
ITransaction *CreateTxStoreWalleTask(const TTaskInfo &task, THolder<IEventBase> req, TAutoPtr<IEventHandle> resp);
ITransaction *CreateTxUpdateConfig(TEvCms::TEvSetConfigRequest::TPtr &ev);
ITransaction *CreateTxUpdateConfig(TEvConsole::TEvConfigNotificationRequest::TPtr &ev);
@@ -238,6 +240,15 @@ private:
HFunc(TEvCms::TEvWalleRemoveTaskRequest, Handle);
HFunc(TEvCms::TEvStoreWalleTask, Handle);
HFunc(TEvCms::TEvRemoveWalleTask, Handle);
+ // public api begin
+ HFunc(TEvCms::TEvListClusterNodesRequest, Handle);
+ HFunc(TEvCms::TEvCreateMaintenanceTaskRequest, Handle);
+ HFunc(TEvCms::TEvRefreshMaintenanceTaskRequest, Handle);
+ HFunc(TEvCms::TEvGetMaintenanceTaskRequest, Handle);
+ HFunc(TEvCms::TEvListMaintenanceTasksRequest, Handle);
+ HFunc(TEvCms::TEvDropMaintenanceTaskRequest, Handle);
+ HFunc(TEvCms::TEvCompleteActionRequest, Handle);
+ // public api end
HFunc(TEvCms::TEvGetConfigRequest, Handle);
HFunc(TEvCms::TEvSetConfigRequest, Handle);
HFunc(TEvCms::TEvResetMarkerRequest, Handle);
@@ -393,6 +404,15 @@ private:
void Handle(TEvCms::TEvWalleRemoveTaskRequest::TPtr &ev, const TActorContext &ctx);
void Handle(TEvCms::TEvStoreWalleTask::TPtr &ev, const TActorContext &ctx);
void Handle(TEvCms::TEvRemoveWalleTask::TPtr &ev, const TActorContext &ctx);
+ // public api begin
+ void Handle(TEvCms::TEvListClusterNodesRequest::TPtr &ev, const TActorContext &ctx);
+ void Handle(TEvCms::TEvCreateMaintenanceTaskRequest::TPtr &ev, const TActorContext &ctx);
+ void Handle(TEvCms::TEvRefreshMaintenanceTaskRequest::TPtr &ev, const TActorContext &ctx);
+ void Handle(TEvCms::TEvGetMaintenanceTaskRequest::TPtr &ev, const TActorContext &ctx);
+ void Handle(TEvCms::TEvListMaintenanceTasksRequest::TPtr &ev, const TActorContext &ctx);
+ void Handle(TEvCms::TEvDropMaintenanceTaskRequest::TPtr &ev, const TActorContext &ctx);
+ void Handle(TEvCms::TEvCompleteActionRequest::TPtr &ev, const TActorContext &ctx);
+ // public api end
void Handle(TEvCms::TEvGetConfigRequest::TPtr &ev, const TActorContext &ctx);
void Handle(TEvCms::TEvSetConfigRequest::TPtr &ev, const TActorContext &ctx);
void Handle(TEvCms::TEvResetMarkerRequest::TPtr &ev, const TActorContext &ctx);
diff --git a/ydb/core/cms/cms_state.h b/ydb/core/cms/cms_state.h
index 92e4eeea66d..d2581b5936a 100644
--- a/ydb/core/cms/cms_state.h
+++ b/ydb/core/cms/cms_state.h
@@ -13,12 +13,14 @@ namespace NKikimr::NCms {
struct TTaskInfo {
TString TaskId;
TString RequestId;
+ TString Owner;
TSet<TString> Permissions;
TString ToString() const {
return TStringBuilder() << "{"
<< " TaskId: " << TaskId
<< " RequestId: " << RequestId
+ << " Owner: " << Owner
<< " Permissions: [" << JoinSeq(", ", Permissions) << "]"
<< " }";
}
@@ -39,6 +41,9 @@ struct TCmsState : public TAtomicRefCount<TCmsState> {
THashMap<TString, TTaskInfo> WalleTasks;
THashMap<TString, TString> WalleRequests;
+ THashMap<TString, TTaskInfo> MaintenanceTasks;
+ THashMap<TString, TString> MaintenanceRequests;
+
// CMS config.
TCmsConfig Config;
// CMS config proto cache
diff --git a/ydb/core/cms/cms_tx_load_state.cpp b/ydb/core/cms/cms_tx_load_state.cpp
index 17894caa106..663d4746029 100644
--- a/ydb/core/cms/cms_tx_load_state.cpp
+++ b/ydb/core/cms/cms_tx_load_state.cpp
@@ -31,13 +31,18 @@ public:
auto permissionRowset = db.Table<Schema::Permission>().Range().Select<Schema::Permission::TColumns>();
auto requestRowset = db.Table<Schema::Request>().Range().Select<Schema::Request::TColumns>();
auto walleTaskRowset = db.Table<Schema::WalleTask>().Range().Select<Schema::WalleTask::TColumns>();
+ auto maintenanceTasksRowset = db.Table<Schema::MaintenanceTasks>().Range().Select<Schema::MaintenanceTasks::TColumns>();
auto notificationRowset = db.Table<Schema::Notification>().Range().Select<Schema::Notification::TColumns>();
auto nodeTenantRowset = db.Table<Schema::NodeTenant>().Range().Select<Schema::NodeTenant::TColumns>();
auto logRowset = db.Table<Schema::LogRecords>().Range().Select<Schema::LogRecords::Timestamp>();
- if (!paramRow.IsReady() || !permissionRowset.IsReady()
- || !requestRowset.IsReady() || !walleTaskRowset.IsReady()
- || !notificationRowset.IsReady() || !logRowset.IsReady())
+ if (!paramRow.IsReady()
+ || !permissionRowset.IsReady()
+ || !requestRowset.IsReady()
+ || !walleTaskRowset.IsReady()
+ || !maintenanceTasksRowset.IsReady()
+ || !notificationRowset.IsReady()
+ || !logRowset.IsReady())
return false;
NKikimrCms::TCmsConfig config;
@@ -66,6 +71,8 @@ public:
state->WalleTasks.clear();
state->WalleRequests.clear();
+ state->MaintenanceTasks.clear();
+ state->MaintenanceRequests.clear();
state->Permissions.clear();
state->ScheduledRequests.clear();
state->Notifications.clear();
@@ -108,6 +115,25 @@ public:
return false;
}
+ while (!maintenanceTasksRowset.EndOfSet()) {
+ TString taskId = maintenanceTasksRowset.GetValue<Schema::MaintenanceTasks::TaskID>();
+ TString requestId = maintenanceTasksRowset.GetValue<Schema::MaintenanceTasks::RequestID>();
+ TString owner = maintenanceTasksRowset.GetValue<Schema::MaintenanceTasks::Owner>();
+
+ state->MaintenanceRequests.emplace(requestId, taskId);
+ state->MaintenanceTasks.emplace(taskId, TTaskInfo{
+ .TaskId = taskId,
+ .RequestId = requestId,
+ .Owner = owner,
+ });
+
+ LOG_DEBUG(ctx, NKikimrServices::CMS, "Loaded maintenance task %s mapped to request %s",
+ taskId.data(), requestId.data());
+
+ if (!maintenanceTasksRowset.Next())
+ return false;
+ }
+
while (!permissionRowset.EndOfSet()) {
TString id = permissionRowset.GetValue<Schema::Permission::ID>();
TString requestId = permissionRowset.GetValue<Schema::Permission::RequestID>();
@@ -135,6 +161,14 @@ public:
id.data(), taskId.data());
}
+ if (state->MaintenanceRequests.contains(requestId)) {
+ const auto &taskId = state->MaintenanceRequests[requestId];
+ state->MaintenanceTasks[taskId].Permissions.insert(id);
+
+ LOG_DEBUG(ctx, NKikimrServices::CMS, "Added permission %s to maintenance task %s",
+ id.data(), taskId.data());
+ }
+
if (!permissionRowset.Next())
return false;
}
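
Note: the load transaction above first rebuilds the in-memory maintenance-task state from the new MaintenanceTasks rows and then re-attaches existing permissions to their tasks through the shared request id. The following standalone model of that rehydration is illustrative only (plain structs and std containers instead of NIceDb rowsets and THashMap; TaskRow and PermissionRow are not the real types).

    #include <cassert>
    #include <set>
    #include <string>
    #include <unordered_map>
    #include <vector>

    struct TaskRow       { std::string TaskId, RequestId, Owner; };  // MaintenanceTasks row
    struct PermissionRow { std::string Id, RequestId; };             // Permission row

    struct TaskInfo {
        std::string TaskId, RequestId, Owner;
        std::set<std::string> Permissions;
    };

    struct State {
        std::unordered_map<std::string, TaskInfo>    MaintenanceTasks;     // task id -> info
        std::unordered_map<std::string, std::string> MaintenanceRequests;  // request id -> task id
    };

    State Load(const std::vector<TaskRow>& tasks, const std::vector<PermissionRow>& permissions) {
        State state;
        for (const auto& row : tasks) {
            state.MaintenanceRequests.emplace(row.RequestId, row.TaskId);
            state.MaintenanceTasks.emplace(row.TaskId, TaskInfo{row.TaskId, row.RequestId, row.Owner, {}});
        }
        // Re-attach permissions to their owning task via the request id, as in cms_tx_load_state.cpp.
        for (const auto& perm : permissions) {
            auto it = state.MaintenanceRequests.find(perm.RequestId);
            if (it != state.MaintenanceRequests.end()) {
                state.MaintenanceTasks[it->second].Permissions.insert(perm.Id);
            }
        }
        return state;
    }

    int main() {
        State s = Load({{"task-1", "req-1", "user"}}, {{"perm-1", "req-1"}, {"perm-2", "req-2"}});
        assert(s.MaintenanceTasks["task-1"].Permissions.count("perm-1") == 1);
        assert(s.MaintenanceTasks["task-1"].Permissions.count("perm-2") == 0);
        return 0;
    }
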
diff --git a/ydb/core/cms/cms_tx_remove_permissions.cpp b/ydb/core/cms/cms_tx_remove_permissions.cpp
index ca617ab7d5f..27a7ffc5736 100644
--- a/ydb/core/cms/cms_tx_remove_permissions.cpp
+++ b/ydb/core/cms/cms_tx_remove_permissions.cpp
@@ -44,6 +44,11 @@ public:
Self->State->WalleTasks.find(taskId)->second.Permissions.erase(id);
}
+ if (Self->State->MaintenanceRequests.contains(requestId)) {
+ auto taskId = Self->State->MaintenanceRequests.find(requestId)->second;
+ Self->State->MaintenanceTasks.find(taskId)->second.Permissions.erase(id);
+ }
+
Self->AuditLog(ctx, TStringBuilder() << "Remove permission"
<< ": id# " << id
<< ", reason# " << (Request ? "explicit remove" : "scheduled cleanup"));
diff --git a/ydb/core/cms/cms_tx_remove_task.cpp b/ydb/core/cms/cms_tx_remove_task.cpp
index 3f2fc0549b7..96bdb302e4d 100644
--- a/ydb/core/cms/cms_tx_remove_task.cpp
+++ b/ydb/core/cms/cms_tx_remove_task.cpp
@@ -52,4 +52,8 @@ ITransaction *TCms::CreateTxRemoveWalleTask(const TString &id) {
return new TTxRemoveTask<Schema::WalleTask>(this, id, State->WalleTasks, State->WalleRequests);
}
+ITransaction *TCms::CreateTxRemoveMaintenanceTask(const TString &id) {
+ return new TTxRemoveTask<Schema::MaintenanceTasks>(this, id, State->MaintenanceTasks, State->MaintenanceRequests);
+}
+
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/cms_tx_store_permissions.cpp b/ydb/core/cms/cms_tx_store_permissions.cpp
index db8c2b49b47..1af75c5be6b 100644
--- a/ydb/core/cms/cms_tx_store_permissions.cpp
+++ b/ydb/core/cms/cms_tx_store_permissions.cpp
@@ -8,12 +8,13 @@ namespace NKikimr::NCms {
class TCms::TTxStorePermissions : public TTransactionBase<TCms> {
public:
TTxStorePermissions(TCms *self, THolder<IEventBase> req, TAutoPtr<IEventHandle> resp,
- const TString &owner, TAutoPtr<TRequestInfo> scheduled)
+ const TString &owner, TAutoPtr<TRequestInfo> scheduled, const TMaybe<TString> &maintenanceTaskId)
: TBase(self)
, Request(std::move(req))
, Response(std::move(resp))
, Owner(owner)
, Scheduled(scheduled)
+ , MaintenanceTaskId(maintenanceTaskId)
, NextPermissionId(self->State->NextPermissionId)
, NextRequestId(self->State->NextRequestId)
{
@@ -29,6 +30,23 @@ public:
NIceDb::TUpdate<Schema::Param::NextRequestID>(NextRequestId));
const auto &rec = Response->Get<TEvCms::TEvPermissionResponse>()->Record;
+
+ if (MaintenanceTaskId) {
+ Y_VERIFY(Scheduled);
+
+ Self->State->MaintenanceRequests.emplace(Scheduled->RequestId, *MaintenanceTaskId);
+ Self->State->MaintenanceTasks.emplace(*MaintenanceTaskId, TTaskInfo{
+ .TaskId = *MaintenanceTaskId,
+ .RequestId = Scheduled->RequestId,
+ .Owner = Scheduled->Owner,
+ });
+
+ db.Table<Schema::MaintenanceTasks>().Key(*MaintenanceTaskId).Update(
+ NIceDb::TUpdate<Schema::MaintenanceTasks::RequestID>(Scheduled->RequestId),
+ NIceDb::TUpdate<Schema::MaintenanceTasks::Owner>(Scheduled->Owner)
+ );
+ }
+
for (const auto &permission : rec.GetPermissions()) {
const auto &id = permission.GetId();
const auto &requestId = Scheduled ? Scheduled->RequestId : "";
@@ -42,6 +60,11 @@ public:
NIceDb::TUpdate<Schema::Permission::Deadline>(deadline),
NIceDb::TUpdate<Schema::Permission::RequestID>(requestId));
+ if (MaintenanceTaskId) {
+ Y_VERIFY(Self->State->MaintenanceTasks.contains(*MaintenanceTaskId));
+ Self->State->MaintenanceTasks.at(*MaintenanceTaskId).Permissions.insert(id);
+ }
+
Self->AuditLog(ctx, TStringBuilder() << "Store permission"
<< ": id# " << id
<< ", validity# " << TInstant::MicroSeconds(deadline)
@@ -92,14 +115,15 @@ private:
TAutoPtr<IEventHandle> Response;
TString Owner;
TAutoPtr<TRequestInfo> Scheduled;
+ const TMaybe<TString> MaintenanceTaskId;
ui64 NextPermissionId;
ui64 NextRequestId;
};
ITransaction *TCms::CreateTxStorePermissions(THolder<IEventBase> req, TAutoPtr<IEventHandle> resp,
- const TString &owner, TAutoPtr<TRequestInfo> scheduled)
+ const TString &owner, TAutoPtr<TRequestInfo> scheduled, const TMaybe<TString> &maintenanceTaskId)
{
- return new TTxStorePermissions(this, std::move(req), std::move(resp), owner, std::move(scheduled));
+ return new TTxStorePermissions(this, std::move(req), std::move(resp), owner, std::move(scheduled), maintenanceTaskId);
}
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/scheme.h b/ydb/core/cms/scheme.h
index f581ab03790..5bbd7dcc564 100644
--- a/ydb/core/cms/scheme.h
+++ b/ydb/core/cms/scheme.h
@@ -42,7 +42,6 @@ struct Schema : NIceDb::Schema {
using TColumns = TableColumns<ID, Owner, Order, Content>;
};
-
struct WalleTask : Table<4> {
struct TaskID : Column<1, NScheme::NTypeIds::Utf8> {};
struct RequestID : Column<2, NScheme::NTypeIds::Utf8> {};
@@ -130,8 +129,18 @@ struct Schema : NIceDb::Schema {
using TColumns = TableColumns<NodeId, DiskId, Downtime>;
};
+ struct MaintenanceTasks : Table<14> {
+ struct TaskID : Column<1, NScheme::NTypeIds::Utf8> {};
+ struct RequestID : Column<2, NScheme::NTypeIds::Utf8> {};
+ struct Owner : Column<3, NScheme::NTypeIds::Utf8> {};
+
+ using TKey = TableKey<TaskID>;
+ using TColumns = TableColumns<TaskID, RequestID, Owner>;
+ };
+
using TTables = SchemaTables<Param, Permission, Request, WalleTask, Notification, NodeTenant,
- HostMarkers, NodeMarkers, PDiskMarkers, VDiskMarkers, LogRecords, NodeDowntimes, PDiskDowntimes>;
+ HostMarkers, NodeMarkers, PDiskMarkers, VDiskMarkers, LogRecords, NodeDowntimes, PDiskDowntimes,
+ MaintenanceTasks>;
using TSettings = SchemaSettings<ExecutorLogBatching<true>,
ExecutorLogFlushPeriod<TDuration::MicroSeconds(512).GetValue()>>;
};
diff --git a/ydb/core/cms/ya.make b/ydb/core/cms/ya.make
index 585a77a5272..e374be95321 100644
--- a/ydb/core/cms/ya.make
+++ b/ydb/core/cms/ya.make
@@ -1,6 +1,7 @@
LIBRARY()
SRCS(
+ api_adapters.cpp
audit_log.cpp
base_handler.h
cluster_info.cpp
diff --git a/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt b/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt
index e7bbabfbedf..1791870b26e 100644
--- a/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/driver_lib/run/CMakeLists.darwin-x86_64.txt
@@ -130,6 +130,7 @@ target_link_libraries(run PUBLIC
ydb-services-fq
ydb-services-kesus
ydb-services-local_discovery
+ ydb-services-maintenance
services-metadata-ds_table
ydb-services-metadata
services-bg_tasks-ds_table
diff --git a/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt b/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt
index b1c4e4f1a30..7d71a613048 100644
--- a/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/driver_lib/run/CMakeLists.linux-aarch64.txt
@@ -131,6 +131,7 @@ target_link_libraries(run PUBLIC
ydb-services-fq
ydb-services-kesus
ydb-services-local_discovery
+ ydb-services-maintenance
services-metadata-ds_table
ydb-services-metadata
services-bg_tasks-ds_table
diff --git a/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt b/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt
index b1c4e4f1a30..7d71a613048 100644
--- a/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/driver_lib/run/CMakeLists.linux-x86_64.txt
@@ -131,6 +131,7 @@ target_link_libraries(run PUBLIC
ydb-services-fq
ydb-services-kesus
ydb-services-local_discovery
+ ydb-services-maintenance
services-metadata-ds_table
ydb-services-metadata
services-bg_tasks-ds_table
diff --git a/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt b/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt
index e7bbabfbedf..1791870b26e 100644
--- a/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/driver_lib/run/CMakeLists.windows-x86_64.txt
@@ -130,6 +130,7 @@ target_link_libraries(run PUBLIC
ydb-services-fq
ydb-services-kesus
ydb-services-local_discovery
+ ydb-services-maintenance
services-metadata-ds_table
ydb-services-metadata
services-bg_tasks-ds_table
diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp
index a196f6c7fb7..d8c5c4a25b0 100644
--- a/ydb/core/driver_lib/run/run.cpp
+++ b/ydb/core/driver_lib/run/run.cpp
@@ -90,6 +90,7 @@
#include <ydb/services/fq/private_grpc.h>
#include <ydb/services/kesus/grpc_service.h>
#include <ydb/services/local_discovery/grpc_service.h>
+#include <ydb/services/maintenance/grpc_service.h>
#include <ydb/services/monitoring/grpc_service.h>
#include <ydb/services/persqueue_cluster_discovery/grpc_service.h>
#include <ydb/services/persqueue_v1/persqueue.h>
@@ -566,6 +567,8 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) {
names["scripting"] = &hasScripting;
TServiceCfg hasCms = services.empty();
names["cms"] = &hasCms;
+ TServiceCfg hasMaintenance = services.empty();
+ names["maintenance"] = &hasMaintenance;
TServiceCfg hasKesus = services.empty();
names["locking"] = names["kesus"] = &hasKesus;
TServiceCfg hasMonitoring = services.empty();
@@ -815,6 +818,11 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) {
grpcRequestProxies[0], hasCms.IsRlAllowed()));
}
+ if (hasMaintenance) {
+ server.AddService(new NGRpcService::TGRpcMaintenanceService(ActorSystem.Get(), Counters,
+ grpcRequestProxies[0], hasMaintenance.IsRlAllowed()));
+ }
+
if (hasDiscovery) {
auto discoveryService = new NGRpcService::TGRpcDiscoveryService(ActorSystem.Get(), Counters,grpcRequestProxies[0], hasDiscovery.IsRlAllowed());
if (!opts.SslData.Empty()) {
diff --git a/ydb/core/driver_lib/run/ya.make b/ydb/core/driver_lib/run/ya.make
index e29db37d9a0..ce57ccfd593 100644
--- a/ydb/core/driver_lib/run/ya.make
+++ b/ydb/core/driver_lib/run/ya.make
@@ -145,6 +145,7 @@ PEERDIR(
ydb/services/fq
ydb/services/kesus
ydb/services/local_discovery
+ ydb/services/maintenance
ydb/services/metadata/ds_table
ydb/services/metadata
ydb/services/bg_tasks/ds_table
diff --git a/ydb/core/grpc_services/CMakeLists.darwin-x86_64.txt b/ydb/core/grpc_services/CMakeLists.darwin-x86_64.txt
index c25d9faed5b..4c2ee86dca7 100644
--- a/ydb/core/grpc_services/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/grpc_services/CMakeLists.darwin-x86_64.txt
@@ -117,6 +117,7 @@ target_sources(ydb-core-grpc_services PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_log_store.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_long_tx.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_node_registration.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_maintenance.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_make_directory.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_modify_permissions.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_monitoring.cpp
diff --git a/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt b/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt
index 8d1841680a4..5f2bd916cff 100644
--- a/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/grpc_services/CMakeLists.linux-aarch64.txt
@@ -118,6 +118,7 @@ target_sources(ydb-core-grpc_services PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_log_store.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_long_tx.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_node_registration.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_maintenance.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_make_directory.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_modify_permissions.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_monitoring.cpp
diff --git a/ydb/core/grpc_services/CMakeLists.linux-x86_64.txt b/ydb/core/grpc_services/CMakeLists.linux-x86_64.txt
index 8d1841680a4..5f2bd916cff 100644
--- a/ydb/core/grpc_services/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/grpc_services/CMakeLists.linux-x86_64.txt
@@ -118,6 +118,7 @@ target_sources(ydb-core-grpc_services PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_log_store.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_long_tx.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_node_registration.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_maintenance.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_make_directory.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_modify_permissions.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_monitoring.cpp
diff --git a/ydb/core/grpc_services/CMakeLists.windows-x86_64.txt b/ydb/core/grpc_services/CMakeLists.windows-x86_64.txt
index c25d9faed5b..4c2ee86dca7 100644
--- a/ydb/core/grpc_services/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/grpc_services/CMakeLists.windows-x86_64.txt
@@ -117,6 +117,7 @@ target_sources(ydb-core-grpc_services PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_log_store.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_long_tx.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_node_registration.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_maintenance.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_make_directory.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_modify_permissions.cpp
${CMAKE_SOURCE_DIR}/ydb/core/grpc_services/rpc_monitoring.cpp
diff --git a/ydb/core/grpc_services/rpc_maintenance.cpp b/ydb/core/grpc_services/rpc_maintenance.cpp
new file mode 100644
index 00000000000..4e860eb3b4b
--- /dev/null
+++ b/ydb/core/grpc_services/rpc_maintenance.cpp
@@ -0,0 +1,168 @@
+#include "service_maintenance.h"
+
+#include <ydb/core/base/tablet_pipe.h>
+#include <ydb/core/cms/cms.h>
+#include <ydb/core/grpc_services/rpc_request_base.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
+
+#include <library/cpp/actors/core/actor.h>
+#include <library/cpp/actors/core/hfunc.h>
+
+namespace NKikimr::NGRpcService {
+
+using namespace Ydb;
+
+using TEvListClusterNodes = TGrpcRequestOperationCall<Maintenance::ListClusterNodesRequest, Maintenance::ListClusterNodesResponse>;
+using TEvCreateMaintenanceTask = TGrpcRequestOperationCall<Maintenance::CreateMaintenanceTaskRequest, Maintenance::MaintenanceTaskResponse>;
+using TEvRefreshMaintenanceTask = TGrpcRequestOperationCall<Maintenance::RefreshMaintenanceTaskRequest, Maintenance::MaintenanceTaskResponse>;
+using TEvGetMaintenanceTask = TGrpcRequestOperationCall<Maintenance::GetMaintenanceTaskRequest, Maintenance::GetMaintenanceTaskResponse>;
+using TEvListMaintenanceTasks = TGrpcRequestOperationCall<Maintenance::ListMaintenanceTasksRequest, Maintenance::ListMaintenanceTasksResponse>;
+using TEvDropMaintenanceTask = TGrpcRequestOperationCall<Maintenance::DropMaintenanceTaskRequest, Maintenance::ManageMaintenanceTaskResponse>;
+using TEvCompleteAction = TGrpcRequestOperationCall<Maintenance::CompleteActionRequest, Maintenance::ManageActionResponse>;
+
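+// Generic request actor: forwards a public Maintenance API call to the CMS tablet
+// over a tablet pipe and converts the CMS reply into an Ydb Operation.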
+template <typename TEvRequest, typename TEvCmsRequest, typename TEvCmsResponse>
+class TMaintenanceRPC: public TRpcRequestActor<TMaintenanceRPC<TEvRequest, TEvCmsRequest, TEvCmsResponse>, TEvRequest, true> {
+ using TThis = TMaintenanceRPC<TEvRequest, TEvCmsRequest, TEvCmsResponse>;
+ using TBase = TRpcRequestActor<TThis, TEvRequest, true>;
+
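+ // Admin check: with an empty AdministrationAllowedSIDs list everyone is allowed,
+ // otherwise the user token must contain at least one of the configured SIDs.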
+ bool CheckAccess() const {
+ if (AppData()->AdministrationAllowedSIDs.empty()) {
+ return true;
+ }
+
+ if (!this->UserToken) {
+ return false;
+ }
+
+ for (const auto& sid : AppData()->AdministrationAllowedSIDs) {
+ if (this->UserToken->IsExist(sid)) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
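+ // Wraps the public request into the corresponding CMS event (attaching the user SID
+ // for task creation) and sends it to the CMS tablet through a retrying pipe client.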
+ void SendRequest() {
+ auto ev = MakeHolder<TEvCmsRequest>();
+ ev->Record.MutableRequest()->CopyFrom(*this->GetProtoRequest());
+
+ if constexpr (std::is_same_v<TEvCmsRequest, NCms::TEvCms::TEvCreateMaintenanceTaskRequest>) {
+ if (this->UserToken) {
+ ev->Record.SetUserSID(this->UserToken->GetUserSID());
+ }
+ }
+
+ Y_VERIFY(AppData()->DomainsInfo);
+ Y_VERIFY(AppData()->DomainsInfo->Domains);
+ const auto group = AppData()->DomainsInfo->Domains.begin()->second->DefaultStateStorageGroup;
+
+ NTabletPipe::TClientConfig pipeConfig;
+ pipeConfig.RetryPolicy = {.RetryLimitCount = 10};
+ CmsPipe = this->RegisterWithSameMailbox(NTabletPipe::CreateClient(this->SelfId(), MakeCmsID(group), pipeConfig));
+
+ NTabletPipe::SendData(this->SelfId(), CmsPipe, ev.Release());
+ }
+
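+ // Converts the CMS response (status, issues, optional result) into a ready Operation;
+ // manage-task responses carry no result payload, so nothing is packed for them.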
+ void Handle(typename TEvCmsResponse::TPtr& ev) {
+ const auto& record = ev->Get()->Record;
+
+ Ydb::Operations::Operation operation;
+ operation.set_ready(true);
+ operation.set_status(record.GetStatus());
+ if (record.IssuesSize()) {
+ operation.mutable_issues()->CopyFrom(record.GetIssues());
+ }
+
+ if constexpr (!std::is_same_v<TEvCmsResponse, NCms::TEvCms::TEvManageMaintenanceTaskResponse>) {
+ operation.mutable_result()->PackFrom(record.GetResult());
+ }
+
+ this->Reply(operation);
+ }
+
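+ // Pipe connection failures and disconnects are reported to the client as UNAVAILABLE.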
+ void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) {
+ if (ev->Get()->Status != NKikimrProto::OK) {
+ Unavailable();
+ }
+ }
+
+ void Unavailable() {
+ this->Reply(Ydb::StatusIds::UNAVAILABLE, "CMS is unavailable");
+ }
+
+ void PassAway() override {
+ NTabletPipe::CloseAndForgetClient(this->SelfId(), CmsPipe);
+ TBase::PassAway();
+ }
+
+public:
+ using TBase::TBase;
+
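+ // Rejects non-admin callers with UNAUTHORIZED; otherwise sends the CMS request
+ // and waits for the response or a pipe failure.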
+ void Bootstrap() {
+ if (!CheckAccess()) {
+ auto error = TStringBuilder() << "Access denied";
+ if (this->UserToken) {
+ error << ": '" << this->UserToken->GetUserSID() << "' is not an admin";
+ }
+
+ this->Reply(Ydb::StatusIds::UNAUTHORIZED, NKikimrIssues::TIssuesIds::ACCESS_DENIED, error);
+ } else {
+ SendRequest();
+ this->Become(&TThis::StateWork);
+ }
+ }
+
+ STRICT_STFUNC(StateWork,
+ hFunc(TEvCmsResponse, Handle)
+ hFunc(TEvTabletPipe::TEvClientConnected, Handle)
+ sFunc(TEvTabletPipe::TEvClientDestroyed, Unavailable)
+ )
+
+private:
+ TActorId CmsPipe;
+};
+
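+// Entry points used by the gRPC service layer: each starts a request actor
+// parameterized by the public call type and the matching CMS request/response events.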
+void DoListClusterNodes(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f) {
+ f.RegisterActor(new TMaintenanceRPC<TEvListClusterNodes,
+ NCms::TEvCms::TEvListClusterNodesRequest,
+ NCms::TEvCms::TEvListClusterNodesResponse>(p.release()));
+}
+
+void DoCreateMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f) {
+ f.RegisterActor(new TMaintenanceRPC<TEvCreateMaintenanceTask,
+ NCms::TEvCms::TEvCreateMaintenanceTaskRequest,
+ NCms::TEvCms::TEvMaintenanceTaskResponse>(p.release()));
+}
+
+void DoRefreshMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f) {
+ f.RegisterActor(new TMaintenanceRPC<TEvRefreshMaintenanceTask,
+ NCms::TEvCms::TEvRefreshMaintenanceTaskRequest,
+ NCms::TEvCms::TEvMaintenanceTaskResponse>(p.release()));
+}
+
+void DoGetMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f) {
+ f.RegisterActor(new TMaintenanceRPC<TEvGetMaintenanceTask,
+ NCms::TEvCms::TEvGetMaintenanceTaskRequest,
+ NCms::TEvCms::TEvGetMaintenanceTaskResponse>(p.release()));
+}
+
+void DoListMaintenanceTasks(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f) {
+ f.RegisterActor(new TMaintenanceRPC<TEvListMaintenanceTasks,
+ NCms::TEvCms::TEvListMaintenanceTasksRequest,
+ NCms::TEvCms::TEvListMaintenanceTasksResponse>(p.release()));
+}
+
+void DoDropMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f) {
+ f.RegisterActor(new TMaintenanceRPC<TEvDropMaintenanceTask,
+ NCms::TEvCms::TEvDropMaintenanceTaskRequest,
+ NCms::TEvCms::TEvManageMaintenanceTaskResponse>(p.release()));
+}
+
+void DoCompleteAction(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f) {
+ f.RegisterActor(new TMaintenanceRPC<TEvCompleteAction,
+ NCms::TEvCms::TEvCompleteActionRequest,
+ NCms::TEvCms::TEvManageActionResponse>(p.release()));
+}
+
+}
diff --git a/ydb/core/grpc_services/service_maintenance.h b/ydb/core/grpc_services/service_maintenance.h
new file mode 100644
index 00000000000..3a7009bd370
--- /dev/null
+++ b/ydb/core/grpc_services/service_maintenance.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <memory>
+
+namespace NKikimr::NGRpcService {
+
+class IRequestOpCtx;
+class IFacilityProvider;
+
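+// Handlers implemented in rpc_maintenance.cpp; each registers an actor that
+// forwards the request to the CMS tablet and replies with an Operation.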
+void DoListClusterNodes(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f);
+void DoCreateMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f);
+void DoRefreshMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f);
+void DoGetMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f);
+void DoListMaintenanceTasks(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f);
+void DoDropMaintenanceTask(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f);
+void DoCompleteAction(std::unique_ptr<IRequestOpCtx> p, const IFacilityProvider& f);
+
+}
diff --git a/ydb/core/grpc_services/ya.make b/ydb/core/grpc_services/ya.make
index d6e169c22a9..454bfdc9ab6 100644
--- a/ydb/core/grpc_services/ya.make
+++ b/ydb/core/grpc_services/ya.make
@@ -55,6 +55,7 @@ SRCS(
rpc_log_store.cpp
rpc_long_tx.cpp
rpc_node_registration.cpp
+ rpc_maintenance.cpp
rpc_make_directory.cpp
rpc_modify_permissions.cpp
rpc_monitoring.cpp
diff --git a/ydb/core/protos/CMakeLists.darwin-x86_64.txt b/ydb/core/protos/CMakeLists.darwin-x86_64.txt
index 2742c4d4e7c..adad3d6aa30 100644
--- a/ydb/core/protos/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/protos/CMakeLists.darwin-x86_64.txt
@@ -1502,6 +1502,18 @@ get_built_tool_path(
cpp_styleguide
)
get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
tools/enum_parser/enum_parser
@@ -1601,6 +1613,7 @@ target_proto_messages(ydb-core-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/protos/load_test.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/local.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/long_tx_service.proto
+ ${CMAKE_SOURCE_DIR}/ydb/core/protos/maintenance.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/metrics.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/minikql_engine.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/mon.proto
diff --git a/ydb/core/protos/CMakeLists.linux-aarch64.txt b/ydb/core/protos/CMakeLists.linux-aarch64.txt
index f127d672c5b..fdc2937cd49 100644
--- a/ydb/core/protos/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/protos/CMakeLists.linux-aarch64.txt
@@ -1502,6 +1502,18 @@ get_built_tool_path(
cpp_styleguide
)
get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
tools/enum_parser/enum_parser
@@ -1602,6 +1614,7 @@ target_proto_messages(ydb-core-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/protos/load_test.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/local.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/long_tx_service.proto
+ ${CMAKE_SOURCE_DIR}/ydb/core/protos/maintenance.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/metrics.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/minikql_engine.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/mon.proto
diff --git a/ydb/core/protos/CMakeLists.linux-x86_64.txt b/ydb/core/protos/CMakeLists.linux-x86_64.txt
index f127d672c5b..fdc2937cd49 100644
--- a/ydb/core/protos/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/protos/CMakeLists.linux-x86_64.txt
@@ -1502,6 +1502,18 @@ get_built_tool_path(
cpp_styleguide
)
get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
tools/enum_parser/enum_parser
@@ -1602,6 +1614,7 @@ target_proto_messages(ydb-core-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/protos/load_test.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/local.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/long_tx_service.proto
+ ${CMAKE_SOURCE_DIR}/ydb/core/protos/maintenance.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/metrics.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/minikql_engine.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/mon.proto
diff --git a/ydb/core/protos/CMakeLists.windows-x86_64.txt b/ydb/core/protos/CMakeLists.windows-x86_64.txt
index 2742c4d4e7c..adad3d6aa30 100644
--- a/ydb/core/protos/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/protos/CMakeLists.windows-x86_64.txt
@@ -1502,6 +1502,18 @@ get_built_tool_path(
cpp_styleguide
)
get_built_tool_path(
+ TOOL_protoc_bin
+ TOOL_protoc_dependency
+ contrib/tools/protoc/bin
+ protoc
+)
+get_built_tool_path(
+ TOOL_cpp_styleguide_bin
+ TOOL_cpp_styleguide_dependency
+ contrib/tools/protoc/plugins/cpp_styleguide
+ cpp_styleguide
+)
+get_built_tool_path(
TOOL_enum_parser_bin
TOOL_enum_parser_dependency
tools/enum_parser/enum_parser
@@ -1601,6 +1613,7 @@ target_proto_messages(ydb-core-protos PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/protos/load_test.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/local.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/long_tx_service.proto
+ ${CMAKE_SOURCE_DIR}/ydb/core/protos/maintenance.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/metrics.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/minikql_engine.proto
${CMAKE_SOURCE_DIR}/ydb/core/protos/mon.proto
diff --git a/ydb/core/protos/cms.proto b/ydb/core/protos/cms.proto
index 7821d58a305..4f0e2c3a2db 100644
--- a/ydb/core/protos/cms.proto
+++ b/ydb/core/protos/cms.proto
@@ -159,6 +159,7 @@ message TPermissionRequest {
optional ETenantPolicy TenantPolicy = 8 [default = DEFAULT];
// Availability mode is not preserved for scheduled events.
optional EAvailabilityMode AvailabilityMode = 9 [default = MODE_MAX_AVAILABILITY];
+ optional string MaintenanceTaskId = 10;
}
enum EExtensionType {
diff --git a/ydb/core/protos/maintenance.proto b/ydb/core/protos/maintenance.proto
new file mode 100644
index 00000000000..b0be9a31983
--- /dev/null
+++ b/ydb/core/protos/maintenance.proto
@@ -0,0 +1,70 @@
+import "ydb/public/api/protos/draft/ydb_maintenance.proto";
+import "ydb/public/api/protos/ydb_issue_message.proto";
+import "ydb/public/api/protos/ydb_status_codes.proto";
+
+package NKikimrMaintenance;
+option java_package = "ru.yandex.kikimr.proto";
+
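+// Internal CMS events wrapping the public Ydb.Maintenance API: requests carry
+// the original public message (plus UserSID where relevant), responses carry
+// status, issues and the public result message.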
+message TListClusterNodesRequest {
+ optional Ydb.Maintenance.ListClusterNodesRequest Request = 1;
+}
+
+message TListClusterNodesResponse {
+ optional Ydb.StatusIds.StatusCode Status = 1;
+ repeated Ydb.Issue.IssueMessage Issues = 2;
+ optional Ydb.Maintenance.ListClusterNodesResult Result = 3;
+}
+
+message TCreateMaintenanceTaskRequest {
+ optional Ydb.Maintenance.CreateMaintenanceTaskRequest Request = 1;
+ optional string UserSID = 2;
+}
+
+message TRefreshMaintenanceTaskRequest {
+ optional Ydb.Maintenance.RefreshMaintenanceTaskRequest Request = 1;
+}
+
+message TMaintenanceTaskResponse {
+ optional Ydb.StatusIds.StatusCode Status = 1;
+ repeated Ydb.Issue.IssueMessage Issues = 2;
+ optional Ydb.Maintenance.MaintenanceTaskResult Result = 3;
+}
+
+message TGetMaintenanceTaskRequest {
+ optional Ydb.Maintenance.GetMaintenanceTaskRequest Request = 1;
+}
+
+message TGetMaintenanceTaskResponse {
+ optional Ydb.StatusIds.StatusCode Status = 1;
+ repeated Ydb.Issue.IssueMessage Issues = 2;
+ optional Ydb.Maintenance.GetMaintenanceTaskResult Result = 3;
+}
+
+message TListMaintenanceTasksRequest {
+ optional Ydb.Maintenance.ListMaintenanceTasksRequest Request = 1;
+}
+
+message TListMaintenanceTasksResponse {
+ optional Ydb.StatusIds.StatusCode Status = 1;
+ repeated Ydb.Issue.IssueMessage Issues = 2;
+ optional Ydb.Maintenance.ListMaintenanceTasksResult Result = 3;
+}
+
+message TDropMaintenanceTaskRequest {
+ optional Ydb.Maintenance.DropMaintenanceTaskRequest Request = 1;
+}
+
+message TManageMaintenanceTaskResponse {
+ optional Ydb.StatusIds.StatusCode Status = 1;
+ repeated Ydb.Issue.IssueMessage Issues = 2;
+}
+
+message TCompleteActionRequest {
+ optional Ydb.Maintenance.CompleteActionRequest Request = 1;
+}
+
+message TManageActionResponse {
+ optional Ydb.StatusIds.StatusCode Status = 1;
+ repeated Ydb.Issue.IssueMessage Issues = 2;
+ optional Ydb.Maintenance.ManageActionResult Result = 3;
+}
diff --git a/ydb/core/protos/services.proto b/ydb/core/protos/services.proto
index e60f110a0c4..241d0d5091c 100644
--- a/ydb/core/protos/services.proto
+++ b/ydb/core/protos/services.proto
@@ -987,5 +987,6 @@ message TActivity {
KQP_SOURCE_READ_ACTOR = 610;
FEDERATION_DISCOVERY = 611;
GRPC_REQ_SHARD_WRITER = 612;
+ CMS_API_ADAPTER = 613;
};
};
diff --git a/ydb/core/protos/ya.make b/ydb/core/protos/ya.make
index d0918753599..7b4f840f2e3 100644
--- a/ydb/core/protos/ya.make
+++ b/ydb/core/protos/ya.make
@@ -72,6 +72,7 @@ SRCS(
load_test.proto
local.proto
long_tx_service.proto
+ maintenance.proto
metrics.proto
minikql_engine.proto
mon.proto
diff --git a/ydb/public/api/grpc/draft/ydb_maintenance_v1.proto b/ydb/public/api/grpc/draft/ydb_maintenance_v1.proto
index 21832dd2dfe..c308e48d2ab 100644
--- a/ydb/public/api/grpc/draft/ydb_maintenance_v1.proto
+++ b/ydb/public/api/grpc/draft/ydb_maintenance_v1.proto
@@ -1,36 +1,25 @@
syntax = "proto3";
package Ydb.Maintenance.V1;
-option java_package = "com.yandex.ydb.maintenance.service.v1";
+option java_package = "com.yandex.ydb.maintenance.v1";
import "ydb/public/api/protos/draft/ydb_maintenance.proto";
service MaintenanceService {
- // List cluster hosts
+ // List cluster nodes.
rpc ListClusterNodes(ListClusterNodesRequest) returns (ListClusterNodesResponse);
- // List node devices
- rpc ListNodesDevices(ListNodesDevicesRequest) returns (ListNodesDevicesResponse);
- // Request for permissions
+ // Create maintenance task.
rpc CreateMaintenanceTask(CreateMaintenanceTaskRequest) returns (MaintenanceTaskResponse);
- // Get new results
+ // Try to perform the maintenance task's actions (polling).
rpc RefreshMaintenanceTask(RefreshMaintenanceTaskRequest) returns (MaintenanceTaskResponse);
- // Get scheduled task
- rpc GetMaintenanceTaskDetails(GetMaintenanceTaskRequest) returns (GetMaintenanceTaskResponse);
- // List maintenance tasks
+ // Get detailed task information.
+ rpc GetMaintenanceTask(GetMaintenanceTaskRequest) returns (GetMaintenanceTaskResponse);
+ // List maintenance tasks.
rpc ListMaintenanceTasks(ListMaintenanceTasksRequest) returns (ListMaintenanceTasksResponse);
-
- // Drop scheduled task
+ // Drop maintenance task.
rpc DropMaintenanceTask(DropMaintenanceTaskRequest) returns (ManageMaintenanceTaskResponse);
- // Update scheduled task deadline
- rpc ProlongateMaintenanceTask(ProlongateMaintenanceTaskRequest) returns (ManageMaintenanceTaskResponse);
-
- // Mark action result as no more needed
- rpc ReleaseActionResult(ReleaseActionResultRequest) returns (ManageActionResultResponse);
- // Update action result's deadline
- rpc ProlongateActionResult(ProlongateActionResultRequest) returns (ManageActionResultResponse);
- // Get detailed action state messages. Used for debugging service tasks to find out
- // the reason why an action does not get resolution.
- rpc GetReadableActionReason(GetReadableActionReasonRequest) returns (GetReadableActionReasonResponse);
+ // Mark action as completed.
+ rpc CompleteAction(CompleteActionRequest) returns (ManageActionResponse);
}
diff --git a/ydb/public/api/protos/draft/ydb_maintenance.proto b/ydb/public/api/protos/draft/ydb_maintenance.proto
index df66577709d..14d2c437552 100644
--- a/ydb/public/api/protos/draft/ydb_maintenance.proto
+++ b/ydb/public/api/protos/draft/ydb_maintenance.proto
@@ -1,304 +1,240 @@
syntax = "proto3";
option cc_enable_arenas = true;
-package Ydb.Maintenance;
-option java_package = "com.yandex.ydb.maintenance.service";
-
+import "ydb/public/api/protos/annotations/validation.proto";
+import "ydb/public/api/protos/ydb_discovery.proto";
+import "ydb/public/api/protos/ydb_operation.proto";
import "ydb/public/api/protos/ydb_status_codes.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
+package Ydb.Maintenance;
+option java_package = "com.yandex.ydb.maintenance";
-// Used to describe the scope of a single action
-message ActionScope {
- message PDiskId {
- uint32 node_id = 1;
- uint32 pdisk_id = 2;
+// Represents state of an abstract item: e.g. node or device.
+enum ItemState {
+ // Item's state couldn't be identified.
+ ITEM_STATE_UNSPECIFIED = 0;
+ // Item is up.
+ ITEM_STATE_UP = 1;
+ // Item is down due to planned maintenance.
+ ITEM_STATE_MAINTENANCE = 2;
+ // Item is down off-schedule.
+ ITEM_STATE_DOWN = 3;
+}
+
+message Node {
+ message StorageNode {
+ }
+
+ message DynamicNode {
+ string tenant = 1;
+ }
+
+ uint32 node_id = 1;
+ string host = 2;
+ uint32 port = 3;
+ Ydb.Discovery.NodeLocation location = 4;
+ ItemState state = 5;
+ oneof type {
+ StorageNode storage = 6;
+ DynamicNode dynamic = 7;
}
+}
+
+message ListClusterNodesRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+}
+
+message ListClusterNodesResult {
+ repeated Node nodes = 1;
+}
+
+message ListClusterNodesResponse {
+ // operation.result = ListClusterNodesResult
+ Ydb.Operations.Operation operation = 1;
+}
+
+enum AvailabilityMode {
+ AVAILABILITY_MODE_UNSPECIFIED = 0;
+ // In this mode the following are allowed:
+ // - at most 1 unavailable disk in each storage group;
+ // - at most 1 unavailable state storage ring.
+ // For nodes, tenant and cluster policies are followed.
+ AVAILABILITY_MODE_STRONG = 1;
+
+ // In this mode:
+ // - the total number of unavailable disks in each storage group
+ // shouldn't exceed the number of parity parts in that group;
+ // - it is allowed to disable (n_to_select - 1) / 2 state storage rings.
+ // Nodes are handled as in strong mode.
+ AVAILABILITY_MODE_WEAK = 2;
+
+ // Ignore any storage group & state storage checks.
+ // Using this mode might cause data unavailability.
+ AVAILABILITY_MODE_FORCE = 3;
+}
+
+message MaintenanceTaskOptions {
+ // User-defined _unique_ task identifier.
+ string task_uid = 1 [(length).le = 128];
+ // User-defined description.
+ string description = 2 [(length).le = 128];
+ // Availability mode.
+ AvailabilityMode availability_mode = 3;
+ bool dry_run = 4;
+}
+
+// Used to describe the scope of a single action.
+message ActionScope {
oneof scope {
- PDiskId pdisk_id = 1;
- uint32 node_id = 2;
- string host_name = 3;
- // string RackName = 3;
- // string DataCenter = 4;
+ uint32 node_id = 1;
+ string host = 2 [(length).le = 255];
}
}
-// Taking an exclusive lock to perform maintenance
+// Taking an exclusive lock to perform maintenance.
message LockAction {
- ActionScope action_scope = 1;
+ ActionScope scope = 1;
google.protobuf.Duration duration = 2;
}
-// Will not be implemented in the 1st version
-// Switching to maintenance mode. Maintenance modes
-// can overlap with each other
-message SetMaintenanceModeAction {
- ActionScope action_scope = 1;
- bool drain_tablets = 2;
- bool evict_vdisks = 3;
- google.protobuf.Duration duration = 4;
-}
-
message Action {
oneof action {
LockAction lock_action = 1;
- SetMaintenanceModeAction set_maintainance_mode_action = 2;
}
}
+message ActionGroup {
+ repeated Action actions = 1;
+}
+
+message CreateMaintenanceTaskRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+ MaintenanceTaskOptions task_options = 2;
+ repeated ActionGroup action_groups = 3;
+}
+
+message RefreshMaintenanceTaskRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+ string task_uid = 2 [(length).le = 128];
+}
+
message ActionUid {
- string task_uid = 1;
- // Unique id within a single task. Defined by cms
- uint32 group_id = 2;
- uint32 action_id = 3;
+ string task_uid = 1 [(length).le = 128];
+ // Unique ids within a single task, assigned by the server.
+ string group_id = 2 [(length).le = 128];
+ string action_id = 3 [(length).le = 128];
}
message ActionState {
enum ActionStatus {
ACTION_STATUS_UNSPECIFIED = 0;
- ACTION_STATUS_CREATED = 1;
- ACTION_STATUS_WAITING = 2;
- ACTION_STATUS_PENDING = 3;
- // Action has granted result.
- ACTION_STATUS_RESULT_PROVIDED = 4;
- // Result withdrawn due to deadline
- ACTION_STATUS_TIMEOUT_EXPIRED = 5;
- // The user marked the action as completed
- ACTION_STATUS_FINISHED_BY_USER = 6;
+ // Action can't be performed now.
+ ACTION_STATUS_PENDING = 1;
+ // Action performed: e.g. lock is taken.
+ ACTION_STATUS_PERFORMED = 2;
}
- // The reason why the state did not update
enum ActionReason {
ACTION_REASON_UNSPECIFIED = 0;
- // Action is ok
- ACTION_REASON_OK = 1;
+ // Everything is ok.
+ ACTION_REASON_OK = 1;
// Affected storage group has too many unavailable (locked or down) vdisks.
- // Can't grant another for this availability mode
ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS = 2;
- // Blob storage group is already broken
- ACTION_REASON_STORAGE_GROUP_BROKEN = 3;
- // Too many unavailable state storage rings,
- // it is impossible to grant node from another ring
- ACTION_REASON_TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS = 4;
- // State storage broken. Too many (more than (nToSelect - 1) / 2) unavailable rings
- ACTION_REASON_STATE_STORAGE_BROKEN = 5;
- // Issue in cluster disabled nodes limit
- ACTION_REASON_DISABLED_NODES_LIMIT_RICHED = 6;
- // Issue in tenant limits
- ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_RICHED = 7;
- // Wrong request
- ACTION_REASON_WRONG_REQUEST = 8;
+ // Too many unavailable state storage rings.
+ ACTION_REASON_TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS = 3;
+ // Too many disabled nodes (storage & dynamic) in cluster.
+ ACTION_REASON_DISABLED_NODES_LIMIT_REACHED = 4;
+ // Too many disabled dynamic nodes of specific tenant.
+ ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_REACHED = 5;
+ // Wrong request.
+ ACTION_REASON_WRONG_REQUEST = 6;
+ // Too many unavailable nodes with system tablets.
+ ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED = 7;
}
Action action = 1;
- ActionStatus status = 2;
- ActionUid action_uid = 3;
+ ActionUid action_uid = 2;
+ ActionStatus status = 3;
ActionReason reason = 4;
- // The time when the state was assigned
- google.protobuf.Timestamp state_timestamp = 5;
- // Fields specified for RESULT_GRANTED state
- google.protobuf.Timestamp deadline = 6;
-}
-
-message ActionGroup {
- repeated Action actions = 1;
+ google.protobuf.Timestamp deadline = 5;
}
message ActionGroupStates {
repeated ActionState action_states = 1;
}
-enum AvailabilityMode {
- AVAILABILITY_MODE_UNSPECIFIED = 0;
- // By default CMS tries to guarantee cluster availability
- // by allowing at most 1 disabled disk in each storage group.
- // For compute nodes tenant and cluster policies are followed.
- // In this mode CMS allows at most 1 disable state storage ring
- AVAILABILITY_MODE_STRONG = 1;
- // This mode allows to move cluster restart/update forward
- // in case some nodes are permanently down. In this mode
- // CMS allows at most 1 locked (by permission to restart
- // node or replace device) disk in a group. But total number
- // of disabled disks for a group shouldn't exceed number
- // of parity parts in that group.
- // Compute nodes are handled as in default mode.
-
- // In this mode CMS allows (nToSelect - 1) / 2 state storage rings
- AVAILABILITY_MODE_WEAK = 2;
- // In this mode CMS allows to lock 1 disk in a group, but if it can't
- // it waits for 15 minutes at gives 1 more node.
- AVAILABILITY_MODE_SMART = 3;
- // In this mode CMS allows at most 1 locked disk in a group
- // ignoring its parity parts count. Allows to restart nodes
- // even if multiple disks of some group are down. Using
- // this mode might cause data unavailability.
- // For compute nodes CMS follows tenant and cluster policies
- // but allows to restart at least one node for tenant or
- // cluster.
- AVAILABILITY_MODE_FORCE = 4;
-}
-
-enum ItemState {
- // Device/node state couldn't be identified.
- ITEM_STATE_UNSPECIFIED = 0;
- // Device/node is up.
- ITEM_STATE_UP = 1;
- // Device/node is Up, but permission granded
- ITEM_STATE_LOCKED = 2;
- // Device/node is down due to planned restart.
- ITEM_STATE_RESTART = 3;
- // Device/node is down off-schedule.
- ITEM_STATE_DOWN = 4;
-}
-
-message ListClusterNodesRequest {}
-
-message ListClusterNodesResponse {
- message Node {
- uint32 node_id = 1;
- string data_center = 2;
- string rack = 3;
- string fqdn = 4;
- uint32 interconnect_port = 5;
- ItemState state = 6;
- optional string tenant = 7;
- bool is_storage = 8;
- bool is_dynamic = 9;
- }
-
- repeated Node nodes = 1;
-}
-
-message ListNodesDevicesRequest {
- repeated uint32 node_id = 1;
+message MaintenanceTaskResult {
+ string task_uid = 1;
+ repeated ActionGroupStates action_group_states = 2;
+ // Try again after this deadline. Specified only if no actions have been performed.
+ optional google.protobuf.Timestamp retry_after = 3;
}
-message ListNodesDevicesResponse {
- message Device {
- string name = 1;
- ItemState state = 2;
- }
-
- message NodeDevices {
- uint32 node_id = 1;
- repeated Device devices = 2;
- }
-
- repeated NodeDevices nodes_devices = 1;
+message MaintenanceTaskResponse {
+ // operation.result = MaintenanceTaskResult
+ Ydb.Operations.Operation operation = 1;
}
-message MaintenanceTaskOptions {
- // The maximum number of action groups in progress at a time
- uint32 in_flight = 1;
- bool dry_run = 2;
- // Name of a task and some comment.
- // Provided for the convenience of the user.
- string name = 3;
- string comment = 4;
- // Availability mode is not preserved for scheduled events.
- AvailabilityMode availability_mode = 5;
- // User defined GUID
- string task_uid = 6;
- // Task with largest priority blocks other tasks
- // until all actions are completed. Default is 0
- int64 priority = 7;
+message GetMaintenanceTaskRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+ string task_uid = 2 [(length).le = 128];
}
-message CreateMaintenanceTaskRequest {
+message GetMaintenanceTaskResult {
MaintenanceTaskOptions task_options = 1;
- repeated ActionGroup action_groups = 2;
- // Indicates that client is no longer interested in the task after
- // the specified duration starting from the time task arrives at the cms.
- // If not specified then default duration from CMS config is used.
- google.protobuf.Duration task_timeout = 5;
-
+ repeated ActionGroupStates action_group_states = 2;
}
-// Updated action states and tryes to grand permissions
-message RefreshMaintenanceTaskRequest {
- string task_uid = 2;
-}
-
-message MaintenanceTaskResponse {
- StatusIds.StatusCode status = 1;
- string task_uid = 2;
- repeated ActionGroupStates actions_states = 3;
- // Try again after this deadline. Specified if there are no PERMISSION_GRANDED
- // actions after request
- optional google.protobuf.Timestamp deadline = 4;
+message GetMaintenanceTaskResponse {
+ // operation.result = GetMaintenanceTaskResult
+ Ydb.Operations.Operation operation = 1;
}
message ListMaintenanceTasksRequest {
- // If specified, it will return the tasks created by this user.
- // Otherwise all tasks will be returned
- optional string user = 1;
+ Ydb.Operations.OperationParams operation_params = 1;
+ // User SID (Security ID).
+ // If specified, only tasks created by this user are returned.
+ // Otherwise all tasks are returned.
+ optional string user = 2;
}
-message ListMaintenanceTasksResponse {
+message ListMaintenanceTasksResult {
repeated string tasks_uids = 1;
}
-// Returns specified task
-message GetMaintenanceTaskRequest {
- string task_uid = 1;
-}
-
-message GetMaintenanceTaskResponse {
- MaintenanceTaskOptions task_options = 1;
- repeated ActionGroupStates actions_group_states = 2;
- google.protobuf.Timestamp task_deadline = 3;
+message ListMaintenanceTasksResponse {
+ // operation.result = ListMaintenanceTasksResult
+ Ydb.Operations.Operation operation = 1;
}
-// Drop maintenance task
message DropMaintenanceTaskRequest {
- string task_uid = 1;
-}
-
-// Extends Request deadline
-message ProlongateMaintenanceTaskRequest {
- string task_uid = 1;
- google.protobuf.Timestamp new_deadline = 2;
+ Ydb.Operations.OperationParams operation_params = 1;
+ string task_uid = 2 [(length).le = 128];
}
message ManageMaintenanceTaskResponse {
- StatusIds.StatusCode status = 1;
-}
-
-// Removes resolved result
-message ReleaseActionResultRequest {
- repeated ActionUid action_uid = 1;
+ Ydb.Operations.Operation operation = 1;
}
-// Extends results deadlines
-message ProlongateActionResultRequest {
- message ActionDuration {
- ActionUid action_uid = 1;
- google.protobuf.Timestamp new_deadline = 2;
- }
- repeated ActionDuration action_durations = 1;
+message CompleteActionRequest {
+ Ydb.Operations.OperationParams operation_params = 1;
+ repeated ActionUid action_uids = 2;
}
-message ManageActionResultResponse {
- message ResultStatus {
+message ManageActionResult {
+ message Status {
ActionUid action_uid = 1;
StatusIds.StatusCode status = 2;
}
- repeated ResultStatus result_statuses = 1;
+ repeated Status action_statuses = 1;
}
-// Getting a detailed reason why the action doesn't get a result granted state
-message GetReadableActionReasonRequest {
- repeated ActionUid action_ids = 1;
-}
-
-message GetReadableActionReasonResponse {
- message Reason {
- ActionState action_state = 1;
- string Reason = 2;
- }
- repeated Reason reasons = 1;
+message ManageActionResponse {
+ // operation.result = ManageActionResult
+ Ydb.Operations.Operation operation = 1;
}
diff --git a/ydb/services/CMakeLists.txt b/ydb/services/CMakeLists.txt
index 5822fdab09a..4ec40bc8a3e 100644
--- a/ydb/services/CMakeLists.txt
+++ b/ydb/services/CMakeLists.txt
@@ -17,6 +17,7 @@ add_subdirectory(fq)
add_subdirectory(kesus)
add_subdirectory(lib)
add_subdirectory(local_discovery)
+add_subdirectory(maintenance)
add_subdirectory(metadata)
add_subdirectory(monitoring)
add_subdirectory(persqueue_cluster_discovery)
diff --git a/ydb/services/maintenance/CMakeLists.darwin-x86_64.txt b/ydb/services/maintenance/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 00000000000..0c048850836
--- /dev/null
+++ b/ydb/services/maintenance/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(ydb-services-maintenance)
+target_link_libraries(ydb-services-maintenance PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ ydb-core-grpc_services
+ api-grpc
+ cpp-actors-core
+ cpp-grpc-server
+)
+target_sources(ydb-services-maintenance PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/maintenance/grpc_service.cpp
+)
diff --git a/ydb/services/maintenance/CMakeLists.linux-aarch64.txt b/ydb/services/maintenance/CMakeLists.linux-aarch64.txt
new file mode 100644
index 00000000000..31a81fd3abb
--- /dev/null
+++ b/ydb/services/maintenance/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(ydb-services-maintenance)
+target_link_libraries(ydb-services-maintenance PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ ydb-core-grpc_services
+ api-grpc
+ cpp-actors-core
+ cpp-grpc-server
+)
+target_sources(ydb-services-maintenance PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/maintenance/grpc_service.cpp
+)
diff --git a/ydb/services/maintenance/CMakeLists.linux-x86_64.txt b/ydb/services/maintenance/CMakeLists.linux-x86_64.txt
new file mode 100644
index 00000000000..31a81fd3abb
--- /dev/null
+++ b/ydb/services/maintenance/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(ydb-services-maintenance)
+target_link_libraries(ydb-services-maintenance PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ ydb-core-grpc_services
+ api-grpc
+ cpp-actors-core
+ cpp-grpc-server
+)
+target_sources(ydb-services-maintenance PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/maintenance/grpc_service.cpp
+)
diff --git a/ydb/services/maintenance/CMakeLists.txt b/ydb/services/maintenance/CMakeLists.txt
new file mode 100644
index 00000000000..f8b31df0c11
--- /dev/null
+++ b/ydb/services/maintenance/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/services/maintenance/CMakeLists.windows-x86_64.txt b/ydb/services/maintenance/CMakeLists.windows-x86_64.txt
new file mode 100644
index 00000000000..0c048850836
--- /dev/null
+++ b/ydb/services/maintenance/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,21 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(ydb-services-maintenance)
+target_link_libraries(ydb-services-maintenance PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ ydb-core-grpc_services
+ api-grpc
+ cpp-actors-core
+ cpp-grpc-server
+)
+target_sources(ydb-services-maintenance PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/services/maintenance/grpc_service.cpp
+)
diff --git a/ydb/services/maintenance/grpc_service.cpp b/ydb/services/maintenance/grpc_service.cpp
new file mode 100644
index 00000000000..a485c4af4a4
--- /dev/null
+++ b/ydb/services/maintenance/grpc_service.cpp
@@ -0,0 +1,43 @@
+#include "grpc_service.h"
+
+#include <ydb/core/grpc_services/base/base.h>
+#include <ydb/core/grpc_services/grpc_helper.h>
+#include <ydb/core/grpc_services/grpc_request_proxy.h>
+#include <ydb/core/grpc_services/rpc_calls.h>
+#include <ydb/core/grpc_services/service_maintenance.h>
+
+namespace NKikimr::NGRpcService {
+
+void TGRpcMaintenanceService::SetupIncomingRequests(NGrpc::TLoggerPtr logger) {
+ Y_UNUSED(logger);
+
+ auto getCounterBlock = CreateCounterCb(Counters_, ActorSystem_);
+ using namespace Ydb;
+
+#ifdef ADD_REQUEST
+#error ADD_REQUEST macro already defined
+#endif
+
+#define ADD_REQUEST(NAME, REQUEST, RESPONSE, CB) \
+ MakeIntrusive<TGRpcRequest<Maintenance::REQUEST, Maintenance::RESPONSE, TGRpcMaintenanceService>> \
+ (this, &Service_, CQ_, \
+ [this](NGrpc::IRequestContextBase *ctx) { \
+ NGRpcService::ReportGrpcReqToMon(*ActorSystem_, ctx->GetPeer()); \
+ ActorSystem_->Send(GRpcRequestProxyId_, \
+ new TGrpcRequestOperationCall<Maintenance::REQUEST, Maintenance::RESPONSE> \
+ (ctx, &CB, TRequestAuxSettings{RLSWITCH(TRateLimiterMode::Rps), nullptr})); \
+ }, &Maintenance::V1::MaintenanceService::AsyncService::Request ## NAME, \
+ #NAME, logger, getCounterBlock("maintenance", #NAME))->Run();
+
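+ // Each line below registers one MaintenanceService RPC: incoming calls are reported
+ // to monitoring and forwarded to the request proxy, which invokes the Do* handler.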
+ ADD_REQUEST(ListClusterNodes, ListClusterNodesRequest, ListClusterNodesResponse, DoListClusterNodes);
+ ADD_REQUEST(CreateMaintenanceTask, CreateMaintenanceTaskRequest, MaintenanceTaskResponse, DoCreateMaintenanceTask);
+ ADD_REQUEST(RefreshMaintenanceTask, RefreshMaintenanceTaskRequest, MaintenanceTaskResponse, DoRefreshMaintenanceTask);
+ ADD_REQUEST(GetMaintenanceTask, GetMaintenanceTaskRequest, GetMaintenanceTaskResponse, DoGetMaintenanceTask);
+ ADD_REQUEST(ListMaintenanceTasks, ListMaintenanceTasksRequest, ListMaintenanceTasksResponse, DoListMaintenanceTasks);
+ ADD_REQUEST(DropMaintenanceTask, DropMaintenanceTaskRequest, ManageMaintenanceTaskResponse, DoDropMaintenanceTask);
+ ADD_REQUEST(CompleteAction, CompleteActionRequest, ManageActionResponse, DoCompleteAction);
+
+#undef ADD_REQUEST
+}
+
+}
diff --git a/ydb/services/maintenance/grpc_service.h b/ydb/services/maintenance/grpc_service.h
new file mode 100644
index 00000000000..985572b7f7b
--- /dev/null
+++ b/ydb/services/maintenance/grpc_service.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <ydb/core/grpc_services/base/base_service.h>
+#include <ydb/public/api/grpc/draft/ydb_maintenance_v1.grpc.pb.h>
+
+#include <library/cpp/actors/core/actorsystem.h>
+#include <library/cpp/grpc/server/grpc_server.h>
+
+namespace NKikimr::NGRpcService {
+
+class TGRpcMaintenanceService: public TGrpcServiceBase<Ydb::Maintenance::V1::MaintenanceService> {
+public:
+ using TGrpcServiceBase<Ydb::Maintenance::V1::MaintenanceService>::TGrpcServiceBase;
+private:
+ void SetupIncomingRequests(NGrpc::TLoggerPtr logger);
+};
+
+}
diff --git a/ydb/services/maintenance/ya.make b/ydb/services/maintenance/ya.make
new file mode 100644
index 00000000000..17c28e634bd
--- /dev/null
+++ b/ydb/services/maintenance/ya.make
@@ -0,0 +1,14 @@
+LIBRARY()
+
+SRCS(
+ grpc_service.cpp
+)
+
+PEERDIR(
+ ydb/core/grpc_services
+ ydb/public/api/grpc
+ library/cpp/actors/core
+ library/cpp/grpc/server
+)
+
+END()
diff --git a/ydb/services/ya.make b/ydb/services/ya.make
index a62562b1134..a89961e2bac 100644
--- a/ydb/services/ya.make
+++ b/ydb/services/ya.make
@@ -9,6 +9,7 @@ RECURSE(
kesus
lib
local_discovery
+ maintenance
metadata
monitoring
persqueue_cluster_discovery