aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorilnaz <ilnaz@ydb.tech>2023-06-07 16:48:06 +0300
committerilnaz <ilnaz@ydb.tech>2023-06-07 16:48:06 +0300
commitf86cc6b226195a56622fbd70e0c0c9aeefb6a46f (patch)
treeb2eca66f8b6378bc53eb1fd6f5047675893540db
parentb6947f76044ef4b12e08c62ce995ebf4b815fbb7 (diff)
downloadydb-f86cc6b226195a56622fbd70e0c0c9aeefb6a46f.tar.gz
Revert "Unify checking in CMS"
This reverts commit e7dd9f1e07897af53a5fa4875fdd76b96a6cc4a6, reversing changes made to da4de01e86ba5540671187f7ad0a8d3a8f0b78fd.
-rw-r--r--ydb/core/cms/CMakeLists.darwin-x86_64.txt11
-rw-r--r--ydb/core/cms/CMakeLists.linux-aarch64.txt11
-rw-r--r--ydb/core/cms/CMakeLists.linux-x86_64.txt11
-rw-r--r--ydb/core/cms/CMakeLists.windows-x86_64.txt11
-rw-r--r--ydb/core/cms/checkers_ut.cpp294
-rw-r--r--ydb/core/cms/cluster_info.cpp113
-rw-r--r--ydb/core/cms/cluster_info.h34
-rw-r--r--ydb/core/cms/cluster_info_ut.cpp37
-rw-r--r--ydb/core/cms/cms.cpp107
-rw-r--r--ydb/core/cms/cms_impl.h3
-rw-r--r--ydb/core/cms/cms_ut.cpp90
-rw-r--r--ydb/core/cms/cms_ut_common.cpp10
-rw-r--r--ydb/core/cms/erasure_checkers.cpp482
-rw-r--r--ydb/core/cms/erasure_checkers.h144
-rw-r--r--ydb/core/cms/node_checkers.cpp280
-rw-r--r--ydb/core/cms/node_checkers.h102
-rw-r--r--ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/core/cms/ut/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/cms/ut/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/core/cms/ut/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/public/api/protos/draft/ydb_maintenance.proto10
21 files changed, 513 insertions, 1241 deletions
diff --git a/ydb/core/cms/CMakeLists.darwin-x86_64.txt b/ydb/core/cms/CMakeLists.darwin-x86_64.txt
index 7ef104adaa9..cf839e87ff8 100644
--- a/ydb/core/cms/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.darwin-x86_64.txt
@@ -22,12 +22,6 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
- TOOL_enum_parser_bin
- TOOL_enum_parser_dependency
- tools/enum_parser/enum_parser
- enum_parser
-)
-get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -99,11 +93,6 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
-generate_enum_serilization(ydb-core-cms
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
- INCLUDE_HEADERS
- ydb/core/cms/erasure_checkers.h
-)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/CMakeLists.linux-aarch64.txt b/ydb/core/cms/CMakeLists.linux-aarch64.txt
index 54abfb25b63..e1dcbee6629 100644
--- a/ydb/core/cms/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/cms/CMakeLists.linux-aarch64.txt
@@ -22,12 +22,6 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
- TOOL_enum_parser_bin
- TOOL_enum_parser_dependency
- tools/enum_parser/enum_parser
- enum_parser
-)
-get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -100,11 +94,6 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
-generate_enum_serilization(ydb-core-cms
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
- INCLUDE_HEADERS
- ydb/core/cms/erasure_checkers.h
-)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/CMakeLists.linux-x86_64.txt b/ydb/core/cms/CMakeLists.linux-x86_64.txt
index 54abfb25b63..e1dcbee6629 100644
--- a/ydb/core/cms/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.linux-x86_64.txt
@@ -22,12 +22,6 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
- TOOL_enum_parser_bin
- TOOL_enum_parser_dependency
- tools/enum_parser/enum_parser
- enum_parser
-)
-get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -100,11 +94,6 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
-generate_enum_serilization(ydb-core-cms
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
- INCLUDE_HEADERS
- ydb/core/cms/erasure_checkers.h
-)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/CMakeLists.windows-x86_64.txt b/ydb/core/cms/CMakeLists.windows-x86_64.txt
index 7ef104adaa9..cf839e87ff8 100644
--- a/ydb/core/cms/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.windows-x86_64.txt
@@ -22,12 +22,6 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
- TOOL_enum_parser_bin
- TOOL_enum_parser_dependency
- tools/enum_parser/enum_parser
- enum_parser
-)
-get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -99,11 +93,6 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
-generate_enum_serilization(ydb-core-cms
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
- INCLUDE_HEADERS
- ydb/core/cms/erasure_checkers.h
-)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/checkers_ut.cpp b/ydb/core/cms/checkers_ut.cpp
deleted file mode 100644
index 0964432c5f6..00000000000
--- a/ydb/core/cms/checkers_ut.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-#include "cluster_info.h"
-#include "erasure_checkers.h"
-#include "node_checkers.h"
-#include "ut_helpers.h"
-#include "util/string/cast.h"
-
-#include <ydb/core/protos/cms.pb.h>
-#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
-
-#include <library/cpp/testing/unittest/registar.h>
-
-#include <util/generic/ptr.h>
-#include <util/generic/vector.h>
-
-#include <bitset>
-#include <string>
-
-namespace NKikimr::NCmsTest {
-
-using namespace NCms;
-using namespace NKikimrCms;
-using namespace Ydb::Maintenance;
-
-TVector<TVDiskID> GenerateDefaultBlock42Group() {
- TVector<TVDiskID> group;
- for (ui32 i = 0; i < 8; ++i) {
- group.push_back(TVDiskID(0, 1, 0, i % 8, 0));
- }
- return group;
-}
-
-TVector<TVDiskID> GenerateDefaultMirror3dcGroup() {
- TVector<TVDiskID> group;
- for (ui32 i = 0; i < 9; ++i) {
- group.push_back(TVDiskID(0, 1, i / 3, i % 3, 0));
- }
- return group;
-}
-
-Y_UNIT_TEST_SUITE(TCmsCheckersTest) {
- Y_UNIT_TEST(DefaultErasureCheckerAvailabilityMode)
- {
- TDefaultErasureChecker checker(0);
-
- auto vdisks = GenerateDefaultBlock42Group();
- for (auto vdisk : vdisks) {
- checker.UpdateVDisk(vdisk, UP);
- }
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
-
- checker.UpdateVDisk(vdisks[0], DOWN);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
-
- checker.LockVDisk(vdisks[0]);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
- }
-
- Y_UNIT_TEST(Mirror3dcCheckerAvailabilityMode)
- {
- TMirror3dcChecker checker(0);
-
- auto vdisks = GenerateDefaultMirror3dcGroup();
- for (auto vdisk : vdisks) {
- checker.UpdateVDisk(vdisk, UP);
- }
-
- // One disabled disk for max availability
- for (ui32 i = 0; i < 9; ++i) {
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
- checker.LockVDisk(vdisks[i]);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
-
- for (ui32 j = 0; j < 9; ++j) {
- if (i == j)
- continue;
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- }
-
- checker.UnlockVDisk(vdisks[i]);
- }
-
- for (ui32 dc = 0; dc < 3; ++dc) {
- // Minus 1 dc
- checker.LockVDisk(vdisks[dc * 3]);
- checker.LockVDisk(vdisks[dc * 3 + 1]);
- checker.LockVDisk(vdisks[dc * 3 + 2]);
-
- for (ui32 i = 0; i < 9; ++i) {
- if ((i <= dc * 3 + 2) && (i >= dc * 3)) {
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
- continue;
- }
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
- }
-
- // Minus 2 in dc
- for (ui32 i = 0; i < 3; ++i) {
- checker.UnlockVDisk(vdisks[dc * 3 + i]);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[dc * 3 + i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[dc * 3 + i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
-
- for (ui32 j = 0; j < 9; ++j) {
- if ((j <= dc * 3 + 2) && (j >= dc * 3))
- continue;
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
- }
-
- checker.LockVDisk(vdisks[dc * 3 + i]);
- }
-
- checker.UnlockVDisk(vdisks[dc * 3]);
- checker.UnlockVDisk(vdisks[dc * 3 + 1]);
- checker.UnlockVDisk(vdisks[dc * 3 + 2]);
- }
-
- // Minus 1 in each dc
- for (ui32 i = 0; i < 3; ++i) {
- checker.LockVDisk(vdisks[i]);
- checker.LockVDisk(vdisks[i + 3]);
- checker.LockVDisk(vdisks[i + 6]);
-
- for (ui32 j = 0; j < 9; ++j) {
- if (j % 3 == i)
- continue;
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- }
-
- checker.UnlockVDisk(vdisks[i]);
- checker.UnlockVDisk(vdisks[i + 3]);
- checker.UnlockVDisk(vdisks[i + 6]);
- }
-
- checker.UpdateVDisk(vdisks[0], DOWN);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- }
-
- Y_UNIT_TEST(DefaultErasureCheckerPriorities)
- {
- TDefaultErasureChecker checker(0);
-
- auto vdisks = GenerateDefaultBlock42Group();
- for (auto vdisk : vdisks) {
- checker.UpdateVDisk(vdisk, UP);
- }
-
- // Check one scheduled task with order
- for (ui32 i = 0; i < 8; ++i) {
- checker.EmplaceTask(vdisks[i], 0, 1, "task-1");
-
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 2), ActionState::ACTION_REASON_LOW_PRIORITY);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 2), ActionState::ACTION_REASON_LOW_PRIORITY);
-
- for (ui32 j = 0; j < 8; ++j) {
- if (j == i)
- continue;
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 2), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_KEEP_AVAILABLE, 0, 2), ActionState::ACTION_REASON_OK);
- }
-
- checker.RemoveTask("task-1");
- }
-
- // Check two scheduled task with priority and order
- checker.EmplaceTask(vdisks[1], 1, 1, "task-1");
- checker.EmplaceTask(vdisks[2], 2, 1, "task-2");
-
- // Priority is higher than task-1 but lower than task-2
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_MAX_AVAILABILITY, 2, 2), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_KEEP_AVAILABLE, 2, 2), ActionState::ACTION_REASON_OK);
-
- checker.RemoveTask("task-1");
- checker.RemoveTask("task-2");
- }
-
- Y_UNIT_TEST(Mirror3dcCheckerPriorities)
- {
- TMirror3dcChecker checker(0);
-
- auto vdisks = GenerateDefaultMirror3dcGroup();
- for (auto vdisk : vdisks) {
- checker.UpdateVDisk(vdisk, UP);
- }
-
- // task-1 > task-2 > task-3 > task-4
- checker.EmplaceTask(vdisks[0], 2, 2, "task-1");
- checker.EmplaceTask(vdisks[1], 2, 5, "task-2");
- checker.EmplaceTask(vdisks[2], 1, 2, "task-3");
- checker.EmplaceTask(vdisks[3], 1, 4, "task-4");
-
- // Highest priority
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[4], MODE_MAX_AVAILABILITY, 3, 1), ActionState::ACTION_REASON_OK);
- // Blocked by all tasks
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[4], MODE_KEEP_AVAILABLE, 1, 6), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
- // Blocked by task-1, task-2, task-3
- UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_KEEP_AVAILABLE, 1, 3), ActionState::ACTION_REASON_OK);
- }
-
- Y_UNIT_TEST(ClusterNodesCounter)
- {
- const ui32 nodeCount = 30;
- TClusterLimitsCounter checker(0, 0);
-
- for (ui32 i = 1; i <= nodeCount; ++i) {
- checker.UpdateNode(i, UP);
- }
-
- // Without limit allow all nodes
- for (ui32 i = 1; i < nodeCount; ++i) {
- checker.LockNode(i);
- }
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
-
- for (ui32 i = 1; i < nodeCount; ++i) {
- checker.UnlockNode(i);
- }
-
- // Limit 15 nodes
- checker.ApplyLimits(15, 0);
- for (ui32 i = 1; i < 15; ++i) {
- checker.LockNode(i);
- }
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
-
- checker.ApplyLimits(0, 50);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
-
- checker.LockNode(15);
- checker.ApplyLimits(15, 0);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
-
- checker.ApplyLimits(0, 50);
-
- for (ui32 i = 1; i <= 15; ++i) {
- checker.UnlockNode(i);
- }
-
- checker.ApplyLimits(0, 0);
- for (ui32 i = 1; i < nodeCount; ++i) {
- checker.EmplaceTask(i, 1, 3, "task-1");
- }
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 1, 1), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 1, 1), ActionState::ACTION_REASON_OK);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 1, 4), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 1, 4), ActionState::ACTION_REASON_OK);
-
- checker.RemoveTask("task-1");
-
- checker.ApplyLimits(15, 0);
- for (ui32 i = 1; i < 15; ++i) {
- checker.EmplaceTask(i, 1, 3, "task-2");
- }
- checker.EmplaceTask(15, 1, 4, "task-3");
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 1, 1), ActionState::ACTION_REASON_OK);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 1, 1), ActionState::ACTION_REASON_OK);
-
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 1, 5), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
- UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 1, 5), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
-
- checker.RemoveTask("task-2");
- checker.RemoveTask("task-3");
- }
-
-}
-}
diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp
index 1a3c85d5b33..28e6baeddbf 100644
--- a/ydb/core/cms/cluster_info.cpp
+++ b/ydb/core/cms/cluster_info.cpp
@@ -1,11 +1,8 @@
#include "cluster_info.h"
#include "cms_state.h"
#include "node_checkers.h"
-#include "erasure_checkers.h"
-#include <ydb/core/protos/cms.pb.h>
#include <ydb/core/protos/services.pb.h>
-#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/core/log.h>
@@ -404,7 +401,7 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi
}
}
- node.UpdateNodeState();
+ node.UpdateNodeState();
}
void TClusterInfo::ClearNode(ui32 nodeId)
@@ -419,10 +416,6 @@ void TClusterInfo::ClearNode(ui32 nodeId)
node.HasTenantInfo = false;
node.State = NKikimrCms::DOWN;
node.UpdateNodeState();
-
- for (auto& vdisk : node.VDisks) {
- BSGroup(vdisk.GroupID).GroupChecker->UpdateVDisk(vdisk, DOWN);
- }
}
void TClusterInfo::ApplyInitialNodeTenants(const TActorContext& ctx, const THashMap<ui32, TString>& nodeTenants)
@@ -496,15 +489,7 @@ void TClusterInfo::UpdatePDiskState(const TPDiskID &id, const NKikimrWhiteboard:
}
auto &pdisk = PDiskRef(id);
- auto state = info.GetState() == NKikimrBlobStorage::TPDiskState::Normal ? UP : DOWN;
- pdisk.State = state;
-
- if (state == UP)
- return;
-
- for (auto &vdisk : pdisk.VDisks) {
- BSGroup(vdisk.GroupID).GroupChecker->UpdateVDisk(vdisk, state);
- }
+ pdisk.State = info.GetState() == NKikimrBlobStorage::TPDiskState::Normal ? UP : DOWN;
}
void TClusterInfo::AddVDisk(const NKikimrBlobStorage::TBaseConfig::TVSlot &info)
@@ -560,13 +545,10 @@ void TClusterInfo::UpdateVDiskState(const TVDiskID &id, const NKikimrWhiteboard:
}
auto &vdisk = VDiskRef(id);
- if (info.GetVDiskState() == NKikimrWhiteboard::OK && info.GetReplicated()) {
+ if (info.GetVDiskState() == NKikimrWhiteboard::OK && info.GetReplicated())
vdisk.State = UP;
- BSGroup(vdisk.VDiskId.GroupID).GroupChecker->UpdateVDisk(id, UP);
- } else {
+ else
vdisk.State = DOWN;
- BSGroup(vdisk.VDiskId.GroupID).GroupChecker->UpdateVDisk(id, DOWN);
- }
}
void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &info)
@@ -575,8 +557,6 @@ void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &inf
bsgroup.GroupId = info.GetGroupId();
if (info.GetErasureSpecies())
bsgroup.Erasure = {TErasureType::ErasureSpeciesByName(info.GetErasureSpecies())};
-
- bsgroup.GroupChecker = CreateStorageGroupChecker(bsgroup.Erasure.GetErasure(), bsgroup.GroupId);
for (const auto &vdisk : info.GetVSlotId()) {
TPDiskID pdiskId = {vdisk.GetNodeId(), vdisk.GetPDiskId()};
Y_VERIFY_DEBUG(HasPDisk(pdiskId));
@@ -596,10 +576,8 @@ void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &inf
bsgroup.VDisks.insert(pdisk.VSlots.at(vdisk.GetVSlotId()));
}
- for (auto &vdisk : bsgroup.VDisks) {
+ for (auto &vdisk : bsgroup.VDisks)
VDiskRef(vdisk).BSGroups.insert(bsgroup.GroupId);
- bsgroup.GroupChecker->AddVDisk(vdisk);
- }
BSGroups[bsgroup.GroupId] = std::move(bsgroup);
}
@@ -634,18 +612,12 @@ void TClusterInfo::AddPDiskTempLock(TPDiskID pdiskId, const NKikimrCms::TAction
{
auto &pdisk = PDiskRef(pdiskId);
pdisk.TempLocks.push_back({RollbackPoint, action});
-
- for (auto& vdisk : pdisk.VDisks) {
- LogManager.AddLockVDiskOperation(vdisk ,BSGroup(vdisk.GroupID).GroupChecker);
- }
}
void TClusterInfo::AddVDiskTempLock(TVDiskID vdiskId, const NKikimrCms::TAction &action)
{
auto &vdisk = VDiskRef(vdiskId);
vdisk.TempLocks.push_back({RollbackPoint, action});
-
- LogManager.AddLockVDiskOperation(vdiskId, BSGroup(vdiskId.GroupID).GroupChecker);
}
static TServices MakeServices(const NKikimrCms::TAction &action) {
@@ -678,10 +650,6 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action)
for (const auto node : nodes) {
for (auto &nodeGroup: node->NodeGroups)
nodeGroup->LockNode(node->NodeId);
-
- for (auto vdisk : node->VDisks) {
- BSGroup(vdisk.GroupID).GroupChecker->LockVDisk(vdisk);
- }
}
}
break;
@@ -689,12 +657,12 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action)
for (const auto &device : action.GetDevices()) {
if (HasPDisk(device)) {
auto pdisk = &PDiskRef(device);
- for (auto vdisk : pdisk->VDisks)
- BSGroup(vdisk.GroupID).GroupChecker->LockVDisk(vdisk);
+ for (auto &nodeGroup: NodeRef(pdisk->NodeId).NodeGroups)
+ nodeGroup->LockNode(pdisk->NodeId);
} else if (HasVDisk(device)) {
auto vdisk = &VDiskRef(device);
-
- BSGroup(vdisk->VDiskId.GroupID).GroupChecker->LockVDisk(vdisk->VDiskId);
+ for (auto &nodeGroup: NodeRef(vdisk->NodeId).NodeGroups)
+ nodeGroup->LockNode(vdisk->NodeId);
}
}
break;
@@ -857,9 +825,8 @@ ui64 TClusterInfo::AddTempLocks(const NKikimrCms::TAction &action, const TActorC
LogManager.ApplyAction(action, this);
- for (auto item : items) {
+ for (auto item : items)
item->TempLocks.push_back({RollbackPoint, action});
- }
return items.size();
}
@@ -874,38 +841,6 @@ ui64 TClusterInfo::ScheduleActions(const TRequestInfo &request, const TActorCont
item->ScheduleLock({action, request.Owner, request.RequestId, request.Order});
locks += items.size();
-
- switch (action.GetType()) {
- case NKikimrCms::TAction::RESTART_SERVICES:
- case NKikimrCms::TAction::SHUTDOWN_HOST:
- if (auto nodes = NodePtrs(action.GetHost(), MakeServices(action))) {
- for (const auto node : nodes) {
- for (auto& group : node->NodeGroups) {
- group->EmplaceTask(node->NodeId, 0, request.Order, request.RequestId);
- }
- for (auto &vdisk: node->VDisks) {
- BSGroup(vdisk.GroupID).GroupChecker->EmplaceTask(vdisk, 0, request.Order, request.RequestId);
- }
- }
- }
- break;
- case NKikimrCms::TAction::REPLACE_DEVICES:
- for (const auto &device : action.GetDevices()) {
- if (HasPDisk(device)) {
- auto pdisk = &PDisk(device);
- for (auto &vdisk: pdisk->VDisks) {
- BSGroup(vdisk.GroupID).GroupChecker->EmplaceTask(vdisk, 0, request.Order, request.RequestId);
- }
- } else if (HasVDisk(device)) {
- auto vdisk = &VDisk(device);
- BSGroup(vdisk->VDiskId.GroupID).GroupChecker->EmplaceTask(vdisk->VDiskId, 0, request.Order, request.RequestId);
- }
- }
- break;
-
- default:
- break;
- }
}
return locks;
@@ -915,20 +850,6 @@ void TClusterInfo::UnscheduleActions(const TString &requestId)
{
for (auto &entry : LockableItems)
entry.second->RemoveScheduledLocks(requestId);
-
- for (auto &group : BSGroups) {
- group.second.GroupChecker->RemoveTask(requestId);
- }
-
- ClusterNodes->RemoveTask(requestId);
-
- for (auto& [_, tenantChecker] : TenantNodesChecker) {
- tenantChecker->RemoveTask(requestId);
- }
-
- for (auto& [_, sysNodesCheckers] : SysNodesCheckers) {
- sysNodesCheckers->RemoveTask(requestId);
- }
}
void TClusterInfo::DeactivateScheduledLocks(ui64 order)
@@ -997,7 +918,7 @@ void TClusterInfo::GenerateTenantNodesCheckers() {
void TClusterInfo::GenerateSysTabletsNodesCheckers() {
for (auto tablet : BootstrapConfig.GetTablet()) {
- SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType()));
+ SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType()));
for (auto nodeId : tablet.GetNode()) {
NodeToTabletTypes[nodeId].push_back(tablet.GetType());
@@ -1078,10 +999,6 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action,
for (const auto node : nodes) {
for (auto &nodeGroup: node->NodeGroups)
AddNodeLockOperation(node->NodeId, nodeGroup);
-
- for (auto& vdisk : node->VDisks) {
- AddLockVDiskOperation(vdisk, clusterState->BSGroup(vdisk.GroupID).GroupChecker);
- }
}
}
break;
@@ -1089,13 +1006,13 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action,
for (const auto &device : action.GetDevices()) {
if (clusterState->HasPDisk(device)) {
auto pdisk = &clusterState->PDisk(device);
- for (auto& vdisk : pdisk->VDisks) {
- AddLockVDiskOperation(vdisk, clusterState->BSGroup(vdisk.GroupID).GroupChecker);
- }
+ for (auto &nodeGroup: clusterState->NodeRef(pdisk->NodeId).NodeGroups)
+ AddNodeLockOperation(pdisk->NodeId, nodeGroup);
} else if (clusterState->HasVDisk(device)) {
auto vdisk = &clusterState->VDisk(device);
- AddLockVDiskOperation(vdisk->VDiskId, clusterState->BSGroup(vdisk->VDiskId.GroupID).GroupChecker);
+ for (auto &nodeGroup: clusterState->NodeRef(vdisk->NodeId).NodeGroups)
+ AddNodeLockOperation(vdisk->NodeId, nodeGroup);
}
}
break;
diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h
index 0b7253d3688..923b5a7d05f 100644
--- a/ydb/core/cms/cluster_info.h
+++ b/ydb/core/cms/cluster_info.h
@@ -3,7 +3,6 @@
#include "defs.h"
#include "config.h"
#include "downtime.h"
-#include "erasure_checkers.h"
#include "node_checkers.h"
#include "services.h"
@@ -15,7 +14,6 @@
#include <ydb/core/protos/cms.pb.h>
#include <ydb/core/protos/config.pb.h>
#include <ydb/core/protos/console.pb.h>
-#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/interconnect/interconnect.h>
@@ -473,8 +471,6 @@ struct TBSGroupInfo {
ui32 GroupId = 0;
TErasureType Erasure;
TSet<TVDiskID> VDisks;
-
- TSimpleSharedPtr<IStorageGroupChecker> GroupChecker;
};
/**
@@ -600,32 +596,6 @@ public:
}
};
-class TLockDiskOperation : public TOperationBase {
-private:
- TVDiskID VDiskId;
-
-private:
- TSimpleSharedPtr<IStorageGroupChecker> StorageGroupChecker;
-
-public:
- TLockDiskOperation(const TVDiskID& vdiskId, TSimpleSharedPtr<IStorageGroupChecker> checker)
- : TOperationBase(OPERATION_TYPE_LOCK_DISK)
- , VDiskId(vdiskId)
- , StorageGroupChecker(checker)
- {
- }
-
- void Do() override final {
- StorageGroupChecker->LockVDisk(VDiskId);
- }
-
- void Undo() override final {
- StorageGroupChecker->UnlockVDisk(VDiskId);
- }
-
-
-};
-
class TLogRollbackPoint : public TOperationBase {
public:
TLogRollbackPoint() : TOperationBase(OPERATION_TYPE_ROLLBACK_POINT)
@@ -654,10 +624,6 @@ public:
Log.emplace_back(new TLockNodeOperation(nodeId, nodesState))->Do();
}
- void AddLockVDiskOperation(const TVDiskID& vdiskId, TSimpleSharedPtr<IStorageGroupChecker> checker) {
- Log.emplace_back(new TLockDiskOperation(vdiskId, checker))->Do();
- }
-
void RollbackOperations() {
while (!Log.empty() && Log.back()->Type != OPERATION_TYPE_ROLLBACK_POINT) {
Log.back()->Undo();
diff --git a/ydb/core/cms/cluster_info_ut.cpp b/ydb/core/cms/cluster_info_ut.cpp
index e56a530f2b5..0e79306a3dd 100644
--- a/ydb/core/cms/cluster_info_ut.cpp
+++ b/ydb/core/cms/cluster_info_ut.cpp
@@ -292,57 +292,56 @@ Y_UNIT_TEST_SUITE(TClusterInfoTest) {
UNIT_ASSERT_VALUES_EQUAL(cluster->NodesCount("localhost"), 2);
cluster->AddPDisk(MakePDiskConfig(1, 1));
- cluster->AddPDisk(MakePDiskConfig(2, 2));
- cluster->AddVDisk(MakeVSlotConfig(1, {0, 1, 0, 0, 0}, 1, 0));
- cluster->AddVDisk(MakeVSlotConfig(2, {0, 1, 0, 1, 0}, 2, 0));
- cluster->AddBSGroup(MakeBSGroup(0, "none", 1, 1, 0, 2, 2, 0));
-
cluster->UpdatePDiskState(NCms::TPDiskID(1, 1), MakePDiskInfo(1));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(1, 1)));
UNIT_ASSERT(!cluster->HasPDisk(NCms::TPDiskID(1, 2)));
UNIT_ASSERT(!cluster->HasPDisk(NCms::TPDiskID(2, 1)));
- CheckPDisk(cluster->PDisk(NCms::TPDiskID(1, 1)), 1, 1, UP, 1);
+ CheckPDisk(cluster->PDisk(NCms::TPDiskID(1, 1)), 1, 1, UP, 0);
UNIT_ASSERT(cluster->Node(1).PDisks.contains(NCms::TPDiskID(1, 1)));
+ cluster->AddVDisk(MakeVSlotConfig(1, {0, 1, 0, 0, 0}, 1, 0));
cluster->UpdateVDiskState({0, 1, 0, 0, 0}, MakeVDiskInfo({0, 1, 0, 0, 0}, 1, 0));
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 0, 0}));
- CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), {0, 1, 0, 0, 0}, 1, UP, 1, 1);
+ UNIT_ASSERT(!cluster->HasVDisk({0, 1, 0, 1, 0}));
+ CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), {0, 1, 0, 0, 0}, 1, UP, 1, 0);
UNIT_ASSERT_VALUES_EQUAL(cluster->PDisk(NCms::TPDiskID(1, 1)).VDisks.size(), 1);
UNIT_ASSERT(cluster->PDisk(NCms::TPDiskID(1, 1)).VDisks.contains(TVDiskID(0, 1, 0, 0, 0)));
+ cluster->AddPDisk(MakePDiskConfig(2, 2));
cluster->UpdatePDiskState(NCms::TPDiskID(2, 2), MakePDiskInfo(2));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(2, 2)));
- CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 1);
+ CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 0);
+ cluster->AddVDisk(MakeVSlotConfig(2, {0, 1, 0, 1, 0}, 2, 0));
cluster->UpdateVDiskState({0, 1, 0, 1, 0}, MakeVDiskInfo({0, 1, 0, 1, 0}, 2, 0));
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 1, 0}));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(2, 2)));
CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 1);
UNIT_ASSERT(cluster->PDisk(NCms::TPDiskID(2, 2)).VDisks.contains(TVDiskID(0, 1, 0, 1, 0)));
- UNIT_ASSERT(cluster->HasBSGroup(0));
- UNIT_ASSERT(!cluster->HasBSGroup(1));
- CheckBSGroup(cluster->BSGroup(0), 0, TErasureType::ErasureNone, 2,
+ cluster->AddBSGroup(MakeBSGroup(1, "none", 1, 1, 0, 2, 2, 0));
+ UNIT_ASSERT(cluster->HasBSGroup(1));
+ UNIT_ASSERT(!cluster->HasBSGroup(2));
+ CheckBSGroup(cluster->BSGroup(1), 1, TErasureType::ErasureNone, 2,
TVDiskID(0, 1, 0, 0, 0), TVDiskID(0, 1, 0, 1, 0));
- CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 1, 0);
- CheckVDisk(cluster->VDisk({0, 1, 0, 1, 0}), 1, 0);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 1, 1);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 1, 0}), 1, 1);
cluster->AddPDisk(MakePDiskConfig(3, 3));
- cluster->AddVDisk(MakeVSlotConfig(3, {0, 1, 0, 2, 0}, 3, 0));
- cluster->AddBSGroup(MakeBSGroup(2, "none", 1, 1, 0, 3, 3, 0));
-
cluster->UpdatePDiskState(NCms::TPDiskID(3, 3), MakePDiskInfo(3));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(3, 3)));
- CheckPDisk(cluster->PDisk(NCms::TPDiskID(3, 3)), 3, 3, UP, 1);
+ CheckPDisk(cluster->PDisk(NCms::TPDiskID(3, 3)), 3, 3, UP, 0);
+ cluster->AddVDisk(MakeVSlotConfig(3, {0, 1, 0, 2, 0}, 3, 0));
cluster->UpdateVDiskState({0, 1, 0, 2, 0}, MakeVDiskInfo({0, 1, 0, 2, 0}, 3, 0));
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 2, 0}));
- CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 1);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 0);
+ cluster->AddBSGroup(MakeBSGroup(2, "none", 1, 1, 0, 3, 3, 0));
UNIT_ASSERT(cluster->HasBSGroup(2));
CheckBSGroup(cluster->BSGroup(2), 2, TErasureType::ErasureNone, 2,
TVDiskID(0, 1, 0, 0, 0), TVDiskID(0, 1, 0, 2, 0));
- CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 2, 0, 2);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 2, 1, 2);
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 2, 0}));
CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 1, 2);
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index c400618ccaa..211a7e0c622 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -17,7 +17,6 @@
#include <ydb/core/protos/config_units.pb.h>
#include <ydb/core/protos/counters_cms.pb.h>
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
-#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/core/hfunc.h>
@@ -111,7 +110,6 @@ namespace {
bool TCms::CheckPermissionRequest(const TPermissionRequest &request,
TPermissionResponse &response,
TPermissionRequest &scheduled,
- ui64 requestOrder,
const TActorContext &ctx)
{
static THashMap<EStatusCode, ui32> CodesRate = BuildCodesRateMap({
@@ -168,7 +166,6 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request,
opts.TenantPolicy = request.GetTenantPolicy();
opts.AvailabilityMode = request.GetAvailabilityMode();
opts.PartialPermissionAllowed = allowPartial;
- opts.Order = requestOrder;
TErrorInfo error;
@@ -338,7 +335,7 @@ bool TCms::CheckAction(const TAction &action,
case TAction::SHUTDOWN_HOST:
return CheckActionShutdownHost(action, opts, error, ctx);
case TAction::REPLACE_DEVICES:
- return CheckActionReplaceDevices(action, opts, error);
+ return CheckActionReplaceDevices(action, opts.PermissionDuration, error);
case TAction::START_SERVICES:
case TAction::STOP_SERVICES:
case TAction::ADD_HOST:
@@ -545,10 +542,9 @@ bool TCms::CheckSysTabletsNode(const TActionOptions &opts,
}
for (auto &tabletType : ClusterInfo->NodeToTabletTypes[node.NodeId]) {
- auto reason = ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order);
- if (reason != Ydb::Maintenance::ActionState::ACTION_REASON_OK) {
+ if (!ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode)) {
error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode, reason);
+ error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode);
error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
return false;
}
@@ -565,27 +561,24 @@ bool TCms::TryToLockNode(const TAction& action,
TDuration duration = TDuration::MicroSeconds(action.GetDuration());
duration += opts.PermissionDuration;
- auto clusterReason = ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order);
- if (clusterReason != Ydb::Maintenance::ActionState::ACTION_REASON_OK)
+ if (!ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode))
{
error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode, clusterReason);
+ error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode);
error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
return false;
}
if (node.Tenant
- && opts.TenantPolicy != NONE) {
- auto tenantReason = ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order);
-
- if (tenantReason != Ydb::Maintenance::ActionState::ACTION_REASON_OK) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode, tenantReason);
- error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
+ && opts.TenantPolicy != NONE
+ && !ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode))
+ {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode);
+ error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
- return false;
- }
+ return false;
}
return true;
@@ -664,37 +657,35 @@ bool TCms::TryToLockVDisk(const TActionOptions& opts,
return false;
}
- ui32 tempLocksCount = 0;
- for (auto& vdiskId : group.VDisks) {
- if (vdisk.VDiskId == vdiskId)
- continue;
+ auto counters = CreateErasureCounter(ClusterInfo->BSGroup(groupId).Erasure.GetErasure(), vdisk, groupId);
+ counters->CountGroupState(ClusterInfo, State->Config.DefaultRetryTime, duration, error);
- if (!ClusterInfo->VDisk(vdiskId).TempLocks.empty()
- || !ClusterInfo->Node(ClusterInfo->VDisk(vdiskId).NodeId).TempLocks.empty()
- || !ClusterInfo->PDisk(ClusterInfo->VDisk(vdiskId).PDiskId).TempLocks.empty()) {
- tempLocksCount += 1;
+ switch (opts.AvailabilityMode) {
+ case MODE_MAX_AVAILABILITY:
+ if (!counters->CheckForMaxAvailability(error, defaultDeadline, opts.PartialPermissionAllowed)) {
+ return false;
}
- }
-
- if (opts.PartialPermissionAllowed && tempLocksCount == 1) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = "You cannot get two or more disks from the same group at the same time";
- error.Deadline = defaultDeadline;
- return false;
- }
-
- auto result = group.GroupChecker->TryToLockVDisk(vdisk.VDiskId, opts.AvailabilityMode, 0, opts.Order);
- bool resultIsOk = result == Ydb::Maintenance::ActionState::ACTION_REASON_OK;
-
- if (!resultIsOk && !opts.PartialPermissionAllowed && tempLocksCount > 0) {
- error.Code = TStatus::DISALLOW;
- error.Reason = "Request is incorrect. You will never get a permissions. Try with PartialPermissionAllowed";
- return false;
- }
-
- if (!resultIsOk) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = group.GroupChecker->ReadableReason(vdisk.VDiskId, opts.AvailabilityMode, result);
+ break;
+ case MODE_KEEP_AVAILABLE:
+ if (!counters->CheckForKeepAvailability(ClusterInfo, error, defaultDeadline, opts.PartialPermissionAllowed)) {
+ return false;
+ }
+ break;
+ case MODE_FORCE_RESTART:
+ if ( counters->GroupAlreadyHasLockedDisks() && opts.PartialPermissionAllowed) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = "You cannot get two or more disks from the same group at the same time"
+ " without specifying the PartialPermissionAllowed parameter";
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+ // Any number of down disks is OK for this mode.
+ break;
+ default:
+ error.Code = TStatus::WRONG_REQUEST;
+ error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")",
+ EAvailabilityMode_Name(opts.AvailabilityMode).data(),
+ static_cast<ui32>(opts.AvailabilityMode));
error.Deadline = defaultDeadline;
return false;
}
@@ -1559,9 +1550,13 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev,
}
}
- LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::CMS, "Next request Id: " << State->NextRequestId);
-
- bool ok = CheckPermissionRequest(rec, resp->Record, scheduled.Request, State->NextRequestId, ctx);
+ ClusterInfo->LogManager.PushRollbackPoint();
+ for (const auto &scheduled_request : State->ScheduledRequests) {
+ for (auto &action : scheduled_request.second.Request.GetActions())
+ ClusterInfo->LogManager.ApplyAction(action, ClusterInfo);
+ }
+ bool ok = CheckPermissionRequest(rec, resp->Record, scheduled.Request, ctx);
+ ClusterInfo->LogManager.RollbackOperations();
// Schedule request if required.
if (rec.GetDryRun()) {
@@ -1621,12 +1616,20 @@ void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx)
auto requestStartTime = TInstant::Now();
+ ClusterInfo->LogManager.PushRollbackPoint();
+ for (const auto &scheduled_request : State->ScheduledRequests) {
+ if (scheduled_request.second.Order < request.Order) {
+ for (auto &action : scheduled_request.second.Request.GetActions())
+ ClusterInfo->LogManager.ApplyAction(action, ClusterInfo);
+ }
+ }
// Deactivate locks of this and later requests to
// avoid false conflicts.
ClusterInfo->DeactivateScheduledLocks(request.Order);
request.Request.SetAvailabilityMode(rec.GetAvailabilityMode());
- bool ok = CheckPermissionRequest(request.Request, resp->Record, scheduled.Request, request.Order, ctx);
+ bool ok = CheckPermissionRequest(request.Request, resp->Record, scheduled.Request, ctx);
ClusterInfo->ReactivateScheduledLocks();
+ ClusterInfo->LogManager.RollbackOperations();
// Schedule request if required.
if (rec.GetDryRun()) {
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index d2038ec6e6f..9cb688b125a 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -105,14 +105,12 @@ private:
NKikimrCms::ETenantPolicy TenantPolicy;
NKikimrCms::EAvailabilityMode AvailabilityMode;
bool PartialPermissionAllowed;
- ui64 Order;
TActionOptions(TDuration dur)
: PermissionDuration(dur)
, TenantPolicy(NKikimrCms::DEFAULT)
, AvailabilityMode(NKikimrCms::MODE_MAX_AVAILABILITY)
, PartialPermissionAllowed(false)
- , Order(0)
{}
};
@@ -277,7 +275,6 @@ private:
bool CheckPermissionRequest(const NKikimrCms::TPermissionRequest &request,
NKikimrCms::TPermissionResponse &response,
NKikimrCms::TPermissionRequest &scheduled,
- const ui64 requestOrder,
const TActorContext &ctx);
bool IsActionHostValid(const NKikimrCms::TAction &action, TErrorInfo &error) const;
bool ParseServices(const NKikimrCms::TAction &action, TServices &services, TErrorInfo &error) const;
diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp
index 4ec398add89..4b7ea52ce1f 100644
--- a/ydb/core/cms/cms_ut.cpp
+++ b/ydb/core/cms/cms_ut.cpp
@@ -729,6 +729,92 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
env.CheckWalleCheckTask("task-2", TStatus::ALLOW, env.GetNodeId(1));
}
+ Y_UNIT_TEST(Notifications)
+ {
+ TCmsTestEnv env(8);
+ env.AdvanceCurrentTime(TDuration::Minutes(20));
+
+ // User is not specified.
+ env.CheckNotification(TStatus::WRONG_REQUEST, "", env.GetCurrentTime(),
+ MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000));
+ // Too old.
+ env.CheckNotification(TStatus::WRONG_REQUEST, "user", env.GetCurrentTime() - TDuration::Minutes(10),
+ MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000));
+ // Store notification user-1.
+ auto id1 = env.CheckNotification
+ (TStatus::OK, "user", env.GetCurrentTime() + TDuration::Minutes(10),
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
+
+ // OK to replace the same device before notification start time.
+ env.CheckPermissionRequest("user", false, true, true, true, TStatus::ALLOW,
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
+
+ // Intersects with notification.
+ env.CheckPermissionRequest("user", false, true, true, true, TStatus::DISALLOW_TEMP,
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 10 * 60000000, env.PDiskName(1, 0)));
+
+ // Store notification user-2.
+ auto id2 = env.CheckNotification(TStatus::OK, "user", env.GetCurrentTime(),
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(2, 0)));
+ // Store notificaiton user1-3.
+ auto id3 = env.CheckNotification(TStatus::OK, "user1", env.GetCurrentTime(),
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(3, 0)));
+ // Get notification with no user.
+ env.CheckGetNotification("", id1, TStatus::WRONG_REQUEST);
+ // Get user-1.
+ env.CheckGetNotification("user", id1, TStatus::OK);
+ // Get with wrong user.
+ env.CheckGetNotification("user1", id1, TStatus::WRONG_REQUEST);
+ // Get with wrong id.
+ env.CheckGetNotification("user", "wrong-id", TStatus::WRONG_REQUEST);
+ // List notifications for user.
+ env.CheckListNotifications("user", TStatus::OK, 2);
+ // List notifications for user1.
+ env.CheckListNotifications("user1", TStatus::OK, 1);
+ // List with no user.
+ env.CheckListNotifications("", TStatus::WRONG_REQUEST, 0);
+ // Reject notification with no user.
+ env.CheckRejectNotification("", id1, TStatus::WRONG_REQUEST);
+ // Reject notification with wrong user.
+ env.CheckRejectNotification("user1", id1, TStatus::WRONG_REQUEST);
+ // Reject user-1 (dry run)
+ env.CheckRejectNotification("user", id1, TStatus::OK, true);
+ // Get user-1.
+ env.CheckGetNotification("user", id1, TStatus::OK);
+ // Reject user1-3.
+ env.CheckRejectNotification("user1", id3, TStatus::OK);
+ // Reject user-2.
+ env.CheckRejectNotification("user", id2, TStatus::OK);
+ // List notifications for user.
+ env.CheckListNotifications("user", TStatus::OK, 1);
+ // List notifications for user1.
+ env.CheckListNotifications("user1", TStatus::OK, 0);
+ // Get rejected user1-3.
+ env.CheckGetNotification("user1", id3, TStatus::WRONG_REQUEST);
+ // Get rejected user-2.
+ env.CheckGetNotification("user", id2, TStatus::WRONG_REQUEST);
+ // Get user-1.
+ env.CheckGetNotification("user", id1, TStatus::OK);
+ }
+
+ Y_UNIT_TEST(PermissionDuration) {
+ TCmsTestEnv env(8);
+
+ // Store notification user-1.
+ auto id1 = env.CheckNotification
+ (TStatus::OK, "user", env.GetCurrentTime() + TDuration::Minutes(10),
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
+
+ // Intersects with notification.
+ const TDuration _10minutes = TDuration::Minutes(10);
+ env.CheckPermissionRequest("user", false, true, true, true, _10minutes, TStatus::DISALLOW_TEMP,
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), _10minutes.MicroSeconds(), env.PDiskName(1, 0)));
+
+ // OK with default duration.
+ env.CheckPermissionRequest("user", false, true, true, true, TStatus::ALLOW,
+ MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
+ }
+
Y_UNIT_TEST(ActionWithZeroDuration) {
TCmsTestEnv env(8);
@@ -1223,6 +1309,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
+ env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
@@ -1230,12 +1317,14 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
TFakeNodeWhiteboardService::Info[env.GetNodeId(7)].Connected = false;
TFakeNodeWhiteboardService::Info[env.GetNodeId(4)].Connected = false;
TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
+ env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage"));
// 2dc disabled
TFakeNodeWhiteboardService::Info[env.GetNodeId(7)].Connected = true;
+ env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(7), 60000000, "storage"));
@@ -1245,6 +1334,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage"));
TFakeNodeWhiteboardService::Info[env.GetNodeId(5)].Connected = false;
+ env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp
index 444ff73933a..96421cc7a50 100644
--- a/ydb/core/cms/cms_ut_common.cpp
+++ b/ydb/core/cms/cms_ut_common.cpp
@@ -316,12 +316,10 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
ui32 vdiskId = pdiskIndex * vdiskPerPdisk + vdiskIndex;
ui32 groupId = groupShift + vdiskId;
ui32 failRealm = 0;
- ui32 failDomain = nodeIndex % 8;
- if (useMirror3dcErasure) {
+ if (useMirror3dcErasure)
failRealm = (nodeIndex % 9) / 3;
- failDomain = (nodeIndex % 9) % 3;
- }
- TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)failDomain, (ui8)0};
+
+ TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)(nodeIndex % 8), (ui8)0};
auto &vdisk = node.VDiskStateInfo[id];
VDiskIDFromVDiskID(id, vdisk.MutableVDiskId());
@@ -339,7 +337,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
vdiskConfig.SetGroupId(groupId);
vdiskConfig.SetGroupGeneration(1);
vdiskConfig.SetFailRealmIdx(failRealm);
- vdiskConfig.SetFailDomainIdx(failDomain);
+ vdiskConfig.SetFailDomainIdx(nodeIndex % 8);
config->MutableGroup(groupId)->AddVSlotId()
->CopyFrom(vdiskConfig.GetVSlotId());
diff --git a/ydb/core/cms/erasure_checkers.cpp b/ydb/core/cms/erasure_checkers.cpp
index 28634af6f09..b98748c7932 100644
--- a/ydb/core/cms/erasure_checkers.cpp
+++ b/ydb/core/cms/erasure_checkers.cpp
@@ -1,361 +1,233 @@
#include "erasure_checkers.h"
-#include <ydb/core/protos/cms.pb.h>
-#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
-
-#include <library/cpp/actors/core/log.h>
-
-#include <util/string/cast.h>
-#include <util/system/backtrace.h>
-#include <util/system/yassert.h>
-
-#include <bitset>
-#include <sstream>
-#include <vector>
-
namespace NKikimr::NCms {
-using namespace Ydb::Maintenance;
+bool TErasureCounterBase::IsDown(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime, TErrorInfo &error) {
+ const auto &node = info->Node(vdisk.NodeId);
+ const auto &pdisk = info->PDisk(vdisk.PDiskId);
+ const auto defaultDeadline = TActivationContext::Now() + retryTime;
-IStorageGroupChecker::EVDiskState IStorageGroupChecker::VDiskState(NKikimrCms::EState state) {
- switch (state) {
- case NKikimrCms::UP:
- return VDISK_STATE_UP;
- case NKikimrCms::UNKNOWN:
- return VDISK_STATE_UNSPECIFIED;
- case NKikimrCms::DOWN:
- return VDISK_STATE_DOWN;
- case NKikimrCms::RESTART:
- return VDISK_STATE_RESTART;
- default:
- Y_FAIL("Unknown EState");
+ // Check we received info for PDisk.
+ if (!pdisk.NodeId) {
+ ++Down;
+ error.Reason = TStringBuilder() << "Missing info for " << pdisk.ItemName();
+ return false;
}
-}
-TSimpleSharedPtr<IStorageGroupChecker> CreateStorageGroupChecker(TErasureType::EErasureSpecies es, ui32 groupId) {
- switch (es) {
- case TErasureType::ErasureNone:
- case TErasureType::ErasureMirror3:
- case TErasureType::Erasure3Plus1Block:
- case TErasureType::Erasure3Plus1Stripe:
- case TErasureType::Erasure4Plus2Block:
- case TErasureType::Erasure3Plus2Block:
- case TErasureType::Erasure4Plus2Stripe:
- case TErasureType::Erasure3Plus2Stripe:
- case TErasureType::ErasureMirror3Plus2:
- case TErasureType::Erasure4Plus3Block:
- case TErasureType::Erasure4Plus3Stripe:
- case TErasureType::Erasure3Plus3Block:
- case TErasureType::Erasure3Plus3Stripe:
- case TErasureType::Erasure2Plus3Block:
- case TErasureType::Erasure2Plus3Stripe:
- case TErasureType::Erasure2Plus2Block:
- case TErasureType::Erasure2Plus2Stripe:
- case TErasureType::ErasureMirror3of4:
- return TSimpleSharedPtr<IStorageGroupChecker>(new TDefaultErasureChecker(groupId));
- case TErasureType::ErasureMirror3dc:
- return TSimpleSharedPtr<IStorageGroupChecker>(new TMirror3dcChecker(groupId));
- default:
- Y_FAIL("Unknown erasure type: %d", es);
- }
+ return (node.NodeId != VDisk.NodeId && node.IsDown(error, defaultDeadline))
+ || (pdisk.PDiskId != VDisk.PDiskId && pdisk.IsDown(error, defaultDeadline))
+ || vdisk.IsDown(error, defaultDeadline);
}
+bool TErasureCounterBase::IsLocked(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime,
+ TDuration &duration, TErrorInfo &error)
+{
+ const auto &node = info->Node(vdisk.NodeId);
+ const auto &pdisk = info->PDisk(vdisk.PDiskId);
-void TErasureCheckerBase::AddVDisk(const TVDiskID& vdiskId) {
- if (DiskToState.contains(vdiskId)) {
- return;
- }
- DiskToState[vdiskId].State = VDISK_STATE_UNSPECIFIED;
-}
-
-void TErasureCheckerBase::UpdateVDisk(const TVDiskID& vdiskId, EState state) {
- AddVDisk(vdiskId);
-
- const auto newState = VDiskState(state);
-
- // The disk is marked based on the information obtained by InfoCollector.
- // If we marked the disk DOWN, it means that there was a reason
- if (DiskToState[vdiskId].State == VDISK_STATE_DOWN
- && newState == VDISK_STATE_UP)
- return;
-
- if (DiskToState[vdiskId].State == VDISK_STATE_DOWN) {
- --DownVDisksCount;
- }
-
- if (DiskToState[vdiskId].State == VDISK_STATE_LOCKED ||
- DiskToState[vdiskId].State == VDISK_STATE_RESTART) {
- --LockedVDisksCount;
- }
-
- DiskToState[vdiskId].State = newState;
-
- if (newState == VDISK_STATE_RESTART || newState == VDISK_STATE_LOCKED) {
- ++LockedVDisksCount;
- }
-
- if (newState == VDISK_STATE_DOWN) {
- ++DownVDisksCount;
+ // Check we received info for VDisk.
+ if (!vdisk.NodeId || !vdisk.PDiskId) {
+ ++Down;
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << "Missing info for " << vdisk.ItemName();
+ return false;
}
-}
-void TErasureCheckerBase::LockVDisk(const TVDiskID& vdiskId) {
- Y_VERIFY(DiskToState.contains(vdiskId));
-
- ++LockedVDisksCount;
- if (DiskToState[vdiskId].State == VDISK_STATE_DOWN) {
- DiskToState[vdiskId].State = VDISK_STATE_RESTART;
- --DownVDisksCount;
- } else {
- DiskToState[vdiskId].State = VDISK_STATE_LOCKED;
- }
+ return node.IsLocked(error, retryTime, TActivationContext::Now(), duration)
+ || pdisk.IsLocked(error, retryTime, TActivationContext::Now(), duration)
+ || vdisk.IsLocked(error, retryTime, TActivationContext::Now(), duration);
}
-void TErasureCheckerBase::UnlockVDisk(const TVDiskID& vdiskId) {
- Y_VERIFY(DiskToState.contains(vdiskId));
-
- --LockedVDisksCount;
- if (DiskToState[vdiskId].State == VDISK_STATE_RESTART) {
- DiskToState[vdiskId].State = VDISK_STATE_DOWN;
- ++DownVDisksCount;
- } else {
- DiskToState[vdiskId].State = VDISK_STATE_UP;
- }
+bool TErasureCounterBase::GroupAlreadyHasLockedDisks() const {
+ return HasAlreadyLockedDisks;
}
-void TErasureCheckerBase::EmplaceTask(const TVDiskID &vdiskId, i32 priority,
- ui64 order, const std::string &taskUId) {
-
- auto& priorities = DiskToState[vdiskId].Priorities;
- auto it = priorities.lower_bound(TVDiskState::TTaskPriority(priority, order, ""));
-
- if (it != priorities.end() && (it->Order == order && it->Priority == priority)) {
- if (it->TaskUId == taskUId) {
- return;
+bool TErasureCounterBase::CheckForMaxAvailability(TErrorInfo &error, TInstant &defaultDeadline, bool allowPartial) const {
+ if (Locked + Down > 1) {
+ if (HasAlreadyLockedDisks && !allowPartial) {
+ error.Code = TStatus::DISALLOW;
+ error.Reason = "The request is incorrect: too many disks from the one group. "
+ "Fix the request or set PartialPermissionAllowed to true";
+ return false;
}
- Y_FAIL("Task with the same priority and order already exists");
- } else {
- priorities.emplace_hint(it, priority, order, taskUId);
- }
-}
-
-void TErasureCheckerBase::RemoveTask(const std::string &taskUId) {
- auto taskUIdsEqual = [&taskUId](const TVDiskState::TTaskPriority &p) {
- return p.TaskUId == taskUId;
- };
-
- for (auto &[vdiskId, vdiskState] : DiskToState) {
- auto it = std::find_if(vdiskState.Priorities.begin(),
- vdiskState.Priorities.end(), taskUIdsEqual);
-
- if (it == vdiskState.Priorities.end()) {
- continue;
- }
-
- vdiskState.Priorities.erase(it);
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
+ << ". " << "Too many locked and down vdisks: " << Locked + Down;
+ error.Deadline = defaultDeadline;
+ return false;
}
+ return true;
}
-ActionState::ActionReason TDefaultErasureChecker::TryToLockVDisk(const TVDiskID &vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const {
- Y_VERIFY(DiskToState.contains(vdiskId));
-
- const auto& diskState = DiskToState.at(vdiskId);
-
- if (diskState.State == VDISK_STATE_RESTART
- || diskState.State == VDISK_STATE_LOCKED) {
- return ActionState::ACTION_REASON_ALREADY_LOCKED;
- }
-
- auto taskPriority = TVDiskState::TTaskPriority(priority, order, "");
- if (!diskState.Priorities.empty() && taskPriority < *diskState.Priorities.rbegin()) {
- return ActionState::ACTION_REASON_LOW_PRIORITY;
+void TDefaultErasureCounter::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration retryTime,
+ TDuration duration, TErrorInfo &error)
+{
+ Y_VERIFY_DEBUG(vdisk.VDiskId != VDisk.VDiskId);
+
+ // Check locks.
+ TErrorInfo err;
+ if (IsLocked(vdisk, info, retryTime, duration, err)) {
+ ++Locked;
+ error.Code = err.Code;
+ error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
+ << ". " << err.Reason;
+ error.Deadline = Max(error.Deadline, err.Deadline);
+ return;
}
- if (mode == NKikimrCms::MODE_FORCE_RESTART) {
- return ActionState::ACTION_REASON_OK;
+ // Check if disk is down.
+ if (IsDown(vdisk, info, retryTime, err)) {
+ ++Down;
+ error.Code = err.Code;
+ error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
+ << ". " << err.Reason;
+ error.Deadline = Max(error.Deadline, err.Deadline);
}
+}
- // Check how many disks are waiting for higher prioriry task to be locked
- ui32 priorityLockedCount = 0;
- for (auto &[id, vdiskState] : DiskToState) {
- if (vdiskState.State != VDISK_STATE_UP) {
- continue;
- }
-
- if (!vdiskState.Priorities.empty() && taskPriority < *vdiskState.Priorities.rbegin()) {
- ++priorityLockedCount;
+bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
+ TInstant &defaultDeadline, bool allowPartial) const
+{
+ if (HasAlreadyLockedDisks && allowPartial) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = "You cannot get two or more disks from the same group at the same time"
+ " without specifying the PartialPermissionAllowed parameter";
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+
+ if (Down + Locked > info->BSGroup(GroupId).Erasure.ParityParts()) {
+ if (HasAlreadyLockedDisks && !allowPartial) {
+ error.Code = TStatus::DISALLOW;
+ error.Reason = "The request is incorrect: too many disks from the one group. "
+ "Fix the request or set PartialPermissionAllowed to true";
+ return false;
}
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << "Cannot lock disk " << VDisk.PrettyItemName()
+ << ". Too many locked nodes for group " << GroupId;
+ error.Deadline = defaultDeadline;
+ return false;
}
+ return true;
+}
- ui32 disksLimit = 0;
- if (diskState.State == VDISK_STATE_DOWN) {
- disksLimit = 1;
- }
+bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
+ TInstant &defaultDeadline, bool allowPartial) const
+{
+ Y_UNUSED(info);
- switch (mode) {
- case NKikimrCms::MODE_MAX_AVAILABILITY:
- if ((LockedVDisksCount + DownVDisksCount + priorityLockedCount) > disksLimit) {
- return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
- }
- break;
- case NKikimrCms::MODE_KEEP_AVAILABLE:
- if ((LockedVDisksCount + DownVDisksCount + priorityLockedCount) >= disksLimit + 2) {
- return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
- }
- break;
- default:
- Y_FAIL("Unexpected Availability mode");
+ if (HasAlreadyLockedDisks && allowPartial) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = "You cannot get two or more disks from the same group at the same time"
+ " without specifying the PartialPermissionAllowed parameter";
+ error.Deadline = defaultDeadline;
+ return false;
}
- return ActionState::ACTION_REASON_OK;
-}
-
-ActionState::ActionReason TMirror3dcChecker::TryToLockVDisk(const TVDiskID &vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const {
- Y_VERIFY(DiskToState.contains(vdiskId));
-
- const auto& diskState = DiskToState.at(vdiskId);
- const auto taskPriority = TVDiskState::TTaskPriority(priority, order, "");
+ if (DataCenterDisabledNodes.size() <= 1)
+ return true;
- if (!diskState.Priorities.empty() && taskPriority < *diskState.Priorities.rbegin()) {
- return ActionState::ACTION_REASON_LOW_PRIORITY;
+ if (DataCenterDisabledNodes.size() == 2
+ && (DataCenterDisabledNodes.begin()->second <= 1
+ || (++DataCenterDisabledNodes.begin())->second <= 1))
+ {
+ return true;
}
- if (mode == MODE_FORCE_RESTART) {
- return ActionState::ACTION_REASON_OK;
+ if (HasAlreadyLockedDisks && !allowPartial) {
+ error.Code = TStatus::DISALLOW;
+ error.Reason = "The request is incorrect: too many disks from the one group. "
+ "Fix the request or set PartialPermissionAllowed to true";
+ return false;
}
- if (diskState.State == VDISK_STATE_LOCKED
- || diskState.State == VDISK_STATE_RESTART) {
- return ActionState::ACTION_REASON_ALREADY_LOCKED;
+ if (DataCenterDisabledNodes.size() > 2) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
+ << ". Too many data centers have unavailable vdisks: "
+ << DataCenterDisabledNodes.size();
+ error.Deadline = defaultDeadline;
+ return false;
}
- const std::vector<std::bitset<9>> MaxOkGroups = {
- 0x1E0, 0x1D0, 0x1C8, 0x1C4, 0x1C2, 0x1C1,
- 0x138, 0xB8, 0x78, 0x3C, 0x3A, 0x39,
- 0x107, 0x87, 0x47, 0x27, 0x17, 0xF,
- };
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
+ << ". Data centers have too many unavailable vdisks";
+ error.Deadline = defaultDeadline;
- ui32 priorityLockedCount = 0;
- std::bitset<9> groupState(0);
- for (auto& [id, state] : DiskToState) {
- if (id == vdiskId)
- continue;
-
- if (state.State != VDISK_STATE_UP
- || (!state.Priorities.empty() && taskPriority < *state.Priorities.rbegin())) {
- groupState |= (1 << (id.FailRealm * 3 + id.FailDomain));
- }
-
- if (!state.Priorities.empty() && taskPriority < *state.Priorities.rbegin()) {
- ++priorityLockedCount;
- }
- }
- groupState |= (1 << (vdiskId.FailRealm * 3 + vdiskId.FailDomain));
+ return false;
+}
- ui32 downVDisks = diskState.State == VDISK_STATE_DOWN ? DownVDisksCount - 1 : DownVDisksCount;
- if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
- if ((downVDisks + LockedVDisksCount + priorityLockedCount) == 0) {
- return ActionState::ACTION_REASON_OK;
- }
- return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
+void TMirror3dcCounter::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration retryTime,
+ TDuration duration, TErrorInfo &error)
+{
+ Y_VERIFY_DEBUG(vdisk.VDiskId != VDisk.VDiskId);
+
+ // Check locks.
+ TErrorInfo err;
+ if (IsLocked(vdisk, info, retryTime, duration, err)
+ || IsDown(vdisk, info, retryTime, err)) {
+ error.Code = err.Code;
+ error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
+ << ". " << err.Reason;
+ error.Deadline = Max(error.Deadline, err.Deadline);
+ ++Locked;
+ ++DataCenterDisabledNodes[vdisk.VDiskId.FailRealm];
}
+}
- size_t minCount = 9;
- for (auto okGroup : MaxOkGroups) {
- auto xoredState = (~okGroup) & groupState;
- minCount = std::min(minCount, xoredState.count());
+void TMirror3dcCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) {
+ for (const auto &vdId : info->BSGroup(GroupId).VDisks) {
+ if (vdId != VDisk.VDiskId)
+ CountVDisk(info->VDisk(vdId), info, retryTime, duration, error);
}
+ ++Locked;
+ ++DataCenterDisabledNodes[VDisk.VDiskId.FailRealm];
- if (minCount > 0) {
- return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
+ if (Locked && error.Code == TStatus::DISALLOW) {
+ HasAlreadyLockedDisks = true;
}
-
- return ActionState::ACTION_REASON_OK;
}
-std::string TDefaultErasureChecker::ReadableReason(const TVDiskID &vdiskId,
- EAvailabilityMode mode, ActionState::ActionReason reason) const {
- std::stringstream readableReason;
-
- if (reason == ActionState::ACTION_REASON_OK) {
- readableReason << "Action is OK";
- return readableReason.str();
+void TDefaultErasureCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) {
+ for (const auto &vdId : info->BSGroup(GroupId).VDisks) {
+ if (vdId != VDisk.VDiskId)
+ CountVDisk(info->VDisk(vdId), info, retryTime, duration, error);
}
-
- readableReason << "Cannot lock vdisk" << vdiskId.ToString() << ". ";
-
- switch (reason) {
- case ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS:
- readableReason << "Group " << GroupId
- << " has too many unavailable vdisks. "
- << "Down disks count: " << DownVDisksCount
- << ". Locked disks count: " << LockedVDisksCount;
-
- if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
- readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
- << " is " << 2;
- }
- if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
- readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
- << " is " << 1;
- }
- break;
- case ActionState::ACTION_REASON_ALREADY_LOCKED:
- // TODO:: add info about lock id
- readableReason << "Disk is already locked";
- break;
- case ActionState::ACTION_REASON_LOW_PRIORITY:
- // TODO:: add info about task with higher priority
- readableReason << "Task with higher priority in progress";
- break;
- default:
- Y_FAIL("Unexpected Reason");
+ if (Locked && error.Code == TStatus::DISALLOW) {
+ HasAlreadyLockedDisks = true;
}
-
- return readableReason.str();
+ ++Locked;
}
-std::string TMirror3dcChecker::ReadableReason(const TVDiskID &vdiskId,
- EAvailabilityMode mode, ActionState::ActionReason reason) const {
- std::stringstream readableReason;
-
- if (reason == ActionState::ACTION_REASON_OK) {
- readableReason << "Action is OK";
- return readableReason.str();
- }
-
- readableReason << "Cannot lock vdisk" << vdiskId.ToString() << ". ";
-
- switch (reason) {
- case ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS:
- readableReason << "Group " << GroupId
- << " has too many unavailable vdisks. "
- << "Down disks count: " << DownVDisksCount
- << ". Locked disks count: " << LockedVDisksCount;
-
- if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
- readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
- << " is 1";
- }
- if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
- readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
- << "4, 3 of which are in the same data center";
- }
- break;
- case ActionState::ACTION_REASON_ALREADY_LOCKED:
- // TODO:: add info about lock id
- readableReason << "Disk is already locked";
- break;
- case ActionState::ACTION_REASON_LOW_PRIORITY:
- // TODO:: add info about task with higher priority
- readableReason << "Task with higher priority in progress";
- break;
+TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo &vdisk, ui32 groupId) {
+ switch (es) {
+ case TErasureType::ErasureNone:
+ case TErasureType::ErasureMirror3:
+ case TErasureType::Erasure3Plus1Block:
+ case TErasureType::Erasure3Plus1Stripe:
+ case TErasureType::Erasure4Plus2Block:
+ case TErasureType::Erasure3Plus2Block:
+ case TErasureType::Erasure4Plus2Stripe:
+ case TErasureType::Erasure3Plus2Stripe:
+ case TErasureType::ErasureMirror3Plus2:
+ case TErasureType::Erasure4Plus3Block:
+ case TErasureType::Erasure4Plus3Stripe:
+ case TErasureType::Erasure3Plus3Block:
+ case TErasureType::Erasure3Plus3Stripe:
+ case TErasureType::Erasure2Plus3Block:
+ case TErasureType::Erasure2Plus3Stripe:
+ case TErasureType::Erasure2Plus2Block:
+ case TErasureType::Erasure2Plus2Stripe:
+ case TErasureType::ErasureMirror3of4:
+ return TSimpleSharedPtr<IErasureCounter>(new TDefaultErasureCounter(vdisk, groupId));
+ case TErasureType::ErasureMirror3dc:
+ return TSimpleSharedPtr<IErasureCounter>(new TMirror3dcCounter(vdisk, groupId));
default:
- Y_FAIL("Unexpected Reason");
+ Y_FAIL("Unknown erasure type: %d", es);
}
-
- return readableReason.str();
}
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/erasure_checkers.h b/ydb/core/cms/erasure_checkers.h
index c958630e273..0c0de0be766 100644
--- a/ydb/core/cms/erasure_checkers.h
+++ b/ydb/core/cms/erasure_checkers.h
@@ -1,133 +1,79 @@
#pragma once
#include "defs.h"
+#include "cluster_info.h"
-#include <ydb/core/blobstorage/base/blobstorage_vdiskid.h>
#include <ydb/core/erasure/erasure.h>
#include <ydb/core/protos/cms.pb.h>
-#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
-
-#include <util/generic/queue.h>
-#include <util/system/compiler.h>
-
-#include <algorithm>
-#include <functional>
-#include <queue>
-#include <string>
namespace NKikimr::NCms {
using namespace NKikimrCms;
-class IStorageGroupChecker {
-public:
- enum EVDiskState : ui32 {
- VDISK_STATE_UNSPECIFIED /* "Unspecified" */,
- VDISK_STATE_UP /* "Up" */,
- VDISK_STATE_LOCKED /* "Locked" */,
- VDISK_STATE_RESTART /* "Restart" */,
- VDISK_STATE_DOWN /* "Down" */,
- VDISK_STATE_SCHEDULED_LOCKED /* "Scheduled locked" */
- };
-
-protected:
- EVDiskState VDiskState(NKikimrCms::EState state);
-
+class IErasureCounter {
public:
- virtual ~IStorageGroupChecker() = default;
-
- virtual void AddVDisk(const TVDiskID& vdiskId) = 0;
- virtual void UpdateVDisk(const TVDiskID& vdiskId, EState state) = 0;
-
- virtual void LockVDisk(const TVDiskID& vdiskId) = 0;
- virtual void UnlockVDisk(const TVDiskID& vdiskId) = 0;
+ virtual ~IErasureCounter() = default;
- virtual void EmplaceTask(const TVDiskID& vdiskId, i32 priority, ui64 order, const std::string& taskUId) = 0;
- virtual void RemoveTask(const std::string& taskUId) = 0;
-
- virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const = 0;
- virtual std::string ReadableReason(const TVDiskID& vdiskId, EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const = 0;
+ virtual bool GroupAlreadyHasLockedDisks() const = 0;
+ virtual bool CheckForMaxAvailability(TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
+ virtual bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
+ virtual void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0;
+ virtual void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0;
};
-class TErasureCheckerBase : public IStorageGroupChecker {
+class TErasureCounterBase: public IErasureCounter {
protected:
- /** Structure to hold information about vdisk state and priorities and orders of some task.
- *
- * Requests with equal priority are processed in the order of arrival at CMS.
- */
- struct TVDiskState {
- public:
- struct TTaskPriority {
- i32 Priority;
- ui64 Order;
- std::string TaskUId;
-
- explicit TTaskPriority(i32 priority, ui64 order, const std::string& taskUId)
- : Priority(priority)
- , Order(order)
- , TaskUId(taskUId)
- {}
-
- bool operator<(const TTaskPriority& rhs) const {
- return Priority < rhs.Priority || (Priority == rhs.Priority && Order > rhs.Order);
- }
- };
- public:
- EVDiskState State;
- std::set<TTaskPriority> Priorities;
- };
+ ui32 Down;
+ ui32 Locked;
+ const TVDiskInfo& VDisk;
+ const ui32 GroupId;
+ bool HasAlreadyLockedDisks;
protected:
- ui32 GroupId;
-
- THashMap<TVDiskID, TVDiskState> DiskToState;
- ui32 DownVDisksCount;
- ui32 LockedVDisksCount;
+ bool IsDown(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TErrorInfo& error);
+ bool IsLocked(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TDuration& duration, TErrorInfo& error);
public:
- explicit TErasureCheckerBase(ui32 groupId)
- : GroupId(groupId)
- , DownVDisksCount(0)
- , LockedVDisksCount(0)
+ TErasureCounterBase(const TVDiskInfo& vdisk, ui32 groupId)
+ : Down(0)
+ , Locked(0)
+ , VDisk(vdisk)
+ , GroupId(groupId)
+ , HasAlreadyLockedDisks(false)
{
}
- virtual ~TErasureCheckerBase() = default;
- void AddVDisk(const TVDiskID& vdiskId) override final;
- void UpdateVDisk(const TVDiskID& vdiskId, EState state) override final;
-
- void LockVDisk(const TVDiskID& vdiskId) override final;
- void UnlockVDisk(const TVDiskID& vdiskId) override final;
-
- void EmplaceTask(const TVDiskID &vdiskId, i32 priority, ui64 order,
- const std::string &taskUId) override final;
- void RemoveTask(const std::string &taskUId) override final;
+ bool GroupAlreadyHasLockedDisks() const final;
+ bool CheckForMaxAvailability(TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const final;
};
-class TDefaultErasureChecker : public TErasureCheckerBase {
+class TDefaultErasureCounter: public TErasureCounterBase {
public:
- explicit TDefaultErasureChecker(ui32 groupId)
- : TErasureCheckerBase(groupId)
- {}
-
- virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const override final;
-
- virtual std::string ReadableReason(const TVDiskID &vdiskId,
- EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const override final;
+ TDefaultErasureCounter(const TVDiskInfo& vdisk, ui32 groupId)
+ : TErasureCounterBase(vdisk, groupId)
+ {
+ }
+
+ void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
+ bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override;
+ void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;
};
-class TMirror3dcChecker : public TErasureCheckerBase {
-public:
- explicit TMirror3dcChecker(ui32 groupId)
- : TErasureCheckerBase(groupId)
- {}
+class TMirror3dcCounter: public TErasureCounterBase {
+private:
+ THashMap<ui8, ui32> DataCenterDisabledNodes;
- virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const override final;
+public:
+ TMirror3dcCounter(const TVDiskInfo& vdisk, ui32 groupId)
+ : TErasureCounterBase(vdisk, groupId)
+ {
+ }
- virtual std::string ReadableReason(const TVDiskID &vdiskId,
- EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const override final;
+ void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
+ bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override;
+ void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;
};
-TSimpleSharedPtr<IStorageGroupChecker> CreateStorageGroupChecker(TErasureType::EErasureSpecies es, ui32 groupId);
+TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo& vdisk, ui32 groupId);
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp
index 9be56b669a4..8a857b0a63f 100644
--- a/ydb/core/cms/node_checkers.cpp
+++ b/ydb/core/cms/node_checkers.cpp
@@ -1,6 +1,4 @@
#include "node_checkers.h"
-#include "util/system/yassert.h"
-#include "ydb/public/api/protos/draft/ydb_maintenance.pb.h"
#include <ydb/core/protos/cms.pb.h>
@@ -11,8 +9,6 @@ namespace NKikimr::NCms {
#define NCH_LOG_D(stream) LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream)
#define NCH_LOG_T(stream) LOG_TRACE_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream)
-using namespace Ydb::Maintenance;
-
TNodesLimitsCounterBase::ENodeState INodesChecker::NodeState(NKikimrCms::EState state) {
switch (state) {
case NKikimrCms::UP:
@@ -32,7 +28,7 @@ void TNodesCounterBase::AddNode(ui32 nodeId) {
if (NodeToState.contains(nodeId)) {
return;
}
- NodeToState[nodeId].State = NODE_STATE_UNSPECIFIED;
+ NodeToState[nodeId] = NODE_STATE_UNSPECIFIED;
}
void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) {
@@ -40,17 +36,17 @@ void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) {
AddNode(nodeId);
}
- if (NodeToState[nodeId].State == NODE_STATE_DOWN) {
+ if (NodeToState[nodeId] == NODE_STATE_DOWN) {
--DownNodesCount;
}
- if (NodeToState[nodeId].State == NODE_STATE_LOCKED ||
- NodeToState[nodeId].State == NODE_STATE_RESTART) {
+ if (NodeToState[nodeId] == NODE_STATE_LOCKED ||
+ NodeToState[nodeId] == NODE_STATE_RESTART) {
--LockedNodesCount;
}
const auto nodeState = NodeState(state);
- NodeToState[nodeId].State = nodeState;
+ NodeToState[nodeId] = nodeState;
if (nodeState == NODE_STATE_RESTART || nodeState == NODE_STATE_LOCKED) {
++LockedNodesCount;
@@ -65,11 +61,11 @@ void TNodesCounterBase::LockNode(ui32 nodeId) {
Y_VERIFY(NodeToState.contains(nodeId));
++LockedNodesCount;
- if (NodeToState[nodeId].State == NODE_STATE_DOWN) {
- NodeToState[nodeId].State = NODE_STATE_RESTART;
+ if (NodeToState[nodeId] == NODE_STATE_DOWN) {
+ NodeToState[nodeId] = NODE_STATE_RESTART;
--DownNodesCount;
} else {
- NodeToState[nodeId].State = NODE_STATE_LOCKED;
+ NodeToState[nodeId] = NODE_STATE_LOCKED;
}
}
@@ -77,144 +73,89 @@ void TNodesCounterBase::UnlockNode(ui32 nodeId) {
Y_VERIFY(NodeToState.contains(nodeId));
--LockedNodesCount;
- if (NodeToState[nodeId].State == NODE_STATE_RESTART) {
- NodeToState[nodeId].State = NODE_STATE_DOWN;
+ if (NodeToState[nodeId] == NODE_STATE_RESTART) {
+ NodeToState[nodeId] = NODE_STATE_DOWN;
++DownNodesCount;
} else {
- NodeToState[nodeId].State = NODE_STATE_UP;
- }
-}
-
-void TNodesCounterBase::EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) {
- auto& priorities = NodeToState[nodeId].Priorities;
- auto it = priorities.lower_bound(TNodeState::TTaskPriority(priority, order, ""));
-
- if (it != priorities.end() && (it->Order == order && it->Priority == priority)) {
- if (it->TaskUId == taskUId) {
- return;
- }
- Y_FAIL("Task with the same priority and order already exists");
- } else {
- priorities.emplace_hint(it, priority, order, taskUId);
- }
-
- NodesWithScheduledTasks.insert(nodeId);
-}
-
-void TNodesCounterBase::RemoveTask(const std::string& taskUId) {
- auto taskUIdsEqual = [&taskUId](const TNodeState::TTaskPriority &p) {
- return p.TaskUId == taskUId;
- };
-
- TVector<ui32> NodesToRemove;
- for (auto nodeId : NodesWithScheduledTasks) {
- auto& nodeState = NodeToState[nodeId];
- auto it = std::find_if(nodeState.Priorities.begin(),
- nodeState.Priorities.end(), taskUIdsEqual);
- if (it == nodeState.Priorities.end()) {
- continue;
- }
-
- nodeState.Priorities.erase(it);
-
- if (nodeState.Priorities.empty()) {
- NodesToRemove.push_back(nodeId);
- }
- }
-
- for (auto nodeId : NodesToRemove) {
- NodesWithScheduledTasks.erase(nodeId);
+ NodeToState[nodeId] = NODE_STATE_UP;
}
}
-ActionState::ActionReason TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const {
+bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const {
Y_VERIFY(NodeToState.contains(nodeId));
+ auto nodeState = NodeToState.at(nodeId);
- const auto& nodeState = NodeToState.at(nodeId);
- const auto taskPriority = TNodeState::TTaskPriority(priority, order, "");
-
- if (!nodeState.Priorities.empty() && (taskPriority < *nodeState.Priorities.rbegin())) {
- return ActionState::ACTION_REASON_LOW_PRIORITY;
- }
+ bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART;
- if (nodeState.State == NODE_STATE_RESTART ||
- nodeState.State == NODE_STATE_LOCKED ||
- nodeState.State == NODE_STATE_UNSPECIFIED) {
+ NCH_LOG_D("Checking Node: "
+ << nodeId << ", with state: " << ToString(nodeState)
+ << ", with limit: " << DisabledNodesLimit
+ << ", with ratio limit: " << DisabledNodesRatioLimit
+ << ", locked nodes: " << LockedNodesCount
+ << ", down nodes: " << DownNodesCount);
- return ActionState::ACTION_REASON_ALREADY_LOCKED;
+ // Allow to maintain down/unavailable node
+ if (nodeState == NODE_STATE_DOWN) {
+ return true;
}
- ui32 priorityLockedCount = 0;
- for (auto id : NodesWithScheduledTasks) {
- Y_VERIFY(!NodeToState.at(id).Priorities.empty());
+ if (nodeState == NODE_STATE_RESTART ||
+ nodeState == NODE_STATE_LOCKED ||
+ nodeState == NODE_STATE_UNSPECIFIED) {
- if (taskPriority < *NodeToState.at(id).Priorities.rbegin()) {
- ++priorityLockedCount;
- }
+ return false;
}
- ui32 downNodes = nodeState.State == NODE_STATE_DOWN ? DownNodesCount - 1 : DownNodesCount;
// Always allow at least one node
- if (LockedNodesCount + downNodes + priorityLockedCount == 0) {
- return ActionState::ACTION_REASON_OK;
+ if (LockedNodesCount + DownNodesCount == 0) {
+ return true;
}
- bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART;
-
if (isForceRestart && !LockedNodesCount) {
- return ActionState::ACTION_REASON_OK;
+ return true;
}
if (DisabledNodesLimit > 0 &&
- (LockedNodesCount + downNodes + priorityLockedCount + 1 > DisabledNodesLimit)) {
- return ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED;
+ (LockedNodesCount + DownNodesCount + 1 > DisabledNodesLimit)) {
+ return false;
}
if (DisabledNodesRatioLimit > 0 &&
- ((LockedNodesCount + downNodes + priorityLockedCount + 1) * 100 > (NodeToState.size() * DisabledNodesRatioLimit))) {
- return ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED;
+ ((LockedNodesCount + DownNodesCount + 1) * 100 >
+ (NodeToState.size() * DisabledNodesRatioLimit))) {
+ return false;
}
- return ActionState::ACTION_REASON_OK;
+ return true;
}
-ActionState::ActionReason TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const {
+bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const {
Y_VERIFY(NodeToState.contains(nodeId));
+ auto nodeState = NodeToState.at(nodeId);
- const auto& nodeState = NodeToState.at(nodeId);
- const auto taskPriority = TNodeState::TTaskPriority(priority, order, "");
+ NCH_LOG_D("Checking limits for sys tablet: " << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
+ << ", on node: " << nodeId
+ << ", with state: " << ToString(nodeState)
+ << ", locked nodes: " << LockedNodesCount
+ << ", down nodes: " << DownNodesCount);
- if (!nodeState.Priorities.empty() && (taskPriority < *nodeState.Priorities.rbegin())) {
- return ActionState::ACTION_REASON_LOW_PRIORITY;
- }
-
- if (nodeState.State == NODE_STATE_RESTART ||
- nodeState.State == NODE_STATE_LOCKED ||
- nodeState.State == NODE_STATE_UNSPECIFIED) {
-
- return ActionState::ACTION_REASON_ALREADY_LOCKED;
- }
-
- ui32 priorityLockedCount = 0;
- for (auto id : NodesWithScheduledTasks) {
- Y_VERIFY(!NodeToState.at(id).Priorities.empty());
+ if (nodeState == NODE_STATE_RESTART ||
+ nodeState == NODE_STATE_LOCKED ||
+ nodeState == NODE_STATE_UNSPECIFIED) {
- if (taskPriority < *NodeToState.at(id).Priorities.rbegin()) {
- ++priorityLockedCount;
- }
+ return false;
}
- ui32 downNodes = nodeState.State == NODE_STATE_DOWN ? DownNodesCount - 1 : DownNodesCount;
ui32 tabletNodes = NodeToState.size();
switch (mode) {
case NKikimrCms::MODE_MAX_AVAILABILITY:
- if (tabletNodes > 1 && (downNodes + LockedNodesCount + priorityLockedCount + 1) * 2 > tabletNodes){
- return ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED;
+ if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) * 2 > tabletNodes){
+ return false;
}
break;
case NKikimrCms::MODE_KEEP_AVAILABLE:
- if (tabletNodes > 1 && (downNodes + LockedNodesCount + priorityLockedCount + 1) > tabletNodes - 1) {
- return ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED;
+ if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) > tabletNodes - 1) {
+ return false;
}
break;
case NKikimrCms::MODE_FORCE_RESTART:
@@ -223,124 +164,7 @@ ActionState::ActionReason TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NK
Y_FAIL("Unknown availability mode");
}
- return ActionState::ACTION_REASON_OK;
+ return true;
}
-std::string TTenantLimitsCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
- ActionState::ActionReason reason) const {
- Y_UNUSED(mode);
-
- std::stringstream readableReason;
-
- if (reason == ActionState::ACTION_REASON_OK) {
- readableReason << "Action is OK";
- return readableReason.str();
- }
-
- readableReason << "Cannot lock node: " << nodeId;
-
- switch (reason) {
- case ActionState::ACTION_REASON_ALREADY_LOCKED:
- readableReason << "Node is already locked";
- break;
- case ActionState::ACTION_REASON_LOW_PRIORITY:
- readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId;
- break;
- case ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED:
- readableReason << ". Too many locked nodes for tenant " << TenantName
- << "; locked: " << LockedNodesCount
- << "; down: " << DownNodesCount
- << "; total: " << NodeToState.size()
- << "; limit: " << DisabledNodesLimit
- << "; ratio limit: " << DisabledNodesRatioLimit << "%";
- break;
- default:
- Y_FAIL("Unexpected reason");
- break;
- }
- return readableReason.str();
-}
-
-std::string TClusterLimitsCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
- ActionState::ActionReason reason) const {
- Y_UNUSED(mode);
-
- std::stringstream readableReason;
-
- if (reason == ActionState::ACTION_REASON_OK) {
- readableReason << "Action is OK";
- return readableReason.str();
- }
-
- if (mode == NKikimrCms::MODE_FORCE_RESTART) {
- return readableReason.str();
- }
-
- readableReason << "Cannot lock node: " << nodeId;
-
- switch (reason) {
- case ActionState::ACTION_REASON_ALREADY_LOCKED:
- readableReason << "Node is already locked";
- break;
- case ActionState::ACTION_REASON_LOW_PRIORITY:
- readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId;
- break;
- case ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED:
- readableReason << ". Too many locked nodes in cluster"
- << "; locked: " << LockedNodesCount
- << "; down: " << DownNodesCount
- << "; total: " << NodeToState.size()
- << "; limit: " << DisabledNodesLimit
- << "; ratio limit: " << DisabledNodesRatioLimit << "%";
- break;
- default:
- Y_FAIL("Unexpected reason");
- break;
- }
-
- return readableReason.str();
-}
-
-
-std::string TSysTabletsNodesCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
- ActionState::ActionReason reason) const {
- std::stringstream readableReason;
-
- if (reason == ActionState::ACTION_REASON_OK) {
- readableReason << "Action is OK";
- return readableReason.str();
- }
-
- if (mode == NKikimrCms::MODE_FORCE_RESTART) {
- return readableReason.str();
- }
-
- switch (reason) {
- case ActionState::ACTION_REASON_ALREADY_LOCKED:
- readableReason << "Node is already locked";
- break;
- case ActionState::ACTION_REASON_LOW_PRIORITY:
- readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId;
- break;
- case ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED:
- readableReason << "Cannot lock node: " << nodeId << ". Tablet "
- << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
- << " has too many unavailable nodes. Locked: "
- << LockedNodesCount << ". Down: " << DownNodesCount;
-
- if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
- readableReason << ". Limit: " << NodeToState.size() / 2 << " (50%)";
- }
-
- if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
- readableReason << ". Limit: " << NodeToState.size() - 1;
- }
- break;
- default:
- Y_FAIL("Unexpected reason");
-
- }
-
- return readableReason.str();
-}
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h
index 047a7a5c5ca..72362c84dba 100644
--- a/ydb/core/cms/node_checkers.h
+++ b/ydb/core/cms/node_checkers.h
@@ -6,7 +6,6 @@
#include <ydb/core/erasure/erasure.h>
#include <ydb/core/protos/cms.pb.h>
#include <ydb/core/protos/config.pb.h>
-#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/log.h>
@@ -37,7 +36,6 @@ public:
NODE_STATE_DOWN /* "Down" */
};
-
protected:
static ENodeState NodeState(NKikimrCms::EState state);
@@ -50,12 +48,9 @@ public:
virtual void LockNode(ui32 nodeId) = 0;
virtual void UnlockNode(ui32 nodeId) = 0;
- virtual void EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) = 0;
- virtual void RemoveTask(const std::string& taskUId) = 0;
-
- virtual Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const = 0;
+ virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0;
- virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const = 0;
+ virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0;
};
/**
@@ -63,35 +58,7 @@ public:
*/
class TNodesCounterBase : public INodesChecker {
protected:
- /** Structure to hold information about vdisk state and priorities and orders of some task.
- *
- * Requests with equal priority are processed in the order of arrival at CMS.
- */
- struct TNodeState {
- public:
- struct TTaskPriority {
- i32 Priority;
- ui64 Order;
- std::string TaskUId;
-
- explicit TTaskPriority(i32 priority, ui64 order, const std::string& taskUId)
- : Priority(priority)
- , Order(order)
- , TaskUId(taskUId)
- {}
-
- bool operator<(const TTaskPriority& rhs) const {
- return Priority < rhs.Priority || (Priority == rhs.Priority && Order > rhs.Order);
- }
- };
- public:
- ENodeState State;
- std::set<TTaskPriority> Priorities;
- };
-
-protected:
- THashMap<ui32, TNodeState> NodeToState;
- THashSet<ui32> NodesWithScheduledTasks;
+ THashMap<ui32, ENodeState> NodeToState;
ui32 LockedNodesCount;
ui32 DownNodesCount;
@@ -106,9 +73,6 @@ public:
void AddNode(ui32 nodeId) override;
void UpdateNode(ui32 nodeId, NKikimrCms::EState) override;
- void EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) override final;
- virtual void RemoveTask(const std::string& taskUId) override final;
-
void LockNode(ui32 nodeId) override;
void UnlockNode(ui32 nodeId) override;
};
@@ -139,7 +103,7 @@ public:
DisabledNodesRatioLimit = ratioLimit;
}
- Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const override final;
+ bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final;
};
class TTenantLimitsCounter : public TNodesLimitsCounterBase {
@@ -153,8 +117,20 @@ public:
{
}
- std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
- Ydb::Maintenance::ActionState::ActionReason reason) const override final;
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
+ Y_UNUSED(mode);
+
+ std::stringstream reason;
+ reason << "Cannot lock node: " << nodeId
+ << ". Too many locked nodes for tenant " << TenantName
+ << "; locked: " << LockedNodesCount
+ << "; down: " << DownNodesCount
+ << "; total: " << NodeToState.size()
+ << "; limit: " << DisabledNodesLimit
+ << "; ratio limit: " << DisabledNodesRatioLimit << "%";
+
+ return reason.str();
+ }
};
class TClusterLimitsCounter : public TNodesLimitsCounterBase {
@@ -164,8 +140,20 @@ public:
{
}
- std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
- Ydb::Maintenance::ActionState::ActionReason reason) const override final;
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
+ Y_UNUSED(mode);
+
+ std::stringstream reason;
+ reason << "Cannot lock node: " << nodeId
+ <<". Too many locked nodes in cluster"
+ << "; locked: " << LockedNodesCount
+ << "; down: " << DownNodesCount
+ << "; total: " << NodeToState.size()
+ << "; limit: " << DisabledNodesLimit
+ << "; ratio limit: " << DisabledNodesRatioLimit << "%";
+
+ return reason.str();
+ }
};
/**
@@ -183,10 +171,30 @@ public:
: TabletType(tabletType)
{}
- Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const override final;
+ bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final;
+
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
+ std::stringstream reason;
- std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
- Ydb::Maintenance::ActionState::ActionReason reason) const override final;
+ if (mode == NKikimrCms::MODE_FORCE_RESTART) {
+ return reason.str();
+ }
+
+ reason << "Cannot lock node: " << nodeId
+ << ". Tablet "
+ << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
+ << " has too many unavailable nodes. Locked: " << LockedNodesCount
+ << ". Down: " << DownNodesCount;
+ if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
+ reason << ". Limit: " << NodeToState.size() / 2 << " (50%)";
+ }
+
+ if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
+ reason << ". Limit: " << NodeToState.size() - 1;
+ }
+
+ return reason.str();
+ }
};
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt
index 1cff84774ce..922dc62a7b9 100644
--- a/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt
@@ -32,7 +32,6 @@ target_link_options(ydb-core-cms-ut PRIVATE
CoreFoundation
)
target_sources(ydb-core-cms-ut PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt b/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt
index 890df0f266e..64ee05e5931 100644
--- a/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt
@@ -35,7 +35,6 @@ target_link_options(ydb-core-cms-ut PRIVATE
-ldl
)
target_sources(ydb-core-cms-ut PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt b/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt
index 3d68909dc52..ede8d036c2a 100644
--- a/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt
@@ -36,7 +36,6 @@ target_link_options(ydb-core-cms-ut PRIVATE
-ldl
)
target_sources(ydb-core-cms-ut PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt b/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt
index 29a6d9015d2..1dbbee1959a 100644
--- a/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt
@@ -25,7 +25,6 @@ target_link_libraries(ydb-core-cms-ut PUBLIC
core-testlib-default
)
target_sources(ydb-core-cms-ut PRIVATE
- ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/public/api/protos/draft/ydb_maintenance.proto b/ydb/public/api/protos/draft/ydb_maintenance.proto
index 332f03e3a63..df66577709d 100644
--- a/ydb/public/api/protos/draft/ydb_maintenance.proto
+++ b/ydb/public/api/protos/draft/ydb_maintenance.proto
@@ -86,17 +86,11 @@ message ActionState {
// State storage broken. Too many (more than (nToSelect - 1) / 2) unavailable rings
ACTION_REASON_STATE_STORAGE_BROKEN = 5;
// Issue in cluster disabled nodes limit
- ACTION_REASON_DISABLED_NODES_LIMIT_REACHED = 6;
+ ACTION_REASON_DISABLED_NODES_LIMIT_RICHED = 6;
// Issue in tenant limits
- ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_REACHED = 7;
+ ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_RICHED = 7;
// Wrong request
ACTION_REASON_WRONG_REQUEST = 8;
- // Low priority
- ACTION_REASON_LOW_PRIORITY = 9;
- // Lock is granded to this item
- ACTION_REASON_ALREADY_LOCKED = 10;
- // Limit to nodes with sys tablets
- ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED = 11;
}
Action action = 1;