aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authort1mursadykov <t1mursadykov@ydb.tech>2023-05-04 18:58:35 +0300
committert1mursadykov <t1mursadykov@ydb.tech>2023-05-04 18:58:35 +0300
commitd8041116809f56885d742109346f7d530e7b0def (patch)
tree621b10bc3f620085689f725692844275963d884e
parent82951621d9c50b5360d435b26f44d38adfd03f0d (diff)
downloadydb-d8041116809f56885d742109346f7d530e7b0def.tar.gz
Unify checking in CMS
-rw-r--r--ydb/core/cms/CMakeLists.darwin-x86_64.txt11
-rw-r--r--ydb/core/cms/CMakeLists.linux-aarch64.txt11
-rw-r--r--ydb/core/cms/CMakeLists.linux-x86_64.txt11
-rw-r--r--ydb/core/cms/CMakeLists.windows-x86_64.txt11
-rw-r--r--ydb/core/cms/checkers_ut.cpp294
-rw-r--r--ydb/core/cms/cluster_info.cpp113
-rw-r--r--ydb/core/cms/cluster_info.h34
-rw-r--r--ydb/core/cms/cluster_info_ut.cpp37
-rw-r--r--ydb/core/cms/cms.cpp107
-rw-r--r--ydb/core/cms/cms_impl.h3
-rw-r--r--ydb/core/cms/cms_ut.cpp90
-rw-r--r--ydb/core/cms/cms_ut_common.cpp10
-rw-r--r--ydb/core/cms/erasure_checkers.cpp482
-rw-r--r--ydb/core/cms/erasure_checkers.h144
-rw-r--r--ydb/core/cms/node_checkers.cpp280
-rw-r--r--ydb/core/cms/node_checkers.h102
-rw-r--r--ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/core/cms/ut/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/core/cms/ut/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/core/cms/ut/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/public/api/protos/draft/ydb_maintenance.proto10
21 files changed, 1241 insertions, 513 deletions
diff --git a/ydb/core/cms/CMakeLists.darwin-x86_64.txt b/ydb/core/cms/CMakeLists.darwin-x86_64.txt
index cf839e87ff8..7ef104adaa9 100644
--- a/ydb/core/cms/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.darwin-x86_64.txt
@@ -22,6 +22,12 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -93,6 +99,11 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
+generate_enum_serilization(ydb-core-cms
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
+ INCLUDE_HEADERS
+ ydb/core/cms/erasure_checkers.h
+)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/CMakeLists.linux-aarch64.txt b/ydb/core/cms/CMakeLists.linux-aarch64.txt
index e1dcbee6629..54abfb25b63 100644
--- a/ydb/core/cms/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/cms/CMakeLists.linux-aarch64.txt
@@ -22,6 +22,12 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -94,6 +100,11 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
+generate_enum_serilization(ydb-core-cms
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
+ INCLUDE_HEADERS
+ ydb/core/cms/erasure_checkers.h
+)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/CMakeLists.linux-x86_64.txt b/ydb/core/cms/CMakeLists.linux-x86_64.txt
index e1dcbee6629..54abfb25b63 100644
--- a/ydb/core/cms/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.linux-x86_64.txt
@@ -22,6 +22,12 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -94,6 +100,11 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
+generate_enum_serilization(ydb-core-cms
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
+ INCLUDE_HEADERS
+ ydb/core/cms/erasure_checkers.h
+)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/CMakeLists.windows-x86_64.txt b/ydb/core/cms/CMakeLists.windows-x86_64.txt
index cf839e87ff8..7ef104adaa9 100644
--- a/ydb/core/cms/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/cms/CMakeLists.windows-x86_64.txt
@@ -22,6 +22,12 @@ get_built_tool_path(
enum_parser
)
get_built_tool_path(
+ TOOL_enum_parser_bin
+ TOOL_enum_parser_dependency
+ tools/enum_parser/enum_parser
+ enum_parser
+)
+get_built_tool_path(
TOOL_rescompiler_bin
TOOL_rescompiler_dependency
tools/rescompiler/bin
@@ -93,6 +99,11 @@ generate_enum_serilization(ydb-core-cms
INCLUDE_HEADERS
ydb/core/cms/node_checkers.h
)
+generate_enum_serilization(ydb-core-cms
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h
+ INCLUDE_HEADERS
+ ydb/core/cms/erasure_checkers.h
+)
add_global_library_for(ydb-core-cms.global ydb-core-cms)
target_link_libraries(ydb-core-cms.global PUBLIC
diff --git a/ydb/core/cms/checkers_ut.cpp b/ydb/core/cms/checkers_ut.cpp
new file mode 100644
index 00000000000..0964432c5f6
--- /dev/null
+++ b/ydb/core/cms/checkers_ut.cpp
@@ -0,0 +1,294 @@
+#include "cluster_info.h"
+#include "erasure_checkers.h"
+#include "node_checkers.h"
+#include "ut_helpers.h"
+#include "util/string/cast.h"
+
+#include <ydb/core/protos/cms.pb.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/vector.h>
+
+#include <bitset>
+#include <string>
+
+namespace NKikimr::NCmsTest {
+
+using namespace NCms;
+using namespace NKikimrCms;
+using namespace Ydb::Maintenance;
+
+TVector<TVDiskID> GenerateDefaultBlock42Group() {
+ TVector<TVDiskID> group;
+ for (ui32 i = 0; i < 8; ++i) {
+ group.push_back(TVDiskID(0, 1, 0, i % 8, 0));
+ }
+ return group;
+}
+
+TVector<TVDiskID> GenerateDefaultMirror3dcGroup() {
+ TVector<TVDiskID> group;
+ for (ui32 i = 0; i < 9; ++i) {
+ group.push_back(TVDiskID(0, 1, i / 3, i % 3, 0));
+ }
+ return group;
+}
+
+Y_UNIT_TEST_SUITE(TCmsCheckersTest) {
+ Y_UNIT_TEST(DefaultErasureCheckerAvailabilityMode)
+ {
+ TDefaultErasureChecker checker(0);
+
+ auto vdisks = GenerateDefaultBlock42Group();
+ for (auto vdisk : vdisks) {
+ checker.UpdateVDisk(vdisk, UP);
+ }
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
+
+ checker.UpdateVDisk(vdisks[0], DOWN);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+
+ checker.LockVDisk(vdisks[0]);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+ }
+
+ Y_UNIT_TEST(Mirror3dcCheckerAvailabilityMode)
+ {
+ TMirror3dcChecker checker(0);
+
+ auto vdisks = GenerateDefaultMirror3dcGroup();
+ for (auto vdisk : vdisks) {
+ checker.UpdateVDisk(vdisk, UP);
+ }
+
+ // One disabled disk for max availability
+ for (ui32 i = 0; i < 9; ++i) {
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
+ checker.LockVDisk(vdisks[i]);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
+
+ for (ui32 j = 0; j < 9; ++j) {
+ if (i == j)
+ continue;
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ }
+
+ checker.UnlockVDisk(vdisks[i]);
+ }
+
+ for (ui32 dc = 0; dc < 3; ++dc) {
+ // Minus 1 dc
+ checker.LockVDisk(vdisks[dc * 3]);
+ checker.LockVDisk(vdisks[dc * 3 + 1]);
+ checker.LockVDisk(vdisks[dc * 3 + 2]);
+
+ for (ui32 i = 0; i < 9; ++i) {
+ if ((i <= dc * 3 + 2) && (i >= dc * 3)) {
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED);
+ continue;
+ }
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+ }
+
+ // Minus 2 in dc
+ for (ui32 i = 0; i < 3; ++i) {
+ checker.UnlockVDisk(vdisks[dc * 3 + i]);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[dc * 3 + i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[dc * 3 + i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+
+ for (ui32 j = 0; j < 9; ++j) {
+ if ((j <= dc * 3 + 2) && (j >= dc * 3))
+ continue;
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+ }
+
+ checker.LockVDisk(vdisks[dc * 3 + i]);
+ }
+
+ checker.UnlockVDisk(vdisks[dc * 3]);
+ checker.UnlockVDisk(vdisks[dc * 3 + 1]);
+ checker.UnlockVDisk(vdisks[dc * 3 + 2]);
+ }
+
+ // Minus 1 in each dc
+ for (ui32 i = 0; i < 3; ++i) {
+ checker.LockVDisk(vdisks[i]);
+ checker.LockVDisk(vdisks[i + 3]);
+ checker.LockVDisk(vdisks[i + 6]);
+
+ for (ui32 j = 0; j < 9; ++j) {
+ if (j % 3 == i)
+ continue;
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ }
+
+ checker.UnlockVDisk(vdisks[i]);
+ checker.UnlockVDisk(vdisks[i + 3]);
+ checker.UnlockVDisk(vdisks[i + 6]);
+ }
+
+ checker.UpdateVDisk(vdisks[0], DOWN);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ }
+
+ Y_UNIT_TEST(DefaultErasureCheckerPriorities)
+ {
+ TDefaultErasureChecker checker(0);
+
+ auto vdisks = GenerateDefaultBlock42Group();
+ for (auto vdisk : vdisks) {
+ checker.UpdateVDisk(vdisk, UP);
+ }
+
+ // Check one scheduled task with order
+ for (ui32 i = 0; i < 8; ++i) {
+ checker.EmplaceTask(vdisks[i], 0, 1, "task-1");
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 2), ActionState::ACTION_REASON_LOW_PRIORITY);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 2), ActionState::ACTION_REASON_LOW_PRIORITY);
+
+ for (ui32 j = 0; j < 8; ++j) {
+ if (j == i)
+ continue;
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 2), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_KEEP_AVAILABLE, 0, 2), ActionState::ACTION_REASON_OK);
+ }
+
+ checker.RemoveTask("task-1");
+ }
+
+ // Check two scheduled task with priority and order
+ checker.EmplaceTask(vdisks[1], 1, 1, "task-1");
+ checker.EmplaceTask(vdisks[2], 2, 1, "task-2");
+
+ // Priority is higher than task-1 but lower than task-2
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_MAX_AVAILABILITY, 2, 2), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_KEEP_AVAILABLE, 2, 2), ActionState::ACTION_REASON_OK);
+
+ checker.RemoveTask("task-1");
+ checker.RemoveTask("task-2");
+ }
+
+ Y_UNIT_TEST(Mirror3dcCheckerPriorities)
+ {
+ TMirror3dcChecker checker(0);
+
+ auto vdisks = GenerateDefaultMirror3dcGroup();
+ for (auto vdisk : vdisks) {
+ checker.UpdateVDisk(vdisk, UP);
+ }
+
+ // task-1 > task-2 > task-3 > task-4
+ checker.EmplaceTask(vdisks[0], 2, 2, "task-1");
+ checker.EmplaceTask(vdisks[1], 2, 5, "task-2");
+ checker.EmplaceTask(vdisks[2], 1, 2, "task-3");
+ checker.EmplaceTask(vdisks[3], 1, 4, "task-4");
+
+ // Highest priority
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[4], MODE_MAX_AVAILABILITY, 3, 1), ActionState::ACTION_REASON_OK);
+ // Blocked by all tasks
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[4], MODE_KEEP_AVAILABLE, 1, 6), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
+ // Blocked by task-1, task-2, task-3
+ UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_KEEP_AVAILABLE, 1, 3), ActionState::ACTION_REASON_OK);
+ }
+
+ Y_UNIT_TEST(ClusterNodesCounter)
+ {
+ const ui32 nodeCount = 30;
+ TClusterLimitsCounter checker(0, 0);
+
+ for (ui32 i = 1; i <= nodeCount; ++i) {
+ checker.UpdateNode(i, UP);
+ }
+
+ // Without limit allow all nodes
+ for (ui32 i = 1; i < nodeCount; ++i) {
+ checker.LockNode(i);
+ }
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+
+ for (ui32 i = 1; i < nodeCount; ++i) {
+ checker.UnlockNode(i);
+ }
+
+ // Limit 15 nodes
+ checker.ApplyLimits(15, 0);
+ for (ui32 i = 1; i < 15; ++i) {
+ checker.LockNode(i);
+ }
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+
+ checker.ApplyLimits(0, 50);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK);
+
+ checker.LockNode(15);
+ checker.ApplyLimits(15, 0);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
+
+ checker.ApplyLimits(0, 50);
+
+ for (ui32 i = 1; i <= 15; ++i) {
+ checker.UnlockNode(i);
+ }
+
+ checker.ApplyLimits(0, 0);
+ for (ui32 i = 1; i < nodeCount; ++i) {
+ checker.EmplaceTask(i, 1, 3, "task-1");
+ }
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 1, 1), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 1, 1), ActionState::ACTION_REASON_OK);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 1, 4), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 1, 4), ActionState::ACTION_REASON_OK);
+
+ checker.RemoveTask("task-1");
+
+ checker.ApplyLimits(15, 0);
+ for (ui32 i = 1; i < 15; ++i) {
+ checker.EmplaceTask(i, 1, 3, "task-2");
+ }
+ checker.EmplaceTask(15, 1, 4, "task-3");
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 1, 1), ActionState::ACTION_REASON_OK);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 1, 1), ActionState::ACTION_REASON_OK);
+
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 1, 5), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
+ UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 1, 5), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED);
+
+ checker.RemoveTask("task-2");
+ checker.RemoveTask("task-3");
+ }
+
+}
+}
diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp
index 28e6baeddbf..1a3c85d5b33 100644
--- a/ydb/core/cms/cluster_info.cpp
+++ b/ydb/core/cms/cluster_info.cpp
@@ -1,8 +1,11 @@
#include "cluster_info.h"
#include "cms_state.h"
#include "node_checkers.h"
+#include "erasure_checkers.h"
+#include <ydb/core/protos/cms.pb.h>
#include <ydb/core/protos/services.pb.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/core/log.h>
@@ -401,7 +404,7 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi
}
}
- node.UpdateNodeState();
+ node.UpdateNodeState();
}
void TClusterInfo::ClearNode(ui32 nodeId)
@@ -416,6 +419,10 @@ void TClusterInfo::ClearNode(ui32 nodeId)
node.HasTenantInfo = false;
node.State = NKikimrCms::DOWN;
node.UpdateNodeState();
+
+ for (auto& vdisk : node.VDisks) {
+ BSGroup(vdisk.GroupID).GroupChecker->UpdateVDisk(vdisk, DOWN);
+ }
}
void TClusterInfo::ApplyInitialNodeTenants(const TActorContext& ctx, const THashMap<ui32, TString>& nodeTenants)
@@ -489,7 +496,15 @@ void TClusterInfo::UpdatePDiskState(const TPDiskID &id, const NKikimrWhiteboard:
}
auto &pdisk = PDiskRef(id);
- pdisk.State = info.GetState() == NKikimrBlobStorage::TPDiskState::Normal ? UP : DOWN;
+ auto state = info.GetState() == NKikimrBlobStorage::TPDiskState::Normal ? UP : DOWN;
+ pdisk.State = state;
+
+ if (state == UP)
+ return;
+
+ for (auto &vdisk : pdisk.VDisks) {
+ BSGroup(vdisk.GroupID).GroupChecker->UpdateVDisk(vdisk, state);
+ }
}
void TClusterInfo::AddVDisk(const NKikimrBlobStorage::TBaseConfig::TVSlot &info)
@@ -545,10 +560,13 @@ void TClusterInfo::UpdateVDiskState(const TVDiskID &id, const NKikimrWhiteboard:
}
auto &vdisk = VDiskRef(id);
- if (info.GetVDiskState() == NKikimrWhiteboard::OK && info.GetReplicated())
+ if (info.GetVDiskState() == NKikimrWhiteboard::OK && info.GetReplicated()) {
vdisk.State = UP;
- else
+ BSGroup(vdisk.VDiskId.GroupID).GroupChecker->UpdateVDisk(id, UP);
+ } else {
vdisk.State = DOWN;
+ BSGroup(vdisk.VDiskId.GroupID).GroupChecker->UpdateVDisk(id, DOWN);
+ }
}
void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &info)
@@ -557,6 +575,8 @@ void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &inf
bsgroup.GroupId = info.GetGroupId();
if (info.GetErasureSpecies())
bsgroup.Erasure = {TErasureType::ErasureSpeciesByName(info.GetErasureSpecies())};
+
+ bsgroup.GroupChecker = CreateStorageGroupChecker(bsgroup.Erasure.GetErasure(), bsgroup.GroupId);
for (const auto &vdisk : info.GetVSlotId()) {
TPDiskID pdiskId = {vdisk.GetNodeId(), vdisk.GetPDiskId()};
Y_VERIFY_DEBUG(HasPDisk(pdiskId));
@@ -576,8 +596,10 @@ void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &inf
bsgroup.VDisks.insert(pdisk.VSlots.at(vdisk.GetVSlotId()));
}
- for (auto &vdisk : bsgroup.VDisks)
+ for (auto &vdisk : bsgroup.VDisks) {
VDiskRef(vdisk).BSGroups.insert(bsgroup.GroupId);
+ bsgroup.GroupChecker->AddVDisk(vdisk);
+ }
BSGroups[bsgroup.GroupId] = std::move(bsgroup);
}
@@ -612,12 +634,18 @@ void TClusterInfo::AddPDiskTempLock(TPDiskID pdiskId, const NKikimrCms::TAction
{
auto &pdisk = PDiskRef(pdiskId);
pdisk.TempLocks.push_back({RollbackPoint, action});
+
+ for (auto& vdisk : pdisk.VDisks) {
+ LogManager.AddLockVDiskOperation(vdisk ,BSGroup(vdisk.GroupID).GroupChecker);
+ }
}
void TClusterInfo::AddVDiskTempLock(TVDiskID vdiskId, const NKikimrCms::TAction &action)
{
auto &vdisk = VDiskRef(vdiskId);
vdisk.TempLocks.push_back({RollbackPoint, action});
+
+ LogManager.AddLockVDiskOperation(vdiskId, BSGroup(vdiskId.GroupID).GroupChecker);
}
static TServices MakeServices(const NKikimrCms::TAction &action) {
@@ -650,6 +678,10 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action)
for (const auto node : nodes) {
for (auto &nodeGroup: node->NodeGroups)
nodeGroup->LockNode(node->NodeId);
+
+ for (auto vdisk : node->VDisks) {
+ BSGroup(vdisk.GroupID).GroupChecker->LockVDisk(vdisk);
+ }
}
}
break;
@@ -657,12 +689,12 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action)
for (const auto &device : action.GetDevices()) {
if (HasPDisk(device)) {
auto pdisk = &PDiskRef(device);
- for (auto &nodeGroup: NodeRef(pdisk->NodeId).NodeGroups)
- nodeGroup->LockNode(pdisk->NodeId);
+ for (auto vdisk : pdisk->VDisks)
+ BSGroup(vdisk.GroupID).GroupChecker->LockVDisk(vdisk);
} else if (HasVDisk(device)) {
auto vdisk = &VDiskRef(device);
- for (auto &nodeGroup: NodeRef(vdisk->NodeId).NodeGroups)
- nodeGroup->LockNode(vdisk->NodeId);
+
+ BSGroup(vdisk->VDiskId.GroupID).GroupChecker->LockVDisk(vdisk->VDiskId);
}
}
break;
@@ -825,8 +857,9 @@ ui64 TClusterInfo::AddTempLocks(const NKikimrCms::TAction &action, const TActorC
LogManager.ApplyAction(action, this);
- for (auto item : items)
+ for (auto item : items) {
item->TempLocks.push_back({RollbackPoint, action});
+ }
return items.size();
}
@@ -841,6 +874,38 @@ ui64 TClusterInfo::ScheduleActions(const TRequestInfo &request, const TActorCont
item->ScheduleLock({action, request.Owner, request.RequestId, request.Order});
locks += items.size();
+
+ switch (action.GetType()) {
+ case NKikimrCms::TAction::RESTART_SERVICES:
+ case NKikimrCms::TAction::SHUTDOWN_HOST:
+ if (auto nodes = NodePtrs(action.GetHost(), MakeServices(action))) {
+ for (const auto node : nodes) {
+ for (auto& group : node->NodeGroups) {
+ group->EmplaceTask(node->NodeId, 0, request.Order, request.RequestId);
+ }
+ for (auto &vdisk: node->VDisks) {
+ BSGroup(vdisk.GroupID).GroupChecker->EmplaceTask(vdisk, 0, request.Order, request.RequestId);
+ }
+ }
+ }
+ break;
+ case NKikimrCms::TAction::REPLACE_DEVICES:
+ for (const auto &device : action.GetDevices()) {
+ if (HasPDisk(device)) {
+ auto pdisk = &PDisk(device);
+ for (auto &vdisk: pdisk->VDisks) {
+ BSGroup(vdisk.GroupID).GroupChecker->EmplaceTask(vdisk, 0, request.Order, request.RequestId);
+ }
+ } else if (HasVDisk(device)) {
+ auto vdisk = &VDisk(device);
+ BSGroup(vdisk->VDiskId.GroupID).GroupChecker->EmplaceTask(vdisk->VDiskId, 0, request.Order, request.RequestId);
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
}
return locks;
@@ -850,6 +915,20 @@ void TClusterInfo::UnscheduleActions(const TString &requestId)
{
for (auto &entry : LockableItems)
entry.second->RemoveScheduledLocks(requestId);
+
+ for (auto &group : BSGroups) {
+ group.second.GroupChecker->RemoveTask(requestId);
+ }
+
+ ClusterNodes->RemoveTask(requestId);
+
+ for (auto& [_, tenantChecker] : TenantNodesChecker) {
+ tenantChecker->RemoveTask(requestId);
+ }
+
+ for (auto& [_, sysNodesCheckers] : SysNodesCheckers) {
+ sysNodesCheckers->RemoveTask(requestId);
+ }
}
void TClusterInfo::DeactivateScheduledLocks(ui64 order)
@@ -918,7 +997,7 @@ void TClusterInfo::GenerateTenantNodesCheckers() {
void TClusterInfo::GenerateSysTabletsNodesCheckers() {
for (auto tablet : BootstrapConfig.GetTablet()) {
- SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType()));
+ SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType()));
for (auto nodeId : tablet.GetNode()) {
NodeToTabletTypes[nodeId].push_back(tablet.GetType());
@@ -999,6 +1078,10 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action,
for (const auto node : nodes) {
for (auto &nodeGroup: node->NodeGroups)
AddNodeLockOperation(node->NodeId, nodeGroup);
+
+ for (auto& vdisk : node->VDisks) {
+ AddLockVDiskOperation(vdisk, clusterState->BSGroup(vdisk.GroupID).GroupChecker);
+ }
}
}
break;
@@ -1006,13 +1089,13 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action,
for (const auto &device : action.GetDevices()) {
if (clusterState->HasPDisk(device)) {
auto pdisk = &clusterState->PDisk(device);
- for (auto &nodeGroup: clusterState->NodeRef(pdisk->NodeId).NodeGroups)
- AddNodeLockOperation(pdisk->NodeId, nodeGroup);
+ for (auto& vdisk : pdisk->VDisks) {
+ AddLockVDiskOperation(vdisk, clusterState->BSGroup(vdisk.GroupID).GroupChecker);
+ }
} else if (clusterState->HasVDisk(device)) {
auto vdisk = &clusterState->VDisk(device);
- for (auto &nodeGroup: clusterState->NodeRef(vdisk->NodeId).NodeGroups)
- AddNodeLockOperation(vdisk->NodeId, nodeGroup);
+ AddLockVDiskOperation(vdisk->VDiskId, clusterState->BSGroup(vdisk->VDiskId.GroupID).GroupChecker);
}
}
break;
diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h
index 923b5a7d05f..0b7253d3688 100644
--- a/ydb/core/cms/cluster_info.h
+++ b/ydb/core/cms/cluster_info.h
@@ -3,6 +3,7 @@
#include "defs.h"
#include "config.h"
#include "downtime.h"
+#include "erasure_checkers.h"
#include "node_checkers.h"
#include "services.h"
@@ -14,6 +15,7 @@
#include <ydb/core/protos/cms.pb.h>
#include <ydb/core/protos/config.pb.h>
#include <ydb/core/protos/console.pb.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/interconnect/interconnect.h>
@@ -471,6 +473,8 @@ struct TBSGroupInfo {
ui32 GroupId = 0;
TErasureType Erasure;
TSet<TVDiskID> VDisks;
+
+ TSimpleSharedPtr<IStorageGroupChecker> GroupChecker;
};
/**
@@ -596,6 +600,32 @@ public:
}
};
+class TLockDiskOperation : public TOperationBase {
+private:
+ TVDiskID VDiskId;
+
+private:
+ TSimpleSharedPtr<IStorageGroupChecker> StorageGroupChecker;
+
+public:
+ TLockDiskOperation(const TVDiskID& vdiskId, TSimpleSharedPtr<IStorageGroupChecker> checker)
+ : TOperationBase(OPERATION_TYPE_LOCK_DISK)
+ , VDiskId(vdiskId)
+ , StorageGroupChecker(checker)
+ {
+ }
+
+ void Do() override final {
+ StorageGroupChecker->LockVDisk(VDiskId);
+ }
+
+ void Undo() override final {
+ StorageGroupChecker->UnlockVDisk(VDiskId);
+ }
+
+
+};
+
class TLogRollbackPoint : public TOperationBase {
public:
TLogRollbackPoint() : TOperationBase(OPERATION_TYPE_ROLLBACK_POINT)
@@ -624,6 +654,10 @@ public:
Log.emplace_back(new TLockNodeOperation(nodeId, nodesState))->Do();
}
+ void AddLockVDiskOperation(const TVDiskID& vdiskId, TSimpleSharedPtr<IStorageGroupChecker> checker) {
+ Log.emplace_back(new TLockDiskOperation(vdiskId, checker))->Do();
+ }
+
void RollbackOperations() {
while (!Log.empty() && Log.back()->Type != OPERATION_TYPE_ROLLBACK_POINT) {
Log.back()->Undo();
diff --git a/ydb/core/cms/cluster_info_ut.cpp b/ydb/core/cms/cluster_info_ut.cpp
index 0e79306a3dd..e56a530f2b5 100644
--- a/ydb/core/cms/cluster_info_ut.cpp
+++ b/ydb/core/cms/cluster_info_ut.cpp
@@ -292,56 +292,57 @@ Y_UNIT_TEST_SUITE(TClusterInfoTest) {
UNIT_ASSERT_VALUES_EQUAL(cluster->NodesCount("localhost"), 2);
cluster->AddPDisk(MakePDiskConfig(1, 1));
+ cluster->AddPDisk(MakePDiskConfig(2, 2));
+ cluster->AddVDisk(MakeVSlotConfig(1, {0, 1, 0, 0, 0}, 1, 0));
+ cluster->AddVDisk(MakeVSlotConfig(2, {0, 1, 0, 1, 0}, 2, 0));
+ cluster->AddBSGroup(MakeBSGroup(0, "none", 1, 1, 0, 2, 2, 0));
+
cluster->UpdatePDiskState(NCms::TPDiskID(1, 1), MakePDiskInfo(1));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(1, 1)));
UNIT_ASSERT(!cluster->HasPDisk(NCms::TPDiskID(1, 2)));
UNIT_ASSERT(!cluster->HasPDisk(NCms::TPDiskID(2, 1)));
- CheckPDisk(cluster->PDisk(NCms::TPDiskID(1, 1)), 1, 1, UP, 0);
+ CheckPDisk(cluster->PDisk(NCms::TPDiskID(1, 1)), 1, 1, UP, 1);
UNIT_ASSERT(cluster->Node(1).PDisks.contains(NCms::TPDiskID(1, 1)));
- cluster->AddVDisk(MakeVSlotConfig(1, {0, 1, 0, 0, 0}, 1, 0));
cluster->UpdateVDiskState({0, 1, 0, 0, 0}, MakeVDiskInfo({0, 1, 0, 0, 0}, 1, 0));
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 0, 0}));
- UNIT_ASSERT(!cluster->HasVDisk({0, 1, 0, 1, 0}));
- CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), {0, 1, 0, 0, 0}, 1, UP, 1, 0);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), {0, 1, 0, 0, 0}, 1, UP, 1, 1);
UNIT_ASSERT_VALUES_EQUAL(cluster->PDisk(NCms::TPDiskID(1, 1)).VDisks.size(), 1);
UNIT_ASSERT(cluster->PDisk(NCms::TPDiskID(1, 1)).VDisks.contains(TVDiskID(0, 1, 0, 0, 0)));
- cluster->AddPDisk(MakePDiskConfig(2, 2));
cluster->UpdatePDiskState(NCms::TPDiskID(2, 2), MakePDiskInfo(2));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(2, 2)));
- CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 0);
+ CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 1);
- cluster->AddVDisk(MakeVSlotConfig(2, {0, 1, 0, 1, 0}, 2, 0));
cluster->UpdateVDiskState({0, 1, 0, 1, 0}, MakeVDiskInfo({0, 1, 0, 1, 0}, 2, 0));
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 1, 0}));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(2, 2)));
CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 1);
UNIT_ASSERT(cluster->PDisk(NCms::TPDiskID(2, 2)).VDisks.contains(TVDiskID(0, 1, 0, 1, 0)));
- cluster->AddBSGroup(MakeBSGroup(1, "none", 1, 1, 0, 2, 2, 0));
- UNIT_ASSERT(cluster->HasBSGroup(1));
- UNIT_ASSERT(!cluster->HasBSGroup(2));
- CheckBSGroup(cluster->BSGroup(1), 1, TErasureType::ErasureNone, 2,
+ UNIT_ASSERT(cluster->HasBSGroup(0));
+ UNIT_ASSERT(!cluster->HasBSGroup(1));
+ CheckBSGroup(cluster->BSGroup(0), 0, TErasureType::ErasureNone, 2,
TVDiskID(0, 1, 0, 0, 0), TVDiskID(0, 1, 0, 1, 0));
- CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 1, 1);
- CheckVDisk(cluster->VDisk({0, 1, 0, 1, 0}), 1, 1);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 1, 0);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 1, 0}), 1, 0);
cluster->AddPDisk(MakePDiskConfig(3, 3));
+ cluster->AddVDisk(MakeVSlotConfig(3, {0, 1, 0, 2, 0}, 3, 0));
+ cluster->AddBSGroup(MakeBSGroup(2, "none", 1, 1, 0, 3, 3, 0));
+
cluster->UpdatePDiskState(NCms::TPDiskID(3, 3), MakePDiskInfo(3));
UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(3, 3)));
- CheckPDisk(cluster->PDisk(NCms::TPDiskID(3, 3)), 3, 3, UP, 0);
+ CheckPDisk(cluster->PDisk(NCms::TPDiskID(3, 3)), 3, 3, UP, 1);
- cluster->AddVDisk(MakeVSlotConfig(3, {0, 1, 0, 2, 0}, 3, 0));
cluster->UpdateVDiskState({0, 1, 0, 2, 0}, MakeVDiskInfo({0, 1, 0, 2, 0}, 3, 0));
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 2, 0}));
- CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 0);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 1);
- cluster->AddBSGroup(MakeBSGroup(2, "none", 1, 1, 0, 3, 3, 0));
UNIT_ASSERT(cluster->HasBSGroup(2));
CheckBSGroup(cluster->BSGroup(2), 2, TErasureType::ErasureNone, 2,
TVDiskID(0, 1, 0, 0, 0), TVDiskID(0, 1, 0, 2, 0));
- CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 2, 1, 2);
+ CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 2, 0, 2);
UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 2, 0}));
CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 1, 2);
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index 211a7e0c622..c400618ccaa 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -17,6 +17,7 @@
#include <ydb/core/protos/config_units.pb.h>
#include <ydb/core/protos/counters_cms.pb.h>
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/core/hfunc.h>
@@ -110,6 +111,7 @@ namespace {
bool TCms::CheckPermissionRequest(const TPermissionRequest &request,
TPermissionResponse &response,
TPermissionRequest &scheduled,
+ ui64 requestOrder,
const TActorContext &ctx)
{
static THashMap<EStatusCode, ui32> CodesRate = BuildCodesRateMap({
@@ -166,6 +168,7 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request,
opts.TenantPolicy = request.GetTenantPolicy();
opts.AvailabilityMode = request.GetAvailabilityMode();
opts.PartialPermissionAllowed = allowPartial;
+ opts.Order = requestOrder;
TErrorInfo error;
@@ -335,7 +338,7 @@ bool TCms::CheckAction(const TAction &action,
case TAction::SHUTDOWN_HOST:
return CheckActionShutdownHost(action, opts, error, ctx);
case TAction::REPLACE_DEVICES:
- return CheckActionReplaceDevices(action, opts.PermissionDuration, error);
+ return CheckActionReplaceDevices(action, opts, error);
case TAction::START_SERVICES:
case TAction::STOP_SERVICES:
case TAction::ADD_HOST:
@@ -542,9 +545,10 @@ bool TCms::CheckSysTabletsNode(const TActionOptions &opts,
}
for (auto &tabletType : ClusterInfo->NodeToTabletTypes[node.NodeId]) {
- if (!ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode)) {
+ auto reason = ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order);
+ if (reason != Ydb::Maintenance::ActionState::ACTION_REASON_OK) {
error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode);
+ error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode, reason);
error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
return false;
}
@@ -561,24 +565,27 @@ bool TCms::TryToLockNode(const TAction& action,
TDuration duration = TDuration::MicroSeconds(action.GetDuration());
duration += opts.PermissionDuration;
- if (!ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode))
+ auto clusterReason = ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order);
+ if (clusterReason != Ydb::Maintenance::ActionState::ACTION_REASON_OK)
{
error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode);
+ error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode, clusterReason);
error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
return false;
}
if (node.Tenant
- && opts.TenantPolicy != NONE
- && !ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode))
- {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode);
- error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
+ && opts.TenantPolicy != NONE) {
+ auto tenantReason = ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order);
- return false;
+ if (tenantReason != Ydb::Maintenance::ActionState::ACTION_REASON_OK) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode, tenantReason);
+ error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime;
+
+ return false;
+ }
}
return true;
@@ -657,35 +664,37 @@ bool TCms::TryToLockVDisk(const TActionOptions& opts,
return false;
}
- auto counters = CreateErasureCounter(ClusterInfo->BSGroup(groupId).Erasure.GetErasure(), vdisk, groupId);
- counters->CountGroupState(ClusterInfo, State->Config.DefaultRetryTime, duration, error);
+ ui32 tempLocksCount = 0;
+ for (auto& vdiskId : group.VDisks) {
+ if (vdisk.VDiskId == vdiskId)
+ continue;
- switch (opts.AvailabilityMode) {
- case MODE_MAX_AVAILABILITY:
- if (!counters->CheckForMaxAvailability(error, defaultDeadline, opts.PartialPermissionAllowed)) {
- return false;
+ if (!ClusterInfo->VDisk(vdiskId).TempLocks.empty()
+ || !ClusterInfo->Node(ClusterInfo->VDisk(vdiskId).NodeId).TempLocks.empty()
+ || !ClusterInfo->PDisk(ClusterInfo->VDisk(vdiskId).PDiskId).TempLocks.empty()) {
+ tempLocksCount += 1;
}
- break;
- case MODE_KEEP_AVAILABLE:
- if (!counters->CheckForKeepAvailability(ClusterInfo, error, defaultDeadline, opts.PartialPermissionAllowed)) {
- return false;
- }
- break;
- case MODE_FORCE_RESTART:
- if ( counters->GroupAlreadyHasLockedDisks() && opts.PartialPermissionAllowed) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = "You cannot get two or more disks from the same group at the same time"
- " without specifying the PartialPermissionAllowed parameter";
- error.Deadline = defaultDeadline;
- return false;
- }
- // Any number of down disks is OK for this mode.
- break;
- default:
- error.Code = TStatus::WRONG_REQUEST;
- error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")",
- EAvailabilityMode_Name(opts.AvailabilityMode).data(),
- static_cast<ui32>(opts.AvailabilityMode));
+ }
+
+ if (opts.PartialPermissionAllowed && tempLocksCount == 1) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = "You cannot get two or more disks from the same group at the same time";
+ error.Deadline = defaultDeadline;
+ return false;
+ }
+
+ auto result = group.GroupChecker->TryToLockVDisk(vdisk.VDiskId, opts.AvailabilityMode, 0, opts.Order);
+ bool resultIsOk = result == Ydb::Maintenance::ActionState::ACTION_REASON_OK;
+
+ if (!resultIsOk && !opts.PartialPermissionAllowed && tempLocksCount > 0) {
+ error.Code = TStatus::DISALLOW;
+ error.Reason = "Request is incorrect. You will never get a permissions. Try with PartialPermissionAllowed";
+ return false;
+ }
+
+ if (!resultIsOk) {
+ error.Code = TStatus::DISALLOW_TEMP;
+ error.Reason = group.GroupChecker->ReadableReason(vdisk.VDiskId, opts.AvailabilityMode, result);
error.Deadline = defaultDeadline;
return false;
}
@@ -1550,13 +1559,9 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev,
}
}
- ClusterInfo->LogManager.PushRollbackPoint();
- for (const auto &scheduled_request : State->ScheduledRequests) {
- for (auto &action : scheduled_request.second.Request.GetActions())
- ClusterInfo->LogManager.ApplyAction(action, ClusterInfo);
- }
- bool ok = CheckPermissionRequest(rec, resp->Record, scheduled.Request, ctx);
- ClusterInfo->LogManager.RollbackOperations();
+ LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::CMS, "Next request Id: " << State->NextRequestId);
+
+ bool ok = CheckPermissionRequest(rec, resp->Record, scheduled.Request, State->NextRequestId, ctx);
// Schedule request if required.
if (rec.GetDryRun()) {
@@ -1616,20 +1621,12 @@ void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx)
auto requestStartTime = TInstant::Now();
- ClusterInfo->LogManager.PushRollbackPoint();
- for (const auto &scheduled_request : State->ScheduledRequests) {
- if (scheduled_request.second.Order < request.Order) {
- for (auto &action : scheduled_request.second.Request.GetActions())
- ClusterInfo->LogManager.ApplyAction(action, ClusterInfo);
- }
- }
// Deactivate locks of this and later requests to
// avoid false conflicts.
ClusterInfo->DeactivateScheduledLocks(request.Order);
request.Request.SetAvailabilityMode(rec.GetAvailabilityMode());
- bool ok = CheckPermissionRequest(request.Request, resp->Record, scheduled.Request, ctx);
+ bool ok = CheckPermissionRequest(request.Request, resp->Record, scheduled.Request, request.Order, ctx);
ClusterInfo->ReactivateScheduledLocks();
- ClusterInfo->LogManager.RollbackOperations();
// Schedule request if required.
if (rec.GetDryRun()) {
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index 9cb688b125a..d2038ec6e6f 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -105,12 +105,14 @@ private:
NKikimrCms::ETenantPolicy TenantPolicy;
NKikimrCms::EAvailabilityMode AvailabilityMode;
bool PartialPermissionAllowed;
+ ui64 Order;
TActionOptions(TDuration dur)
: PermissionDuration(dur)
, TenantPolicy(NKikimrCms::DEFAULT)
, AvailabilityMode(NKikimrCms::MODE_MAX_AVAILABILITY)
, PartialPermissionAllowed(false)
+ , Order(0)
{}
};
@@ -275,6 +277,7 @@ private:
bool CheckPermissionRequest(const NKikimrCms::TPermissionRequest &request,
NKikimrCms::TPermissionResponse &response,
NKikimrCms::TPermissionRequest &scheduled,
+ const ui64 requestOrder,
const TActorContext &ctx);
bool IsActionHostValid(const NKikimrCms::TAction &action, TErrorInfo &error) const;
bool ParseServices(const NKikimrCms::TAction &action, TServices &services, TErrorInfo &error) const;
diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp
index 4b7ea52ce1f..4ec398add89 100644
--- a/ydb/core/cms/cms_ut.cpp
+++ b/ydb/core/cms/cms_ut.cpp
@@ -729,92 +729,6 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
env.CheckWalleCheckTask("task-2", TStatus::ALLOW, env.GetNodeId(1));
}
- Y_UNIT_TEST(Notifications)
- {
- TCmsTestEnv env(8);
- env.AdvanceCurrentTime(TDuration::Minutes(20));
-
- // User is not specified.
- env.CheckNotification(TStatus::WRONG_REQUEST, "", env.GetCurrentTime(),
- MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000));
- // Too old.
- env.CheckNotification(TStatus::WRONG_REQUEST, "user", env.GetCurrentTime() - TDuration::Minutes(10),
- MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000));
- // Store notification user-1.
- auto id1 = env.CheckNotification
- (TStatus::OK, "user", env.GetCurrentTime() + TDuration::Minutes(10),
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
-
- // OK to replace the same device before notification start time.
- env.CheckPermissionRequest("user", false, true, true, true, TStatus::ALLOW,
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
-
- // Intersects with notification.
- env.CheckPermissionRequest("user", false, true, true, true, TStatus::DISALLOW_TEMP,
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 10 * 60000000, env.PDiskName(1, 0)));
-
- // Store notification user-2.
- auto id2 = env.CheckNotification(TStatus::OK, "user", env.GetCurrentTime(),
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(2, 0)));
- // Store notificaiton user1-3.
- auto id3 = env.CheckNotification(TStatus::OK, "user1", env.GetCurrentTime(),
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(3, 0)));
- // Get notification with no user.
- env.CheckGetNotification("", id1, TStatus::WRONG_REQUEST);
- // Get user-1.
- env.CheckGetNotification("user", id1, TStatus::OK);
- // Get with wrong user.
- env.CheckGetNotification("user1", id1, TStatus::WRONG_REQUEST);
- // Get with wrong id.
- env.CheckGetNotification("user", "wrong-id", TStatus::WRONG_REQUEST);
- // List notifications for user.
- env.CheckListNotifications("user", TStatus::OK, 2);
- // List notifications for user1.
- env.CheckListNotifications("user1", TStatus::OK, 1);
- // List with no user.
- env.CheckListNotifications("", TStatus::WRONG_REQUEST, 0);
- // Reject notification with no user.
- env.CheckRejectNotification("", id1, TStatus::WRONG_REQUEST);
- // Reject notification with wrong user.
- env.CheckRejectNotification("user1", id1, TStatus::WRONG_REQUEST);
- // Reject user-1 (dry run)
- env.CheckRejectNotification("user", id1, TStatus::OK, true);
- // Get user-1.
- env.CheckGetNotification("user", id1, TStatus::OK);
- // Reject user1-3.
- env.CheckRejectNotification("user1", id3, TStatus::OK);
- // Reject user-2.
- env.CheckRejectNotification("user", id2, TStatus::OK);
- // List notifications for user.
- env.CheckListNotifications("user", TStatus::OK, 1);
- // List notifications for user1.
- env.CheckListNotifications("user1", TStatus::OK, 0);
- // Get rejected user1-3.
- env.CheckGetNotification("user1", id3, TStatus::WRONG_REQUEST);
- // Get rejected user-2.
- env.CheckGetNotification("user", id2, TStatus::WRONG_REQUEST);
- // Get user-1.
- env.CheckGetNotification("user", id1, TStatus::OK);
- }
-
- Y_UNIT_TEST(PermissionDuration) {
- TCmsTestEnv env(8);
-
- // Store notification user-1.
- auto id1 = env.CheckNotification
- (TStatus::OK, "user", env.GetCurrentTime() + TDuration::Minutes(10),
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
-
- // Intersects with notification.
- const TDuration _10minutes = TDuration::Minutes(10);
- env.CheckPermissionRequest("user", false, true, true, true, _10minutes, TStatus::DISALLOW_TEMP,
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), _10minutes.MicroSeconds(), env.PDiskName(1, 0)));
-
- // OK with default duration.
- env.CheckPermissionRequest("user", false, true, true, true, TStatus::ALLOW,
- MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0)));
- }
-
Y_UNIT_TEST(ActionWithZeroDuration) {
TCmsTestEnv env(8);
@@ -1309,7 +1223,6 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
- env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
@@ -1317,14 +1230,12 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
TFakeNodeWhiteboardService::Info[env.GetNodeId(7)].Connected = false;
TFakeNodeWhiteboardService::Info[env.GetNodeId(4)].Connected = false;
TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false;
- env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage"));
// 2dc disabled
TFakeNodeWhiteboardService::Info[env.GetNodeId(7)].Connected = true;
- env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(7), 60000000, "storage"));
@@ -1334,7 +1245,6 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage"));
TFakeNodeWhiteboardService::Info[env.GetNodeId(5)].Connected = false;
- env.RestartCms();
env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP,
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage"));
diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp
index 96421cc7a50..444ff73933a 100644
--- a/ydb/core/cms/cms_ut_common.cpp
+++ b/ydb/core/cms/cms_ut_common.cpp
@@ -316,10 +316,12 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
ui32 vdiskId = pdiskIndex * vdiskPerPdisk + vdiskIndex;
ui32 groupId = groupShift + vdiskId;
ui32 failRealm = 0;
- if (useMirror3dcErasure)
+ ui32 failDomain = nodeIndex % 8;
+ if (useMirror3dcErasure) {
failRealm = (nodeIndex % 9) / 3;
-
- TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)(nodeIndex % 8), (ui8)0};
+ failDomain = (nodeIndex % 9) % 3;
+ }
+ TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)failDomain, (ui8)0};
auto &vdisk = node.VDiskStateInfo[id];
VDiskIDFromVDiskID(id, vdisk.MutableVDiskId());
@@ -337,7 +339,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
vdiskConfig.SetGroupId(groupId);
vdiskConfig.SetGroupGeneration(1);
vdiskConfig.SetFailRealmIdx(failRealm);
- vdiskConfig.SetFailDomainIdx(nodeIndex % 8);
+ vdiskConfig.SetFailDomainIdx(failDomain);
config->MutableGroup(groupId)->AddVSlotId()
->CopyFrom(vdiskConfig.GetVSlotId());
diff --git a/ydb/core/cms/erasure_checkers.cpp b/ydb/core/cms/erasure_checkers.cpp
index b98748c7932..28634af6f09 100644
--- a/ydb/core/cms/erasure_checkers.cpp
+++ b/ydb/core/cms/erasure_checkers.cpp
@@ -1,233 +1,361 @@
#include "erasure_checkers.h"
+#include <ydb/core/protos/cms.pb.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
+
+#include <library/cpp/actors/core/log.h>
+
+#include <util/string/cast.h>
+#include <util/system/backtrace.h>
+#include <util/system/yassert.h>
+
+#include <bitset>
+#include <sstream>
+#include <vector>
+
namespace NKikimr::NCms {
-bool TErasureCounterBase::IsDown(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime, TErrorInfo &error) {
- const auto &node = info->Node(vdisk.NodeId);
- const auto &pdisk = info->PDisk(vdisk.PDiskId);
- const auto defaultDeadline = TActivationContext::Now() + retryTime;
+using namespace Ydb::Maintenance;
- // Check we received info for PDisk.
- if (!pdisk.NodeId) {
- ++Down;
- error.Reason = TStringBuilder() << "Missing info for " << pdisk.ItemName();
- return false;
+IStorageGroupChecker::EVDiskState IStorageGroupChecker::VDiskState(NKikimrCms::EState state) {
+ switch (state) {
+ case NKikimrCms::UP:
+ return VDISK_STATE_UP;
+ case NKikimrCms::UNKNOWN:
+ return VDISK_STATE_UNSPECIFIED;
+ case NKikimrCms::DOWN:
+ return VDISK_STATE_DOWN;
+ case NKikimrCms::RESTART:
+ return VDISK_STATE_RESTART;
+ default:
+ Y_FAIL("Unknown EState");
}
+}
- return (node.NodeId != VDisk.NodeId && node.IsDown(error, defaultDeadline))
- || (pdisk.PDiskId != VDisk.PDiskId && pdisk.IsDown(error, defaultDeadline))
- || vdisk.IsDown(error, defaultDeadline);
+TSimpleSharedPtr<IStorageGroupChecker> CreateStorageGroupChecker(TErasureType::EErasureSpecies es, ui32 groupId) {
+ switch (es) {
+ case TErasureType::ErasureNone:
+ case TErasureType::ErasureMirror3:
+ case TErasureType::Erasure3Plus1Block:
+ case TErasureType::Erasure3Plus1Stripe:
+ case TErasureType::Erasure4Plus2Block:
+ case TErasureType::Erasure3Plus2Block:
+ case TErasureType::Erasure4Plus2Stripe:
+ case TErasureType::Erasure3Plus2Stripe:
+ case TErasureType::ErasureMirror3Plus2:
+ case TErasureType::Erasure4Plus3Block:
+ case TErasureType::Erasure4Plus3Stripe:
+ case TErasureType::Erasure3Plus3Block:
+ case TErasureType::Erasure3Plus3Stripe:
+ case TErasureType::Erasure2Plus3Block:
+ case TErasureType::Erasure2Plus3Stripe:
+ case TErasureType::Erasure2Plus2Block:
+ case TErasureType::Erasure2Plus2Stripe:
+ case TErasureType::ErasureMirror3of4:
+ return TSimpleSharedPtr<IStorageGroupChecker>(new TDefaultErasureChecker(groupId));
+ case TErasureType::ErasureMirror3dc:
+ return TSimpleSharedPtr<IStorageGroupChecker>(new TMirror3dcChecker(groupId));
+ default:
+ Y_FAIL("Unknown erasure type: %d", es);
+ }
}
-bool TErasureCounterBase::IsLocked(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime,
- TDuration &duration, TErrorInfo &error)
-{
- const auto &node = info->Node(vdisk.NodeId);
- const auto &pdisk = info->PDisk(vdisk.PDiskId);
- // Check we received info for VDisk.
- if (!vdisk.NodeId || !vdisk.PDiskId) {
- ++Down;
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Missing info for " << vdisk.ItemName();
- return false;
+void TErasureCheckerBase::AddVDisk(const TVDiskID& vdiskId) {
+ if (DiskToState.contains(vdiskId)) {
+ return;
}
-
- return node.IsLocked(error, retryTime, TActivationContext::Now(), duration)
- || pdisk.IsLocked(error, retryTime, TActivationContext::Now(), duration)
- || vdisk.IsLocked(error, retryTime, TActivationContext::Now(), duration);
+ DiskToState[vdiskId].State = VDISK_STATE_UNSPECIFIED;
}
-bool TErasureCounterBase::GroupAlreadyHasLockedDisks() const {
- return HasAlreadyLockedDisks;
-}
+void TErasureCheckerBase::UpdateVDisk(const TVDiskID& vdiskId, EState state) {
+ AddVDisk(vdiskId);
-bool TErasureCounterBase::CheckForMaxAvailability(TErrorInfo &error, TInstant &defaultDeadline, bool allowPartial) const {
- if (Locked + Down > 1) {
- if (HasAlreadyLockedDisks && !allowPartial) {
- error.Code = TStatus::DISALLOW;
- error.Reason = "The request is incorrect: too many disks from the one group. "
- "Fix the request or set PartialPermissionAllowed to true";
- return false;
- }
+ const auto newState = VDiskState(state);
+
+ // The disk is marked based on the information obtained by InfoCollector.
+ // If we marked the disk DOWN, it means that there was a reason
+ if (DiskToState[vdiskId].State == VDISK_STATE_DOWN
+ && newState == VDISK_STATE_UP)
+ return;
+
+ if (DiskToState[vdiskId].State == VDISK_STATE_DOWN) {
+ --DownVDisksCount;
+ }
+
+ if (DiskToState[vdiskId].State == VDISK_STATE_LOCKED ||
+ DiskToState[vdiskId].State == VDISK_STATE_RESTART) {
+ --LockedVDisksCount;
+ }
+
+ DiskToState[vdiskId].State = newState;
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
- << ". " << "Too many locked and down vdisks: " << Locked + Down;
- error.Deadline = defaultDeadline;
- return false;
+ if (newState == VDISK_STATE_RESTART || newState == VDISK_STATE_LOCKED) {
+ ++LockedVDisksCount;
+ }
+
+ if (newState == VDISK_STATE_DOWN) {
+ ++DownVDisksCount;
}
- return true;
}
-void TDefaultErasureCounter::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration retryTime,
- TDuration duration, TErrorInfo &error)
-{
- Y_VERIFY_DEBUG(vdisk.VDiskId != VDisk.VDiskId);
-
- // Check locks.
- TErrorInfo err;
- if (IsLocked(vdisk, info, retryTime, duration, err)) {
- ++Locked;
- error.Code = err.Code;
- error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
- << ". " << err.Reason;
- error.Deadline = Max(error.Deadline, err.Deadline);
- return;
+void TErasureCheckerBase::LockVDisk(const TVDiskID& vdiskId) {
+ Y_VERIFY(DiskToState.contains(vdiskId));
+
+ ++LockedVDisksCount;
+ if (DiskToState[vdiskId].State == VDISK_STATE_DOWN) {
+ DiskToState[vdiskId].State = VDISK_STATE_RESTART;
+ --DownVDisksCount;
+ } else {
+ DiskToState[vdiskId].State = VDISK_STATE_LOCKED;
}
+}
- // Check if disk is down.
- if (IsDown(vdisk, info, retryTime, err)) {
- ++Down;
- error.Code = err.Code;
- error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
- << ". " << err.Reason;
- error.Deadline = Max(error.Deadline, err.Deadline);
+void TErasureCheckerBase::UnlockVDisk(const TVDiskID& vdiskId) {
+ Y_VERIFY(DiskToState.contains(vdiskId));
+
+ --LockedVDisksCount;
+ if (DiskToState[vdiskId].State == VDISK_STATE_RESTART) {
+ DiskToState[vdiskId].State = VDISK_STATE_DOWN;
+ ++DownVDisksCount;
+ } else {
+ DiskToState[vdiskId].State = VDISK_STATE_UP;
}
}
-bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
- TInstant &defaultDeadline, bool allowPartial) const
-{
- if (HasAlreadyLockedDisks && allowPartial) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = "You cannot get two or more disks from the same group at the same time"
- " without specifying the PartialPermissionAllowed parameter";
- error.Deadline = defaultDeadline;
- return false;
- }
-
- if (Down + Locked > info->BSGroup(GroupId).Erasure.ParityParts()) {
- if (HasAlreadyLockedDisks && !allowPartial) {
- error.Code = TStatus::DISALLOW;
- error.Reason = "The request is incorrect: too many disks from the one group. "
- "Fix the request or set PartialPermissionAllowed to true";
- return false;
+void TErasureCheckerBase::EmplaceTask(const TVDiskID &vdiskId, i32 priority,
+ ui64 order, const std::string &taskUId) {
+
+ auto& priorities = DiskToState[vdiskId].Priorities;
+ auto it = priorities.lower_bound(TVDiskState::TTaskPriority(priority, order, ""));
+
+ if (it != priorities.end() && (it->Order == order && it->Priority == priority)) {
+ if (it->TaskUId == taskUId) {
+ return;
}
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Cannot lock disk " << VDisk.PrettyItemName()
- << ". Too many locked nodes for group " << GroupId;
- error.Deadline = defaultDeadline;
- return false;
+ Y_FAIL("Task with the same priority and order already exists");
+ } else {
+ priorities.emplace_hint(it, priority, order, taskUId);
}
- return true;
}
-bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
- TInstant &defaultDeadline, bool allowPartial) const
-{
- Y_UNUSED(info);
+void TErasureCheckerBase::RemoveTask(const std::string &taskUId) {
+ auto taskUIdsEqual = [&taskUId](const TVDiskState::TTaskPriority &p) {
+ return p.TaskUId == taskUId;
+ };
+
+ for (auto &[vdiskId, vdiskState] : DiskToState) {
+ auto it = std::find_if(vdiskState.Priorities.begin(),
+ vdiskState.Priorities.end(), taskUIdsEqual);
+
+ if (it == vdiskState.Priorities.end()) {
+ continue;
+ }
+
+ vdiskState.Priorities.erase(it);
- if (HasAlreadyLockedDisks && allowPartial) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = "You cannot get two or more disks from the same group at the same time"
- " without specifying the PartialPermissionAllowed parameter";
- error.Deadline = defaultDeadline;
- return false;
}
+}
- if (DataCenterDisabledNodes.size() <= 1)
- return true;
+ActionState::ActionReason TDefaultErasureChecker::TryToLockVDisk(const TVDiskID &vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const {
+ Y_VERIFY(DiskToState.contains(vdiskId));
- if (DataCenterDisabledNodes.size() == 2
- && (DataCenterDisabledNodes.begin()->second <= 1
- || (++DataCenterDisabledNodes.begin())->second <= 1))
- {
- return true;
+ const auto& diskState = DiskToState.at(vdiskId);
+
+ if (diskState.State == VDISK_STATE_RESTART
+ || diskState.State == VDISK_STATE_LOCKED) {
+ return ActionState::ACTION_REASON_ALREADY_LOCKED;
}
- if (HasAlreadyLockedDisks && !allowPartial) {
- error.Code = TStatus::DISALLOW;
- error.Reason = "The request is incorrect: too many disks from the one group. "
- "Fix the request or set PartialPermissionAllowed to true";
- return false;
+ auto taskPriority = TVDiskState::TTaskPriority(priority, order, "");
+ if (!diskState.Priorities.empty() && taskPriority < *diskState.Priorities.rbegin()) {
+ return ActionState::ACTION_REASON_LOW_PRIORITY;
}
- if (DataCenterDisabledNodes.size() > 2) {
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
- << ". Too many data centers have unavailable vdisks: "
- << DataCenterDisabledNodes.size();
- error.Deadline = defaultDeadline;
- return false;
+ if (mode == NKikimrCms::MODE_FORCE_RESTART) {
+ return ActionState::ACTION_REASON_OK;
}
- error.Code = TStatus::DISALLOW_TEMP;
- error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
- << ". Data centers have too many unavailable vdisks";
- error.Deadline = defaultDeadline;
+ // Check how many disks are waiting for higher prioriry task to be locked
+ ui32 priorityLockedCount = 0;
+ for (auto &[id, vdiskState] : DiskToState) {
+ if (vdiskState.State != VDISK_STATE_UP) {
+ continue;
+ }
- return false;
-}
+ if (!vdiskState.Priorities.empty() && taskPriority < *vdiskState.Priorities.rbegin()) {
+ ++priorityLockedCount;
+ }
+ }
-void TMirror3dcCounter::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration retryTime,
- TDuration duration, TErrorInfo &error)
-{
- Y_VERIFY_DEBUG(vdisk.VDiskId != VDisk.VDiskId);
-
- // Check locks.
- TErrorInfo err;
- if (IsLocked(vdisk, info, retryTime, duration, err)
- || IsDown(vdisk, info, retryTime, err)) {
- error.Code = err.Code;
- error.Reason = TStringBuilder() << "Issue in affected group " << GroupId
- << ". " << err.Reason;
- error.Deadline = Max(error.Deadline, err.Deadline);
- ++Locked;
- ++DataCenterDisabledNodes[vdisk.VDiskId.FailRealm];
+ ui32 disksLimit = 0;
+ if (diskState.State == VDISK_STATE_DOWN) {
+ disksLimit = 1;
}
+
+ switch (mode) {
+ case NKikimrCms::MODE_MAX_AVAILABILITY:
+ if ((LockedVDisksCount + DownVDisksCount + priorityLockedCount) > disksLimit) {
+ return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
+ }
+ break;
+ case NKikimrCms::MODE_KEEP_AVAILABLE:
+ if ((LockedVDisksCount + DownVDisksCount + priorityLockedCount) >= disksLimit + 2) {
+ return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
+ }
+ break;
+ default:
+ Y_FAIL("Unexpected Availability mode");
+ }
+
+ return ActionState::ACTION_REASON_OK;
}
-void TMirror3dcCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) {
- for (const auto &vdId : info->BSGroup(GroupId).VDisks) {
- if (vdId != VDisk.VDiskId)
- CountVDisk(info->VDisk(vdId), info, retryTime, duration, error);
+ActionState::ActionReason TMirror3dcChecker::TryToLockVDisk(const TVDiskID &vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const {
+ Y_VERIFY(DiskToState.contains(vdiskId));
+
+ const auto& diskState = DiskToState.at(vdiskId);
+ const auto taskPriority = TVDiskState::TTaskPriority(priority, order, "");
+
+ if (!diskState.Priorities.empty() && taskPriority < *diskState.Priorities.rbegin()) {
+ return ActionState::ACTION_REASON_LOW_PRIORITY;
+ }
+
+ if (mode == MODE_FORCE_RESTART) {
+ return ActionState::ACTION_REASON_OK;
+ }
+
+ if (diskState.State == VDISK_STATE_LOCKED
+ || diskState.State == VDISK_STATE_RESTART) {
+ return ActionState::ACTION_REASON_ALREADY_LOCKED;
+ }
+
+ const std::vector<std::bitset<9>> MaxOkGroups = {
+ 0x1E0, 0x1D0, 0x1C8, 0x1C4, 0x1C2, 0x1C1,
+ 0x138, 0xB8, 0x78, 0x3C, 0x3A, 0x39,
+ 0x107, 0x87, 0x47, 0x27, 0x17, 0xF,
+ };
+
+ ui32 priorityLockedCount = 0;
+ std::bitset<9> groupState(0);
+ for (auto& [id, state] : DiskToState) {
+ if (id == vdiskId)
+ continue;
+
+ if (state.State != VDISK_STATE_UP
+ || (!state.Priorities.empty() && taskPriority < *state.Priorities.rbegin())) {
+ groupState |= (1 << (id.FailRealm * 3 + id.FailDomain));
+ }
+
+ if (!state.Priorities.empty() && taskPriority < *state.Priorities.rbegin()) {
+ ++priorityLockedCount;
+ }
+ }
+ groupState |= (1 << (vdiskId.FailRealm * 3 + vdiskId.FailDomain));
+
+ ui32 downVDisks = diskState.State == VDISK_STATE_DOWN ? DownVDisksCount - 1 : DownVDisksCount;
+ if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
+ if ((downVDisks + LockedVDisksCount + priorityLockedCount) == 0) {
+ return ActionState::ACTION_REASON_OK;
+ }
+ return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
+ }
+
+ size_t minCount = 9;
+ for (auto okGroup : MaxOkGroups) {
+ auto xoredState = (~okGroup) & groupState;
+ minCount = std::min(minCount, xoredState.count());
}
- ++Locked;
- ++DataCenterDisabledNodes[VDisk.VDiskId.FailRealm];
- if (Locked && error.Code == TStatus::DISALLOW) {
- HasAlreadyLockedDisks = true;
+ if (minCount > 0) {
+ return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS;
}
+
+ return ActionState::ACTION_REASON_OK;
}
-void TDefaultErasureCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) {
- for (const auto &vdId : info->BSGroup(GroupId).VDisks) {
- if (vdId != VDisk.VDiskId)
- CountVDisk(info->VDisk(vdId), info, retryTime, duration, error);
+std::string TDefaultErasureChecker::ReadableReason(const TVDiskID &vdiskId,
+ EAvailabilityMode mode, ActionState::ActionReason reason) const {
+ std::stringstream readableReason;
+
+ if (reason == ActionState::ACTION_REASON_OK) {
+ readableReason << "Action is OK";
+ return readableReason.str();
}
- if (Locked && error.Code == TStatus::DISALLOW) {
- HasAlreadyLockedDisks = true;
+
+ readableReason << "Cannot lock vdisk" << vdiskId.ToString() << ". ";
+
+ switch (reason) {
+ case ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS:
+ readableReason << "Group " << GroupId
+ << " has too many unavailable vdisks. "
+ << "Down disks count: " << DownVDisksCount
+ << ". Locked disks count: " << LockedVDisksCount;
+
+ if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
+ readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
+ << " is " << 2;
+ }
+ if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
+ readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
+ << " is " << 1;
+ }
+ break;
+ case ActionState::ACTION_REASON_ALREADY_LOCKED:
+ // TODO:: add info about lock id
+ readableReason << "Disk is already locked";
+ break;
+ case ActionState::ACTION_REASON_LOW_PRIORITY:
+ // TODO:: add info about task with higher priority
+ readableReason << "Task with higher priority in progress";
+ break;
+ default:
+ Y_FAIL("Unexpected Reason");
}
- ++Locked;
+
+ return readableReason.str();
}
-TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo &vdisk, ui32 groupId) {
- switch (es) {
- case TErasureType::ErasureNone:
- case TErasureType::ErasureMirror3:
- case TErasureType::Erasure3Plus1Block:
- case TErasureType::Erasure3Plus1Stripe:
- case TErasureType::Erasure4Plus2Block:
- case TErasureType::Erasure3Plus2Block:
- case TErasureType::Erasure4Plus2Stripe:
- case TErasureType::Erasure3Plus2Stripe:
- case TErasureType::ErasureMirror3Plus2:
- case TErasureType::Erasure4Plus3Block:
- case TErasureType::Erasure4Plus3Stripe:
- case TErasureType::Erasure3Plus3Block:
- case TErasureType::Erasure3Plus3Stripe:
- case TErasureType::Erasure2Plus3Block:
- case TErasureType::Erasure2Plus3Stripe:
- case TErasureType::Erasure2Plus2Block:
- case TErasureType::Erasure2Plus2Stripe:
- case TErasureType::ErasureMirror3of4:
- return TSimpleSharedPtr<IErasureCounter>(new TDefaultErasureCounter(vdisk, groupId));
- case TErasureType::ErasureMirror3dc:
- return TSimpleSharedPtr<IErasureCounter>(new TMirror3dcCounter(vdisk, groupId));
+std::string TMirror3dcChecker::ReadableReason(const TVDiskID &vdiskId,
+ EAvailabilityMode mode, ActionState::ActionReason reason) const {
+ std::stringstream readableReason;
+
+ if (reason == ActionState::ACTION_REASON_OK) {
+ readableReason << "Action is OK";
+ return readableReason.str();
+ }
+
+ readableReason << "Cannot lock vdisk" << vdiskId.ToString() << ". ";
+
+ switch (reason) {
+ case ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS:
+ readableReason << "Group " << GroupId
+ << " has too many unavailable vdisks. "
+ << "Down disks count: " << DownVDisksCount
+ << ". Locked disks count: " << LockedVDisksCount;
+
+ if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
+ readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
+ << " is 1";
+ }
+ if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
+ readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode)
+ << "4, 3 of which are in the same data center";
+ }
+ break;
+ case ActionState::ACTION_REASON_ALREADY_LOCKED:
+ // TODO:: add info about lock id
+ readableReason << "Disk is already locked";
+ break;
+ case ActionState::ACTION_REASON_LOW_PRIORITY:
+ // TODO:: add info about task with higher priority
+ readableReason << "Task with higher priority in progress";
+ break;
default:
- Y_FAIL("Unknown erasure type: %d", es);
+ Y_FAIL("Unexpected Reason");
}
+
+ return readableReason.str();
}
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/erasure_checkers.h b/ydb/core/cms/erasure_checkers.h
index 0c0de0be766..c958630e273 100644
--- a/ydb/core/cms/erasure_checkers.h
+++ b/ydb/core/cms/erasure_checkers.h
@@ -1,79 +1,133 @@
#pragma once
#include "defs.h"
-#include "cluster_info.h"
+#include <ydb/core/blobstorage/base/blobstorage_vdiskid.h>
#include <ydb/core/erasure/erasure.h>
#include <ydb/core/protos/cms.pb.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
+
+#include <util/generic/queue.h>
+#include <util/system/compiler.h>
+
+#include <algorithm>
+#include <functional>
+#include <queue>
+#include <string>
namespace NKikimr::NCms {
using namespace NKikimrCms;
-class IErasureCounter {
+class IStorageGroupChecker {
+public:
+ enum EVDiskState : ui32 {
+ VDISK_STATE_UNSPECIFIED /* "Unspecified" */,
+ VDISK_STATE_UP /* "Up" */,
+ VDISK_STATE_LOCKED /* "Locked" */,
+ VDISK_STATE_RESTART /* "Restart" */,
+ VDISK_STATE_DOWN /* "Down" */,
+ VDISK_STATE_SCHEDULED_LOCKED /* "Scheduled locked" */
+ };
+
+protected:
+ EVDiskState VDiskState(NKikimrCms::EState state);
+
public:
- virtual ~IErasureCounter() = default;
+ virtual ~IStorageGroupChecker() = default;
+
+ virtual void AddVDisk(const TVDiskID& vdiskId) = 0;
+ virtual void UpdateVDisk(const TVDiskID& vdiskId, EState state) = 0;
+
+ virtual void LockVDisk(const TVDiskID& vdiskId) = 0;
+ virtual void UnlockVDisk(const TVDiskID& vdiskId) = 0;
- virtual bool GroupAlreadyHasLockedDisks() const = 0;
- virtual bool CheckForMaxAvailability(TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
- virtual bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
- virtual void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0;
- virtual void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0;
+ virtual void EmplaceTask(const TVDiskID& vdiskId, i32 priority, ui64 order, const std::string& taskUId) = 0;
+ virtual void RemoveTask(const std::string& taskUId) = 0;
+
+ virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const = 0;
+ virtual std::string ReadableReason(const TVDiskID& vdiskId, EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const = 0;
};
-class TErasureCounterBase: public IErasureCounter {
+class TErasureCheckerBase : public IStorageGroupChecker {
protected:
- ui32 Down;
- ui32 Locked;
- const TVDiskInfo& VDisk;
- const ui32 GroupId;
- bool HasAlreadyLockedDisks;
+ /** Structure to hold information about vdisk state and priorities and orders of some task.
+ *
+ * Requests with equal priority are processed in the order of arrival at CMS.
+ */
+ struct TVDiskState {
+ public:
+ struct TTaskPriority {
+ i32 Priority;
+ ui64 Order;
+ std::string TaskUId;
+
+ explicit TTaskPriority(i32 priority, ui64 order, const std::string& taskUId)
+ : Priority(priority)
+ , Order(order)
+ , TaskUId(taskUId)
+ {}
+
+ bool operator<(const TTaskPriority& rhs) const {
+ return Priority < rhs.Priority || (Priority == rhs.Priority && Order > rhs.Order);
+ }
+ };
+ public:
+ EVDiskState State;
+ std::set<TTaskPriority> Priorities;
+ };
protected:
- bool IsDown(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TErrorInfo& error);
- bool IsLocked(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TDuration& duration, TErrorInfo& error);
+ ui32 GroupId;
+
+ THashMap<TVDiskID, TVDiskState> DiskToState;
+ ui32 DownVDisksCount;
+ ui32 LockedVDisksCount;
public:
- TErasureCounterBase(const TVDiskInfo& vdisk, ui32 groupId)
- : Down(0)
- , Locked(0)
- , VDisk(vdisk)
- , GroupId(groupId)
- , HasAlreadyLockedDisks(false)
+ explicit TErasureCheckerBase(ui32 groupId)
+ : GroupId(groupId)
+ , DownVDisksCount(0)
+ , LockedVDisksCount(0)
{
}
+ virtual ~TErasureCheckerBase() = default;
- bool GroupAlreadyHasLockedDisks() const final;
- bool CheckForMaxAvailability(TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const final;
-};
+ void AddVDisk(const TVDiskID& vdiskId) override final;
+ void UpdateVDisk(const TVDiskID& vdiskId, EState state) override final;
-class TDefaultErasureCounter: public TErasureCounterBase {
-public:
- TDefaultErasureCounter(const TVDiskInfo& vdisk, ui32 groupId)
- : TErasureCounterBase(vdisk, groupId)
- {
- }
+ void LockVDisk(const TVDiskID& vdiskId) override final;
+ void UnlockVDisk(const TVDiskID& vdiskId) override final;
- void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
- bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override;
- void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;
+ void EmplaceTask(const TVDiskID &vdiskId, i32 priority, ui64 order,
+ const std::string &taskUId) override final;
+ void RemoveTask(const std::string &taskUId) override final;
};
-class TMirror3dcCounter: public TErasureCounterBase {
-private:
- THashMap<ui8, ui32> DataCenterDisabledNodes;
+class TDefaultErasureChecker : public TErasureCheckerBase {
+public:
+ explicit TDefaultErasureChecker(ui32 groupId)
+ : TErasureCheckerBase(groupId)
+ {}
+
+ virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const override final;
+
+ virtual std::string ReadableReason(const TVDiskID &vdiskId,
+ EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const override final;
+};
+class TMirror3dcChecker : public TErasureCheckerBase {
public:
- TMirror3dcCounter(const TVDiskInfo& vdisk, ui32 groupId)
- : TErasureCounterBase(vdisk, groupId)
- {
- }
+ explicit TMirror3dcChecker(ui32 groupId)
+ : TErasureCheckerBase(groupId)
+ {}
+
+ virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const override final;
- void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
- bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override;
- void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;
+ virtual std::string ReadableReason(const TVDiskID &vdiskId,
+ EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const override final;
};
-TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo& vdisk, ui32 groupId);
+TSimpleSharedPtr<IStorageGroupChecker> CreateStorageGroupChecker(TErasureType::EErasureSpecies es, ui32 groupId);
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp
index 8a857b0a63f..9be56b669a4 100644
--- a/ydb/core/cms/node_checkers.cpp
+++ b/ydb/core/cms/node_checkers.cpp
@@ -1,4 +1,6 @@
#include "node_checkers.h"
+#include "util/system/yassert.h"
+#include "ydb/public/api/protos/draft/ydb_maintenance.pb.h"
#include <ydb/core/protos/cms.pb.h>
@@ -9,6 +11,8 @@ namespace NKikimr::NCms {
#define NCH_LOG_D(stream) LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream)
#define NCH_LOG_T(stream) LOG_TRACE_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream)
+using namespace Ydb::Maintenance;
+
TNodesLimitsCounterBase::ENodeState INodesChecker::NodeState(NKikimrCms::EState state) {
switch (state) {
case NKikimrCms::UP:
@@ -28,7 +32,7 @@ void TNodesCounterBase::AddNode(ui32 nodeId) {
if (NodeToState.contains(nodeId)) {
return;
}
- NodeToState[nodeId] = NODE_STATE_UNSPECIFIED;
+ NodeToState[nodeId].State = NODE_STATE_UNSPECIFIED;
}
void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) {
@@ -36,17 +40,17 @@ void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) {
AddNode(nodeId);
}
- if (NodeToState[nodeId] == NODE_STATE_DOWN) {
+ if (NodeToState[nodeId].State == NODE_STATE_DOWN) {
--DownNodesCount;
}
- if (NodeToState[nodeId] == NODE_STATE_LOCKED ||
- NodeToState[nodeId] == NODE_STATE_RESTART) {
+ if (NodeToState[nodeId].State == NODE_STATE_LOCKED ||
+ NodeToState[nodeId].State == NODE_STATE_RESTART) {
--LockedNodesCount;
}
const auto nodeState = NodeState(state);
- NodeToState[nodeId] = nodeState;
+ NodeToState[nodeId].State = nodeState;
if (nodeState == NODE_STATE_RESTART || nodeState == NODE_STATE_LOCKED) {
++LockedNodesCount;
@@ -61,11 +65,11 @@ void TNodesCounterBase::LockNode(ui32 nodeId) {
Y_VERIFY(NodeToState.contains(nodeId));
++LockedNodesCount;
- if (NodeToState[nodeId] == NODE_STATE_DOWN) {
- NodeToState[nodeId] = NODE_STATE_RESTART;
+ if (NodeToState[nodeId].State == NODE_STATE_DOWN) {
+ NodeToState[nodeId].State = NODE_STATE_RESTART;
--DownNodesCount;
} else {
- NodeToState[nodeId] = NODE_STATE_LOCKED;
+ NodeToState[nodeId].State = NODE_STATE_LOCKED;
}
}
@@ -73,89 +77,144 @@ void TNodesCounterBase::UnlockNode(ui32 nodeId) {
Y_VERIFY(NodeToState.contains(nodeId));
--LockedNodesCount;
- if (NodeToState[nodeId] == NODE_STATE_RESTART) {
- NodeToState[nodeId] = NODE_STATE_DOWN;
+ if (NodeToState[nodeId].State == NODE_STATE_RESTART) {
+ NodeToState[nodeId].State = NODE_STATE_DOWN;
++DownNodesCount;
} else {
- NodeToState[nodeId] = NODE_STATE_UP;
+ NodeToState[nodeId].State = NODE_STATE_UP;
+ }
+}
+
+void TNodesCounterBase::EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) {
+ auto& priorities = NodeToState[nodeId].Priorities;
+ auto it = priorities.lower_bound(TNodeState::TTaskPriority(priority, order, ""));
+
+ if (it != priorities.end() && (it->Order == order && it->Priority == priority)) {
+ if (it->TaskUId == taskUId) {
+ return;
+ }
+ Y_FAIL("Task with the same priority and order already exists");
+ } else {
+ priorities.emplace_hint(it, priority, order, taskUId);
+ }
+
+ NodesWithScheduledTasks.insert(nodeId);
+}
+
+void TNodesCounterBase::RemoveTask(const std::string& taskUId) {
+ auto taskUIdsEqual = [&taskUId](const TNodeState::TTaskPriority &p) {
+ return p.TaskUId == taskUId;
+ };
+
+ TVector<ui32> NodesToRemove;
+ for (auto nodeId : NodesWithScheduledTasks) {
+ auto& nodeState = NodeToState[nodeId];
+ auto it = std::find_if(nodeState.Priorities.begin(),
+ nodeState.Priorities.end(), taskUIdsEqual);
+ if (it == nodeState.Priorities.end()) {
+ continue;
+ }
+
+ nodeState.Priorities.erase(it);
+
+ if (nodeState.Priorities.empty()) {
+ NodesToRemove.push_back(nodeId);
+ }
+ }
+
+ for (auto nodeId : NodesToRemove) {
+ NodesWithScheduledTasks.erase(nodeId);
}
}
-bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const {
+ActionState::ActionReason TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const {
Y_VERIFY(NodeToState.contains(nodeId));
- auto nodeState = NodeToState.at(nodeId);
- bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART;
+ const auto& nodeState = NodeToState.at(nodeId);
+ const auto taskPriority = TNodeState::TTaskPriority(priority, order, "");
- NCH_LOG_D("Checking Node: "
- << nodeId << ", with state: " << ToString(nodeState)
- << ", with limit: " << DisabledNodesLimit
- << ", with ratio limit: " << DisabledNodesRatioLimit
- << ", locked nodes: " << LockedNodesCount
- << ", down nodes: " << DownNodesCount);
+ if (!nodeState.Priorities.empty() && (taskPriority < *nodeState.Priorities.rbegin())) {
+ return ActionState::ACTION_REASON_LOW_PRIORITY;
+ }
- // Allow to maintain down/unavailable node
- if (nodeState == NODE_STATE_DOWN) {
- return true;
+ if (nodeState.State == NODE_STATE_RESTART ||
+ nodeState.State == NODE_STATE_LOCKED ||
+ nodeState.State == NODE_STATE_UNSPECIFIED) {
+
+ return ActionState::ACTION_REASON_ALREADY_LOCKED;
}
- if (nodeState == NODE_STATE_RESTART ||
- nodeState == NODE_STATE_LOCKED ||
- nodeState == NODE_STATE_UNSPECIFIED) {
+ ui32 priorityLockedCount = 0;
+ for (auto id : NodesWithScheduledTasks) {
+ Y_VERIFY(!NodeToState.at(id).Priorities.empty());
- return false;
+ if (taskPriority < *NodeToState.at(id).Priorities.rbegin()) {
+ ++priorityLockedCount;
+ }
}
+ ui32 downNodes = nodeState.State == NODE_STATE_DOWN ? DownNodesCount - 1 : DownNodesCount;
// Always allow at least one node
- if (LockedNodesCount + DownNodesCount == 0) {
- return true;
+ if (LockedNodesCount + downNodes + priorityLockedCount == 0) {
+ return ActionState::ACTION_REASON_OK;
}
+ bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART;
+
if (isForceRestart && !LockedNodesCount) {
- return true;
+ return ActionState::ACTION_REASON_OK;
}
if (DisabledNodesLimit > 0 &&
- (LockedNodesCount + DownNodesCount + 1 > DisabledNodesLimit)) {
- return false;
+ (LockedNodesCount + downNodes + priorityLockedCount + 1 > DisabledNodesLimit)) {
+ return ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED;
}
if (DisabledNodesRatioLimit > 0 &&
- ((LockedNodesCount + DownNodesCount + 1) * 100 >
- (NodeToState.size() * DisabledNodesRatioLimit))) {
- return false;
+ ((LockedNodesCount + downNodes + priorityLockedCount + 1) * 100 > (NodeToState.size() * DisabledNodesRatioLimit))) {
+ return ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED;
}
- return true;
+ return ActionState::ACTION_REASON_OK;
}
-bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const {
+ActionState::ActionReason TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const {
Y_VERIFY(NodeToState.contains(nodeId));
- auto nodeState = NodeToState.at(nodeId);
- NCH_LOG_D("Checking limits for sys tablet: " << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
- << ", on node: " << nodeId
- << ", with state: " << ToString(nodeState)
- << ", locked nodes: " << LockedNodesCount
- << ", down nodes: " << DownNodesCount);
+ const auto& nodeState = NodeToState.at(nodeId);
+ const auto taskPriority = TNodeState::TTaskPriority(priority, order, "");
- if (nodeState == NODE_STATE_RESTART ||
- nodeState == NODE_STATE_LOCKED ||
- nodeState == NODE_STATE_UNSPECIFIED) {
+ if (!nodeState.Priorities.empty() && (taskPriority < *nodeState.Priorities.rbegin())) {
+ return ActionState::ACTION_REASON_LOW_PRIORITY;
+ }
+
+ if (nodeState.State == NODE_STATE_RESTART ||
+ nodeState.State == NODE_STATE_LOCKED ||
+ nodeState.State == NODE_STATE_UNSPECIFIED) {
+
+ return ActionState::ACTION_REASON_ALREADY_LOCKED;
+ }
+
+ ui32 priorityLockedCount = 0;
+ for (auto id : NodesWithScheduledTasks) {
+ Y_VERIFY(!NodeToState.at(id).Priorities.empty());
- return false;
+ if (taskPriority < *NodeToState.at(id).Priorities.rbegin()) {
+ ++priorityLockedCount;
+ }
}
+ ui32 downNodes = nodeState.State == NODE_STATE_DOWN ? DownNodesCount - 1 : DownNodesCount;
ui32 tabletNodes = NodeToState.size();
switch (mode) {
case NKikimrCms::MODE_MAX_AVAILABILITY:
- if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) * 2 > tabletNodes){
- return false;
+ if (tabletNodes > 1 && (downNodes + LockedNodesCount + priorityLockedCount + 1) * 2 > tabletNodes){
+ return ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED;
}
break;
case NKikimrCms::MODE_KEEP_AVAILABLE:
- if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) > tabletNodes - 1) {
- return false;
+ if (tabletNodes > 1 && (downNodes + LockedNodesCount + priorityLockedCount + 1) > tabletNodes - 1) {
+ return ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED;
}
break;
case NKikimrCms::MODE_FORCE_RESTART:
@@ -164,7 +223,124 @@ bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili
Y_FAIL("Unknown availability mode");
}
- return true;
+ return ActionState::ACTION_REASON_OK;
}
+std::string TTenantLimitsCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
+ ActionState::ActionReason reason) const {
+ Y_UNUSED(mode);
+
+ std::stringstream readableReason;
+
+ if (reason == ActionState::ACTION_REASON_OK) {
+ readableReason << "Action is OK";
+ return readableReason.str();
+ }
+
+ readableReason << "Cannot lock node: " << nodeId;
+
+ switch (reason) {
+ case ActionState::ACTION_REASON_ALREADY_LOCKED:
+ readableReason << "Node is already locked";
+ break;
+ case ActionState::ACTION_REASON_LOW_PRIORITY:
+ readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId;
+ break;
+ case ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED:
+ readableReason << ". Too many locked nodes for tenant " << TenantName
+ << "; locked: " << LockedNodesCount
+ << "; down: " << DownNodesCount
+ << "; total: " << NodeToState.size()
+ << "; limit: " << DisabledNodesLimit
+ << "; ratio limit: " << DisabledNodesRatioLimit << "%";
+ break;
+ default:
+ Y_FAIL("Unexpected reason");
+ break;
+ }
+ return readableReason.str();
+}
+
+std::string TClusterLimitsCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
+ ActionState::ActionReason reason) const {
+ Y_UNUSED(mode);
+
+ std::stringstream readableReason;
+
+ if (reason == ActionState::ACTION_REASON_OK) {
+ readableReason << "Action is OK";
+ return readableReason.str();
+ }
+
+ if (mode == NKikimrCms::MODE_FORCE_RESTART) {
+ return readableReason.str();
+ }
+
+ readableReason << "Cannot lock node: " << nodeId;
+
+ switch (reason) {
+ case ActionState::ACTION_REASON_ALREADY_LOCKED:
+ readableReason << "Node is already locked";
+ break;
+ case ActionState::ACTION_REASON_LOW_PRIORITY:
+ readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId;
+ break;
+ case ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED:
+ readableReason << ". Too many locked nodes in cluster"
+ << "; locked: " << LockedNodesCount
+ << "; down: " << DownNodesCount
+ << "; total: " << NodeToState.size()
+ << "; limit: " << DisabledNodesLimit
+ << "; ratio limit: " << DisabledNodesRatioLimit << "%";
+ break;
+ default:
+ Y_FAIL("Unexpected reason");
+ break;
+ }
+
+ return readableReason.str();
+}
+
+
+std::string TSysTabletsNodesCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
+ ActionState::ActionReason reason) const {
+ std::stringstream readableReason;
+
+ if (reason == ActionState::ACTION_REASON_OK) {
+ readableReason << "Action is OK";
+ return readableReason.str();
+ }
+
+ if (mode == NKikimrCms::MODE_FORCE_RESTART) {
+ return readableReason.str();
+ }
+
+ switch (reason) {
+ case ActionState::ACTION_REASON_ALREADY_LOCKED:
+ readableReason << "Node is already locked";
+ break;
+ case ActionState::ACTION_REASON_LOW_PRIORITY:
+ readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId;
+ break;
+ case ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED:
+ readableReason << "Cannot lock node: " << nodeId << ". Tablet "
+ << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
+ << " has too many unavailable nodes. Locked: "
+ << LockedNodesCount << ". Down: " << DownNodesCount;
+
+ if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
+ readableReason << ". Limit: " << NodeToState.size() / 2 << " (50%)";
+ }
+
+ if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
+ readableReason << ". Limit: " << NodeToState.size() - 1;
+ }
+ break;
+ default:
+ Y_FAIL("Unexpected reason");
+
+ }
+
+ return readableReason.str();
+}
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h
index 72362c84dba..047a7a5c5ca 100644
--- a/ydb/core/cms/node_checkers.h
+++ b/ydb/core/cms/node_checkers.h
@@ -6,6 +6,7 @@
#include <ydb/core/erasure/erasure.h>
#include <ydb/core/protos/cms.pb.h>
#include <ydb/core/protos/config.pb.h>
+#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h>
#include <library/cpp/actors/core/log.h>
@@ -36,6 +37,7 @@ public:
NODE_STATE_DOWN /* "Down" */
};
+
protected:
static ENodeState NodeState(NKikimrCms::EState state);
@@ -48,9 +50,12 @@ public:
virtual void LockNode(ui32 nodeId) = 0;
virtual void UnlockNode(ui32 nodeId) = 0;
- virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0;
+ virtual void EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) = 0;
+ virtual void RemoveTask(const std::string& taskUId) = 0;
+
+ virtual Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const = 0;
- virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0;
+ virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const = 0;
};
/**
@@ -58,7 +63,35 @@ public:
*/
class TNodesCounterBase : public INodesChecker {
protected:
- THashMap<ui32, ENodeState> NodeToState;
+ /** Structure to hold information about vdisk state and priorities and orders of some task.
+ *
+ * Requests with equal priority are processed in the order of arrival at CMS.
+ */
+ struct TNodeState {
+ public:
+ struct TTaskPriority {
+ i32 Priority;
+ ui64 Order;
+ std::string TaskUId;
+
+ explicit TTaskPriority(i32 priority, ui64 order, const std::string& taskUId)
+ : Priority(priority)
+ , Order(order)
+ , TaskUId(taskUId)
+ {}
+
+ bool operator<(const TTaskPriority& rhs) const {
+ return Priority < rhs.Priority || (Priority == rhs.Priority && Order > rhs.Order);
+ }
+ };
+ public:
+ ENodeState State;
+ std::set<TTaskPriority> Priorities;
+ };
+
+protected:
+ THashMap<ui32, TNodeState> NodeToState;
+ THashSet<ui32> NodesWithScheduledTasks;
ui32 LockedNodesCount;
ui32 DownNodesCount;
@@ -73,6 +106,9 @@ public:
void AddNode(ui32 nodeId) override;
void UpdateNode(ui32 nodeId, NKikimrCms::EState) override;
+ void EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) override final;
+ virtual void RemoveTask(const std::string& taskUId) override final;
+
void LockNode(ui32 nodeId) override;
void UnlockNode(ui32 nodeId) override;
};
@@ -103,7 +139,7 @@ public:
DisabledNodesRatioLimit = ratioLimit;
}
- bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final;
+ Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const override final;
};
class TTenantLimitsCounter : public TNodesLimitsCounterBase {
@@ -117,20 +153,8 @@ public:
{
}
- std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
- Y_UNUSED(mode);
-
- std::stringstream reason;
- reason << "Cannot lock node: " << nodeId
- << ". Too many locked nodes for tenant " << TenantName
- << "; locked: " << LockedNodesCount
- << "; down: " << DownNodesCount
- << "; total: " << NodeToState.size()
- << "; limit: " << DisabledNodesLimit
- << "; ratio limit: " << DisabledNodesRatioLimit << "%";
-
- return reason.str();
- }
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
+ Ydb::Maintenance::ActionState::ActionReason reason) const override final;
};
class TClusterLimitsCounter : public TNodesLimitsCounterBase {
@@ -140,20 +164,8 @@ public:
{
}
- std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
- Y_UNUSED(mode);
-
- std::stringstream reason;
- reason << "Cannot lock node: " << nodeId
- <<". Too many locked nodes in cluster"
- << "; locked: " << LockedNodesCount
- << "; down: " << DownNodesCount
- << "; total: " << NodeToState.size()
- << "; limit: " << DisabledNodesLimit
- << "; ratio limit: " << DisabledNodesRatioLimit << "%";
-
- return reason.str();
- }
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
+ Ydb::Maintenance::ActionState::ActionReason reason) const override final;
};
/**
@@ -171,30 +183,10 @@ public:
: TabletType(tabletType)
{}
- bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final;
-
- std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final {
- std::stringstream reason;
+ Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const override final;
- if (mode == NKikimrCms::MODE_FORCE_RESTART) {
- return reason.str();
- }
-
- reason << "Cannot lock node: " << nodeId
- << ". Tablet "
- << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType)
- << " has too many unavailable nodes. Locked: " << LockedNodesCount
- << ". Down: " << DownNodesCount;
- if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) {
- reason << ". Limit: " << NodeToState.size() / 2 << " (50%)";
- }
-
- if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) {
- reason << ". Limit: " << NodeToState.size() - 1;
- }
-
- return reason.str();
- }
+ std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode,
+ Ydb::Maintenance::ActionState::ActionReason reason) const override final;
};
} // namespace NKikimr::NCms
diff --git a/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt
index 922dc62a7b9..1cff84774ce 100644
--- a/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt
+++ b/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt
@@ -32,6 +32,7 @@ target_link_options(ydb-core-cms-ut PRIVATE
CoreFoundation
)
target_sources(ydb-core-cms-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt b/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt
index 64ee05e5931..890df0f266e 100644
--- a/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt
+++ b/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt
@@ -35,6 +35,7 @@ target_link_options(ydb-core-cms-ut PRIVATE
-ldl
)
target_sources(ydb-core-cms-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt b/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt
index ede8d036c2a..3d68909dc52 100644
--- a/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt
+++ b/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt
@@ -36,6 +36,7 @@ target_link_options(ydb-core-cms-ut PRIVATE
-ldl
)
target_sources(ydb-core-cms-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt b/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt
index 1dbbee1959a..29a6d9015d2 100644
--- a/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt
+++ b/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt
@@ -25,6 +25,7 @@ target_link_libraries(ydb-core-cms-ut PUBLIC
core-testlib-default
)
target_sources(ydb-core-cms-ut PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp
${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp
diff --git a/ydb/public/api/protos/draft/ydb_maintenance.proto b/ydb/public/api/protos/draft/ydb_maintenance.proto
index cbd1481ed1a..329c1532479 100644
--- a/ydb/public/api/protos/draft/ydb_maintenance.proto
+++ b/ydb/public/api/protos/draft/ydb_maintenance.proto
@@ -87,11 +87,17 @@ message ActionState {
// State storage broken. Too many (more than (nToSelect - 1) / 2) unavailable rings
ACTION_REASON_STATE_STORAGE_BROKEN = 5;
// Issue in cluster disabled nodes limit
- ACTION_REASON_DISABLED_NODES_LIMIT_RICHED = 6;
+ ACTION_REASON_DISABLED_NODES_LIMIT_REACHED = 6;
// Issue in tenant limits
- ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_RICHED = 7;
+ ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_REACHED = 7;
// Wrong request
ACTION_REASON_WRONG_REQUEST = 8;
+ // Low priority
+ ACTION_REASON_LOW_PRIORITY = 9;
+ // Lock is granded to this item
+ ACTION_REASON_ALREADY_LOCKED = 10;
+ // Limit to nodes with sys tablets
+ ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED = 11;
}
Action action = 1;