diff options
author | ilnaz <ilnaz@ydb.tech> | 2023-06-07 16:48:06 +0300 |
---|---|---|
committer | ilnaz <ilnaz@ydb.tech> | 2023-06-07 16:48:06 +0300 |
commit | f86cc6b226195a56622fbd70e0c0c9aeefb6a46f (patch) | |
tree | b2eca66f8b6378bc53eb1fd6f5047675893540db | |
parent | b6947f76044ef4b12e08c62ce995ebf4b815fbb7 (diff) | |
download | ydb-f86cc6b226195a56622fbd70e0c0c9aeefb6a46f.tar.gz |
Revert "Unify checking in CMS"
This reverts commit e7dd9f1e07897af53a5fa4875fdd76b96a6cc4a6, reversing
changes made to da4de01e86ba5540671187f7ad0a8d3a8f0b78fd.
-rw-r--r-- | ydb/core/cms/CMakeLists.darwin-x86_64.txt | 11 | ||||
-rw-r--r-- | ydb/core/cms/CMakeLists.linux-aarch64.txt | 11 | ||||
-rw-r--r-- | ydb/core/cms/CMakeLists.linux-x86_64.txt | 11 | ||||
-rw-r--r-- | ydb/core/cms/CMakeLists.windows-x86_64.txt | 11 | ||||
-rw-r--r-- | ydb/core/cms/checkers_ut.cpp | 294 | ||||
-rw-r--r-- | ydb/core/cms/cluster_info.cpp | 113 | ||||
-rw-r--r-- | ydb/core/cms/cluster_info.h | 34 | ||||
-rw-r--r-- | ydb/core/cms/cluster_info_ut.cpp | 37 | ||||
-rw-r--r-- | ydb/core/cms/cms.cpp | 107 | ||||
-rw-r--r-- | ydb/core/cms/cms_impl.h | 3 | ||||
-rw-r--r-- | ydb/core/cms/cms_ut.cpp | 90 | ||||
-rw-r--r-- | ydb/core/cms/cms_ut_common.cpp | 10 | ||||
-rw-r--r-- | ydb/core/cms/erasure_checkers.cpp | 482 | ||||
-rw-r--r-- | ydb/core/cms/erasure_checkers.h | 144 | ||||
-rw-r--r-- | ydb/core/cms/node_checkers.cpp | 280 | ||||
-rw-r--r-- | ydb/core/cms/node_checkers.h | 102 | ||||
-rw-r--r-- | ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/cms/ut/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/core/cms/ut/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/core/cms/ut/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/public/api/protos/draft/ydb_maintenance.proto | 10 |
21 files changed, 513 insertions, 1241 deletions
diff --git a/ydb/core/cms/CMakeLists.darwin-x86_64.txt b/ydb/core/cms/CMakeLists.darwin-x86_64.txt index 7ef104adaa9..cf839e87ff8 100644 --- a/ydb/core/cms/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/cms/CMakeLists.darwin-x86_64.txt @@ -22,12 +22,6 @@ get_built_tool_path( enum_parser ) get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( TOOL_rescompiler_bin TOOL_rescompiler_dependency tools/rescompiler/bin @@ -99,11 +93,6 @@ generate_enum_serilization(ydb-core-cms INCLUDE_HEADERS ydb/core/cms/node_checkers.h ) -generate_enum_serilization(ydb-core-cms - ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h - INCLUDE_HEADERS - ydb/core/cms/erasure_checkers.h -) add_global_library_for(ydb-core-cms.global ydb-core-cms) target_link_libraries(ydb-core-cms.global PUBLIC diff --git a/ydb/core/cms/CMakeLists.linux-aarch64.txt b/ydb/core/cms/CMakeLists.linux-aarch64.txt index 54abfb25b63..e1dcbee6629 100644 --- a/ydb/core/cms/CMakeLists.linux-aarch64.txt +++ b/ydb/core/cms/CMakeLists.linux-aarch64.txt @@ -22,12 +22,6 @@ get_built_tool_path( enum_parser ) get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( TOOL_rescompiler_bin TOOL_rescompiler_dependency tools/rescompiler/bin @@ -100,11 +94,6 @@ generate_enum_serilization(ydb-core-cms INCLUDE_HEADERS ydb/core/cms/node_checkers.h ) -generate_enum_serilization(ydb-core-cms - ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h - INCLUDE_HEADERS - ydb/core/cms/erasure_checkers.h -) add_global_library_for(ydb-core-cms.global ydb-core-cms) target_link_libraries(ydb-core-cms.global PUBLIC diff --git a/ydb/core/cms/CMakeLists.linux-x86_64.txt b/ydb/core/cms/CMakeLists.linux-x86_64.txt index 54abfb25b63..e1dcbee6629 100644 --- a/ydb/core/cms/CMakeLists.linux-x86_64.txt +++ b/ydb/core/cms/CMakeLists.linux-x86_64.txt @@ -22,12 +22,6 @@ get_built_tool_path( enum_parser ) get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( TOOL_rescompiler_bin TOOL_rescompiler_dependency tools/rescompiler/bin @@ -100,11 +94,6 @@ generate_enum_serilization(ydb-core-cms INCLUDE_HEADERS ydb/core/cms/node_checkers.h ) -generate_enum_serilization(ydb-core-cms - ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h - INCLUDE_HEADERS - ydb/core/cms/erasure_checkers.h -) add_global_library_for(ydb-core-cms.global ydb-core-cms) target_link_libraries(ydb-core-cms.global PUBLIC diff --git a/ydb/core/cms/CMakeLists.windows-x86_64.txt b/ydb/core/cms/CMakeLists.windows-x86_64.txt index 7ef104adaa9..cf839e87ff8 100644 --- a/ydb/core/cms/CMakeLists.windows-x86_64.txt +++ b/ydb/core/cms/CMakeLists.windows-x86_64.txt @@ -22,12 +22,6 @@ get_built_tool_path( enum_parser ) get_built_tool_path( - TOOL_enum_parser_bin - TOOL_enum_parser_dependency - tools/enum_parser/enum_parser - enum_parser -) -get_built_tool_path( TOOL_rescompiler_bin TOOL_rescompiler_dependency tools/rescompiler/bin @@ -99,11 +93,6 @@ generate_enum_serilization(ydb-core-cms INCLUDE_HEADERS ydb/core/cms/node_checkers.h ) -generate_enum_serilization(ydb-core-cms - ${CMAKE_SOURCE_DIR}/ydb/core/cms/erasure_checkers.h - INCLUDE_HEADERS - ydb/core/cms/erasure_checkers.h -) add_global_library_for(ydb-core-cms.global ydb-core-cms) target_link_libraries(ydb-core-cms.global PUBLIC diff --git a/ydb/core/cms/checkers_ut.cpp b/ydb/core/cms/checkers_ut.cpp deleted file mode 100644 index 0964432c5f6..00000000000 --- a/ydb/core/cms/checkers_ut.cpp +++ /dev/null @@ -1,294 +0,0 @@ -#include "cluster_info.h" -#include "erasure_checkers.h" -#include "node_checkers.h" -#include "ut_helpers.h" -#include "util/string/cast.h" - -#include <ydb/core/protos/cms.pb.h> -#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h> - -#include <library/cpp/testing/unittest/registar.h> - -#include <util/generic/ptr.h> -#include <util/generic/vector.h> - -#include <bitset> -#include <string> - -namespace NKikimr::NCmsTest { - -using namespace NCms; -using namespace NKikimrCms; -using namespace Ydb::Maintenance; - -TVector<TVDiskID> GenerateDefaultBlock42Group() { - TVector<TVDiskID> group; - for (ui32 i = 0; i < 8; ++i) { - group.push_back(TVDiskID(0, 1, 0, i % 8, 0)); - } - return group; -} - -TVector<TVDiskID> GenerateDefaultMirror3dcGroup() { - TVector<TVDiskID> group; - for (ui32 i = 0; i < 9; ++i) { - group.push_back(TVDiskID(0, 1, i / 3, i % 3, 0)); - } - return group; -} - -Y_UNIT_TEST_SUITE(TCmsCheckersTest) { - Y_UNIT_TEST(DefaultErasureCheckerAvailabilityMode) - { - TDefaultErasureChecker checker(0); - - auto vdisks = GenerateDefaultBlock42Group(); - for (auto vdisk : vdisks) { - checker.UpdateVDisk(vdisk, UP); - } - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK); - - checker.UpdateVDisk(vdisks[0], DOWN); - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - - checker.LockVDisk(vdisks[0]); - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - } - - Y_UNIT_TEST(Mirror3dcCheckerAvailabilityMode) - { - TMirror3dcChecker checker(0); - - auto vdisks = GenerateDefaultMirror3dcGroup(); - for (auto vdisk : vdisks) { - checker.UpdateVDisk(vdisk, UP); - } - - // One disabled disk for max availability - for (ui32 i = 0; i < 9; ++i) { - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK); - checker.LockVDisk(vdisks[i]); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED); - - for (ui32 j = 0; j < 9; ++j) { - if (i == j) - continue; - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - } - - checker.UnlockVDisk(vdisks[i]); - } - - for (ui32 dc = 0; dc < 3; ++dc) { - // Minus 1 dc - checker.LockVDisk(vdisks[dc * 3]); - checker.LockVDisk(vdisks[dc * 3 + 1]); - checker.LockVDisk(vdisks[dc * 3 + 2]); - - for (ui32 i = 0; i < 9; ++i) { - if ((i <= dc * 3 + 2) && (i >= dc * 3)) { - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_ALREADY_LOCKED); - continue; - } - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - } - - // Minus 2 in dc - for (ui32 i = 0; i < 3; ++i) { - checker.UnlockVDisk(vdisks[dc * 3 + i]); - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[dc * 3 + i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[dc * 3 + i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - - for (ui32 j = 0; j < 9; ++j) { - if ((j <= dc * 3 + 2) && (j >= dc * 3)) - continue; - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - } - - checker.LockVDisk(vdisks[dc * 3 + i]); - } - - checker.UnlockVDisk(vdisks[dc * 3]); - checker.UnlockVDisk(vdisks[dc * 3 + 1]); - checker.UnlockVDisk(vdisks[dc * 3 + 2]); - } - - // Minus 1 in each dc - for (ui32 i = 0; i < 3; ++i) { - checker.LockVDisk(vdisks[i]); - checker.LockVDisk(vdisks[i + 3]); - checker.LockVDisk(vdisks[i + 6]); - - for (ui32 j = 0; j < 9; ++j) { - if (j % 3 == i) - continue; - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - } - - checker.UnlockVDisk(vdisks[i]); - checker.UnlockVDisk(vdisks[i + 3]); - checker.UnlockVDisk(vdisks[i + 6]); - } - - checker.UpdateVDisk(vdisks[0], DOWN); - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[0], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[1], MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - } - - Y_UNIT_TEST(DefaultErasureCheckerPriorities) - { - TDefaultErasureChecker checker(0); - - auto vdisks = GenerateDefaultBlock42Group(); - for (auto vdisk : vdisks) { - checker.UpdateVDisk(vdisk, UP); - } - - // Check one scheduled task with order - for (ui32 i = 0; i < 8; ++i) { - checker.EmplaceTask(vdisks[i], 0, 1, "task-1"); - - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_MAX_AVAILABILITY, 0, 2), ActionState::ACTION_REASON_LOW_PRIORITY); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[i], MODE_KEEP_AVAILABLE, 0, 2), ActionState::ACTION_REASON_LOW_PRIORITY); - - for (ui32 j = 0; j < 8; ++j) { - if (j == i) - continue; - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_MAX_AVAILABILITY, 0, 2), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[j], MODE_KEEP_AVAILABLE, 0, 2), ActionState::ACTION_REASON_OK); - } - - checker.RemoveTask("task-1"); - } - - // Check two scheduled task with priority and order - checker.EmplaceTask(vdisks[1], 1, 1, "task-1"); - checker.EmplaceTask(vdisks[2], 2, 1, "task-2"); - - // Priority is higher than task-1 but lower than task-2 - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_MAX_AVAILABILITY, 2, 2), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_KEEP_AVAILABLE, 2, 2), ActionState::ACTION_REASON_OK); - - checker.RemoveTask("task-1"); - checker.RemoveTask("task-2"); - } - - Y_UNIT_TEST(Mirror3dcCheckerPriorities) - { - TMirror3dcChecker checker(0); - - auto vdisks = GenerateDefaultMirror3dcGroup(); - for (auto vdisk : vdisks) { - checker.UpdateVDisk(vdisk, UP); - } - - // task-1 > task-2 > task-3 > task-4 - checker.EmplaceTask(vdisks[0], 2, 2, "task-1"); - checker.EmplaceTask(vdisks[1], 2, 5, "task-2"); - checker.EmplaceTask(vdisks[2], 1, 2, "task-3"); - checker.EmplaceTask(vdisks[3], 1, 4, "task-4"); - - // Highest priority - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[4], MODE_MAX_AVAILABILITY, 3, 1), ActionState::ACTION_REASON_OK); - // Blocked by all tasks - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[4], MODE_KEEP_AVAILABLE, 1, 6), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); - // Blocked by task-1, task-2, task-3 - UNIT_ASSERT_EQUAL(checker.TryToLockVDisk(vdisks[3], MODE_KEEP_AVAILABLE, 1, 3), ActionState::ACTION_REASON_OK); - } - - Y_UNIT_TEST(ClusterNodesCounter) - { - const ui32 nodeCount = 30; - TClusterLimitsCounter checker(0, 0); - - for (ui32 i = 1; i <= nodeCount; ++i) { - checker.UpdateNode(i, UP); - } - - // Without limit allow all nodes - for (ui32 i = 1; i < nodeCount; ++i) { - checker.LockNode(i); - } - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - - for (ui32 i = 1; i < nodeCount; ++i) { - checker.UnlockNode(i); - } - - // Limit 15 nodes - checker.ApplyLimits(15, 0); - for (ui32 i = 1; i < 15; ++i) { - checker.LockNode(i); - } - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - - checker.ApplyLimits(0, 50); - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_OK); - - checker.LockNode(15); - checker.ApplyLimits(15, 0); - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 0, 0), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 0, 0), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED); - - checker.ApplyLimits(0, 50); - - for (ui32 i = 1; i <= 15; ++i) { - checker.UnlockNode(i); - } - - checker.ApplyLimits(0, 0); - for (ui32 i = 1; i < nodeCount; ++i) { - checker.EmplaceTask(i, 1, 3, "task-1"); - } - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 1, 1), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 1, 1), ActionState::ACTION_REASON_OK); - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_MAX_AVAILABILITY, 1, 4), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(nodeCount, MODE_KEEP_AVAILABLE, 1, 4), ActionState::ACTION_REASON_OK); - - checker.RemoveTask("task-1"); - - checker.ApplyLimits(15, 0); - for (ui32 i = 1; i < 15; ++i) { - checker.EmplaceTask(i, 1, 3, "task-2"); - } - checker.EmplaceTask(15, 1, 4, "task-3"); - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 1, 1), ActionState::ACTION_REASON_OK); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 1, 1), ActionState::ACTION_REASON_OK); - - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_MAX_AVAILABILITY, 1, 5), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED); - UNIT_ASSERT_EQUAL(checker.TryToLockNode(17, MODE_KEEP_AVAILABLE, 1, 5), ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED); - - checker.RemoveTask("task-2"); - checker.RemoveTask("task-3"); - } - -} -} diff --git a/ydb/core/cms/cluster_info.cpp b/ydb/core/cms/cluster_info.cpp index 1a3c85d5b33..28e6baeddbf 100644 --- a/ydb/core/cms/cluster_info.cpp +++ b/ydb/core/cms/cluster_info.cpp @@ -1,11 +1,8 @@ #include "cluster_info.h" #include "cms_state.h" #include "node_checkers.h" -#include "erasure_checkers.h" -#include <ydb/core/protos/cms.pb.h> #include <ydb/core/protos/services.pb.h> -#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h> #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/log.h> @@ -404,7 +401,7 @@ void TClusterInfo::SetNodeState(ui32 nodeId, NKikimrCms::EState state, const NKi } } - node.UpdateNodeState(); + node.UpdateNodeState(); } void TClusterInfo::ClearNode(ui32 nodeId) @@ -419,10 +416,6 @@ void TClusterInfo::ClearNode(ui32 nodeId) node.HasTenantInfo = false; node.State = NKikimrCms::DOWN; node.UpdateNodeState(); - - for (auto& vdisk : node.VDisks) { - BSGroup(vdisk.GroupID).GroupChecker->UpdateVDisk(vdisk, DOWN); - } } void TClusterInfo::ApplyInitialNodeTenants(const TActorContext& ctx, const THashMap<ui32, TString>& nodeTenants) @@ -496,15 +489,7 @@ void TClusterInfo::UpdatePDiskState(const TPDiskID &id, const NKikimrWhiteboard: } auto &pdisk = PDiskRef(id); - auto state = info.GetState() == NKikimrBlobStorage::TPDiskState::Normal ? UP : DOWN; - pdisk.State = state; - - if (state == UP) - return; - - for (auto &vdisk : pdisk.VDisks) { - BSGroup(vdisk.GroupID).GroupChecker->UpdateVDisk(vdisk, state); - } + pdisk.State = info.GetState() == NKikimrBlobStorage::TPDiskState::Normal ? UP : DOWN; } void TClusterInfo::AddVDisk(const NKikimrBlobStorage::TBaseConfig::TVSlot &info) @@ -560,13 +545,10 @@ void TClusterInfo::UpdateVDiskState(const TVDiskID &id, const NKikimrWhiteboard: } auto &vdisk = VDiskRef(id); - if (info.GetVDiskState() == NKikimrWhiteboard::OK && info.GetReplicated()) { + if (info.GetVDiskState() == NKikimrWhiteboard::OK && info.GetReplicated()) vdisk.State = UP; - BSGroup(vdisk.VDiskId.GroupID).GroupChecker->UpdateVDisk(id, UP); - } else { + else vdisk.State = DOWN; - BSGroup(vdisk.VDiskId.GroupID).GroupChecker->UpdateVDisk(id, DOWN); - } } void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &info) @@ -575,8 +557,6 @@ void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &inf bsgroup.GroupId = info.GetGroupId(); if (info.GetErasureSpecies()) bsgroup.Erasure = {TErasureType::ErasureSpeciesByName(info.GetErasureSpecies())}; - - bsgroup.GroupChecker = CreateStorageGroupChecker(bsgroup.Erasure.GetErasure(), bsgroup.GroupId); for (const auto &vdisk : info.GetVSlotId()) { TPDiskID pdiskId = {vdisk.GetNodeId(), vdisk.GetPDiskId()}; Y_VERIFY_DEBUG(HasPDisk(pdiskId)); @@ -596,10 +576,8 @@ void TClusterInfo::AddBSGroup(const NKikimrBlobStorage::TBaseConfig::TGroup &inf bsgroup.VDisks.insert(pdisk.VSlots.at(vdisk.GetVSlotId())); } - for (auto &vdisk : bsgroup.VDisks) { + for (auto &vdisk : bsgroup.VDisks) VDiskRef(vdisk).BSGroups.insert(bsgroup.GroupId); - bsgroup.GroupChecker->AddVDisk(vdisk); - } BSGroups[bsgroup.GroupId] = std::move(bsgroup); } @@ -634,18 +612,12 @@ void TClusterInfo::AddPDiskTempLock(TPDiskID pdiskId, const NKikimrCms::TAction { auto &pdisk = PDiskRef(pdiskId); pdisk.TempLocks.push_back({RollbackPoint, action}); - - for (auto& vdisk : pdisk.VDisks) { - LogManager.AddLockVDiskOperation(vdisk ,BSGroup(vdisk.GroupID).GroupChecker); - } } void TClusterInfo::AddVDiskTempLock(TVDiskID vdiskId, const NKikimrCms::TAction &action) { auto &vdisk = VDiskRef(vdiskId); vdisk.TempLocks.push_back({RollbackPoint, action}); - - LogManager.AddLockVDiskOperation(vdiskId, BSGroup(vdiskId.GroupID).GroupChecker); } static TServices MakeServices(const NKikimrCms::TAction &action) { @@ -678,10 +650,6 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action) for (const auto node : nodes) { for (auto &nodeGroup: node->NodeGroups) nodeGroup->LockNode(node->NodeId); - - for (auto vdisk : node->VDisks) { - BSGroup(vdisk.GroupID).GroupChecker->LockVDisk(vdisk); - } } } break; @@ -689,12 +657,12 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action) for (const auto &device : action.GetDevices()) { if (HasPDisk(device)) { auto pdisk = &PDiskRef(device); - for (auto vdisk : pdisk->VDisks) - BSGroup(vdisk.GroupID).GroupChecker->LockVDisk(vdisk); + for (auto &nodeGroup: NodeRef(pdisk->NodeId).NodeGroups) + nodeGroup->LockNode(pdisk->NodeId); } else if (HasVDisk(device)) { auto vdisk = &VDiskRef(device); - - BSGroup(vdisk->VDiskId.GroupID).GroupChecker->LockVDisk(vdisk->VDiskId); + for (auto &nodeGroup: NodeRef(vdisk->NodeId).NodeGroups) + nodeGroup->LockNode(vdisk->NodeId); } } break; @@ -857,9 +825,8 @@ ui64 TClusterInfo::AddTempLocks(const NKikimrCms::TAction &action, const TActorC LogManager.ApplyAction(action, this); - for (auto item : items) { + for (auto item : items) item->TempLocks.push_back({RollbackPoint, action}); - } return items.size(); } @@ -874,38 +841,6 @@ ui64 TClusterInfo::ScheduleActions(const TRequestInfo &request, const TActorCont item->ScheduleLock({action, request.Owner, request.RequestId, request.Order}); locks += items.size(); - - switch (action.GetType()) { - case NKikimrCms::TAction::RESTART_SERVICES: - case NKikimrCms::TAction::SHUTDOWN_HOST: - if (auto nodes = NodePtrs(action.GetHost(), MakeServices(action))) { - for (const auto node : nodes) { - for (auto& group : node->NodeGroups) { - group->EmplaceTask(node->NodeId, 0, request.Order, request.RequestId); - } - for (auto &vdisk: node->VDisks) { - BSGroup(vdisk.GroupID).GroupChecker->EmplaceTask(vdisk, 0, request.Order, request.RequestId); - } - } - } - break; - case NKikimrCms::TAction::REPLACE_DEVICES: - for (const auto &device : action.GetDevices()) { - if (HasPDisk(device)) { - auto pdisk = &PDisk(device); - for (auto &vdisk: pdisk->VDisks) { - BSGroup(vdisk.GroupID).GroupChecker->EmplaceTask(vdisk, 0, request.Order, request.RequestId); - } - } else if (HasVDisk(device)) { - auto vdisk = &VDisk(device); - BSGroup(vdisk->VDiskId.GroupID).GroupChecker->EmplaceTask(vdisk->VDiskId, 0, request.Order, request.RequestId); - } - } - break; - - default: - break; - } } return locks; @@ -915,20 +850,6 @@ void TClusterInfo::UnscheduleActions(const TString &requestId) { for (auto &entry : LockableItems) entry.second->RemoveScheduledLocks(requestId); - - for (auto &group : BSGroups) { - group.second.GroupChecker->RemoveTask(requestId); - } - - ClusterNodes->RemoveTask(requestId); - - for (auto& [_, tenantChecker] : TenantNodesChecker) { - tenantChecker->RemoveTask(requestId); - } - - for (auto& [_, sysNodesCheckers] : SysNodesCheckers) { - sysNodesCheckers->RemoveTask(requestId); - } } void TClusterInfo::DeactivateScheduledLocks(ui64 order) @@ -997,7 +918,7 @@ void TClusterInfo::GenerateTenantNodesCheckers() { void TClusterInfo::GenerateSysTabletsNodesCheckers() { for (auto tablet : BootstrapConfig.GetTablet()) { - SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType())); + SysNodesCheckers[tablet.GetType()] = TSimpleSharedPtr<TSysTabletsNodesCounter>(new TSysTabletsNodesCounter(tablet.GetType())); for (auto nodeId : tablet.GetNode()) { NodeToTabletTypes[nodeId].push_back(tablet.GetType()); @@ -1078,10 +999,6 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action, for (const auto node : nodes) { for (auto &nodeGroup: node->NodeGroups) AddNodeLockOperation(node->NodeId, nodeGroup); - - for (auto& vdisk : node->VDisks) { - AddLockVDiskOperation(vdisk, clusterState->BSGroup(vdisk.GroupID).GroupChecker); - } } } break; @@ -1089,13 +1006,13 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action, for (const auto &device : action.GetDevices()) { if (clusterState->HasPDisk(device)) { auto pdisk = &clusterState->PDisk(device); - for (auto& vdisk : pdisk->VDisks) { - AddLockVDiskOperation(vdisk, clusterState->BSGroup(vdisk.GroupID).GroupChecker); - } + for (auto &nodeGroup: clusterState->NodeRef(pdisk->NodeId).NodeGroups) + AddNodeLockOperation(pdisk->NodeId, nodeGroup); } else if (clusterState->HasVDisk(device)) { auto vdisk = &clusterState->VDisk(device); - AddLockVDiskOperation(vdisk->VDiskId, clusterState->BSGroup(vdisk->VDiskId.GroupID).GroupChecker); + for (auto &nodeGroup: clusterState->NodeRef(vdisk->NodeId).NodeGroups) + AddNodeLockOperation(vdisk->NodeId, nodeGroup); } } break; diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h index 0b7253d3688..923b5a7d05f 100644 --- a/ydb/core/cms/cluster_info.h +++ b/ydb/core/cms/cluster_info.h @@ -3,7 +3,6 @@ #include "defs.h" #include "config.h" #include "downtime.h" -#include "erasure_checkers.h" #include "node_checkers.h" #include "services.h" @@ -15,7 +14,6 @@ #include <ydb/core/protos/cms.pb.h> #include <ydb/core/protos/config.pb.h> #include <ydb/core/protos/console.pb.h> -#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h> #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/interconnect/interconnect.h> @@ -473,8 +471,6 @@ struct TBSGroupInfo { ui32 GroupId = 0; TErasureType Erasure; TSet<TVDiskID> VDisks; - - TSimpleSharedPtr<IStorageGroupChecker> GroupChecker; }; /** @@ -600,32 +596,6 @@ public: } }; -class TLockDiskOperation : public TOperationBase { -private: - TVDiskID VDiskId; - -private: - TSimpleSharedPtr<IStorageGroupChecker> StorageGroupChecker; - -public: - TLockDiskOperation(const TVDiskID& vdiskId, TSimpleSharedPtr<IStorageGroupChecker> checker) - : TOperationBase(OPERATION_TYPE_LOCK_DISK) - , VDiskId(vdiskId) - , StorageGroupChecker(checker) - { - } - - void Do() override final { - StorageGroupChecker->LockVDisk(VDiskId); - } - - void Undo() override final { - StorageGroupChecker->UnlockVDisk(VDiskId); - } - - -}; - class TLogRollbackPoint : public TOperationBase { public: TLogRollbackPoint() : TOperationBase(OPERATION_TYPE_ROLLBACK_POINT) @@ -654,10 +624,6 @@ public: Log.emplace_back(new TLockNodeOperation(nodeId, nodesState))->Do(); } - void AddLockVDiskOperation(const TVDiskID& vdiskId, TSimpleSharedPtr<IStorageGroupChecker> checker) { - Log.emplace_back(new TLockDiskOperation(vdiskId, checker))->Do(); - } - void RollbackOperations() { while (!Log.empty() && Log.back()->Type != OPERATION_TYPE_ROLLBACK_POINT) { Log.back()->Undo(); diff --git a/ydb/core/cms/cluster_info_ut.cpp b/ydb/core/cms/cluster_info_ut.cpp index e56a530f2b5..0e79306a3dd 100644 --- a/ydb/core/cms/cluster_info_ut.cpp +++ b/ydb/core/cms/cluster_info_ut.cpp @@ -292,57 +292,56 @@ Y_UNIT_TEST_SUITE(TClusterInfoTest) { UNIT_ASSERT_VALUES_EQUAL(cluster->NodesCount("localhost"), 2); cluster->AddPDisk(MakePDiskConfig(1, 1)); - cluster->AddPDisk(MakePDiskConfig(2, 2)); - cluster->AddVDisk(MakeVSlotConfig(1, {0, 1, 0, 0, 0}, 1, 0)); - cluster->AddVDisk(MakeVSlotConfig(2, {0, 1, 0, 1, 0}, 2, 0)); - cluster->AddBSGroup(MakeBSGroup(0, "none", 1, 1, 0, 2, 2, 0)); - cluster->UpdatePDiskState(NCms::TPDiskID(1, 1), MakePDiskInfo(1)); UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(1, 1))); UNIT_ASSERT(!cluster->HasPDisk(NCms::TPDiskID(1, 2))); UNIT_ASSERT(!cluster->HasPDisk(NCms::TPDiskID(2, 1))); - CheckPDisk(cluster->PDisk(NCms::TPDiskID(1, 1)), 1, 1, UP, 1); + CheckPDisk(cluster->PDisk(NCms::TPDiskID(1, 1)), 1, 1, UP, 0); UNIT_ASSERT(cluster->Node(1).PDisks.contains(NCms::TPDiskID(1, 1))); + cluster->AddVDisk(MakeVSlotConfig(1, {0, 1, 0, 0, 0}, 1, 0)); cluster->UpdateVDiskState({0, 1, 0, 0, 0}, MakeVDiskInfo({0, 1, 0, 0, 0}, 1, 0)); UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 0, 0})); - CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), {0, 1, 0, 0, 0}, 1, UP, 1, 1); + UNIT_ASSERT(!cluster->HasVDisk({0, 1, 0, 1, 0})); + CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), {0, 1, 0, 0, 0}, 1, UP, 1, 0); UNIT_ASSERT_VALUES_EQUAL(cluster->PDisk(NCms::TPDiskID(1, 1)).VDisks.size(), 1); UNIT_ASSERT(cluster->PDisk(NCms::TPDiskID(1, 1)).VDisks.contains(TVDiskID(0, 1, 0, 0, 0))); + cluster->AddPDisk(MakePDiskConfig(2, 2)); cluster->UpdatePDiskState(NCms::TPDiskID(2, 2), MakePDiskInfo(2)); UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(2, 2))); - CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 1); + CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 0); + cluster->AddVDisk(MakeVSlotConfig(2, {0, 1, 0, 1, 0}, 2, 0)); cluster->UpdateVDiskState({0, 1, 0, 1, 0}, MakeVDiskInfo({0, 1, 0, 1, 0}, 2, 0)); UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 1, 0})); UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(2, 2))); CheckPDisk(cluster->PDisk(NCms::TPDiskID(2, 2)), 2, 2, UP, 1); UNIT_ASSERT(cluster->PDisk(NCms::TPDiskID(2, 2)).VDisks.contains(TVDiskID(0, 1, 0, 1, 0))); - UNIT_ASSERT(cluster->HasBSGroup(0)); - UNIT_ASSERT(!cluster->HasBSGroup(1)); - CheckBSGroup(cluster->BSGroup(0), 0, TErasureType::ErasureNone, 2, + cluster->AddBSGroup(MakeBSGroup(1, "none", 1, 1, 0, 2, 2, 0)); + UNIT_ASSERT(cluster->HasBSGroup(1)); + UNIT_ASSERT(!cluster->HasBSGroup(2)); + CheckBSGroup(cluster->BSGroup(1), 1, TErasureType::ErasureNone, 2, TVDiskID(0, 1, 0, 0, 0), TVDiskID(0, 1, 0, 1, 0)); - CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 1, 0); - CheckVDisk(cluster->VDisk({0, 1, 0, 1, 0}), 1, 0); + CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 1, 1); + CheckVDisk(cluster->VDisk({0, 1, 0, 1, 0}), 1, 1); cluster->AddPDisk(MakePDiskConfig(3, 3)); - cluster->AddVDisk(MakeVSlotConfig(3, {0, 1, 0, 2, 0}, 3, 0)); - cluster->AddBSGroup(MakeBSGroup(2, "none", 1, 1, 0, 3, 3, 0)); - cluster->UpdatePDiskState(NCms::TPDiskID(3, 3), MakePDiskInfo(3)); UNIT_ASSERT(cluster->HasPDisk(NCms::TPDiskID(3, 3))); - CheckPDisk(cluster->PDisk(NCms::TPDiskID(3, 3)), 3, 3, UP, 1); + CheckPDisk(cluster->PDisk(NCms::TPDiskID(3, 3)), 3, 3, UP, 0); + cluster->AddVDisk(MakeVSlotConfig(3, {0, 1, 0, 2, 0}, 3, 0)); cluster->UpdateVDiskState({0, 1, 0, 2, 0}, MakeVDiskInfo({0, 1, 0, 2, 0}, 3, 0)); UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 2, 0})); - CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 1); + CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 0); + cluster->AddBSGroup(MakeBSGroup(2, "none", 1, 1, 0, 3, 3, 0)); UNIT_ASSERT(cluster->HasBSGroup(2)); CheckBSGroup(cluster->BSGroup(2), 2, TErasureType::ErasureNone, 2, TVDiskID(0, 1, 0, 0, 0), TVDiskID(0, 1, 0, 2, 0)); - CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 2, 0, 2); + CheckVDisk(cluster->VDisk({0, 1, 0, 0, 0}), 2, 1, 2); UNIT_ASSERT(cluster->HasVDisk({0, 1, 0, 2, 0})); CheckVDisk(cluster->VDisk({0, 1, 0, 2, 0}), TVDiskID(0, 1, 0, 2, 0), 3, UP, 3, 1, 2); diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index c400618ccaa..211a7e0c622 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -17,7 +17,6 @@ #include <ydb/core/protos/config_units.pb.h> #include <ydb/core/protos/counters_cms.pb.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> -#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h> #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/hfunc.h> @@ -111,7 +110,6 @@ namespace { bool TCms::CheckPermissionRequest(const TPermissionRequest &request, TPermissionResponse &response, TPermissionRequest &scheduled, - ui64 requestOrder, const TActorContext &ctx) { static THashMap<EStatusCode, ui32> CodesRate = BuildCodesRateMap({ @@ -168,7 +166,6 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request, opts.TenantPolicy = request.GetTenantPolicy(); opts.AvailabilityMode = request.GetAvailabilityMode(); opts.PartialPermissionAllowed = allowPartial; - opts.Order = requestOrder; TErrorInfo error; @@ -338,7 +335,7 @@ bool TCms::CheckAction(const TAction &action, case TAction::SHUTDOWN_HOST: return CheckActionShutdownHost(action, opts, error, ctx); case TAction::REPLACE_DEVICES: - return CheckActionReplaceDevices(action, opts, error); + return CheckActionReplaceDevices(action, opts.PermissionDuration, error); case TAction::START_SERVICES: case TAction::STOP_SERVICES: case TAction::ADD_HOST: @@ -545,10 +542,9 @@ bool TCms::CheckSysTabletsNode(const TActionOptions &opts, } for (auto &tabletType : ClusterInfo->NodeToTabletTypes[node.NodeId]) { - auto reason = ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order); - if (reason != Ydb::Maintenance::ActionState::ACTION_REASON_OK) { + if (!ClusterInfo->SysNodesCheckers[tabletType]->TryToLockNode(node.NodeId, opts.AvailabilityMode)) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode, reason); + error.Reason = ClusterInfo->SysNodesCheckers[tabletType]->ReadableReason(node.NodeId, opts.AvailabilityMode); error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime; return false; } @@ -565,27 +561,24 @@ bool TCms::TryToLockNode(const TAction& action, TDuration duration = TDuration::MicroSeconds(action.GetDuration()); duration += opts.PermissionDuration; - auto clusterReason = ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order); - if (clusterReason != Ydb::Maintenance::ActionState::ACTION_REASON_OK) + if (!ClusterInfo->ClusterNodes->TryToLockNode(node.NodeId, opts.AvailabilityMode)) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode, clusterReason); + error.Reason = ClusterInfo->ClusterNodes->ReadableReason(node.NodeId, opts.AvailabilityMode); error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime; return false; } if (node.Tenant - && opts.TenantPolicy != NONE) { - auto tenantReason = ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode, 0, opts.Order); - - if (tenantReason != Ydb::Maintenance::ActionState::ACTION_REASON_OK) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode, tenantReason); - error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime; + && opts.TenantPolicy != NONE + && !ClusterInfo->TenantNodesChecker[node.Tenant]->TryToLockNode(node.NodeId, opts.AvailabilityMode)) + { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = ClusterInfo->TenantNodesChecker[node.Tenant]->ReadableReason(node.NodeId, opts.AvailabilityMode); + error.Deadline = TActivationContext::Now() + State->Config.DefaultRetryTime; - return false; - } + return false; } return true; @@ -664,37 +657,35 @@ bool TCms::TryToLockVDisk(const TActionOptions& opts, return false; } - ui32 tempLocksCount = 0; - for (auto& vdiskId : group.VDisks) { - if (vdisk.VDiskId == vdiskId) - continue; + auto counters = CreateErasureCounter(ClusterInfo->BSGroup(groupId).Erasure.GetErasure(), vdisk, groupId); + counters->CountGroupState(ClusterInfo, State->Config.DefaultRetryTime, duration, error); - if (!ClusterInfo->VDisk(vdiskId).TempLocks.empty() - || !ClusterInfo->Node(ClusterInfo->VDisk(vdiskId).NodeId).TempLocks.empty() - || !ClusterInfo->PDisk(ClusterInfo->VDisk(vdiskId).PDiskId).TempLocks.empty()) { - tempLocksCount += 1; + switch (opts.AvailabilityMode) { + case MODE_MAX_AVAILABILITY: + if (!counters->CheckForMaxAvailability(error, defaultDeadline, opts.PartialPermissionAllowed)) { + return false; } - } - - if (opts.PartialPermissionAllowed && tempLocksCount == 1) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = "You cannot get two or more disks from the same group at the same time"; - error.Deadline = defaultDeadline; - return false; - } - - auto result = group.GroupChecker->TryToLockVDisk(vdisk.VDiskId, opts.AvailabilityMode, 0, opts.Order); - bool resultIsOk = result == Ydb::Maintenance::ActionState::ACTION_REASON_OK; - - if (!resultIsOk && !opts.PartialPermissionAllowed && tempLocksCount > 0) { - error.Code = TStatus::DISALLOW; - error.Reason = "Request is incorrect. You will never get a permissions. Try with PartialPermissionAllowed"; - return false; - } - - if (!resultIsOk) { - error.Code = TStatus::DISALLOW_TEMP; - error.Reason = group.GroupChecker->ReadableReason(vdisk.VDiskId, opts.AvailabilityMode, result); + break; + case MODE_KEEP_AVAILABLE: + if (!counters->CheckForKeepAvailability(ClusterInfo, error, defaultDeadline, opts.PartialPermissionAllowed)) { + return false; + } + break; + case MODE_FORCE_RESTART: + if ( counters->GroupAlreadyHasLockedDisks() && opts.PartialPermissionAllowed) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = "You cannot get two or more disks from the same group at the same time" + " without specifying the PartialPermissionAllowed parameter"; + error.Deadline = defaultDeadline; + return false; + } + // Any number of down disks is OK for this mode. + break; + default: + error.Code = TStatus::WRONG_REQUEST; + error.Reason = Sprintf("Unknown availability mode: %s (%" PRIu32 ")", + EAvailabilityMode_Name(opts.AvailabilityMode).data(), + static_cast<ui32>(opts.AvailabilityMode)); error.Deadline = defaultDeadline; return false; } @@ -1559,9 +1550,13 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev, } } - LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::CMS, "Next request Id: " << State->NextRequestId); - - bool ok = CheckPermissionRequest(rec, resp->Record, scheduled.Request, State->NextRequestId, ctx); + ClusterInfo->LogManager.PushRollbackPoint(); + for (const auto &scheduled_request : State->ScheduledRequests) { + for (auto &action : scheduled_request.second.Request.GetActions()) + ClusterInfo->LogManager.ApplyAction(action, ClusterInfo); + } + bool ok = CheckPermissionRequest(rec, resp->Record, scheduled.Request, ctx); + ClusterInfo->LogManager.RollbackOperations(); // Schedule request if required. if (rec.GetDryRun()) { @@ -1621,12 +1616,20 @@ void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx) auto requestStartTime = TInstant::Now(); + ClusterInfo->LogManager.PushRollbackPoint(); + for (const auto &scheduled_request : State->ScheduledRequests) { + if (scheduled_request.second.Order < request.Order) { + for (auto &action : scheduled_request.second.Request.GetActions()) + ClusterInfo->LogManager.ApplyAction(action, ClusterInfo); + } + } // Deactivate locks of this and later requests to // avoid false conflicts. ClusterInfo->DeactivateScheduledLocks(request.Order); request.Request.SetAvailabilityMode(rec.GetAvailabilityMode()); - bool ok = CheckPermissionRequest(request.Request, resp->Record, scheduled.Request, request.Order, ctx); + bool ok = CheckPermissionRequest(request.Request, resp->Record, scheduled.Request, ctx); ClusterInfo->ReactivateScheduledLocks(); + ClusterInfo->LogManager.RollbackOperations(); // Schedule request if required. if (rec.GetDryRun()) { diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index d2038ec6e6f..9cb688b125a 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -105,14 +105,12 @@ private: NKikimrCms::ETenantPolicy TenantPolicy; NKikimrCms::EAvailabilityMode AvailabilityMode; bool PartialPermissionAllowed; - ui64 Order; TActionOptions(TDuration dur) : PermissionDuration(dur) , TenantPolicy(NKikimrCms::DEFAULT) , AvailabilityMode(NKikimrCms::MODE_MAX_AVAILABILITY) , PartialPermissionAllowed(false) - , Order(0) {} }; @@ -277,7 +275,6 @@ private: bool CheckPermissionRequest(const NKikimrCms::TPermissionRequest &request, NKikimrCms::TPermissionResponse &response, NKikimrCms::TPermissionRequest &scheduled, - const ui64 requestOrder, const TActorContext &ctx); bool IsActionHostValid(const NKikimrCms::TAction &action, TErrorInfo &error) const; bool ParseServices(const NKikimrCms::TAction &action, TServices &services, TErrorInfo &error) const; diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp index 4ec398add89..4b7ea52ce1f 100644 --- a/ydb/core/cms/cms_ut.cpp +++ b/ydb/core/cms/cms_ut.cpp @@ -729,6 +729,92 @@ Y_UNIT_TEST_SUITE(TCmsTest) { env.CheckWalleCheckTask("task-2", TStatus::ALLOW, env.GetNodeId(1)); } + Y_UNIT_TEST(Notifications) + { + TCmsTestEnv env(8); + env.AdvanceCurrentTime(TDuration::Minutes(20)); + + // User is not specified. + env.CheckNotification(TStatus::WRONG_REQUEST, "", env.GetCurrentTime(), + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000)); + // Too old. + env.CheckNotification(TStatus::WRONG_REQUEST, "user", env.GetCurrentTime() - TDuration::Minutes(10), + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000)); + // Store notification user-1. + auto id1 = env.CheckNotification + (TStatus::OK, "user", env.GetCurrentTime() + TDuration::Minutes(10), + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0))); + + // OK to replace the same device before notification start time. + env.CheckPermissionRequest("user", false, true, true, true, TStatus::ALLOW, + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0))); + + // Intersects with notification. + env.CheckPermissionRequest("user", false, true, true, true, TStatus::DISALLOW_TEMP, + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 10 * 60000000, env.PDiskName(1, 0))); + + // Store notification user-2. + auto id2 = env.CheckNotification(TStatus::OK, "user", env.GetCurrentTime(), + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(2, 0))); + // Store notificaiton user1-3. + auto id3 = env.CheckNotification(TStatus::OK, "user1", env.GetCurrentTime(), + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(3, 0))); + // Get notification with no user. + env.CheckGetNotification("", id1, TStatus::WRONG_REQUEST); + // Get user-1. + env.CheckGetNotification("user", id1, TStatus::OK); + // Get with wrong user. + env.CheckGetNotification("user1", id1, TStatus::WRONG_REQUEST); + // Get with wrong id. + env.CheckGetNotification("user", "wrong-id", TStatus::WRONG_REQUEST); + // List notifications for user. + env.CheckListNotifications("user", TStatus::OK, 2); + // List notifications for user1. + env.CheckListNotifications("user1", TStatus::OK, 1); + // List with no user. + env.CheckListNotifications("", TStatus::WRONG_REQUEST, 0); + // Reject notification with no user. + env.CheckRejectNotification("", id1, TStatus::WRONG_REQUEST); + // Reject notification with wrong user. + env.CheckRejectNotification("user1", id1, TStatus::WRONG_REQUEST); + // Reject user-1 (dry run) + env.CheckRejectNotification("user", id1, TStatus::OK, true); + // Get user-1. + env.CheckGetNotification("user", id1, TStatus::OK); + // Reject user1-3. + env.CheckRejectNotification("user1", id3, TStatus::OK); + // Reject user-2. + env.CheckRejectNotification("user", id2, TStatus::OK); + // List notifications for user. + env.CheckListNotifications("user", TStatus::OK, 1); + // List notifications for user1. + env.CheckListNotifications("user1", TStatus::OK, 0); + // Get rejected user1-3. + env.CheckGetNotification("user1", id3, TStatus::WRONG_REQUEST); + // Get rejected user-2. + env.CheckGetNotification("user", id2, TStatus::WRONG_REQUEST); + // Get user-1. + env.CheckGetNotification("user", id1, TStatus::OK); + } + + Y_UNIT_TEST(PermissionDuration) { + TCmsTestEnv env(8); + + // Store notification user-1. + auto id1 = env.CheckNotification + (TStatus::OK, "user", env.GetCurrentTime() + TDuration::Minutes(10), + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0))); + + // Intersects with notification. + const TDuration _10minutes = TDuration::Minutes(10); + env.CheckPermissionRequest("user", false, true, true, true, _10minutes, TStatus::DISALLOW_TEMP, + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), _10minutes.MicroSeconds(), env.PDiskName(1, 0))); + + // OK with default duration. + env.CheckPermissionRequest("user", false, true, true, true, TStatus::ALLOW, + MakeAction(TAction::REPLACE_DEVICES, env.GetNodeId(0), 60000000, env.PDiskName(1, 0))); + } + Y_UNIT_TEST(ActionWithZeroDuration) { TCmsTestEnv env(8); @@ -1223,6 +1309,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage")); TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; + env.RestartCms(); env.CheckPermissionRequest("user", false, true, false, true, MODE_MAX_AVAILABILITY, TStatus::DISALLOW_TEMP, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage")); @@ -1230,12 +1317,14 @@ Y_UNIT_TEST_SUITE(TCmsTest) { TFakeNodeWhiteboardService::Info[env.GetNodeId(7)].Connected = false; TFakeNodeWhiteboardService::Info[env.GetNodeId(4)].Connected = false; TFakeNodeWhiteboardService::Info[env.GetNodeId(1)].Connected = false; + env.RestartCms(); env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage")); // 2dc disabled TFakeNodeWhiteboardService::Info[env.GetNodeId(7)].Connected = true; + env.RestartCms(); env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(7), 60000000, "storage")); @@ -1245,6 +1334,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(5), 60000000, "storage")); TFakeNodeWhiteboardService::Info[env.GetNodeId(5)].Connected = false; + env.RestartCms(); env.CheckPermissionRequest("user", false, true, false, true, MODE_KEEP_AVAILABLE, TStatus::DISALLOW_TEMP, MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(2), 60000000, "storage")); diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp index 444ff73933a..96421cc7a50 100644 --- a/ydb/core/cms/cms_ut_common.cpp +++ b/ydb/core/cms/cms_ut_common.cpp @@ -316,12 +316,10 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC ui32 vdiskId = pdiskIndex * vdiskPerPdisk + vdiskIndex; ui32 groupId = groupShift + vdiskId; ui32 failRealm = 0; - ui32 failDomain = nodeIndex % 8; - if (useMirror3dcErasure) { + if (useMirror3dcErasure) failRealm = (nodeIndex % 9) / 3; - failDomain = (nodeIndex % 9) % 3; - } - TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)failDomain, (ui8)0}; + + TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)(nodeIndex % 8), (ui8)0}; auto &vdisk = node.VDiskStateInfo[id]; VDiskIDFromVDiskID(id, vdisk.MutableVDiskId()); @@ -339,7 +337,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC vdiskConfig.SetGroupId(groupId); vdiskConfig.SetGroupGeneration(1); vdiskConfig.SetFailRealmIdx(failRealm); - vdiskConfig.SetFailDomainIdx(failDomain); + vdiskConfig.SetFailDomainIdx(nodeIndex % 8); config->MutableGroup(groupId)->AddVSlotId() ->CopyFrom(vdiskConfig.GetVSlotId()); diff --git a/ydb/core/cms/erasure_checkers.cpp b/ydb/core/cms/erasure_checkers.cpp index 28634af6f09..b98748c7932 100644 --- a/ydb/core/cms/erasure_checkers.cpp +++ b/ydb/core/cms/erasure_checkers.cpp @@ -1,361 +1,233 @@ #include "erasure_checkers.h" -#include <ydb/core/protos/cms.pb.h> -#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h> - -#include <library/cpp/actors/core/log.h> - -#include <util/string/cast.h> -#include <util/system/backtrace.h> -#include <util/system/yassert.h> - -#include <bitset> -#include <sstream> -#include <vector> - namespace NKikimr::NCms { -using namespace Ydb::Maintenance; +bool TErasureCounterBase::IsDown(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime, TErrorInfo &error) { + const auto &node = info->Node(vdisk.NodeId); + const auto &pdisk = info->PDisk(vdisk.PDiskId); + const auto defaultDeadline = TActivationContext::Now() + retryTime; -IStorageGroupChecker::EVDiskState IStorageGroupChecker::VDiskState(NKikimrCms::EState state) { - switch (state) { - case NKikimrCms::UP: - return VDISK_STATE_UP; - case NKikimrCms::UNKNOWN: - return VDISK_STATE_UNSPECIFIED; - case NKikimrCms::DOWN: - return VDISK_STATE_DOWN; - case NKikimrCms::RESTART: - return VDISK_STATE_RESTART; - default: - Y_FAIL("Unknown EState"); + // Check we received info for PDisk. + if (!pdisk.NodeId) { + ++Down; + error.Reason = TStringBuilder() << "Missing info for " << pdisk.ItemName(); + return false; } -} -TSimpleSharedPtr<IStorageGroupChecker> CreateStorageGroupChecker(TErasureType::EErasureSpecies es, ui32 groupId) { - switch (es) { - case TErasureType::ErasureNone: - case TErasureType::ErasureMirror3: - case TErasureType::Erasure3Plus1Block: - case TErasureType::Erasure3Plus1Stripe: - case TErasureType::Erasure4Plus2Block: - case TErasureType::Erasure3Plus2Block: - case TErasureType::Erasure4Plus2Stripe: - case TErasureType::Erasure3Plus2Stripe: - case TErasureType::ErasureMirror3Plus2: - case TErasureType::Erasure4Plus3Block: - case TErasureType::Erasure4Plus3Stripe: - case TErasureType::Erasure3Plus3Block: - case TErasureType::Erasure3Plus3Stripe: - case TErasureType::Erasure2Plus3Block: - case TErasureType::Erasure2Plus3Stripe: - case TErasureType::Erasure2Plus2Block: - case TErasureType::Erasure2Plus2Stripe: - case TErasureType::ErasureMirror3of4: - return TSimpleSharedPtr<IStorageGroupChecker>(new TDefaultErasureChecker(groupId)); - case TErasureType::ErasureMirror3dc: - return TSimpleSharedPtr<IStorageGroupChecker>(new TMirror3dcChecker(groupId)); - default: - Y_FAIL("Unknown erasure type: %d", es); - } + return (node.NodeId != VDisk.NodeId && node.IsDown(error, defaultDeadline)) + || (pdisk.PDiskId != VDisk.PDiskId && pdisk.IsDown(error, defaultDeadline)) + || vdisk.IsDown(error, defaultDeadline); } +bool TErasureCounterBase::IsLocked(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime, + TDuration &duration, TErrorInfo &error) +{ + const auto &node = info->Node(vdisk.NodeId); + const auto &pdisk = info->PDisk(vdisk.PDiskId); -void TErasureCheckerBase::AddVDisk(const TVDiskID& vdiskId) { - if (DiskToState.contains(vdiskId)) { - return; - } - DiskToState[vdiskId].State = VDISK_STATE_UNSPECIFIED; -} - -void TErasureCheckerBase::UpdateVDisk(const TVDiskID& vdiskId, EState state) { - AddVDisk(vdiskId); - - const auto newState = VDiskState(state); - - // The disk is marked based on the information obtained by InfoCollector. - // If we marked the disk DOWN, it means that there was a reason - if (DiskToState[vdiskId].State == VDISK_STATE_DOWN - && newState == VDISK_STATE_UP) - return; - - if (DiskToState[vdiskId].State == VDISK_STATE_DOWN) { - --DownVDisksCount; - } - - if (DiskToState[vdiskId].State == VDISK_STATE_LOCKED || - DiskToState[vdiskId].State == VDISK_STATE_RESTART) { - --LockedVDisksCount; - } - - DiskToState[vdiskId].State = newState; - - if (newState == VDISK_STATE_RESTART || newState == VDISK_STATE_LOCKED) { - ++LockedVDisksCount; - } - - if (newState == VDISK_STATE_DOWN) { - ++DownVDisksCount; + // Check we received info for VDisk. + if (!vdisk.NodeId || !vdisk.PDiskId) { + ++Down; + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << "Missing info for " << vdisk.ItemName(); + return false; } -} -void TErasureCheckerBase::LockVDisk(const TVDiskID& vdiskId) { - Y_VERIFY(DiskToState.contains(vdiskId)); - - ++LockedVDisksCount; - if (DiskToState[vdiskId].State == VDISK_STATE_DOWN) { - DiskToState[vdiskId].State = VDISK_STATE_RESTART; - --DownVDisksCount; - } else { - DiskToState[vdiskId].State = VDISK_STATE_LOCKED; - } + return node.IsLocked(error, retryTime, TActivationContext::Now(), duration) + || pdisk.IsLocked(error, retryTime, TActivationContext::Now(), duration) + || vdisk.IsLocked(error, retryTime, TActivationContext::Now(), duration); } -void TErasureCheckerBase::UnlockVDisk(const TVDiskID& vdiskId) { - Y_VERIFY(DiskToState.contains(vdiskId)); - - --LockedVDisksCount; - if (DiskToState[vdiskId].State == VDISK_STATE_RESTART) { - DiskToState[vdiskId].State = VDISK_STATE_DOWN; - ++DownVDisksCount; - } else { - DiskToState[vdiskId].State = VDISK_STATE_UP; - } +bool TErasureCounterBase::GroupAlreadyHasLockedDisks() const { + return HasAlreadyLockedDisks; } -void TErasureCheckerBase::EmplaceTask(const TVDiskID &vdiskId, i32 priority, - ui64 order, const std::string &taskUId) { - - auto& priorities = DiskToState[vdiskId].Priorities; - auto it = priorities.lower_bound(TVDiskState::TTaskPriority(priority, order, "")); - - if (it != priorities.end() && (it->Order == order && it->Priority == priority)) { - if (it->TaskUId == taskUId) { - return; +bool TErasureCounterBase::CheckForMaxAvailability(TErrorInfo &error, TInstant &defaultDeadline, bool allowPartial) const { + if (Locked + Down > 1) { + if (HasAlreadyLockedDisks && !allowPartial) { + error.Code = TStatus::DISALLOW; + error.Reason = "The request is incorrect: too many disks from the one group. " + "Fix the request or set PartialPermissionAllowed to true"; + return false; } - Y_FAIL("Task with the same priority and order already exists"); - } else { - priorities.emplace_hint(it, priority, order, taskUId); - } -} - -void TErasureCheckerBase::RemoveTask(const std::string &taskUId) { - auto taskUIdsEqual = [&taskUId](const TVDiskState::TTaskPriority &p) { - return p.TaskUId == taskUId; - }; - - for (auto &[vdiskId, vdiskState] : DiskToState) { - auto it = std::find_if(vdiskState.Priorities.begin(), - vdiskState.Priorities.end(), taskUIdsEqual); - - if (it == vdiskState.Priorities.end()) { - continue; - } - - vdiskState.Priorities.erase(it); + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << "Issue in affected group " << GroupId + << ". " << "Too many locked and down vdisks: " << Locked + Down; + error.Deadline = defaultDeadline; + return false; } + return true; } -ActionState::ActionReason TDefaultErasureChecker::TryToLockVDisk(const TVDiskID &vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const { - Y_VERIFY(DiskToState.contains(vdiskId)); - - const auto& diskState = DiskToState.at(vdiskId); - - if (diskState.State == VDISK_STATE_RESTART - || diskState.State == VDISK_STATE_LOCKED) { - return ActionState::ACTION_REASON_ALREADY_LOCKED; - } - - auto taskPriority = TVDiskState::TTaskPriority(priority, order, ""); - if (!diskState.Priorities.empty() && taskPriority < *diskState.Priorities.rbegin()) { - return ActionState::ACTION_REASON_LOW_PRIORITY; +void TDefaultErasureCounter::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration retryTime, + TDuration duration, TErrorInfo &error) +{ + Y_VERIFY_DEBUG(vdisk.VDiskId != VDisk.VDiskId); + + // Check locks. + TErrorInfo err; + if (IsLocked(vdisk, info, retryTime, duration, err)) { + ++Locked; + error.Code = err.Code; + error.Reason = TStringBuilder() << "Issue in affected group " << GroupId + << ". " << err.Reason; + error.Deadline = Max(error.Deadline, err.Deadline); + return; } - if (mode == NKikimrCms::MODE_FORCE_RESTART) { - return ActionState::ACTION_REASON_OK; + // Check if disk is down. + if (IsDown(vdisk, info, retryTime, err)) { + ++Down; + error.Code = err.Code; + error.Reason = TStringBuilder() << "Issue in affected group " << GroupId + << ". " << err.Reason; + error.Deadline = Max(error.Deadline, err.Deadline); } +} - // Check how many disks are waiting for higher prioriry task to be locked - ui32 priorityLockedCount = 0; - for (auto &[id, vdiskState] : DiskToState) { - if (vdiskState.State != VDISK_STATE_UP) { - continue; - } - - if (!vdiskState.Priorities.empty() && taskPriority < *vdiskState.Priorities.rbegin()) { - ++priorityLockedCount; +bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error, + TInstant &defaultDeadline, bool allowPartial) const +{ + if (HasAlreadyLockedDisks && allowPartial) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = "You cannot get two or more disks from the same group at the same time" + " without specifying the PartialPermissionAllowed parameter"; + error.Deadline = defaultDeadline; + return false; + } + + if (Down + Locked > info->BSGroup(GroupId).Erasure.ParityParts()) { + if (HasAlreadyLockedDisks && !allowPartial) { + error.Code = TStatus::DISALLOW; + error.Reason = "The request is incorrect: too many disks from the one group. " + "Fix the request or set PartialPermissionAllowed to true"; + return false; } + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << "Cannot lock disk " << VDisk.PrettyItemName() + << ". Too many locked nodes for group " << GroupId; + error.Deadline = defaultDeadline; + return false; } + return true; +} - ui32 disksLimit = 0; - if (diskState.State == VDISK_STATE_DOWN) { - disksLimit = 1; - } +bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error, + TInstant &defaultDeadline, bool allowPartial) const +{ + Y_UNUSED(info); - switch (mode) { - case NKikimrCms::MODE_MAX_AVAILABILITY: - if ((LockedVDisksCount + DownVDisksCount + priorityLockedCount) > disksLimit) { - return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS; - } - break; - case NKikimrCms::MODE_KEEP_AVAILABLE: - if ((LockedVDisksCount + DownVDisksCount + priorityLockedCount) >= disksLimit + 2) { - return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS; - } - break; - default: - Y_FAIL("Unexpected Availability mode"); + if (HasAlreadyLockedDisks && allowPartial) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = "You cannot get two or more disks from the same group at the same time" + " without specifying the PartialPermissionAllowed parameter"; + error.Deadline = defaultDeadline; + return false; } - return ActionState::ACTION_REASON_OK; -} - -ActionState::ActionReason TMirror3dcChecker::TryToLockVDisk(const TVDiskID &vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const { - Y_VERIFY(DiskToState.contains(vdiskId)); - - const auto& diskState = DiskToState.at(vdiskId); - const auto taskPriority = TVDiskState::TTaskPriority(priority, order, ""); + if (DataCenterDisabledNodes.size() <= 1) + return true; - if (!diskState.Priorities.empty() && taskPriority < *diskState.Priorities.rbegin()) { - return ActionState::ACTION_REASON_LOW_PRIORITY; + if (DataCenterDisabledNodes.size() == 2 + && (DataCenterDisabledNodes.begin()->second <= 1 + || (++DataCenterDisabledNodes.begin())->second <= 1)) + { + return true; } - if (mode == MODE_FORCE_RESTART) { - return ActionState::ACTION_REASON_OK; + if (HasAlreadyLockedDisks && !allowPartial) { + error.Code = TStatus::DISALLOW; + error.Reason = "The request is incorrect: too many disks from the one group. " + "Fix the request or set PartialPermissionAllowed to true"; + return false; } - if (diskState.State == VDISK_STATE_LOCKED - || diskState.State == VDISK_STATE_RESTART) { - return ActionState::ACTION_REASON_ALREADY_LOCKED; + if (DataCenterDisabledNodes.size() > 2) { + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << "Issue in affected group " << GroupId + << ". Too many data centers have unavailable vdisks: " + << DataCenterDisabledNodes.size(); + error.Deadline = defaultDeadline; + return false; } - const std::vector<std::bitset<9>> MaxOkGroups = { - 0x1E0, 0x1D0, 0x1C8, 0x1C4, 0x1C2, 0x1C1, - 0x138, 0xB8, 0x78, 0x3C, 0x3A, 0x39, - 0x107, 0x87, 0x47, 0x27, 0x17, 0xF, - }; + error.Code = TStatus::DISALLOW_TEMP; + error.Reason = TStringBuilder() << "Issue in affected group " << GroupId + << ". Data centers have too many unavailable vdisks"; + error.Deadline = defaultDeadline; - ui32 priorityLockedCount = 0; - std::bitset<9> groupState(0); - for (auto& [id, state] : DiskToState) { - if (id == vdiskId) - continue; - - if (state.State != VDISK_STATE_UP - || (!state.Priorities.empty() && taskPriority < *state.Priorities.rbegin())) { - groupState |= (1 << (id.FailRealm * 3 + id.FailDomain)); - } - - if (!state.Priorities.empty() && taskPriority < *state.Priorities.rbegin()) { - ++priorityLockedCount; - } - } - groupState |= (1 << (vdiskId.FailRealm * 3 + vdiskId.FailDomain)); + return false; +} - ui32 downVDisks = diskState.State == VDISK_STATE_DOWN ? DownVDisksCount - 1 : DownVDisksCount; - if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) { - if ((downVDisks + LockedVDisksCount + priorityLockedCount) == 0) { - return ActionState::ACTION_REASON_OK; - } - return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS; +void TMirror3dcCounter::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration retryTime, + TDuration duration, TErrorInfo &error) +{ + Y_VERIFY_DEBUG(vdisk.VDiskId != VDisk.VDiskId); + + // Check locks. + TErrorInfo err; + if (IsLocked(vdisk, info, retryTime, duration, err) + || IsDown(vdisk, info, retryTime, err)) { + error.Code = err.Code; + error.Reason = TStringBuilder() << "Issue in affected group " << GroupId + << ". " << err.Reason; + error.Deadline = Max(error.Deadline, err.Deadline); + ++Locked; + ++DataCenterDisabledNodes[vdisk.VDiskId.FailRealm]; } +} - size_t minCount = 9; - for (auto okGroup : MaxOkGroups) { - auto xoredState = (~okGroup) & groupState; - minCount = std::min(minCount, xoredState.count()); +void TMirror3dcCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) { + for (const auto &vdId : info->BSGroup(GroupId).VDisks) { + if (vdId != VDisk.VDiskId) + CountVDisk(info->VDisk(vdId), info, retryTime, duration, error); } + ++Locked; + ++DataCenterDisabledNodes[VDisk.VDiskId.FailRealm]; - if (minCount > 0) { - return ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS; + if (Locked && error.Code == TStatus::DISALLOW) { + HasAlreadyLockedDisks = true; } - - return ActionState::ACTION_REASON_OK; } -std::string TDefaultErasureChecker::ReadableReason(const TVDiskID &vdiskId, - EAvailabilityMode mode, ActionState::ActionReason reason) const { - std::stringstream readableReason; - - if (reason == ActionState::ACTION_REASON_OK) { - readableReason << "Action is OK"; - return readableReason.str(); +void TDefaultErasureCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) { + for (const auto &vdId : info->BSGroup(GroupId).VDisks) { + if (vdId != VDisk.VDiskId) + CountVDisk(info->VDisk(vdId), info, retryTime, duration, error); } - - readableReason << "Cannot lock vdisk" << vdiskId.ToString() << ". "; - - switch (reason) { - case ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS: - readableReason << "Group " << GroupId - << " has too many unavailable vdisks. " - << "Down disks count: " << DownVDisksCount - << ". Locked disks count: " << LockedVDisksCount; - - if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) { - readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode) - << " is " << 2; - } - if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) { - readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode) - << " is " << 1; - } - break; - case ActionState::ACTION_REASON_ALREADY_LOCKED: - // TODO:: add info about lock id - readableReason << "Disk is already locked"; - break; - case ActionState::ACTION_REASON_LOW_PRIORITY: - // TODO:: add info about task with higher priority - readableReason << "Task with higher priority in progress"; - break; - default: - Y_FAIL("Unexpected Reason"); + if (Locked && error.Code == TStatus::DISALLOW) { + HasAlreadyLockedDisks = true; } - - return readableReason.str(); + ++Locked; } -std::string TMirror3dcChecker::ReadableReason(const TVDiskID &vdiskId, - EAvailabilityMode mode, ActionState::ActionReason reason) const { - std::stringstream readableReason; - - if (reason == ActionState::ACTION_REASON_OK) { - readableReason << "Action is OK"; - return readableReason.str(); - } - - readableReason << "Cannot lock vdisk" << vdiskId.ToString() << ". "; - - switch (reason) { - case ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS: - readableReason << "Group " << GroupId - << " has too many unavailable vdisks. " - << "Down disks count: " << DownVDisksCount - << ". Locked disks count: " << LockedVDisksCount; - - if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) { - readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode) - << " is 1"; - } - if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) { - readableReason << ". Limit of unavailable disks for mode " << NKikimrCms::EAvailabilityMode_Name(mode) - << "4, 3 of which are in the same data center"; - } - break; - case ActionState::ACTION_REASON_ALREADY_LOCKED: - // TODO:: add info about lock id - readableReason << "Disk is already locked"; - break; - case ActionState::ACTION_REASON_LOW_PRIORITY: - // TODO:: add info about task with higher priority - readableReason << "Task with higher priority in progress"; - break; +TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo &vdisk, ui32 groupId) { + switch (es) { + case TErasureType::ErasureNone: + case TErasureType::ErasureMirror3: + case TErasureType::Erasure3Plus1Block: + case TErasureType::Erasure3Plus1Stripe: + case TErasureType::Erasure4Plus2Block: + case TErasureType::Erasure3Plus2Block: + case TErasureType::Erasure4Plus2Stripe: + case TErasureType::Erasure3Plus2Stripe: + case TErasureType::ErasureMirror3Plus2: + case TErasureType::Erasure4Plus3Block: + case TErasureType::Erasure4Plus3Stripe: + case TErasureType::Erasure3Plus3Block: + case TErasureType::Erasure3Plus3Stripe: + case TErasureType::Erasure2Plus3Block: + case TErasureType::Erasure2Plus3Stripe: + case TErasureType::Erasure2Plus2Block: + case TErasureType::Erasure2Plus2Stripe: + case TErasureType::ErasureMirror3of4: + return TSimpleSharedPtr<IErasureCounter>(new TDefaultErasureCounter(vdisk, groupId)); + case TErasureType::ErasureMirror3dc: + return TSimpleSharedPtr<IErasureCounter>(new TMirror3dcCounter(vdisk, groupId)); default: - Y_FAIL("Unexpected Reason"); + Y_FAIL("Unknown erasure type: %d", es); } - - return readableReason.str(); } } // namespace NKikimr::NCms diff --git a/ydb/core/cms/erasure_checkers.h b/ydb/core/cms/erasure_checkers.h index c958630e273..0c0de0be766 100644 --- a/ydb/core/cms/erasure_checkers.h +++ b/ydb/core/cms/erasure_checkers.h @@ -1,133 +1,79 @@ #pragma once #include "defs.h" +#include "cluster_info.h" -#include <ydb/core/blobstorage/base/blobstorage_vdiskid.h> #include <ydb/core/erasure/erasure.h> #include <ydb/core/protos/cms.pb.h> -#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h> - -#include <util/generic/queue.h> -#include <util/system/compiler.h> - -#include <algorithm> -#include <functional> -#include <queue> -#include <string> namespace NKikimr::NCms { using namespace NKikimrCms; -class IStorageGroupChecker { -public: - enum EVDiskState : ui32 { - VDISK_STATE_UNSPECIFIED /* "Unspecified" */, - VDISK_STATE_UP /* "Up" */, - VDISK_STATE_LOCKED /* "Locked" */, - VDISK_STATE_RESTART /* "Restart" */, - VDISK_STATE_DOWN /* "Down" */, - VDISK_STATE_SCHEDULED_LOCKED /* "Scheduled locked" */ - }; - -protected: - EVDiskState VDiskState(NKikimrCms::EState state); - +class IErasureCounter { public: - virtual ~IStorageGroupChecker() = default; - - virtual void AddVDisk(const TVDiskID& vdiskId) = 0; - virtual void UpdateVDisk(const TVDiskID& vdiskId, EState state) = 0; - - virtual void LockVDisk(const TVDiskID& vdiskId) = 0; - virtual void UnlockVDisk(const TVDiskID& vdiskId) = 0; + virtual ~IErasureCounter() = default; - virtual void EmplaceTask(const TVDiskID& vdiskId, i32 priority, ui64 order, const std::string& taskUId) = 0; - virtual void RemoveTask(const std::string& taskUId) = 0; - - virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const = 0; - virtual std::string ReadableReason(const TVDiskID& vdiskId, EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const = 0; + virtual bool GroupAlreadyHasLockedDisks() const = 0; + virtual bool CheckForMaxAvailability(TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0; + virtual bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0; + virtual void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0; + virtual void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0; }; -class TErasureCheckerBase : public IStorageGroupChecker { +class TErasureCounterBase: public IErasureCounter { protected: - /** Structure to hold information about vdisk state and priorities and orders of some task. - * - * Requests with equal priority are processed in the order of arrival at CMS. - */ - struct TVDiskState { - public: - struct TTaskPriority { - i32 Priority; - ui64 Order; - std::string TaskUId; - - explicit TTaskPriority(i32 priority, ui64 order, const std::string& taskUId) - : Priority(priority) - , Order(order) - , TaskUId(taskUId) - {} - - bool operator<(const TTaskPriority& rhs) const { - return Priority < rhs.Priority || (Priority == rhs.Priority && Order > rhs.Order); - } - }; - public: - EVDiskState State; - std::set<TTaskPriority> Priorities; - }; + ui32 Down; + ui32 Locked; + const TVDiskInfo& VDisk; + const ui32 GroupId; + bool HasAlreadyLockedDisks; protected: - ui32 GroupId; - - THashMap<TVDiskID, TVDiskState> DiskToState; - ui32 DownVDisksCount; - ui32 LockedVDisksCount; + bool IsDown(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TErrorInfo& error); + bool IsLocked(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TDuration& duration, TErrorInfo& error); public: - explicit TErasureCheckerBase(ui32 groupId) - : GroupId(groupId) - , DownVDisksCount(0) - , LockedVDisksCount(0) + TErasureCounterBase(const TVDiskInfo& vdisk, ui32 groupId) + : Down(0) + , Locked(0) + , VDisk(vdisk) + , GroupId(groupId) + , HasAlreadyLockedDisks(false) { } - virtual ~TErasureCheckerBase() = default; - void AddVDisk(const TVDiskID& vdiskId) override final; - void UpdateVDisk(const TVDiskID& vdiskId, EState state) override final; - - void LockVDisk(const TVDiskID& vdiskId) override final; - void UnlockVDisk(const TVDiskID& vdiskId) override final; - - void EmplaceTask(const TVDiskID &vdiskId, i32 priority, ui64 order, - const std::string &taskUId) override final; - void RemoveTask(const std::string &taskUId) override final; + bool GroupAlreadyHasLockedDisks() const final; + bool CheckForMaxAvailability(TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const final; }; -class TDefaultErasureChecker : public TErasureCheckerBase { +class TDefaultErasureCounter: public TErasureCounterBase { public: - explicit TDefaultErasureChecker(ui32 groupId) - : TErasureCheckerBase(groupId) - {} - - virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const override final; - - virtual std::string ReadableReason(const TVDiskID &vdiskId, - EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const override final; + TDefaultErasureCounter(const TVDiskInfo& vdisk, ui32 groupId) + : TErasureCounterBase(vdisk, groupId) + { + } + + void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override; + bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override; + void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override; }; -class TMirror3dcChecker : public TErasureCheckerBase { -public: - explicit TMirror3dcChecker(ui32 groupId) - : TErasureCheckerBase(groupId) - {} +class TMirror3dcCounter: public TErasureCounterBase { +private: + THashMap<ui8, ui32> DataCenterDisabledNodes; - virtual Ydb::Maintenance::ActionState::ActionReason TryToLockVDisk(const TVDiskID& vdiskId, EAvailabilityMode mode, i32 priority, ui64 order) const override final; +public: + TMirror3dcCounter(const TVDiskInfo& vdisk, ui32 groupId) + : TErasureCounterBase(vdisk, groupId) + { + } - virtual std::string ReadableReason(const TVDiskID &vdiskId, - EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const override final; + void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override; + bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override; + void CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override; }; -TSimpleSharedPtr<IStorageGroupChecker> CreateStorageGroupChecker(TErasureType::EErasureSpecies es, ui32 groupId); +TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo& vdisk, ui32 groupId); } // namespace NKikimr::NCms diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp index 9be56b669a4..8a857b0a63f 100644 --- a/ydb/core/cms/node_checkers.cpp +++ b/ydb/core/cms/node_checkers.cpp @@ -1,6 +1,4 @@ #include "node_checkers.h" -#include "util/system/yassert.h" -#include "ydb/public/api/protos/draft/ydb_maintenance.pb.h" #include <ydb/core/protos/cms.pb.h> @@ -11,8 +9,6 @@ namespace NKikimr::NCms { #define NCH_LOG_D(stream) LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream) #define NCH_LOG_T(stream) LOG_TRACE_S (*TlsActivationContext, NKikimrServices::CMS, "[Nodes Counter] " << stream) -using namespace Ydb::Maintenance; - TNodesLimitsCounterBase::ENodeState INodesChecker::NodeState(NKikimrCms::EState state) { switch (state) { case NKikimrCms::UP: @@ -32,7 +28,7 @@ void TNodesCounterBase::AddNode(ui32 nodeId) { if (NodeToState.contains(nodeId)) { return; } - NodeToState[nodeId].State = NODE_STATE_UNSPECIFIED; + NodeToState[nodeId] = NODE_STATE_UNSPECIFIED; } void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) { @@ -40,17 +36,17 @@ void TNodesCounterBase::UpdateNode(ui32 nodeId, NKikimrCms::EState state) { AddNode(nodeId); } - if (NodeToState[nodeId].State == NODE_STATE_DOWN) { + if (NodeToState[nodeId] == NODE_STATE_DOWN) { --DownNodesCount; } - if (NodeToState[nodeId].State == NODE_STATE_LOCKED || - NodeToState[nodeId].State == NODE_STATE_RESTART) { + if (NodeToState[nodeId] == NODE_STATE_LOCKED || + NodeToState[nodeId] == NODE_STATE_RESTART) { --LockedNodesCount; } const auto nodeState = NodeState(state); - NodeToState[nodeId].State = nodeState; + NodeToState[nodeId] = nodeState; if (nodeState == NODE_STATE_RESTART || nodeState == NODE_STATE_LOCKED) { ++LockedNodesCount; @@ -65,11 +61,11 @@ void TNodesCounterBase::LockNode(ui32 nodeId) { Y_VERIFY(NodeToState.contains(nodeId)); ++LockedNodesCount; - if (NodeToState[nodeId].State == NODE_STATE_DOWN) { - NodeToState[nodeId].State = NODE_STATE_RESTART; + if (NodeToState[nodeId] == NODE_STATE_DOWN) { + NodeToState[nodeId] = NODE_STATE_RESTART; --DownNodesCount; } else { - NodeToState[nodeId].State = NODE_STATE_LOCKED; + NodeToState[nodeId] = NODE_STATE_LOCKED; } } @@ -77,144 +73,89 @@ void TNodesCounterBase::UnlockNode(ui32 nodeId) { Y_VERIFY(NodeToState.contains(nodeId)); --LockedNodesCount; - if (NodeToState[nodeId].State == NODE_STATE_RESTART) { - NodeToState[nodeId].State = NODE_STATE_DOWN; + if (NodeToState[nodeId] == NODE_STATE_RESTART) { + NodeToState[nodeId] = NODE_STATE_DOWN; ++DownNodesCount; } else { - NodeToState[nodeId].State = NODE_STATE_UP; - } -} - -void TNodesCounterBase::EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) { - auto& priorities = NodeToState[nodeId].Priorities; - auto it = priorities.lower_bound(TNodeState::TTaskPriority(priority, order, "")); - - if (it != priorities.end() && (it->Order == order && it->Priority == priority)) { - if (it->TaskUId == taskUId) { - return; - } - Y_FAIL("Task with the same priority and order already exists"); - } else { - priorities.emplace_hint(it, priority, order, taskUId); - } - - NodesWithScheduledTasks.insert(nodeId); -} - -void TNodesCounterBase::RemoveTask(const std::string& taskUId) { - auto taskUIdsEqual = [&taskUId](const TNodeState::TTaskPriority &p) { - return p.TaskUId == taskUId; - }; - - TVector<ui32> NodesToRemove; - for (auto nodeId : NodesWithScheduledTasks) { - auto& nodeState = NodeToState[nodeId]; - auto it = std::find_if(nodeState.Priorities.begin(), - nodeState.Priorities.end(), taskUIdsEqual); - if (it == nodeState.Priorities.end()) { - continue; - } - - nodeState.Priorities.erase(it); - - if (nodeState.Priorities.empty()) { - NodesToRemove.push_back(nodeId); - } - } - - for (auto nodeId : NodesToRemove) { - NodesWithScheduledTasks.erase(nodeId); + NodeToState[nodeId] = NODE_STATE_UP; } } -ActionState::ActionReason TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const { +bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const { Y_VERIFY(NodeToState.contains(nodeId)); + auto nodeState = NodeToState.at(nodeId); - const auto& nodeState = NodeToState.at(nodeId); - const auto taskPriority = TNodeState::TTaskPriority(priority, order, ""); - - if (!nodeState.Priorities.empty() && (taskPriority < *nodeState.Priorities.rbegin())) { - return ActionState::ACTION_REASON_LOW_PRIORITY; - } + bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART; - if (nodeState.State == NODE_STATE_RESTART || - nodeState.State == NODE_STATE_LOCKED || - nodeState.State == NODE_STATE_UNSPECIFIED) { + NCH_LOG_D("Checking Node: " + << nodeId << ", with state: " << ToString(nodeState) + << ", with limit: " << DisabledNodesLimit + << ", with ratio limit: " << DisabledNodesRatioLimit + << ", locked nodes: " << LockedNodesCount + << ", down nodes: " << DownNodesCount); - return ActionState::ACTION_REASON_ALREADY_LOCKED; + // Allow to maintain down/unavailable node + if (nodeState == NODE_STATE_DOWN) { + return true; } - ui32 priorityLockedCount = 0; - for (auto id : NodesWithScheduledTasks) { - Y_VERIFY(!NodeToState.at(id).Priorities.empty()); + if (nodeState == NODE_STATE_RESTART || + nodeState == NODE_STATE_LOCKED || + nodeState == NODE_STATE_UNSPECIFIED) { - if (taskPriority < *NodeToState.at(id).Priorities.rbegin()) { - ++priorityLockedCount; - } + return false; } - ui32 downNodes = nodeState.State == NODE_STATE_DOWN ? DownNodesCount - 1 : DownNodesCount; // Always allow at least one node - if (LockedNodesCount + downNodes + priorityLockedCount == 0) { - return ActionState::ACTION_REASON_OK; + if (LockedNodesCount + DownNodesCount == 0) { + return true; } - bool isForceRestart = mode == NKikimrCms::MODE_FORCE_RESTART; - if (isForceRestart && !LockedNodesCount) { - return ActionState::ACTION_REASON_OK; + return true; } if (DisabledNodesLimit > 0 && - (LockedNodesCount + downNodes + priorityLockedCount + 1 > DisabledNodesLimit)) { - return ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED; + (LockedNodesCount + DownNodesCount + 1 > DisabledNodesLimit)) { + return false; } if (DisabledNodesRatioLimit > 0 && - ((LockedNodesCount + downNodes + priorityLockedCount + 1) * 100 > (NodeToState.size() * DisabledNodesRatioLimit))) { - return ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED; + ((LockedNodesCount + DownNodesCount + 1) * 100 > + (NodeToState.size() * DisabledNodesRatioLimit))) { + return false; } - return ActionState::ACTION_REASON_OK; + return true; } -ActionState::ActionReason TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const { +bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const { Y_VERIFY(NodeToState.contains(nodeId)); + auto nodeState = NodeToState.at(nodeId); - const auto& nodeState = NodeToState.at(nodeId); - const auto taskPriority = TNodeState::TTaskPriority(priority, order, ""); + NCH_LOG_D("Checking limits for sys tablet: " << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) + << ", on node: " << nodeId + << ", with state: " << ToString(nodeState) + << ", locked nodes: " << LockedNodesCount + << ", down nodes: " << DownNodesCount); - if (!nodeState.Priorities.empty() && (taskPriority < *nodeState.Priorities.rbegin())) { - return ActionState::ACTION_REASON_LOW_PRIORITY; - } - - if (nodeState.State == NODE_STATE_RESTART || - nodeState.State == NODE_STATE_LOCKED || - nodeState.State == NODE_STATE_UNSPECIFIED) { - - return ActionState::ACTION_REASON_ALREADY_LOCKED; - } - - ui32 priorityLockedCount = 0; - for (auto id : NodesWithScheduledTasks) { - Y_VERIFY(!NodeToState.at(id).Priorities.empty()); + if (nodeState == NODE_STATE_RESTART || + nodeState == NODE_STATE_LOCKED || + nodeState == NODE_STATE_UNSPECIFIED) { - if (taskPriority < *NodeToState.at(id).Priorities.rbegin()) { - ++priorityLockedCount; - } + return false; } - ui32 downNodes = nodeState.State == NODE_STATE_DOWN ? DownNodesCount - 1 : DownNodesCount; ui32 tabletNodes = NodeToState.size(); switch (mode) { case NKikimrCms::MODE_MAX_AVAILABILITY: - if (tabletNodes > 1 && (downNodes + LockedNodesCount + priorityLockedCount + 1) * 2 > tabletNodes){ - return ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED; + if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) * 2 > tabletNodes){ + return false; } break; case NKikimrCms::MODE_KEEP_AVAILABLE: - if (tabletNodes > 1 && (downNodes + LockedNodesCount + priorityLockedCount + 1) > tabletNodes - 1) { - return ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED; + if (tabletNodes > 1 && (DownNodesCount + LockedNodesCount + 1) > tabletNodes - 1) { + return false; } break; case NKikimrCms::MODE_FORCE_RESTART: @@ -223,124 +164,7 @@ ActionState::ActionReason TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NK Y_FAIL("Unknown availability mode"); } - return ActionState::ACTION_REASON_OK; + return true; } -std::string TTenantLimitsCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, - ActionState::ActionReason reason) const { - Y_UNUSED(mode); - - std::stringstream readableReason; - - if (reason == ActionState::ACTION_REASON_OK) { - readableReason << "Action is OK"; - return readableReason.str(); - } - - readableReason << "Cannot lock node: " << nodeId; - - switch (reason) { - case ActionState::ACTION_REASON_ALREADY_LOCKED: - readableReason << "Node is already locked"; - break; - case ActionState::ACTION_REASON_LOW_PRIORITY: - readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId; - break; - case ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED: - readableReason << ". Too many locked nodes for tenant " << TenantName - << "; locked: " << LockedNodesCount - << "; down: " << DownNodesCount - << "; total: " << NodeToState.size() - << "; limit: " << DisabledNodesLimit - << "; ratio limit: " << DisabledNodesRatioLimit << "%"; - break; - default: - Y_FAIL("Unexpected reason"); - break; - } - return readableReason.str(); -} - -std::string TClusterLimitsCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, - ActionState::ActionReason reason) const { - Y_UNUSED(mode); - - std::stringstream readableReason; - - if (reason == ActionState::ACTION_REASON_OK) { - readableReason << "Action is OK"; - return readableReason.str(); - } - - if (mode == NKikimrCms::MODE_FORCE_RESTART) { - return readableReason.str(); - } - - readableReason << "Cannot lock node: " << nodeId; - - switch (reason) { - case ActionState::ACTION_REASON_ALREADY_LOCKED: - readableReason << "Node is already locked"; - break; - case ActionState::ACTION_REASON_LOW_PRIORITY: - readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId; - break; - case ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED: - readableReason << ". Too many locked nodes in cluster" - << "; locked: " << LockedNodesCount - << "; down: " << DownNodesCount - << "; total: " << NodeToState.size() - << "; limit: " << DisabledNodesLimit - << "; ratio limit: " << DisabledNodesRatioLimit << "%"; - break; - default: - Y_FAIL("Unexpected reason"); - break; - } - - return readableReason.str(); -} - - -std::string TSysTabletsNodesCounter::ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, - ActionState::ActionReason reason) const { - std::stringstream readableReason; - - if (reason == ActionState::ACTION_REASON_OK) { - readableReason << "Action is OK"; - return readableReason.str(); - } - - if (mode == NKikimrCms::MODE_FORCE_RESTART) { - return readableReason.str(); - } - - switch (reason) { - case ActionState::ACTION_REASON_ALREADY_LOCKED: - readableReason << "Node is already locked"; - break; - case ActionState::ACTION_REASON_LOW_PRIORITY: - readableReason << "Task with higher priority in progress: " << (*NodeToState.at(nodeId).Priorities.rbegin()).TaskUId; - break; - case ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED: - readableReason << "Cannot lock node: " << nodeId << ". Tablet " - << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) - << " has too many unavailable nodes. Locked: " - << LockedNodesCount << ". Down: " << DownNodesCount; - - if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) { - readableReason << ". Limit: " << NodeToState.size() / 2 << " (50%)"; - } - - if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) { - readableReason << ". Limit: " << NodeToState.size() - 1; - } - break; - default: - Y_FAIL("Unexpected reason"); - - } - - return readableReason.str(); -} } // namespace NKikimr::NCms diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h index 047a7a5c5ca..72362c84dba 100644 --- a/ydb/core/cms/node_checkers.h +++ b/ydb/core/cms/node_checkers.h @@ -6,7 +6,6 @@ #include <ydb/core/erasure/erasure.h> #include <ydb/core/protos/cms.pb.h> #include <ydb/core/protos/config.pb.h> -#include <ydb/public/api/protos/draft/ydb_maintenance.pb.h> #include <library/cpp/actors/core/log.h> @@ -37,7 +36,6 @@ public: NODE_STATE_DOWN /* "Down" */ }; - protected: static ENodeState NodeState(NKikimrCms::EState state); @@ -50,12 +48,9 @@ public: virtual void LockNode(ui32 nodeId) = 0; virtual void UnlockNode(ui32 nodeId) = 0; - virtual void EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) = 0; - virtual void RemoveTask(const std::string& taskUId) = 0; - - virtual Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const = 0; + virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0; - virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, Ydb::Maintenance::ActionState::ActionReason reason) const = 0; + virtual std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const = 0; }; /** @@ -63,35 +58,7 @@ public: */ class TNodesCounterBase : public INodesChecker { protected: - /** Structure to hold information about vdisk state and priorities and orders of some task. - * - * Requests with equal priority are processed in the order of arrival at CMS. - */ - struct TNodeState { - public: - struct TTaskPriority { - i32 Priority; - ui64 Order; - std::string TaskUId; - - explicit TTaskPriority(i32 priority, ui64 order, const std::string& taskUId) - : Priority(priority) - , Order(order) - , TaskUId(taskUId) - {} - - bool operator<(const TTaskPriority& rhs) const { - return Priority < rhs.Priority || (Priority == rhs.Priority && Order > rhs.Order); - } - }; - public: - ENodeState State; - std::set<TTaskPriority> Priorities; - }; - -protected: - THashMap<ui32, TNodeState> NodeToState; - THashSet<ui32> NodesWithScheduledTasks; + THashMap<ui32, ENodeState> NodeToState; ui32 LockedNodesCount; ui32 DownNodesCount; @@ -106,9 +73,6 @@ public: void AddNode(ui32 nodeId) override; void UpdateNode(ui32 nodeId, NKikimrCms::EState) override; - void EmplaceTask(const ui32 nodeId, i32 priority, ui64 order, const std::string& taskUId) override final; - virtual void RemoveTask(const std::string& taskUId) override final; - void LockNode(ui32 nodeId) override; void UnlockNode(ui32 nodeId) override; }; @@ -139,7 +103,7 @@ public: DisabledNodesRatioLimit = ratioLimit; } - Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const override final; + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final; }; class TTenantLimitsCounter : public TNodesLimitsCounterBase { @@ -153,8 +117,20 @@ public: { } - std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, - Ydb::Maintenance::ActionState::ActionReason reason) const override final; + std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final { + Y_UNUSED(mode); + + std::stringstream reason; + reason << "Cannot lock node: " << nodeId + << ". Too many locked nodes for tenant " << TenantName + << "; locked: " << LockedNodesCount + << "; down: " << DownNodesCount + << "; total: " << NodeToState.size() + << "; limit: " << DisabledNodesLimit + << "; ratio limit: " << DisabledNodesRatioLimit << "%"; + + return reason.str(); + } }; class TClusterLimitsCounter : public TNodesLimitsCounterBase { @@ -164,8 +140,20 @@ public: { } - std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, - Ydb::Maintenance::ActionState::ActionReason reason) const override final; + std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final { + Y_UNUSED(mode); + + std::stringstream reason; + reason << "Cannot lock node: " << nodeId + <<". Too many locked nodes in cluster" + << "; locked: " << LockedNodesCount + << "; down: " << DownNodesCount + << "; total: " << NodeToState.size() + << "; limit: " << DisabledNodesLimit + << "; ratio limit: " << DisabledNodesRatioLimit << "%"; + + return reason.str(); + } }; /** @@ -183,10 +171,30 @@ public: : TabletType(tabletType) {} - Ydb::Maintenance::ActionState::ActionReason TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, i32 priority, ui64 order) const override final; + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final; + + std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode) const override final { + std::stringstream reason; - std::string ReadableReason(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, - Ydb::Maintenance::ActionState::ActionReason reason) const override final; + if (mode == NKikimrCms::MODE_FORCE_RESTART) { + return reason.str(); + } + + reason << "Cannot lock node: " << nodeId + << ". Tablet " + << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) + << " has too many unavailable nodes. Locked: " << LockedNodesCount + << ". Down: " << DownNodesCount; + if (mode == NKikimrCms::MODE_MAX_AVAILABILITY) { + reason << ". Limit: " << NodeToState.size() / 2 << " (50%)"; + } + + if (mode == NKikimrCms::MODE_KEEP_AVAILABLE) { + reason << ". Limit: " << NodeToState.size() - 1; + } + + return reason.str(); + } }; } // namespace NKikimr::NCms diff --git a/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt b/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt index 1cff84774ce..922dc62a7b9 100644 --- a/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/cms/ut/CMakeLists.darwin-x86_64.txt @@ -32,7 +32,6 @@ target_link_options(ydb-core-cms-ut PRIVATE CoreFoundation ) target_sources(ydb-core-cms-ut PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp diff --git a/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt b/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt index 890df0f266e..64ee05e5931 100644 --- a/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/core/cms/ut/CMakeLists.linux-aarch64.txt @@ -35,7 +35,6 @@ target_link_options(ydb-core-cms-ut PRIVATE -ldl ) target_sources(ydb-core-cms-ut PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp diff --git a/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt b/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt index 3d68909dc52..ede8d036c2a 100644 --- a/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/core/cms/ut/CMakeLists.linux-x86_64.txt @@ -36,7 +36,6 @@ target_link_options(ydb-core-cms-ut PRIVATE -ldl ) target_sources(ydb-core-cms-ut PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp diff --git a/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt b/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt index 29a6d9015d2..1dbbee1959a 100644 --- a/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/core/cms/ut/CMakeLists.windows-x86_64.txt @@ -25,7 +25,6 @@ target_link_libraries(ydb-core-cms-ut PUBLIC core-testlib-default ) target_sources(ydb-core-cms-ut PRIVATE - ${CMAKE_SOURCE_DIR}/ydb/core/cms/checkers_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cluster_info_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/core/cms/cms_tenants_ut.cpp diff --git a/ydb/public/api/protos/draft/ydb_maintenance.proto b/ydb/public/api/protos/draft/ydb_maintenance.proto index 332f03e3a63..df66577709d 100644 --- a/ydb/public/api/protos/draft/ydb_maintenance.proto +++ b/ydb/public/api/protos/draft/ydb_maintenance.proto @@ -86,17 +86,11 @@ message ActionState { // State storage broken. Too many (more than (nToSelect - 1) / 2) unavailable rings ACTION_REASON_STATE_STORAGE_BROKEN = 5; // Issue in cluster disabled nodes limit - ACTION_REASON_DISABLED_NODES_LIMIT_REACHED = 6; + ACTION_REASON_DISABLED_NODES_LIMIT_RICHED = 6; // Issue in tenant limits - ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_REACHED = 7; + ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_RICHED = 7; // Wrong request ACTION_REASON_WRONG_REQUEST = 8; - // Low priority - ACTION_REASON_LOW_PRIORITY = 9; - // Lock is granded to this item - ACTION_REASON_ALREADY_LOCKED = 10; - // Limit to nodes with sys tablets - ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED = 11; } Action action = 1; |