diff options
author | Alexander Rutkovsky <alexvru@mail.ru> | 2022-03-28 18:57:23 +0300 |
---|---|---|
committer | Alexander Rutkovsky <alexvru@mail.ru> | 2022-03-28 18:57:23 +0300 |
commit | 7c332b6a8b33c7d576d976c28500ea767fd091d2 (patch) | |
tree | 05a212358bf403d62c1bfcf60ec94518b2c0f5a8 | |
parent | 0e5c7eb9d835a56022a6cb3d78002c332c6bee35 (diff) | |
download | ydb-7c332b6a8b33c7d576d976c28500ea767fd091d2.tar.gz |
Support DECOMMIT_* status in BS_CONTROLLER KIKIMR-14580
ref:b87b05de67cadc810f3d9b3f4ee9b2e9c839b8a3
-rw-r--r-- | ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp | 31 | ||||
-rw-r--r-- | ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt | 1 | ||||
-rw-r--r-- | ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt | 1 | ||||
-rw-r--r-- | ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp | 73 | ||||
-rw-r--r-- | ydb/core/blobstorage/ut_blobstorage/lib/env.h | 12 | ||||
-rw-r--r-- | ydb/core/cms/sentinel.cpp | 4 | ||||
-rw-r--r-- | ydb/core/cms/sentinel_ut.cpp | 4 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/cmds_drive_status.cpp | 5 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/config_fit_groups.cpp | 18 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/error.h | 12 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/group_mapper.cpp | 44 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/group_mapper.h | 15 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/group_mapper_ut.cpp | 13 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/impl.h | 25 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/sys_view.cpp | 13 | ||||
-rw-r--r-- | ydb/core/protos/blobstorage_config.proto | 7 | ||||
-rw-r--r-- | ydb/core/util/testactorsys.cpp | 6 | ||||
-rw-r--r-- | ydb/core/util/testactorsys.h | 2 |
18 files changed, 227 insertions, 59 deletions
diff --git a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp index 1e84f5d8f69..86cc23e1e90 100644 --- a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp +++ b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp @@ -1,4 +1,5 @@ #include "pdisk_mock.h" +#include <ydb/core/blobstorage/base/blobstorage_events.h> #include <ydb/core/util/stlog.h> #include <ydb/core/util/interval_set.h> @@ -238,7 +239,7 @@ TPDiskMockState::TPtr TPDiskMockState::Snapshot() { return res; } -class TPDiskMockActor : public TActor<TPDiskMockActor> { +class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> { enum { EvResume = EventSpaceBegin(TEvents::ES_PRIVATE), }; @@ -251,8 +252,7 @@ class TPDiskMockActor : public TActor<TPDiskMockActor> { public: TPDiskMockActor(TPDiskMockState::TPtr state) - : TActor(&TThis::StateFunc) - , State(std::move(state)) // to keep ownership + : State(std::move(state)) // to keep ownership , Impl(*State->Impl) , Prefix(TStringBuilder() << "PDiskMock[" << Impl.NodeId << ":" << Impl.PDiskId << "] ") { @@ -263,6 +263,30 @@ public: } } + void Bootstrap() { + Become(&TThis::StateFunc); + ReportMetrics(); + } + + void ReportMetrics() { + ui32 usedChunks = 0; + for (const auto& [ownerId, owner] : Impl.Owners) { + usedChunks += owner.CommittedChunks.size() + owner.ReservedChunks.size(); + } + Y_VERIFY(usedChunks <= Impl.TotalChunks); + + auto ev = std::make_unique<TEvBlobStorage::TEvControllerUpdateDiskStatus>(); + auto& record = ev->Record; + auto *p = record.AddPDisksMetrics(); + p->SetPDiskId(Impl.PDiskId); + p->SetAvailableSize((Impl.TotalChunks - usedChunks) * Impl.ChunkSize); + p->SetTotalSize(Impl.TotalChunks * Impl.ChunkSize); + p->SetState(NKikimrBlobStorage::TPDiskState::Normal); + Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), ev.release()); + + Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup); + } + void Handle(NPDisk::TEvYardInit::TPtr ev) { // report message and validate PDisk guid auto *msg = ev->Get(); @@ -663,6 +687,7 @@ public: hFunc(NPDisk::TEvSlay, Handle); hFunc(NPDisk::TEvHarakiri, Handle); hFunc(NPDisk::TEvConfigureScheduler, Handle); + cFunc(TEvents::TSystem::Wakeup, ReportMetrics); ) }; diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt index e7fffd6a221..33829c5e789 100644 --- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt +++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt @@ -23,6 +23,7 @@ target_link_libraries(ydb-core-blobstorage-ut_blobstorage PUBLIC target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/block_race.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/counting_events.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/defrag.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/donor.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/encryption.cpp diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt index 50b835923e5..5a514146bfb 100644 --- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt +++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt @@ -24,6 +24,7 @@ target_link_libraries(ydb-core-blobstorage-ut_blobstorage PUBLIC target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/block_race.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/counting_events.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/defrag.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/donor.cpp ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/encryption.cpp diff --git a/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp b/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp new file mode 100644 index 00000000000..6244762920f --- /dev/null +++ b/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp @@ -0,0 +1,73 @@ +#include <ydb/core/blobstorage/ut_blobstorage/lib/env.h> + +Y_UNIT_TEST_SUITE(Decommit3dc) { + Y_UNIT_TEST(Test) { + TEnvironmentSetup env{{ + .NodeCount = 12, + .Erasure = TBlobStorageGroupType::ErasureMirror3dc, + .NumDataCenters = 4, + }}; + + { + NKikimrBlobStorage::TConfigRequest request; + auto *cmd = request.AddCommand(); + auto *us = cmd->MutableUpdateSettings(); + us->AddEnableDonorMode(true); + us->AddEnableSelfHeal(true); + auto response = env.Invoke(request); + UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription()); + } + + env.CreateBoxAndPool(1, 1); + env.Sim(TDuration::Seconds(30)); + auto config = env.FetchBaseConfig(); + + std::set<ui32> nodesToSettle; + TString datacenterToSettle; + for (const auto& node : config.GetNode()) { + const auto& location = node.GetLocation(); + if (!datacenterToSettle) { + datacenterToSettle = location.GetDataCenter(); + } + if (datacenterToSettle == location.GetDataCenter()) { + nodesToSettle.insert(node.GetNodeId()); + } + } + + NKikimrBlobStorage::TConfigRequest request; + + std::set<std::pair<ui32, ui32>> pdisksToSettle; + for (const auto& pdisk : config.GetPDisk()) { + if (nodesToSettle.count(pdisk.GetNodeId())) { + pdisksToSettle.emplace(pdisk.GetNodeId(), pdisk.GetPDiskId()); + auto *cmd = request.AddCommand(); + auto *ds = cmd->MutableUpdateDriveStatus(); + ds->MutableHostKey()->SetNodeId(pdisk.GetNodeId()); + ds->SetPDiskId(pdisk.GetPDiskId()); + ds->SetStatus(NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING); + } + } + + auto response = env.Invoke(request); + UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription()); + + std::set<std::pair<ui32, ui32>> movedOutPDisks; + for (const auto& [nodeId, pdiskId] : pdisksToSettle) { + request.Clear(); + auto *cmd = request.AddCommand(); + auto *ds = cmd->MutableUpdateDriveStatus(); + ds->MutableHostKey()->SetNodeId(nodeId); + ds->SetPDiskId(pdiskId); + ds->SetStatus(NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT); + movedOutPDisks.emplace(nodeId, pdiskId); + auto response = env.Invoke(request); + UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription()); + env.Sim(TDuration::Seconds(60)); + auto config = env.FetchBaseConfig(); + for (const auto& vslot : config.GetVSlot()) { + const auto& vslotId = vslot.GetVSlotId(); + UNIT_ASSERT(!movedOutPDisks.count({vslotId.GetNodeId(), vslotId.GetPDiskId()})); + } + } + } +} diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/env.h b/ydb/core/blobstorage/ut_blobstorage/lib/env.h index 1cc05f00bf0..eb564483ad3 100644 --- a/ydb/core/blobstorage/ut_blobstorage/lib/env.h +++ b/ydb/core/blobstorage/ut_blobstorage/lib/env.h @@ -29,6 +29,7 @@ struct TEnvironmentSetup { const std::function<void(TTestActorSystem&)> PrepareRuntime; const ui32 ControllerNodeId = 1; const bool Cache = false; + const ui32 NumDataCenters = 0; }; const TSettings Settings; @@ -93,6 +94,11 @@ struct TEnvironmentSetup { Cerr << "RandomSeed# " << seed << Endl; } + ui32 GetNumDataCenters() const { + return Settings.NumDataCenters ? Settings.NumDataCenters : + Settings.Erasure.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc ? 3 : 1; + } + void Initialize() { Runtime = std::make_unique<TTestActorSystem>(Settings.NodeCount); if (Settings.PrepareRuntime) { @@ -102,8 +108,7 @@ struct TEnvironmentSetup { Runtime->Start(); auto *appData = Runtime->GetAppData(); appData->DomainsInfo->AddDomain(TDomainsInfo::TDomain::ConstructEmptyDomain("dom", DomainId).Release()); - Runtime->SetupTabletRuntime(Settings.Erasure.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc, - Settings.ControllerNodeId); + Runtime->SetupTabletRuntime(GetNumDataCenters(), Settings.ControllerNodeId); SetupStaticStorage(); SetupTablet(); SetupStorage(); @@ -115,8 +120,7 @@ struct TEnvironmentSetup { void StartNode(ui32 nodeId) { Runtime->StartNode(nodeId); - Runtime->SetupTabletRuntime(Settings.Erasure.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc, - Settings.ControllerNodeId, nodeId); + Runtime->SetupTabletRuntime(GetNumDataCenters(), Settings.ControllerNodeId, nodeId); if (nodeId == Settings.ControllerNodeId) { SetupStaticStorage(); SetupTablet(); diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index 638b78fe09b..173f1ecbf1d 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -145,13 +145,14 @@ bool TPDiskStatus::IsNewStatusGood() const { switch (Compute(Current, unused)) { case EPDiskStatus::INACTIVE: case EPDiskStatus::ACTIVE: - case EPDiskStatus::SPARE: return true; case EPDiskStatus::UNKNOWN: case EPDiskStatus::FAULTY: case EPDiskStatus::BROKEN: case EPDiskStatus::TO_BE_REMOVED: + case EPDiskStatus::DECOMMIT_PENDING: + case EPDiskStatus::DECOMMIT_IMMINENT: case EPDiskStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_: case EPDiskStatus::EDriveStatus_INT_MAX_SENTINEL_DO_NOT_USE_: return false; @@ -649,6 +650,7 @@ class TStatusChanger: public TSentinelChildBase<TStatusChanger> { command.MutableHostKey()->SetNodeId(Id.NodeId); command.SetPDiskId(Id.DiskId); command.SetStatus(Status); + command.SetProhibitDecommittedStatusChange(true); NTabletPipe::SendData(SelfId(), CmsState->BSControllerPipe, request.Release()); } diff --git a/ydb/core/cms/sentinel_ut.cpp b/ydb/core/cms/sentinel_ut.cpp index 62c0fdf309d..0dac24b073c 100644 --- a/ydb/core/cms/sentinel_ut.cpp +++ b/ydb/core/cms/sentinel_ut.cpp @@ -48,8 +48,10 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) { EPDiskStatus::ACTIVE, EPDiskStatus::INACTIVE, EPDiskStatus::BROKEN, - EPDiskStatus::SPARE, EPDiskStatus::FAULTY, + EPDiskStatus::TO_BE_REMOVED, + EPDiskStatus::DECOMMIT_PENDING, + EPDiskStatus::DECOMMIT_IMMINENT, }; for (const EPDiskStatus status : AllStatuses) { diff --git a/ydb/core/mind/bscontroller/cmds_drive_status.cpp b/ydb/core/mind/bscontroller/cmds_drive_status.cpp index 8abaaa5979c..72b3b8bc29e 100644 --- a/ydb/core/mind/bscontroller/cmds_drive_status.cpp +++ b/ydb/core/mind/bscontroller/cmds_drive_status.cpp @@ -25,6 +25,11 @@ namespace NKikimr::NBsController { TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId); if (cmd.GetStatus() != pdisk->Status) { + using E = NKikimrBlobStorage::EDriveStatus; + if (cmd.GetProhibitDecommittedStatusChange() && (pdisk->Status == E::DECOMMIT_PENDING || pdisk->Status == E::DECOMMIT_IMMINENT)) { + throw TExPDiskStatusRace(pdiskId.NodeId, pdiskId.PDiskId, pdisk->Status); + } + const bool wasGoodExpectedStatus = pdisk->HasGoodExpectedStatus(); pdisk->Status = cmd.GetStatus(); pdisk->StatusTimestamp = Timestamp; diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index cb8801b0f5f..fcf3a3365a2 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -197,11 +197,10 @@ namespace NKikimr { replace = true; break; - case NKikimrBlobStorage::EDriveStatus::SPARE: - break; - case NKikimrBlobStorage::EDriveStatus::FAULTY: case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: + case NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING: + case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT: // groups are moved out asynchronously break; @@ -460,8 +459,17 @@ namespace NKikimr { } // register PDisk in the mapper - return Mapper->RegisterPDisk(id, State.HostRecords->GetLocation(id.NodeId), usable, numSlots, - info.ExpectedSlotCount, groups.data(), groups.size(), availableSpace, info.Operational); + return Mapper->RegisterPDisk({ + .PDiskId = id, + .Location = State.HostRecords->GetLocation(id.NodeId), + .Usable = usable, + .NumSlots = numSlots, + .MaxSlots = info.ExpectedSlotCount, + .Groups = std::move(groups), + .SpaceAvailable = availableSpace, + .Operational = info.Operational, + .Decommitted = info.Decommitted(), + }); } std::map<TVDiskIdShort, TVSlotInfo*> CreateVSlotsForGroup(TGroupInfo *groupInfo, diff --git a/ydb/core/mind/bscontroller/error.h b/ydb/core/mind/bscontroller/error.h index 6cd4a22cb2e..e3f51615c66 100644 --- a/ydb/core/mind/bscontroller/error.h +++ b/ydb/core/mind/bscontroller/error.h @@ -35,6 +35,7 @@ namespace NKikimr::NBsController { P(ItemConfigGenerationProvided, ui64) P(ItemConfigGenerationExpected, ui64) P(GroupId, ui32) + P(Status, NKikimrBlobStorage::EDriveStatus) struct TVDiskIdTraits { using Type = TVDiskID; @@ -205,4 +206,15 @@ namespace NKikimr::NBsController { } }; + struct TExPDiskStatusRace : TExError { + TExPDiskStatusRace(ui32 nodeId, ui32 pdiskId, NKikimrBlobStorage::EDriveStatus status) { + *this << "PDisk status race" << TErrorParams::NodeId(nodeId) << TErrorParams::PDiskId(pdiskId) + << TErrorParams::Status(status); + } + + NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override { + return NKikimrBlobStorage::TConfigResponse::TStatus::kPDiskStatusRace; + } + }; + } // NKikimr::NBsController diff --git a/ydb/core/mind/bscontroller/group_mapper.cpp b/ydb/core/mind/bscontroller/group_mapper.cpp index 84b5ac6d7a8..eb4b898cf9b 100644 --- a/ydb/core/mind/bscontroller/group_mapper.cpp +++ b/ydb/core/mind/bscontroller/group_mapper.cpp @@ -64,31 +64,14 @@ namespace NKikimr::NBsController { struct TFailDomainInfo; - struct TPDiskInfo { - const TNodeLocation Location; - i64 SpaceAvailable; - bool Usable; - ui32 NumSlots; - const ui32 MaxSlots; + struct TPDiskInfo : TPDiskRecord { TPDiskLayoutPosition Position; - const TPDiskId PDiskId; - TStackVec<ui32, 32> Groups; - const bool Operational; TFailDomainInfo *FailDomain; bool Matching = false; - TPDiskInfo(TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots, TPDiskLayoutPosition position, - const TPDiskId& pdiskId, const ui32 groupIds[], size_t numGroups, i64 spaceAvailable, - bool operational, TFailDomainInfo *failDomain) - : Location(std::move(location)) - , SpaceAvailable(spaceAvailable) - , Usable(usable) - , NumSlots(numSlots) - , MaxSlots(maxSlots) + TPDiskInfo(const TPDiskRecord& pdisk, TPDiskLayoutPosition position, TFailDomainInfo *failDomain) + : TPDiskRecord(pdisk) , Position(std::move(position)) - , PDiskId(pdiskId) - , Groups(groupIds, groupIds + numGroups) - , Operational(operational) , FailDomain(failDomain) { std::sort(Groups.begin(), Groups.end()); @@ -99,7 +82,7 @@ namespace NKikimr::NBsController { } bool IsUsable() const { - return Usable && NumSlots < MaxSlots; + return Usable && !Decommitted && NumSlots < MaxSlots; } void InsertGroup(ui32 groupId) { @@ -192,7 +175,7 @@ namespace NKikimr::NBsController { const TPDiskInfo& pdisk = it->second; // register the disk in context - if (!AddDisk(pdisk, failRealmIdx, domainThroughIdx, error)) { + if (!pdisk.Decommitted && !AddDisk(pdisk, failRealmIdx, domainThroughIdx, error)) { return false; } } @@ -396,16 +379,14 @@ namespace NKikimr::NBsController { , Randomize(randomize) {} - bool RegisterPDisk(TPDiskId pdiskId, TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots, - const ui32 groupIds[], size_t numGroups, i64 spaceAvailable, bool operational) { + bool RegisterPDisk(const TPDiskRecord& pdisk) { // calculate disk position - const TPDiskLayoutPosition p(DomainMapper, location, pdiskId, Geom); + const TPDiskLayoutPosition p(DomainMapper, pdisk.Location, pdisk.PDiskId, Geom); // insert PDisk into specific map TPDisks::iterator it; bool inserted; - std::tie(it, inserted) = PDisks.try_emplace(pdiskId, std::move(location), usable, numSlots, maxSlots, - p, pdiskId, groupIds, numGroups, spaceAvailable, operational, &Box(p)); + std::tie(it, inserted) = PDisks.try_emplace(pdisk.PDiskId, pdisk, p, &Box(p)); if (inserted) { it->second.FailDomain->push_back(&*it); } @@ -469,6 +450,9 @@ namespace NKikimr::NBsController { if (!it->second.Usable) { s << std::exchange(minus, "") << "u"; } + if (it->second.Decommitted) { + s << std::exchange(minus, "") << "d"; + } if (it->second.NumSlots >= it->second.MaxSlots) { s << std::exchange(minus, "") << "m"; } @@ -770,10 +754,8 @@ namespace NKikimr::NBsController { TGroupMapper::~TGroupMapper() = default; - bool TGroupMapper::RegisterPDisk(TPDiskId pdiskId, TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots, - const ui32 groupIds[], size_t numGroups, i64 spaceAvailable, bool operational) { - return Impl->RegisterPDisk(pdiskId, std::move(location), usable, numSlots, maxSlots, groupIds, numGroups, - spaceAvailable, operational); + bool TGroupMapper::RegisterPDisk(const TPDiskRecord& pdisk) { + return Impl->RegisterPDisk(pdisk); } void TGroupMapper::UnregisterPDisk(TPDiskId pdiskId) { diff --git a/ydb/core/mind/bscontroller/group_mapper.h b/ydb/core/mind/bscontroller/group_mapper.h index f66ba3171c3..46dbb11c8da 100644 --- a/ydb/core/mind/bscontroller/group_mapper.h +++ b/ydb/core/mind/bscontroller/group_mapper.h @@ -18,13 +18,24 @@ namespace NKikimr { using TGroupDefinition = TVector<TVector<TVector<TPDiskId>>>; // Realm/Domain/Disk using TForbiddenPDisks = std::unordered_set<TPDiskId, THash<TPDiskId>>; + struct TPDiskRecord { + const TPDiskId PDiskId; + const TNodeLocation Location; + const bool Usable; + ui32 NumSlots; + const ui32 MaxSlots; + TStackVec<ui32, 16> Groups; + i64 SpaceAvailable; + const bool Operational; + const bool Decommitted; + }; + public: TGroupMapper(TGroupGeometryInfo geom, bool randomize = false); ~TGroupMapper(); // Register PDisk inside mapper to use it in subsequent map operations - bool RegisterPDisk(TPDiskId pdiskId, TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots, - const ui32 groupIds[], size_t numGroups, i64 spaceAvailable, bool operational); + bool RegisterPDisk(const TPDiskRecord& pdisk); // Remove PDisk from the table. void UnregisterPDisk(TPDiskId pdiskId); diff --git a/ydb/core/mind/bscontroller/group_mapper_ut.cpp b/ydb/core/mind/bscontroller/group_mapper_ut.cpp index 0dcb5be0863..88800829ddd 100644 --- a/ydb/core/mind/bscontroller/group_mapper_ut.cpp +++ b/ydb/core/mind/bscontroller/group_mapper_ut.cpp @@ -306,8 +306,17 @@ public: } for (const auto& pair : PDisks) { auto& g = groupDisks[pair.first]; - mapper.RegisterPDisk(pair.first, pair.second.GetLocation(), !unusableDisks.count(pair.first), - pair.second.NumSlots, maxSlots, g.data(), g.size(), 0, nonoperationalDisks.count(pair.first)); + mapper.RegisterPDisk({ + .PDiskId = pair.first, + .Location = pair.second.GetLocation(), + .Usable = !unusableDisks.count(pair.first), + .NumSlots = pair.second.NumSlots, + .MaxSlots = maxSlots, + .Groups{g.begin(), g.end()}, + .SpaceAvailable = 0, + .Operational = static_cast<bool>(nonoperationalDisks.count(pair.first)), + .Decommitted = false, + }); } } }; diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index 9e409917f63..3e5c67412f9 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -403,6 +403,7 @@ public: switch (Status) { case NKikimrBlobStorage::EDriveStatus::FAULTY: case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: + case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT: return true; default: return false; @@ -410,7 +411,24 @@ public: } bool BadInTermsOfSelfHeal() const { - return ShouldBeSettledBySelfHeal() || Status == NKikimrBlobStorage::EDriveStatus::INACTIVE; + switch (Status) { + case NKikimrBlobStorage::EDriveStatus::FAULTY: + case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: + case NKikimrBlobStorage::EDriveStatus::INACTIVE: + return true; + default: + return false; + } + } + + bool Decommitted() const { + switch (Status) { + case NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING: + case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT: + return true; + default: + return false; + } } std::tuple<bool, bool> GetSelfHealStatusTuple() const { @@ -425,9 +443,10 @@ public: case NKikimrBlobStorage::EDriveStatus::ACTIVE: case NKikimrBlobStorage::EDriveStatus::INACTIVE: - case NKikimrBlobStorage::EDriveStatus::SPARE: case NKikimrBlobStorage::EDriveStatus::FAULTY: case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED: + case NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING: + case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT: return true; case NKikimrBlobStorage::EDriveStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_: @@ -487,7 +506,7 @@ public: struct TGroupStatus { // status derived from the actual state of VDisks (IsReady() to be exact) NKikimrBlobStorage::TGroupStatus::E OperatingStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN; - // status derived by adding underlying PDisk status (FAULTY&BROKEN are assumed to be not working ones) + // status derived by adding underlying PDisk status (some of them are assumed to be not working ones) NKikimrBlobStorage::TGroupStatus::E ExpectedStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN; } Status; diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp index ba9b50cde00..71215bb35e7 100644 --- a/ydb/core/mind/bscontroller/sys_view.cpp +++ b/ydb/core/mind/bscontroller/sys_view.cpp @@ -289,8 +289,17 @@ public: const auto readCentric = pdisk.HasReadCentric() ? MakeMaybe(pdisk.GetReadCentric()) : Nothing(); if (filter.MatchPDisk(pdisk.GetCategory(), sharedWithOs, readCentric)) { const TNodeLocation& location = HostRecords->GetLocation(pdiskId.NodeId); - const bool ok = mapper.RegisterPDisk(pdiskId, location, true, pdisk.GetNumActiveSlots(), - pdisk.GetExpectedSlotCount(), nullptr, 0, 0, true); + const bool ok = mapper.RegisterPDisk({ + .PDiskId = pdiskId, + .Location = location, + .Usable = true, + .NumSlots = pdisk.GetNumActiveSlots(), + .MaxSlots = pdisk.GetExpectedSlotCount(), + .Groups = {}, + .SpaceAvailable = 0, + .Operational = true, + .Decommitted = false, + }); Y_VERIFY(ok); break; } diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto index 10d0a29e6c3..e32a2dfbad9 100644 --- a/ydb/core/protos/blobstorage_config.proto +++ b/ydb/core/protos/blobstorage_config.proto @@ -186,9 +186,11 @@ enum EDriveStatus { ACTIVE = 1; // working as expected INACTIVE = 2; // new groups are not created over this drive, but existing ones continue to work as expected BROKEN = 3; // drive is not working, groups are automatically moved out of this drive upon reception of this status - SPARE = 4; // spare drive -- groups are created only when being moved from BROKEN drives + reserved 4; FAULTY = 5; // drive is expected to become BROKEN soon, new groups are not created, old groups are asynchronously moved out from this drive TO_BE_REMOVED = 6; // same as INACTIVE, but drive is counted in fault model as not working + DECOMMIT_PENDING = 7; // drive is going to be removed soon, but SelfHeal logic do not remove it automatically + DECOMMIT_IMMINENT = 8; // drive is going to be removed automatically } message TGroupStatus { @@ -226,6 +228,7 @@ message TUpdateDriveStatus { uint32 PDiskId = 4; // may be set instead of path to identify PDisk string Serial = 5; // may be set instead of path and PDiskId to identify PDisk uint64 StatusChangeTimestamp = 6; // used only in return of ReadDriveStatus + bool ProhibitDecommittedStatusChange = 7; // used by CMS to prevent changing status from DECOMMIT_* } message TReadDriveStatus { @@ -613,6 +616,7 @@ message TConfigResponse { kDiskIsNotDonor = 8; kAlready = 9; kMayGetDegraded = 10; + kPDiskStatusRace = 11; } message TFailParam { @@ -630,6 +634,7 @@ message TConfigResponse { uint32 GroupId = 11; NKikimrBlobStorage.TVDiskID VDiskId = 12; NKikimrBlobStorage.TVSlotId VSlotId = 13; + EDriveStatus Status = 14; } } diff --git a/ydb/core/util/testactorsys.cpp b/ydb/core/util/testactorsys.cpp index 4a270e4f8c0..a678744a2f1 100644 --- a/ydb/core/util/testactorsys.cpp +++ b/ydb/core/util/testactorsys.cpp @@ -132,12 +132,12 @@ TActorId TTestActorSystem::CreateTestBootstrapper(TTabletStorageInfo *info, std: return Register(CreateBootstrapper(info, bi.Get()), nodeId); } -void TTestActorSystem::SetupTabletRuntime(bool isMirror3dc, ui32 stateStorageNodeId, ui32 targetNodeId) { +void TTestActorSystem::SetupTabletRuntime(ui32 numDataCenters, ui32 stateStorageNodeId, ui32 targetNodeId) { auto setup = MakeIntrusive<TTableNameserverSetup>(); - ui32 nodeCountInDC = (MaxNodeId + 2) / 3; + ui32 nodeCountInDC = (MaxNodeId + numDataCenters - 1) / numDataCenters; for (ui32 nodeId : GetNodes()) { const TString name = Sprintf("127.0.0.%u", nodeId); - ui32 dcNum = isMirror3dc ? ((nodeId + nodeCountInDC - 1) / nodeCountInDC) : 1; + const ui32 dcNum = (nodeId + nodeCountInDC - 1) / nodeCountInDC; NActorsInterconnect::TNodeLocation location; location.SetDataCenter(ToString(dcNum)); location.SetRack(ToString(nodeId)); diff --git a/ydb/core/util/testactorsys.h b/ydb/core/util/testactorsys.h index 722d5a81638..6bbe4dd3eb3 100644 --- a/ydb/core/util/testactorsys.h +++ b/ydb/core/util/testactorsys.h @@ -660,7 +660,7 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // tablet-related utility functions - void SetupTabletRuntime(bool isMirror3dc = false, ui32 stateStorageNodeId = 0, ui32 targetNodeId = 0); + void SetupTabletRuntime(ui32 numDataCenters = 1, ui32 stateStorageNodeId = 0, ui32 targetNodeId = 0); static NTabletPipe::TClientConfig GetPipeConfigWithRetries(); void SendToPipe(ui64 tabletId, const TActorId& sender, IEventBase* payload, ui64 cookie, const NKikimr::NTabletPipe::TClientConfig& pipeConfig); static TTabletStorageInfo *CreateTestTabletInfo(ui64 tabletId, TTabletTypes::EType tabletType, TBlobStorageGroupType::EErasureSpecies erasure, ui32 groupId); |