aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Rutkovsky <alexvru@mail.ru>2022-03-28 18:57:23 +0300
committerAlexander Rutkovsky <alexvru@mail.ru>2022-03-28 18:57:23 +0300
commit7c332b6a8b33c7d576d976c28500ea767fd091d2 (patch)
tree05a212358bf403d62c1bfcf60ec94518b2c0f5a8
parent0e5c7eb9d835a56022a6cb3d78002c332c6bee35 (diff)
downloadydb-7c332b6a8b33c7d576d976c28500ea767fd091d2.tar.gz
Support DECOMMIT_* status in BS_CONTROLLER KIKIMR-14580
ref:b87b05de67cadc810f3d9b3f4ee9b2e9c839b8a3
-rw-r--r--ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp31
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt1
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt1
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp73
-rw-r--r--ydb/core/blobstorage/ut_blobstorage/lib/env.h12
-rw-r--r--ydb/core/cms/sentinel.cpp4
-rw-r--r--ydb/core/cms/sentinel_ut.cpp4
-rw-r--r--ydb/core/mind/bscontroller/cmds_drive_status.cpp5
-rw-r--r--ydb/core/mind/bscontroller/config_fit_groups.cpp18
-rw-r--r--ydb/core/mind/bscontroller/error.h12
-rw-r--r--ydb/core/mind/bscontroller/group_mapper.cpp44
-rw-r--r--ydb/core/mind/bscontroller/group_mapper.h15
-rw-r--r--ydb/core/mind/bscontroller/group_mapper_ut.cpp13
-rw-r--r--ydb/core/mind/bscontroller/impl.h25
-rw-r--r--ydb/core/mind/bscontroller/sys_view.cpp13
-rw-r--r--ydb/core/protos/blobstorage_config.proto7
-rw-r--r--ydb/core/util/testactorsys.cpp6
-rw-r--r--ydb/core/util/testactorsys.h2
18 files changed, 227 insertions, 59 deletions
diff --git a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp
index 1e84f5d8f69..86cc23e1e90 100644
--- a/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp
+++ b/ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp
@@ -1,4 +1,5 @@
#include "pdisk_mock.h"
+#include <ydb/core/blobstorage/base/blobstorage_events.h>
#include <ydb/core/util/stlog.h>
#include <ydb/core/util/interval_set.h>
@@ -238,7 +239,7 @@ TPDiskMockState::TPtr TPDiskMockState::Snapshot() {
return res;
}
-class TPDiskMockActor : public TActor<TPDiskMockActor> {
+class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
enum {
EvResume = EventSpaceBegin(TEvents::ES_PRIVATE),
};
@@ -251,8 +252,7 @@ class TPDiskMockActor : public TActor<TPDiskMockActor> {
public:
TPDiskMockActor(TPDiskMockState::TPtr state)
- : TActor(&TThis::StateFunc)
- , State(std::move(state)) // to keep ownership
+ : State(std::move(state)) // to keep ownership
, Impl(*State->Impl)
, Prefix(TStringBuilder() << "PDiskMock[" << Impl.NodeId << ":" << Impl.PDiskId << "] ")
{
@@ -263,6 +263,30 @@ public:
}
}
+ void Bootstrap() { // Actor start-up hook: installs StateFunc as the handler and kicks off metric reporting.
+ Become(&TThis::StateFunc);
+ ReportMetrics(); // first report goes out immediately; ReportMetrics schedules the following ones itself
+ }
+
+ void ReportMetrics() { // Builds a TEvControllerUpdateDiskStatus with this mock PDisk's space metrics, sends it to the node warden id of the local node, and schedules the next run.
+ ui32 usedChunks = 0;
+ for (const auto& [ownerId, owner] : Impl.Owners) {
+ usedChunks += owner.CommittedChunks.size() + owner.ReservedChunks.size(); // committed and reserved chunks both count as used
+ }
+ Y_VERIFY(usedChunks <= Impl.TotalChunks); // keeps the subtraction below from going below zero
+
+ auto ev = std::make_unique<TEvBlobStorage::TEvControllerUpdateDiskStatus>();
+ auto& record = ev->Record;
+ auto *p = record.AddPDisksMetrics();
+ p->SetPDiskId(Impl.PDiskId);
+ p->SetAvailableSize((Impl.TotalChunks - usedChunks) * Impl.ChunkSize); // NOTE(review): if TotalChunks and ChunkSize are both 32-bit this product can overflow -- confirm the field types
+ p->SetTotalSize(Impl.TotalChunks * Impl.ChunkSize); // NOTE(review): same overflow question as the line above
+ p->SetState(NKikimrBlobStorage::TPDiskState::Normal); // the state is hard-coded to Normal here
+ Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), ev.release()); // the event is released from the unique_ptr and handed to Send
+
+ Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup); // re-arm: StateFunc maps Wakeup back to ReportMetrics, giving a 5-second period
+ }
+
void Handle(NPDisk::TEvYardInit::TPtr ev) {
// report message and validate PDisk guid
auto *msg = ev->Get();
@@ -663,6 +687,7 @@ public:
hFunc(NPDisk::TEvSlay, Handle);
hFunc(NPDisk::TEvHarakiri, Handle);
hFunc(NPDisk::TEvConfigureScheduler, Handle);
+ cFunc(TEvents::TSystem::Wakeup, ReportMetrics);
)
};
diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt
index e7fffd6a221..33829c5e789 100644
--- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt
+++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.darwin.txt
@@ -23,6 +23,7 @@ target_link_libraries(ydb-core-blobstorage-ut_blobstorage PUBLIC
target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/block_race.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/counting_events.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/defrag.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/donor.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/encryption.cpp
diff --git a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt
index 50b835923e5..5a514146bfb 100644
--- a/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt
+++ b/ydb/core/blobstorage/ut_blobstorage/CMakeLists.linux.txt
@@ -24,6 +24,7 @@ target_link_libraries(ydb-core-blobstorage-ut_blobstorage PUBLIC
target_sources(ydb-core-blobstorage-ut_blobstorage PRIVATE
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/block_race.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/counting_events.cpp
+ ${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/defrag.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/donor.cpp
${CMAKE_SOURCE_DIR}/ydb/core/blobstorage/ut_blobstorage/encryption.cpp
diff --git a/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp b/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp
new file mode 100644
index 00000000000..6244762920f
--- /dev/null
+++ b/ydb/core/blobstorage/ut_blobstorage/decommit_3dc.cpp
@@ -0,0 +1,73 @@
+#include <ydb/core/blobstorage/ut_blobstorage/lib/env.h>
+
+Y_UNIT_TEST_SUITE(Decommit3dc) { // Checks that PDisks switched to DECOMMIT_IMMINENT lose all their VSlots in a mirror-3-dc setup.
+ Y_UNIT_TEST(Test) {
+ TEnvironmentSetup env{{
+ .NodeCount = 12,
+ .Erasure = TBlobStorageGroupType::ErasureMirror3dc,
+ .NumDataCenters = 4, // presumably one data center more than the group needs, so slots have somewhere to move -- confirm
+ }};
+
+ { // step 1: turn on donor mode and self-heal through an UpdateSettings command
+ NKikimrBlobStorage::TConfigRequest request;
+ auto *cmd = request.AddCommand();
+ auto *us = cmd->MutableUpdateSettings();
+ us->AddEnableDonorMode(true);
+ us->AddEnableSelfHeal(true);
+ auto response = env.Invoke(request);
+ UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription());
+ }
+
+ env.CreateBoxAndPool(1, 1);
+ env.Sim(TDuration::Seconds(30)); // advance simulated time before reading the configuration
+ auto config = env.FetchBaseConfig();
+
+ std::set<ui32> nodesToSettle; // ids of all nodes located in the chosen data center
+ TString datacenterToSettle; // stays empty until the first node is seen
+ for (const auto& node : config.GetNode()) {
+ const auto& location = node.GetLocation();
+ if (!datacenterToSettle) {
+ datacenterToSettle = location.GetDataCenter(); // the first listed node decides which data center is decommitted
+ }
+ if (datacenterToSettle == location.GetDataCenter()) {
+ nodesToSettle.insert(node.GetNodeId());
+ }
+ }
+
+ NKikimrBlobStorage::TConfigRequest request; // step 2: one batch marking every PDisk of the chosen data center DECOMMIT_PENDING
+
+ std::set<std::pair<ui32, ui32>> pdisksToSettle; // (nodeId, pdiskId) pairs to decommit
+ for (const auto& pdisk : config.GetPDisk()) {
+ if (nodesToSettle.count(pdisk.GetNodeId())) {
+ pdisksToSettle.emplace(pdisk.GetNodeId(), pdisk.GetPDiskId());
+ auto *cmd = request.AddCommand();
+ auto *ds = cmd->MutableUpdateDriveStatus();
+ ds->MutableHostKey()->SetNodeId(pdisk.GetNodeId());
+ ds->SetPDiskId(pdisk.GetPDiskId());
+ ds->SetStatus(NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING);
+ }
+ }
+
+ auto response = env.Invoke(request);
+ UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription());
+
+ std::set<std::pair<ui32, ui32>> movedOutPDisks; // PDisks already switched to DECOMMIT_IMMINENT; grows by one per iteration
+ for (const auto& [nodeId, pdiskId] : pdisksToSettle) { // step 3: switch PDisks to DECOMMIT_IMMINENT one at a time
+ request.Clear(); // reuse the request object for a single-command update
+ auto *cmd = request.AddCommand();
+ auto *ds = cmd->MutableUpdateDriveStatus();
+ ds->MutableHostKey()->SetNodeId(nodeId);
+ ds->SetPDiskId(pdiskId);
+ ds->SetStatus(NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT);
+ movedOutPDisks.emplace(nodeId, pdiskId);
+ auto response = env.Invoke(request); // note: shadows the outer `response`
+ UNIT_ASSERT_C(response.GetSuccess(), response.GetErrorDescription());
+ env.Sim(TDuration::Seconds(60)); // simulated time allowed for slots to be moved off the disk
+ auto config = env.FetchBaseConfig(); // note: shadows the outer `config`; re-read after the simulated minute
+ for (const auto& vslot : config.GetVSlot()) {
+ const auto& vslotId = vslot.GetVSlotId();
+ UNIT_ASSERT(!movedOutPDisks.count({vslotId.GetNodeId(), vslotId.GetPDiskId()})); // no VSlot may remain on any PDisk switched so far
+ }
+ }
+ }
+}
diff --git a/ydb/core/blobstorage/ut_blobstorage/lib/env.h b/ydb/core/blobstorage/ut_blobstorage/lib/env.h
index 1cc05f00bf0..eb564483ad3 100644
--- a/ydb/core/blobstorage/ut_blobstorage/lib/env.h
+++ b/ydb/core/blobstorage/ut_blobstorage/lib/env.h
@@ -29,6 +29,7 @@ struct TEnvironmentSetup {
const std::function<void(TTestActorSystem&)> PrepareRuntime;
const ui32 ControllerNodeId = 1;
const bool Cache = false;
+ const ui32 NumDataCenters = 0;
};
const TSettings Settings;
@@ -93,6 +94,11 @@ struct TEnvironmentSetup {
Cerr << "RandomSeed# " << seed << Endl;
}
+ ui32 GetNumDataCenters() const { // Data center count for the test runtime: the explicit setting when nonzero, otherwise 3 for mirror-3-dc erasure and 1 for anything else.
+ return Settings.NumDataCenters ? Settings.NumDataCenters : // zero (the default in TSettings) means "derive from the erasure type"
+ Settings.Erasure.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc ? 3 : 1;
+ }
+
void Initialize() {
Runtime = std::make_unique<TTestActorSystem>(Settings.NodeCount);
if (Settings.PrepareRuntime) {
@@ -102,8 +108,7 @@ struct TEnvironmentSetup {
Runtime->Start();
auto *appData = Runtime->GetAppData();
appData->DomainsInfo->AddDomain(TDomainsInfo::TDomain::ConstructEmptyDomain("dom", DomainId).Release());
- Runtime->SetupTabletRuntime(Settings.Erasure.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc,
- Settings.ControllerNodeId);
+ Runtime->SetupTabletRuntime(GetNumDataCenters(), Settings.ControllerNodeId);
SetupStaticStorage();
SetupTablet();
SetupStorage();
@@ -115,8 +120,7 @@ struct TEnvironmentSetup {
void StartNode(ui32 nodeId) {
Runtime->StartNode(nodeId);
- Runtime->SetupTabletRuntime(Settings.Erasure.GetErasure() == TBlobStorageGroupType::ErasureMirror3dc,
- Settings.ControllerNodeId, nodeId);
+ Runtime->SetupTabletRuntime(GetNumDataCenters(), Settings.ControllerNodeId, nodeId);
if (nodeId == Settings.ControllerNodeId) {
SetupStaticStorage();
SetupTablet();
diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp
index 638b78fe09b..173f1ecbf1d 100644
--- a/ydb/core/cms/sentinel.cpp
+++ b/ydb/core/cms/sentinel.cpp
@@ -145,13 +145,14 @@ bool TPDiskStatus::IsNewStatusGood() const {
switch (Compute(Current, unused)) {
case EPDiskStatus::INACTIVE:
case EPDiskStatus::ACTIVE:
- case EPDiskStatus::SPARE:
return true;
case EPDiskStatus::UNKNOWN:
case EPDiskStatus::FAULTY:
case EPDiskStatus::BROKEN:
case EPDiskStatus::TO_BE_REMOVED:
+ case EPDiskStatus::DECOMMIT_PENDING:
+ case EPDiskStatus::DECOMMIT_IMMINENT:
case EPDiskStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_:
case EPDiskStatus::EDriveStatus_INT_MAX_SENTINEL_DO_NOT_USE_:
return false;
@@ -649,6 +650,7 @@ class TStatusChanger: public TSentinelChildBase<TStatusChanger> {
command.MutableHostKey()->SetNodeId(Id.NodeId);
command.SetPDiskId(Id.DiskId);
command.SetStatus(Status);
+ command.SetProhibitDecommittedStatusChange(true);
NTabletPipe::SendData(SelfId(), CmsState->BSControllerPipe, request.Release());
}
diff --git a/ydb/core/cms/sentinel_ut.cpp b/ydb/core/cms/sentinel_ut.cpp
index 62c0fdf309d..0dac24b073c 100644
--- a/ydb/core/cms/sentinel_ut.cpp
+++ b/ydb/core/cms/sentinel_ut.cpp
@@ -48,8 +48,10 @@ Y_UNIT_TEST_SUITE(TSentinelBaseTests) {
EPDiskStatus::ACTIVE,
EPDiskStatus::INACTIVE,
EPDiskStatus::BROKEN,
- EPDiskStatus::SPARE,
EPDiskStatus::FAULTY,
+ EPDiskStatus::TO_BE_REMOVED,
+ EPDiskStatus::DECOMMIT_PENDING,
+ EPDiskStatus::DECOMMIT_IMMINENT,
};
for (const EPDiskStatus status : AllStatuses) {
diff --git a/ydb/core/mind/bscontroller/cmds_drive_status.cpp b/ydb/core/mind/bscontroller/cmds_drive_status.cpp
index 8abaaa5979c..72b3b8bc29e 100644
--- a/ydb/core/mind/bscontroller/cmds_drive_status.cpp
+++ b/ydb/core/mind/bscontroller/cmds_drive_status.cpp
@@ -25,6 +25,11 @@ namespace NKikimr::NBsController {
TPDiskInfo *pdisk = PDisks.FindForUpdate(pdiskId);
if (cmd.GetStatus() != pdisk->Status) {
+ using E = NKikimrBlobStorage::EDriveStatus;
+ if (cmd.GetProhibitDecommittedStatusChange() && (pdisk->Status == E::DECOMMIT_PENDING || pdisk->Status == E::DECOMMIT_IMMINENT)) {
+ throw TExPDiskStatusRace(pdiskId.NodeId, pdiskId.PDiskId, pdisk->Status);
+ }
+
const bool wasGoodExpectedStatus = pdisk->HasGoodExpectedStatus();
pdisk->Status = cmd.GetStatus();
pdisk->StatusTimestamp = Timestamp;
diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp
index cb8801b0f5f..fcf3a3365a2 100644
--- a/ydb/core/mind/bscontroller/config_fit_groups.cpp
+++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp
@@ -197,11 +197,10 @@ namespace NKikimr {
replace = true;
break;
- case NKikimrBlobStorage::EDriveStatus::SPARE:
- break;
-
case NKikimrBlobStorage::EDriveStatus::FAULTY:
case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED:
+ case NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING:
+ case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT:
// groups are moved out asynchronously
break;
@@ -460,8 +459,17 @@ namespace NKikimr {
}
// register PDisk in the mapper
- return Mapper->RegisterPDisk(id, State.HostRecords->GetLocation(id.NodeId), usable, numSlots,
- info.ExpectedSlotCount, groups.data(), groups.size(), availableSpace, info.Operational);
+ return Mapper->RegisterPDisk({
+ .PDiskId = id,
+ .Location = State.HostRecords->GetLocation(id.NodeId),
+ .Usable = usable,
+ .NumSlots = numSlots,
+ .MaxSlots = info.ExpectedSlotCount,
+ .Groups = std::move(groups),
+ .SpaceAvailable = availableSpace,
+ .Operational = info.Operational,
+ .Decommitted = info.Decommitted(),
+ });
}
std::map<TVDiskIdShort, TVSlotInfo*> CreateVSlotsForGroup(TGroupInfo *groupInfo,
diff --git a/ydb/core/mind/bscontroller/error.h b/ydb/core/mind/bscontroller/error.h
index 6cd4a22cb2e..e3f51615c66 100644
--- a/ydb/core/mind/bscontroller/error.h
+++ b/ydb/core/mind/bscontroller/error.h
@@ -35,6 +35,7 @@ namespace NKikimr::NBsController {
P(ItemConfigGenerationProvided, ui64)
P(ItemConfigGenerationExpected, ui64)
P(GroupId, ui32)
+ P(Status, NKikimrBlobStorage::EDriveStatus)
struct TVDiskIdTraits {
using Type = TVDiskID;
@@ -205,4 +206,15 @@ namespace NKikimr::NBsController {
}
};
+ struct TExPDiskStatusRace : TExError { // Error thrown when a drive status update is rejected; it carries the node id, PDisk id and the status supplied by the thrower.
+ TExPDiskStatusRace(ui32 nodeId, ui32 pdiskId, NKikimrBlobStorage::EDriveStatus status) {
+ *this << "PDisk status race" << TErrorParams::NodeId(nodeId) << TErrorParams::PDiskId(pdiskId) // message text followed by structured parameters
+ << TErrorParams::Status(status);
+ }
+
+ NKikimrBlobStorage::TConfigResponse::TStatus::EFailReason GetFailReason() const override { // maps this error to the kPDiskStatusRace fail reason
+ return NKikimrBlobStorage::TConfigResponse::TStatus::kPDiskStatusRace;
+ }
+ };
+
} // NKikimr::NBsController
diff --git a/ydb/core/mind/bscontroller/group_mapper.cpp b/ydb/core/mind/bscontroller/group_mapper.cpp
index 84b5ac6d7a8..eb4b898cf9b 100644
--- a/ydb/core/mind/bscontroller/group_mapper.cpp
+++ b/ydb/core/mind/bscontroller/group_mapper.cpp
@@ -64,31 +64,14 @@ namespace NKikimr::NBsController {
struct TFailDomainInfo;
- struct TPDiskInfo {
- const TNodeLocation Location;
- i64 SpaceAvailable;
- bool Usable;
- ui32 NumSlots;
- const ui32 MaxSlots;
+ struct TPDiskInfo : TPDiskRecord {
TPDiskLayoutPosition Position;
- const TPDiskId PDiskId;
- TStackVec<ui32, 32> Groups;
- const bool Operational;
TFailDomainInfo *FailDomain;
bool Matching = false;
- TPDiskInfo(TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots, TPDiskLayoutPosition position,
- const TPDiskId& pdiskId, const ui32 groupIds[], size_t numGroups, i64 spaceAvailable,
- bool operational, TFailDomainInfo *failDomain)
- : Location(std::move(location))
- , SpaceAvailable(spaceAvailable)
- , Usable(usable)
- , NumSlots(numSlots)
- , MaxSlots(maxSlots)
+ TPDiskInfo(const TPDiskRecord& pdisk, TPDiskLayoutPosition position, TFailDomainInfo *failDomain)
+ : TPDiskRecord(pdisk)
, Position(std::move(position))
- , PDiskId(pdiskId)
- , Groups(groupIds, groupIds + numGroups)
- , Operational(operational)
, FailDomain(failDomain)
{
std::sort(Groups.begin(), Groups.end());
@@ -99,7 +82,7 @@ namespace NKikimr::NBsController {
}
bool IsUsable() const {
- return Usable && NumSlots < MaxSlots;
+ return Usable && !Decommitted && NumSlots < MaxSlots;
}
void InsertGroup(ui32 groupId) {
@@ -192,7 +175,7 @@ namespace NKikimr::NBsController {
const TPDiskInfo& pdisk = it->second;
// register the disk in context
- if (!AddDisk(pdisk, failRealmIdx, domainThroughIdx, error)) {
+ if (!pdisk.Decommitted && !AddDisk(pdisk, failRealmIdx, domainThroughIdx, error)) {
return false;
}
}
@@ -396,16 +379,14 @@ namespace NKikimr::NBsController {
, Randomize(randomize)
{}
- bool RegisterPDisk(TPDiskId pdiskId, TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots,
- const ui32 groupIds[], size_t numGroups, i64 spaceAvailable, bool operational) {
+ bool RegisterPDisk(const TPDiskRecord& pdisk) {
// calculate disk position
- const TPDiskLayoutPosition p(DomainMapper, location, pdiskId, Geom);
+ const TPDiskLayoutPosition p(DomainMapper, pdisk.Location, pdisk.PDiskId, Geom);
// insert PDisk into specific map
TPDisks::iterator it;
bool inserted;
- std::tie(it, inserted) = PDisks.try_emplace(pdiskId, std::move(location), usable, numSlots, maxSlots,
- p, pdiskId, groupIds, numGroups, spaceAvailable, operational, &Box(p));
+ std::tie(it, inserted) = PDisks.try_emplace(pdisk.PDiskId, pdisk, p, &Box(p));
if (inserted) {
it->second.FailDomain->push_back(&*it);
}
@@ -469,6 +450,9 @@ namespace NKikimr::NBsController {
if (!it->second.Usable) {
s << std::exchange(minus, "") << "u";
}
+ if (it->second.Decommitted) {
+ s << std::exchange(minus, "") << "d";
+ }
if (it->second.NumSlots >= it->second.MaxSlots) {
s << std::exchange(minus, "") << "m";
}
@@ -770,10 +754,8 @@ namespace NKikimr::NBsController {
TGroupMapper::~TGroupMapper() = default;
- bool TGroupMapper::RegisterPDisk(TPDiskId pdiskId, TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots,
- const ui32 groupIds[], size_t numGroups, i64 spaceAvailable, bool operational) {
- return Impl->RegisterPDisk(pdiskId, std::move(location), usable, numSlots, maxSlots, groupIds, numGroups,
- spaceAvailable, operational);
+ bool TGroupMapper::RegisterPDisk(const TPDiskRecord& pdisk) { // Public entry point: forwards the record unchanged to the implementation object.
+ return Impl->RegisterPDisk(pdisk); // returns whatever the implementation's RegisterPDisk returns
+ }
void TGroupMapper::UnregisterPDisk(TPDiskId pdiskId) {
diff --git a/ydb/core/mind/bscontroller/group_mapper.h b/ydb/core/mind/bscontroller/group_mapper.h
index f66ba3171c3..46dbb11c8da 100644
--- a/ydb/core/mind/bscontroller/group_mapper.h
+++ b/ydb/core/mind/bscontroller/group_mapper.h
@@ -18,13 +18,24 @@ namespace NKikimr {
using TGroupDefinition = TVector<TVector<TVector<TPDiskId>>>; // Realm/Domain/Disk
using TForbiddenPDisks = std::unordered_set<TPDiskId, THash<TPDiskId>>;
+ struct TPDiskRecord { // Aggregate describing one PDisk as passed to RegisterPDisk; filled with designated initializers, so member order matters.
+ const TPDiskId PDiskId; // identifier of the PDisk being registered
+ const TNodeLocation Location; // location of the node hosting the PDisk
+ const bool Usable; // caller-provided usability flag
+ ui32 NumSlots; // current slot count (non-const, unlike most members)
+ const ui32 MaxSlots; // slot limit for this PDisk
+ TStackVec<ui32, 16> Groups; // ids of groups associated with this PDisk
+ i64 SpaceAvailable; // available space as reported by the caller -- units not visible here
+ const bool Operational; // caller-provided operational flag
+ const bool Decommitted; // true when the PDisk is in a decommit state and must not be used for placement
+ };
+
public:
TGroupMapper(TGroupGeometryInfo geom, bool randomize = false);
~TGroupMapper();
// Register PDisk inside mapper to use it in subsequent map operations
- bool RegisterPDisk(TPDiskId pdiskId, TNodeLocation location, bool usable, ui32 numSlots, ui32 maxSlots,
- const ui32 groupIds[], size_t numGroups, i64 spaceAvailable, bool operational);
+ bool RegisterPDisk(const TPDiskRecord& pdisk);
// Remove PDisk from the table.
void UnregisterPDisk(TPDiskId pdiskId);
diff --git a/ydb/core/mind/bscontroller/group_mapper_ut.cpp b/ydb/core/mind/bscontroller/group_mapper_ut.cpp
index 0dcb5be0863..88800829ddd 100644
--- a/ydb/core/mind/bscontroller/group_mapper_ut.cpp
+++ b/ydb/core/mind/bscontroller/group_mapper_ut.cpp
@@ -306,8 +306,17 @@ public:
}
for (const auto& pair : PDisks) {
auto& g = groupDisks[pair.first];
- mapper.RegisterPDisk(pair.first, pair.second.GetLocation(), !unusableDisks.count(pair.first),
- pair.second.NumSlots, maxSlots, g.data(), g.size(), 0, nonoperationalDisks.count(pair.first));
+ mapper.RegisterPDisk({
+ .PDiskId = pair.first,
+ .Location = pair.second.GetLocation(),
+ .Usable = !unusableDisks.count(pair.first),
+ .NumSlots = pair.second.NumSlots,
+ .MaxSlots = maxSlots,
+ .Groups{g.begin(), g.end()},
+ .SpaceAvailable = 0,
+ .Operational = static_cast<bool>(nonoperationalDisks.count(pair.first)),
+ .Decommitted = false,
+ });
}
}
};
diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h
index 9e409917f63..3e5c67412f9 100644
--- a/ydb/core/mind/bscontroller/impl.h
+++ b/ydb/core/mind/bscontroller/impl.h
@@ -403,6 +403,7 @@ public:
switch (Status) {
case NKikimrBlobStorage::EDriveStatus::FAULTY:
case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED:
+ case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT:
return true;
default:
return false;
@@ -410,7 +411,24 @@ public:
}
bool BadInTermsOfSelfHeal() const {
- return ShouldBeSettledBySelfHeal() || Status == NKikimrBlobStorage::EDriveStatus::INACTIVE;
+ switch (Status) {
+ case NKikimrBlobStorage::EDriveStatus::FAULTY:
+ case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED:
+ case NKikimrBlobStorage::EDriveStatus::INACTIVE:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool Decommitted() const { // True when Status is one of the two DECOMMIT_* values.
+ switch (Status) {
+ case NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING:
+ case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT:
+ return true;
+ default: // every other status is treated as not decommitted
+ return false;
+ }
+ }
std::tuple<bool, bool> GetSelfHealStatusTuple() const {
@@ -425,9 +443,10 @@ public:
case NKikimrBlobStorage::EDriveStatus::ACTIVE:
case NKikimrBlobStorage::EDriveStatus::INACTIVE:
- case NKikimrBlobStorage::EDriveStatus::SPARE:
case NKikimrBlobStorage::EDriveStatus::FAULTY:
case NKikimrBlobStorage::EDriveStatus::TO_BE_REMOVED:
+ case NKikimrBlobStorage::EDriveStatus::DECOMMIT_PENDING:
+ case NKikimrBlobStorage::EDriveStatus::DECOMMIT_IMMINENT:
return true;
case NKikimrBlobStorage::EDriveStatus::EDriveStatus_INT_MIN_SENTINEL_DO_NOT_USE_:
@@ -487,7 +506,7 @@ public:
struct TGroupStatus {
// status derived from the actual state of VDisks (IsReady() to be exact)
NKikimrBlobStorage::TGroupStatus::E OperatingStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN;
- // status derived by adding underlying PDisk status (FAULTY&BROKEN are assumed to be not working ones)
+ // status derived by adding underlying PDisk status (some of them are assumed to be not working ones)
NKikimrBlobStorage::TGroupStatus::E ExpectedStatus = NKikimrBlobStorage::TGroupStatus::UNKNOWN;
} Status;
diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp
index ba9b50cde00..71215bb35e7 100644
--- a/ydb/core/mind/bscontroller/sys_view.cpp
+++ b/ydb/core/mind/bscontroller/sys_view.cpp
@@ -289,8 +289,17 @@ public:
const auto readCentric = pdisk.HasReadCentric() ? MakeMaybe(pdisk.GetReadCentric()) : Nothing();
if (filter.MatchPDisk(pdisk.GetCategory(), sharedWithOs, readCentric)) {
const TNodeLocation& location = HostRecords->GetLocation(pdiskId.NodeId);
- const bool ok = mapper.RegisterPDisk(pdiskId, location, true, pdisk.GetNumActiveSlots(),
- pdisk.GetExpectedSlotCount(), nullptr, 0, 0, true);
+ const bool ok = mapper.RegisterPDisk({
+ .PDiskId = pdiskId,
+ .Location = location,
+ .Usable = true,
+ .NumSlots = pdisk.GetNumActiveSlots(),
+ .MaxSlots = pdisk.GetExpectedSlotCount(),
+ .Groups = {},
+ .SpaceAvailable = 0,
+ .Operational = true,
+ .Decommitted = false,
+ });
Y_VERIFY(ok);
break;
}
diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto
index 10d0a29e6c3..e32a2dfbad9 100644
--- a/ydb/core/protos/blobstorage_config.proto
+++ b/ydb/core/protos/blobstorage_config.proto
@@ -186,9 +186,11 @@ enum EDriveStatus {
ACTIVE = 1; // working as expected
INACTIVE = 2; // new groups are not created over this drive, but existing ones continue to work as expected
BROKEN = 3; // drive is not working, groups are automatically moved out of this drive upon reception of this status
- SPARE = 4; // spare drive -- groups are created only when being moved from BROKEN drives
+ reserved 4;
FAULTY = 5; // drive is expected to become BROKEN soon, new groups are not created, old groups are asynchronously moved out from this drive
TO_BE_REMOVED = 6; // same as INACTIVE, but drive is counted in fault model as not working
+ DECOMMIT_PENDING = 7; // drive is going to be removed soon, but SelfHeal logic does not remove it automatically
+ DECOMMIT_IMMINENT = 8; // drive is going to be removed automatically
}
message TGroupStatus {
@@ -226,6 +228,7 @@ message TUpdateDriveStatus {
uint32 PDiskId = 4; // may be set instead of path to identify PDisk
string Serial = 5; // may be set instead of path and PDiskId to identify PDisk
uint64 StatusChangeTimestamp = 6; // used only in return of ReadDriveStatus
+ bool ProhibitDecommittedStatusChange = 7; // used by CMS to prevent changing status from DECOMMIT_*
}
message TReadDriveStatus {
@@ -613,6 +616,7 @@ message TConfigResponse {
kDiskIsNotDonor = 8;
kAlready = 9;
kMayGetDegraded = 10;
+ kPDiskStatusRace = 11;
}
message TFailParam {
@@ -630,6 +634,7 @@ message TConfigResponse {
uint32 GroupId = 11;
NKikimrBlobStorage.TVDiskID VDiskId = 12;
NKikimrBlobStorage.TVSlotId VSlotId = 13;
+ EDriveStatus Status = 14;
}
}
diff --git a/ydb/core/util/testactorsys.cpp b/ydb/core/util/testactorsys.cpp
index 4a270e4f8c0..a678744a2f1 100644
--- a/ydb/core/util/testactorsys.cpp
+++ b/ydb/core/util/testactorsys.cpp
@@ -132,12 +132,12 @@ TActorId TTestActorSystem::CreateTestBootstrapper(TTabletStorageInfo *info, std:
return Register(CreateBootstrapper(info, bi.Get()), nodeId);
}
-void TTestActorSystem::SetupTabletRuntime(bool isMirror3dc, ui32 stateStorageNodeId, ui32 targetNodeId) {
+void TTestActorSystem::SetupTabletRuntime(ui32 numDataCenters, ui32 stateStorageNodeId, ui32 targetNodeId) {
auto setup = MakeIntrusive<TTableNameserverSetup>();
- ui32 nodeCountInDC = (MaxNodeId + 2) / 3;
+ ui32 nodeCountInDC = (MaxNodeId + numDataCenters - 1) / numDataCenters;
for (ui32 nodeId : GetNodes()) {
const TString name = Sprintf("127.0.0.%u", nodeId);
- ui32 dcNum = isMirror3dc ? ((nodeId + nodeCountInDC - 1) / nodeCountInDC) : 1;
+ const ui32 dcNum = (nodeId + nodeCountInDC - 1) / nodeCountInDC;
NActorsInterconnect::TNodeLocation location;
location.SetDataCenter(ToString(dcNum));
location.SetRack(ToString(nodeId));
diff --git a/ydb/core/util/testactorsys.h b/ydb/core/util/testactorsys.h
index 722d5a81638..6bbe4dd3eb3 100644
--- a/ydb/core/util/testactorsys.h
+++ b/ydb/core/util/testactorsys.h
@@ -660,7 +660,7 @@ public:
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// tablet-related utility functions
- void SetupTabletRuntime(bool isMirror3dc = false, ui32 stateStorageNodeId = 0, ui32 targetNodeId = 0);
+ void SetupTabletRuntime(ui32 numDataCenters = 1, ui32 stateStorageNodeId = 0, ui32 targetNodeId = 0);
static NTabletPipe::TClientConfig GetPipeConfigWithRetries();
void SendToPipe(ui64 tabletId, const TActorId& sender, IEventBase* payload, ui64 cookie, const NKikimr::NTabletPipe::TClientConfig& pipeConfig);
static TTabletStorageInfo *CreateTestTabletInfo(ui64 tabletId, TTabletTypes::EType tabletType, TBlobStorageGroupType::EErasureSpecies erasure, ui32 groupId);