diff options
author | yuryalekseev <yuryalekseev@yandex-team.com> | 2023-02-14 14:32:10 +0300 |
---|---|---|
committer | yuryalekseev <yuryalekseev@yandex-team.com> | 2023-02-14 14:32:10 +0300 |
commit | 4f395457741333ecbc606ef3ef91606181d7c2f2 (patch) | |
tree | 4c7863375fc8537a3d7ab0cee7c0beb859e6b9e9 | |
parent | a9759cfaa2afbd284f0744d8ebafbe844baa8b1e (diff) | |
download | ydb-4f395457741333ecbc606ef3ef91606181d7c2f2.tar.gz |
Use DrivesSerials instead of NodeIdByDiskSerialNumber.
-rw-r--r-- | ydb/core/mind/bscontroller/cmds_drive_status.cpp | 44 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/config.h | 2 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/config_fit_pdisks.cpp | 4 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/impl.h | 22 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/register_node.cpp | 33 | ||||
-rw-r--r-- | ydb/core/protos/blobstorage_config.proto | 6 | ||||
-rw-r--r-- | ydb/core/protos/counters_bs_controller.proto | 6 |
7 files changed, 74 insertions, 43 deletions
diff --git a/ydb/core/mind/bscontroller/cmds_drive_status.cpp b/ydb/core/mind/bscontroller/cmds_drive_status.cpp index bf9e65db797..0762acbc455 100644 --- a/ydb/core/mind/bscontroller/cmds_drive_status.cpp +++ b/ydb/core/mind/bscontroller/cmds_drive_status.cpp @@ -99,15 +99,23 @@ namespace NKikimr::NBsController { auto boxId = cmd.GetBoxId(); auto driveInfo = DrivesSerials.Find(serial); - if (driveInfo && driveInfo->LifeStage != NKikimrBlobStorage::TDriveLifeStage::REMOVED) { + if (!driveInfo) { + throw TExError() << "Couldn't get drive info for disk with serial number" << TErrorParams::DiskSerialNumber(serial); + } + + switch (driveInfo->LifeStage) { + case NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE: + case NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC: + break; + default: throw TExAlready() << "Device with such serial already exists in BSC database in lifeStage " << driveInfo->LifeStage; } - auto it = NodeIdByDiskSerialNumber.find(serial); - if (it == NodeIdByDiskSerialNumber.end()) { - throw TExError() << "Couldn't find node id for disk with serial number" << TErrorParams::DiskSerialNumber(serial); + if (!driveInfo->NodeId) { + throw TExError() << "Couldn't get node id for disk with serial number" << TErrorParams::DiskSerialNumber(serial); } - auto nodeId = it->second; + + auto nodeId = driveInfo->NodeId.GetRef(); const auto& nodes = Nodes.Get(); auto nodeIt = nodes.find(nodeId); @@ -121,7 +129,7 @@ namespace NKikimr::NBsController { throw TExError() << "Couldn't find disk on node" << TErrorParams::NodeId(nodeId) << " by serial number" << TErrorParams::DiskSerialNumber(serial); } - // delete REMOVED entry, if any, but keep its GUID + // delete SEEN or REMOVED entry, if any, but keep its GUID auto guid = driveInfo ? std::make_optional(driveInfo->Guid) : std::nullopt; if (driveInfo) { DrivesSerials.DeleteExistingEntry(serial); @@ -142,7 +150,7 @@ namespace NKikimr::NBsController { throw TExError() << "Couldn't serialize PDiskConfig for disk with serial number" << TErrorParams::DiskSerialNumber(serial); } driveInfoMutable->PDiskConfig = config; - driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::ADDED; + driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC; driveInfoMutable->NodeId = nodeId; driveInfoMutable->Path = driveIt->second.Path; @@ -162,14 +170,18 @@ namespace NKikimr::NBsController { throw TExError() << "Couldn't find disk with serial number" << TErrorParams::DiskSerialNumber(serial); } - if (driveInfo->LifeStage == NKikimrBlobStorage::TDriveLifeStage::REMOVED) { + if (driveInfo->LifeStage == NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE) { + throw TExError() << "Disk with serial number" << TErrorParams::DiskSerialNumber(serial) << " hasn't been added to BSC yet "; + } + + if (driveInfo->LifeStage == NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC) { throw TExError() << "Disk with serial number" << TErrorParams::DiskSerialNumber(serial) << " has already been removed"; } auto driveInfoMutable = DrivesSerials.FindForUpdate(serial); driveInfoMutable->NodeId.Clear(); driveInfoMutable->PDiskId.Clear(); - driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::REMOVED; + driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC; driveInfoMutable->Path.Clear(); Fit.Boxes.insert(driveInfo->BoxId); @@ -184,15 +196,11 @@ namespace NKikimr::NBsController { if (auto driveInfo = DrivesSerials.Find(serial)) { switch (driveInfo->LifeStage) { - case NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN: - [[fallthrough]]; - case NKikimrBlobStorage::TDriveLifeStage::REMOVED: - DrivesSerials.DeleteExistingEntry(serial); - break; - default: { - throw TExError() << "Drive not in {NOT_SEEN, REMOVED} lifestage and cannot be forgotten. Remove it first"; - break; - } + case NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC: + DrivesSerials.DeleteExistingEntry(serial); + break; + default: + throw TExError() << "Drive not in {REMOVED} lifestage and cannot be forgotten. Remove it first"; } } else { throw TExAlready() << "Drive is unknown for BS_CONTROLLER and cannot be forgotten"; diff --git a/ydb/core/mind/bscontroller/config.h b/ydb/core/mind/bscontroller/config.h index 01d8f4fcef6..0f7fc5749cc 100644 --- a/ydb/core/mind/bscontroller/config.h +++ b/ydb/core/mind/bscontroller/config.h @@ -105,7 +105,6 @@ namespace NKikimr { // static pdisk/vdisk states std::map<TVSlotId, TStaticVSlotInfo>& StaticVSlots; std::map<TPDiskId, TStaticPDiskInfo>& StaticPDisks; - const std::unordered_map<TString, TNodeId>& NodeIdByDiskSerialNumber; TCowHolder<Schema::State::SerialManagementStage::Type> SerialManagementStage; @@ -135,7 +134,6 @@ namespace NKikimr { , DefaultMaxSlots(controller.DefaultMaxSlots) , StaticVSlots(controller.StaticVSlots) , StaticPDisks(controller.StaticPDisks) - , NodeIdByDiskSerialNumber(controller.NodeIdByDiskSerialNumber) , SerialManagementStage(&controller.SerialManagementStage) , StoragePoolStat(*controller.StoragePoolStat) { diff --git a/ydb/core/mind/bscontroller/config_fit_pdisks.cpp b/ydb/core/mind/bscontroller/config_fit_pdisks.cpp index 6cd26c0030d..b6f09b35dd3 100644 --- a/ydb/core/mind/bscontroller/config_fit_pdisks.cpp +++ b/ydb/core/mind/bscontroller/config_fit_pdisks.cpp @@ -217,6 +217,10 @@ namespace NKikimr { return true; } + if (driveInfo.LifeStage != NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC) { + return true; + } + if (serial.Serial.empty()) { STLOG(PRI_ERROR, BS_CONTROLLER, BSCFP04, "Missing disks's serial number"); return true; diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index 17262774630..56f287f3ce9 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -848,7 +848,6 @@ public: }; - std::unordered_map<TString, TNodeId> NodeIdByDiskSerialNumber; TMap<ui32, TSet<ui32>> NodesAwaitingKeysForGroup; struct THostConfigInfo { @@ -1291,7 +1290,6 @@ public: TDriveSerialInfo(Table::BoxId::Type boxId) : BoxId(boxId) - , LifeStage(NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN) {} template<typename T> @@ -1972,28 +1970,36 @@ public: counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SLOT_COUNT].Set(numWithoutSlotCount); counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL].Set(numWithoutSerial); - ui32 numNotSeen = 0; + ui32 numUnknown = 0; + ui32 numSeen = 0; + ui32 numAdded = 0; ui32 numRemoved = 0; ui32 numError = 0; for (const auto& [serial, driveInfo] : DrivesSerials) { switch (driveInfo->LifeStage) { - case NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN: - ++numNotSeen; + case NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE: + ++numSeen; break; - case NKikimrBlobStorage::TDriveLifeStage::REMOVED: + case NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC: + ++numAdded; + break; + case NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC: ++numRemoved; break; case NKikimrBlobStorage::TDriveLifeStage::ERROR: ++numError; break; default: + ++numUnknown; break; } } - counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_NOT_SEEN].Set(numNotSeen); - counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_REMOVED].Set(numRemoved); + counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_SEEN_ON_NODE].Set(numSeen); + counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_ADDED_TO_BSC].Set(numAdded); + counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_REMOVED_FROM_BSC].Set(numRemoved); counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_ERROR].Set(numError); + counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_UNKNOWN].Set(numUnknown); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index 81d6c11820a..39202a7ed1e 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -85,18 +85,34 @@ class TBlobStorageController::TTxUpdateNodeDrives } } - TNodeInfo& nodeInfo = Self->GetNode(nodeId); + auto& nodeInfo = Self->GetNode(nodeId); Self->EraseKnownDrivesOnDisconnected(&nodeInfo); - // Update NodeIdByDiskSerialNumber and KnownDrives + // Update DrivesSerials and KnownDrives for (const auto& data : Record.GetDrivesData()) { const auto& serial = data.GetSerialNumber(); - if (auto it = Self->NodeIdByDiskSerialNumber.find(serial); it != Self->NodeIdByDiskSerialNumber.end() && it->second != nodeId) { - STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN03, + + auto it = Self->DrivesSerials.find(serial); + if (it == Self->DrivesSerials.end()) { + auto newInfo = MakeHolder<TDriveSerialInfo>(); + newInfo->LifeStage = NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE; + newInfo->NodeId = nodeId; + newInfo->Path = data.GetPath(); + Self->DrivesSerials.emplace(serial, std::move(newInfo)); + } else if (it->second->LifeStage == NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC) { + if (it->second->NodeId != nodeId) { + STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN03, "Received drive from NewNodeId, but drive is reported as placed in OldNodeId", - (NewNodeId, nodeId), (OldNodeId, it->second), (Serial, serial)); - } else { - Self->NodeIdByDiskSerialNumber[serial] = nodeId; + (NewNodeId, nodeId), (OldNodeId, it->second->NodeId), (Serial, serial)); + } + if (it->second->Path != data.GetPath()) { + STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN04, + "Received drive by NewPath, but drive is reported as placed by OldPath", + (NewPath, data.GetPath()), (OldPath, it->second->Path), (Serial, serial)); + } + } else if (it->second->LifeStage == NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE) { + it->second->NodeId = nodeId; + it->second->Path = data.GetPath(); } NPDisk::TDriveData driveData; DriveDataToDriveData(data, driveData); @@ -491,9 +507,6 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) { } void TBlobStorageController::EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo) { - for (const auto& [serial, driveData] : nodeInfo->KnownDrives) { - NodeIdByDiskSerialNumber.erase(serial); - } nodeInfo->KnownDrives.clear(); } diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto index b0316a0f29c..83225a48b14 100644 --- a/ydb/core/protos/blobstorage_config.proto +++ b/ydb/core/protos/blobstorage_config.proto @@ -220,9 +220,9 @@ message TGroupStatus { message TDriveLifeStage { enum E { UNKNOWN = 0; // life stage is unknown (default) - NOT_SEEN = 1; // info about drive is located in BSC db, but drive is not seen in any node - ADDED = 2; // PDisk has been added to the DrivesSerials table - REMOVED = 3; // PDisk has been removed from the DrivesSerials table + SEEN_ON_NODE = 1; // disk drive has been seen on a node, but is not added to BSC yet + ADDED_TO_BSC = 2; // disk drive has been added to BSC + REMOVED_FROM_BSC = 3; // disk drive has been removed from BSC ERROR = 4; // drive was moved between nodes with allocated VDisks } } diff --git a/ydb/core/protos/counters_bs_controller.proto b/ydb/core/protos/counters_bs_controller.proto index 3979f33442e..0b1186f2ecc 100644 --- a/ydb/core/protos/counters_bs_controller.proto +++ b/ydb/core/protos/counters_bs_controller.proto @@ -17,8 +17,8 @@ enum ESimpleCounters { COUNTER_SLOTS_ON_TO_BE_REMOVED_DISKS = 7 [(CounterOpts) = {Name: "SlotsOnToBeRemovedDisks"}]; COUNTER_BYTES_ON_TO_BE_REMOVED_DISKS = 8 [(CounterOpts) = {Name: "BytesOnToBeRemovedDisks"}]; COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL = 9 [(CounterOpts) = {Name: "PDisksWithoutExpectedSerial"}]; - COUNTER_DRIVE_SERIAL_NOT_SEEN = 10 [(CounterOpts) = {Name: "DriveSerialNotSeen"}]; - COUNTER_DRIVE_SERIAL_REMOVED = 11 [(CounterOpts) = {Name: "DriveSerialRemoved"}]; + COUNTER_DRIVE_SERIAL_SEEN_ON_NODE = 10 [(CounterOpts) = {Name: "DriveSerialSeenOnNode"}]; + COUNTER_DRIVE_SERIAL_REMOVED_FROM_BSC = 11 [(CounterOpts) = {Name: "DriveSerialRemovedFromBsc"}]; COUNTER_DRIVE_SERIAL_ERROR = 12 [(CounterOpts) = {Name: "DriveSerialError"}]; COUNTER_DISK_SCRUB_WAITING_FOR_START = 13 [(CounterOpts) = {Name: "DiskScrubWaitingForStart"}]; COUNTER_DISK_SCRUB_RUNNING = 14 [(CounterOpts) = {Name: "DiskScrubRunning"}]; @@ -28,6 +28,8 @@ enum ESimpleCounters { COUNTER_DISK_SCRUB_CUR_DISKS = 18 [(CounterOpts) = {Name: "CurrentlyScrubbedDisks"}]; COUNTER_DISK_SCRUB_CUR_GROUPS = 19 [(CounterOpts) = {Name: "CurrentlyScrubbedGroups"}]; COUNTER_SELF_HEAL_UNREASSIGNABLE_GROUPS = 20 [(CounterOpts) = {Name: "SelfHealUnreassignableGroups"}]; + COUNTER_DRIVE_SERIAL_UNKNOWN = 21 [(CounterOpts) = {Name: "DriveSerialUnknown"}]; + COUNTER_DRIVE_SERIAL_ADDED_TO_BSC = 22 [(CounterOpts) = {Name: "DriveSerialAddedToBsc"}]; } enum ECumulativeCounters { |