about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author yuryalekseev <yuryalekseev@yandex-team.com> 2023-02-14 14:32:10 +0300
committer yuryalekseev <yuryalekseev@yandex-team.com> 2023-02-14 14:32:10 +0300
commit 4f395457741333ecbc606ef3ef91606181d7c2f2 (patch)
tree 4c7863375fc8537a3d7ab0cee7c0beb859e6b9e9
parent a9759cfaa2afbd284f0744d8ebafbe844baa8b1e (diff)
download ydb-4f395457741333ecbc606ef3ef91606181d7c2f2.tar.gz
Use DrivesSerials instead of NodeIdByDiskSerialNumber.
-rw-r--r-- ydb/core/mind/bscontroller/cmds_drive_status.cpp 44
-rw-r--r-- ydb/core/mind/bscontroller/config.h 2
-rw-r--r-- ydb/core/mind/bscontroller/config_fit_pdisks.cpp 4
-rw-r--r-- ydb/core/mind/bscontroller/impl.h 22
-rw-r--r-- ydb/core/mind/bscontroller/register_node.cpp 33
-rw-r--r-- ydb/core/protos/blobstorage_config.proto 6
-rw-r--r-- ydb/core/protos/counters_bs_controller.proto 6
7 files changed, 74 insertions, 43 deletions
diff --git a/ydb/core/mind/bscontroller/cmds_drive_status.cpp b/ydb/core/mind/bscontroller/cmds_drive_status.cpp
index bf9e65db797..0762acbc455 100644
--- a/ydb/core/mind/bscontroller/cmds_drive_status.cpp
+++ b/ydb/core/mind/bscontroller/cmds_drive_status.cpp
@@ -99,15 +99,23 @@ namespace NKikimr::NBsController {
auto boxId = cmd.GetBoxId();
auto driveInfo = DrivesSerials.Find(serial);
- if (driveInfo && driveInfo->LifeStage != NKikimrBlobStorage::TDriveLifeStage::REMOVED) {
+ if (!driveInfo) {
+ throw TExError() << "Couldn't get drive info for disk with serial number" << TErrorParams::DiskSerialNumber(serial);
+ }
+
+ switch (driveInfo->LifeStage) {
+ case NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE:
+ case NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC:
+ break;
+ default:
throw TExAlready() << "Device with such serial already exists in BSC database in lifeStage " << driveInfo->LifeStage;
}
- auto it = NodeIdByDiskSerialNumber.find(serial);
- if (it == NodeIdByDiskSerialNumber.end()) {
- throw TExError() << "Couldn't find node id for disk with serial number" << TErrorParams::DiskSerialNumber(serial);
+ if (!driveInfo->NodeId) {
+ throw TExError() << "Couldn't get node id for disk with serial number" << TErrorParams::DiskSerialNumber(serial);
}
- auto nodeId = it->second;
+
+ auto nodeId = driveInfo->NodeId.GetRef();
const auto& nodes = Nodes.Get();
auto nodeIt = nodes.find(nodeId);
@@ -121,7 +129,7 @@ namespace NKikimr::NBsController {
throw TExError() << "Couldn't find disk on node" << TErrorParams::NodeId(nodeId) << " by serial number" << TErrorParams::DiskSerialNumber(serial);
}
- // delete REMOVED entry, if any, but keep its GUID
+ // delete SEEN or REMOVED entry, if any, but keep its GUID
auto guid = driveInfo ? std::make_optional(driveInfo->Guid) : std::nullopt;
if (driveInfo) {
DrivesSerials.DeleteExistingEntry(serial);
@@ -142,7 +150,7 @@ namespace NKikimr::NBsController {
throw TExError() << "Couldn't serialize PDiskConfig for disk with serial number" << TErrorParams::DiskSerialNumber(serial);
}
driveInfoMutable->PDiskConfig = config;
- driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::ADDED;
+ driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC;
driveInfoMutable->NodeId = nodeId;
driveInfoMutable->Path = driveIt->second.Path;
@@ -162,14 +170,18 @@ namespace NKikimr::NBsController {
throw TExError() << "Couldn't find disk with serial number" << TErrorParams::DiskSerialNumber(serial);
}
- if (driveInfo->LifeStage == NKikimrBlobStorage::TDriveLifeStage::REMOVED) {
+ if (driveInfo->LifeStage == NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE) {
+ throw TExError() << "Disk with serial number" << TErrorParams::DiskSerialNumber(serial) << " hasn't been added to BSC yet ";
+ }
+
+ if (driveInfo->LifeStage == NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC) {
throw TExError() << "Disk with serial number" << TErrorParams::DiskSerialNumber(serial) << " has already been removed";
}
auto driveInfoMutable = DrivesSerials.FindForUpdate(serial);
driveInfoMutable->NodeId.Clear();
driveInfoMutable->PDiskId.Clear();
- driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::REMOVED;
+ driveInfoMutable->LifeStage = NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC;
driveInfoMutable->Path.Clear();
Fit.Boxes.insert(driveInfo->BoxId);
@@ -184,15 +196,11 @@ namespace NKikimr::NBsController {
if (auto driveInfo = DrivesSerials.Find(serial)) {
switch (driveInfo->LifeStage) {
- case NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN:
- [[fallthrough]];
- case NKikimrBlobStorage::TDriveLifeStage::REMOVED:
- DrivesSerials.DeleteExistingEntry(serial);
- break;
- default: {
- throw TExError() << "Drive not in {NOT_SEEN, REMOVED} lifestage and cannot be forgotten. Remove it first";
- break;
- }
+ case NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC:
+ DrivesSerials.DeleteExistingEntry(serial);
+ break;
+ default:
+ throw TExError() << "Drive not in {REMOVED} lifestage and cannot be forgotten. Remove it first";
}
} else {
throw TExAlready() << "Drive is unknown for BS_CONTROLLER and cannot be forgotten";
diff --git a/ydb/core/mind/bscontroller/config.h b/ydb/core/mind/bscontroller/config.h
index 01d8f4fcef6..0f7fc5749cc 100644
--- a/ydb/core/mind/bscontroller/config.h
+++ b/ydb/core/mind/bscontroller/config.h
@@ -105,7 +105,6 @@ namespace NKikimr {
// static pdisk/vdisk states
std::map<TVSlotId, TStaticVSlotInfo>& StaticVSlots;
std::map<TPDiskId, TStaticPDiskInfo>& StaticPDisks;
- const std::unordered_map<TString, TNodeId>& NodeIdByDiskSerialNumber;
TCowHolder<Schema::State::SerialManagementStage::Type> SerialManagementStage;
@@ -135,7 +134,6 @@ namespace NKikimr {
, DefaultMaxSlots(controller.DefaultMaxSlots)
, StaticVSlots(controller.StaticVSlots)
, StaticPDisks(controller.StaticPDisks)
- , NodeIdByDiskSerialNumber(controller.NodeIdByDiskSerialNumber)
, SerialManagementStage(&controller.SerialManagementStage)
, StoragePoolStat(*controller.StoragePoolStat)
{
diff --git a/ydb/core/mind/bscontroller/config_fit_pdisks.cpp b/ydb/core/mind/bscontroller/config_fit_pdisks.cpp
index 6cd26c0030d..b6f09b35dd3 100644
--- a/ydb/core/mind/bscontroller/config_fit_pdisks.cpp
+++ b/ydb/core/mind/bscontroller/config_fit_pdisks.cpp
@@ -217,6 +217,10 @@ namespace NKikimr {
return true;
}
+ if (driveInfo.LifeStage != NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC) {
+ return true;
+ }
+
if (serial.Serial.empty()) {
STLOG(PRI_ERROR, BS_CONTROLLER, BSCFP04, "Missing disks's serial number");
return true;
diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h
index 17262774630..56f287f3ce9 100644
--- a/ydb/core/mind/bscontroller/impl.h
+++ b/ydb/core/mind/bscontroller/impl.h
@@ -848,7 +848,6 @@ public:
};
- std::unordered_map<TString, TNodeId> NodeIdByDiskSerialNumber;
TMap<ui32, TSet<ui32>> NodesAwaitingKeysForGroup;
struct THostConfigInfo {
@@ -1291,7 +1290,6 @@ public:
TDriveSerialInfo(Table::BoxId::Type boxId)
: BoxId(boxId)
- , LifeStage(NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN)
{}
template<typename T>
@@ -1972,28 +1970,36 @@ public:
counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SLOT_COUNT].Set(numWithoutSlotCount);
counters[NBlobStorageController::COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL].Set(numWithoutSerial);
- ui32 numNotSeen = 0;
+ ui32 numUnknown = 0;
+ ui32 numSeen = 0;
+ ui32 numAdded = 0;
ui32 numRemoved = 0;
ui32 numError = 0;
for (const auto& [serial, driveInfo] : DrivesSerials) {
switch (driveInfo->LifeStage) {
- case NKikimrBlobStorage::TDriveLifeStage::NOT_SEEN:
- ++numNotSeen;
+ case NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE:
+ ++numSeen;
break;
- case NKikimrBlobStorage::TDriveLifeStage::REMOVED:
+ case NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC:
+ ++numAdded;
+ break;
+ case NKikimrBlobStorage::TDriveLifeStage::REMOVED_FROM_BSC:
++numRemoved;
break;
case NKikimrBlobStorage::TDriveLifeStage::ERROR:
++numError;
break;
default:
+ ++numUnknown;
break;
}
}
- counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_NOT_SEEN].Set(numNotSeen);
- counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_REMOVED].Set(numRemoved);
+ counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_SEEN_ON_NODE].Set(numSeen);
+ counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_ADDED_TO_BSC].Set(numAdded);
+ counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_REMOVED_FROM_BSC].Set(numRemoved);
counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_ERROR].Set(numError);
+ counters[NBlobStorageController::COUNTER_DRIVE_SERIAL_UNKNOWN].Set(numUnknown);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp
index 81d6c11820a..39202a7ed1e 100644
--- a/ydb/core/mind/bscontroller/register_node.cpp
+++ b/ydb/core/mind/bscontroller/register_node.cpp
@@ -85,18 +85,34 @@ class TBlobStorageController::TTxUpdateNodeDrives
}
}
- TNodeInfo& nodeInfo = Self->GetNode(nodeId);
+ auto& nodeInfo = Self->GetNode(nodeId);
Self->EraseKnownDrivesOnDisconnected(&nodeInfo);
- // Update NodeIdByDiskSerialNumber and KnownDrives
+ // Update DrivesSerials and KnownDrives
for (const auto& data : Record.GetDrivesData()) {
const auto& serial = data.GetSerialNumber();
- if (auto it = Self->NodeIdByDiskSerialNumber.find(serial); it != Self->NodeIdByDiskSerialNumber.end() && it->second != nodeId) {
- STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN03,
+
+ auto it = Self->DrivesSerials.find(serial);
+ if (it == Self->DrivesSerials.end()) {
+ auto newInfo = MakeHolder<TDriveSerialInfo>();
+ newInfo->LifeStage = NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE;
+ newInfo->NodeId = nodeId;
+ newInfo->Path = data.GetPath();
+ Self->DrivesSerials.emplace(serial, std::move(newInfo));
+ } else if (it->second->LifeStage == NKikimrBlobStorage::TDriveLifeStage::ADDED_TO_BSC) {
+ if (it->second->NodeId != nodeId) {
+ STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN03,
"Received drive from NewNodeId, but drive is reported as placed in OldNodeId",
- (NewNodeId, nodeId), (OldNodeId, it->second), (Serial, serial));
- } else {
- Self->NodeIdByDiskSerialNumber[serial] = nodeId;
+ (NewNodeId, nodeId), (OldNodeId, it->second->NodeId), (Serial, serial));
+ }
+ if (it->second->Path != data.GetPath()) {
+ STLOG(PRI_ERROR, BS_CONTROLLER, BSCTXRN04,
+ "Received drive by NewPath, but drive is reported as placed by OldPath",
+ (NewPath, data.GetPath()), (OldPath, it->second->Path), (Serial, serial));
+ }
+ } else if (it->second->LifeStage == NKikimrBlobStorage::TDriveLifeStage::SEEN_ON_NODE) {
+ it->second->NodeId = nodeId;
+ it->second->Path = data.GetPath();
}
NPDisk::TDriveData driveData;
DriveDataToDriveData(data, driveData);
@@ -491,9 +507,6 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) {
}
void TBlobStorageController::EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo) {
- for (const auto& [serial, driveData] : nodeInfo->KnownDrives) {
- NodeIdByDiskSerialNumber.erase(serial);
- }
nodeInfo->KnownDrives.clear();
}
diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto
index b0316a0f29c..83225a48b14 100644
--- a/ydb/core/protos/blobstorage_config.proto
+++ b/ydb/core/protos/blobstorage_config.proto
@@ -220,9 +220,9 @@ message TGroupStatus {
message TDriveLifeStage {
enum E {
UNKNOWN = 0; // life stage is unknown (default)
- NOT_SEEN = 1; // info about drive is located in BSC db, but drive is not seen in any node
- ADDED = 2; // PDisk has been added to the DrivesSerials table
- REMOVED = 3; // PDisk has been removed from the DrivesSerials table
+ SEEN_ON_NODE = 1; // disk drive has been seen on a node, but is not added to BSC yet
+ ADDED_TO_BSC = 2; // disk drive has been added to BSC
+ REMOVED_FROM_BSC = 3; // disk drive has been removed from BSC
ERROR = 4; // drive was moved between nodes with allocated VDisks
}
}
diff --git a/ydb/core/protos/counters_bs_controller.proto b/ydb/core/protos/counters_bs_controller.proto
index 3979f33442e..0b1186f2ecc 100644
--- a/ydb/core/protos/counters_bs_controller.proto
+++ b/ydb/core/protos/counters_bs_controller.proto
@@ -17,8 +17,8 @@ enum ESimpleCounters {
COUNTER_SLOTS_ON_TO_BE_REMOVED_DISKS = 7 [(CounterOpts) = {Name: "SlotsOnToBeRemovedDisks"}];
COUNTER_BYTES_ON_TO_BE_REMOVED_DISKS = 8 [(CounterOpts) = {Name: "BytesOnToBeRemovedDisks"}];
COUNTER_PDISKS_WITHOUT_EXPECTED_SERIAL = 9 [(CounterOpts) = {Name: "PDisksWithoutExpectedSerial"}];
- COUNTER_DRIVE_SERIAL_NOT_SEEN = 10 [(CounterOpts) = {Name: "DriveSerialNotSeen"}];
- COUNTER_DRIVE_SERIAL_REMOVED = 11 [(CounterOpts) = {Name: "DriveSerialRemoved"}];
+ COUNTER_DRIVE_SERIAL_SEEN_ON_NODE = 10 [(CounterOpts) = {Name: "DriveSerialSeenOnNode"}];
+ COUNTER_DRIVE_SERIAL_REMOVED_FROM_BSC = 11 [(CounterOpts) = {Name: "DriveSerialRemovedFromBsc"}];
COUNTER_DRIVE_SERIAL_ERROR = 12 [(CounterOpts) = {Name: "DriveSerialError"}];
COUNTER_DISK_SCRUB_WAITING_FOR_START = 13 [(CounterOpts) = {Name: "DiskScrubWaitingForStart"}];
COUNTER_DISK_SCRUB_RUNNING = 14 [(CounterOpts) = {Name: "DiskScrubRunning"}];
@@ -28,6 +28,8 @@ enum ESimpleCounters {
COUNTER_DISK_SCRUB_CUR_DISKS = 18 [(CounterOpts) = {Name: "CurrentlyScrubbedDisks"}];
COUNTER_DISK_SCRUB_CUR_GROUPS = 19 [(CounterOpts) = {Name: "CurrentlyScrubbedGroups"}];
COUNTER_SELF_HEAL_UNREASSIGNABLE_GROUPS = 20 [(CounterOpts) = {Name: "SelfHealUnreassignableGroups"}];
+ COUNTER_DRIVE_SERIAL_UNKNOWN = 21 [(CounterOpts) = {Name: "DriveSerialUnknown"}];
+ COUNTER_DRIVE_SERIAL_ADDED_TO_BSC = 22 [(CounterOpts) = {Name: "DriveSerialAddedToBsc"}];
}
enum ECumulativeCounters {