diff options
author | Alexander Rutkovsky <alexander.rutkovsky@gmail.com> | 2022-04-28 14:47:16 +0300 |
---|---|---|
committer | Alexander Rutkovsky <alexander.rutkovsky@gmail.com> | 2022-04-28 14:47:16 +0300 |
commit | 27092a58a53008cd50c571b459d95d1631d55a4e (patch) | |
tree | 142e8b35d4e3104003a9e55d2dc48acb3ca56fea | |
parent | ed1b327b748bfc62f10eb935200de8a2087762f5 (diff) | |
download | ydb-27092a58a53008cd50c571b459d95d1631d55a4e.tar.gz |
Report availability timestamps for nodes and PDisks KIKIMR-14794
ref:1218d244284fd7fd9a9dbc7ef9d07cda0e53f466
9 files changed, 76 insertions, 13 deletions
diff --git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp index 0ca115ec3ee..5ea91f6c57e 100644 --- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp +++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp @@ -504,6 +504,7 @@ namespace NKikimr::NBsController { } } + const TInstant now = TActivationContext::Now(); TMap<TNodeId, NKikimrBlobStorage::TBaseConfig::TNode> nodes; for (const auto& [hostId, record] : *HostRecords) { TStringStream s; @@ -517,6 +518,13 @@ namespace NKikimr::NBsController { node.SetNodeId(record.NodeId); node.SetPhysicalLocation(s.Str()); record.Location.Serialize(node.MutableLocation(), false); // this field has been introduced recently, so it doesn't have compatibility format + const auto& nodes = Nodes.Get(); + if (const auto it = nodes.find(record.NodeId); it != nodes.end()) { + node.SetLastConnectTimestamp(it->second.LastConnectTimestamp.GetValue()); + node.SetLastDisconnectTimestamp(it->second.LastDisconnectTimestamp.GetValue()); + node.SetLastSeenTimestamp(it->second.LastConnectTimestamp <= it->second.LastDisconnectTimestamp ? + it->second.LastDisconnectTimestamp.GetValue() : now.GetValue()); + } auto *key = node.MutableHostKey(); key->SetFqdn(std::get<0>(hostId)); key->SetIcPort(std::get<1>(hostId)); diff --git a/ydb/core/mind/bscontroller/disk_metrics.cpp b/ydb/core/mind/bscontroller/disk_metrics.cpp index 3d045243fe4..e97e6ddce98 100644 --- a/ydb/core/mind/bscontroller/disk_metrics.cpp +++ b/ydb/core/mind/bscontroller/disk_metrics.cpp @@ -1,7 +1,6 @@ #include "impl.h" -namespace NKikimr { -namespace NBsController { +namespace NKikimr::NBsController { class TBlobStorageController::TTxUpdateDiskMetrics : public TTransactionBase<TBlobStorageController> { public: @@ -47,6 +46,7 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatu TabletCounters->Cumulative()[NBlobStorageController::COUNTER_UPDATE_DISK_METRICS_COUNT].Increment(1); TRequestCounter counter(TabletCounters, NBlobStorageController::COUNTER_UPDATE_DISK_METRICS_USEC); + const TInstant now = TActivationContext::Now(); auto& record = ev->Get()->Record; STLOG(PRI_DEBUG, BS_CONTROLLER, BSCTXUDM01, "Updating disk status", (Record, record)); @@ -83,7 +83,7 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatu for (const auto& m : record.GetPDisksMetrics()) { const TPDiskId pdiskId(ev->Sender.NodeId(), m.GetPDiskId()); if (auto *pdisk = FindPDisk(pdiskId)) { - if (pdisk->UpdatePDiskMetrics(m)) { + if (pdisk->UpdatePDiskMetrics(m, now)) { const auto first = std::make_pair(pdiskId, TList<TSelectGroupsQueueItem>::iterator()); for (auto it = PDiskToQueue.lower_bound(first); it != PDiskToQueue.end() && it->first == pdiskId; ++it) { queues.insert(it->second); @@ -104,12 +104,10 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatu ProcessVDiskStatus(record.GetVDiskStatus()); // commit into database if enough time has passed - const TInstant now = TActivationContext::Now(); if (now - LastMetricsCommit >= TDuration::Seconds(15)) { Execute(new TTxUpdateDiskMetrics(this)); LastMetricsCommit = now; } } -} -} +} // NKikimr::NBsController diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index e69265827bb..5991397bd0a 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -71,6 +71,7 @@ public: class TTxScrubQuantumFinished; class TTxUpdateLastSeenReady; class TTxUpdateNodeDrives; + class TTxUpdateNodeDisconnectTimestamp; class TVSlotInfo; class TPDiskInfo; @@ -394,9 +395,10 @@ public: && Metrics.HasMaxWriteThroughput(); } - bool UpdatePDiskMetrics(const NKikimrBlobStorage::TPDiskMetrics& pDiskMetrics) { + bool UpdatePDiskMetrics(const NKikimrBlobStorage::TPDiskMetrics& pDiskMetrics, TInstant now) { const bool hadMetrics = HasFullMetrics(); Metrics.CopyFrom(pDiskMetrics); + Metrics.SetUpdateTimestamp(now.GetValue()); MetricsDirty = true; return !hadMetrics && HasFullMetrics(); // true if metrics have just arrived } @@ -755,15 +757,21 @@ public: ui32 ConnectedCount = 0; Table::NextPDiskID::Type NextPDiskID; + TInstant LastConnectTimestamp; + TInstant LastDisconnectTimestamp; // in-mem only std::map<TString, NPDisk::TDriveData> KnownDrives; template<typename T> static void Apply(TBlobStorageController* /*controller*/, T&& callback) { static TTableAdapter<Table, TNodeInfo, - Table::NextPDiskID + Table::NextPDiskID, + Table::LastConnectTimestamp, + Table::LastDisconnectTimestamp > adapter( - &TNodeInfo::NextPDiskID + &TNodeInfo::NextPDiskID, + &TNodeInfo::LastConnectTimestamp, + &TNodeInfo::LastDisconnectTimestamp ); callback(&adapter); } diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index 2a88fec38e2..1e2959cb888 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -203,7 +203,7 @@ public: TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_REGISTER_NODE; } - bool Execute(TTransactionContext& /*txc*/, const TActorContext&) override { + bool Execute(TTransactionContext& txc, const TActorContext&) override { Self->TabletCounters->Cumulative()[NBlobStorageController::COUNTER_REGISTER_NODE_COUNT].Increment(1); TRequestCounter counter(Self->TabletCounters, NBlobStorageController::COUNTER_REGISTER_NODE_USEC); @@ -292,6 +292,9 @@ public: res->Record.SetAvailDomain(AppData()->DomainsInfo->GetDomainUidByTabletId(Self->TabletID())); Response = std::make_unique<IEventHandle>(request->Sender, Self->SelfId(), res.release(), 0, request->Cookie); + NIceDb::TNiceDb db(txc.DB); + auto& node = Self->GetNode(nodeId); + db.Table<Schema::Node>().Key(nodeId).Update<Schema::Node::LastConnectTimestamp>(node.LastConnectTimestamp); return true; } @@ -302,6 +305,29 @@ public: } }; +class TBlobStorageController::TTxUpdateNodeDisconnectTimestamp + : public TTransactionBase<TBlobStorageController> +{ + TNodeId NodeId; + +public: + TTxUpdateNodeDisconnectTimestamp(TNodeId nodeId, TBlobStorageController *controller) + : TTransactionBase(controller) + , NodeId(nodeId) + {} + + TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_UPDATE_NODE_DISCONNECT_TIMESTAMP; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + NIceDb::TNiceDb db(txc.DB); + auto& node = Self->GetNode(NodeId); + db.Table<Schema::Node>().Key(NodeId).Update<Schema::Node::LastDisconnectTimestamp>(node.LastDisconnectTimestamp); + return true; + } + + void Complete(const TActorContext&) override {} +}; + void TBlobStorageController::ReadGroups(TSet<ui32>& groupIDsToRead, bool discard, TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result) { for (auto it = groupIDsToRead.begin(); it != groupIDsToRead.end(); ) { @@ -429,6 +455,8 @@ void TBlobStorageController::OnWardenConnected(TNodeId nodeId) { it->second->UpdateOperational(true); SysViewChangedPDisks.insert(it->first); } + + node.LastConnectTimestamp = TInstant::Now(); } void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) { @@ -469,6 +497,8 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) { if (!lastSeenReadyQ.empty()) { Execute(CreateTxUpdateLastSeenReady(std::move(lastSeenReadyQ))); } + node.LastDisconnectTimestamp = now; + Execute(new TTxUpdateNodeDisconnectTimestamp(nodeId, this)); } void TBlobStorageController::EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo) { diff --git a/ydb/core/mind/bscontroller/scheme.h b/ydb/core/mind/bscontroller/scheme.h index a799ee29ba1..c422dcc89e0 100644 --- a/ydb/core/mind/bscontroller/scheme.h +++ b/ydb/core/mind/bscontroller/scheme.h @@ -15,10 +15,11 @@ struct Schema : NIceDb::Schema { struct Node : Table<2> { struct ID : Column<1, NScheme::NTypeIds::Uint32> {}; struct NextPDiskID : Column<2, NScheme::NTypeIds::Uint32> {}; - struct NextGroupKeyNonce : Column<9, NScheme::NTypeIds::Uint64> { static constexpr Type Default = 0; }; + struct LastConnectTimestamp : Column<10, NScheme::NTypeIds::Uint64> { using Type = TInstant; static constexpr Type Default = TInstant::Zero(); }; + struct LastDisconnectTimestamp : Column<11, NScheme::NTypeIds::Uint64> { using Type = TInstant; static constexpr Type Default = TInstant::Zero(); }; using TKey = TableKey<ID>; - using TColumns = TableColumns<ID, NextPDiskID>; + using TColumns = TableColumns<ID, NextPDiskID, LastConnectTimestamp, LastDisconnectTimestamp>; }; struct PDisk : Table<3> { diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto index 503496c1303..cc94aac87ab 100644 --- a/ydb/core/protos/blobstorage_config.proto +++ b/ydb/core/protos/blobstorage_config.proto @@ -574,6 +574,9 @@ message TBaseConfig { bytes PhysicalLocation = 2 [deprecated=true]; THostKey HostKey = 3; NActorsInterconnect.TNodeLocation Location = 4; + uint64 LastConnectTimestamp = 5; // TInstant + uint64 LastDisconnectTimestamp = 6; // TInstant + uint64 LastSeenTimestamp = 7; // when seen working for the last time } repeated TPDisk PDisk = 1; diff --git a/ydb/core/protos/blobstorage_disk.proto b/ydb/core/protos/blobstorage_disk.proto index 6b3c4b1ed81..42d82927eb1 100644 --- a/ydb/core/protos/blobstorage_disk.proto +++ b/ydb/core/protos/blobstorage_disk.proto @@ -73,4 +73,6 @@ message TPDiskMetrics { optional uint64 EnforcedDynamicSlotSize = 9; optional TPDiskState.E State = 10; + + optional uint64 UpdateTimestamp = 11; // TInstant::GetValue() } diff --git a/ydb/core/protos/counters_bs_controller.proto b/ydb/core/protos/counters_bs_controller.proto index 496d60e0b23..668f4cc52f8 100644 --- a/ydb/core/protos/counters_bs_controller.proto +++ b/ydb/core/protos/counters_bs_controller.proto @@ -213,4 +213,5 @@ enum ETxTypes { TXTYPE_UPDATE_LAST_SEEN_READY = 21 [(TxTypeOpts) = {Name: "TTxUpdateLastSeenReady"}]; TXTYPE_UPDATE_NODE_DRIVES = 22 [(TxTypeOpts) = {Name: "TTxUpdateNodeDrives"}]; TXTYPE_MON_EVENT_OPERATION_LOG_ENTRY = 23 [(TxTypeOpts) = {Name: "TTxMonEvent_OperationLogEntry"}]; + TXTYPE_UPDATE_NODE_DISCONNECT_TIMESTAMP = 24 [(TxTypeOpts) = {Name: "TTxUpdateNodeDisconnectTimestamp"}]; } diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema index 7e95548fba3..3cb844ae5da 100644 --- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema +++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema @@ -146,6 +146,16 @@ "ColumnId": 2, "ColumnName": "NextPDiskID", "ColumnType": "Uint32" + }, + { + "ColumnId": 10, + "ColumnName": "LastConnectTimestamp", + "ColumnType": "Uint64" + }, + { + "ColumnId": 11, + "ColumnName": "LastDisconnectTimestamp", + "ColumnType": "Uint64" } ], "ColumnsDropped": [], @@ -153,7 +163,9 @@ "0": { "Columns": [ 1, - 2 + 2, + 10, + 11 ], "RoomID": 0, "Codec": 0, |