aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alexander Rutkovsky <alexander.rutkovsky@gmail.com> 2022-04-28 14:47:16 +0300
committer Alexander Rutkovsky <alexander.rutkovsky@gmail.com> 2022-04-28 14:47:16 +0300
commit 27092a58a53008cd50c571b459d95d1631d55a4e (patch)
tree 142e8b35d4e3104003a9e55d2dc48acb3ca56fea
parent ed1b327b748bfc62f10eb935200de8a2087762f5 (diff)
download ydb-27092a58a53008cd50c571b459d95d1631d55a4e.tar.gz
Report availability timestamps for nodes and PDisks KIKIMR-14794
ref:1218d244284fd7fd9a9dbc7ef9d07cda0e53f466
-rw-r--r-- ydb/core/mind/bscontroller/cmds_storage_pool.cpp 8
-rw-r--r-- ydb/core/mind/bscontroller/disk_metrics.cpp 10
-rw-r--r-- ydb/core/mind/bscontroller/impl.h 14
-rw-r--r-- ydb/core/mind/bscontroller/register_node.cpp 32
-rw-r--r-- ydb/core/mind/bscontroller/scheme.h 5
-rw-r--r-- ydb/core/protos/blobstorage_config.proto 3
-rw-r--r-- ydb/core/protos/blobstorage_disk.proto 2
-rw-r--r-- ydb/core/protos/counters_bs_controller.proto 1
-rw-r--r-- ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema 14
9 files changed, 76 insertions, 13 deletions
diff --git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp
index 0ca115ec3ee..5ea91f6c57e 100644
--- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp
+++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp
@@ -504,6 +504,7 @@ namespace NKikimr::NBsController {
}
}
+ const TInstant now = TActivationContext::Now();
TMap<TNodeId, NKikimrBlobStorage::TBaseConfig::TNode> nodes;
for (const auto& [hostId, record] : *HostRecords) {
TStringStream s;
@@ -517,6 +518,13 @@ namespace NKikimr::NBsController {
node.SetNodeId(record.NodeId);
node.SetPhysicalLocation(s.Str());
record.Location.Serialize(node.MutableLocation(), false); // this field has been introduced recently, so it doesn't have compatibility format
+ const auto& nodes = Nodes.Get();
+ if (const auto it = nodes.find(record.NodeId); it != nodes.end()) {
+ node.SetLastConnectTimestamp(it->second.LastConnectTimestamp.GetValue());
+ node.SetLastDisconnectTimestamp(it->second.LastDisconnectTimestamp.GetValue());
+ node.SetLastSeenTimestamp(it->second.LastConnectTimestamp <= it->second.LastDisconnectTimestamp ?
+ it->second.LastDisconnectTimestamp.GetValue() : now.GetValue());
+ }
auto *key = node.MutableHostKey();
key->SetFqdn(std::get<0>(hostId));
key->SetIcPort(std::get<1>(hostId));
diff --git a/ydb/core/mind/bscontroller/disk_metrics.cpp b/ydb/core/mind/bscontroller/disk_metrics.cpp
index 3d045243fe4..e97e6ddce98 100644
--- a/ydb/core/mind/bscontroller/disk_metrics.cpp
+++ b/ydb/core/mind/bscontroller/disk_metrics.cpp
@@ -1,7 +1,6 @@
#include "impl.h"
-namespace NKikimr {
-namespace NBsController {
+namespace NKikimr::NBsController {
class TBlobStorageController::TTxUpdateDiskMetrics : public TTransactionBase<TBlobStorageController> {
public:
@@ -47,6 +46,7 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatu
TabletCounters->Cumulative()[NBlobStorageController::COUNTER_UPDATE_DISK_METRICS_COUNT].Increment(1);
TRequestCounter counter(TabletCounters, NBlobStorageController::COUNTER_UPDATE_DISK_METRICS_USEC);
+ const TInstant now = TActivationContext::Now();
auto& record = ev->Get()->Record;
STLOG(PRI_DEBUG, BS_CONTROLLER, BSCTXUDM01, "Updating disk status", (Record, record));
@@ -83,7 +83,7 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatu
for (const auto& m : record.GetPDisksMetrics()) {
const TPDiskId pdiskId(ev->Sender.NodeId(), m.GetPDiskId());
if (auto *pdisk = FindPDisk(pdiskId)) {
- if (pdisk->UpdatePDiskMetrics(m)) {
+ if (pdisk->UpdatePDiskMetrics(m, now)) {
const auto first = std::make_pair(pdiskId, TList<TSelectGroupsQueueItem>::iterator());
for (auto it = PDiskToQueue.lower_bound(first); it != PDiskToQueue.end() && it->first == pdiskId; ++it) {
queues.insert(it->second);
@@ -104,12 +104,10 @@ void TBlobStorageController::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatu
ProcessVDiskStatus(record.GetVDiskStatus());
// commit into database if enough time has passed
- const TInstant now = TActivationContext::Now();
if (now - LastMetricsCommit >= TDuration::Seconds(15)) {
Execute(new TTxUpdateDiskMetrics(this));
LastMetricsCommit = now;
}
}
-}
-}
+} // NKikimr::NBsController
diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h
index e69265827bb..5991397bd0a 100644
--- a/ydb/core/mind/bscontroller/impl.h
+++ b/ydb/core/mind/bscontroller/impl.h
@@ -71,6 +71,7 @@ public:
class TTxScrubQuantumFinished;
class TTxUpdateLastSeenReady;
class TTxUpdateNodeDrives;
+ class TTxUpdateNodeDisconnectTimestamp;
class TVSlotInfo;
class TPDiskInfo;
@@ -394,9 +395,10 @@ public:
&& Metrics.HasMaxWriteThroughput();
}
- bool UpdatePDiskMetrics(const NKikimrBlobStorage::TPDiskMetrics& pDiskMetrics) {
+ bool UpdatePDiskMetrics(const NKikimrBlobStorage::TPDiskMetrics& pDiskMetrics, TInstant now) {
const bool hadMetrics = HasFullMetrics();
Metrics.CopyFrom(pDiskMetrics);
+ Metrics.SetUpdateTimestamp(now.GetValue());
MetricsDirty = true;
return !hadMetrics && HasFullMetrics(); // true if metrics have just arrived
}
@@ -755,15 +757,21 @@ public:
ui32 ConnectedCount = 0;
Table::NextPDiskID::Type NextPDiskID;
+ TInstant LastConnectTimestamp;
+ TInstant LastDisconnectTimestamp;
// in-mem only
std::map<TString, NPDisk::TDriveData> KnownDrives;
template<typename T>
static void Apply(TBlobStorageController* /*controller*/, T&& callback) {
static TTableAdapter<Table, TNodeInfo,
- Table::NextPDiskID
+ Table::NextPDiskID,
+ Table::LastConnectTimestamp,
+ Table::LastDisconnectTimestamp
> adapter(
- &TNodeInfo::NextPDiskID
+ &TNodeInfo::NextPDiskID,
+ &TNodeInfo::LastConnectTimestamp,
+ &TNodeInfo::LastDisconnectTimestamp
);
callback(&adapter);
}
diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp
index 2a88fec38e2..1e2959cb888 100644
--- a/ydb/core/mind/bscontroller/register_node.cpp
+++ b/ydb/core/mind/bscontroller/register_node.cpp
@@ -203,7 +203,7 @@ public:
TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_REGISTER_NODE; }
- bool Execute(TTransactionContext& /*txc*/, const TActorContext&) override {
+ bool Execute(TTransactionContext& txc, const TActorContext&) override {
Self->TabletCounters->Cumulative()[NBlobStorageController::COUNTER_REGISTER_NODE_COUNT].Increment(1);
TRequestCounter counter(Self->TabletCounters, NBlobStorageController::COUNTER_REGISTER_NODE_USEC);
@@ -292,6 +292,9 @@ public:
res->Record.SetAvailDomain(AppData()->DomainsInfo->GetDomainUidByTabletId(Self->TabletID()));
Response = std::make_unique<IEventHandle>(request->Sender, Self->SelfId(), res.release(), 0, request->Cookie);
+ NIceDb::TNiceDb db(txc.DB);
+ auto& node = Self->GetNode(nodeId);
+ db.Table<Schema::Node>().Key(nodeId).Update<Schema::Node::LastConnectTimestamp>(node.LastConnectTimestamp);
return true;
}
@@ -302,6 +305,29 @@ public:
}
};
+class TBlobStorageController::TTxUpdateNodeDisconnectTimestamp // tablet transaction that writes one node's LastDisconnectTimestamp into the Node table
+ : public TTransactionBase<TBlobStorageController>
+{
+ TNodeId NodeId; // key of the Schema::Node row updated by Execute()
+ // the timestamp itself is not stored here; Execute() reads it from the controller's node record
+public:
+ TTxUpdateNodeDisconnectTimestamp(TNodeId nodeId, TBlobStorageController *controller) // nodeId: row to update; controller: passed to TTransactionBase
+ : TTransactionBase(controller)
+ , NodeId(nodeId)
+ {}
+ // reports this transaction under its own ETxTypes value
+ TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_UPDATE_NODE_DISCONNECT_TIMESTAMP; }
+ // copies node.LastDisconnectTimestamp (as held by Self->GetNode(NodeId) at execution time) into the row keyed by NodeId
+ bool Execute(TTransactionContext& txc, const TActorContext&) override {
+ NIceDb::TNiceDb db(txc.DB);
+ auto& node = Self->GetNode(NodeId); // NOTE(review): assumes the node record still exists when the transaction runs — confirm GetNode's behavior for unknown ids
+ db.Table<Schema::Node>().Key(NodeId).Update<Schema::Node::LastDisconnectTimestamp>(node.LastDisconnectTimestamp);
+ return true; // always returns true
+ }
+ // nothing to do after the update; Complete() is intentionally empty
+ void Complete(const TActorContext&) override {}
+};
+
void TBlobStorageController::ReadGroups(TSet<ui32>& groupIDsToRead, bool discard,
TEvBlobStorage::TEvControllerNodeServiceSetUpdate *result) {
for (auto it = groupIDsToRead.begin(); it != groupIDsToRead.end(); ) {
@@ -429,6 +455,8 @@ void TBlobStorageController::OnWardenConnected(TNodeId nodeId) {
it->second->UpdateOperational(true);
SysViewChangedPDisks.insert(it->first);
}
+
+ node.LastConnectTimestamp = TInstant::Now();
}
void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) {
@@ -469,6 +497,8 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) {
if (!lastSeenReadyQ.empty()) {
Execute(CreateTxUpdateLastSeenReady(std::move(lastSeenReadyQ)));
}
+ node.LastDisconnectTimestamp = now;
+ Execute(new TTxUpdateNodeDisconnectTimestamp(nodeId, this));
}
void TBlobStorageController::EraseKnownDrivesOnDisconnected(TNodeInfo *nodeInfo) {
diff --git a/ydb/core/mind/bscontroller/scheme.h b/ydb/core/mind/bscontroller/scheme.h
index a799ee29ba1..c422dcc89e0 100644
--- a/ydb/core/mind/bscontroller/scheme.h
+++ b/ydb/core/mind/bscontroller/scheme.h
@@ -15,10 +15,11 @@ struct Schema : NIceDb::Schema {
struct Node : Table<2> {
struct ID : Column<1, NScheme::NTypeIds::Uint32> {};
struct NextPDiskID : Column<2, NScheme::NTypeIds::Uint32> {};
- struct NextGroupKeyNonce : Column<9, NScheme::NTypeIds::Uint64> { static constexpr Type Default = 0; };
+ struct LastConnectTimestamp : Column<10, NScheme::NTypeIds::Uint64> { using Type = TInstant; static constexpr Type Default = TInstant::Zero(); };
+ struct LastDisconnectTimestamp : Column<11, NScheme::NTypeIds::Uint64> { using Type = TInstant; static constexpr Type Default = TInstant::Zero(); };
using TKey = TableKey<ID>;
- using TColumns = TableColumns<ID, NextPDiskID>;
+ using TColumns = TableColumns<ID, NextPDiskID, LastConnectTimestamp, LastDisconnectTimestamp>;
};
struct PDisk : Table<3> {
diff --git a/ydb/core/protos/blobstorage_config.proto b/ydb/core/protos/blobstorage_config.proto
index 503496c1303..cc94aac87ab 100644
--- a/ydb/core/protos/blobstorage_config.proto
+++ b/ydb/core/protos/blobstorage_config.proto
@@ -574,6 +574,9 @@ message TBaseConfig {
bytes PhysicalLocation = 2 [deprecated=true];
THostKey HostKey = 3;
NActorsInterconnect.TNodeLocation Location = 4;
+ uint64 LastConnectTimestamp = 5; // TInstant
+ uint64 LastDisconnectTimestamp = 6; // TInstant
+ uint64 LastSeenTimestamp = 7; // when seen working for the last time
}
repeated TPDisk PDisk = 1;
diff --git a/ydb/core/protos/blobstorage_disk.proto b/ydb/core/protos/blobstorage_disk.proto
index 6b3c4b1ed81..42d82927eb1 100644
--- a/ydb/core/protos/blobstorage_disk.proto
+++ b/ydb/core/protos/blobstorage_disk.proto
@@ -73,4 +73,6 @@ message TPDiskMetrics {
optional uint64 EnforcedDynamicSlotSize = 9;
optional TPDiskState.E State = 10;
+
+ optional uint64 UpdateTimestamp = 11; // TInstant::GetValue()
}
diff --git a/ydb/core/protos/counters_bs_controller.proto b/ydb/core/protos/counters_bs_controller.proto
index 496d60e0b23..668f4cc52f8 100644
--- a/ydb/core/protos/counters_bs_controller.proto
+++ b/ydb/core/protos/counters_bs_controller.proto
@@ -213,4 +213,5 @@ enum ETxTypes {
TXTYPE_UPDATE_LAST_SEEN_READY = 21 [(TxTypeOpts) = {Name: "TTxUpdateLastSeenReady"}];
TXTYPE_UPDATE_NODE_DRIVES = 22 [(TxTypeOpts) = {Name: "TTxUpdateNodeDrives"}];
TXTYPE_MON_EVENT_OPERATION_LOG_ENTRY = 23 [(TxTypeOpts) = {Name: "TTxMonEvent_OperationLogEntry"}];
+ TXTYPE_UPDATE_NODE_DISCONNECT_TIMESTAMP = 24 [(TxTypeOpts) = {Name: "TTxUpdateNodeDisconnectTimestamp"}];
}
diff --git a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema
index 7e95548fba3..3cb844ae5da 100644
--- a/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema
+++ b/ydb/tests/functional/scheme_tests/canondata/tablet_scheme_tests.TestTabletSchemes.test_tablet_schemes_flat_bs_controller_/flat_bs_controller.schema
@@ -146,6 +146,16 @@
"ColumnId": 2,
"ColumnName": "NextPDiskID",
"ColumnType": "Uint32"
+ },
+ {
+ "ColumnId": 10,
+ "ColumnName": "LastConnectTimestamp",
+ "ColumnType": "Uint64"
+ },
+ {
+ "ColumnId": 11,
+ "ColumnName": "LastDisconnectTimestamp",
+ "ColumnType": "Uint64"
}
],
"ColumnsDropped": [],
@@ -153,7 +163,9 @@
"0": {
"Columns": [
1,
- 2
+ 2,
+ 10,
+ 11
],
"RoomID": 0,
"Codec": 0,