aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorandrew-rykov <arykov@ydb.tech>2023-10-30 20:07:43 +0300
committerandrew-rykov <arykov@ydb.tech>2023-10-30 20:42:41 +0300
commit8ccc99f1093aa077b04d35edfed58e21a01f0dcd (patch)
tree690a5c47451f5dc79353770e510debc3bca2c9a9
parentf3fc65218eee3457eb087a9f6df33d5a797075c5 (diff)
downloadydb-8ccc99f1093aa077b04d35edfed58e21a01f0dcd.tar.gz
KIKIMR-19066 added hc optimal clock skew calculation
Interconnect в режиме реального времени отправляет данные о разнице во времени (ClockSkew) между собой и каждым подключенным пиром на whiteboard. На whiteboard эта информация собирается за период в 15 секунд, и из всех значений выбирается максимальное. Этот максимальный ClockSkew затем используется в ответах на запросы healthcheck-а в течение следующих 15 секунд.
-rw-r--r--library/cpp/actors/interconnect/interconnect_common.h27
-rw-r--r--library/cpp/actors/interconnect/interconnect_tcp_session.cpp6
-rw-r--r--ydb/core/driver_lib/run/kikimr_services_initializers.cpp17
-rw-r--r--ydb/core/health_check/health_check.cpp48
-rw-r--r--ydb/core/node_whiteboard/node_whiteboard.h10
-rw-r--r--ydb/core/protos/node_whiteboard.proto7
-rw-r--r--ydb/core/tablet/node_whiteboard.cpp22
7 files changed, 120 insertions, 17 deletions
diff --git a/library/cpp/actors/interconnect/interconnect_common.h b/library/cpp/actors/interconnect/interconnect_common.h
index d313038094..300153d8de 100644
--- a/library/cpp/actors/interconnect/interconnect_common.h
+++ b/library/cpp/actors/interconnect/interconnect_common.h
@@ -61,6 +61,30 @@ namespace NActors {
}
};
+ struct TWhiteboardSessionStatus {
+ TActorSystem* ActorSystem;
+ ui32 PeerId;
+ TString Peer;
+ bool Connected;
+ bool Green;
+ bool Yellow;
+ bool Orange;
+ bool Red;
+ i64 ClockSkew;
+
+ TWhiteboardSessionStatus(TActorSystem* actorSystem, ui32 peerId, const TString& peer, bool connected, bool green, bool yellow, bool orange, bool red, i64 clockSkew)
+ : ActorSystem(actorSystem)
+ , PeerId(peerId)
+ , Peer(peer)
+ , Connected(connected)
+ , Green(green)
+ , Yellow(yellow)
+ , Orange(orange)
+ , Red(red)
+ , ClockSkew(clockSkew)
+ {}
+ };
+
struct TChannelSettings {
ui16 Weight;
};
@@ -72,8 +96,7 @@ namespace NActors {
using TInitWhiteboardCallback = std::function<void(ui16 icPort, TActorSystem* actorSystem)>;
- using TUpdateWhiteboardCallback = std::function<void(const TString& peer, bool connected, bool green, bool yellow,
- bool orange, bool red, TActorSystem* actorSystem)>;
+ using TUpdateWhiteboardCallback = std::function<void(const TWhiteboardSessionStatus& data)>;
struct TInterconnectProxyCommon : TAtomicRefCount<TInterconnectProxyCommon> {
TActorId NameserviceId;
diff --git a/library/cpp/actors/interconnect/interconnect_tcp_session.cpp b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp
index a7509fbc94..25c05898d2 100644
--- a/library/cpp/actors/interconnect/interconnect_tcp_session.cpp
+++ b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp
@@ -994,13 +994,15 @@ namespace NActors {
} while (false);
}
- callback(Proxy->Metrics->GetHumanFriendlyPeerHostName(),
+ callback({TlsActivationContext->ExecutorThread.ActorSystem,
+ Proxy->PeerNodeId,
+ Proxy->Metrics->GetHumanFriendlyPeerHostName(),
connected,
flagState == EFlag::GREEN,
flagState == EFlag::YELLOW,
flagState == EFlag::ORANGE,
flagState == EFlag::RED,
- TlsActivationContext->ExecutorThread.ActorSystem);
+ ReceiveContext->ClockSkew_us.load()});
}
if (connected) {
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
index e0c1da5572..a7dd1907a5 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
@@ -740,14 +740,15 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s
icCommon->InitWhiteboard = [whiteboardId](ui16 port, TActorSystem *actorSystem) {
actorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateAddEndpoint("ic", Sprintf(":%d", port)));
};
- icCommon->UpdateWhiteboard = [whiteboardId](const TString& peer, bool connected, bool green, bool yellow,
- bool orange, bool red, TActorSystem *actorSystem) {
- actorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvNodeStateUpdate(
- peer, connected,
- green ? NKikimrWhiteboard::EFlag::Green :
- yellow ? NKikimrWhiteboard::EFlag::Yellow :
- orange ? NKikimrWhiteboard::EFlag::Orange :
- red ? NKikimrWhiteboard::EFlag::Red : NKikimrWhiteboard::EFlag()));
+ icCommon->UpdateWhiteboard = [whiteboardId](const TWhiteboardSessionStatus& data) {
+ data.ActorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvNodeStateUpdate(
+ data.Peer, data.Connected,
+ data.Green ? NKikimrWhiteboard::EFlag::Green :
+ data.Yellow ? NKikimrWhiteboard::EFlag::Yellow :
+ data.Orange ? NKikimrWhiteboard::EFlag::Orange :
+ data.Red ? NKikimrWhiteboard::EFlag::Red : NKikimrWhiteboard::EFlag()));
+ data.ActorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvClockSkewUpdate(
+ data.PeerId, data.ClockSkew));
};
}
diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp
index c94c29907f..9178b76dcb 100644
--- a/ydb/core/health_check/health_check.cpp
+++ b/ydb/core/health_check/health_check.cpp
@@ -139,6 +139,7 @@ public:
TabletState,
SystemTabletState,
OverloadState,
+ SyncState,
};
struct TTenantInfo {
@@ -1858,7 +1859,7 @@ public:
TString vDiskId = GetVSlotId(vSlotIdProto);
Ydb::Monitoring::StorageVDiskStatus& vDiskStatus = *storageGroupStatus.add_vdisks();
FillVDiskStatus(vDiskId, vDiskStatus, {&context, "VDISK"});
- onlyGoodDisks &= vDiskStatus.overall() != Ydb::Monitoring::StatusFlag::RED
+ onlyGoodDisks &= vDiskStatus.overall() != Ydb::Monitoring::StatusFlag::RED
&& vDiskStatus.overall() != Ydb::Monitoring::StatusFlag::GREY;
}
@@ -2050,6 +2051,42 @@ public:
}
}
+ const TDuration MAX_CLOCKSKEW_RED_ISSUE_TIME = TDuration::MicroSeconds(25000);
+ const TDuration MAX_CLOCKSKEW_YELLOW_ISSUE_TIME = TDuration::MicroSeconds(5000);
+
+ void FillNodesSyncStatus(TOverallStateContext& context) {
+ long maxClockSkewUs = 0;
+ TNodeId maxClockSkewPeerId = 0;
+ TNodeId maxClockSkewNodeId = 0;
+ for (auto& [nodeId, nodeSystemState] : MergedNodeSystemState) {
+ if (abs(nodeSystemState->GetMaxClockSkewWithPeerUs()) > maxClockSkewUs) {
+ maxClockSkewUs = abs(nodeSystemState->GetMaxClockSkewWithPeerUs());
+ maxClockSkewPeerId = nodeSystemState->GetMaxClockSkewPeerId();
+ maxClockSkewNodeId = nodeId;
+ }
+ }
+ if (!maxClockSkewNodeId) {
+ return;
+ }
+
+ TSelfCheckResult syncContext;
+ syncContext.Type = "NODES_SYNC";
+ FillNodeInfo(maxClockSkewNodeId, syncContext.Location.mutable_node());
+ FillNodeInfo(maxClockSkewPeerId, syncContext.Location.mutable_peer());
+
+ TDuration maxClockSkewTime = TDuration::MicroSeconds(maxClockSkewUs);
+ if (maxClockSkewTime > MAX_CLOCKSKEW_RED_ISSUE_TIME) {
+ syncContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "The nodes have a time discrepancy of " << maxClockSkewTime.MilliSeconds() << " ms", ETags::SyncState);
+ } else if (maxClockSkewTime > MAX_CLOCKSKEW_YELLOW_ISSUE_TIME) {
+ syncContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, TStringBuilder() << "The nodes have a time discrepancy of " << maxClockSkewTime.MilliSeconds() << " ms", ETags::SyncState);
+ } else {
+ syncContext.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN);
+ }
+
+ context.UpdateMaxStatus(syncContext.GetOverallStatus());
+ context.AddIssues(syncContext.IssueRecords);
+ }
+
void FillResult(TOverallStateContext context) {
if (IsSpecificDatabaseFilter()) {
FillDatabaseResult(context, FilterDatabase, DatabaseState[FilterDatabase]);
@@ -2058,6 +2095,7 @@ public:
FillDatabaseResult(context, path, state);
}
}
+ FillNodesSyncStatus(context);
if (DatabaseState.empty()) {
Ydb::Monitoring::DatabaseStatus& databaseStatus(*context.Result->add_database_status());
TSelfCheckResult tabletContext;
@@ -2112,9 +2150,9 @@ public:
void TruncateIssuesIfBeyondLimit(Ydb::Monitoring::SelfCheckResult& result, ui64 byteLimit) {
auto truncateStatusPriority = {
- Ydb::Monitoring::StatusFlag::BLUE,
- Ydb::Monitoring::StatusFlag::YELLOW,
- Ydb::Monitoring::StatusFlag::ORANGE,
+ Ydb::Monitoring::StatusFlag::BLUE,
+ Ydb::Monitoring::StatusFlag::YELLOW,
+ Ydb::Monitoring::StatusFlag::ORANGE,
Ydb::Monitoring::StatusFlag::RED
};
for (Ydb::Monitoring::StatusFlag::Status truncateStatus: truncateStatusPriority) {
@@ -2162,7 +2200,7 @@ public:
id << Ydb::Monitoring::StatusFlag_Status_Name(issue->status());
id << '-' << TSelfCheckResult::crc16(issue->message());
issue->set_id(id.Str());
- }
+ }
for (TActorId pipe : PipeClients) {
NTabletPipe::CloseClient(SelfId(), pipe);
diff --git a/ydb/core/node_whiteboard/node_whiteboard.h b/ydb/core/node_whiteboard/node_whiteboard.h
index a95ac0d348..81d1f67430 100644
--- a/ydb/core/node_whiteboard/node_whiteboard.h
+++ b/ydb/core/node_whiteboard/node_whiteboard.h
@@ -58,6 +58,7 @@ struct TEvWhiteboard{
EvPDiskStateDelete,
EvVDiskStateGenerationChange,
EvVDiskDropDonors,
+ EvClockSkewUpdate,
EvEnd
};
@@ -417,6 +418,15 @@ struct TEvWhiteboard{
struct TEvSystemStateResponse : public TEventPB<TEvSystemStateResponse, NKikimrWhiteboard::TEvSystemStateResponse, EvSystemStateResponse> {};
+ struct TEvClockSkewUpdate : TEventPB<TEvClockSkewUpdate, NKikimrWhiteboard::TNodeClockSkew, EvClockSkewUpdate> {
+ TEvClockSkewUpdate() = default;
+
+ TEvClockSkewUpdate(const ui32 peerNodeId, i64 clockSkewUs) {
+ Record.SetPeerNodeId(peerNodeId);
+ Record.SetClockSkewUs(clockSkewUs);
+ }
+ };
+
struct TEvNodeStateUpdate : TEventPB<TEvNodeStateUpdate, NKikimrWhiteboard::TNodeStateInfo, EvNodeStateUpdate> {
TEvNodeStateUpdate() = default;
diff --git a/ydb/core/protos/node_whiteboard.proto b/ydb/core/protos/node_whiteboard.proto
index d168a64f59..4d496a014f 100644
--- a/ydb/core/protos/node_whiteboard.proto
+++ b/ydb/core/protos/node_whiteboard.proto
@@ -81,6 +81,11 @@ message TEvTabletStateResponse {
optional bytes Packed5 = 5;
}
+message TNodeClockSkew {
+ optional uint32 PeerNodeId = 1;
+ optional int64 ClockSkewUs = 2;
+}
+
message TNodeStateInfo {
optional string PeerName = 1;
optional bool Connected = 2;
@@ -306,6 +311,8 @@ message TSystemStateInfo {
optional uint64 MemoryUsedInAlloc = 29;
optional double MaxDiskUsage = 30;
optional NActorsInterconnect.TNodeLocation Location = 31;
+ optional int64 MaxClockSkewWithPeerUs = 32; // a positive value means the peer is ahead in time; a negative value means it's behind
+ optional uint32 MaxClockSkewPeerId = 33;
optional uint64 DisconnectTime = 34;
}
diff --git a/ydb/core/tablet/node_whiteboard.cpp b/ydb/core/tablet/node_whiteboard.cpp
index 19ad017b12..36c8bb9f51 100644
--- a/ydb/core/tablet/node_whiteboard.cpp
+++ b/ydb/core/tablet/node_whiteboard.cpp
@@ -24,6 +24,7 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
enum EEv {
EvUpdateRuntimeStats = EventSpaceBegin(TEvents::ES_PRIVATE),
EvCleanupDeadTablets,
+ EvUpdateClockSkew,
EvEnd
};
@@ -31,6 +32,7 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService>
struct TEvUpdateRuntimeStats : TEventLocal<TEvUpdateRuntimeStats, EvUpdateRuntimeStats> {};
struct TEvCleanupDeadTablets : TEventLocal<TEvCleanupDeadTablets, EvCleanupDeadTablets> {};
+ struct TEvUpdateClockSkew : TEventLocal<TEvUpdateClockSkew, EvUpdateClockSkew> {};
};
public:
static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
@@ -63,6 +65,7 @@ public:
}
ctx.Send(ctx.SelfID, new TEvPrivate::TEvUpdateRuntimeStats());
ctx.Schedule(TDuration::Seconds(60), new TEvPrivate::TEvCleanupDeadTablets());
+ ctx.Schedule(TDuration::Seconds(15), new TEvPrivate::TEvUpdateClockSkew());
Become(&TNodeWhiteboardService::StateFunc);
}
@@ -72,6 +75,8 @@ protected:
std::unordered_map<ui32, NKikimrWhiteboard::TPDiskStateInfo> PDiskStateInfo;
std::unordered_map<TVDiskID, NKikimrWhiteboard::TVDiskStateInfo, THash<TVDiskID>> VDiskStateInfo;
std::unordered_map<ui32, NKikimrWhiteboard::TBSGroupStateInfo> BSGroupStateInfo;
+ i64 MaxClockSkewWithPeerUs;
+ ui32 MaxClockSkewPeerId;
NKikimrWhiteboard::TSystemStateInfo SystemStateInfo;
THolder<NTracing::ITraceCollection> TabletIntrospectionData;
TProcStat ProcessStats;
@@ -382,6 +387,7 @@ protected:
STRICT_STFUNC(StateFunc,
HFunc(TEvWhiteboard::TEvTabletStateUpdate, Handle);
HFunc(TEvWhiteboard::TEvTabletStateRequest, Handle);
+ HFunc(TEvWhiteboard::TEvClockSkewUpdate, Handle);
HFunc(TEvWhiteboard::TEvNodeStateUpdate, Handle);
HFunc(TEvWhiteboard::TEvNodeStateDelete, Handle);
HFunc(TEvWhiteboard::TEvNodeStateRequest, Handle);
@@ -409,6 +415,7 @@ protected:
HFunc(TEvWhiteboard::TEvSignalBodyRequest, Handle);
HFunc(TEvPrivate::TEvUpdateRuntimeStats, Handle);
HFunc(TEvPrivate::TEvCleanupDeadTablets, Handle);
+ HFunc(TEvPrivate::TEvUpdateClockSkew, Handle);
)
void Handle(TEvWhiteboard::TEvTabletStateUpdate::TPtr &ev, const TActorContext &ctx) {
@@ -422,6 +429,14 @@ protected:
}
}
+ void Handle(TEvWhiteboard::TEvClockSkewUpdate::TPtr &ev, const TActorContext &) {
+ i64 skew = ev->Get()->Record.GetClockSkewUs();
+ if (abs(skew) > abs(MaxClockSkewWithPeerUs)) {
+ MaxClockSkewWithPeerUs = skew;
+ MaxClockSkewPeerId = ev->Get()->Record.GetPeerNodeId();
+ }
+ }
+
void Handle(TEvWhiteboard::TEvNodeStateUpdate::TPtr &ev, const TActorContext &ctx) {
auto& nodeStateInfo = NodeStateInfo[ev->Get()->Record.GetPeerName()];
if (CheckedMerge(nodeStateInfo, ev->Get()->Record) >= 100) {
@@ -944,6 +959,13 @@ protected:
}
ctx.Schedule(TDuration::Seconds(60), new TEvPrivate::TEvCleanupDeadTablets());
}
+
+ void Handle(TEvPrivate::TEvUpdateClockSkew::TPtr &, const TActorContext &ctx) {
+ SystemStateInfo.SetMaxClockSkewWithPeerUs(MaxClockSkewWithPeerUs);
+ SystemStateInfo.SetMaxClockSkewPeerId(MaxClockSkewPeerId);
+ MaxClockSkewWithPeerUs = 0;
+ ctx.Schedule(TDuration::Seconds(15), new TEvPrivate::TEvUpdateClockSkew());
+ }
};
IActor* CreateNodeWhiteboardService() {