diff options
author | andrew-rykov <arykov@ydb.tech> | 2023-10-30 20:07:43 +0300 |
---|---|---|
committer | andrew-rykov <arykov@ydb.tech> | 2023-10-30 20:42:41 +0300 |
commit | 8ccc99f1093aa077b04d35edfed58e21a01f0dcd (patch) | |
tree | 690a5c47451f5dc79353770e510debc3bca2c9a9 | |
parent | f3fc65218eee3457eb087a9f6df33d5a797075c5 (diff) | |
download | ydb-8ccc99f1093aa077b04d35edfed58e21a01f0dcd.tar.gz |
KIKIMR-19066 added hc optimal clock skew calculation
Interconnect в режиме реального времени отправляет данные о разнице во времени (ClockSkew) между собой и каждым подключенным пиром на whiteboard. На whiteboard эта информация собирается за период в 15 секунд, и из всех значений выбирается максимальное. Этот максимальный ClockSkew затем используется в ответах на запросы healthcheck-а в течение следующих 15 секунд.
-rw-r--r-- | library/cpp/actors/interconnect/interconnect_common.h | 27 | ||||
-rw-r--r-- | library/cpp/actors/interconnect/interconnect_tcp_session.cpp | 6 | ||||
-rw-r--r-- | ydb/core/driver_lib/run/kikimr_services_initializers.cpp | 17 | ||||
-rw-r--r-- | ydb/core/health_check/health_check.cpp | 48 | ||||
-rw-r--r-- | ydb/core/node_whiteboard/node_whiteboard.h | 10 | ||||
-rw-r--r-- | ydb/core/protos/node_whiteboard.proto | 7 | ||||
-rw-r--r-- | ydb/core/tablet/node_whiteboard.cpp | 22 |
7 files changed, 120 insertions, 17 deletions
diff --git a/library/cpp/actors/interconnect/interconnect_common.h b/library/cpp/actors/interconnect/interconnect_common.h index d313038094..300153d8de 100644 --- a/library/cpp/actors/interconnect/interconnect_common.h +++ b/library/cpp/actors/interconnect/interconnect_common.h @@ -61,6 +61,30 @@ namespace NActors { } }; + struct TWhiteboardSessionStatus { + TActorSystem* ActorSystem; + ui32 PeerId; + TString Peer; + bool Connected; + bool Green; + bool Yellow; + bool Orange; + bool Red; + i64 ClockSkew; + + TWhiteboardSessionStatus(TActorSystem* actorSystem, ui32 peerId, const TString& peer, bool connected, bool green, bool yellow, bool orange, bool red, i64 clockSkew) + : ActorSystem(actorSystem) + , PeerId(peerId) + , Peer(peer) + , Connected(connected) + , Green(green) + , Yellow(yellow) + , Orange(orange) + , Red(red) + , ClockSkew(clockSkew) + {} + }; + struct TChannelSettings { ui16 Weight; }; @@ -72,8 +96,7 @@ namespace NActors { using TInitWhiteboardCallback = std::function<void(ui16 icPort, TActorSystem* actorSystem)>; - using TUpdateWhiteboardCallback = std::function<void(const TString& peer, bool connected, bool green, bool yellow, - bool orange, bool red, TActorSystem* actorSystem)>; + using TUpdateWhiteboardCallback = std::function<void(const TWhiteboardSessionStatus& data)>; struct TInterconnectProxyCommon : TAtomicRefCount<TInterconnectProxyCommon> { TActorId NameserviceId; diff --git a/library/cpp/actors/interconnect/interconnect_tcp_session.cpp b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp index a7509fbc94..25c05898d2 100644 --- a/library/cpp/actors/interconnect/interconnect_tcp_session.cpp +++ b/library/cpp/actors/interconnect/interconnect_tcp_session.cpp @@ -994,13 +994,15 @@ namespace NActors { } while (false); } - callback(Proxy->Metrics->GetHumanFriendlyPeerHostName(), + callback({TlsActivationContext->ExecutorThread.ActorSystem, + Proxy->PeerNodeId, + Proxy->Metrics->GetHumanFriendlyPeerHostName(), connected, flagState == EFlag::GREEN, flagState == EFlag::YELLOW, flagState == EFlag::ORANGE, flagState == EFlag::RED, - TlsActivationContext->ExecutorThread.ActorSystem); + ReceiveContext->ClockSkew_us.load()}); } if (connected) { diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index e0c1da5572..a7dd1907a5 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -740,14 +740,15 @@ void TBasicServicesInitializer::InitializeServices(NActors::TActorSystemSetup* s icCommon->InitWhiteboard = [whiteboardId](ui16 port, TActorSystem *actorSystem) { actorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateAddEndpoint("ic", Sprintf(":%d", port))); }; - icCommon->UpdateWhiteboard = [whiteboardId](const TString& peer, bool connected, bool green, bool yellow, - bool orange, bool red, TActorSystem *actorSystem) { - actorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvNodeStateUpdate( - peer, connected, - green ? NKikimrWhiteboard::EFlag::Green : - yellow ? NKikimrWhiteboard::EFlag::Yellow : - orange ? NKikimrWhiteboard::EFlag::Orange : - red ? NKikimrWhiteboard::EFlag::Red : NKikimrWhiteboard::EFlag())); + icCommon->UpdateWhiteboard = [whiteboardId](const TWhiteboardSessionStatus& data) { + data.ActorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvNodeStateUpdate( + data.Peer, data.Connected, + data.Green ? NKikimrWhiteboard::EFlag::Green : + data.Yellow ? NKikimrWhiteboard::EFlag::Yellow : + data.Orange ? NKikimrWhiteboard::EFlag::Orange : + data.Red ? NKikimrWhiteboard::EFlag::Red : NKikimrWhiteboard::EFlag())); + data.ActorSystem->Send(whiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvClockSkewUpdate( + data.PeerId, data.ClockSkew)); }; } diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index c94c29907f..9178b76dcb 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -139,6 +139,7 @@ public: TabletState, SystemTabletState, OverloadState, + SyncState, }; struct TTenantInfo { @@ -1858,7 +1859,7 @@ public: TString vDiskId = GetVSlotId(vSlotIdProto); Ydb::Monitoring::StorageVDiskStatus& vDiskStatus = *storageGroupStatus.add_vdisks(); FillVDiskStatus(vDiskId, vDiskStatus, {&context, "VDISK"}); - onlyGoodDisks &= vDiskStatus.overall() != Ydb::Monitoring::StatusFlag::RED + onlyGoodDisks &= vDiskStatus.overall() != Ydb::Monitoring::StatusFlag::RED && vDiskStatus.overall() != Ydb::Monitoring::StatusFlag::GREY; } @@ -2050,6 +2051,42 @@ public: } } + const TDuration MAX_CLOCKSKEW_RED_ISSUE_TIME = TDuration::MicroSeconds(25000); + const TDuration MAX_CLOCKSKEW_YELLOW_ISSUE_TIME = TDuration::MicroSeconds(5000); + + void FillNodesSyncStatus(TOverallStateContext& context) { + long maxClockSkewUs = 0; + TNodeId maxClockSkewPeerId = 0; + TNodeId maxClockSkewNodeId = 0; + for (auto& [nodeId, nodeSystemState] : MergedNodeSystemState) { + if (abs(nodeSystemState->GetMaxClockSkewWithPeerUs()) > maxClockSkewUs) { + maxClockSkewUs = abs(nodeSystemState->GetMaxClockSkewWithPeerUs()); + maxClockSkewPeerId = nodeSystemState->GetMaxClockSkewPeerId(); + maxClockSkewNodeId = nodeId; + } + } + if (!maxClockSkewNodeId) { + return; + } + + TSelfCheckResult syncContext; + syncContext.Type = "NODES_SYNC"; + FillNodeInfo(maxClockSkewNodeId, syncContext.Location.mutable_node()); + FillNodeInfo(maxClockSkewPeerId, syncContext.Location.mutable_peer()); + + TDuration maxClockSkewTime = TDuration::MicroSeconds(maxClockSkewUs); + if (maxClockSkewTime > MAX_CLOCKSKEW_RED_ISSUE_TIME) { + syncContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "The nodes have a time discrepancy of " << maxClockSkewTime.MilliSeconds() << " ms", ETags::SyncState); + } else if (maxClockSkewTime > MAX_CLOCKSKEW_YELLOW_ISSUE_TIME) { + syncContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, TStringBuilder() << "The nodes have a time discrepancy of " << maxClockSkewTime.MilliSeconds() << " ms", ETags::SyncState); + } else { + syncContext.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); + } + + context.UpdateMaxStatus(syncContext.GetOverallStatus()); + context.AddIssues(syncContext.IssueRecords); + } + void FillResult(TOverallStateContext context) { if (IsSpecificDatabaseFilter()) { FillDatabaseResult(context, FilterDatabase, DatabaseState[FilterDatabase]); @@ -2058,6 +2095,7 @@ public: FillDatabaseResult(context, path, state); } } + FillNodesSyncStatus(context); if (DatabaseState.empty()) { Ydb::Monitoring::DatabaseStatus& databaseStatus(*context.Result->add_database_status()); TSelfCheckResult tabletContext; @@ -2112,9 +2150,9 @@ public: void TruncateIssuesIfBeyondLimit(Ydb::Monitoring::SelfCheckResult& result, ui64 byteLimit) { auto truncateStatusPriority = { - Ydb::Monitoring::StatusFlag::BLUE, - Ydb::Monitoring::StatusFlag::YELLOW, - Ydb::Monitoring::StatusFlag::ORANGE, + Ydb::Monitoring::StatusFlag::BLUE, + Ydb::Monitoring::StatusFlag::YELLOW, + Ydb::Monitoring::StatusFlag::ORANGE, Ydb::Monitoring::StatusFlag::RED }; for (Ydb::Monitoring::StatusFlag::Status truncateStatus: truncateStatusPriority) { @@ -2162,7 +2200,7 @@ public: id << Ydb::Monitoring::StatusFlag_Status_Name(issue->status()); id << '-' << TSelfCheckResult::crc16(issue->message()); issue->set_id(id.Str()); - } + } for (TActorId pipe : PipeClients) { NTabletPipe::CloseClient(SelfId(), pipe); diff --git a/ydb/core/node_whiteboard/node_whiteboard.h b/ydb/core/node_whiteboard/node_whiteboard.h index a95ac0d348..81d1f67430 100644 --- a/ydb/core/node_whiteboard/node_whiteboard.h +++ b/ydb/core/node_whiteboard/node_whiteboard.h @@ -58,6 +58,7 @@ struct TEvWhiteboard{ EvPDiskStateDelete, EvVDiskStateGenerationChange, EvVDiskDropDonors, + EvClockSkewUpdate, EvEnd }; @@ -417,6 +418,15 @@ struct TEvWhiteboard{ struct TEvSystemStateResponse : public TEventPB<TEvSystemStateResponse, NKikimrWhiteboard::TEvSystemStateResponse, EvSystemStateResponse> {}; + struct TEvClockSkewUpdate : TEventPB<TEvClockSkewUpdate, NKikimrWhiteboard::TNodeClockSkew, EvClockSkewUpdate> { + TEvClockSkewUpdate() = default; + + TEvClockSkewUpdate(const ui32 peerNodeId, i64 clockSkewUs) { + Record.SetPeerNodeId(peerNodeId); + Record.SetClockSkewUs(clockSkewUs); + } + }; + struct TEvNodeStateUpdate : TEventPB<TEvNodeStateUpdate, NKikimrWhiteboard::TNodeStateInfo, EvNodeStateUpdate> { TEvNodeStateUpdate() = default; diff --git a/ydb/core/protos/node_whiteboard.proto b/ydb/core/protos/node_whiteboard.proto index d168a64f59..4d496a014f 100644 --- a/ydb/core/protos/node_whiteboard.proto +++ b/ydb/core/protos/node_whiteboard.proto @@ -81,6 +81,11 @@ message TEvTabletStateResponse { optional bytes Packed5 = 5; } +message TNodeClockSkew { + optional uint32 PeerNodeId = 1; + optional int64 ClockSkewUs = 2; +} + message TNodeStateInfo { optional string PeerName = 1; optional bool Connected = 2; @@ -306,6 +311,8 @@ message TSystemStateInfo { optional uint64 MemoryUsedInAlloc = 29; optional double MaxDiskUsage = 30; optional NActorsInterconnect.TNodeLocation Location = 31; + optional int64 MaxClockSkewWithPeerUs = 32; // a positive value means the peer is ahead in time; a negative value means it's behind + optional uint32 MaxClockSkewPeerId = 33; optional uint64 DisconnectTime = 34; } diff --git a/ydb/core/tablet/node_whiteboard.cpp b/ydb/core/tablet/node_whiteboard.cpp index 19ad017b12..36c8bb9f51 100644 --- a/ydb/core/tablet/node_whiteboard.cpp +++ b/ydb/core/tablet/node_whiteboard.cpp @@ -24,6 +24,7 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService> enum EEv { EvUpdateRuntimeStats = EventSpaceBegin(TEvents::ES_PRIVATE), EvCleanupDeadTablets, + EvUpdateClockSkew, EvEnd }; @@ -31,6 +32,7 @@ class TNodeWhiteboardService : public TActorBootstrapped<TNodeWhiteboardService> struct TEvUpdateRuntimeStats : TEventLocal<TEvUpdateRuntimeStats, EvUpdateRuntimeStats> {}; struct TEvCleanupDeadTablets : TEventLocal<TEvCleanupDeadTablets, EvCleanupDeadTablets> {}; + struct TEvUpdateClockSkew : TEventLocal<TEvUpdateClockSkew, EvUpdateClockSkew> {}; }; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { @@ -63,6 +65,7 @@ public: } ctx.Send(ctx.SelfID, new TEvPrivate::TEvUpdateRuntimeStats()); ctx.Schedule(TDuration::Seconds(60), new TEvPrivate::TEvCleanupDeadTablets()); + ctx.Schedule(TDuration::Seconds(15), new TEvPrivate::TEvUpdateClockSkew()); Become(&TNodeWhiteboardService::StateFunc); } @@ -72,6 +75,8 @@ protected: std::unordered_map<ui32, NKikimrWhiteboard::TPDiskStateInfo> PDiskStateInfo; std::unordered_map<TVDiskID, NKikimrWhiteboard::TVDiskStateInfo, THash<TVDiskID>> VDiskStateInfo; std::unordered_map<ui32, NKikimrWhiteboard::TBSGroupStateInfo> BSGroupStateInfo; + i64 MaxClockSkewWithPeerUs; + ui32 MaxClockSkewPeerId; NKikimrWhiteboard::TSystemStateInfo SystemStateInfo; THolder<NTracing::ITraceCollection> TabletIntrospectionData; TProcStat ProcessStats; @@ -382,6 +387,7 @@ protected: STRICT_STFUNC(StateFunc, HFunc(TEvWhiteboard::TEvTabletStateUpdate, Handle); HFunc(TEvWhiteboard::TEvTabletStateRequest, Handle); + HFunc(TEvWhiteboard::TEvClockSkewUpdate, Handle); HFunc(TEvWhiteboard::TEvNodeStateUpdate, Handle); HFunc(TEvWhiteboard::TEvNodeStateDelete, Handle); HFunc(TEvWhiteboard::TEvNodeStateRequest, Handle); @@ -409,6 +415,7 @@ protected: HFunc(TEvWhiteboard::TEvSignalBodyRequest, Handle); HFunc(TEvPrivate::TEvUpdateRuntimeStats, Handle); HFunc(TEvPrivate::TEvCleanupDeadTablets, Handle); + HFunc(TEvPrivate::TEvUpdateClockSkew, Handle); ) void Handle(TEvWhiteboard::TEvTabletStateUpdate::TPtr &ev, const TActorContext &ctx) { @@ -422,6 +429,14 @@ protected: } } + void Handle(TEvWhiteboard::TEvClockSkewUpdate::TPtr &ev, const TActorContext &) { + i64 skew = ev->Get()->Record.GetClockSkewUs(); + if (abs(skew) > abs(MaxClockSkewWithPeerUs)) { + MaxClockSkewWithPeerUs = skew; + MaxClockSkewPeerId = ev->Get()->Record.GetPeerNodeId(); + } + } + void Handle(TEvWhiteboard::TEvNodeStateUpdate::TPtr &ev, const TActorContext &ctx) { auto& nodeStateInfo = NodeStateInfo[ev->Get()->Record.GetPeerName()]; if (CheckedMerge(nodeStateInfo, ev->Get()->Record) >= 100) { @@ -944,6 +959,13 @@ protected: } ctx.Schedule(TDuration::Seconds(60), new TEvPrivate::TEvCleanupDeadTablets()); } + + void Handle(TEvPrivate::TEvUpdateClockSkew::TPtr &, const TActorContext &ctx) { + SystemStateInfo.SetMaxClockSkewWithPeerUs(MaxClockSkewWithPeerUs); + SystemStateInfo.SetMaxClockSkewPeerId(MaxClockSkewPeerId); + MaxClockSkewWithPeerUs = 0; + ctx.Schedule(TDuration::Seconds(15), new TEvPrivate::TEvUpdateClockSkew()); + } }; IActor* CreateNodeWhiteboardService() { |