aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorandrew-rykov <arykov@ydb.tech>2022-12-01 22:31:22 +0300
committerandrew-rykov <arykov@ydb.tech>2022-12-01 22:31:22 +0300
commit2b0b4db1eacf74f69bad89a8e15c141a9ff37ad0 (patch)
tree2775fbf766bc1b5f645fa8c037804bcf23fc922f
parent0378892eff2ed24098930e64fec6083daf7627c9 (diff)
downloadydb-2b0b4db1eacf74f69bad89a8e15c141a9ff37ad0.tar.gz
health checker unknown statement
-rw-r--r--ydb/core/health_check/health_check.cpp12
-rw-r--r--ydb/core/mind/hive/hive_impl.cpp2
-rw-r--r--ydb/core/protos/hive.proto2
3 files changed, 15 insertions, 1 deletions
diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp
index 3cb47e4fc9..aec507cf1f 100644
--- a/ydb/core/health_check/health_check.cpp
+++ b/ydb/core/health_check/health_check.cpp
@@ -126,6 +126,7 @@ public:
ui32 MaxRestartsPerPeriod = 30; // per hour
ui32 MaxTabletIdsStored = 10;
bool ReportGoodTabletsIds = false;
+ bool IsHiveSynchronizationPeriod = false;
};
enum class ETabletState {
@@ -147,7 +148,8 @@ public:
Leader = info.followerid() == 0;
if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_STOPPED) {
State = ETabletState::Stopped;
- } else if (info.volatilestate() != NKikimrHive::TABLET_VOLATILE_STATE_RUNNING
+ } else if (!settings.IsHiveSynchronizationPeriod
+ && info.volatilestate() != NKikimrHive::TABLET_VOLATILE_STATE_RUNNING
&& TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier
&& info.tabletbootmode() == NKikimrHive::TABLET_BOOT_MODE_DEFAULT) {
State = ETabletState::Dead;
@@ -1017,11 +1019,19 @@ public:
}
}
+ // Length, in milliseconds, of the window after a Hive start during which
+ // IsHiveSynchronizationPeriod() treats that Hive as still synchronizing.
+ static const int HIVE_SYNCHRONIZATION_PERIOD_MS = 10000;
+
+ // Returns true when the Hive that produced hiveInfo had been running for less
+ // than HIVE_SYNCHRONIZATION_PERIOD_MS at the moment it built the response.
+ //
+ // Uptime is computed as ResponseTimestamp - StartTimeTimestamp (both are
+ // millisecond values). The previous code subtracted in the opposite order, so
+ // the unsigned result wrapped around to a huge number, and then compared with
+ // '>', which made the function return true for practically every running Hive.
+ //
+ // Returns false when the start timestamp is zero (e.g. the field was not
+ // filled in) or the timestamps are inconsistent, so that an unsigned underflow
+ // can never be mistaken for a valid uptime.
+ bool IsHiveSynchronizationPeriod(NKikimrHive::TEvResponseHiveInfo& hiveInfo) {
+ const ui64 startTime = hiveInfo.GetStartTimeTimestamp();
+ const ui64 responseTime = hiveInfo.GetResponseTimestamp();
+ if (startTime == 0 || responseTime < startTime) {
+ // No usable uptime information: do not claim the Hive is synchronizing.
+ return false;
+ }
+ const ui64 hiveUptime = responseTime - startTime;
+ return hiveUptime < static_cast<ui64>(HIVE_SYNCHRONIZATION_PERIOD_MS);
+ }
+
void AggregateHiveInfo() {
TNodeTabletState::TTabletStateSettings settings;
settings.AliveBarrier = TInstant::Now() - TDuration::Minutes(5);
for (const auto& [hiveId, hiveResponse] : HiveInfo) {
if (hiveResponse) {
+ settings.IsHiveSynchronizationPeriod = IsHiveSynchronizationPeriod(hiveResponse->Record);
for (const NKikimrHive::TTabletInfo& hiveTablet : hiveResponse->Record.GetTablets()) {
TSubDomainKey tenantId = TSubDomainKey(hiveTablet.GetObjectDomain());
auto itDomain = FilterDomainKey.find(tenantId);
diff --git a/ydb/core/mind/hive/hive_impl.cpp b/ydb/core/mind/hive/hive_impl.cpp
index 1553779dea..b93431e053 100644
--- a/ydb/core/mind/hive/hive_impl.cpp
+++ b/ydb/core/mind/hive/hive_impl.cpp
@@ -1709,6 +1709,8 @@ void THive::Handle(TEvHive::TEvRequestHiveInfo::TPtr& ev) {
it->second.ActualizeTabletStatistics(now);
FillTabletInfo(response->Record, it->first, &it->second, record);
}
+ response->Record.set_starttimetimestamp(StartTime().MilliSeconds());
+ response->Record.set_responsetimestamp(TAppData::TimeProvider->Now().MilliSeconds());
}
Send(ev->Sender, response.Release(), 0, ev->Cookie);
diff --git a/ydb/core/protos/hive.proto b/ydb/core/protos/hive.proto
index 642e9827e0..7cc8c21d6d 100644
--- a/ydb/core/protos/hive.proto
+++ b/ydb/core/protos/hive.proto
@@ -270,6 +270,8 @@ message TEvRequestHiveInfo {
message TEvResponseHiveInfo {
repeated TTabletInfo Tablets = 1;
optional TForwardRequest ForwardRequest = 2;
// Hive start time in milliseconds; filled by THive from StartTime().MilliSeconds().
+ optional uint64 StartTimeTimestamp = 3;
// Time at which Hive built this response, in milliseconds; filled from
// TAppData::TimeProvider->Now().MilliSeconds(). Together with
// StartTimeTimestamp it lets the receiver derive the Hive uptime.
+ optional uint64 ResponseTimestamp = 4;
}
message TEvRequestHiveDomainStats {