about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alexey Efimov <xeno@prnwatch.com> 2022-05-25 19:29:25 +0300
committer Alexey Efimov <xeno@prnwatch.com> 2022-05-25 19:29:25 +0300
commit ecf2787c8858d1e045562a8bcbebb9a56fe8f848 (patch)
tree d93739516a4c8250c9518f1d68f7ed1d1019d8b7
parent ab6d08ba5049a701adf054e48943c85d90607319 (diff)
download ydb-ecf2787c8858d1e045562a8bcbebb9a56fe8f848.tar.gz
report degraded hc status on degraded storage KIKIMR-14952
ref:be7089b54c3d201149f57683b9e80d10feee854c
-rw-r--r-- ydb/core/health_check/health_check.cpp 29
1 files changed, 26 insertions, 3 deletions
diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp
index 49f71d47ef..5afb541558 100644
--- a/ydb/core/health_check/health_check.cpp
+++ b/ydb/core/health_check/health_check.cpp
@@ -247,7 +247,7 @@ public:
std::initializer_list<TString> includeTags = {}) {
OverallStatus = MaxStatus(OverallStatus, status);
if (IsErrorStatus(status)) {
- TVector<TString> reason;
+ std::vector<TString> reason;
if (includeTags.size() != 0) {
for (const TIssueRecord& record : IssueLog) {
for (const TString& tag : includeTags) {
@@ -258,6 +258,8 @@ public:
}
}
}
+ std::sort(reason.begin(), reason.end());
+ reason.erase(std::unique(reason.begin(), reason.end()), reason.end());
TIssueRecord& issueRecord(*IssueLog.emplace(IssueLog.begin()));
Ydb::Monitoring::IssueLog& issueLog(issueRecord.IssueLog);
issueLog.set_status(status);
@@ -270,7 +272,7 @@ public:
issueLog.set_type(Type);
}
issueLog.set_level(Level);
- if (reason) {
+ if (!reason.empty()) {
for (const TString& r : reason) {
issueLog.add_reason(r);
}
@@ -281,6 +283,17 @@ public:
}
}
+ bool HasTags(std::initializer_list<TString> tags) const {
+ for (const TIssueRecord& record : IssueLog) {
+ for (const TString& tag : tags) {
+ if (record.Tag == tag) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
Ydb::Monitoring::StatusFlag::Status GetOverallStatus() const {
return OverallStatus;
}
@@ -1783,6 +1796,7 @@ public:
void FillResult(Ydb::Monitoring::SelfCheckResult& result) {
Ydb::Monitoring::StatusFlag::Status overall = Ydb::Monitoring::StatusFlag::GREY;
std::unordered_set<std::pair<TString, TString>> issueIds;
+ bool hasDegraded = false;
for (auto& [path, state] : DatabaseState) {
Ydb::Monitoring::DatabaseStatus& databaseStatus(*result.add_database_status());
TSelfCheckResult context;
@@ -1808,6 +1822,9 @@ public:
result.mutable_issue_log()->Add()->CopyFrom(issueRecord.IssueLog);
}
}
+ if (!hasDegraded && overall != Ydb::Monitoring::StatusFlag::GREEN && context.HasTags({"storage-state"})) {
+ hasDegraded = true;
+ }
}
if (DatabaseState.empty()) {
Ydb::Monitoring::DatabaseStatus& databaseStatus(*result.add_database_status());
@@ -1842,9 +1859,15 @@ public:
}
switch (overall) {
case Ydb::Monitoring::StatusFlag::GREEN:
- case Ydb::Monitoring::StatusFlag::YELLOW:
result.set_self_check_result(Ydb::Monitoring::SelfCheck::GOOD);
break;
+ case Ydb::Monitoring::StatusFlag::YELLOW:
+ if (hasDegraded) {
+ result.set_self_check_result(Ydb::Monitoring::SelfCheck::DEGRADED);
+ } else {
+ result.set_self_check_result(Ydb::Monitoring::SelfCheck::GOOD);
+ }
+ break;
case Ydb::Monitoring::StatusFlag::BLUE:
result.set_self_check_result(Ydb::Monitoring::SelfCheck::DEGRADED);
break;