about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: andrew-rykov <arykov@ydb.tech> 2023-09-10 12:21:32 +0300
committer: andrew-rykov <arykov@ydb.tech> 2023-09-10 12:35:49 +0300
commit: baa0cbd68f8f553654efb401e6d06efe381a2351 (patch)
tree: a4bdea877ec324950704f501e6485ae1def3a93a
parent: bd07cd341fd0ab69c78f657bd8ea152d37311df2 (diff)
download: ydb-baa0cbd68f8f553654efb401e6d06efe381a2351.tar.gz
KIKIMR-19304 hc added parameters for merging and limiting records
-rw-r--r-- ydb/core/health_check/health_check.cpp 29
-rw-r--r-- ydb/core/health_check/health_check_ut.cpp 14
-rw-r--r-- ydb/core/viewer/json_healthcheck.h 4
-rw-r--r-- ydb/public/api/protos/ydb_monitoring.proto 2
4 files changed, 32 insertions, 17 deletions
diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp
index 146a06ff57d..1d8f22e62aa 100644
--- a/ydb/core/health_check/health_check.cpp
+++ b/ydb/core/health_check/health_check.cpp
@@ -491,6 +491,7 @@ public:
TTabletRequestsState TabletRequests;
TDuration Timeout = TDuration::MilliSeconds(10000);
+ ui32 ChildrenRecordsLimit = 0;
static constexpr TStringBuf STATIC_STORAGE_POOL_NAME = "static";
bool IsSpecificDatabaseFilter() {
@@ -502,6 +503,7 @@ public:
if (Request->Request.operation_params().has_operation_timeout()) {
Timeout = GetDuration(Request->Request.operation_params().operation_timeout());
}
+ ChildrenRecordsLimit = Request->Request.records_limit();
TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
TIntrusivePtr<TDomainsInfo::TDomain> domain = domains->Domains.begin()->second;
DomainPath = "/" + domain->Name;
@@ -1699,7 +1701,6 @@ public:
static const inline TString BLOCK_4_2 = "block-4-2";
static const inline TString MIRROR_3_DC = "mirror-3-dc";
static const int MERGING_IGNORE_SIZE = 4;
- static const int MERGER_ISSUE_LIMIT = 10;
static void IncrementFor(TStackVec<std::pair<ui32, int>>& realms, ui32 realm) {
auto itRealm = FindIf(realms, [realm](const std::pair<ui32, int>& p) -> bool {
@@ -1966,9 +1967,9 @@ public:
}
void RemoveRecordsAboveLimit(TMergeIssuesContext& context, TList<TSelfCheckContext::TIssueRecord>& records) {
- int commonListed = 0;
+ ui32 commonListed = 0;
for (auto it = records.begin(); it != records.end(); it++) {
- if (commonListed == MERGER_ISSUE_LIMIT) {
+ if (commonListed == ChildrenRecordsLimit) {
auto removeIt = it;
it--;
SetIssueCount(*it, GetIssueCount(*it) + GetIssueCount(*removeIt));
@@ -1979,8 +1980,8 @@ public:
}
context.removeIssuesIds.insert(removeIt->IssueLog.id());
records.erase(removeIt);
- } else if (commonListed + GetIssueListed(*it) > MERGER_ISSUE_LIMIT) {
- auto aboveLimit = commonListed + GetIssueListed(*it) - MERGER_ISSUE_LIMIT;
+ } else if (commonListed + GetIssueListed(*it) > ChildrenRecordsLimit) {
+ auto aboveLimit = commonListed + GetIssueListed(*it) - ChildrenRecordsLimit;
SetIssueListed(*it, GetIssueListed(*it) - aboveLimit);
switch (it->Tag) {
@@ -2010,7 +2011,7 @@ public:
}
default: {}
}
- commonListed = MERGER_ISSUE_LIMIT;
+ commonListed = ChildrenRecordsLimit;
} else {
commonListed += GetIssueListed(*it);
}
@@ -2127,12 +2128,16 @@ public:
void MergeRecords(TList<TSelfCheckContext::TIssueRecord>& records) {
TMergeIssuesContext mergeContext(records);
- MergeLevelRecords(mergeContext, ETags::GroupState);
- MergeLevelRecords(mergeContext, ETags::VDiskState, ETags::GroupState);
- MergeLevelRecords(mergeContext, ETags::PDiskState, ETags::VDiskState);
- RemoveRecordsAboveLimit(mergeContext, ETags::PDiskState, ETags::VDiskState);
- RemoveRecordsAboveLimit(mergeContext, ETags::VDiskState, ETags::GroupState);
- RemoveRecordsAboveLimit(mergeContext, ETags::GroupState);
+ if (Request->Request.merge_records()) {
+ MergeLevelRecords(mergeContext, ETags::GroupState);
+ MergeLevelRecords(mergeContext, ETags::VDiskState, ETags::GroupState);
+ MergeLevelRecords(mergeContext, ETags::PDiskState, ETags::VDiskState);
+ }
+ if (ChildrenRecordsLimit != 0) {
+ RemoveRecordsAboveLimit(mergeContext, ETags::PDiskState, ETags::VDiskState);
+ RemoveRecordsAboveLimit(mergeContext, ETags::VDiskState, ETags::GroupState);
+ RemoveRecordsAboveLimit(mergeContext, ETags::GroupState);
+ }
mergeContext.FillRecords(records);
}
diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp
index d78beec2fd1..9ef27845152 100644
--- a/ydb/core/health_check/health_check_ut.cpp
+++ b/ydb/core/health_check/health_check_ut.cpp
@@ -181,6 +181,8 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
}
}
+ static const int MERGER_ISSUE_LIMIT = 10;
+
void ListingTest(int const groupNumber, int const vdiscPerGroupNumber) {
TPortManager tp;
ui16 port = tp.GetPort(2134);
@@ -226,6 +228,8 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
runtime.SetObserverFunc(observerFunc);
auto *request = new NHealthCheck::TEvSelfCheckRequest;
+ request->Request.set_merge_records(true);
+ request->Request.set_records_limit(MERGER_ISSUE_LIMIT);
runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0));
NHealthCheck::TEvSelfCheckResult* result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle);
@@ -238,7 +242,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
UNIT_ASSERT_VALUES_EQUAL(issue_log.listed(), 0);
UNIT_ASSERT_VALUES_EQUAL(issue_log.count(), 0);
} else {
- int groupListed = std::min<int>(groupNumber, (int)NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT);
+ int groupListed = std::min<int>(groupNumber, MERGER_ISSUE_LIMIT);
UNIT_ASSERT_VALUES_EQUAL(issue_log.location().storage().pool().group().id_size(), groupListed);
UNIT_ASSERT_VALUES_EQUAL(issue_log.listed(), groupListed);
UNIT_ASSERT_VALUES_EQUAL(issue_log.count(), groupNumber);
@@ -394,18 +398,18 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
}
Y_UNIT_TEST(IssuesGroupsMerging) {
- int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
+ int groupNumber = MERGER_ISSUE_LIMIT;
ListingTest(groupNumber, 1);
}
Y_UNIT_TEST(IssuesVCardMerging) {
- int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
+ int vcardNumber = MERGER_ISSUE_LIMIT;
ListingTest(1, vcardNumber);
}
Y_UNIT_TEST(IssuesGroupsVCardMerging) {
- int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
- int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT;
+ int groupNumber = MERGER_ISSUE_LIMIT;
+ int vcardNumber = MERGER_ISSUE_LIMIT;
ListingTest(groupNumber, vcardNumber);
}
diff --git a/ydb/core/viewer/json_healthcheck.h b/ydb/core/viewer/json_healthcheck.h
index eb4e1e272c0..a98ef7b7e80 100644
--- a/ydb/core/viewer/json_healthcheck.h
+++ b/ydb/core/viewer/json_healthcheck.h
@@ -71,6 +71,8 @@ public:
request->Database = Database = params.Get("tenant");
request->Request.set_return_verbose_status(FromStringWithDefault<bool>(params.Get("verbose"), false));
request->Request.set_maximum_level(FromStringWithDefault<ui32>(params.Get("max_level"), 0));
+ request->Request.set_merge_records(FromStringWithDefault<bool>(params.Get("merge_records"), false));
+ request->Request.set_records_limit(FromStringWithDefault<ui32>(params.Get("records_limit"), 0));
SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout());
if (params.Has("min_status")) {
Ydb::Monitoring::StatusFlag::Status minStatus;
@@ -207,6 +209,8 @@ struct TJsonRequestParameters<TJsonHealthCheck> {
{"name":"timeout","in":"query","description":"timeout in ms","required":false,"type":"integer"},
{"name":"tenant","in":"query","description":"path to database","required":false,"type":"string"},
{"name":"verbose","in":"query","description":"return verbose status","required":false,"type":"boolean"},
+ {"name":"merge_records","in":"query","description":"merge records","required":false,"type":"boolean"},
+ {"name":"records_limit","in":"query","description":"children records limit","required":false,"type":"integer"},
{"name":"max_level","in":"query","description":"max depth of issues to return","required":false,"type":"integer"},
{"name":"min_status","in":"query","description":"min status of issues to return","required":false,"type":"string"},
{"name":"format","in":"query","description":"format of reply","required":false,"type":"string"}])___";
diff --git a/ydb/public/api/protos/ydb_monitoring.proto b/ydb/public/api/protos/ydb_monitoring.proto
index fbaaf7d1d8c..91a4ffbb5d1 100644
--- a/ydb/public/api/protos/ydb_monitoring.proto
+++ b/ydb/public/api/protos/ydb_monitoring.proto
@@ -28,6 +28,8 @@ message SelfCheckRequest {
StatusFlag.Status minimum_status = 3; // minimum status of issues to return
uint32 maximum_level = 4; // maximum level of issues to return
bool do_not_cache = 5; // by default database health state is taken from metadata cache; this option can be used to force bypassing that cache
+ bool merge_records = 6; // combine similar records with similar status, message and level into one issue
+ uint32 records_limit = 7; // limit the number of records that have one parent record, default - without limit
}
message SelfCheckResponse {