diff options
author | andrew-rykov <arykov@ydb.tech> | 2022-10-24 18:31:31 +0300 |
---|---|---|
committer | andrew-rykov <arykov@ydb.tech> | 2022-10-24 18:31:31 +0300 |
commit | a4b4ed91314a97907ef9a10f2b82768f763fdfe5 (patch) | |
tree | ca67c9ab1fdd657b5d5a3e028ef895ec928f92e2 | |
parent | e4b354c8c71dfba4df4cae2cb370231f322b2dae (diff) | |
download | ydb-a4b4ed91314a97907ef9a10f2b82768f763fdfe5.tar.gz |
PR from branch users/andrew-rykov//merge-issues3
added count and listed
groups unity
PR from branch users/andrew-rykov//merge-issues
-rw-r--r-- | ydb/core/health_check/health_check.cpp | 524 | ||||
-rw-r--r-- | ydb/core/health_check/health_check_ut.cpp | 107 | ||||
-rw-r--r-- | ydb/public/api/protos/ydb_monitoring.proto | 8 |
3 files changed, 528 insertions, 111 deletions
diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index 3c4fb0a983..f0f950dc55 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -26,6 +26,7 @@ #include <ydb/core/util/tuples.h> #include <ydb/public/api/grpc/ydb_monitoring_v1.grpc.pb.h> +#include <regex> static decltype(auto) make_vslot_tuple(const NKikimrBlobStorage::TVSlotId& id) { return std::make_tuple(id.GetNodeId(), id.GetPDiskId(), id.GetVSlotId()); @@ -93,6 +94,23 @@ public: using TGroupId = ui32; + enum ETags { + None, + DBState, + StorageState, + PoolState, + GroupState, + VDiskState, + PDiskState, + NodeState, + VDiskSpace, + PDiskSpace, + ComputeState, + TabletState, + SystemTabletState, + OverloadState, + }; + struct TTenantInfo { TString Name; Ydb::Cms::GetDatabaseStatusResult::State State; @@ -183,11 +201,11 @@ public: struct TSelfCheckResult { struct TIssueRecord { Ydb::Monitoring::IssueLog IssueLog; - TString Tag; + ETags Tag; }; Ydb::Monitoring::StatusFlag::Status OverallStatus = Ydb::Monitoring::StatusFlag::GREY; - TList<TIssueRecord> IssueLog; + TList<TIssueRecord> IssueRecords; Ydb::Monitoring::Location Location; int Level = 1; TString Type; @@ -218,19 +236,19 @@ public: id << '-' << location.storage().node().port(); } } - if (location.storage().pool().group().vdisk().id()) { - id << '-' << location.storage().pool().group().vdisk().id(); + if (!location.storage().pool().group().vdisk().id().empty()) { + id << '-' << location.storage().pool().group().vdisk().id()[0]; } else { - if (location.storage().pool().group().id()) { - id << '-' << location.storage().pool().group().id(); + if (!location.storage().pool().group().id().empty()) { + id << '-' << location.storage().pool().group().id()[0]; } else { if (location.storage().pool().name()) { id << '-' << crc16(location.storage().pool().name()); } } } - if (location.storage().pool().group().vdisk().pdisk().id()) { - id << '-' << location.storage().pool().group().vdisk().pdisk().id(); + if (!location.storage().pool().group().vdisk().pdisk().empty() && location.storage().pool().group().vdisk().pdisk()[0].id()) { + id << '-' << location.storage().pool().group().vdisk().pdisk()[0].id(); } if (location.compute().node().id()) { id << '-' << location.compute().node().id(); @@ -253,14 +271,14 @@ public: void ReportStatus(Ydb::Monitoring::StatusFlag::Status status, const TString& message = {}, - const TString& setTag = {}, - std::initializer_list<TString> includeTags = {}) { + ETags setTag = ETags::None, + std::initializer_list<ETags> includeTags = {}) { OverallStatus = MaxStatus(OverallStatus, status); if (IsErrorStatus(status)) { std::vector<TString> reason; if (includeTags.size() != 0) { - for (const TIssueRecord& record : IssueLog) { - for (const TString& tag : includeTags) { + for (const TIssueRecord& record : IssueRecords) { + for (const ETags& tag : includeTags) { if (record.Tag == tag) { reason.push_back(record.IssueLog.id()); break; @@ -270,7 +288,7 @@ public: } std::sort(reason.begin(), reason.end()); reason.erase(std::unique(reason.begin(), reason.end()), reason.end()); - TIssueRecord& issueRecord(*IssueLog.emplace(IssueLog.begin())); + TIssueRecord& issueRecord(*IssueRecords.emplace(IssueRecords.begin())); Ydb::Monitoring::IssueLog& issueLog(issueRecord.IssueLog); issueLog.set_status(status); issueLog.set_message(message); @@ -293,9 +311,9 @@ public: } } - bool HasTags(std::initializer_list<TString> tags) const { - for (const TIssueRecord& record : IssueLog) { - for (const TString& tag : tags) { + bool HasTags(std::initializer_list<ETags> tags) const { + for (const TIssueRecord& record : IssueRecords) { + for (const ETags tag : tags) { if (record.Tag == tag) { return true; } @@ -311,12 +329,12 @@ public: void SetOverallStatus(Ydb::Monitoring::StatusFlag::Status status) { OverallStatus = status; } - + void InheritFrom(TSelfCheckResult& lower) { if (lower.GetOverallStatus() >= OverallStatus) { OverallStatus = lower.GetOverallStatus(); } - IssueLog.splice(IssueLog.end(), std::move(lower.IssueLog)); + IssueRecords.splice(IssueRecords.end(), std::move(lower.IssueRecords)); } }; @@ -1269,19 +1287,19 @@ public: static void Check(TSelfCheckContext& context, const NKikimrWhiteboard::TSystemStateInfo::TPoolStats& poolStats) { if (poolStats.name() == "System" || poolStats.name() == "IC" || poolStats.name() == "IO") { if (poolStats.usage() >= 0.99) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Pool usage over 99%", "overload-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Pool usage over 99%", ETags::OverloadState); } else if (poolStats.usage() >= 0.95) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage over 95%", "overload-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage over 95%", ETags::OverloadState); } else if (poolStats.usage() >= 0.90) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 90%", "overload-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 90%", ETags::OverloadState); } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); } } else { if (poolStats.usage() >= 0.99) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage over 99%", "overload-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Pool usage over 99%", ETags::OverloadState); } else if (poolStats.usage() >= 0.95) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 95%", "overload-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Pool usage over 95%", ETags::OverloadState); } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); } @@ -1300,11 +1318,11 @@ public: } protoTablet.add_id(ToString(tabletId)); if (tablet.IsUnresponsive) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "System tablet is unresponsive", "system-tablet-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "System tablet is unresponsive", ETags::SystemTabletState); } else if (tablet.MaxResponseTime >= TDuration::MilliSeconds(5000)) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "System tablet response time is over 5000ms", "system-tablet-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "System tablet response time is over 5000ms", ETags::SystemTabletState); } else if (tablet.MaxResponseTime >= TDuration::MilliSeconds(1000)) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "System tablet response time is over 1000ms", "system-tablet-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "System tablet response time is over 1000ms", ETags::SystemTabletState); } } } @@ -1348,14 +1366,14 @@ public: break; case TNodeTabletState::ETabletState::RestartsTooOften: computeTabletStatus.set_state("RESTARTS_TOO_OFTEN"); - tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Tablets are restarting too often", "tablet-state"); + tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Tablets are restarting too often", ETags::TabletState); break; case TNodeTabletState::ETabletState::Dead: computeTabletStatus.set_state("DEAD"); if (count.Leader) { - tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Tablets are dead", "tablet-state"); + tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Tablets are dead", ETags::TabletState); } else { - tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Followers are dead", "tablet-state"); + tabletContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Followers are dead", ETags::TabletState); } break; } @@ -1401,7 +1419,7 @@ public: loadAverageStatus.set_load(nodeSystemState.loadaverage(0)); loadAverageStatus.set_cores(nodeSystemState.numberofcpus()); if (loadAverageStatus.load() > loadAverageStatus.cores()) { - laContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "LoadAverage above 100%", "overload-state"); + laContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "LoadAverage above 100%", ETags::OverloadState); } else { laContext.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); } @@ -1410,7 +1428,7 @@ public: } else { // context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, // TStringBuilder() << "Compute node is not available", - // "node-state"); + // ETags::NodeState); } computeNodeStatus.set_id(id); computeNodeStatus.set_overall(context.GetOverallStatus()); @@ -1434,7 +1452,7 @@ public: } else { Ydb::Monitoring::StatusFlag::Status systemStatus = FillSystemTablets({&context, "SYSTEM_TABLET"}); if (systemStatus != Ydb::Monitoring::StatusFlag::GREEN && systemStatus != Ydb::Monitoring::StatusFlag::GREY) { - context.ReportStatus(systemStatus, "Compute has issues with system tablets", "compute-state", {"system-tablet-state"}); + context.ReportStatus(systemStatus, "Compute has issues with system tablets", ETags::DBState, {ETags::SystemTabletState}); } Ydb::Monitoring::StatusFlag::Status nodesStatus = Ydb::Monitoring::StatusFlag::GREEN; for (TNodeId nodeId : *computeNodeIds) { @@ -1443,7 +1461,7 @@ public: nodesStatus = MaxStatus(nodesStatus, computeNode.overall()); } if (nodesStatus != Ydb::Monitoring::StatusFlag::GREEN) { - context.ReportStatus(nodesStatus, "Compute is overloaded", "compute-state", {"overload-state"}); + context.ReportStatus(nodesStatus, "Compute is overloaded", ETags::DBState, {ETags::OverloadState}); } Ydb::Monitoring::StatusFlag::Status tabletsStatus = Ydb::Monitoring::StatusFlag::GREEN; computeNodeIds->push_back(0); // for tablets without node @@ -1451,7 +1469,7 @@ public: tabletsStatus = MaxStatus(tabletsStatus, FillTablets(databaseState, nodeId, *computeStatus.mutable_tablets(), context)); } if (tabletsStatus != Ydb::Monitoring::StatusFlag::GREEN) { - context.ReportStatus(tabletsStatus, "Compute has issues with tablets", "compute-state", {"tablet-state"}); + context.ReportStatus(tabletsStatus, "Compute has issues with tablets", ETags::DBState, {ETags::TabletState}); } } computeStatus.set_overall(context.GetOverallStatus()); @@ -1501,11 +1519,14 @@ public: void FillPDiskStatus(const TString& pDiskId, const NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo, Ydb::Monitoring::StoragePDiskStatus& storagePDiskStatus, TSelfCheckContext context) { context.Location.clear_database(); // PDisks are shared between databases + if (context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk()->empty()) { + context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->add_pdisk(); + } context.Location.mutable_storage()->mutable_pool()->clear_name(); // PDisks are shared between pools context.Location.mutable_storage()->mutable_pool()->mutable_group()->clear_id(); // PDisks are shared between groups context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->clear_id(); // PDisks are shared between vdisks - context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk()->set_id(pDiskId); - context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk()->set_path(pDiskInfo.path()); + context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk()->begin()->set_id(pDiskId); + context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk()->begin()->set_path(pDiskInfo.path()); storagePDiskStatus.set_id(pDiskId); if (pDiskInfo.HasState()) { @@ -1519,7 +1540,7 @@ public: case NKikimrBlobStorage::TPDiskState::InitialCommonLogRead: context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, TStringBuilder() << "PDisk state is " << NKikimrBlobStorage::TPDiskState::E_Name(pDiskInfo.GetState()), - "pdisk-state"); + ETags::PDiskState); break; case NKikimrBlobStorage::TPDiskState::InitialFormatReadError: case NKikimrBlobStorage::TPDiskState::InitialSysLogReadError: @@ -1536,7 +1557,7 @@ public: case NKikimrBlobStorage::TPDiskState::Unknown: context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "PDisk state is " << NKikimrBlobStorage::TPDiskState::E_Name(pDiskInfo.GetState()), - "pdisk-state"); + ETags::PDiskState); break; case NKikimrBlobStorage::TPDiskState::Reserved14: case NKikimrBlobStorage::TPDiskState::Reserved15: @@ -1549,11 +1570,11 @@ public: if (pDiskInfo.GetAvailableSize() != 0 && pDiskInfo.GetTotalSize() != 0) { // hotfix until KIKIMR-12659 double avail = (double)pDiskInfo.GetAvailableSize() / pDiskInfo.GetTotalSize(); if (avail < 0.06) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Available size is less than 6%", "pdisk-space"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Available size is less than 6%", ETags::PDiskSpace); } else if (avail < 0.09) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Available size is less than 9%", "pdisk-space"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Available size is less than 9%", ETags::PDiskSpace); } else if (avail < 0.12) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Available size is less than 12%", "pdisk-space"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Available size is less than 12%", ETags::PDiskSpace); } } } else { @@ -1572,12 +1593,12 @@ public: } nodeContext.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "Storage node is not available", - "node-state"); + ETags::NodeState); } context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "PDisk is not available", - "pdisk-state", - {"node-state"}); + ETags::PDiskState, + {ETags::NodeState}); } storagePDiskStatus.set_overall(context.GetOverallStatus()); @@ -1599,7 +1620,11 @@ public: } void FillVDiskStatus(const TString& vDiskId, const NKikimrWhiteboard::TVDiskStateInfo& vDiskInfo, Ydb::Monitoring::StorageVDiskStatus& storageVDiskStatus, TSelfCheckContext context) { - context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->set_id(vDiskId); + if (context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_id()->empty()) { + context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->add_id(); + } + context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->set_id(0, vDiskId); + context.Location.mutable_storage()->mutable_pool()->mutable_group()->clear_id(); // you can see VDisks Group Id in vDiskId field storageVDiskStatus.set_id(vDiskId); TString pDiskId = GetPDiskId(vDiskInfo); auto itPDisk = MergedPDiskState.find(pDiskId); @@ -1610,8 +1635,8 @@ public: if (!vDiskInfo.HasVDiskState()) { context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "VDisk is not available", - "vdisk-state", - {"pdisk-state"}); + ETags::VDiskState, + {ETags::PDiskState}); storageVDiskStatus.set_overall(context.GetOverallStatus()); return; } @@ -1624,15 +1649,15 @@ public: case NKikimrWhiteboard::EVDiskState::SyncGuidRecovery: context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, TStringBuilder() << "VDisk state is " << NKikimrWhiteboard::EVDiskState_Name(vDiskInfo.GetVDiskState()), - "vdisk-state"); + ETags::VDiskState); break; case NKikimrWhiteboard::EVDiskState::LocalRecoveryError: case NKikimrWhiteboard::EVDiskState::SyncGuidRecoveryError: case NKikimrWhiteboard::EVDiskState::PDiskError: context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, TStringBuilder() << "VDisk state is " << NKikimrWhiteboard::EVDiskState_Name(vDiskInfo.GetVDiskState()), - "vdisk-state", - {"pdisk-state"}); + ETags::VDiskState, + {ETags::PDiskState}); break; } @@ -1645,19 +1670,19 @@ public: break; context.ReportStatus(GetFlagFromWhiteboardFlag(vDiskInfo.GetDiskSpace()), TStringBuilder() << "DiskSpace is " << NKikimrWhiteboard::EFlag_Name(vDiskInfo.GetDiskSpace()), - "vdisk-state", - {"pdisk-space"}); + ETags::VDiskState, + {ETags::PDiskSpace}); default: context.ReportStatus(GetFlagFromWhiteboardFlag(vDiskInfo.GetDiskSpace()), TStringBuilder() << "DiskSpace is " << NKikimrWhiteboard::EFlag_Name(vDiskInfo.GetDiskSpace()), - "vdisk-space", - {"pdisk-space"}); + ETags::VDiskSpace, + {ETags::PDiskSpace}); break; } } if (context.GetOverallStatus() == Ydb::Monitoring::StatusFlag::GREEN && !vDiskInfo.GetReplicated()) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Replication in progress", "vdisk-state"); + context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Replication in progress", ETags::VDiskState); } storageVDiskStatus.set_overall(context.GetOverallStatus()); @@ -1666,6 +1691,8 @@ public: static const inline TString NONE = "none"; static const inline TString BLOCK_4_2 = "block-4-2"; static const inline TString MIRROR_3_DC = "mirror-3-dc"; + static const int MERGING_IGNORE_SIZE = 4; + static const int MERGER_ISSUE_LIMIT = 10; static void IncrementFor(TStackVec<std::pair<ui32, int>>& realms, ui32 realm) { auto itRealm = FindIf(realms, [realm](const std::pair<ui32, int>& p) -> bool { @@ -1678,8 +1705,337 @@ public: } } + struct TMergeIssuesContext { + std::unordered_map<ETags, TList<TSelfCheckContext::TIssueRecord>> recordsMap; + std::unordered_set<TString> removeIssuesIds; + + TMergeIssuesContext(TList<TSelfCheckContext::TIssueRecord>& records) { + for (auto it = records.begin(); it != records.end(); ) { + auto move = it++; + recordsMap[move->Tag].splice(recordsMap[move->Tag].end(), records, move); + } + } + + void RemoveUnlinkIssues(TList<TSelfCheckContext::TIssueRecord>& records) { + bool isRemovingIssuesIteration = true; + while (isRemovingIssuesIteration) { + isRemovingIssuesIteration = false; + + std::unordered_set<TString> necessaryIssuesIds; + for (auto it = records.begin(); it != records.end(); it++) { + auto reasons = it->IssueLog.reason(); + for (auto reasonIt = reasons.begin(); reasonIt != reasons.end(); reasonIt++) { + necessaryIssuesIds.insert(*reasonIt); + } + } + + for (auto it = records.begin(); it != records.end(); ) { + if (!necessaryIssuesIds.contains(it->IssueLog.id()) && removeIssuesIds.contains(it->IssueLog.id())) { + auto reasons = it->IssueLog.reason(); + for (auto reasonIt = reasons.begin(); reasonIt != reasons.end(); reasonIt++) { + removeIssuesIds.insert(*reasonIt); + } + isRemovingIssuesIteration = true; + it = records.erase(it); + } else { + it++; + } + } + } + + { + std::unordered_set<TString> issueIds; + for (auto it = records.begin(); it != records.end(); it++) { + issueIds.insert(it->IssueLog.id()); + } + + for (auto it = records.begin(); it != records.end(); it++) { + auto reasons = it->IssueLog.mutable_reason(); + for (auto reasonIt = reasons->begin(); reasonIt != reasons->end(); ) { + if (!issueIds.contains(*reasonIt)) { + reasonIt = reasons->erase(reasonIt); + } else { + reasonIt++; + } + } + } + } + } + + void RenameMergingIssues(TList<TSelfCheckContext::TIssueRecord>& records) { + for (auto it = records.begin(); it != records.end(); it++) { + if (it->IssueLog.count() > 0) { + TString message = it->IssueLog.message(); + switch (it->Tag) { + case ETags::GroupState: { + message = std::regex_replace(message.c_str(), std::regex("^Group has "), "Groups have "); + message = std::regex_replace(message.c_str(), std::regex("^Group is "), "Groups are "); + message = std::regex_replace(message.c_str(), std::regex("^Group "), "Groups "); + break; + } + case ETags::VDiskState: { + message = std::regex_replace(message.c_str(), std::regex("^VDisk has "), "VDisk have "); + message = std::regex_replace(message.c_str(), std::regex("^VDisk is "), "VDisks are "); + message = std::regex_replace(message.c_str(), std::regex("^VDisk "), "VDisk "); + break; + } + case ETags::PDiskState: { + message = std::regex_replace(message.c_str(), std::regex("^PDisk has "), "PDisk have "); + message = std::regex_replace(message.c_str(), std::regex("^PDisk is "), "PDisks are "); + message = std::regex_replace(message.c_str(), std::regex("^PDisk "), "PDisk "); + break; + } + default: + break; + } + it->IssueLog.set_message(message); + } + } + } + + void FillRecords(TList<TSelfCheckContext::TIssueRecord>& records) { + for(auto it = recordsMap.begin(); it != recordsMap.end(); ++it) { + records.splice(records.end(), it->second); + } + RemoveUnlinkIssues(records); + RenameMergingIssues(records); + } + + TList<TSelfCheckContext::TIssueRecord>& GetRecords(ETags tag) { + return recordsMap[tag]; + } + }; + + bool FindRecordsForMerge(TList<TSelfCheckContext::TIssueRecord>& records, TList<TSelfCheckContext::TIssueRecord>& similar, TList<TSelfCheckContext::TIssueRecord>& Mergeed) { + while (!records.empty() && similar.empty()) { + similar.splice(similar.end(), records, records.begin()); + for (auto it = records.begin(); it != records.end(); ) { + bool isSimilar = it->IssueLog.status() == similar.begin()->IssueLog.status() + && it->IssueLog.message() == similar.begin()->IssueLog.message() + && it->IssueLog.level() == similar.begin()->IssueLog.level() ; + if (isSimilar && similar.begin()->Tag == ETags::VDiskState) { + isSimilar = it->IssueLog.location().storage().node().id() == similar.begin()->IssueLog.location().storage().node().id(); + } + if (isSimilar) { + auto move = it++; + similar.splice(similar.end(), records, move); + } else { + ++it; + } + } + + if (similar.size() <= MERGING_IGNORE_SIZE) { + Mergeed.splice(Mergeed.end(), similar); + } + } + + return !similar.empty(); + } + + std::shared_ptr<TList<TSelfCheckContext::TIssueRecord>> FindChildrenRecords(TList<TSelfCheckContext::TIssueRecord>& records, TSelfCheckContext::TIssueRecord& parent) { + std::shared_ptr<TList<TSelfCheckContext::TIssueRecord>> children(new TList<TSelfCheckContext::TIssueRecord>); + std::unordered_set<TString> childrenIds; + for (auto reason: parent.IssueLog.reason()) { + childrenIds.insert(reason); + } + + for (auto it = records.begin(); it != records.end(); ) { + if (childrenIds.contains(it->IssueLog.id())) { + auto move = it++; + children->splice(children->end(), records, move); + } else { + it++; + } + } + + return children; + } + + void MoveDataInFirstRecord(TMergeIssuesContext& context, TList<TSelfCheckContext::TIssueRecord>& similar) { + auto mainReasons = similar.begin()->IssueLog.mutable_reason(); + std::unordered_set<TString> ids; + ids.insert(similar.begin()->IssueLog.id()); + std::unordered_set<TString> mainReasonIds; + for (auto it = mainReasons->begin(); it != mainReasons->end(); it++) { + mainReasonIds.insert(*it); + } + + for (auto it = std::next(similar.begin(), 1); it != similar.end(); ) { + if (ids.contains(it->IssueLog.id())) { + it++; + continue; + } + ids.insert(it->IssueLog.id()); + + switch (similar.begin()->Tag) { + case ETags::GroupState: { + auto mainGroupIds = similar.begin()->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_id(); + auto donorGroupIds = it->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_id(); + mainGroupIds->Add(donorGroupIds->begin(), donorGroupIds->end()); + break; + } + case ETags::VDiskState: { + auto mainVdiskIds = similar.begin()->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_id(); + auto donorVdiskIds = it->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_id(); + mainVdiskIds->Add(donorVdiskIds->begin(), donorVdiskIds->end()); + break; + } + case ETags::PDiskState: { + auto mainPdisk = similar.begin()->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk(); + auto donorPdisk = it->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk(); + mainPdisk->Add(donorPdisk->begin(), donorPdisk->end()); + break; + } + default: + break; + } + + auto donorReasons = it->IssueLog.mutable_reason(); + for (auto donorReasonIt = donorReasons->begin(); donorReasonIt != donorReasons->end(); donorReasonIt++) { + if (!mainReasonIds.contains(*donorReasonIt)) { + mainReasons->Add(donorReasonIt->c_str()); + mainReasonIds.insert(*donorReasonIt); + } + } + + context.removeIssuesIds.insert(it->IssueLog.id()); + it = similar.erase(it); + } + + similar.begin()->IssueLog.set_count(ids.size()); + similar.begin()->IssueLog.set_listed(ids.size()); + } + + void MergeLevelRecords(TMergeIssuesContext& context, TList<TSelfCheckContext::TIssueRecord>& records) { + TList<TSelfCheckContext::TIssueRecord> handled; + while (!records.empty()) { + TList<TSelfCheckContext::TIssueRecord> similar; + if (FindRecordsForMerge(records, similar, handled)) { + MoveDataInFirstRecord(context, similar); + handled.splice(handled.end(), similar, similar.begin()); + } + } + records.splice(records.end(), handled); + } + + void MergeLevelRecords(TMergeIssuesContext& context, ETags levelTag) { + auto& records = context.GetRecords(levelTag); + MergeLevelRecords(context, records); + } + + void MergeLevelRecords(TMergeIssuesContext& context, ETags levelTag, ETags upperTag) { + auto& levelRecords = context.GetRecords(levelTag); + auto& upperRecords = context.GetRecords(upperTag); + + for (auto it = upperRecords.begin(); it != upperRecords.end(); it++) { + auto children = FindChildrenRecords(levelRecords, *it); + if (children->size() > 1) { + MergeLevelRecords(context, *children); + } + levelRecords.splice(levelRecords.end(), *children); + } + } + + int GetIssueCount(TSelfCheckContext::TIssueRecord& record) { + return record.IssueLog.count() == 0 ? 1 : record.IssueLog.count(); + } + + void SetIssueCount(TSelfCheckContext::TIssueRecord& record, int value) { + if (record.IssueLog.listed() == 0) { + record.IssueLog.set_listed(1); + } + record.IssueLog.set_count(value); + } + + int GetIssueListed(TSelfCheckContext::TIssueRecord& record) { + return record.IssueLog.listed() == 0 ? 1 : record.IssueLog.listed(); + } + + void SetIssueListed(TSelfCheckContext::TIssueRecord& record, int value) { + if (record.IssueLog.count() == 0) { + record.IssueLog.set_count(1); + } + record.IssueLog.set_listed(value); + } + + void RemoveRecordsAboveLimit(TMergeIssuesContext& context, TList<TSelfCheckContext::TIssueRecord>& records) { + int commonListed = 0; + for (auto it = records.begin(); it != records.end(); it++) { + if (commonListed == MERGER_ISSUE_LIMIT) { + auto removeIt = it; + it--; + SetIssueCount(*it, GetIssueCount(*it) + GetIssueCount(*removeIt)); + + auto reasons = removeIt->IssueLog.reason(); + for (auto reasonIt = reasons.begin(); reasonIt != reasons.end(); reasonIt++) { + context.removeIssuesIds.insert(*reasonIt); + } + context.removeIssuesIds.insert(removeIt->IssueLog.id()); + records.erase(removeIt); + } else if (commonListed + GetIssueListed(*it) > MERGER_ISSUE_LIMIT) { + auto aboveLimit = commonListed + GetIssueListed(*it) - MERGER_ISSUE_LIMIT; + SetIssueListed(*it, GetIssueListed(*it) - aboveLimit); + + switch (it->Tag) { + case ETags::GroupState: { + auto groupIds = it->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_id(); + while (aboveLimit > 0) { + groupIds->RemoveLast(); + aboveLimit--; + } + break; + } + case ETags::VDiskState: { + auto vdiscIds = it->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_id(); + while (aboveLimit > 0) { + vdiscIds->RemoveLast(); + aboveLimit--; + } + break; + } + case ETags::PDiskState: { + auto pdiscs = it->IssueLog.mutable_location()->mutable_storage()->mutable_pool()->mutable_group()->mutable_vdisk()->mutable_pdisk(); + while (aboveLimit > 0) { + pdiscs->RemoveLast(); + aboveLimit--; + } + break; + } + default: {} + } + commonListed = MERGER_ISSUE_LIMIT; + } else { + commonListed += GetIssueListed(*it); + } + } + } + + void RemoveRecordsAboveLimit(TMergeIssuesContext& context, ETags levelTag) { + auto& records = context.GetRecords(levelTag); + if (records.size() > 0) { + RemoveRecordsAboveLimit(context, records); + } + } + + void RemoveRecordsAboveLimit(TMergeIssuesContext& context, ETags levelTag, ETags upperTag) { + auto& levelRecords = context.GetRecords(levelTag); + auto& upperRecords = context.GetRecords(upperTag); + + TList<TSelfCheckResult::TIssueRecord> handled; + for (auto it = upperRecords.begin(); it != upperRecords.end(); it++) { + auto children = FindChildrenRecords(levelRecords, *it); + + RemoveRecordsAboveLimit(context, *children); + handled.splice(handled.end(), *children); + } + levelRecords.splice(levelRecords.end(), handled); + } + void FillGroupStatus(TGroupId groupId, const NKikimrWhiteboard::TBSGroupStateInfo& groupInfo, Ydb::Monitoring::StorageGroupStatus& storageGroupStatus, TSelfCheckContext context) { - context.Location.mutable_storage()->mutable_pool()->mutable_group()->set_id(ToString(groupId)); + if (context.Location.mutable_storage()->mutable_pool()->mutable_group()->mutable_id()->empty()) { + context.Location.mutable_storage()->mutable_pool()->mutable_group()->add_id(); + } + context.Location.mutable_storage()->mutable_pool()->mutable_group()->set_id(0, ToString(groupId)); storageGroupStatus.set_id(ToString(groupId)); int disksColors[Ydb::Monitoring::StatusFlag::Status_ARRAYSIZE] = {}; TStackVec<std::pair<ui32, int>> failedRealms; @@ -1725,30 +2081,30 @@ public: if (groupInfo.erasurespecies() == NONE) { if (failedDisks > 0) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState}); } } else if (groupInfo.erasurespecies() == BLOCK_4_2) { if (failedDisks > 2) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState}); } else if (failedDisks > 1) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Group has no redundancy", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Group has no redundancy", ETags::GroupState, {ETags::VDiskState}); } else if (failedDisks > 0) { if (disksColors[Ydb::Monitoring::StatusFlag::BLUE] == failedDisks) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Group degraded", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } else { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } } else if (groupInfo.erasurespecies() == MIRROR_3_DC) { if (failedRealms.size() > 2 || (failedRealms.size() == 2 && failedRealms[0].second > 1 && failedRealms[1].second > 1)) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState}); } else if (failedRealms.size() == 2) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Group has no redundancy", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Group has no redundancy", ETags::GroupState, {ETags::VDiskState}); } else if (failedDisks > 0) { if (disksColors[Ydb::Monitoring::StatusFlag::BLUE] == failedDisks) { - context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Group degraded", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } else { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", "group-state", {"vdisk-state"}); + context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } } @@ -1756,6 +2112,17 @@ public: storageGroupStatus.set_overall(context.GetOverallStatus()); } + void MergeRecords(TList<TSelfCheckContext::TIssueRecord>& records) { + TMergeIssuesContext mergeContext(records); + MergeLevelRecords(mergeContext, ETags::GroupState); + MergeLevelRecords(mergeContext, ETags::VDiskState, ETags::GroupState); + MergeLevelRecords(mergeContext, ETags::PDiskState, ETags::VDiskState); + RemoveRecordsAboveLimit(mergeContext, ETags::PDiskState, ETags::VDiskState); + RemoveRecordsAboveLimit(mergeContext, ETags::VDiskState, ETags::GroupState); + RemoveRecordsAboveLimit(mergeContext, ETags::GroupState); + mergeContext.FillRecords(records); + } + void FillPoolStatus(const TString& poolName, const TStoragePoolState& pool, Ydb::Monitoring::StoragePoolStatus& storagePoolStatus, TSelfCheckContext context) { context.Location.mutable_storage()->mutable_pool()->set_name(poolName); storagePoolStatus.set_id(poolName); @@ -1765,16 +2132,19 @@ public: FillGroupStatus(groupId, *itGroup->second, *storagePoolStatus.add_groups(), {&context, "STORAGE_GROUP"}); } } + + MergeRecords(context.IssueRecords); + switch (context.GetOverallStatus()) { case Ydb::Monitoring::StatusFlag::BLUE: case Ydb::Monitoring::StatusFlag::YELLOW: - context.ReportStatus(context.GetOverallStatus(), "Pool degraded", "pool-state", {"group-state"}); + context.ReportStatus(context.GetOverallStatus(), "Pool degraded", ETags::PoolState, {ETags::GroupState}); break; case Ydb::Monitoring::StatusFlag::ORANGE: - context.ReportStatus(context.GetOverallStatus(), "Pool has no redundancy", "pool-state", {"group-state"}); + context.ReportStatus(context.GetOverallStatus(), "Pool has no redundancy", ETags::PoolState, {ETags::GroupState}); break; case Ydb::Monitoring::StatusFlag::RED: - context.ReportStatus(context.GetOverallStatus(), "Pool failed", "pool-state", {"group-state"}); + context.ReportStatus(context.GetOverallStatus(), "Pool failed", ETags::PoolState, {ETags::GroupState}); break; default: break; @@ -1800,13 +2170,13 @@ public: switch (context.GetOverallStatus()) { case Ydb::Monitoring::StatusFlag::BLUE: case Ydb::Monitoring::StatusFlag::YELLOW: - context.ReportStatus(context.GetOverallStatus(), "Storage degraded", "storage-state", {"pool-state"}); + context.ReportStatus(context.GetOverallStatus(), "Storage degraded", ETags::StorageState, {ETags::PoolState}); break; case Ydb::Monitoring::StatusFlag::ORANGE: - context.ReportStatus(context.GetOverallStatus(), "Storage has no redundancy", "storage-state", {"pool-state"}); + context.ReportStatus(context.GetOverallStatus(), "Storage has no redundancy", ETags::StorageState, {ETags::PoolState}); break; case Ydb::Monitoring::StatusFlag::RED: - context.ReportStatus(context.GetOverallStatus(), "Storage failed", "storage-state", {"pool-state"}); + context.ReportStatus(context.GetOverallStatus(), "Storage failed", ETags::StorageState, {ETags::PoolState}); break; default: break; @@ -1876,16 +2246,16 @@ public: if (databaseStatus.compute().overall() != Ydb::Monitoring::StatusFlag::GREEN && databaseStatus.storage().overall() != Ydb::Monitoring::StatusFlag::GREEN) { dbContext.ReportStatus(MaxStatus(databaseStatus.compute().overall(), databaseStatus.storage().overall()), - "Database has multiple issues", "database-state", {"compute-state", "storage-state"}); + "Database has multiple issues", ETags::DBState, { ETags::ComputeState, ETags::StorageState}); } else if (databaseStatus.compute().overall() != Ydb::Monitoring::StatusFlag::GREEN) { - dbContext.ReportStatus(databaseStatus.compute().overall(), "Database has compute issues", "database-state", {"compute-state"}); + dbContext.ReportStatus(databaseStatus.compute().overall(), "Database has compute issues", ETags::DBState, {ETags::ComputeState}); } else if (databaseStatus.storage().overall() != Ydb::Monitoring::StatusFlag::GREEN) { - dbContext.ReportStatus(databaseStatus.storage().overall(), "Database has storage issues", "database-state", {"storage-state"}); + dbContext.ReportStatus(databaseStatus.storage().overall(), "Database has storage issues", ETags::DBState, {ETags::StorageState}); } databaseStatus.set_overall(dbContext.GetOverallStatus()); context.UpdateMaxStatus(dbContext.GetOverallStatus()); - context.AddIssues(dbContext.IssueLog); - if (!context.HasDegraded && context.Status != Ydb::Monitoring::StatusFlag::GREEN && dbContext.HasTags({"storage-state"})) { + context.AddIssues(dbContext.IssueRecords); + if (!context.HasDegraded && context.Status != Ydb::Monitoring::StatusFlag::GREEN && dbContext.HasTags({ETags::StorageState})) { context.HasDegraded = true; } } @@ -1921,7 +2291,7 @@ public: FillStorage(unknownDatabase, *databaseStatus.mutable_storage(), {&storageContext, "STORAGE"}); databaseStatus.set_overall(storageContext.GetOverallStatus()); context.UpdateMaxStatus(storageContext.GetOverallStatus()); - context.AddIssues(storageContext.IssueLog); + context.AddIssues(storageContext.IssueRecords); } } context.FillSelfCheckResult(); diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp index befb205e02..7586ad31d7 100644 --- a/ydb/core/health_check/health_check_ut.cpp +++ b/ydb/core/health_check/health_check_ut.cpp @@ -6,9 +6,7 @@ #include <ydb/core/node_whiteboard/node_whiteboard.h> #include <ydb/core/blobstorage/base/blobstorage_events.h> #include <ydb/core/tx/schemeshard/schemeshard.h> -#include "health_check.h" - -#include <unordered_map> +#include "health_check.cpp" namespace NKikimr { @@ -58,27 +56,27 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { }; void AddGroupsInControllerSelectGroupsResult(TEvBlobStorage::TEvControllerSelectGroupsResult::TPtr* ev, int groupCount) { - auto &pbRecord = (*ev)->Get()->Record; + auto& pbRecord = (*ev)->Get()->Record; auto pbMatchGroups = pbRecord.mutable_matchinggroups(0); - auto sample = pbMatchGroups->GetGroups(0); + auto sample = pbMatchGroups->groups(0); pbMatchGroups->ClearGroups(); auto groupId = GROUP_START_ID; for (int i = 0; i < groupCount; i++) { auto group = pbMatchGroups->add_groups(); group->CopyFrom(sample); - group->SetGroupID(groupId++); + group->set_groupid(groupId++); } }; void AddGroupVSlotInControllerConfigResponse(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev, int groupCount, int vslotCount) { - auto &pbRecord = (*ev)->Get()->Record; + auto& pbRecord = (*ev)->Get()->Record; auto pbConfig = pbRecord.mutable_response()->mutable_status(0)->mutable_baseconfig(); - auto groupSample = pbConfig->GetGroup(0); - auto vslotSample = pbConfig->GetVSlot(0); - auto vslotIdSample = pbConfig->GetGroup(0).GetVSlotId(0); + auto groupSample = pbConfig->group(0); + auto vslotSample = pbConfig->vslot(0); + auto vslotIdSample = pbConfig->group(0).vslotid(0); pbConfig->clear_group(); pbConfig->clear_vslot(); @@ -94,7 +92,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { for (int j = 0; j < vslotCount; j++) { auto vslot = pbConfig->add_vslot(); vslot->CopyFrom(vslotSample); - vslot->SetVDiskIdx(vslotId); + vslot->set_vdiskidx(vslotId); vslot->set_groupid(groupId); vslot->mutable_vslotid()->set_vslotid(vslotId); @@ -109,9 +107,9 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { }; void AddVSlotInVDiskStateResponse(NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr* ev, int groupCount, int vslotCount) { - auto &pbRecord = (*ev)->Get()->Record; + auto& pbRecord = (*ev)->Get()->Record; - auto sample = pbRecord.GetVDiskStateInfo(0); + auto sample = pbRecord.vdiskstateinfo(0); pbRecord.clear_vdiskstateinfo(); auto groupId = GROUP_START_ID; @@ -127,7 +125,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } } - void ListingTest(int const groupNumber, int const groupVdiscNumber) { + void ListingTest(int const groupNumber, int const vdiscPerGroupNumber) { TPortManager tp; ui16 port = tp.GetPort(2134); ui16 grpcPort = tp.GetPort(2135); @@ -157,12 +155,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast<TEvBlobStorage::TEvControllerConfigResponse::TPtr*>(&ev); - AddGroupVSlotInControllerConfigResponse(x, groupNumber, groupVdiscNumber); + AddGroupVSlotInControllerConfigResponse(x, groupNumber, vdiscPerGroupNumber); break; } case NNodeWhiteboard::TEvWhiteboard::EvVDiskStateResponse: { auto *x = reinterpret_cast<NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr*>(&ev); - AddVSlotInVDiskStateResponse(x, groupNumber, groupVdiscNumber); + AddVSlotInVDiskStateResponse(x, groupNumber, vdiscPerGroupNumber); break; } } @@ -175,33 +173,80 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0)); NHealthCheck::TEvSelfCheckResult* result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle); - int groupCount = 0; + int groupIssuesCount = 0; + int groupIssuesNumber = groupNumber <= NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE ? groupNumber : 1; for (const auto& issue_log : result->Result.Getissue_log()) { - if (issue_log.Gettype() == "STORAGE_GROUP" && issue_log.Getlocation().Getstorage().Getpool().Getname() == "/Root:test") { - groupCount++; + if (issue_log.type() == "STORAGE_GROUP" && issue_log.location().storage().pool().name() == "/Root:test") { + if (groupNumber <= NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE) { + UNIT_ASSERT_VALUES_EQUAL(issue_log.location().storage().pool().group().id_size(), 1); + UNIT_ASSERT_VALUES_EQUAL(issue_log.listed(), 0); + UNIT_ASSERT_VALUES_EQUAL(issue_log.count(), 0); + } else { + int groupListed = std::min<int>(groupNumber, (int)NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT); + UNIT_ASSERT_VALUES_EQUAL(issue_log.location().storage().pool().group().id_size(), groupListed); + UNIT_ASSERT_VALUES_EQUAL(issue_log.listed(), groupListed); + UNIT_ASSERT_VALUES_EQUAL(issue_log.count(), groupNumber); + } + groupIssuesCount++; } } - UNIT_ASSERT_VALUES_EQUAL(groupCount, groupNumber); + UNIT_ASSERT_VALUES_EQUAL(groupIssuesCount, groupIssuesNumber); - int vdiscCount = 0; - for (const auto& issue_log : result->Result.Getissue_log()) { - if (issue_log.Gettype() == "VDISK" && issue_log.Getlocation().Getstorage().Getpool().Getname() == "/Root:test") { - vdiscCount++; + int vdiscCountField = groupNumber <= NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE ? vdiscPerGroupNumber : groupNumber * vdiscPerGroupNumber; + int issueVdiscPerGroupNumber = vdiscCountField <= NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE ? vdiscCountField : 1; + int issueVdiscNumber = groupIssuesNumber * issueVdiscPerGroupNumber; + + int issueVdiscCount = 0; + for (const auto& issue_log : result->Result.issue_log()) { + if (issue_log.type() == "VDISK" && issue_log.location().storage().pool().name() == "/Root:test") { + issueVdiscCount++; } } - UNIT_ASSERT_VALUES_EQUAL(vdiscCount, groupNumber * groupVdiscNumber); + UNIT_ASSERT_VALUES_EQUAL(issueVdiscCount, issueVdiscNumber); + } + + Y_UNIT_TEST(IssuesGroupsListing) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + ListingTest(groupNumber, 1); + } + + Y_UNIT_TEST(IssuesVCardListing) { + int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + ListingTest(1, vcardNumber); + } + + Y_UNIT_TEST(IssuesGroupsVCardListing) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGING_IGNORE_SIZE; + ListingTest(groupNumber, vcardNumber); + } + + Y_UNIT_TEST(IssuesGroupsMerging) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + ListingTest(groupNumber, 1); + } + + Y_UNIT_TEST(IssuesVCardMerging) { + int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + ListingTest(1, vcardNumber); + } + + Y_UNIT_TEST(IssuesGroupsVCardMerging) { + int groupNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + int vcardNumber = NHealthCheck::TSelfCheckRequest::MERGER_ISSUE_LIMIT; + ListingTest(groupNumber, vcardNumber); } - Y_UNIT_TEST(GroupsListing) { - ListingTest(15, 1); + Y_UNIT_TEST(IssuesGroupsDeleting) { + ListingTest(100, 1); } - Y_UNIT_TEST(VCardListing) { - ListingTest(1, 20); + Y_UNIT_TEST(IssuesVCardDeleting) { + ListingTest(1, 100); } - Y_UNIT_TEST(GroupsVCardListing) { - ListingTest(15, 20); + Y_UNIT_TEST(IssuesGroupsVCardDeleting) { + ListingTest(100, 100); } } diff --git a/ydb/public/api/protos/ydb_monitoring.proto b/ydb/public/api/protos/ydb_monitoring.proto index d279e415d4..96908f7963 100644 --- a/ydb/public/api/protos/ydb_monitoring.proto +++ b/ydb/public/api/protos/ydb_monitoring.proto @@ -130,12 +130,12 @@ message LocationStoragePDisk { } message LocationStorageVDisk { - string id = 1; - LocationStoragePDisk pdisk = 2; + repeated string id = 1; + repeated LocationStoragePDisk pdisk = 2; } message LocationStorageGroup { - string id = 1; + repeated string id = 1; LocationStorageVDisk vdisk = 2; } @@ -183,6 +183,8 @@ message IssueLog { repeated string reason = 5; string type = 6; uint32 level = 7; + uint32 listed = 8; + uint32 count = 9; } message DatabaseStatus { |