diff options
author | Alexander Rutkovsky <alexvru@ydb.tech> | 2025-02-25 19:19:15 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-02-25 19:19:15 +0300 |
commit | 38b28a98382a639bdc96fa961609b2b815572d7f (patch) | |
tree | a5ba34ae8049a618bf71b67f8cc75ca98ab0e9f9 | |
parent | b74ed7274a782de870e21adbc9febc429319967b (diff) | |
download | ydb-38b28a98382a639bdc96fa961609b2b815572d7f.tar.gz |
Support LayoutCorrect fields for SysView (#15006)
-rw-r--r-- | ydb/core/mind/bscontroller/bsc.cpp | 21 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/config_fit_groups.cpp | 8 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/group_geometry_info.h | 18 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/group_layout_checker.h | 58 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/impl.h | 8 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/load_everything.cpp | 15 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/monitoring.cpp | 2 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/sys_view.cpp | 2 | ||||
-rw-r--r-- | ydb/core/mind/bscontroller/virtual_group.cpp | 10 | ||||
-rw-r--r-- | ydb/core/protos/sys_view.proto | 1 | ||||
-rw-r--r-- | ydb/core/sys_view/common/schema.h | 4 | ||||
-rw-r--r-- | ydb/core/sys_view/storage/groups.cpp | 1 | ||||
-rw-r--r-- | ydb/core/sys_view/ut_kqp.cpp | 6 |
13 files changed, 136 insertions, 18 deletions
diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 41e97f7338..06750c1f42 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -3,6 +3,8 @@ #include "self_heal.h" #include "sys_view.h" #include "console_interaction.h" +#include "group_geometry_info.h" +#include "group_layout_checker.h" #include <library/cpp/streams/zstd/zstd.h> @@ -82,6 +84,25 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() { } } +void TBlobStorageController::TGroupInfo::CalculateLayoutStatus(TBlobStorageController *self, + TBlobStorageGroupInfo::TTopology *topology, const std::function<TGroupGeometryInfo()>& getGeom) { + LayoutCorrect = true; + if (VDisksInGroup) { + NLayoutChecker::TGroupLayout layout(*topology); + NLayoutChecker::TDomainMapper mapper; + auto geom = getGeom(); + + for (size_t index = 0; index < VDisksInGroup.size(); ++index) { + const TVSlotInfo *slot = VDisksInGroup[index]; + TPDiskId pdiskId = slot->VSlotId.ComprisingPDiskId(); + const auto& location = self->HostRecords->GetLocation(pdiskId.NodeId); + layout.AddDisk({mapper, location, pdiskId, geom}, index); + } + + LayoutCorrect = layout.IsCorrect(); + } +} + NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology, const TBlobStorageGroupInfo::TGroupVDisks& failed) { auto& checker = *topology->QuorumChecker; diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index e3f1f199de..df353cd0b0 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -621,6 +621,14 @@ namespace NKikimr { groupInfo->FinishVDisksInGroup(); groupInfo->CalculateGroupStatus(); + groupInfo->CalculateLayoutStatus(&State.Self, groupInfo->Topology.get(), [&] { + const auto& pools = State.StoragePools.Get(); + if (const auto it = pools.find(groupInfo->StoragePoolId); it != pools.end()) { + return TGroupGeometryInfo(groupInfo->Topology->GType, it->second.GetGroupGeometry()); + } + Y_DEBUG_ABORT(); // this can't normally happen + return TGroupGeometryInfo(); + }); return res; } diff --git a/ydb/core/mind/bscontroller/group_geometry_info.h b/ydb/core/mind/bscontroller/group_geometry_info.h index 2e6e0ff14b..1d3b7d77b0 100644 --- a/ydb/core/mind/bscontroller/group_geometry_info.h +++ b/ydb/core/mind/bscontroller/group_geometry_info.h @@ -11,16 +11,18 @@ namespace NKikimr::NBsController { struct TExFitGroupError : yexception {}; class TGroupGeometryInfo { - const TBlobStorageGroupType Type; - ui32 NumFailRealms; - ui32 NumFailDomainsPerFailRealm; - ui32 NumVDisksPerFailDomain; - ui32 RealmLevelBegin; - ui32 RealmLevelEnd; - ui32 DomainLevelBegin; - ui32 DomainLevelEnd; + TBlobStorageGroupType Type; + ui32 NumFailRealms = 0; + ui32 NumFailDomainsPerFailRealm = 0; + ui32 NumVDisksPerFailDomain = 0; + ui32 RealmLevelBegin = 0; + ui32 RealmLevelEnd = 0; + ui32 DomainLevelBegin = 0; + ui32 DomainLevelEnd = 0; public: + explicit TGroupGeometryInfo() = default; + TGroupGeometryInfo(TBlobStorageGroupType type, NKikimrBlobStorage::TGroupGeometry g) : Type(type) , NumFailRealms(g.GetNumFailRealms()) diff --git a/ydb/core/mind/bscontroller/group_layout_checker.h b/ydb/core/mind/bscontroller/group_layout_checker.h index e2e2e66246..3c42fef3d4 100644 --- a/ydb/core/mind/bscontroller/group_layout_checker.h +++ b/ydb/core/mind/bscontroller/group_layout_checker.h @@ -177,6 +177,8 @@ namespace NKikimr::NBsController { THashMap<TEntityId, ui32> NumDisksPerDevice; + bool Correct = true; + TGroupLayout(const TBlobStorageGroupInfo::TTopology& topology) : Topology(topology) , NumDisksInRealm(Topology.GetTotalFailRealmsNum()) @@ -187,17 +189,19 @@ namespace NKikimr::NBsController { void UpdateDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber, ui32 value) { NumDisks += value; - NumDisksPerRealmGroup[pos.RealmGroup] += value; + const ui32 z = NumDisksPerRealmGroup[pos.RealmGroup] += value; const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber); - NumDisksInRealm[vdisk.FailRealm] += value; - NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value; - NumDisksPerRealmTotal[pos.Realm] += value; + const ui32 x1 = NumDisksInRealm[vdisk.FailRealm] += value; + const ui32 x2 = NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value; + const ui32 x3 = NumDisksPerRealmTotal[pos.Realm] += value; const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk); - NumDisksInDomain[domainIdx] += value; - NumDisksPerDomain[domainIdx][pos.Domain] += value; - NumDisksPerDomainTotal[pos.Domain] += value; + const ui32 y1 = NumDisksInDomain[domainIdx] += value; + const ui32 y2 = NumDisksPerDomain[domainIdx][pos.Domain] += value; + const ui32 y3 = NumDisksPerDomainTotal[pos.Domain] += value; NumDisksPerDevice[pos.Device] += value; + + Correct = Correct && x1 == x2 && x2 == x3 && y1 == y2 && y2 == y3 && z == NumDisks; } void AddDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber) { @@ -233,6 +237,46 @@ namespace NKikimr::NBsController { AddDisk(pos, orderNumber); return score; } + + bool IsCorrect() const { +#ifdef NDEBUG + return Correct; +#endif + + if (NumDisksPerRealmGroup.size() != 1) { // all disks must reside in the same realm group + Y_DEBUG_ABORT_UNLESS(!Correct); + return false; + } + + for (size_t i = 0, num = NumDisksInRealm.size(); i < num; ++i) { + for (const auto& [entityId, numDisks] : NumDisksPerRealm[i]) { + Y_DEBUG_ABORT_UNLESS(NumDisksPerRealmTotal.contains(entityId)); + if (numDisks != NumDisksInRealm[i] || numDisks != NumDisksPerRealmTotal.at(entityId)) { + // the first case is when group realm contains disks from different real-world realms (DC's) + // -- this is not as bad as it seems, but breaks strict failure model; the second one is a bit + // worse, it means that disks from this real-world realm (DC) are in several realms, which + // may lead to unavailability when DC goes down + Y_DEBUG_ABORT_UNLESS(!Correct); + return false; + } + } + } + + // the same code goes for domains + for (size_t j = 0, num = NumDisksInDomain.size(); j < num; ++j) { + for (const auto& [entityId, numDisks] : NumDisksPerDomain[j]) { + Y_DEBUG_ABORT_UNLESS(NumDisksPerDomainTotal.contains(entityId)); + if (numDisks != NumDisksInDomain[j] || numDisks != NumDisksPerDomainTotal.at(entityId)) { + Y_DEBUG_ABORT_UNLESS(!Correct); + return false; + } + + } + } + + Y_DEBUG_ABORT_UNLESS(Correct); + return true; + } }; } // NLayoutChecker diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index 20d38c32ad..e5b745e3f6 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -20,6 +20,8 @@ namespace NKikimr { namespace NBsController { +class TGroupGeometryInfo; + using NTabletFlatExecutor::TTabletExecutedFlat; using NTabletFlatExecutor::ITransaction; using NTabletFlatExecutor::TTransactionBase; @@ -618,6 +620,12 @@ public: // be recalculated too void CalculateGroupStatus(); + // group layout status: whether it is positioned correctly + bool LayoutCorrect = false; + + void CalculateLayoutStatus(TBlobStorageController *self, TBlobStorageGroupInfo::TTopology *topology, + const std::function<TGroupGeometryInfo()>& getGeom); + template<typename T> static void Apply(TBlobStorageController* /*controller*/, T&& callback) { static TTableAdapter<Table, TGroupInfo, diff --git a/ydb/core/mind/bscontroller/load_everything.cpp b/ydb/core/mind/bscontroller/load_everything.cpp index 742b7fc798..ec28cef8a2 100644 --- a/ydb/core/mind/bscontroller/load_everything.cpp +++ b/ydb/core/mind/bscontroller/load_everything.cpp @@ -1,5 +1,6 @@ #include "impl.h" #include "console_interaction.h" +#include "group_geometry_info.h" #include <ydb/library/yaml_config/yaml_config.h> @@ -515,9 +516,23 @@ public: } } + THashMap<TBoxStoragePoolId, TGroupGeometryInfo> cache; + // calculate group status for all groups for (auto& [id, group] : Self->GroupMap) { group->CalculateGroupStatus(); + + group->CalculateLayoutStatus(Self, group->Topology.get(), [&] { + const auto [it, inserted] = cache.try_emplace(group->StoragePoolId); + if (inserted) { + if (const auto jt = Self->StoragePools.find(it->first); jt != Self->StoragePools.end()) { + it->second = TGroupGeometryInfo(group->Topology->GType, jt->second.GetGroupGeometry()); + } else { + Y_DEBUG_ABORT(); + } + } + return it->second; + }); } return true; diff --git a/ydb/core/mind/bscontroller/monitoring.cpp b/ydb/core/mind/bscontroller/monitoring.cpp index c566743ef2..15758be7dc 100644 --- a/ydb/core/mind/bscontroller/monitoring.cpp +++ b/ydb/core/mind/bscontroller/monitoring.cpp @@ -1388,6 +1388,7 @@ void TBlobStorageController::RenderGroupTable(IOutputStream& out, std::function< TAG_ATTRS(TTableH, {{"title", "PutUserData Latency"}}) { out << "PutUserData<br/>Latency"; } TAG_ATTRS(TTableH, {{"title", "GetFast Latency"}}) { out << "GetFast<br/>Latency"; } TABLEH() { out << "Seen operational"; } + TABLEH() { out << "Layout correct"; } TABLEH() { out << "Operating<br/>status"; } TABLEH() { out << "Expected<br/>status"; } TABLEH() { out << "Donors"; } @@ -1448,6 +1449,7 @@ void TBlobStorageController::RenderGroupRow(IOutputStream& out, const TGroupInfo renderLatency(group.LatencyStats.PutUserData); renderLatency(group.LatencyStats.GetFast); TABLED() { out << (group.SeenOperational ? "YES" : ""); } + TABLED() { out << (group.LayoutCorrect ? "" : "NO"); } const auto& status = group.Status; TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.OperatingStatus); } diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp index c28c1440d2..a5897af5a5 100644 --- a/ydb/core/mind/bscontroller/sys_view.cpp +++ b/ydb/core/mind/bscontroller/sys_view.cpp @@ -398,6 +398,8 @@ void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder<TBlobStorageContro if (latencyStats.GetFast) { info->SetGetFastLatency(latencyStats.GetFast->MicroSeconds()); } + + info->SetLayoutCorrect(groupInfo->LayoutCorrect); } void CopyInfo(NKikimrSysView::TStoragePoolInfo* info, const TBlobStorageController::TStoragePoolInfo& poolInfo) { diff --git a/ydb/core/mind/bscontroller/virtual_group.cpp b/ydb/core/mind/bscontroller/virtual_group.cpp index ee3b31fb2a..c49349750e 100644 --- a/ydb/core/mind/bscontroller/virtual_group.cpp +++ b/ydb/core/mind/bscontroller/virtual_group.cpp @@ -1,5 +1,6 @@ #include "impl.h" #include "config.h" +#include "group_geometry_info.h" namespace NKikimr::NBsController { @@ -89,6 +90,7 @@ namespace NKikimr::NBsController { GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); + group->CalculateLayoutStatus(&Self, group->Topology.get(), {}); NKikimrBlobDepot::TBlobDepotConfig config; config.SetVirtualGroupId(group->ID.GetRawId()); @@ -255,6 +257,14 @@ namespace NKikimr::NBsController { State->DeleteExistingGroup(group->ID); } group->CalculateGroupStatus(); + group->CalculateLayoutStatus(Self, group->Topology.get(), [&] { + const auto& pools = State->StoragePools.Get(); + if (const auto it = pools.find(group->StoragePoolId); it != pools.end()) { + return TGroupGeometryInfo(group->Topology->GType, it->second.GetGroupGeometry()); + } + Y_DEBUG_ABORT(); + return TGroupGeometryInfo(); + }); TString error; if (State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) { STLOG(PRI_ERROR, BS_CONTROLLER, BSCVG08, "failed to commit update", (VirtualGroupId, GroupId), (Error, error)); diff --git a/ydb/core/protos/sys_view.proto b/ydb/core/protos/sys_view.proto index e5f215dec8..e0f2c4f81d 100644 --- a/ydb/core/protos/sys_view.proto +++ b/ydb/core/protos/sys_view.proto @@ -265,6 +265,7 @@ message TGroupInfo { // desired disk categories ? // down/persisted down ? // metrics ? + optional bool LayoutCorrect = 16; // is the group layout correct? } message TGroupEntry { diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index ddcfcab7b7..7c38021c4d 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -306,6 +306,7 @@ struct Schema : NIceDb::Schema { struct PutTabletLogLatency : Column<13, NScheme::NTypeIds::Interval> {}; struct PutUserDataLatency : Column<14, NScheme::NTypeIds::Interval> {}; struct GetFastLatency : Column<15, NScheme::NTypeIds::Interval> {}; + struct LayoutCorrect : Column<16, NScheme::NTypeIds::Bool> {}; using TKey = TableKey<GroupId>; using TColumns = TableColumns< @@ -321,7 +322,8 @@ struct Schema : NIceDb::Schema { SeenOperational, PutTabletLogLatency, PutUserDataLatency, - GetFastLatency>; + GetFastLatency, + LayoutCorrect>; }; struct StoragePools : Table<7> { diff --git a/ydb/core/sys_view/storage/groups.cpp b/ydb/core/sys_view/storage/groups.cpp index cca51da225..11a0ded276 100644 --- a/ydb/core/sys_view/storage/groups.cpp +++ b/ydb/core/sys_view/storage/groups.cpp @@ -36,6 +36,7 @@ public: {T::PutTabletLogLatency::ColumnId, {E::kInfoFieldNumber, V::kPutTabletLogLatencyFieldNumber}}, {T::PutUserDataLatency::ColumnId, {E::kInfoFieldNumber, V::kPutUserDataLatencyFieldNumber}}, {T::GetFastLatency::ColumnId, {E::kInfoFieldNumber, V::kGetFastLatencyFieldNumber}}, + {T::LayoutCorrect::ColumnId, {E::kInfoFieldNumber, V::kLayoutCorrectFieldNumber}}, }; return fieldMap; } diff --git a/ydb/core/sys_view/ut_kqp.cpp b/ydb/core/sys_view/ut_kqp.cpp index 214a3e9bad..a708d422dc 100644 --- a/ydb/core/sys_view/ut_kqp.cpp +++ b/ydb/core/sys_view/ut_kqp.cpp @@ -1058,7 +1058,8 @@ Y_UNIT_TEST_SUITE(SystemView) { LifeCyclePhase, PutTabletLogLatency, PutUserDataLatency, - StoragePoolId + StoragePoolId, + LayoutCorrect FROM `/Root/.sys/ds_groups` WHERE GroupId >= 0x80000000; )").GetValueSync(); @@ -1074,7 +1075,7 @@ Y_UNIT_TEST_SUITE(SystemView) { } } - TYsonFieldChecker check(ysonString, 12); + TYsonFieldChecker check(ysonString, 13); check.Uint64(0u); // AllocatedSize check.Uint64GreaterOrEquals(0u); // AvailableSize @@ -1088,6 +1089,7 @@ Y_UNIT_TEST_SUITE(SystemView) { check.Null(); // PutTabletLogLatency check.Null(); // PutUserDataLatency check.Uint64(2u); // StoragePoolId + check.Bool(true); // LayoutCorrect } Y_UNIT_TEST(StoragePoolsFields) { |