aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alexander Rutkovsky <alexvru@ydb.tech> 2025-02-25 19:19:15 +0300
committer GitHub <noreply@github.com> 2025-02-25 19:19:15 +0300
commit 38b28a98382a639bdc96fa961609b2b815572d7f (patch)
tree a5ba34ae8049a618bf71b67f8cc75ca98ab0e9f9
parent b74ed7274a782de870e21adbc9febc429319967b (diff)
download ydb-38b28a98382a639bdc96fa961609b2b815572d7f.tar.gz
Support LayoutCorrect fields for SysView (#15006)
-rw-r--r-- ydb/core/mind/bscontroller/bsc.cpp 21
-rw-r--r-- ydb/core/mind/bscontroller/config_fit_groups.cpp 8
-rw-r--r-- ydb/core/mind/bscontroller/group_geometry_info.h 18
-rw-r--r-- ydb/core/mind/bscontroller/group_layout_checker.h 58
-rw-r--r-- ydb/core/mind/bscontroller/impl.h 8
-rw-r--r-- ydb/core/mind/bscontroller/load_everything.cpp 15
-rw-r--r-- ydb/core/mind/bscontroller/monitoring.cpp 2
-rw-r--r-- ydb/core/mind/bscontroller/sys_view.cpp 2
-rw-r--r-- ydb/core/mind/bscontroller/virtual_group.cpp 10
-rw-r--r-- ydb/core/protos/sys_view.proto 1
-rw-r--r-- ydb/core/sys_view/common/schema.h 4
-rw-r--r-- ydb/core/sys_view/storage/groups.cpp 1
-rw-r--r-- ydb/core/sys_view/ut_kqp.cpp 6
13 files changed, 136 insertions, 18 deletions
diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp
index 41e97f7338..06750c1f42 100644
--- a/ydb/core/mind/bscontroller/bsc.cpp
+++ b/ydb/core/mind/bscontroller/bsc.cpp
@@ -3,6 +3,8 @@
#include "self_heal.h"
#include "sys_view.h"
#include "console_interaction.h"
+#include "group_geometry_info.h"
+#include "group_layout_checker.h"
#include <library/cpp/streams/zstd/zstd.h>
@@ -82,6 +84,25 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
}
}
+void TBlobStorageController::TGroupInfo::CalculateLayoutStatus(TBlobStorageController *self, // recomputes LayoutCorrect from the VDisks currently listed in VDisksInGroup
+ TBlobStorageGroupInfo::TTopology *topology, const std::function<TGroupGeometryInfo()>& getGeom) { // getGeom is invoked only inside the `if (VDisksInGroup)` branch below
+ LayoutCorrect = true; // default result; stays true when the branch below is not taken
+ if (VDisksInGroup) { // NOTE(review): one call site in virtual_group.cpp passes `{}` as getGeom, which is safe only while this condition is false -- confirm
+ NLayoutChecker::TGroupLayout layout(*topology);
+ NLayoutChecker::TDomainMapper mapper;
+ auto geom = getGeom(); // fetched once per call and reused for every disk position
+
+ for (size_t index = 0; index < VDisksInGroup.size(); ++index) { // index doubles as the order number handed to the layout
+ const TVSlotInfo *slot = VDisksInGroup[index];
+ TPDiskId pdiskId = slot->VSlotId.ComprisingPDiskId();
+ const auto& location = self->HostRecords->GetLocation(pdiskId.NodeId); // location is looked up by the PDisk's node id
+ layout.AddDisk({mapper, location, pdiskId, geom}, index); // the braced list builds the disk position argument of AddDisk
+ }
+
+ LayoutCorrect = layout.IsCorrect(); // verdict over all disks added above
+ }
+}
+
NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology,
const TBlobStorageGroupInfo::TGroupVDisks& failed) {
auto& checker = *topology->QuorumChecker;
diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp
index e3f1f199de..df353cd0b0 100644
--- a/ydb/core/mind/bscontroller/config_fit_groups.cpp
+++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp
@@ -621,6 +621,14 @@ namespace NKikimr {
groupInfo->FinishVDisksInGroup();
groupInfo->CalculateGroupStatus();
+ groupInfo->CalculateLayoutStatus(&State.Self, groupInfo->Topology.get(), [&] {
+ const auto& pools = State.StoragePools.Get();
+ if (const auto it = pools.find(groupInfo->StoragePoolId); it != pools.end()) {
+ return TGroupGeometryInfo(groupInfo->Topology->GType, it->second.GetGroupGeometry());
+ }
+ Y_DEBUG_ABORT(); // this can't normally happen
+ return TGroupGeometryInfo();
+ });
return res;
}
diff --git a/ydb/core/mind/bscontroller/group_geometry_info.h b/ydb/core/mind/bscontroller/group_geometry_info.h
index 2e6e0ff14b..1d3b7d77b0 100644
--- a/ydb/core/mind/bscontroller/group_geometry_info.h
+++ b/ydb/core/mind/bscontroller/group_geometry_info.h
@@ -11,16 +11,18 @@ namespace NKikimr::NBsController {
struct TExFitGroupError : yexception {};
class TGroupGeometryInfo {
- const TBlobStorageGroupType Type;
- ui32 NumFailRealms;
- ui32 NumFailDomainsPerFailRealm;
- ui32 NumVDisksPerFailDomain;
- ui32 RealmLevelBegin;
- ui32 RealmLevelEnd;
- ui32 DomainLevelBegin;
- ui32 DomainLevelEnd;
+ TBlobStorageGroupType Type;
+ ui32 NumFailRealms = 0;
+ ui32 NumFailDomainsPerFailRealm = 0;
+ ui32 NumVDisksPerFailDomain = 0;
+ ui32 RealmLevelBegin = 0;
+ ui32 RealmLevelEnd = 0;
+ ui32 DomainLevelBegin = 0;
+ ui32 DomainLevelEnd = 0;
public:
+ explicit TGroupGeometryInfo() = default;
+
TGroupGeometryInfo(TBlobStorageGroupType type, NKikimrBlobStorage::TGroupGeometry g)
: Type(type)
, NumFailRealms(g.GetNumFailRealms())
diff --git a/ydb/core/mind/bscontroller/group_layout_checker.h b/ydb/core/mind/bscontroller/group_layout_checker.h
index e2e2e66246..3c42fef3d4 100644
--- a/ydb/core/mind/bscontroller/group_layout_checker.h
+++ b/ydb/core/mind/bscontroller/group_layout_checker.h
@@ -177,6 +177,8 @@ namespace NKikimr::NBsController {
THashMap<TEntityId, ui32> NumDisksPerDevice;
+ bool Correct = true;
+
TGroupLayout(const TBlobStorageGroupInfo::TTopology& topology)
: Topology(topology)
, NumDisksInRealm(Topology.GetTotalFailRealmsNum())
@@ -187,17 +189,19 @@ namespace NKikimr::NBsController {
void UpdateDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber, ui32 value) {
NumDisks += value;
- NumDisksPerRealmGroup[pos.RealmGroup] += value;
+ const ui32 z = NumDisksPerRealmGroup[pos.RealmGroup] += value; // z: updated disk count for this position's realm group
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber);
- NumDisksInRealm[vdisk.FailRealm] += value;
- NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value;
- NumDisksPerRealmTotal[pos.Realm] += value;
+ const ui32 x1 = NumDisksInRealm[vdisk.FailRealm] += value; // x1: updated disk count of the group fail realm this VDisk belongs to
+ const ui32 x2 = NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value; // x2: updated count within that fail realm for the entity pos.Realm
+ const ui32 x3 = NumDisksPerRealmTotal[pos.Realm] += value; // x3: updated count for the entity pos.Realm across all fail realms
const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk);
- NumDisksInDomain[domainIdx] += value;
- NumDisksPerDomain[domainIdx][pos.Domain] += value;
- NumDisksPerDomainTotal[pos.Domain] += value;
+ const ui32 y1 = NumDisksInDomain[domainIdx] += value; // y1: updated disk count of the group fail domain domainIdx
+ const ui32 y2 = NumDisksPerDomain[domainIdx][pos.Domain] += value; // y2: updated count within that fail domain for the entity pos.Domain
+ const ui32 y3 = NumDisksPerDomainTotal[pos.Domain] += value; // y3: updated count for the entity pos.Domain across all fail domains
NumDisksPerDevice[pos.Device] += value;
+
+ Correct = Correct && x1 == x2 && x2 == x3 && y1 == y2 && y2 == y3 && z == NumDisks; // once false, Correct stays false; it remains true only while every counter group above agrees after this update
}
void AddDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber) {
@@ -233,6 +237,46 @@ namespace NKikimr::NBsController {
AddDisk(pos, orderNumber);
return score;
}
+
+ bool IsCorrect() const { // reports whether the disks accounted for so far form a correct layout
+#ifdef NDEBUG
+ return Correct; // with NDEBUG defined, the flag maintained by UpdateDisk is returned as-is
+#endif // without NDEBUG, execution falls through and the verdict is re-derived from the counters below
+
+ if (NumDisksPerRealmGroup.size() != 1) { // all disks must reside in the same realm group
+ Y_DEBUG_ABORT_UNLESS(!Correct); // NOTE(review): Correct is initialized to true, so with no disks added this check would presumably fire -- confirm callers never query an empty layout
+ return false;
+ }
+
+ for (size_t i = 0, num = NumDisksInRealm.size(); i < num; ++i) { // i indexes the group's fail realms
+ for (const auto& [entityId, numDisks] : NumDisksPerRealm[i]) { // every entity that has disks in fail realm i
+ Y_DEBUG_ABORT_UNLESS(NumDisksPerRealmTotal.contains(entityId));
+ if (numDisks != NumDisksInRealm[i] || numDisks != NumDisksPerRealmTotal.at(entityId)) {
+ // the first case is when group realm contains disks from different real-world realms (DC's)
+ // -- this is not as bad as it seems, but breaks strict failure model; the second one is a bit
+ // worse, it means that disks from this real-world realm (DC) are in several realms, which
+ // may lead to unavailability when DC goes down
+ Y_DEBUG_ABORT_UNLESS(!Correct); // the re-derived verdict is expected to match the incremental flag
+ return false;
+ }
+ }
+ }
+
+ // the same code goes for domains
+ for (size_t j = 0, num = NumDisksInDomain.size(); j < num; ++j) { // j indexes the group's fail domains
+ for (const auto& [entityId, numDisks] : NumDisksPerDomain[j]) { // every entity that has disks in fail domain j
+ Y_DEBUG_ABORT_UNLESS(NumDisksPerDomainTotal.contains(entityId));
+ if (numDisks != NumDisksInDomain[j] || numDisks != NumDisksPerDomainTotal.at(entityId)) {
+ Y_DEBUG_ABORT_UNLESS(!Correct); // the re-derived verdict is expected to match the incremental flag
+ return false;
+ }
+
+ }
+ }
+
+ Y_DEBUG_ABORT_UNLESS(Correct); // no violation found above, so the incremental flag is expected to be true as well
+ return true;
+ }
};
} // NLayoutChecker
diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h
index 20d38c32ad..e5b745e3f6 100644
--- a/ydb/core/mind/bscontroller/impl.h
+++ b/ydb/core/mind/bscontroller/impl.h
@@ -20,6 +20,8 @@ namespace NKikimr {
namespace NBsController {
+class TGroupGeometryInfo;
+
using NTabletFlatExecutor::TTabletExecutedFlat;
using NTabletFlatExecutor::ITransaction;
using NTabletFlatExecutor::TTransactionBase;
@@ -618,6 +620,12 @@ public:
// be recalculated too
void CalculateGroupStatus();
+ // group layout status: whether it is positioned correctly
+ bool LayoutCorrect = false;
+
+ void CalculateLayoutStatus(TBlobStorageController *self, TBlobStorageGroupInfo::TTopology *topology,
+ const std::function<TGroupGeometryInfo()>& getGeom);
+
template<typename T>
static void Apply(TBlobStorageController* /*controller*/, T&& callback) {
static TTableAdapter<Table, TGroupInfo,
diff --git a/ydb/core/mind/bscontroller/load_everything.cpp b/ydb/core/mind/bscontroller/load_everything.cpp
index 742b7fc798..ec28cef8a2 100644
--- a/ydb/core/mind/bscontroller/load_everything.cpp
+++ b/ydb/core/mind/bscontroller/load_everything.cpp
@@ -1,5 +1,6 @@
#include "impl.h"
#include "console_interaction.h"
+#include "group_geometry_info.h"
#include <ydb/library/yaml_config/yaml_config.h>
@@ -515,9 +516,23 @@ public:
}
}
+ THashMap<TBoxStoragePoolId, TGroupGeometryInfo> cache;
+
// calculate group status for all groups
for (auto& [id, group] : Self->GroupMap) {
group->CalculateGroupStatus();
+
+ group->CalculateLayoutStatus(Self, group->Topology.get(), [&] {
+ const auto [it, inserted] = cache.try_emplace(group->StoragePoolId);
+ if (inserted) {
+ if (const auto jt = Self->StoragePools.find(it->first); jt != Self->StoragePools.end()) {
+ it->second = TGroupGeometryInfo(group->Topology->GType, jt->second.GetGroupGeometry());
+ } else {
+ Y_DEBUG_ABORT();
+ }
+ }
+ return it->second;
+ });
}
return true;
diff --git a/ydb/core/mind/bscontroller/monitoring.cpp b/ydb/core/mind/bscontroller/monitoring.cpp
index c566743ef2..15758be7dc 100644
--- a/ydb/core/mind/bscontroller/monitoring.cpp
+++ b/ydb/core/mind/bscontroller/monitoring.cpp
@@ -1388,6 +1388,7 @@ void TBlobStorageController::RenderGroupTable(IOutputStream& out, std::function<
TAG_ATTRS(TTableH, {{"title", "PutUserData Latency"}}) { out << "PutUserData<br/>Latency"; }
TAG_ATTRS(TTableH, {{"title", "GetFast Latency"}}) { out << "GetFast<br/>Latency"; }
TABLEH() { out << "Seen operational"; }
+ TABLEH() { out << "Layout correct"; }
TABLEH() { out << "Operating<br/>status"; }
TABLEH() { out << "Expected<br/>status"; }
TABLEH() { out << "Donors"; }
@@ -1448,6 +1449,7 @@ void TBlobStorageController::RenderGroupRow(IOutputStream& out, const TGroupInfo
renderLatency(group.LatencyStats.PutUserData);
renderLatency(group.LatencyStats.GetFast);
TABLED() { out << (group.SeenOperational ? "YES" : ""); }
+ TABLED() { out << (group.LayoutCorrect ? "" : "NO"); }
const auto& status = group.Status;
TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.OperatingStatus); }
diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp
index c28c1440d2..a5897af5a5 100644
--- a/ydb/core/mind/bscontroller/sys_view.cpp
+++ b/ydb/core/mind/bscontroller/sys_view.cpp
@@ -398,6 +398,8 @@ void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder<TBlobStorageContro
if (latencyStats.GetFast) {
info->SetGetFastLatency(latencyStats.GetFast->MicroSeconds());
}
+
+ info->SetLayoutCorrect(groupInfo->LayoutCorrect);
}
void CopyInfo(NKikimrSysView::TStoragePoolInfo* info, const TBlobStorageController::TStoragePoolInfo& poolInfo) {
diff --git a/ydb/core/mind/bscontroller/virtual_group.cpp b/ydb/core/mind/bscontroller/virtual_group.cpp
index ee3b31fb2a..c49349750e 100644
--- a/ydb/core/mind/bscontroller/virtual_group.cpp
+++ b/ydb/core/mind/bscontroller/virtual_group.cpp
@@ -1,5 +1,6 @@
#include "impl.h"
#include "config.h"
+#include "group_geometry_info.h"
namespace NKikimr::NBsController {
@@ -89,6 +90,7 @@ namespace NKikimr::NBsController {
GroupFailureModelChanged.insert(group->ID);
group->CalculateGroupStatus();
+ group->CalculateLayoutStatus(&Self, group->Topology.get(), {});
NKikimrBlobDepot::TBlobDepotConfig config;
config.SetVirtualGroupId(group->ID.GetRawId());
@@ -255,6 +257,14 @@ namespace NKikimr::NBsController {
State->DeleteExistingGroup(group->ID);
}
group->CalculateGroupStatus();
+ group->CalculateLayoutStatus(Self, group->Topology.get(), [&] {
+ const auto& pools = State->StoragePools.Get();
+ if (const auto it = pools.find(group->StoragePoolId); it != pools.end()) {
+ return TGroupGeometryInfo(group->Topology->GType, it->second.GetGroupGeometry());
+ }
+ Y_DEBUG_ABORT();
+ return TGroupGeometryInfo();
+ });
TString error;
if (State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) {
STLOG(PRI_ERROR, BS_CONTROLLER, BSCVG08, "failed to commit update", (VirtualGroupId, GroupId), (Error, error));
diff --git a/ydb/core/protos/sys_view.proto b/ydb/core/protos/sys_view.proto
index e5f215dec8..e0f2c4f81d 100644
--- a/ydb/core/protos/sys_view.proto
+++ b/ydb/core/protos/sys_view.proto
@@ -265,6 +265,7 @@ message TGroupInfo {
// desired disk categories ?
// down/persisted down ?
// metrics ?
+ optional bool LayoutCorrect = 16; // is the group layout correct?
}
message TGroupEntry {
diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h
index ddcfcab7b7..7c38021c4d 100644
--- a/ydb/core/sys_view/common/schema.h
+++ b/ydb/core/sys_view/common/schema.h
@@ -306,6 +306,7 @@ struct Schema : NIceDb::Schema {
struct PutTabletLogLatency : Column<13, NScheme::NTypeIds::Interval> {};
struct PutUserDataLatency : Column<14, NScheme::NTypeIds::Interval> {};
struct GetFastLatency : Column<15, NScheme::NTypeIds::Interval> {};
+ struct LayoutCorrect : Column<16, NScheme::NTypeIds::Bool> {};
using TKey = TableKey<GroupId>;
using TColumns = TableColumns<
@@ -321,7 +322,8 @@ struct Schema : NIceDb::Schema {
SeenOperational,
PutTabletLogLatency,
PutUserDataLatency,
- GetFastLatency>;
+ GetFastLatency,
+ LayoutCorrect>;
};
struct StoragePools : Table<7> {
diff --git a/ydb/core/sys_view/storage/groups.cpp b/ydb/core/sys_view/storage/groups.cpp
index cca51da225..11a0ded276 100644
--- a/ydb/core/sys_view/storage/groups.cpp
+++ b/ydb/core/sys_view/storage/groups.cpp
@@ -36,6 +36,7 @@ public:
{T::PutTabletLogLatency::ColumnId, {E::kInfoFieldNumber, V::kPutTabletLogLatencyFieldNumber}},
{T::PutUserDataLatency::ColumnId, {E::kInfoFieldNumber, V::kPutUserDataLatencyFieldNumber}},
{T::GetFastLatency::ColumnId, {E::kInfoFieldNumber, V::kGetFastLatencyFieldNumber}},
+ {T::LayoutCorrect::ColumnId, {E::kInfoFieldNumber, V::kLayoutCorrectFieldNumber}},
};
return fieldMap;
}
diff --git a/ydb/core/sys_view/ut_kqp.cpp b/ydb/core/sys_view/ut_kqp.cpp
index 214a3e9bad..a708d422dc 100644
--- a/ydb/core/sys_view/ut_kqp.cpp
+++ b/ydb/core/sys_view/ut_kqp.cpp
@@ -1058,7 +1058,8 @@ Y_UNIT_TEST_SUITE(SystemView) {
LifeCyclePhase,
PutTabletLogLatency,
PutUserDataLatency,
- StoragePoolId
+ StoragePoolId,
+ LayoutCorrect
FROM `/Root/.sys/ds_groups` WHERE GroupId >= 0x80000000;
)").GetValueSync();
@@ -1074,7 +1075,7 @@ Y_UNIT_TEST_SUITE(SystemView) {
}
}
- TYsonFieldChecker check(ysonString, 12);
+ TYsonFieldChecker check(ysonString, 13);
check.Uint64(0u); // AllocatedSize
check.Uint64GreaterOrEquals(0u); // AvailableSize
@@ -1088,6 +1089,7 @@ Y_UNIT_TEST_SUITE(SystemView) {
check.Null(); // PutTabletLogLatency
check.Null(); // PutUserDataLatency
check.Uint64(2u); // StoragePoolId
+ check.Bool(true); // LayoutCorrect
}
Y_UNIT_TEST(StoragePoolsFields) {