summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vlad Kuznetsov <[email protected]> 2024-10-29 12:16:49 +0100
committer: GitHub <[email protected]> 2024-10-29 12:16:49 +0100
commit: 012816bc7bdbccda84f744dc3be4cfb87bc00fb0 (patch)
tree: 7d1d8977f528bf7be79ef60e9882d51171f797aa
parent: 8acfed4a5b94b7d2e1994cd252e9eccd08b42fbd (diff)
Consider free slots count in BSC during group allocation (#11008)
-rw-r--r-- ydb/core/mind/bscontroller/group_layout_checker.h | 11
-rw-r--r-- ydb/core/mind/bscontroller/group_mapper.cpp | 49
-rw-r--r-- ydb/core/mind/bscontroller/group_mapper_ut.cpp | 37
3 files changed, 66 insertions, 31 deletions
diff --git a/ydb/core/mind/bscontroller/group_layout_checker.h b/ydb/core/mind/bscontroller/group_layout_checker.h
index 407f0b7c7f7..78c9e3e05f8 100644
--- a/ydb/core/mind/bscontroller/group_layout_checker.h
+++ b/ydb/core/mind/bscontroller/group_layout_checker.h
@@ -201,12 +201,15 @@ namespace NKikimr::NBsController {
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber);
const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk);
+ const auto& disksPerRealm = NumDisksPerRealm[vdisk.FailRealm][pos.Realm];
+ const auto& disksPerDomain = NumDisksPerDomain[domainIdx][pos.Domain];
+
return {
- .RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
- .DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - NumDisksPerDomain[domainIdx][pos.Domain],
+ .RealmInterlace = NumDisksPerRealmTotal[pos.Realm] - disksPerRealm,
+ .DomainInterlace = NumDisksPerDomainTotal[pos.Domain] - disksPerDomain,
.RealmGroupScatter = NumDisks - NumDisksPerRealmGroup[pos.RealmGroup],
- .RealmScatter = NumDisksInRealm[vdisk.FailRealm] - NumDisksPerRealm[vdisk.FailRealm][pos.Realm],
- .DomainScatter = NumDisksInDomain[domainIdx] - NumDisksPerDomain[domainIdx][pos.Domain],
+ .RealmScatter = NumDisksInRealm[vdisk.FailRealm] - disksPerRealm,
+ .DomainScatter = NumDisksInDomain[domainIdx] - disksPerDomain,
};
}
diff --git a/ydb/core/mind/bscontroller/group_mapper.cpp b/ydb/core/mind/bscontroller/group_mapper.cpp
index 0aaa2ad11dc..c31fd5c83c5 100644
--- a/ydb/core/mind/bscontroller/group_mapper.cpp
+++ b/ydb/core/mind/bscontroller/group_mapper.cpp
@@ -40,8 +40,13 @@ namespace NKikimr::NBsController {
}
}
- ui32 GetPickerScore() const {
- return NumSlots;
+ // can be negative
+ i32 FreeSlots() const {
+ return i32(MaxSlots) - NumSlots;
+ }
+
+ double GetPickerScore() const {
+ return double(NumSlots) / MaxSlots;
}
};
@@ -65,7 +70,7 @@ namespace NKikimr::NBsController {
using TPDomainCandidatesRange = std::pair<std::vector<ui32>::const_iterator, std::vector<ui32>::const_iterator>;
using TPDiskCandidatesRange = std::pair<std::vector<TPDiskInfo*>::const_iterator, std::vector<TPDiskInfo*>::const_iterator>;
-
+
struct TDiskManager {
TImpl& Self;
const TBlobStorageGroupInfo::TTopology Topology;
@@ -126,7 +131,7 @@ namespace NKikimr::NBsController {
return res;
}
-
+
TGroupConstraints ProcessGroupConstraints(const TGroupConstraintsDefinition& groupConstraints) {
TGroupConstraints res(Topology.GetTotalVDisksNum());
Traverse(groupConstraints, [&](TVDiskIdShort vdisk, TTargetDiskConstraints diskConstraints) {
@@ -159,7 +164,7 @@ namespace NKikimr::NBsController {
return true;
}
- TPDiskByPosition SetupMatchingDisks(ui32 maxScore) {
+ TPDiskByPosition SetupMatchingDisks(double maxScore) {
TPDiskByPosition res;
res.reserve(Self.PDiskByPosition.size());
@@ -245,8 +250,8 @@ namespace NKikimr::NBsController {
}
bool DiskIsBetter(const TPDiskInfo& pretender, const TPDiskInfo& king) const {
- if (pretender.NumSlots != king.NumSlots) {
- return pretender.NumSlots < king.NumSlots;
+ if (pretender.FreeSlots() != king.FreeSlots()) {
+ return pretender.FreeSlots() > king.FreeSlots();
} else if (GivesLocalityBoost(pretender, king) || BetterQuotaMatch(pretender, king)) {
return true;
} else {
@@ -293,7 +298,7 @@ namespace NKikimr::NBsController {
const auto it = LocalityFactor.find(groupId);
return it != LocalityFactor.end() ? it->second : 0;
}
- };
+ };
struct TAllocator : public TDiskManager {
@@ -303,7 +308,7 @@ namespace NKikimr::NBsController {
{
}
- bool FillInGroup(ui32 maxScore, TUndoLog& undo, TGroup& group, const TGroupConstraints& constraints) {
+ bool FillInGroup(double maxScore, TUndoLog& undo, TGroup& group, const TGroupConstraints& constraints) {
// determine PDisks that fit our requirements (including score)
auto v = SetupMatchingDisks(maxScore);
@@ -575,7 +580,7 @@ namespace NKikimr::NBsController {
}
bool SetupNavigation(const TGroup& group) {
- TPDiskByPosition matchingDisks = SetupMatchingDisks(::Max<ui32>());
+ TPDiskByPosition matchingDisks = SetupMatchingDisks(::Max<double>());
const ui32 totalFailRealmsNum = Topology.GetTotalFailRealmsNum();
const ui32 numFailDomainsPerFailRealm = Topology.GetNumFailDomainsPerFailRealm();
const ui32 numDisksPerFailRealm = numFailDomainsPerFailRealm * Topology.GetNumVDisksPerFailDomain();
@@ -653,7 +658,7 @@ namespace NKikimr::NBsController {
if (toMoveOut + freeDomains < toMoveIn) {
continue; // not enough free domains to place all the disks
}
- if (newMovesRequired < movesRequired || (newMovesRequired == movesRequired &&
+ if (newMovesRequired < movesRequired || (newMovesRequired == movesRequired &&
freeDomains > pDomainsInPRealm[bestRealm].size())) {
bestRealm = pRealm;
movesRequired = newMovesRequired;
@@ -684,7 +689,7 @@ namespace NKikimr::NBsController {
}
}
- void SetupCandidates(ui32 maxScore) {
+ void SetupCandidates(double maxScore) {
TPDiskByPosition matchingDisks = SetupMatchingDisks(maxScore);
DomainCandidates.clear();
DiskCandidates.clear();
@@ -738,7 +743,7 @@ namespace NKikimr::NBsController {
std::pair<TMisplacedVDisks::EFailLevel, std::vector<ui32>> FindMisplacedVDisks(const TGroup& group) {
using EFailLevel = TMisplacedVDisks::EFailLevel;
std::unordered_map<ui32, std::unordered_set<ui32>> usedPDomains; // pRealm -> { pDomain1, pDomain2, ... }
- std::set<TPDiskId> usedPDisks;
+ std::set<TPDiskId> usedPDisks;
// {pRealm, pDomain} -> { pdisk1, pdisk2, ... }
EFailLevel failLevel = EFailLevel::ALL_OK;
@@ -806,7 +811,7 @@ namespace NKikimr::NBsController {
return {failLevel, misplacedVDisks};
}
- std::optional<TPDiskId> TargetMisplacedVDisk(ui32 maxScore, const TGroup& group, const TVDiskIdShort& vdisk) {
+ std::optional<TPDiskId> TargetMisplacedVDisk(double maxScore, const TGroup& group, const TVDiskIdShort& vdisk) {
for (ui32 orderNumber = 0; orderNumber < group.size(); ++orderNumber) {
if (!group[orderNumber] && orderNumber != Topology.GetOrderNumber(vdisk)) {
return std::nullopt;
@@ -821,11 +826,11 @@ namespace NKikimr::NBsController {
const auto& domainCandidates = DomainCandidates[pRealm];
TPDomainCandidatesRange pDomainRange = { domainCandidates.begin(), domainCandidates.end() };
-
+
for (; pDomainRange.first != pDomainRange.second;) {
ui32 pDomain = *pDomainRange.first++;
const auto& diskCandidates = DiskCandidates[pRealm][pDomain];
-
+
if (!diskCandidates.empty()) {
return (*diskCandidates.begin())->PDiskId;
}
@@ -966,7 +971,7 @@ namespace NKikimr::NBsController {
}
// calculate score table
- std::vector<ui32> scores;
+ std::vector<double> scores;
for (const auto& [pdiskId, pdisk] : PDisks) {
if (allocator.DiskIsUsable(pdisk)) {
scores.push_back(pdisk.GetPickerScore());
@@ -1033,7 +1038,7 @@ namespace NKikimr::NBsController {
return TMisplacedVDisks(EFailLevel::INCORRECT_LAYOUT, {}, "Cannot map failRealms to pRealms");
}
- sanitizer.SetupCandidates(::Max<ui32>());
+ sanitizer.SetupCandidates(::Max<double>());
auto [failLevel, misplacedVDiskNums] = sanitizer.FindMisplacedVDisks(group);
std::vector<TVDiskIdShort> misplacedVDisks;
for (ui32 orderNum : misplacedVDiskNums) {
@@ -1042,7 +1047,7 @@ namespace NKikimr::NBsController {
return TMisplacedVDisks(failLevel, misplacedVDisks);
}
- std::optional<TPDiskId> TargetMisplacedVDisk(ui32 groupId, TGroupDefinition& groupDefinition, TVDiskIdShort vdisk,
+ std::optional<TPDiskId> TargetMisplacedVDisk(ui32 groupId, TGroupDefinition& groupDefinition, TVDiskIdShort vdisk,
TForbiddenPDisks forbid, i64 requiredSpace, bool requireOperational, TString& error) {
if (Dirty) {
std::sort(PDiskByPosition.begin(), PDiskByPosition.end());
@@ -1067,7 +1072,7 @@ namespace NKikimr::NBsController {
}
// calculate score table
- std::vector<ui32> scores;
+ std::vector<double> scores;
for (const auto& [pdiskId, pdisk] : PDisks) {
if (sanitizer.DiskIsUsable(pdisk)) {
scores.push_back(pdisk.GetPickerScore());
@@ -1077,7 +1082,7 @@ namespace NKikimr::NBsController {
scores.erase(std::unique(scores.begin(), scores.end()), scores.end());
// bisect scores to find optimal working one
- sanitizer.SetupCandidates(::Max<ui32>());
+ sanitizer.SetupCandidates(::Max<double>());
std::optional<TPDiskId> result;
@@ -1152,7 +1157,7 @@ namespace NKikimr::NBsController {
return Impl->FindMisplacedVDisks(group);
}
- std::optional<TPDiskId> TGroupMapper::TargetMisplacedVDisk(TGroupId groupId, TGroupMapper::TGroupDefinition& group,
+ std::optional<TPDiskId> TGroupMapper::TargetMisplacedVDisk(TGroupId groupId, TGroupMapper::TGroupDefinition& group,
TVDiskIdShort vdisk, TForbiddenPDisks forbid, i64 requiredSpace, bool requireOperational, TString& error) {
return Impl->TargetMisplacedVDisk(groupId.GetRawId(), group, vdisk, std::move(forbid), requiredSpace, requireOperational, error);
}
diff --git a/ydb/core/mind/bscontroller/group_mapper_ut.cpp b/ydb/core/mind/bscontroller/group_mapper_ut.cpp
index 714967e7fd5..8126b2d5310 100644
--- a/ydb/core/mind/bscontroller/group_mapper_ut.cpp
+++ b/ydb/core/mind/bscontroller/group_mapper_ut.cpp
@@ -293,7 +293,7 @@ public:
ESanitizeResult status = ESanitizeResult::ALREADY;
TString error;
-
+
if (!result.Disks.empty()) {
status = ESanitizeResult::FAIL;
for (auto vdisk : result.Disks) {
@@ -434,7 +434,7 @@ public:
}
void PopulateGroupMapper(TGroupMapper& mapper, ui32 maxSlots = 16, TSet<TPDiskId> unusableDisks = {},
- TSet<TPDiskId> nonoperationalDisks = {}, std::optional<ui32> decommittedDataCenter = std::nullopt) {
+ TSet<TPDiskId> nonoperationalDisks = {}, std::optional<ui32> decommittedDataCenter = std::nullopt, bool equalSlots = true) {
std::map<TPDiskId, std::vector<ui32>> groupDisks;
for (const auto& [groupId, group] : Groups) {
for (TPDiskId pdiskId : group.PDisks) {
@@ -443,12 +443,13 @@ public:
}
for (const auto& pair : PDisks) {
auto& g = groupDisks[pair.first];
+ const auto& location = pair.second.GetLocation().GetLegacyValue();
mapper.RegisterPDisk({
.PDiskId = pair.first,
.Location = pair.second.GetLocation(),
.Usable = !unusableDisks.count(pair.first),
.NumSlots = pair.second.NumSlots,
- .MaxSlots = maxSlots,
+ .MaxSlots = equalSlots || location.Rack < 8 ? maxSlots : 2 * maxSlots,
.Groups{g.begin(), g.end()},
.SpaceAvailable = 0,
.Operational = !nonoperationalDisks.contains(pair.first),
@@ -637,6 +638,32 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {
}
}
+ Y_UNIT_TEST(NonUniformClusterDifferentSlotsPerDisk) {
+ std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
+ for (ui32 rack = 0; rack < 12; ++rack) {
+ disks.emplace_back(1, 1, rack, 1, 1);
+ }
+ std::random_shuffle(disks.begin(), disks.end());
+ TTestContext context(disks);
+ UNIT_ASSERT_VALUES_EQUAL((8 + 4), context.GetTotalDisks());
+ TGroupMapper mapper(TTestContext::CreateGroupGeometry(TBlobStorageGroupType::Erasure4Plus2Block));
+ context.PopulateGroupMapper(mapper, 8, {}, {}, std::nullopt, false);
+ for (ui32 i = 0; i < 16; ++i) {
+ Ctest << i << "/" << 16 << Endl;
+ TGroupMapper::TGroupDefinition group;
+ context.AllocateGroup(mapper, group);
+ context.CheckGroupErasure(group);
+ }
+ TVector<ui32> slots = context.GetSlots();
+ ui64 slots_total = 0;
+ for (ui32 numSlots : slots) {
+ slots_total += numSlots;
+ Ctest << "slots " << numSlots << " ";
+ }
+ Ctest << slots_total << Endl;
+ UNIT_ASSERT_VALUES_EQUAL(slots_total, 8 * 8 + 4 * 16);
+ }
+
Y_UNIT_TEST(NonUniformCluster2) {
std::vector<std::tuple<ui32, ui32, ui32, ui32, ui32>> disks;
for (ui32 rack = 0, body = 0; rack < 12; ++rack) {
@@ -1012,9 +1039,9 @@ Y_UNIT_TEST_SUITE(TGroupMapperTest) {
Ctest << "group after layout shuffling:" << Endl;
context.DumpGroup(groupDef);
-
+
ui32 sanitationStep = 0;
-
+
TGroupMapper::TGroupDefinition group = groupDef;
TString path = "";
TSet<TGroupMapper::TGroupDefinition> seen;