diff options
author | alexvru <alexvru@ydb.tech> | 2023-07-18 22:06:33 +0300 |
---|---|---|
committer | alexvru <alexvru@ydb.tech> | 2023-07-18 22:06:33 +0300 |
commit | 7223dce4d544c704d4391a80060ab65228490b31 (patch) | |
tree | fce09f37f2163378ddd828b0e0a5032f4ec7428c | |
parent | 7bfe494b8e29f16fe2d67555e614c93769731d46 (diff) | |
download | ydb-7223dce4d544c704d4391a80060ab65228490b31.tar.gz |
Improve group status flags in BSC KIKIMR-18781
mode | file | lines changed |
---|---|---|
-rw-r--r-- | ydb/core/mind/bscontroller/bsc.cpp | 2 |
-rw-r--r-- | ydb/core/mind/bscontroller/cmds_storage_pool.cpp | 8 |
-rw-r--r-- | ydb/core/mind/bscontroller/config.cpp | 35 |
-rw-r--r-- | ydb/core/mind/bscontroller/config.h | 9 |
-rw-r--r-- | ydb/core/mind/bscontroller/config_fit_groups.cpp | 4 |
-rw-r--r-- | ydb/core/mind/bscontroller/impl.h | 2 |
-rw-r--r-- | ydb/core/mind/bscontroller/virtual_group.cpp | 22 |
7 files changed, 42 insertions, 40 deletions
diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 6117764369..e128d3c9cc 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -215,7 +215,7 @@ void TBlobStorageController::ValidateInternalState() { Y_VERIFY(donor); Y_VERIFY(donor->Mood == TMood::Donor); Y_VERIFY(donor->GroupId == vslot->GroupId); - Y_VERIFY(donor->GroupGeneration < vslot->GroupGeneration + group->ContentChanged); + Y_VERIFY(donor->GroupGeneration < vslot->GroupGeneration); Y_VERIFY(donor->GetShortVDiskId() == vslot->GetShortVDiskId()); } if (vslot->Group) { diff --git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp index 09da9e3e67..5433b599d9 100644 --- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp +++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp @@ -159,9 +159,7 @@ namespace NKikimr::NBsController { TStoragePoolInfo& cur = spIt->second; if (cur.SchemeshardId != storagePool.SchemeshardId || cur.PathItemId != storagePool.PathItemId) { for (auto it = r.first; it != r.second; ++it) { - TGroupInfo *group = Groups.FindForUpdate(it->second); - Y_VERIFY(group); - group->ContentChanged = true; + GroupContentChanged.insert(it->second); } } cur = std::move(storagePool); // update existing storage pool @@ -219,7 +217,7 @@ namespace NKikimr::NBsController { for (const TVSlotInfo *vslot : groupInfo->VDisksInGroup) { DestroyVSlot(vslot->VSlotId); } - Groups.DeleteExistingEntry(groupId); + DeleteExistingGroup(groupId); } else { throw TExError() << "GroupId# " << groupId << " not found"; } @@ -663,7 +661,7 @@ namespace NKikimr::NBsController { vslot->Mood = TMood::Wipe; vslot->Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING; vslot->IsReady = false; - group->FailureModelChanged = true; + GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); } diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp index 
b13830b9b8..f7fb07b54c 100644 --- a/ydb/core/mind/bscontroller/config.cpp +++ b/ydb/core/mind/bscontroller/config.cpp @@ -266,16 +266,15 @@ namespace NKikimr::NBsController { TTransactionContext& txc, TString *errorDescription) { NIceDb::TNiceDb db(txc.DB); - for (auto&& [base, overlay] : state.Groups.Diff()) { - if (base && overlay->second && std::exchange(overlay->second->ContentChanged, false)) { - const auto& groupInfo = overlay->second; - ++groupInfo->Generation; - for (const TVSlotInfo *slot : groupInfo->VDisksInGroup) { - if (slot->GroupGeneration != groupInfo->Generation) { - TVSlotInfo *mutableSlot = state.VSlots.FindForUpdate(slot->VSlotId); - Y_VERIFY(mutableSlot); - mutableSlot->GroupGeneration = groupInfo->Generation; - } + for (TGroupId groupId : state.GroupContentChanged) { + TGroupInfo *group = state.Groups.FindForUpdate(groupId); + Y_VERIFY(group); + ++group->Generation; + for (const TVSlotInfo *slot : group->VDisksInGroup) { + if (slot->GroupGeneration != group->Generation) { + TVSlotInfo *mutableSlot = state.VSlots.FindForUpdate(slot->VSlotId); + Y_VERIFY(mutableSlot); + mutableSlot->GroupGeneration = group->Generation; } } } @@ -297,11 +296,8 @@ namespace NKikimr::NBsController { // check that group modification would not degrade failure model if (!suppressFailModelChecking) { - for (auto&& [base, overlay] : state.Groups.Diff()) { - if (!overlay->second || !base) { - continue; - } - if (auto& group = overlay->second; group->FailureModelChanged && group->VDisksInGroup) { + for (TGroupId groupId : state.GroupFailureModelChanged) { + if (const TGroupInfo *group = state.Groups.Find(groupId); group && group->VDisksInGroup) { // process only groups with changed content; create topology for group auto& topology = *group->Topology; // fill in vector of failed disks (that are not fully operational) @@ -314,14 +310,16 @@ namespace NKikimr::NBsController { // check the failure model auto& checker = *topology.QuorumChecker; if 
(!checker.CheckFailModelForGroup(failed)) { - *errorDescription = TStringBuilder() << "GroupId# " << base->first + *errorDescription = TStringBuilder() << "GroupId# " << groupId << " may lose data while modifying group"; return false; } else if (!suppressDegradedGroupsChecking && checker.IsDegraded(failed)) { - *errorDescription = TStringBuilder() << "GroupId# " << base->first + *errorDescription = TStringBuilder() << "GroupId# " << groupId << " may become DEGRADED while modifying group"; return false; } + } else { + Y_VERIFY(group); // group must exist } } } @@ -698,7 +696,7 @@ namespace NKikimr::NBsController { Y_VERIFY(donor); Y_VERIFY(donor->Mood == TMood::Donor); Y_VERIFY(donor->GroupId == vslot.GroupId); - Y_VERIFY(donor->GroupGeneration < vslot.GroupGeneration + group->ContentChanged); + Y_VERIFY(donor->GroupGeneration < vslot.GroupGeneration + GroupContentChanged.count(vslot.GroupId)); Y_VERIFY(donor->GetShortVDiskId() == vslot.GetShortVDiskId()); } }); @@ -736,7 +734,6 @@ namespace NKikimr::NBsController { for (const auto& slot : VDisksInGroup) { slot.Mutable().Group = this; } - FailureModelChanged = false; } void TBlobStorageController::Serialize(NKikimrBlobStorage::TDefineHostConfig *pb, const THostConfigId &id, diff --git a/ydb/core/mind/bscontroller/config.h b/ydb/core/mind/bscontroller/config.h index 9e7f2bf408..5ca88f2cb6 100644 --- a/ydb/core/mind/bscontroller/config.h +++ b/ydb/core/mind/bscontroller/config.h @@ -113,6 +113,9 @@ namespace NKikimr { TConfigFitAction Fit; + THashSet<TGroupId> GroupContentChanged; + THashSet<TGroupId> GroupFailureModelChanged; + public: TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp) : Self(controller) @@ -236,6 +239,12 @@ namespace NKikimr { return res; } + void DeleteExistingGroup(TGroupId groupId) { + Groups.DeleteExistingEntry(groupId); + GroupContentChanged.erase(groupId); + GroupFailureModelChanged.erase(groupId); + } + private: template<typename TCommand, 
typename TKey, typename TValue> static ui64 CheckGeneration(const TCommand &cmd, const TMap<TKey, TValue> &map, const TKey &id) { diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index f91bb48d4b..004ef1f7f0 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -350,8 +350,8 @@ namespace NKikimr { // create slots for the new group auto newSlots = CreateVSlotsForGroup(groupInfo, group, preservedSlots); - groupInfo->ContentChanged = true; - groupInfo->FailureModelChanged = true; + State.GroupContentChanged.insert(groupId); + State.GroupFailureModelChanged.insert(groupId); if (replacedSlots) { if (!IgnoreGroupFailModelChecks) { diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index a90e598c02..f9e4008e37 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -548,8 +548,6 @@ public: TGroupLatencyStats LatencyStats; TBoxStoragePoolId StoragePoolId; mutable TStorageStatusFlags StatusFlags; - bool ContentChanged = false; - bool FailureModelChanged = false; TActorId VirtualGroupSetupMachineId; diff --git a/ydb/core/mind/bscontroller/virtual_group.cpp b/ydb/core/mind/bscontroller/virtual_group.cpp index fc1659dd8f..03cb7ab4ef 100644 --- a/ydb/core/mind/bscontroller/virtual_group.cpp +++ b/ydb/core/mind/bscontroller/virtual_group.cpp @@ -224,12 +224,12 @@ namespace NKikimr::NBsController { const TGroupId GroupId; const std::weak_ptr<TToken> Token; std::optional<TConfigState> State; - const std::function<bool(TGroupInfo&)> Callback; + const std::function<bool(TGroupInfo&, TConfigState&)> Callback; public: TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_UPDATE_GROUP; } - TTxUpdateGroup(TVirtualGroupSetupMachine *machine, std::function<bool(TGroupInfo&)>&& callback) + TTxUpdateGroup(TVirtualGroupSetupMachine *machine, std::function<bool(TGroupInfo&, 
TConfigState&)>&& callback) : TTransactionBase(machine->Self) , Machine(machine) , MachineId(Machine->SelfId()) @@ -248,8 +248,8 @@ namespace NKikimr::NBsController { State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); TGroupInfo *group = State->Groups.FindForUpdate(GroupId); Y_VERIFY(group); - if (!Callback(*group)) { - State->Groups.DeleteExistingEntry(group->ID); + if (!Callback(*group, *State)) { + State->DeleteExistingGroup(group->ID); } group->CalculateGroupStatus(); TString error; @@ -331,7 +331,7 @@ namespace NKikimr::NBsController { template<typename T> void UpdateBlobDepotConfig(T&& callback) { - Self->Execute(std::make_unique<TTxUpdateGroup>(this, [this, callback](TGroupInfo& group) { + Self->Execute(std::make_unique<TTxUpdateGroup>(this, [this, callback](TGroupInfo& group, TConfigState&) { auto& config = GetConfig(&group); callback(config); TString data; @@ -444,7 +444,7 @@ namespace NKikimr::NBsController { NKikimrSubDomains::TDomainKey domainKey; domainKey.CopyFrom(domain.GetDomainKey()); - Self->Execute(std::make_unique<TTxUpdateGroup>(this, [=](TGroupInfo& group) { + Self->Execute(std::make_unique<TTxUpdateGroup>(this, [=](TGroupInfo& group, TConfigState&) { auto& config = GetConfig(&group); config.MutableHiveParams()->MutableObjectDomain()->CopyFrom(domainKey); TString data; @@ -602,7 +602,7 @@ namespace NKikimr::NBsController { } void CreateFailed(const TString& error) { - Self->Execute(std::make_unique<TTxUpdateGroup>(this, [=](TGroupInfo& group) { + Self->Execute(std::make_unique<TTxUpdateGroup>(this, [=](TGroupInfo& group, TConfigState&) { group.VirtualGroupState = NKikimrBlobStorage::EVirtualGroupState::CREATE_FAILED; group.NeedAlter = false; group.ErrorReason = error; @@ -638,7 +638,7 @@ namespace NKikimr::NBsController { void DeleteBlobDepot() { auto *group = GetGroup(); STLOG(PRI_DEBUG, BS_CONTROLLER, BSCVG15, "DeleteBlobDepot", (GroupId, group->ID)); - Self->Execute(std::make_unique<TTxUpdateGroup>(this, [](TGroupInfo& 
group) { + Self->Execute(std::make_unique<TTxUpdateGroup>(this, [](TGroupInfo& group, TConfigState&) { if (group.VDisksInGroup) { group.VirtualGroupName = {}; group.VirtualGroupState = {}; @@ -657,7 +657,7 @@ namespace NKikimr::NBsController { void Handle(TEvBlobDepot::TEvApplyConfigResult::TPtr /*ev*/) { NTabletPipe::CloseAndForgetClient(SelfId(), BlobDepotPipeId); - Self->Execute(std::make_unique<TTxUpdateGroup>(this, [&](TGroupInfo& group) { + Self->Execute(std::make_unique<TTxUpdateGroup>(this, [&](TGroupInfo& group, TConfigState& state) { group.VirtualGroupState = NKikimrBlobStorage::EVirtualGroupState::WORKING; auto& config = GetConfig(&group); Y_VERIFY(config.HasTabletId()); @@ -665,7 +665,7 @@ namespace NKikimr::NBsController { group.NeedAlter = false; if (group.DecommitStatus == NKikimrBlobStorage::TGroupDecommitStatus::PENDING) { group.DecommitStatus = NKikimrBlobStorage::TGroupDecommitStatus::IN_PROGRESS; - group.ContentChanged = true; + state.GroupContentChanged.insert(GroupId); } return true; })); @@ -822,8 +822,8 @@ namespace NKikimr::NBsController { } group->VDisksInGroup.clear(); group->DecommitStatus = NKikimrBlobStorage::TGroupDecommitStatus::DONE; - group->ContentChanged = true; group->Topology = std::make_shared<TBlobStorageGroupInfo::TTopology>(group->Topology->GType, 0, 0, 0); + state.GroupContentChanged.insert(groupId); } STLOG(PRI_INFO, BS_CONTROLLER, BSCVG10, "decommission update processed", (Status, Status), |