aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author alexvru <alexvru@ydb.tech> 2022-08-24 12:41:44 +0300
committer alexvru <alexvru@ydb.tech> 2022-08-24 12:41:44 +0300
commit de502915fff58d768033431a6c0b3baab3f5be9a (patch)
tree fe9df0ae36deee40c0f28c12a033c108c5c78880
parent 30a75ad702329a4178784acc5bad610931c32382 (diff)
download ydb-de502915fff58d768033431a6c0b3baab3f5be9a.tar.gz
Fix bug in BS_CONTROLLER
-rw-r--r-- ydb/core/mind/bscontroller/bsc.cpp 11
-rw-r--r-- ydb/core/mind/bscontroller/config.cpp 19
-rw-r--r-- ydb/core/mind/bscontroller/config_fit_groups.cpp 2
-rw-r--r-- ydb/core/mind/bscontroller/impl.h 25
-rw-r--r-- ydb/core/mind/bscontroller/register_node.cpp 5
-rw-r--r-- ydb/core/mind/bscontroller/self_heal.cpp 10
-rw-r--r-- ydb/core/mind/bscontroller/sys_view.cpp 2
7 files changed, 49 insertions, 25 deletions
diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp
index 7393fb33bc0..c977d67cde0 100644
--- a/ydb/core/mind/bscontroller/bsc.cpp
+++ b/ydb/core/mind/bscontroller/bsc.cpp
@@ -235,6 +235,15 @@ void TBlobStorageController::ValidateInternalState() {
Y_VERIFY(donor->Mood == TMood::Donor);
Y_VERIFY(donor->AcceptorVSlotId == vslotId);
}
+ if (vslot->Group) {
+ if (vslot->Status == NKikimrBlobStorage::EVDiskStatus::READY) {
+ Y_VERIFY_DEBUG(vslot->IsReady || vslot->IsInVSlotReadyTimestampQ());
+ } else {
+ Y_VERIFY_DEBUG(!vslot->IsReady && !vslot->IsInVSlotReadyTimestampQ());
+ }
+ } else {
+ Y_VERIFY_DEBUG(!vslot->IsInVSlotReadyTimestampQ());
+ }
}
for (const auto& [groupId, group] : GroupMap) {
Y_VERIFY(groupId == group->ID);
@@ -291,7 +300,7 @@ ui32 TBlobStorageController::GetEventPriority(IEventHandle *ev) {
const auto& record = msg->Record;
for (const auto& item : record.GetVDiskStatus()) {
const TVSlotId vslotId(item.GetNodeId(), item.GetPDiskId(), item.GetVSlotId());
- if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->GetStatus() > item.GetStatus()) {
+ if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->Status > item.GetStatus()) {
return 1;
} else if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end() && it->second.VDiskStatus > item.GetStatus()) {
return 1;
diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp
index d3f8e6369f2..09a57e2bb67 100644
--- a/ydb/core/mind/bscontroller/config.cpp
+++ b/ydb/core/mind/bscontroller/config.cpp
@@ -391,14 +391,18 @@ namespace NKikimr::NBsController {
CommitSysViewUpdates(state);
CommitVirtualGroupUpdates(state);
- // remove deleted vslots from VSlotReadyTimestampQ
+ // add updated vslots to VSlotReadyTimestampQ and remove deleted ones from it
+ const TMonotonic now = TActivationContext::Monotonic();
for (auto&& [base, overlay] : state.VSlots.Diff()) {
if (!overlay->second || !overlay->second->Group) { // deleted one
- base->second->DropFromVSlotReadyTimestampQ();
- if (overlay->second) {
- overlay->second->ResetVSlotReadyTimestampIter();
- }
+ (overlay->second ? overlay->second : base->second)->DropFromVSlotReadyTimestampQ();
NotReadyVSlotIds.erase(overlay->first);
+ } else if (overlay->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) {
+ overlay->second->DropFromVSlotReadyTimestampQ();
+ } else if (!base || base->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) {
+ overlay->second->PutInVSlotReadyTimestampQ(now);
+ } else {
+ Y_VERIFY_DEBUG(overlay->second->IsReady || overlay->second->IsInVSlotReadyTimestampQ());
}
}
@@ -406,6 +410,9 @@ namespace NKikimr::NBsController {
state.CheckConsistency();
state.Commit();
+ ValidateInternalState();
+
+ ScheduleVSlotReadyUpdate();
return true;
}
@@ -838,7 +845,7 @@ namespace NKikimr::NBsController {
pb->SetAllocatedSize(vslot.Metrics.GetAllocatedSize());
pb->MutableVDiskMetrics()->CopyFrom(vslot.Metrics);
pb->MutableVDiskMetrics()->ClearVDiskId();
- pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.GetStatus()));
+ pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.Status));
for (const auto& [vslotId, vdiskId] : vslot.Donors) {
auto *item = pb->AddDonors();
Serialize(item->MutableVSlotId(), vslotId);
diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp
index e86d44bae79..dfb88493df9 100644
--- a/ydb/core/mind/bscontroller/config_fit_groups.cpp
+++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp
@@ -429,7 +429,7 @@ namespace NKikimr {
// also we have to find replicating VSlots on this PDisk and assume they consume up to
// max(vslotSize for every slot in group), not their actual AllocatedSize
for (const auto& [id, slot] : info.VSlotsOnPDisk) {
- if (slot->Group && slot->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) {
+ if (slot->Group && slot->Status != NKikimrBlobStorage::EVDiskStatus::READY) {
ui64 maxGroupSlotSize = 0;
for (const TVSlotInfo *peer : slot->Group->VDisksInGroup) {
maxGroupSlotSize = Max(maxGroupSlotSize, peer->Metrics.GetAllocatedSize());
diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h
index 5afdba9c653..96b803f5fe4 100644
--- a/ydb/core/mind/bscontroller/impl.h
+++ b/ydb/core/mind/bscontroller/impl.h
@@ -81,7 +81,7 @@ public:
class TGroupFitter;
class TSelfHealActor;
- using TVSlotReadyTimestampQ = std::list<std::pair<TInstant, TVSlotInfo*>>;
+ using TVSlotReadyTimestampQ = std::list<std::pair<TMonotonic, TVSlotInfo*>>;
class TVSlotInfo : public TIndirectReferable<TVSlotInfo> {
public:
@@ -120,30 +120,35 @@ public:
private:
TVSlotReadyTimestampQ& VSlotReadyTimestampQ;
TVSlotReadyTimestampQ::iterator VSlotReadyTimestampIter;
- NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
// VDisk will be considered READY only after this period has passed since it reported its READY state
static constexpr TDuration ReadyStablePeriod = TDuration::Seconds(15);
public:
+ NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
bool IsReady = false;
public:
- void SetStatus(NKikimrBlobStorage::EVDiskStatus status, TInstant now) {
+ void SetStatus(NKikimrBlobStorage::EVDiskStatus status, TMonotonic now) {
if (status != Status) {
Status = status;
IsReady = false;
if (status == NKikimrBlobStorage::EVDiskStatus::READY) {
- const TInstant readyAfter = now + ReadyStablePeriod; // vdisk will be treated as READY one shortly, but not now
- Y_VERIFY(VSlotReadyTimestampIter == TVSlotReadyTimestampQ::iterator());
- Y_VERIFY(Group);
- VSlotReadyTimestampIter = VSlotReadyTimestampQ.emplace(VSlotReadyTimestampQ.end(), readyAfter, this);
+ PutInVSlotReadyTimestampQ(now);
} else {
DropFromVSlotReadyTimestampQ();
}
}
}
+ void PutInVSlotReadyTimestampQ(TMonotonic now) {
+ const TMonotonic readyAfter = now + ReadyStablePeriod; // vdisk will be treated as READY one shortly, but not now
+ Y_VERIFY(VSlotReadyTimestampIter == TVSlotReadyTimestampQ::iterator());
+ Y_VERIFY(Group);
+ Y_VERIFY_DEBUG(VSlotReadyTimestampQ.empty() || VSlotReadyTimestampQ.back().first <= readyAfter);
+ VSlotReadyTimestampIter = VSlotReadyTimestampQ.emplace(VSlotReadyTimestampQ.end(), readyAfter, this);
+ }
+
void DropFromVSlotReadyTimestampQ() {
if (VSlotReadyTimestampIter != TVSlotReadyTimestampQ::iterator()) {
VSlotReadyTimestampQ.erase(VSlotReadyTimestampIter);
@@ -155,8 +160,8 @@ public:
VSlotReadyTimestampIter = {};
}
- NKikimrBlobStorage::EVDiskStatus GetStatus() const {
- return Status;
+ bool IsInVSlotReadyTimestampQ() const {
+ return VSlotReadyTimestampIter != TVSlotReadyTimestampQ::iterator();
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -2009,7 +2014,7 @@ public:
Y_VERIFY(VSlotReadyUpdateScheduled);
VSlotReadyUpdateScheduled = false;
- const TInstant now = TActivationContext::Now();
+ const TMonotonic now = TActivationContext::Monotonic();
THashSet<TGroupInfo*> groups;
for (auto it = VSlotReadyTimestampQ.begin(); it != VSlotReadyTimestampQ.end() && it->first <= now;
it = VSlotReadyTimestampQ.erase(it)) {
diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp
index d4dac9cf484..7479b8b16ef 100644
--- a/ydb/core/mind/bscontroller/register_node.cpp
+++ b/ydb/core/mind/bscontroller/register_node.cpp
@@ -479,6 +479,7 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) {
}
const TInstant now = TActivationContext::Now();
+ const TMonotonic mono = TActivationContext::Monotonic();
std::vector<std::pair<TVSlotId, TInstant>> lastSeenReadyQ;
for (auto it = PDisks.lower_bound(TPDiskId::MinForNode(nodeId)); it != PDisks.end() && it->first.NodeId == nodeId; ++it) {
it->second->UpdateOperational(false);
@@ -493,9 +494,9 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId) {
lastSeenReadyQ.emplace_back(it->second->VSlotId, now);
NotReadyVSlotIds.insert(it->second->VSlotId);
}
- it->second->SetStatus(NKikimrBlobStorage::EVDiskStatus::ERROR, now);
+ it->second->SetStatus(NKikimrBlobStorage::EVDiskStatus::ERROR, mono);
const_cast<TGroupInfo*>(group)->CalculateGroupStatus();
- sh->VDiskStatusUpdate.emplace_back(it->second->GetVDiskId(), it->second->GetStatus());
+ sh->VDiskStatusUpdate.emplace_back(it->second->GetVDiskId(), it->second->Status);
ScrubState.UpdateVDiskState(&*it->second);
}
}
diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp
index 24c6045625e..e2d6bd391a3 100644
--- a/ydb/core/mind/bscontroller/self_heal.cpp
+++ b/ydb/core/mind/bscontroller/self_heal.cpp
@@ -639,7 +639,7 @@ namespace NKikimr::NBsController {
slot->PDisk->ShouldBeSettledBySelfHeal(),
slot->PDisk->BadInTermsOfSelfHeal(),
slot->PDisk->Decommitted(),
- slot->GetStatus(),
+ slot->Status,
};
}
}
@@ -649,6 +649,7 @@ namespace NKikimr::NBsController {
const google::protobuf::RepeatedPtrField<NKikimrBlobStorage::TVDiskStatus>& s) {
THashSet<TGroupInfo*> groups, status;
const TInstant now = TActivationContext::Now();
+ const TMonotonic mono = TActivationContext::Monotonic();
std::vector<std::pair<TVSlotId, TInstant>> lastSeenReadyQ;
std::unique_ptr<TEvPrivate::TEvDropDonor> dropDonorEv;
@@ -662,7 +663,7 @@ namespace NKikimr::NBsController {
const bool was = slot->IsOperational();
if (const TGroupInfo *group = slot->Group) {
const bool wasReady = slot->IsReady;
- slot->SetStatus(m.GetStatus(), now);
+ slot->SetStatus(m.GetStatus(), mono);
if (slot->IsReady != wasReady) {
ScrubState.UpdateVDiskState(slot);
if (wasReady) {
@@ -671,14 +672,13 @@ namespace NKikimr::NBsController {
NotReadyVSlotIds.insert(slot->VSlotId);
}
}
- ScheduleVSlotReadyUpdate();
status.insert(const_cast<TGroupInfo*>(group));
ev->VDiskStatusUpdate.emplace_back(vdiskId, m.GetStatus());
if (!was && slot->IsOperational() && !group->SeenOperational) {
groups.insert(const_cast<TGroupInfo*>(group));
}
}
- if (slot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::READY) {
+ if (slot->Status == NKikimrBlobStorage::EVDiskStatus::READY) {
// we can release donor slots without further notice when the VDisk is completely replicated; we
// intentionally use Status here instead of IsReady to prevent waiting
for (const auto& [donorVSlotId, donorVDiskId] : slot->Donors) {
@@ -722,6 +722,8 @@ namespace NKikimr::NBsController {
for (TGroupInfo *group : status) {
group->CalculateGroupStatus();
}
+
+ ScheduleVSlotReadyUpdate();
}
void TBlobStorageController::UpdateSelfHealCounters() {
diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp
index 159a38ec700..81cbc89b545 100644
--- a/ydb/core/mind/bscontroller/sys_view.cpp
+++ b/ydb/core/mind/bscontroller/sys_view.cpp
@@ -344,7 +344,7 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId,
}
void CopyInfo(NKikimrSysView::TVSlotInfo* info, const THolder<TBlobStorageController::TVSlotInfo>& vSlotInfo) {
- SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->GetStatus(), vSlotInfo->Kind,
+ SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->Status, vSlotInfo->Kind,
vSlotInfo->IsBeingDeleted());
}