about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Semyon Danilov <senya@ydb.tech> 2025-05-02 13:41:14 +0400
committer GitHub <noreply@github.com> 2025-05-02 13:41:14 +0400
commit cfede7fd10c5032b322bc335caff4d30c7674e6f (patch)
tree 4813348df001643c4ee49dbb6fa7041b292c9ac2
parent 1978732b4e8a3759e7593768d65a6025882f6d33 (diff)
download ydb-cfede7fd10c5032b322bc335caff4d30c7674e6f.tar.gz
Handle PDisk stop event if PDisk is in error or init state (#17780)
Closes #17953
-rw-r--r-- ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp 30
-rw-r--r-- ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp 37
-rw-r--r-- ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h 4
-rw-r--r-- ydb/library/pdisk_io/aio_linux.cpp 22
4 files changed, 80 insertions, 13 deletions
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp
index 7ef4af63839..fb1dce11ac9 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp
@@ -642,14 +642,27 @@ public:
ev->Sender, evYardInit.CutLogID, evYardInit.WhiteboardProxyId, evYardInit.SlotId);
}
- void InitHandle(NPDisk::TEvYardControl::TPtr &ev) {
+ void OnPDiskStop(TActorId &sender, void *cookie) {
+ if (PDisk) {
+ PDisk->Stop();
+ *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped;
+ *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped;
+ *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl;
+ }
+ InitError("Received TEvYardControl::PDiskStop");
+ Send(sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, cookie, {}));
+ }
+ void InitHandle(NPDisk::TEvYardControl::TPtr &ev) {
const NPDisk::TEvYardControl &evControl = *ev->Get();
switch (evControl.Action) {
case TEvYardControl::PDiskStart:
ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(),
new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
break;
+ case TEvYardControl::PDiskStop:
+ OnPDiskStop(ev->Sender, evControl.Cookie);
+ break;
default:
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie,
"Unexpected control action for pdisk in StateInit"));
@@ -837,12 +850,19 @@ public:
break;
}
default:
+ // Only PDiskStart is allowed in StateError. PDiskStop is not allowed since PDisk in error state should already be stopped
+ // or in the process of being stopped.
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, StateErrorReason));
PDisk->Mon.YardControl.CountResponse();
break;
}
}
+ void ErrorHandle(TEvReadFormatResult::TPtr &ev) {
+ // Just ignore the event, disk is in error state.
+ Y_UNUSED(ev);
+ }
+
void ErrorHandle(NPDisk::TEvAskForCutLog::TPtr &ev) {
// Just ignore the event, can't send cut log in this state.
Y_UNUSED(ev);
@@ -968,12 +988,7 @@ public:
Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
break;
case TEvYardControl::PDiskStop:
- PDisk->Stop();
- *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped;
- *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped;
- *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl;
- InitError("Received TEvYardControl::PDiskStop");
- Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {}));
+ OnPDiskStop(ev->Sender, evControl.Cookie);
break;
case TEvYardControl::GetPDiskPointer:
Y_VERIFY_S(!evControl.Cookie, PCtx->PDiskLogPrefix);
@@ -1505,6 +1520,7 @@ public:
hFunc(NPDisk::TEvChunkForget, ErrorHandle);
hFunc(NPDisk::TEvYardControl, ErrorHandle);
hFunc(NPDisk::TEvAskForCutLog, ErrorHandle);
+ hFunc(NPDisk::TEvReadFormatResult, ErrorHandle);
hFunc(NPDisk::TEvWhiteboardReportResult, Handle);
hFunc(NPDisk::TEvHttpInfoResult, Handle);
hFunc(NPDisk::TEvReadLogContinue, Handle);
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp
index c6242855adf..373b2b5c5f2 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp
@@ -108,6 +108,43 @@ Y_UNIT_TEST_SUITE(TPDiskTest) {
testCtx.Send(new NActors::TEvents::TEvPoisonPill());
}
+ Y_UNIT_TEST(TestPDiskActorPDiskStopBroken) {
+ TActorTestContext testCtx{{}};
+
+ testCtx.GetRuntime()->WaitFor("Block device start", [&] {
+ return testCtx.SafeRunOnPDisk([&] (auto* pdisk) {
+ // Check that the PDisk is up
+ return pdisk->BlockDevice->IsGood();
+ });
+ });
+
+ testCtx.Send(new NPDisk::TEvDeviceError("test"));
+
+ // This doesn't stop the PDisk, it will be stopped by TEvDeviceError some time in the future
+ testCtx.TestResponse<NPDisk::TEvYardControlResult>(
+ new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr),
+ NKikimrProto::CORRUPTED);
+
+ testCtx.GetRuntime()->WaitFor("Block device stop", [&] {
+ return testCtx.SafeRunOnPDisk([&] (auto* pdisk) {
+ // Check that the PDisk is stopped
+ return !pdisk->BlockDevice->IsGood();
+ });
+ });
+
+ testCtx.Send(new NActors::TEvents::TEvPoisonPill());
+ }
+
+ Y_UNIT_TEST(TestPDiskActorPDiskStopUninitialized) {
+ TActorTestContext testCtx{{}};
+
+ testCtx.TestResponse<NPDisk::TEvYardControlResult>(
+ new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr),
+ NKikimrProto::OK);
+
+ testCtx.Send(new NActors::TEvents::TEvPoisonPill());
+ }
+
Y_UNIT_TEST(TestChunkWriteRelease) {
for (ui32 i = 0; i < 16; ++i) {
TestChunkWriteReleaseRun();
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h
index d29f2e43b9d..b7b49b21aa0 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h
@@ -120,6 +120,10 @@ public:
return nullptr;
}
+ TTestActorRuntime* GetRuntime() {
+ return Runtime.Get();
+ }
+
void UpdateConfigRecreatePDisk(TIntrusivePtr<TPDiskConfig> cfg, bool reformat = false) {
if (PDiskActor) {
TestResponse<NPDisk::TEvYardControlResult>(
diff --git a/ydb/library/pdisk_io/aio_linux.cpp b/ydb/library/pdisk_io/aio_linux.cpp
index 8d0a6cb6649..59fc505f698 100644
--- a/ydb/library/pdisk_io/aio_linux.cpp
+++ b/ydb/library/pdisk_io/aio_linux.cpp
@@ -154,23 +154,33 @@ public:
}
EIoResult Destroy() override {
+ EIoResult result = EIoResult::Ok;
+
int ret = io_destroy(IoContext);
if (ret < 0) {
switch (-ret) {
- case EFAULT: return EIoResult::BadAddress;
- case EINVAL: return EIoResult::InvalidArgument;
- case ENOSYS: return EIoResult::FunctionNotImplemented;
- default: Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret
- << " strerror# " << strerror(-ret));
+ case EFAULT:
+ result = EIoResult::BadAddress;
+ break;
+ case EINVAL:
+ result = EIoResult::InvalidArgument;
+ break;
+ case ENOSYS:
+ result = EIoResult::FunctionNotImplemented;
+ break;
+ default:
+ Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret << " strerror# " << strerror(-ret));
}
}
+
if (File) {
ret = File->Flock(LOCK_UN);
Y_VERIFY_S(ret == 0, "Error in Flock(LOCK_UN), errno# " << errno << " strerror# " << strerror(errno));
bool isOk = File->Close();
Y_VERIFY_S(isOk, PDiskInfo << " error on file close, errno# " << errno << " strerror# " << strerror(errno));
}
- return EIoResult::Ok;
+
+ return result;
}
i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override {