diff options
author | Semyon Danilov <senya@ydb.tech> | 2025-05-02 13:41:14 +0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-02 13:41:14 +0400 |
commit | cfede7fd10c5032b322bc335caff4d30c7674e6f (patch) | |
tree | 4813348df001643c4ee49dbb6fa7041b292c9ac2 | |
parent | 1978732b4e8a3759e7593768d65a6025882f6d33 (diff) | |
download | ydb-cfede7fd10c5032b322bc335caff4d30c7674e6f.tar.gz |
Handle PDisk stop event if PDisk is in error or init state (#17780)
Closes #17953
-rw-r--r-- | ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp | 30 | ||||
-rw-r--r-- | ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp | 37 | ||||
-rw-r--r-- | ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h | 4 | ||||
-rw-r--r-- | ydb/library/pdisk_io/aio_linux.cpp | 22 |
4 files changed, 80 insertions, 13 deletions
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp index 7ef4af63839..fb1dce11ac9 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_actor.cpp @@ -642,14 +642,27 @@ public: ev->Sender, evYardInit.CutLogID, evYardInit.WhiteboardProxyId, evYardInit.SlotId); } - void InitHandle(NPDisk::TEvYardControl::TPtr &ev) { + void OnPDiskStop(TActorId &sender, void *cookie) { + if (PDisk) { + PDisk->Stop(); + *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped; + *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped; + *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl; + } + InitError("Received TEvYardControl::PDiskStop"); + Send(sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, cookie, {})); + } + void InitHandle(NPDisk::TEvYardControl::TPtr &ev) { const NPDisk::TEvYardControl &evControl = *ev->Get(); switch (evControl.Action) { case TEvYardControl::PDiskStart: ControledStartResult = MakeHolder<IEventHandle>(ev->Sender, SelfId(), new TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); break; + case TEvYardControl::PDiskStop: + OnPDiskStop(ev->Sender, evControl.Cookie); + break; default: Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, "Unexpected control action for pdisk in StateInit")); @@ -837,12 +850,19 @@ public: break; } default: + // Only PDiskStart is allowed in StateError. PDiskStop is not allowed since PDisk in error state should already be stopped + // or in the process of being stopped. Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::CORRUPTED, evControl.Cookie, StateErrorReason)); PDisk->Mon.YardControl.CountResponse(); break; } } + void ErrorHandle(TEvReadFormatResult::TPtr &ev) { + // Just ignore the event, disk is in error state. + Y_UNUSED(ev); + } + void ErrorHandle(NPDisk::TEvAskForCutLog::TPtr &ev) { // Just ignore the event, can't send cut log in this state. Y_UNUSED(ev); @@ -968,12 +988,7 @@ public: Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); break; case TEvYardControl::PDiskStop: - PDisk->Stop(); - *PDisk->Mon.PDiskState = NKikimrBlobStorage::TPDiskState::Stopped; - *PDisk->Mon.PDiskBriefState = TPDiskMon::TPDisk::Stopped; - *PDisk->Mon.PDiskDetailedState = TPDiskMon::TPDisk::StoppedByYardControl; - InitError("Received TEvYardControl::PDiskStop"); - Send(ev->Sender, new NPDisk::TEvYardControlResult(NKikimrProto::OK, evControl.Cookie, {})); + OnPDiskStop(ev->Sender, evControl.Cookie); break; case TEvYardControl::GetPDiskPointer: Y_VERIFY_S(!evControl.Cookie, PCtx->PDiskLogPrefix); @@ -1505,6 +1520,7 @@ public: hFunc(NPDisk::TEvChunkForget, ErrorHandle); hFunc(NPDisk::TEvYardControl, ErrorHandle); hFunc(NPDisk::TEvAskForCutLog, ErrorHandle); + hFunc(NPDisk::TEvReadFormatResult, ErrorHandle); hFunc(NPDisk::TEvWhiteboardReportResult, Handle); hFunc(NPDisk::TEvHttpInfoResult, Handle); hFunc(NPDisk::TEvReadLogContinue, Handle); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp index c6242855adf..373b2b5c5f2 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp @@ -108,6 +108,43 @@ Y_UNIT_TEST_SUITE(TPDiskTest) { testCtx.Send(new NActors::TEvents::TEvPoisonPill()); } + Y_UNIT_TEST(TestPDiskActorPDiskStopBroken) { + TActorTestContext testCtx{{}}; + + testCtx.GetRuntime()->WaitFor("Block device start", [&] { + return testCtx.SafeRunOnPDisk([&] (auto* pdisk) { + // Check that the PDisk is up + return pdisk->BlockDevice->IsGood(); + }); + }); + + testCtx.Send(new NPDisk::TEvDeviceError("test")); + + // This doesn't stop the PDisk, it will be stopped by TEvDeviceError some time in the future + testCtx.TestResponse<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), + NKikimrProto::CORRUPTED); + + testCtx.GetRuntime()->WaitFor("Block device stop", [&] { + return testCtx.SafeRunOnPDisk([&] (auto* pdisk) { + // Check that the PDisk is stopped + return !pdisk->BlockDevice->IsGood(); + }); + }); + + testCtx.Send(new NActors::TEvents::TEvPoisonPill()); + } + + Y_UNIT_TEST(TestPDiskActorPDiskStopUninitialized) { + TActorTestContext testCtx{{}}; + + testCtx.TestResponse<NPDisk::TEvYardControlResult>( + new NPDisk::TEvYardControl(NPDisk::TEvYardControl::PDiskStop, nullptr), + NKikimrProto::OK); + + testCtx.Send(new NActors::TEvents::TEvPoisonPill()); + } + Y_UNIT_TEST(TestChunkWriteRelease) { for (ui32 i = 0; i < 16; ++i) { TestChunkWriteReleaseRun(); diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h index d29f2e43b9d..b7b49b21aa0 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut_env.h @@ -120,6 +120,10 @@ public: return nullptr; } + TTestActorRuntime* GetRuntime() { + return Runtime.Get(); + } + void UpdateConfigRecreatePDisk(TIntrusivePtr<TPDiskConfig> cfg, bool reformat = false) { if (PDiskActor) { TestResponse<NPDisk::TEvYardControlResult>( diff --git a/ydb/library/pdisk_io/aio_linux.cpp b/ydb/library/pdisk_io/aio_linux.cpp index 8d0a6cb6649..59fc505f698 100644 --- a/ydb/library/pdisk_io/aio_linux.cpp +++ b/ydb/library/pdisk_io/aio_linux.cpp @@ -154,23 +154,33 @@ public: } EIoResult Destroy() override { + EIoResult result = EIoResult::Ok; + int ret = io_destroy(IoContext); if (ret < 0) { switch (-ret) { - case EFAULT: return EIoResult::BadAddress; - case EINVAL: return EIoResult::InvalidArgument; - case ENOSYS: return EIoResult::FunctionNotImplemented; - default: Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret - << " strerror# " << strerror(-ret)); + case EFAULT: + result = EIoResult::BadAddress; + break; + case EINVAL: + result = EIoResult::InvalidArgument; + break; + case ENOSYS: + result = EIoResult::FunctionNotImplemented; + break; + default: + Y_FAIL_S(PDiskInfo << " unexpected error in io_destroy, error# " << -ret << " strerror# " << strerror(-ret)); } } + if (File) { ret = File->Flock(LOCK_UN); Y_VERIFY_S(ret == 0, "Error in Flock(LOCK_UN), errno# " << errno << " strerror# " << strerror(errno)); bool isOk = File->Close(); Y_VERIFY_S(isOk, PDiskInfo << " error on file close, errno# " << errno << " strerror# " << strerror(errno)); } - return EIoResult::Ok; + + return result; } i64 GetEvents(ui64 minEvents, ui64 maxEvents, TAsyncIoOperationResult *events, TDuration timeout) override { |