diff options
author | vporyadke <zalyalov@ydb.tech> | 2024-11-29 14:22:22 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-29 12:22:22 +0100 |
commit | a4d9d0f7390d6908cad534bb1a7563a486d8d7d6 (patch) | |
tree | cf30035a20c414249aec294c8cf32c696b2d0239 | |
parent | b71bea96dfc56755e2d35880628d8c622952ff5c (diff) | |
download | ydb-a4d9d0f7390d6908cad534bb1a7563a486d8d7d6.tar.gz |
fix not-persisted change when locking tablet (#12109)
-rw-r--r-- | ydb/core/mind/hive/hive_ut.cpp | 42 | ||||
-rw-r--r-- | ydb/core/mind/hive/tx__lock_tablet.cpp | 1 |
2 files changed, 42 insertions, 1 deletion
diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index dd19b3025a..cf3bc1fa32 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -16,6 +16,7 @@ #include <ydb/core/mind/tenant_pool.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/tablet/tablet_impl.h> +#include <ydb/core/testlib/actors/block_events.h> #include <ydb/core/testlib/basics/appdata.h> #include <ydb/core/testlib/basics/helpers.h> #include <ydb/core/testlib/tablet_helpers.h> @@ -6154,13 +6155,14 @@ Y_UNIT_TEST_SUITE(THiveTest) { ActorIdToProto(owner, event->Record.MutableOwnerActor()); } TActorId senderB = runtime.AllocateEdgeActor(nodeIndex); + Ctest << "Send UnlockTablet\n"; runtime.SendToPipe(hiveTablet, senderB, event.Release(), nodeIndex, GetPipeConfigWithRetries()); TAutoPtr<IEventHandle> handle; auto result = runtime.GrabEdgeEventRethrow<TEvHive::TEvUnlockTabletExecutionResult>(handle); UNIT_ASSERT(result); UNIT_ASSERT_VALUES_EQUAL(result->Record.GetTabletID(), tabletId); - UNIT_ASSERT_VALUES_EQUAL(result->Record.GetStatus(), expectedStatus); + UNIT_ASSERT_C(result->Record.GetStatus() == expectedStatus, "Expected status " << expectedStatus << ", got reply " << result->Record.ShortDebugString()); } Y_UNIT_TEST(TestLockTabletExecutionBadUnlock) { @@ -6237,6 +6239,44 @@ Y_UNIT_TEST_SUITE(THiveTest) { VerifyLockTabletExecutionLost(runtime, tabletId, owner); } + Y_UNIT_TEST(TestLockTabletExecutionLocalGone) { + TTestBasicRuntime runtime(3, false); + Setup(runtime, false); + CreateLocal(runtime, 0); // only the 1st node has local running + const ui64 hiveTablet = MakeDefaultHiveID(); + const ui64 testerTablet = MakeTabletID(false, 1); + const TActorId hiveActor = CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + const TActorId senderA = runtime.AllocateEdgeActor(0); + runtime.EnableScheduleForActor(hiveActor); + + TTabletTypes::EType tabletType = 
TTabletTypes::Dummy; + THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 0, tabletType, BINDED_CHANNELS)); + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + MakeSureTabletIsUp(runtime, tabletId, 0); + + TActorId owner = runtime.AllocateEdgeActor(1); + SendLockTabletExecution(runtime, hiveTablet, tabletId, 1, NKikimrProto::OK, owner, 100500); + MakeSureTabletIsDown(runtime, tabletId, 0); + + // Block events related to node disconnect + // TEvents::TEvUndelivered - actor system does not guarantee that this event will be sent + // TEvLocal::TEvStatus - we cannot expect that the disconnecting node will always be able to send this + // TEvInterconnect::TEvNodeDisconnected - we will send this one, but follow it up with NodeConnected + // This is simulating a case when a new host is using the old node id and that new host does not run Local + TBlockEvents<TEvents::TEvUndelivered> blockUndleivered(runtime, [](auto&& ev) { return ev->Get()->SourceType == TEvLocal::EvPing; }); + TBlockEvents<TEvLocal::TEvStatus> blockStatus(runtime, [](auto&& ev) { return ev->Get()->Record.GetStatus() != NKikimrProto::OK; }); + SendKillLocal(runtime, 0); + runtime.SendToPipe(hiveTablet, senderA, new TEvInterconnect::TEvNodeDisconnected(runtime.GetNodeId(0)), 0, GetPipeConfigWithRetries()); + CreateLocal(runtime, 2); + runtime.Register(CreateTabletKiller(hiveTablet)); + runtime.SendToPipe(hiveTablet, senderA, new TEvInterconnect::TEvNodeConnected(runtime.GetNodeId(0)), 0, GetPipeConfigWithRetries()); + + // Unlocking with the same owner should succeed and boot the tablet + SendUnlockTabletExecution(runtime, hiveTablet, tabletId, 1, NKikimrProto::OK, owner); + WaitForTabletIsUp(runtime, tabletId, 0); + + } + Y_UNIT_TEST(TestExternalBoot) { TTestBasicRuntime runtime(1, false); Setup(runtime, true); diff --git a/ydb/core/mind/hive/tx__lock_tablet.cpp b/ydb/core/mind/hive/tx__lock_tablet.cpp index 
7874403abe..3afa2802b6 100644 --- a/ydb/core/mind/hive/tx__lock_tablet.cpp +++ b/ydb/core/mind/hive/tx__lock_tablet.cpp @@ -95,6 +95,7 @@ public: follower.InitiateStop(SideEffects); } tablet->InitiateStop(SideEffects); + db.Table<Schema::Tablet>().Key(TabletId).Update<Schema::Tablet::LeaderNode>(0); } if (tablet->LockedToActor == OwnerActor && tablet->PendingUnlockSeqNo == 0) { // Lock is still valid, watch for node disconnections |