aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author vporyadke <zalyalov@ydb.tech> 2024-11-29 14:22:22 +0300
committer GitHub <noreply@github.com> 2024-11-29 12:22:22 +0100
commit a4d9d0f7390d6908cad534bb1a7563a486d8d7d6 (patch)
tree cf30035a20c414249aec294c8cf32c696b2d0239
parent b71bea96dfc56755e2d35880628d8c622952ff5c (diff)
download ydb-a4d9d0f7390d6908cad534bb1a7563a486d8d7d6.tar.gz
fix not-persisted change when locking tablet (#12109)
-rw-r--r-- ydb/core/mind/hive/hive_ut.cpp 42
-rw-r--r-- ydb/core/mind/hive/tx__lock_tablet.cpp 1
2 files changed, 42 insertions, 1 deletion
diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp
index dd19b3025a..cf3bc1fa32 100644
--- a/ydb/core/mind/hive/hive_ut.cpp
+++ b/ydb/core/mind/hive/hive_ut.cpp
@@ -16,6 +16,7 @@
#include <ydb/core/mind/tenant_pool.h>
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
#include <ydb/core/tablet/tablet_impl.h>
+#include <ydb/core/testlib/actors/block_events.h>
#include <ydb/core/testlib/basics/appdata.h>
#include <ydb/core/testlib/basics/helpers.h>
#include <ydb/core/testlib/tablet_helpers.h>
@@ -6154,13 +6155,14 @@ Y_UNIT_TEST_SUITE(THiveTest) {
ActorIdToProto(owner, event->Record.MutableOwnerActor());
}
TActorId senderB = runtime.AllocateEdgeActor(nodeIndex);
+ Ctest << "Send UnlockTablet\n";
runtime.SendToPipe(hiveTablet, senderB, event.Release(), nodeIndex, GetPipeConfigWithRetries());
TAutoPtr<IEventHandle> handle;
auto result = runtime.GrabEdgeEventRethrow<TEvHive::TEvUnlockTabletExecutionResult>(handle);
UNIT_ASSERT(result);
UNIT_ASSERT_VALUES_EQUAL(result->Record.GetTabletID(), tabletId);
- UNIT_ASSERT_VALUES_EQUAL(result->Record.GetStatus(), expectedStatus);
+ UNIT_ASSERT_C(result->Record.GetStatus() == expectedStatus, "Expected status " << expectedStatus << ", got reply " << result->Record.ShortDebugString());
}
Y_UNIT_TEST(TestLockTabletExecutionBadUnlock) {
@@ -6237,6 +6239,44 @@ Y_UNIT_TEST_SUITE(THiveTest) {
VerifyLockTabletExecutionLost(runtime, tabletId, owner);
}
+ Y_UNIT_TEST(TestLockTabletExecutionLocalGone) { // Scenario: a tablet is locked, the Local on its node goes away, then the lock owner unlocks it
+ TTestBasicRuntime runtime(3, false); // presumably a 3-node test runtime - confirm against the TTestBasicRuntime constructor
+ Setup(runtime, false);
+ CreateLocal(runtime, 0); // only the 1st node has local running
+ const ui64 hiveTablet = MakeDefaultHiveID();
+ const ui64 testerTablet = MakeTabletID(false, 1);
+ const TActorId hiveActor = CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
+ const TActorId senderA = runtime.AllocateEdgeActor(0);
+ runtime.EnableScheduleForActor(hiveActor);
+ // Create a Dummy tablet through Hive and make sure it is up before locking it
+ TTabletTypes::EType tabletType = TTabletTypes::Dummy;
+ THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 0, tabletType, BINDED_CHANNELS));
+ ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true);
+ MakeSureTabletIsUp(runtime, tabletId, 0);
+ // Lock the tablet on behalf of a separate edge actor; an OK reply is expected and the tablet must then be down
+ TActorId owner = runtime.AllocateEdgeActor(1);
+ SendLockTabletExecution(runtime, hiveTablet, tabletId, 1, NKikimrProto::OK, owner, 100500); // NOTE(review): 100500 looks like a large lock timeout - confirm against the helper's signature
+ MakeSureTabletIsDown(runtime, tabletId, 0);
+
+ // Block events related to node disconnect
+ // TEvents::TEvUndelivered - actor system does not guarantee that this event will be sent
+ // TEvLocal::TEvStatus - we cannot expect that the disconnecting node will always be able to send this
+ // TEvInterconnect::TEvNodeDisconnected - we will send this one, but follow it up with NodeConnected
+ // This is simulating a case when a new host is using the old node id and that new host does not run Local
+ TBlockEvents<TEvents::TEvUndelivered> blockUndleivered(runtime, [](auto&& ev) { return ev->Get()->SourceType == TEvLocal::EvPing; }); // only matches undelivered notifications whose source event type is TEvLocal::EvPing
+ TBlockEvents<TEvLocal::TEvStatus> blockStatus(runtime, [](auto&& ev) { return ev->Get()->Record.GetStatus() != NKikimrProto::OK; }); // only matches status events carrying a non-OK status
+ SendKillLocal(runtime, 0);
+ runtime.SendToPipe(hiveTablet, senderA, new TEvInterconnect::TEvNodeDisconnected(runtime.GetNodeId(0)), 0, GetPipeConfigWithRetries());
+ CreateLocal(runtime, 2); // start a Local on another node index (2), not on the one whose Local was killed
+ runtime.Register(CreateTabletKiller(hiveTablet)); // presumably restarts Hive so that only persisted state survives - confirm against CreateTabletKiller
+ runtime.SendToPipe(hiveTablet, senderA, new TEvInterconnect::TEvNodeConnected(runtime.GetNodeId(0)), 0, GetPipeConfigWithRetries());
+
+ // Unlocking with the same owner should succeed and boot the tablet
+ SendUnlockTabletExecution(runtime, hiveTablet, tabletId, 1, NKikimrProto::OK, owner);
+ WaitForTabletIsUp(runtime, tabletId, 0);
+
+ }
+
Y_UNIT_TEST(TestExternalBoot) {
TTestBasicRuntime runtime(1, false);
Setup(runtime, true);
diff --git a/ydb/core/mind/hive/tx__lock_tablet.cpp b/ydb/core/mind/hive/tx__lock_tablet.cpp
index 7874403abe..3afa2802b6 100644
--- a/ydb/core/mind/hive/tx__lock_tablet.cpp
+++ b/ydb/core/mind/hive/tx__lock_tablet.cpp
@@ -95,6 +95,7 @@ public:
follower.InitiateStop(SideEffects);
}
tablet->InitiateStop(SideEffects);
+ db.Table<Schema::Tablet>().Key(TabletId).Update<Schema::Tablet::LeaderNode>(0);
}
if (tablet->LockedToActor == OwnerActor && tablet->PendingUnlockSeqNo == 0) {
// Lock is still valid, watch for node disconnections