aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author zalyalov <zalyalov@yandex-team.com> 2023-06-08 14:23:28 +0300
committer zalyalov <zalyalov@yandex-team.com> 2023-06-08 14:23:28 +0300
commit a2cacd6516597be1cb46d9965500435db5ffad64 (patch)
tree de656013e5914320a779de64ae65835e1e3f7499
parent 40479c2d3d1e9bd3eb213aba537570d28d37d8a2 (diff)
download ydb-a2cacd6516597be1cb46d9965500435db5ffad64.tar.gz
consider tablets alive only after they signal active
Local no longer counts a tablet as alive until it connects to the pipe and actually starts operating. This way, a tablet that is stuck reading its state on restart is not incorrectly reported as alive. This is achieved by adding a new event, TEvReady, which is similar to the previously used TEvRestored but is sent only when the tablet receives TEvTabletActive from itself. Consequently, the only change in general tablet behavior is the addition of a new event that may need to be handled.
-rw-r--r-- ydb/core/base/tablet.h 16
-rw-r--r-- ydb/core/mind/local.cpp 19
-rw-r--r-- ydb/core/tablet/tablet_sys.cpp 1
-rw-r--r-- ydb/core/tablet_flat/tablet_flat_executed.cpp 2
-rw-r--r-- ydb/core/tablet_flat/test/libs/exec/owner.h 2
5 files changed, 32 insertions, 8 deletions
diff --git a/ydb/core/base/tablet.h b/ydb/core/base/tablet.h
index e93a3c4454b..1c04c09a815 100644
--- a/ydb/core/base/tablet.h
+++ b/ydb/core/base/tablet.h
@@ -53,6 +53,7 @@ struct TEvTablet {
EvCutTabletHistory,
EvUpdateConfig,
EvDropLease,
+ EvReady,
EvCommit = EvBoot + 512,
EvAux,
@@ -233,7 +234,7 @@ struct TEvTablet {
{}
};
- // tablet is ready for operation
+ // tablet is restored, but may not yet be ready to accept messages
struct TEvRestored : public TEventLocal<TEvRestored, EvRestored> {
const ui64 TabletID;
const ui32 Generation;
@@ -248,6 +249,19 @@ struct TEvTablet {
{}
};
+ // tablet is ready for operation; the leader sends this to its launcher when it handles TEvTabletActive, carrying the tablet id, the known generation and the user tablet actor id
+ struct TEvReady : public TEventLocal<TEvReady, EvReady> {
+ const ui64 TabletID;
+ const ui32 Generation;
+ const TActorId UserTabletActor;
+
+ TEvReady(ui64 tabletId, ui32 generation, const TActorId &userTabletActor)
+ : TabletID(tabletId)
+ , Generation(generation)
+ , UserTabletActor(userTabletActor)
+ {}
+ };
+
struct TEvNewFollowerAttached : public TEventLocal<TEvNewFollowerAttached, EvNewFollowerAttached> {
const ui32 TotalFollowers;
diff --git a/ydb/core/mind/local.cpp b/ydb/core/mind/local.cpp
index 9e4a4341128..4e003784d06 100644
--- a/ydb/core/mind/local.cpp
+++ b/ydb/core/mind/local.cpp
@@ -707,16 +707,20 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
InbootTablets.erase(inbootIt);
}
- void Handle(TEvTablet::TEvRestored::TPtr &ev, const TActorContext &ctx) {
+ void Handle(TEvTablet::TEvRestored::TPtr &ev, const TActorContext&) {
TEvTablet::TEvRestored *msg = ev->Get();
if (msg->Follower) // ignore follower notifications
return;
- CounterRestored->Inc(); // always update counter for every tablet, even non-actual one. it's about tracking not resource allocation
+ CounterRestored->Inc(); // always update counter for every tablet, even out-of-date ones. it's about tracking not resource allocation
+ }
+
+ void Handle(TEvTablet::TEvReady::TPtr &ev, const TActorContext &ctx) {
+ TEvTablet::TEvReady *msg = ev->Get();
const auto tabletId = msg->TabletID;
- LOG_DEBUG_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar: Handle TEvTablet::TEvRestored tablet "
+ LOG_DEBUG_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar: Handle TEvTablet::TEvReady tablet "
<< tabletId
<< " generation "
<< msg->Generation);
@@ -727,11 +731,11 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
return;
TTabletEntry &entry = inbootIt->second;
if (msg->Generation < entry.Generation) {
- LOG_WARN_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar: Handle TEvTablet::TEvRestored tablet "
+ LOG_WARN_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar: Handle TEvTablet::TEvReady tablet "
<< tabletId
- << " restored to generation "
+ << " ready with generation "
<< msg->Generation
- << " but we waiting for generation "
+ << " but we are waiting for generation "
<< entry.Generation
<< " - ignored");
return;
@@ -930,7 +934,8 @@ public:
STFUNC(StateWork) {
switch (ev->GetTypeRewrite()) {
- HFunc(TEvTablet::TEvRestored, Handle); // tablet restored, notify queue about update
+ HFunc(TEvTablet::TEvRestored, Handle); // tablet restored, update counter
+ HFunc(TEvTablet::TEvReady, Handle); // tablet ready, notify hive about update
HFunc(TEvTablet::TEvTabletDead, Handle); // tablet dead, notify queue about update
HFunc(TEvTablet::TEvCutTabletHistory, Handle);
HFunc(TEvLocal::TEvBootTablet, Handle); // command to boot tablet
diff --git a/ydb/core/tablet/tablet_sys.cpp b/ydb/core/tablet/tablet_sys.cpp
index 3093463924c..1bb0e5c5e57 100644
--- a/ydb/core/tablet/tablet_sys.cpp
+++ b/ydb/core/tablet/tablet_sys.cpp
@@ -1003,6 +1003,7 @@ void TTablet::Handle(TEvTablet::TEvPing::TPtr &ev) {
void TTablet::HandleByLeader(TEvTablet::TEvTabletActive::TPtr &ev) {
Y_UNUSED(ev);
ReportTabletStateChange(TTabletStateInfo::Active);
+ Send(Launcher, new TEvTablet::TEvReady(TabletID(), StateStorageInfo.KnownGeneration, UserTablet));
ActivateTime = AppData()->TimeProvider->Now();
BLOG_I("Active! Generation: " << StateStorageInfo.KnownGeneration
<< ", Type: " << TTabletTypes::TypeToStr((TTabletTypes::EType)Info->TabletType)
diff --git a/ydb/core/tablet_flat/tablet_flat_executed.cpp b/ydb/core/tablet_flat/tablet_flat_executed.cpp
index b2b149adc04..35f653b6b14 100644
--- a/ydb/core/tablet_flat/tablet_flat_executed.cpp
+++ b/ydb/core/tablet_flat/tablet_flat_executed.cpp
@@ -263,6 +263,7 @@ bool TTabletExecutedFlat::HandleDefaultEvents(TAutoPtr<IEventHandle>& ev, const
hFunc(TEvTablet::TEvGetCounters, HandleGetCounters);
hFunc(TEvTablet::TEvUpdateConfig, Handle);
HFuncCtx(NMon::TEvRemoteHttpInfo, RenderHtmlPage, ctx);
+ IgnoreFunc(TEvTablet::TEvReady);
default:
return false;
}
@@ -283,6 +284,7 @@ void TTabletExecutedFlat::StateInitImpl(TAutoPtr<IEventHandle>& ev, const TActor
hFunc(TEvTablet::TEvFollowerSyncComplete, Handle);
hFunc(TEvTablet::TEvUpdateConfig, Handle);
HFuncCtx(NMon::TEvRemoteHttpInfo, RenderHtmlPage, ctx);
+ IgnoreFunc(TEvTablet::TEvReady);
default:
return Enqueue(ev);
}
diff --git a/ydb/core/tablet_flat/test/libs/exec/owner.h b/ydb/core/tablet_flat/test/libs/exec/owner.h
index 574ef19fcfd..0d6dc6fb9c6 100644
--- a/ydb/core/tablet_flat/test/libs/exec/owner.h
+++ b/ydb/core/tablet_flat/test/libs/exec/owner.h
@@ -76,6 +76,8 @@ namespace NFake {
logl << "Got kill req for Tablet " << Info->TabletID;
Send(Agent, new TEvents::TEvPoison);
+ } else if (eh->CastAsLocal<TEvTablet::TEvReady>()) {
+
} else {
Y_Fail("Unexpected event " << eh->GetTypeName());
}