summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAleksandr Kriukov <[email protected]>2022-04-12 16:28:50 +0300
committerAleksandr Kriukov <[email protected]>2022-04-12 16:28:50 +0300
commitd611e42d1b0694e12b846e10bdaa47247c95a9ba (patch)
treebc1f87b9f52857e164aea2bec339e136d759f8be
parentd731aa4b191e1f646ad26e7b7d440bc30dfccdab (diff)
Fix race of collecting garbage in KV tablet, KIKIMR-14680
ref:b81b7fc11a863ba4899bc337fe666ec02d4f86d6
-rw-r--r--ydb/core/keyvalue/keyvalue_collector.cpp2
-rw-r--r--ydb/core/keyvalue/keyvalue_flat_impl.h24
-rw-r--r--ydb/core/keyvalue/keyvalue_state.cpp28
-rw-r--r--ydb/core/keyvalue/keyvalue_state.h6
-rw-r--r--ydb/core/keyvalue/keyvalue_state_collect.cpp2
-rw-r--r--ydb/core/keyvalue/keyvalue_ut.cpp54
6 files changed, 101 insertions, 15 deletions
diff --git a/ydb/core/keyvalue/keyvalue_collector.cpp b/ydb/core/keyvalue/keyvalue_collector.cpp
index 8b7696bcb25..6aa83dfd203 100644
--- a/ydb/core/keyvalue/keyvalue_collector.cpp
+++ b/ydb/core/keyvalue/keyvalue_collector.cpp
@@ -177,7 +177,7 @@ public:
new TEvBlobStorage::TEvCollectGarbage(TabletInfo->TabletID, RecordGeneration, PerGenerationCounter,
channelIdx, true,
CollectOperation->Header.CollectGeneration, CollectOperation->Header.CollectStep,
- keep.Release(), doNotKeep.Release(), TInstant::Max(), true), 0);
+ keep.Release(), doNotKeep.Release(), TInstant::Max(), true), (ui64)TKeyValueState::ECollectCookie::Soft);
}
STFUNC(StateWait) {
diff --git a/ydb/core/keyvalue/keyvalue_flat_impl.h b/ydb/core/keyvalue/keyvalue_flat_impl.h
index ae30344e143..859e07d4de9 100644
--- a/ydb/core/keyvalue/keyvalue_flat_impl.h
+++ b/ydb/core/keyvalue/keyvalue_flat_impl.h
@@ -368,8 +368,28 @@ protected:
void Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr &ev, const TActorContext &ctx) {
Y_UNUSED(ev);
LOG_DEBUG_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID()
- << " Handle TEvCollectGarbageResult Marker# KV52");
- State.RegisterInitialCollectResult(ctx);
+ << " Handle TEvCollectGarbageResult Cookie# " << ev->Cookie << " Marker# KV52");
+ if (ev->Cookie == (ui64)TKeyValueState::ECollectCookie::Hard) {
+ return;
+ }
+
+ if (ev->Cookie == (ui64)TKeyValueState::ECollectCookie::SoftInitial) {
+ NKikimrProto::EReplyStatus status = ev->Get()->Status;
+ if (status == NKikimrProto::OK) {
+ State.RegisterInitialCollectResult(ctx);
+ } else {
+ LOG_ERROR_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID()
+ << " received not ok TEvCollectGarbageResult"
+ << " Status# " << NKikimrProto::EReplyStatus_Name(status)
+ << " ErrorReason# " << ev->Get()->ErrorReason);
+ Send(SelfId(), new TKikimrEvents::TEvPoisonPill);
+ }
+ return;
+ }
+
+ LOG_CRIT_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID()
+ << " received TEvCollectGarbageResult with unexpected Cookie# " << ev->Cookie);
+ Send(SelfId(), new TKikimrEvents::TEvPoisonPill);
}
void Handle(TEvKeyValue::TEvRequest::TPtr ev, const TActorContext &ctx) {
diff --git a/ydb/core/keyvalue/keyvalue_state.cpp b/ydb/core/keyvalue/keyvalue_state.cpp
index c344d37b62d..92818898f19 100644
--- a/ydb/core/keyvalue/keyvalue_state.cpp
+++ b/ydb/core/keyvalue/keyvalue_state.cpp
@@ -563,7 +563,7 @@ void TKeyValueState::InitExecute(ui64 tabletId, TActorId keyValueActorId, ui32 e
const TActorId nodeWarden = MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId());
const TActorId proxy = MakeBlobStorageProxyID(group);
ctx.ExecutorThread.Send(new IEventHandle(proxy, TActorId(), ev.Release(),
- IEventHandle::FlagForwardOnNondelivery, 0, &nodeWarden));
+ IEventHandle::FlagForwardOnNondelivery, (ui64)TKeyValueState::ECollectCookie::Hard, &nodeWarden));
}
}
@@ -628,7 +628,7 @@ void TKeyValueState::InitExecute(ui64 tabletId, TActorId keyValueActorId, ui32 e
const TActorId nodeWarden = MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId());
const TActorId proxy = MakeBlobStorageProxyID(group);
ctx.ExecutorThread.Send(new IEventHandle(proxy, KeyValueActorId, ev.Release(),
- IEventHandle::FlagForwardOnNondelivery, 0, &nodeWarden));
+ IEventHandle::FlagForwardOnNondelivery, (ui64)TKeyValueState::ECollectCookie::SoftInitial, &nodeWarden));
}
}
@@ -701,14 +701,14 @@ void TKeyValueState::InitExecute(ui64 tabletId, TActorId keyValueActorId, ui32 e
// corner case, if no CollectGarbage events were sent
if (InitialCollectsSent == 0) {
SendCutHistory(ctx);
- }
- if (CollectOperation.Get()) {
- // finish collect operation from local base
- IsCollectEventSent = true;
- StoreCollectComplete(ctx);
- } else {
- // initiate collection if trash was loaded from local base
- PrepareCollectIfNeeded(ctx);
+ if (CollectOperation.Get()) {
+ // finish collect operation from local base
+ IsCollectEventSent = true;
+ StoreCollectComplete(ctx);
+ } else {
+ // initiate collection if trash was loaded from local base
+ PrepareCollectIfNeeded(ctx);
+ }
}
}
@@ -717,6 +717,14 @@ void TKeyValueState::RegisterInitialCollectResult(const TActorContext &ctx) {
<< " InitialCollectsSent# " << InitialCollectsSent << " Marker# KV50");
if (--InitialCollectsSent == 0) {
SendCutHistory(ctx);
+ if (CollectOperation.Get()) {
+ // finish collect operation from local base
+ IsCollectEventSent = true;
+ StoreCollectComplete(ctx);
+ } else {
+ // initiate collection if trash was loaded from local base
+ PrepareCollectIfNeeded(ctx);
+ }
}
}
diff --git a/ydb/core/keyvalue/keyvalue_state.h b/ydb/core/keyvalue/keyvalue_state.h
index 8052258941f..98d6023fdaa 100644
--- a/ydb/core/keyvalue/keyvalue_state.h
+++ b/ydb/core/keyvalue/keyvalue_state.h
@@ -232,6 +232,12 @@ public:
}
};
+ enum class ECollectCookie {
+ Hard = 1,
+ SoftInitial = 2,
+ Soft = 3,
+ };
+
ui32 GetGeneration() const {
return StoredState.UserGeneration;
}
diff --git a/ydb/core/keyvalue/keyvalue_state_collect.cpp b/ydb/core/keyvalue/keyvalue_state_collect.cpp
index 604c1e20ffc..248d62fd3e6 100644
--- a/ydb/core/keyvalue/keyvalue_state_collect.cpp
+++ b/ydb/core/keyvalue/keyvalue_state_collect.cpp
@@ -6,7 +6,7 @@ namespace NKeyValue {
void TKeyValueState::PrepareCollectIfNeeded(const TActorContext &ctx) {
LOG_TRACE_S(ctx, NKikimrServices::KEYVALUE, "PrepareCollectIfNeeded KeyValue# " << TabletId << " Marker# KV61");
- if (CollectOperation.Get()) {
+ if (CollectOperation.Get() || InitialCollectsSent) {
// We already are trying to collect something, just pass this time.
return;
}
diff --git a/ydb/core/keyvalue/keyvalue_ut.cpp b/ydb/core/keyvalue/keyvalue_ut.cpp
index f2110acc1bb..cbac73b9daf 100644
--- a/ydb/core/keyvalue/keyvalue_ut.cpp
+++ b/ydb/core/keyvalue/keyvalue_ut.cpp
@@ -82,9 +82,10 @@ struct TTestContext {
SetupLogging(*Runtime);
SetupTabletServices(*Runtime);
setup(*Runtime);
- CreateTestBootstrapper(*Runtime,
+ TActorId bootstrapId = CreateTestBootstrapper(*Runtime,
CreateTestTabletInfo(TabletId, TabletType, TErasureType::ErasureNone),
&CreateKeyValueFlat);
+ Cerr << (TStringBuilder() << "BootstrapId# " << bootstrapId << Endl);
TDispatchOptions options;
options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot));
@@ -508,6 +509,7 @@ void ExecuteEvent(TDesiredPair<TRequestEvent> &dp, TTestContext &tc) {
tc.Runtime->ResetScheduledCount();
request = std::make_unique<TRequestEvent>();
request->Record = dp.Request;
+ Cerr << (TStringBuilder() << "Execute event# " << TypeName(*request) << Endl);
tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.release(), 0, GetPipeConfigWithRetries());
response = tc.Runtime->GrabEdgeEvent<typename TRequestEvent::TResponse>(handle);
dp.Response = response->Record;
@@ -869,6 +871,56 @@ Y_UNIT_TEST(TestWriteReadDeleteWithRestartsThenResponseOk) {
}
+Y_UNIT_TEST(TestWriteReadDeleteWithRestartsAndCatchCollectGarbageEvents) {
+ TTestContext tc;
+ TMaybe<TActorId> tabletActor;
+ bool firstCollect = true;
+ auto setup = [&] (TTestActorRuntime &runtime) {
+ runtime.SetObserverFunc([&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) {
+ if (tabletActor && *tabletActor == event->Recipient && event->GetTypeRewrite() == TEvBlobStorage::TEvCollectGarbageResult::EventType) {
+ Cerr << (TStringBuilder() << "CollectGarbageResult!!! " << event->Sender << "->" << event->Recipient << " Cookie# " << event->Cookie << Endl);
+ }
+ if (tabletActor && *tabletActor == event->Sender && event->GetTypeRewrite() == TEvBlobStorage::TEvCollectGarbage::EventType) {
+ Cerr << (TStringBuilder() << "CollectGarbage!!! " << event->Sender << "->" << event->Recipient << " Cookie# " << event->Cookie << Endl);
+ if (!firstCollect) {
+ Cerr << (TStringBuilder() << "Drop!" << Endl);
+ return TTestActorRuntime::EEventAction::DROP;
+ }
+ }
+ if (tabletActor && *tabletActor == event->Recipient && event->GetTypeRewrite() == TEvKeyValue::TEvCollect::EventType) {
+ Cerr << (TStringBuilder() << "Collect!!! " << event->Sender << "->" << event->Recipient << " Cookie# " << event->Cookie << Endl);
+ if (firstCollect) {
+ Cerr << (TStringBuilder() << "Drop!" << Endl);
+ runtime.Send(new IEventHandle(event->Recipient, event->Recipient, new TKikimrEvents::TEvPoisonPill));
+ firstCollect = false;
+ return TTestActorRuntime::EEventAction::DROP;
+ }
+ }
+ return TTestActorRuntime::EEventAction::PROCESS;
+ });
+ runtime.SetRegistrationObserverFunc([&](TTestActorRuntimeBase& runtime, const TActorId& /*parentId*/, const TActorId& actorId) {
+ if (TypeName(*runtime.FindActor(actorId)) == "NKikimr::NKeyValue::TKeyValueFlat") {
+ tabletActor = actorId;
+ Cerr << (TStringBuilder() << "KV tablet was created " << actorId << Endl);
+ }
+ });
+ };
+ TFinalizer finalizer(tc);
+ bool activeZone = false;
+ tc.Prepare(INITIAL_TEST_DISPATCH_NAME, setup, activeZone);
+ ExecuteWrite(tc, {{"key", "value"}}, 0, 2, NKikimrKeyValue::Priorities::PRIORITY_REALTIME);
+ tc.Runtime->Send(new IEventHandle(*tabletActor, *tabletActor, new TKikimrEvents::TEvPoisonPill));
+ ExecuteWrite(tc, {{"key1", "value1"}}, 0, 2, NKikimrKeyValue::Priorities::PRIORITY_REALTIME);
+ ExecuteWrite(tc, {{"key2", "value2"}}, 0, 2, NKikimrKeyValue::Priorities::PRIORITY_REALTIME);
+ ExecuteRead(tc, "key", "value", 0, 0, 0);
+ ExecuteDeleteRange(tc, "key", EBorderKind::Include, "key", EBorderKind::Include, 0);
+ TDispatchOptions options;
+ options.FinalEvents.push_back(TKikimrEvents::TEvPoisonPill::EventType);
+ tc.Runtime->DispatchEvents(options);
+ ExecuteRead<NKikimrKeyValue::Statuses::RSTATUS_NO_DATA>(tc, "key", "", 0, 0, 0);
+ ExecuteRead(tc, "key1", "value1", 0, 0, 0);
+}
+
Y_UNIT_TEST(TestWriteReadDeleteWithRestartsThenResponseOkWithNewApi) {
TTestContext tc;
RunTestWithReboots(tc.TabletIds, [&]() {