diff options
author | Aleksandr Kriukov <[email protected]> | 2022-04-12 16:28:50 +0300 |
---|---|---|
committer | Aleksandr Kriukov <[email protected]> | 2022-04-12 16:28:50 +0300 |
commit | d611e42d1b0694e12b846e10bdaa47247c95a9ba (patch) | |
tree | bc1f87b9f52857e164aea2bec339e136d759f8be | |
parent | d731aa4b191e1f646ad26e7b7d440bc30dfccdab (diff) |
Fix race of collecting garbage in KV tablet, KIKIMR-14680
ref:b81b7fc11a863ba4899bc337fe666ec02d4f86d6
-rw-r--r-- | ydb/core/keyvalue/keyvalue_collector.cpp | 2 | ||||
-rw-r--r-- | ydb/core/keyvalue/keyvalue_flat_impl.h | 24 | ||||
-rw-r--r-- | ydb/core/keyvalue/keyvalue_state.cpp | 28 | ||||
-rw-r--r-- | ydb/core/keyvalue/keyvalue_state.h | 6 | ||||
-rw-r--r-- | ydb/core/keyvalue/keyvalue_state_collect.cpp | 2 | ||||
-rw-r--r-- | ydb/core/keyvalue/keyvalue_ut.cpp | 54 |
6 files changed, 101 insertions, 15 deletions
diff --git a/ydb/core/keyvalue/keyvalue_collector.cpp b/ydb/core/keyvalue/keyvalue_collector.cpp index 8b7696bcb25..6aa83dfd203 100644 --- a/ydb/core/keyvalue/keyvalue_collector.cpp +++ b/ydb/core/keyvalue/keyvalue_collector.cpp @@ -177,7 +177,7 @@ public: new TEvBlobStorage::TEvCollectGarbage(TabletInfo->TabletID, RecordGeneration, PerGenerationCounter, channelIdx, true, CollectOperation->Header.CollectGeneration, CollectOperation->Header.CollectStep, - keep.Release(), doNotKeep.Release(), TInstant::Max(), true), 0); + keep.Release(), doNotKeep.Release(), TInstant::Max(), true), (ui64)TKeyValueState::ECollectCookie::Soft); } STFUNC(StateWait) { diff --git a/ydb/core/keyvalue/keyvalue_flat_impl.h b/ydb/core/keyvalue/keyvalue_flat_impl.h index ae30344e143..859e07d4de9 100644 --- a/ydb/core/keyvalue/keyvalue_flat_impl.h +++ b/ydb/core/keyvalue/keyvalue_flat_impl.h @@ -368,8 +368,28 @@ protected: void Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr &ev, const TActorContext &ctx) { Y_UNUSED(ev); LOG_DEBUG_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID() - << " Handle TEvCollectGarbageResult Marker# KV52"); - State.RegisterInitialCollectResult(ctx); + << " Handle TEvCollectGarbageResult Cookie# " << ev->Cookie << " Marker# KV52"); + if (ev->Cookie == (ui64)TKeyValueState::ECollectCookie::Hard) { + return; + } + + if (ev->Cookie == (ui64)TKeyValueState::ECollectCookie::SoftInitial) { + NKikimrProto::EReplyStatus status = ev->Get()->Status; + if (status == NKikimrProto::OK) { + State.RegisterInitialCollectResult(ctx); + } else { + LOG_ERROR_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID() + << " received not ok TEvCollectGarbageResult" + << " Status# " << NKikimrProto::EReplyStatus_Name(status) + << " ErrorReason# " << ev->Get()->ErrorReason); + Send(SelfId(), new TKikimrEvents::TEvPoisonPill); + } + return; + } + + LOG_CRIT_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletID() + << " received TEvCollectGarbageResult with unexpected Cookie# " << ev->Cookie); + Send(SelfId(), new TKikimrEvents::TEvPoisonPill); } void Handle(TEvKeyValue::TEvRequest::TPtr ev, const TActorContext &ctx) { diff --git a/ydb/core/keyvalue/keyvalue_state.cpp b/ydb/core/keyvalue/keyvalue_state.cpp index c344d37b62d..92818898f19 100644 --- a/ydb/core/keyvalue/keyvalue_state.cpp +++ b/ydb/core/keyvalue/keyvalue_state.cpp @@ -563,7 +563,7 @@ void TKeyValueState::InitExecute(ui64 tabletId, TActorId keyValueActorId, ui32 e const TActorId nodeWarden = MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()); const TActorId proxy = MakeBlobStorageProxyID(group); ctx.ExecutorThread.Send(new IEventHandle(proxy, TActorId(), ev.Release(), - IEventHandle::FlagForwardOnNondelivery, 0, &nodeWarden)); + IEventHandle::FlagForwardOnNondelivery, (ui64)TKeyValueState::ECollectCookie::Hard, &nodeWarden)); } } @@ -628,7 +628,7 @@ void TKeyValueState::InitExecute(ui64 tabletId, TActorId keyValueActorId, ui32 e const TActorId nodeWarden = MakeBlobStorageNodeWardenID(ctx.SelfID.NodeId()); const TActorId proxy = MakeBlobStorageProxyID(group); ctx.ExecutorThread.Send(new IEventHandle(proxy, KeyValueActorId, ev.Release(), - IEventHandle::FlagForwardOnNondelivery, 0, &nodeWarden)); + IEventHandle::FlagForwardOnNondelivery, (ui64)TKeyValueState::ECollectCookie::SoftInitial, &nodeWarden)); } } @@ -701,14 +701,14 @@ void TKeyValueState::InitExecute(ui64 tabletId, TActorId keyValueActorId, ui32 e // corner case, if no CollectGarbage events were sent if (InitialCollectsSent == 0) { SendCutHistory(ctx); - } - if (CollectOperation.Get()) { - // finish collect operation from local base - IsCollectEventSent = true; - StoreCollectComplete(ctx); - } else { - // initiate collection if trash was loaded from local base - PrepareCollectIfNeeded(ctx); + if (CollectOperation.Get()) { + // finish collect operation from local base + IsCollectEventSent = true; + StoreCollectComplete(ctx); + } else { + // initiate collection if trash was loaded from local base + PrepareCollectIfNeeded(ctx); + } } } @@ -717,6 +717,14 @@ void TKeyValueState::RegisterInitialCollectResult(const TActorContext &ctx) { << " InitialCollectsSent# " << InitialCollectsSent << " Marker# KV50"); if (--InitialCollectsSent == 0) { SendCutHistory(ctx); + if (CollectOperation.Get()) { + // finish collect operation from local base + IsCollectEventSent = true; + StoreCollectComplete(ctx); + } else { + // initiate collection if trash was loaded from local base + PrepareCollectIfNeeded(ctx); + } } } diff --git a/ydb/core/keyvalue/keyvalue_state.h b/ydb/core/keyvalue/keyvalue_state.h index 8052258941f..98d6023fdaa 100644 --- a/ydb/core/keyvalue/keyvalue_state.h +++ b/ydb/core/keyvalue/keyvalue_state.h @@ -232,6 +232,12 @@ public: } }; + enum class ECollectCookie { + Hard = 1, + SoftInitial = 2, + Soft = 3, + }; + ui32 GetGeneration() const { return StoredState.UserGeneration; } diff --git a/ydb/core/keyvalue/keyvalue_state_collect.cpp b/ydb/core/keyvalue/keyvalue_state_collect.cpp index 604c1e20ffc..248d62fd3e6 100644 --- a/ydb/core/keyvalue/keyvalue_state_collect.cpp +++ b/ydb/core/keyvalue/keyvalue_state_collect.cpp @@ -6,7 +6,7 @@ namespace NKeyValue { void TKeyValueState::PrepareCollectIfNeeded(const TActorContext &ctx) { LOG_TRACE_S(ctx, NKikimrServices::KEYVALUE, "PrepareCollectIfNeeded KeyValue# " << TabletId << " Marker# KV61"); - if (CollectOperation.Get()) { + if (CollectOperation.Get() || InitialCollectsSent) { // We already are trying to collect something, just pass this time. return; } diff --git a/ydb/core/keyvalue/keyvalue_ut.cpp b/ydb/core/keyvalue/keyvalue_ut.cpp index f2110acc1bb..cbac73b9daf 100644 --- a/ydb/core/keyvalue/keyvalue_ut.cpp +++ b/ydb/core/keyvalue/keyvalue_ut.cpp @@ -82,9 +82,10 @@ struct TTestContext { SetupLogging(*Runtime); SetupTabletServices(*Runtime); setup(*Runtime); - CreateTestBootstrapper(*Runtime, + TActorId bootstrapId = CreateTestBootstrapper(*Runtime, CreateTestTabletInfo(TabletId, TabletType, TErasureType::ErasureNone), &CreateKeyValueFlat); + Cerr << (TStringBuilder() << "BootstrapId# " << bootstrapId << Endl); TDispatchOptions options; options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); @@ -508,6 +509,7 @@ void ExecuteEvent(TDesiredPair<TRequestEvent> &dp, TTestContext &tc) { tc.Runtime->ResetScheduledCount(); request = std::make_unique<TRequestEvent>(); request->Record = dp.Request; + Cerr << (TStringBuilder() << "Execute event# " << TypeName(*request) << Endl); tc.Runtime->SendToPipe(tc.TabletId, tc.Edge, request.release(), 0, GetPipeConfigWithRetries()); response = tc.Runtime->GrabEdgeEvent<typename TRequestEvent::TResponse>(handle); dp.Response = response->Record; @@ -869,6 +871,56 @@ Y_UNIT_TEST(TestWriteReadDeleteWithRestartsThenResponseOk) { } +Y_UNIT_TEST(TestWriteReadDeleteWithRestartsAndCatchCollectGarbageEvents) { + TTestContext tc; + TMaybe<TActorId> tabletActor; + bool firstCollect = true; + auto setup = [&] (TTestActorRuntime &runtime) { + runtime.SetObserverFunc([&](TTestActorRuntimeBase& runtime, TAutoPtr<IEventHandle>& event) { + if (tabletActor && *tabletActor == event->Recipient && event->GetTypeRewrite() == TEvBlobStorage::TEvCollectGarbageResult::EventType) { + Cerr << (TStringBuilder() << "CollectGarbageResult!!! " << event->Sender << "->" << event->Recipient << " Cookie# " << event->Cookie << Endl); + } + if (tabletActor && *tabletActor == event->Sender && event->GetTypeRewrite() == TEvBlobStorage::TEvCollectGarbage::EventType) { + Cerr << (TStringBuilder() << "CollectGarbage!!! " << event->Sender << "->" << event->Recipient << " Cookie# " << event->Cookie << Endl); + if (!firstCollect) { + Cerr << (TStringBuilder() << "Drop!" << Endl); + return TTestActorRuntime::EEventAction::DROP; + } + } + if (tabletActor && *tabletActor == event->Recipient && event->GetTypeRewrite() == TEvKeyValue::TEvCollect::EventType) { + Cerr << (TStringBuilder() << "Collect!!! " << event->Sender << "->" << event->Recipient << " Cookie# " << event->Cookie << Endl); + if (firstCollect) { + Cerr << (TStringBuilder() << "Drop!" << Endl); + runtime.Send(new IEventHandle(event->Recipient, event->Recipient, new TKikimrEvents::TEvPoisonPill)); + firstCollect = false; + return TTestActorRuntime::EEventAction::DROP; + } + } + return TTestActorRuntime::EEventAction::PROCESS; + }); + runtime.SetRegistrationObserverFunc([&](TTestActorRuntimeBase& runtime, const TActorId& /*parentId*/, const TActorId& actorId) { + if (TypeName(*runtime.FindActor(actorId)) == "NKikimr::NKeyValue::TKeyValueFlat") { + tabletActor = actorId; + Cerr << (TStringBuilder() << "KV tablet was created " << actorId << Endl); + } + }); + }; + TFinalizer finalizer(tc); + bool activeZone = false; + tc.Prepare(INITIAL_TEST_DISPATCH_NAME, setup, activeZone); + ExecuteWrite(tc, {{"key", "value"}}, 0, 2, NKikimrKeyValue::Priorities::PRIORITY_REALTIME); + tc.Runtime->Send(new IEventHandle(*tabletActor, *tabletActor, new TKikimrEvents::TEvPoisonPill)); + ExecuteWrite(tc, {{"key1", "value1"}}, 0, 2, NKikimrKeyValue::Priorities::PRIORITY_REALTIME); + ExecuteWrite(tc, {{"key2", "value2"}}, 0, 2, NKikimrKeyValue::Priorities::PRIORITY_REALTIME); + ExecuteRead(tc, "key", "value", 0, 0, 0); + ExecuteDeleteRange(tc, "key", EBorderKind::Include, "key", EBorderKind::Include, 0); + TDispatchOptions options; + options.FinalEvents.push_back(TKikimrEvents::TEvPoisonPill::EventType); + tc.Runtime->DispatchEvents(options); + ExecuteRead<NKikimrKeyValue::Statuses::RSTATUS_NO_DATA>(tc, "key", "", 0, 0, 0); + ExecuteRead(tc, "key1", "value1", 0, 0, 0); +} + Y_UNIT_TEST(TestWriteReadDeleteWithRestartsThenResponseOkWithNewApi) { TTestContext tc; RunTestWithReboots(tc.TabletIds, [&]() { |