diff options
author | yuryalekseev <yuryalekseev@yandex-team.com> | 2022-08-29 12:39:48 +0300 |
---|---|---|
committer | yuryalekseev <yuryalekseev@yandex-team.com> | 2022-08-29 12:39:48 +0300 |
commit | 050fcbdcfe1defb6f0eaa1926e59ad352dbc61c4 (patch) | |
tree | e2ee8443c33848d79772cbbc1acd3b349025d2b9 | |
parent | fa9d9d43f2a19e75b30e4c478e33a951506fbabe (diff) | |
download | ydb-050fcbdcfe1defb6f0eaa1926e59ad352dbc61c4.tar.gz |
Add support for self termination.
-rw-r--r-- | ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp | 1 | ||||
-rw-r--r-- | ydb/core/driver_lib/run/kikimr_services_initializers.cpp | 2 | ||||
-rw-r--r-- | ydb/core/protos/config.proto | 8 | ||||
-rw-r--r-- | ydb/core/util/failure_injection.cpp | 76 | ||||
-rw-r--r-- | ydb/core/util/failure_injection.h | 4 |
5 files changed, 83 insertions, 8 deletions
diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp index 63d4863b3a5..70dbcd97c34 100644 --- a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp +++ b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp @@ -458,6 +458,7 @@ protected: OPTION("alloc-file", AllocatorConfig); OPTION("yq-file", YandexQueryConfig); OPTION(nullptr, TracingConfig); + OPTION(nullptr, FailureInjectionConfig); if (!AppConfig.HasAllocatorConfig()) { AppConfig.MutableAllocatorConfig()->CopyFrom(*DummyAllocatorConfig()); diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index 2f675786994..21d0a6efc12 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -1851,7 +1851,7 @@ TFailureInjectionInitializer::TFailureInjectionInitializer(const TKikimrRunConfi {} void TFailureInjectionInitializer::InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) { - IActor *actor = CreateFailureInjectionActor(); + IActor *actor = CreateFailureInjectionActor(Config.GetFailureInjectionConfig(), *appData); setup->LocalServices.emplace_back(MakeBlobStorageFailureInjectionID(NodeId), TActorSetupCmd(actor, TMailboxType::HTSwap, appData->UserPoolId)); // FIXME: correct service id diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index f84c2a99e84..6bc5f12f4e8 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -708,6 +708,8 @@ message TFeatureFlags { optional bool EnablePredicateExtractForDataQueries = 68 [default = false]; optional bool EnableKqpPatternCacheLiteral = 69 [default = false]; optional bool EnableMoveIndex = 70 [default = false]; + // enable http handle for self termination + optional bool EnableFailureInjectionTermination = 71 [default = false]; } @@ -1529,6 +1531,11 @@ message TTracingConfig { optional string RootCA = 3; } +message TFailureInjectionConfig { + // approximate time in seconds between self terminations + optional uint32 ApproximateTerminationInterval = 1 [default = 0]; // disabled by default +} + // This message is used to upload custom service configs // to CMS. Config name is used to identify owner and // data format. @@ -1601,6 +1608,7 @@ message TAppConfig { optional THttpProxyConfig HttpProxyConfig = 53; optional TSchemeShardConfig SchemeShardConfig = 54; optional TTracingConfig TracingConfig = 55; + optional TFailureInjectionConfig FailureInjectionConfig = 56; repeated TNamedConfig NamedConfigs = 100; optional string ClusterYamlConfig = 101; diff --git a/ydb/core/util/failure_injection.cpp b/ydb/core/util/failure_injection.cpp index 9eeaaa968ba..4dcc7e598b1 100644 --- a/ydb/core/util/failure_injection.cpp +++ b/ydb/core/util/failure_injection.cpp @@ -1,13 +1,22 @@ #include "failure_injection.h" + +#include <ydb/core/base/appdata.h> +#include <ydb/core/protos/config.pb.h> #include <ydb/core/protos/services.pb.h> -#include <util/system/mutex.h> -#include <util/generic/queue.h> + #include <library/cpp/monlib/service/pages/templates.h> #include <library/cpp/lwtrace/all.h> +#include <library/cpp/actors/core/events.h> #include <library/cpp/actors/core/event_local.h> #include <library/cpp/actors/core/actor_bootstrapped.h> #include <library/cpp/actors/core/log.h> +#include <util/generic/queue.h> +#include <util/system/types.h> +#include <util/system/mutex.h> + +#include <random> + using namespace NActors; namespace NKikimr { @@ -117,14 +126,26 @@ namespace NKikimr { TVector<TString> Probes; TFailureInjectionManager Manager; bool Enabled = false; + bool EnableFailureInjectionTermination = false; + ui32 ApproximateTerminationInterval = 0; + + struct TEvPrivate { + enum EEv { + EvTerminateProcess = EventSpaceBegin(TEvents::ES_PRIVATE), + }; + + struct TEvTerminateProcess : TEventLocal<TEvTerminateProcess, EvTerminateProcess> {}; + }; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::BS_FAILURE_INJECTION; } - TFailureInjectionActor() + TFailureInjectionActor(const NKikimrConfig::TFailureInjectionConfig& config, const NKikimr::TAppData& appData) : TraceManager(*Singleton<TProbeRegistry>(), true) + , EnableFailureInjectionTermination(appData.FeatureFlags.GetEnableFailureInjectionTermination()) + , ApproximateTerminationInterval(config.GetApproximateTerminationInterval()) {} void Bootstrap(const TActorContext& /*ctx*/) { @@ -145,6 +166,13 @@ namespace NKikimr { TCallback callback(Probes); TraceManager.ReadProbes(callback); + if (EnableFailureInjectionTermination && ApproximateTerminationInterval > 0) { + std::random_device rd; + std::mt19937 rng(rd()); + std::poisson_distribution<> poisson(ApproximateTerminationInterval); + Schedule(TDuration::Seconds(poisson(rng)), new TEvPrivate::TEvTerminateProcess()); + } + Become(&TFailureInjectionActor::StateFunc); } @@ -174,14 +202,49 @@ namespace NKikimr { } } + void TerminateProcess() { + if (EnableFailureInjectionTermination) { + Y_FAIL("Terminating itself from TFailureInjectionActor"); + } + } + + void SendReplyAndTerminateProcess(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { + TStringStream str; + + HTML(str) { + DIV() { + if (EnableFailureInjectionTermination) { + str << "<h1>" << "Process is going to terminate" << "</font></h1>"; + } else { + str << "<h1>" << "Process termination is not enabled" << "</font></h1>"; + } + } + } + + ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str(), ev->Get()->SubRequestId)); + + TerminateProcess(); + } + void HandlePoison(TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx) { ctx.Send(ev->Sender, new TEvents::TEvPoisonTaken); Die(ctx); } + void HandleTermination(TEvPrivate::TEvTerminateProcess::TPtr&) { + TerminateProcess(); + } + void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) { - TStringStream str; + if (ev->Get()->Request.GetMethod() == HTTP_METHOD_POST) { + if (ev->Get()->Request.GetPostParams().Has("terminate")) { + SendReplyAndTerminateProcess(ev, ctx); + } + + return; + } + TStringStream str; const auto& params = ev->Get()->Request.GetParams(); if (params.Has("queue")) { TString queue = params.Get("queue"); @@ -296,13 +359,14 @@ namespace NKikimr { STRICT_STFUNC(StateFunc, HFunc(TEvents::TEvPoisonPill, HandlePoison) HFunc(NMon::TEvHttpInfo, Handle) + hFunc(TEvPrivate::TEvTerminateProcess, HandleTermination); ) }; } // anon - IActor *CreateFailureInjectionActor() { - return new TFailureInjectionActor(); + IActor *CreateFailureInjectionActor(const NKikimrConfig::TFailureInjectionConfig& config, const NKikimr::TAppData& appData) { + return new TFailureInjectionActor(config, appData); } } // NKikimr diff --git a/ydb/core/util/failure_injection.h b/ydb/core/util/failure_injection.h index 65d3397d534..f5ead9ede06 100644 --- a/ydb/core/util/failure_injection.h +++ b/ydb/core/util/failure_injection.h @@ -1,10 +1,12 @@ #pragma once #include "defs.h" +#include <ydb/core/base/appdata.h> +#include <ydb/core/protos/config.pb.h> #include <library/cpp/actors/core/actor.h> namespace NKikimr { - NActors::IActor *CreateFailureInjectionActor(); + NActors::IActor *CreateFailureInjectionActor(const NKikimrConfig::TFailureInjectionConfig& config, const NKikimr::TAppData& appData); } // NKikimr |