aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author yuryalekseev <yuryalekseev@yandex-team.com> 2022-08-29 12:39:48 +0300
committer yuryalekseev <yuryalekseev@yandex-team.com> 2022-08-29 12:39:48 +0300
commit 050fcbdcfe1defb6f0eaa1926e59ad352dbc61c4 (patch)
tree e2ee8443c33848d79772cbbc1acd3b349025d2b9
parent fa9d9d43f2a19e75b30e4c478e33a951506fbabe (diff)
download ydb-050fcbdcfe1defb6f0eaa1926e59ad352dbc61c4.tar.gz
Add support for self termination.
-rw-r--r-- ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp 1
-rw-r--r-- ydb/core/driver_lib/run/kikimr_services_initializers.cpp 2
-rw-r--r-- ydb/core/protos/config.proto 8
-rw-r--r-- ydb/core/util/failure_injection.cpp 76
-rw-r--r-- ydb/core/util/failure_injection.h 4
5 files changed, 83 insertions, 8 deletions
diff --git a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp
index 63d4863b3a5..70dbcd97c34 100644
--- a/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp
+++ b/ydb/core/driver_lib/cli_utils/cli_cmds_server.cpp
@@ -458,6 +458,7 @@ protected:
OPTION("alloc-file", AllocatorConfig);
OPTION("yq-file", YandexQueryConfig);
OPTION(nullptr, TracingConfig);
+ OPTION(nullptr, FailureInjectionConfig);
if (!AppConfig.HasAllocatorConfig()) {
AppConfig.MutableAllocatorConfig()->CopyFrom(*DummyAllocatorConfig());
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
index 2f675786994..21d0a6efc12 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
@@ -1851,7 +1851,7 @@ TFailureInjectionInitializer::TFailureInjectionInitializer(const TKikimrRunConfi
{}
void TFailureInjectionInitializer::InitializeServices(NActors::TActorSystemSetup *setup, const NKikimr::TAppData *appData) {
- IActor *actor = CreateFailureInjectionActor();
+ IActor *actor = CreateFailureInjectionActor(Config.GetFailureInjectionConfig(), *appData);
setup->LocalServices.emplace_back(MakeBlobStorageFailureInjectionID(NodeId),
TActorSetupCmd(actor, TMailboxType::HTSwap, appData->UserPoolId));
// FIXME: correct service id
diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto
index f84c2a99e84..6bc5f12f4e8 100644
--- a/ydb/core/protos/config.proto
+++ b/ydb/core/protos/config.proto
@@ -708,6 +708,8 @@ message TFeatureFlags {
optional bool EnablePredicateExtractForDataQueries = 68 [default = false];
optional bool EnableKqpPatternCacheLiteral = 69 [default = false];
optional bool EnableMoveIndex = 70 [default = false];
+ // enable self termination in the failure injection actor (HTTP handler and scheduled termination)
+ optional bool EnableFailureInjectionTermination = 71 [default = false];
}
@@ -1529,6 +1531,11 @@ message TTracingConfig {
optional string RootCA = 3;
}
+message TFailureInjectionConfig {
+ // Mean delay, in seconds, before the failure injection actor terminates the process.
+ // The actual delay is drawn from a Poisson distribution with this mean (see failure_injection.cpp).
+ // Only takes effect when TFeatureFlags.EnableFailureInjectionTermination is set;
+ // a value of 0 means no termination is scheduled.
+ optional uint32 ApproximateTerminationInterval = 1 [default = 0]; // disabled by default
+}
+
// This message is used to upload custom service configs
// to CMS. Config name is used to identify owner and
// data format.
@@ -1601,6 +1608,7 @@ message TAppConfig {
optional THttpProxyConfig HttpProxyConfig = 53;
optional TSchemeShardConfig SchemeShardConfig = 54;
optional TTracingConfig TracingConfig = 55;
+ optional TFailureInjectionConfig FailureInjectionConfig = 56;
repeated TNamedConfig NamedConfigs = 100;
optional string ClusterYamlConfig = 101;
diff --git a/ydb/core/util/failure_injection.cpp b/ydb/core/util/failure_injection.cpp
index 9eeaaa968ba..4dcc7e598b1 100644
--- a/ydb/core/util/failure_injection.cpp
+++ b/ydb/core/util/failure_injection.cpp
@@ -1,13 +1,22 @@
#include "failure_injection.h"
+
+#include <ydb/core/base/appdata.h>
+#include <ydb/core/protos/config.pb.h>
#include <ydb/core/protos/services.pb.h>
-#include <util/system/mutex.h>
-#include <util/generic/queue.h>
+
#include <library/cpp/monlib/service/pages/templates.h>
#include <library/cpp/lwtrace/all.h>
+#include <library/cpp/actors/core/events.h>
#include <library/cpp/actors/core/event_local.h>
#include <library/cpp/actors/core/actor_bootstrapped.h>
#include <library/cpp/actors/core/log.h>
+#include <util/generic/queue.h>
+#include <util/system/types.h>
+#include <util/system/mutex.h>
+
+#include <random>
+
using namespace NActors;
namespace NKikimr {
@@ -117,14 +126,26 @@ namespace NKikimr {
TVector<TString> Probes;
TFailureInjectionManager Manager;
bool Enabled = false;
+ bool EnableFailureInjectionTermination = false;
+ ui32 ApproximateTerminationInterval = 0;
+
+ // Actor-private events used only inside TFailureInjectionActor.
+ struct TEvPrivate {
+ enum EEv {
+ // Event ids start at the beginning of the ES_PRIVATE event space.
+ EvTerminateProcess = EventSpaceBegin(TEvents::ES_PRIVATE),
+ };
+
+ // Payload-free event: scheduled from Bootstrap() and dispatched to HandleTermination().
+ struct TEvTerminateProcess : TEventLocal<TEvTerminateProcess, EvTerminateProcess> {};
+ };
public:
static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
return NKikimrServices::TActivity::BS_FAILURE_INJECTION;
}
- TFailureInjectionActor()
+ TFailureInjectionActor(const NKikimrConfig::TFailureInjectionConfig& config, const NKikimr::TAppData& appData)
: TraceManager(*Singleton<TProbeRegistry>(), true)
+ , EnableFailureInjectionTermination(appData.FeatureFlags.GetEnableFailureInjectionTermination())
+ , ApproximateTerminationInterval(config.GetApproximateTerminationInterval())
{}
void Bootstrap(const TActorContext& /*ctx*/) {
@@ -145,6 +166,13 @@ namespace NKikimr {
TCallback callback(Probes);
TraceManager.ReadProbes(callback);
+ if (EnableFailureInjectionTermination && ApproximateTerminationInterval > 0) {
+ std::random_device rd;
+ std::mt19937 rng(rd());
+ std::poisson_distribution<> poisson(ApproximateTerminationInterval);
+ Schedule(TDuration::Seconds(poisson(rng)), new TEvPrivate::TEvTerminateProcess());
+ }
+
Become(&TFailureInjectionActor::StateFunc);
}
@@ -174,14 +202,49 @@ namespace NKikimr {
}
}
+        // Deliberately brings the process down via Y_FAIL, but only when the
+        // EnableFailureInjectionTermination feature flag was set at construction;
+        // otherwise this call does nothing.
+        void TerminateProcess() {
+            if (!EnableFailureInjectionTermination) {
+                return;
+            }
+
+            Y_FAIL("Terminating itself from TFailureInjectionActor");
+        }
+
+        // Replies to a monitoring HTTP request with a one-line HTML status page and then
+        // calls TerminateProcess().
+        //
+        // ev  - the incoming NMon::TEvHttpInfo request; the reply goes to ev->Sender and
+        //       carries the request's SubRequestId.
+        // ctx - actor context used to send the reply.
+        //
+        // The page states whether termination is about to happen or is disabled, depending on
+        // EnableFailureInjectionTermination.
+        // NOTE(review): the reply is only enqueued with ctx.Send() before TerminateProcess() runs;
+        // whether it reaches the client before the process dies is not visible here - confirm.
+        void SendReplyAndTerminateProcess(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) {
+            TStringStream str;
+
+            HTML(str) {
+                DIV() {
+                    // Only <h1> is opened, so only </h1> is closed (no stray </font>).
+                    if (EnableFailureInjectionTermination) {
+                        str << "<h1>" << "Process is going to terminate" << "</h1>";
+                    } else {
+                        str << "<h1>" << "Process termination is not enabled" << "</h1>";
+                    }
+                }
+            }
+
+            ctx.Send(ev->Sender, new NMon::TEvHttpInfoRes(str.Str(), ev->Get()->SubRequestId));
+
+            // No-op when the feature flag is off.
+            TerminateProcess();
+        }
+
void HandlePoison(TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx) {
ctx.Send(ev->Sender, new TEvents::TEvPoisonTaken);
Die(ctx);
}
+ // Handler for TEvPrivate::TEvTerminateProcess (registered in StateFunc, scheduled in Bootstrap).
+ // The event carries no data, so the argument is ignored; the work is delegated to
+ // TerminateProcess(), which itself re-checks EnableFailureInjectionTermination.
+ void HandleTermination(TEvPrivate::TEvTerminateProcess::TPtr&) {
+ TerminateProcess();
+ }
+
void Handle(NMon::TEvHttpInfo::TPtr& ev, const TActorContext& ctx) {
- TStringStream str;
+ if (ev->Get()->Request.GetMethod() == HTTP_METHOD_POST) {
+ if (ev->Get()->Request.GetPostParams().Has("terminate")) {
+ SendReplyAndTerminateProcess(ev, ctx);
+ }
+
+ return;
+ }
+ TStringStream str;
const auto& params = ev->Get()->Request.GetParams();
if (params.Has("queue")) {
TString queue = params.Get("queue");
@@ -296,13 +359,14 @@ namespace NKikimr {
STRICT_STFUNC(StateFunc,
HFunc(TEvents::TEvPoisonPill, HandlePoison)
HFunc(NMon::TEvHttpInfo, Handle)
+ hFunc(TEvPrivate::TEvTerminateProcess, HandleTermination);
)
};
} // anon
- IActor *CreateFailureInjectionActor() {
- return new TFailureInjectionActor();
+ IActor *CreateFailureInjectionActor(const NKikimrConfig::TFailureInjectionConfig& config, const NKikimr::TAppData& appData) {
+ return new TFailureInjectionActor(config, appData);
}
} // NKikimr
diff --git a/ydb/core/util/failure_injection.h b/ydb/core/util/failure_injection.h
index 65d3397d534..f5ead9ede06 100644
--- a/ydb/core/util/failure_injection.h
+++ b/ydb/core/util/failure_injection.h
@@ -1,10 +1,12 @@
#pragma once
#include "defs.h"
+#include <ydb/core/base/appdata.h>
+#include <ydb/core/protos/config.pb.h>
#include <library/cpp/actors/core/actor.h>
namespace NKikimr {
- NActors::IActor *CreateFailureInjectionActor();
+ NActors::IActor *CreateFailureInjectionActor(const NKikimrConfig::TFailureInjectionConfig& config, const NKikimr::TAppData& appData);
} // NKikimr