diff options
author | hcpp <hcpp@ydb.tech> | 2022-11-07 09:42:53 +0300 |
---|---|---|
committer | hcpp <hcpp@ydb.tech> | 2022-11-07 09:42:53 +0300 |
commit | a8f6baed8b40e0ca1eda9692d9b3eeed727a3098 (patch) | |
tree | 12c1ca9fccc37639730c7cf3ff4a6d7a44eafbaa | |
parent | a18f362530891f312ab6fd30d28853ca1d81e20d (diff) | |
download | ydb-a8f6baed8b40e0ca1eda9692d9b3eeed727a3098.tar.gz |
retries has been added for get folder
-rw-r--r-- | ydb/core/yq/libs/control_plane_proxy/control_plane_proxy.cpp | 51 |
1 files changed, 45 insertions, 6 deletions
diff --git a/ydb/core/yq/libs/control_plane_proxy/control_plane_proxy.cpp b/ydb/core/yq/libs/control_plane_proxy/control_plane_proxy.cpp index ebc34c2b6cf..be60c5c780f 100644 --- a/ydb/core/yq/libs/control_plane_proxy/control_plane_proxy.cpp +++ b/ydb/core/yq/libs/control_plane_proxy/control_plane_proxy.cpp @@ -33,9 +33,10 @@ #include <library/cpp/lwtrace/mon/mon_lwtrace.h> #include <ydb/library/security/util.h> +#include <library/cpp/monlib/service/pages/templates.h> +#include <library/cpp/retry/retry_policy.h> #include <ydb/core/base/appdata.h> #include <ydb/core/mon/mon.h> -#include <library/cpp/monlib/service/pages/templates.h> #include <ydb/library/folder_service/folder_service.h> #include <ydb/library/folder_service/events.h> @@ -61,6 +62,7 @@ struct TRequestScopeCounters: public virtual TThrRefBase { ::NMonitoring::TDynamicCounters::TCounterPtr Ok; ::NMonitoring::TDynamicCounters::TCounterPtr Error; ::NMonitoring::TDynamicCounters::TCounterPtr Timeout; + ::NMonitoring::TDynamicCounters::TCounterPtr Retry; explicit TRequestScopeCounters(const TString& name) : Name(name) @@ -73,6 +75,7 @@ struct TRequestScopeCounters: public virtual TThrRefBase { Ok = subgroup->GetCounter("Ok", true); Error = subgroup->GetCounter("Error", true); Timeout = subgroup->GetCounter("Timeout", true); + Timeout = subgroup->GetCounter("Retry", true); } virtual ~TRequestScopeCounters() override { @@ -93,6 +96,7 @@ struct TRequestCommonCounters: public virtual TThrRefBase { ::NMonitoring::TDynamicCounters::TCounterPtr Ok; ::NMonitoring::TDynamicCounters::TCounterPtr Error; ::NMonitoring::TDynamicCounters::TCounterPtr Timeout; + ::NMonitoring::TDynamicCounters::TCounterPtr Retry; ::NMonitoring::THistogramPtr LatencyMs; explicit TRequestCommonCounters(const TString& name) @@ -106,6 +110,7 @@ struct TRequestCommonCounters: public virtual TThrRefBase { Ok = subgroup->GetCounter("Ok", true); Error = subgroup->GetCounter("Error", true); Timeout = subgroup->GetCounter("Timeout", true); + Retry = subgroup->GetCounter("Retry", true); LatencyMs = subgroup->GetHistogram("LatencyMs", GetLatencyHistogramBuckets()); } @@ -165,6 +170,16 @@ struct TRequestCounters { Scope->Timeout->Dec(); Common->Timeout->Dec(); } + + void IncRetry() { + Scope->Retry->Inc(); + Common->Retry->Inc(); + } + + void DecRetry() { + Scope->Retry->Dec(); + Common->Retry->Dec(); + } }; template<class TEventRequest, class TResponseProxy> @@ -220,6 +235,7 @@ class TResolveFolderActor : public NActors::TActorBootstrapped<TResolveFolderAct using TBase::PassAway; using TBase::Become; using TBase::Register; + using IRetryPolicy = IRetryPolicy<NKikimr::NFolderService::TEvFolderService::TEvGetFolderResponse::TPtr&>; ::NYq::TControlPlaneProxyConfig Config; TActorId Sender; @@ -231,6 +247,8 @@ class TResolveFolderActor : public NActors::TActorBootstrapped<TResolveFolderAct ui32 Cookie; TInstant StartTime; bool GetQuotas; + IRetryPolicy::IRetryState::TPtr RetryState; + public: TResolveFolderActor(const TRequestCommonCountersPtr& counters, @@ -249,6 +267,7 @@ public: , Cookie(cookie) , StartTime(TInstant::Now()) , GetQuotas(getQuotas) + , RetryState(GetRetryPolicy()->CreateRetryState()) {} static constexpr char ActorName[] = "YQ_CONTROL_PLANE_PROXY_RESOLVE_FOLDER"; @@ -256,11 +275,15 @@ public: void Bootstrap() { CPP_LOG_T("Resolve folder bootstrap. Folder id: " << FolderId << " Actor id: " << SelfId()); Become(&TResolveFolderActor::StateFunc, Config.RequestTimeout, new NActors::TEvents::TEvWakeup()); + Counters->InFly->Inc(); + Send(NKikimr::NFolderService::FolderServiceActorId(), CreateRequest().release(), 0, 0); + } + + std::unique_ptr<NKikimr::NFolderService::TEvFolderService::TEvGetFolderRequest> CreateRequest() { auto request = std::make_unique<NKikimr::NFolderService::TEvFolderService::TEvGetFolderRequest>(); request->Request.set_folder_id(FolderId); request->Token = Token; - Counters->InFly->Inc(); - Send(NKikimr::NFolderService::FolderServiceActorId(), request.release(), 0, 0); + return request; } STRICT_STFUNC(StateFunc, @@ -286,15 +309,21 @@ public: Counters->LatencyMs->Collect((TInstant::Now() - StartTime).MilliSeconds()); const auto& response = ev->Get()->Response; const auto& status = ev->Get()->Status; - if (!status.Ok() || !ev->Get()->Response.has_folder()) { - Counters->Error->Inc(); + if (!status.Ok() || !response.has_folder()) { TString errorMessage = "Msg: " + status.Msg + " Details: " + status.Details + " Code: " + ToString(status.GRpcStatusCode) + " InternalError: " + ToString(status.InternalError); + auto delay = RetryState->GetNextRetryDelay(ev); + if (delay) { + Counters->Retry->Inc(); + CPP_LOG_E("Folder resolve error. Retry with delay " << *delay << ", " << errorMessage); + TActivationContext::Schedule(*delay, new IEventHandle(NKikimr::NFolderService::FolderServiceActorId(), static_cast<const TActorId&>(SelfId()), CreateRequest().release())); + return; + } + Counters->Error->Inc(); CPP_LOG_E(errorMessage); NYql::TIssues issues; NYql::TIssue issue = MakeErrorIssue(TIssuesIds::INTERNAL_ERROR, "Resolve folder error"); issues.AddIssue(issue); Counters->Error->Inc(); - Counters->Timeout->Inc(); const TDuration delta = TInstant::Now() - StartTime; Probe(delta, false, false); Send(Sender, new TResponseProxy(issues), 0, Cookie); @@ -314,6 +343,16 @@ public: } PassAway(); } + +private: + static const IRetryPolicy::TPtr& GetRetryPolicy() { + static IRetryPolicy::TPtr policy = IRetryPolicy::GetExponentialBackoffPolicy([](NKikimr::NFolderService::TEvFolderService::TEvGetFolderResponse::TPtr& ev) { + const auto& response = ev->Get()->Response; + const auto& status = ev->Get()->Status; + return !status.Ok() || !response.has_folder() ? ERetryErrorClass::ShortRetry : ERetryErrorClass::NoRetry; + }, TDuration::MilliSeconds(10), TDuration::MilliSeconds(200), TDuration::Seconds(30), 5); + return policy; + } }; template<class TRequestProto, class TRequest, class TResponse, class TResponseProxy> |