diff options
author | hor911 <hor911@ydb.tech> | 2022-07-23 11:14:08 +0300 |
---|---|---|
committer | hor911 <hor911@ydb.tech> | 2022-07-23 11:14:08 +0300 |
commit | 4fdcbb93872d8192072780d5b346e91d229fa951 (patch) | |
tree | 0959e6a40295908520037e1b9c73b075d07ee9a6 | |
parent | 5764e2a8718a138e1caac2e052c977e83ab950a4 (diff) | |
download | ydb-4fdcbb93872d8192072780d5b346e91d229fa951.tar.gz |
RetryCount sensors
3 files changed, 22 insertions, 15 deletions
diff --git a/ydb/core/yq/libs/control_plane_storage/internal/task_get.cpp b/ydb/core/yq/libs/control_plane_storage/internal/task_get.cpp index 18f7295aa33..be3dcc39d08 100644 --- a/ydb/core/yq/libs/control_plane_storage/internal/task_get.cpp +++ b/ydb/core/yq/libs/control_plane_storage/internal/task_get.cpp @@ -205,7 +205,8 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetTaskRequ ); auto responseTasks = std::make_shared<TResponseTasks>(); - auto prepareParams = [=, actorSystem=NActors::TActivationContext::ActorSystem(), responseTasks=responseTasks](const TVector<TResultSet>& resultSets) mutable { + + auto prepareParams = [=, rootCounters=Counters.Counters, actorSystem=NActors::TActivationContext::ActorSystem(), responseTasks=responseTasks](const TVector<TResultSet>& resultSets) mutable { TVector<TTaskInternal> tasks; TVector<TPickTaskParams> pickTaskParams; const auto now = TInstant::Now(); @@ -240,6 +241,8 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvGetTaskRequ taskInternal.ShouldAbortTask = !taskInternal.RetryLimiter.UpdateOnRetry(lastSeenAt, Config.TaskLeaseRetryPolicy, now); } + *rootCounters->GetSubgroup("scope", task.Scope)->GetSubgroup("query_id", task.QueryId)->GetCounter("RetryCount") = taskInternal.RetryLimiter.RetryCount; + CPS_LOG_AS_T(*actorSystem, "Task (Query): " << task.QueryId << " RetryRate: " << taskInternal.RetryLimiter.RetryRate << " RetryCounter: " << taskInternal.RetryLimiter.RetryCount << " At: " << taskInternal.RetryLimiter.RetryCounterUpdatedAt << (taskInternal.ShouldAbortTask ? " ABORTED" : "")); diff --git a/ydb/core/yq/libs/control_plane_storage/internal/task_ping.cpp b/ydb/core/yq/libs/control_plane_storage/internal/task_ping.cpp index 9fed43cf0a6..417d8ec78c1 100644 --- a/ydb/core/yq/libs/control_plane_storage/internal/task_ping.cpp +++ b/ydb/core/yq/libs/control_plane_storage/internal/task_ping.cpp @@ -21,12 +21,17 @@ bool IsFinishedStatus(YandexQuery::QueryMeta::ComputeStatus status) { std::tuple<TString, TParams, const std::function<std::pair<TString, NYdb::TParams>(const TVector<NYdb::TResultSet>&)>> ConstructHardPingTask( const Fq::Private::PingTaskRequest& request, std::shared_ptr<Fq::Private::PingTaskResult> response, - const TString& tablePathPrefix, const TDuration& automaticQueriesTtl, const TDuration& taskLeaseTtl, const THashMap<ui64, TRetryPolicyItem>& retryPolicies) { + const TString& tablePathPrefix, const TDuration& automaticQueriesTtl, const TDuration& taskLeaseTtl, const THashMap<ui64, TRetryPolicyItem>& retryPolicies, + ::NMonitoring::TDynamicCounterPtr rootCounters) { + + auto scope = request.scope(); + auto query_id = request.query_id().value(); + auto counters = rootCounters->GetSubgroup("scope", scope)->GetSubgroup("query_id", query_id); TSqlQueryBuilder readQueryBuilder(tablePathPrefix, "HardPingTask(read)"); readQueryBuilder.AddString("tenant", request.tenant()); - readQueryBuilder.AddString("scope", request.scope()); - readQueryBuilder.AddString("query_id", request.query_id().value()); + readQueryBuilder.AddString("scope", scope); + readQueryBuilder.AddString("query_id", query_id); readQueryBuilder.AddText( "$last_job_id = SELECT `" LAST_JOB_ID_COLUMN_NAME "` FROM `" QUERIES_TABLE_NAME "`\n" " WHERE `" SCOPE_COLUMN_NAME "` = $scope AND `" QUERY_ID_COLUMN_NAME "` = $query_id;\n" @@ -38,15 +43,12 @@ std::tuple<TString, TParams, const std::function<std::pair<TString, NYdb::TParam "FROM `" PENDING_SMALL_TABLE_NAME "` WHERE `" TENANT_COLUMN_NAME "` = $tenant AND `" SCOPE_COLUMN_NAME "` = $scope AND `" QUERY_ID_COLUMN_NAME "` = $query_id;\n" ); - auto prepareParams = [=, actorSystem = NActors::TActivationContext::ActorSystem()](const TVector<TResultSet>& resultSets) { + auto prepareParams = [=, counters=counters, actorSystem = NActors::TActivationContext::ActorSystem()](const TVector<TResultSet>& resultSets) { TString jobId; YandexQuery::Query query; YandexQuery::Internal::QueryInternal internal; YandexQuery::Job job; TString owner; - ui64 retryCounter = 0; - TInstant retryCounterUpdatedAt = TInstant::Zero(); - double retryRate = 0.0; if (resultSets.size() != 3) { ythrow TControlPlaneStorageException(TIssuesIds::INTERNAL_ERROR) << "RESULT SET SIZE of " << resultSets.size() << " != 3"; @@ -88,8 +90,8 @@ std::tuple<TString, TParams, const std::function<std::pair<TString, NYdb::TParam } retryLimiter.Assign( parser.ColumnParser(RETRY_COUNTER_COLUMN_NAME).GetOptionalUint64().GetOrElse(0), - retryCounterUpdatedAt = parser.ColumnParser(RETRY_COUNTER_UPDATE_COLUMN_NAME).GetOptionalTimestamp().GetOrElse(TInstant::Zero()), - retryRate = parser.ColumnParser(RETRY_RATE_COLUMN_NAME).GetOptionalDouble().GetOrElse(0.0) + parser.ColumnParser(RETRY_COUNTER_UPDATE_COLUMN_NAME).GetOptionalTimestamp().GetOrElse(TInstant::Zero()), + parser.ColumnParser(RETRY_RATE_COLUMN_NAME).GetOptionalDouble().GetOrElse(0.0) ); } @@ -286,13 +288,14 @@ std::tuple<TString, TParams, const std::function<std::pair<TString, NYdb::TParam "WHERE `" TENANT_COLUMN_NAME "` = $tenant AND `" SCOPE_COLUMN_NAME "` = $scope AND `" QUERY_ID_COLUMN_NAME "` = $query_id;\n" ); } else { + *counters->GetCounter("RetryCount") = retryLimiter.RetryCount; // update pending small ttl = TInstant::Now() + backoff; writeQueryBuilder.AddTimestamp("now", TInstant::Now()); writeQueryBuilder.AddTimestamp("ttl", ttl); - writeQueryBuilder.AddTimestamp("retry_counter_update_time", retryCounterUpdatedAt); - writeQueryBuilder.AddDouble("retry_rate", retryRate); - writeQueryBuilder.AddUint64("retry_counter", retryCounter); + writeQueryBuilder.AddTimestamp("retry_counter_update_time", retryLimiter.RetryCounterUpdatedAt); + writeQueryBuilder.AddDouble("retry_rate", retryLimiter.RetryRate); + writeQueryBuilder.AddUint64("retry_counter", retryLimiter.RetryCount); writeQueryBuilder.AddString("owner", owner); writeQueryBuilder.AddText( "UPDATE `" PENDING_SMALL_TABLE_NAME "` SET `" LAST_SEEN_AT_COLUMN_NAME "` = $now, `" ASSIGNED_UNTIL_COLUMN_NAME "` = $ttl,\n" @@ -444,7 +447,7 @@ void TYdbControlPlaneStorageActor::Handle(TEvControlPlaneStorage::TEvPingTaskReq if (request.status()) Counters.GetFinalStatusCounters(cloudId, scope)->IncByStatus(request.status()); auto pingTaskParams = DoesPingTaskUpdateQueriesTable(request) ? - ConstructHardPingTask(request, response, YdbConnection->TablePathPrefix, Config.AutomaticQueriesTtl, Config.TaskLeaseTtl, Config.RetryPolicies) : + ConstructHardPingTask(request, response, YdbConnection->TablePathPrefix, Config.AutomaticQueriesTtl, Config.TaskLeaseTtl, Config.RetryPolicies, Counters.Counters) : ConstructSoftPingTask(request, response, YdbConnection->TablePathPrefix, Config.TaskLeaseTtl); auto readQuery = std::get<0>(pingTaskParams); // Use std::get for win compiler auto readParams = std::get<1>(pingTaskParams); diff --git a/ydb/core/yq/libs/control_plane_storage/ydb_control_plane_storage_impl.h b/ydb/core/yq/libs/control_plane_storage/ydb_control_plane_storage_impl.h index 0938f1ee37d..ce7d7037087 100644 --- a/ydb/core/yq/libs/control_plane_storage/ydb_control_plane_storage_impl.h +++ b/ydb/core/yq/libs/control_plane_storage/ydb_control_plane_storage_impl.h @@ -219,9 +219,10 @@ class TYdbControlPlaneStorageActor : public NActors::TActorBootstrapped<TYdbCont TMap<TMetricsScope, TScopeCountersPtr> ScopeCounters; TMap<TMetricsScope, TFinalStatusCountersPtr> FinalStatusCounters; - ::NMonitoring::TDynamicCounterPtr Counters; public: + ::NMonitoring::TDynamicCounterPtr Counters; + explicit TCounters(const ::NMonitoring::TDynamicCounterPtr& counters) : Counters(counters) { |