diff options
author | t1mursadykov <t1mursadykov@ydb.tech> | 2023-04-04 11:40:13 +0300 |
---|---|---|
committer | t1mursadykov <t1mursadykov@ydb.tech> | 2023-04-04 11:40:13 +0300 |
commit | 7c5ef2a5f8a2b7e1be3102dd0920bb0a44bc3a73 (patch) | |
tree | a0babd08ecc4c4e7123bde80876f7cbb2acfbc8a | |
parent | b1096383308a7d5c9906a5395338d04f0d814c5f (diff) | |
download | ydb-7c5ef2a5f8a2b7e1be3102dd0920bb0a44bc3a73.tar.gz |
CMS request processing metrics
-rw-r--r-- | ydb/core/cms/cms.cpp | 22 | ||||
-rw-r--r-- | ydb/core/cms/cms_impl.h | 20 | ||||
-rw-r--r-- | ydb/core/protos/counters_cms.proto | 23 |
3 files changed, 57 insertions, 8 deletions
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index bc14f117e22..356cc7ada8a 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -15,12 +15,14 @@ #include <ydb/core/erasure/erasure.h> #include <ydb/core/protos/cms.pb.h> #include <ydb/core/protos/config_units.pb.h> +#include <ydb/core/protos/counters_cms.pb.h> #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <library/cpp/actors/core/actor.h> #include <library/cpp/actors/core/hfunc.h> #include <library/cpp/actors/interconnect/interconnect.h> +#include <util/datetime/base.h> #include <util/generic/serialized_enum.h> #include <util/string/builder.h> #include <util/string/join.h> @@ -1255,7 +1257,8 @@ void TCms::EnqueueRequest(TAutoPtr<IEventHandle> ev, const TActorContext &ctx) ctx.Schedule(TDuration::MilliSeconds(100), new TEvPrivate::TEvStartCollecting); } - NextQueue.push(ev); + NextQueue.emplace(ev, ctx.Now()); + TabletCounters->Simple()[COUNTER_REQUESTS_QUEUE_SIZE].Add(1); } void TCms::StartCollecting(const TActorContext &ctx) @@ -1263,6 +1266,8 @@ void TCms::StartCollecting(const TActorContext &ctx) Y_VERIFY(Queue.empty()); std::swap(NextQueue, Queue); + InfoCollectorStartTime = ctx.Now(); + auto collector = CreateInfoCollector(SelfId(), State->Config.InfoCollectionTimeout); ctx.ExecutorThread.RegisterActor(collector); } @@ -1339,7 +1344,10 @@ void TCms::PersistNodeTenants(TTransactionContext& txc, const TActorContext& ctx void TCms::ProcessQueue(const TActorContext &ctx) { while (!Queue.empty()) { - ProcessRequest(Queue.front(), ctx); + TabletCounters->Percentile()[COUNTER_LATENCY_REQUEST_QUEUING].IncrementFor((ctx.Now() - Queue.front().ArrivedTime).MilliSeconds()); + TabletCounters->Simple()[COUNTER_REQUESTS_QUEUE_SIZE].Sub(1); + + ProcessRequest(Queue.front().Request, ctx); Queue.pop(); } @@ -1389,6 +1397,8 @@ void TCms::Handle(TEvCms::TEvGetClusterInfoRequest::TPtr &ev, const TActorContex void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx) { + TabletCounters->Percentile()[COUNTER_LATENCY_INFO_COLLECTOR].IncrementFor((ctx.Now() - InfoCollectorStartTime).MilliSeconds()); + if (!ev->Get()->Success) { LOG_NOTICE_S(ctx, NKikimrServices::CMS, "Couldn't collect cluster state."); @@ -1559,6 +1569,8 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev, auto &rec = ev->Get()->Record; TString user = rec.GetUser(); + auto requestStartTime = TInstant::Now(); + auto actions(std::move(*rec.MutableActions())); rec.ClearActions(); @@ -1617,6 +1629,8 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev, auto handle = new IEventHandle(ev->Sender, SelfId(), resp.Release(), 0, ev->Cookie); Execute(CreateTxStorePermissions(std::move(ev->Release()), handle, user, std::move(copy)), ctx); } + + TabletCounters->Percentile()[COUNTER_LATENCY_PERMISSION_REQUEST].IncrementFor((TInstant::Now() - requestStartTime).MilliSeconds()); } void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx) @@ -1642,6 +1656,8 @@ void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx) TAutoPtr<TEvCms::TEvPermissionResponse> resp = new TEvCms::TEvPermissionResponse; TRequestInfo scheduled; + auto requestStartTime = TInstant::Now(); + ClusterInfo->LogManager.PushRollbackPoint(); for (const auto &scheduled_request : State->ScheduledRequests) { if (scheduled_request.second.Order < request.Order) { @@ -1688,6 +1704,8 @@ void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx) auto handle = new IEventHandle(ev->Sender, SelfId(), resp.Release(), 0, ev->Cookie); Execute(CreateTxStorePermissions(std::move(ev->Release()), handle, user, std::move(copy)), ctx); } + + TabletCounters->Percentile()[COUNTER_LATENCY_CHECK_REQUEST].IncrementFor((TInstant::Now() - requestStartTime).MilliSeconds()); } bool TCms::CheckNotificationDeadline(const TAction &action, TInstant time, diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h index 249c8238c47..8f940e39fe8 100644 --- a/ydb/core/cms/cms_impl.h +++ b/ydb/core/cms/cms_impl.h @@ -17,9 +17,9 @@ #include <ydb/core/tablet_flat/tablet_flat_executed.h> #include <ydb/core/engine/minikql/flat_local_tx_factory.h> - -#include <util/generic/stack.h> +#include <util/datetime/base.h> #include <util/generic/queue.h> +#include <util/generic/stack.h> namespace NKikimr::NCms { @@ -110,6 +110,16 @@ private: {} }; + struct TRequestsQueueItem { + TAutoPtr<IEventHandle> Request; + TInstant ArrivedTime; + + TRequestsQueueItem(TAutoPtr<IEventHandle> req, TInstant arrivedTime) + : Request(std::move(req)) + , ArrivedTime(arrivedTime) + {} + }; + ITransaction *CreateTxGetLogTail(TEvCms::TEvGetLogTailRequest::TPtr &ev); ITransaction *CreateTxInitScheme(); ITransaction *CreateTxLoadState(); @@ -402,8 +412,8 @@ private: TString NotSupportedReason; TQueue<TAutoPtr<IEventHandle>> InitQueue; - TQueue<TAutoPtr<IEventHandle>> Queue; - TQueue<TAutoPtr<IEventHandle>> NextQueue; + TQueue<TRequestsQueueItem> Queue; + TQueue<TRequestsQueueItem> NextQueue; TCmsStatePtr State; TLogger Logger; @@ -422,6 +432,8 @@ private: THolder<class NKikimr::TTabletCountersBase> TabletCountersPtr; TTabletCountersBase* TabletCounters; + TInstant InfoCollectorStartTime; + public: TCms(const TActorId &tablet, TTabletStorageInfo *info) : TActor(&TThis::StateInit) diff --git a/ydb/core/protos/counters_cms.proto b/ydb/core/protos/counters_cms.proto index 970ae802e21..952dc8c0810 100644 --- a/ydb/core/protos/counters_cms.proto +++ b/ydb/core/protos/counters_cms.proto @@ -11,6 +11,7 @@ enum ESimpleCounters { // 1 when local config differs from console COUNTER_BOOTSTRAP_DIFFERS = 1 [(CounterOpts) = {Name: "BootstrapConfigDiffersFromConsole"}]; + COUNTER_REQUESTS_QUEUE_SIZE = 2 [(CounterOpts) = {Name: "RequestsQueueSize"}]; } enum ECumulativeCounters { @@ -19,11 +20,29 @@ enum ECumulativeCounters { enum EPercentileCounters { option (GlobalCounterOpts) = { - Ranges { Value: 0 Name: "0 ms" } - Ranges { Value: 1 Name: "1 ms" } + Ranges { Value: 100 Name: "100 ms" } + Ranges { Value: 500 Name: "500 ms" } + Ranges { Value: 1000 Name: "1 s" } + Ranges { Value: 2000 Name: "2 s" } + Ranges { Value: 5000 Name: "5 s" } + Ranges { Value: 10000 Name: "10 s" } + Ranges { Value: 15000 Name: "15 s" } + Ranges { Value: 30000 Name: "30 s" } + Ranges { Value: 45000 Name: "45 s" } + Ranges { Value: 60000 Name: "60 s" } + Ranges { Value: 120000 Name: "120 s" } + Ranges { Value: 150000 Name: "150 s" } + Ranges { Value: 180000 Name: "180 s" } + Ranges { Value: 200000 Name: "200 s" } }; COUNTER_PERCENTILE_IGNORE = 0; + + COUNTER_LATENCY_INFO_COLLECTOR = 1 [(CounterOpts) = {Name: "LatencyInfoCollector"}]; + COUNTER_LATENCY_REQUEST_QUEUING = 2 [(CounterOpts) = {Name: "LatencyRequestQueuing"}]; + + COUNTER_LATENCY_PERMISSION_REQUEST = 3 [(CounterOpts) = {Name: "LatencyPermissionRequest"}]; + COUNTER_LATENCY_CHECK_REQUEST = 4 [(CounterOpts) = {Name: "LatencyCheckRequest"}]; } enum ETxTypes { |