aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authort1mursadykov <t1mursadykov@ydb.tech>2023-04-04 11:40:13 +0300
committert1mursadykov <t1mursadykov@ydb.tech>2023-04-04 11:40:13 +0300
commit7c5ef2a5f8a2b7e1be3102dd0920bb0a44bc3a73 (patch)
treea0babd08ecc4c4e7123bde80876f7cbb2acfbc8a
parentb1096383308a7d5c9906a5395338d04f0d814c5f (diff)
downloadydb-7c5ef2a5f8a2b7e1be3102dd0920bb0a44bc3a73.tar.gz
CMS request processing metrics
-rw-r--r--ydb/core/cms/cms.cpp22
-rw-r--r--ydb/core/cms/cms_impl.h20
-rw-r--r--ydb/core/protos/counters_cms.proto23
3 files changed, 57 insertions, 8 deletions
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp
index bc14f117e22..356cc7ada8a 100644
--- a/ydb/core/cms/cms.cpp
+++ b/ydb/core/cms/cms.cpp
@@ -15,12 +15,14 @@
#include <ydb/core/erasure/erasure.h>
#include <ydb/core/protos/cms.pb.h>
#include <ydb/core/protos/config_units.pb.h>
+#include <ydb/core/protos/counters_cms.pb.h>
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
#include <library/cpp/actors/core/actor.h>
#include <library/cpp/actors/core/hfunc.h>
#include <library/cpp/actors/interconnect/interconnect.h>
+#include <util/datetime/base.h>
#include <util/generic/serialized_enum.h>
#include <util/string/builder.h>
#include <util/string/join.h>
@@ -1255,7 +1257,8 @@ void TCms::EnqueueRequest(TAutoPtr<IEventHandle> ev, const TActorContext &ctx)
ctx.Schedule(TDuration::MilliSeconds(100), new TEvPrivate::TEvStartCollecting);
}
- NextQueue.push(ev);
+ NextQueue.emplace(ev, ctx.Now());
+ TabletCounters->Simple()[COUNTER_REQUESTS_QUEUE_SIZE].Add(1);
}
void TCms::StartCollecting(const TActorContext &ctx)
@@ -1263,6 +1266,8 @@ void TCms::StartCollecting(const TActorContext &ctx)
Y_VERIFY(Queue.empty());
std::swap(NextQueue, Queue);
+ InfoCollectorStartTime = ctx.Now();
+
auto collector = CreateInfoCollector(SelfId(), State->Config.InfoCollectionTimeout);
ctx.ExecutorThread.RegisterActor(collector);
}
@@ -1339,7 +1344,10 @@ void TCms::PersistNodeTenants(TTransactionContext& txc, const TActorContext& ctx
void TCms::ProcessQueue(const TActorContext &ctx)
{
while (!Queue.empty()) {
- ProcessRequest(Queue.front(), ctx);
+ TabletCounters->Percentile()[COUNTER_LATENCY_REQUEST_QUEUING].IncrementFor((ctx.Now() - Queue.front().ArrivedTime).MilliSeconds());
+ TabletCounters->Simple()[COUNTER_REQUESTS_QUEUE_SIZE].Sub(1);
+
+ ProcessRequest(Queue.front().Request, ctx);
Queue.pop();
}
@@ -1389,6 +1397,8 @@ void TCms::Handle(TEvCms::TEvGetClusterInfoRequest::TPtr &ev, const TActorContex
void TCms::Handle(TEvPrivate::TEvClusterInfo::TPtr &ev, const TActorContext &ctx)
{
+ TabletCounters->Percentile()[COUNTER_LATENCY_INFO_COLLECTOR].IncrementFor((ctx.Now() - InfoCollectorStartTime).MilliSeconds());
+
if (!ev->Get()->Success) {
LOG_NOTICE_S(ctx, NKikimrServices::CMS,
"Couldn't collect cluster state.");
@@ -1559,6 +1569,8 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev,
auto &rec = ev->Get()->Record;
TString user = rec.GetUser();
+ auto requestStartTime = TInstant::Now();
+
auto actions(std::move(*rec.MutableActions()));
rec.ClearActions();
@@ -1617,6 +1629,8 @@ void TCms::Handle(TEvCms::TEvPermissionRequest::TPtr &ev,
auto handle = new IEventHandle(ev->Sender, SelfId(), resp.Release(), 0, ev->Cookie);
Execute(CreateTxStorePermissions(std::move(ev->Release()), handle, user, std::move(copy)), ctx);
}
+
+ TabletCounters->Percentile()[COUNTER_LATENCY_PERMISSION_REQUEST].IncrementFor((TInstant::Now() - requestStartTime).MilliSeconds());
}
void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx)
@@ -1642,6 +1656,8 @@ void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx)
TAutoPtr<TEvCms::TEvPermissionResponse> resp = new TEvCms::TEvPermissionResponse;
TRequestInfo scheduled;
+ auto requestStartTime = TInstant::Now();
+
ClusterInfo->LogManager.PushRollbackPoint();
for (const auto &scheduled_request : State->ScheduledRequests) {
if (scheduled_request.second.Order < request.Order) {
@@ -1688,6 +1704,8 @@ void TCms::Handle(TEvCms::TEvCheckRequest::TPtr &ev, const TActorContext &ctx)
auto handle = new IEventHandle(ev->Sender, SelfId(), resp.Release(), 0, ev->Cookie);
Execute(CreateTxStorePermissions(std::move(ev->Release()), handle, user, std::move(copy)), ctx);
}
+
+ TabletCounters->Percentile()[COUNTER_LATENCY_CHECK_REQUEST].IncrementFor((TInstant::Now() - requestStartTime).MilliSeconds());
}
bool TCms::CheckNotificationDeadline(const TAction &action, TInstant time,
diff --git a/ydb/core/cms/cms_impl.h b/ydb/core/cms/cms_impl.h
index 249c8238c47..8f940e39fe8 100644
--- a/ydb/core/cms/cms_impl.h
+++ b/ydb/core/cms/cms_impl.h
@@ -17,9 +17,9 @@
#include <ydb/core/tablet_flat/tablet_flat_executed.h>
#include <ydb/core/engine/minikql/flat_local_tx_factory.h>
-
-#include <util/generic/stack.h>
+#include <util/datetime/base.h>
#include <util/generic/queue.h>
+#include <util/generic/stack.h>
namespace NKikimr::NCms {
@@ -110,6 +110,16 @@ private:
{}
};
+ struct TRequestsQueueItem {
+ TAutoPtr<IEventHandle> Request;
+ TInstant ArrivedTime;
+
+ TRequestsQueueItem(TAutoPtr<IEventHandle> req, TInstant arrivedTime)
+ : Request(std::move(req))
+ , ArrivedTime(arrivedTime)
+ {}
+ };
+
ITransaction *CreateTxGetLogTail(TEvCms::TEvGetLogTailRequest::TPtr &ev);
ITransaction *CreateTxInitScheme();
ITransaction *CreateTxLoadState();
@@ -402,8 +412,8 @@ private:
TString NotSupportedReason;
TQueue<TAutoPtr<IEventHandle>> InitQueue;
- TQueue<TAutoPtr<IEventHandle>> Queue;
- TQueue<TAutoPtr<IEventHandle>> NextQueue;
+ TQueue<TRequestsQueueItem> Queue;
+ TQueue<TRequestsQueueItem> NextQueue;
TCmsStatePtr State;
TLogger Logger;
@@ -422,6 +432,8 @@ private:
THolder<class NKikimr::TTabletCountersBase> TabletCountersPtr;
TTabletCountersBase* TabletCounters;
+ TInstant InfoCollectorStartTime;
+
public:
TCms(const TActorId &tablet, TTabletStorageInfo *info)
: TActor(&TThis::StateInit)
diff --git a/ydb/core/protos/counters_cms.proto b/ydb/core/protos/counters_cms.proto
index 970ae802e21..952dc8c0810 100644
--- a/ydb/core/protos/counters_cms.proto
+++ b/ydb/core/protos/counters_cms.proto
@@ -11,6 +11,7 @@ enum ESimpleCounters {
// 1 when local config differs from console
COUNTER_BOOTSTRAP_DIFFERS = 1 [(CounterOpts) = {Name: "BootstrapConfigDiffersFromConsole"}];
+ COUNTER_REQUESTS_QUEUE_SIZE = 2 [(CounterOpts) = {Name: "RequestsQueueSize"}];
}
enum ECumulativeCounters {
@@ -19,11 +20,29 @@ enum ECumulativeCounters {
enum EPercentileCounters {
option (GlobalCounterOpts) = {
- Ranges { Value: 0 Name: "0 ms" }
- Ranges { Value: 1 Name: "1 ms" }
+ Ranges { Value: 100 Name: "100 ms" }
+ Ranges { Value: 500 Name: "500 ms" }
+ Ranges { Value: 1000 Name: "1 s" }
+ Ranges { Value: 2000 Name: "2 s" }
+ Ranges { Value: 5000 Name: "5 s" }
+ Ranges { Value: 10000 Name: "10 s" }
+ Ranges { Value: 15000 Name: "15 s" }
+ Ranges { Value: 30000 Name: "30 s" }
+ Ranges { Value: 45000 Name: "45 s" }
+ Ranges { Value: 60000 Name: "60 s" }
+ Ranges { Value: 120000 Name: "120 s" }
+ Ranges { Value: 150000 Name: "150 s" }
+ Ranges { Value: 180000 Name: "180 s" }
+ Ranges { Value: 200000 Name: "200 s" }
};
COUNTER_PERCENTILE_IGNORE = 0;
+
+ COUNTER_LATENCY_INFO_COLLECTOR = 1 [(CounterOpts) = {Name: "LatencyInfoCollector"}];
+ COUNTER_LATENCY_REQUEST_QUEUING = 2 [(CounterOpts) = {Name: "LatencyRequestQueuing"}];
+
+ COUNTER_LATENCY_PERMISSION_REQUEST = 3 [(CounterOpts) = {Name: "LatencyPermissionRequest"}];
+ COUNTER_LATENCY_CHECK_REQUEST = 4 [(CounterOpts) = {Name: "LatencyCheckRequest"}];
}
enum ETxTypes {