summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author hor911 <[email protected]> 2023-02-15 21:12:06 +0300
committer hor911 <[email protected]> 2023-02-15 21:12:06 +0300
commit 6f3a7e5889e0ebcf5e02329a9a6bb14ae32dbc66 (patch)
tree 7fdeb4d27197767c363ed49317b8cde123a10fca
parent edf6e0cd45309faebb548c84879d12781b6f2fe2 (diff)
Setting to dump mem alloc stack to files
-rw-r--r-- ydb/core/driver_lib/run/kikimr_services_initializers.cpp 9
-rw-r--r-- ydb/core/mon_alloc/monitor.cpp 49
-rw-r--r-- ydb/core/mon_alloc/monitor.h 3
-rw-r--r-- ydb/core/mon_alloc/stats.cpp 10
-rw-r--r-- ydb/core/mon_alloc/stats.h 20
-rw-r--r-- ydb/core/protos/config.proto 1
6 files changed, 67 insertions, 25 deletions
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
index dc75e2ed254..9ce77bebbed 100644
--- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
+++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp
@@ -1974,9 +1974,16 @@ void TMemProfMonitorInitializer::InitializeServices(
NActors::TActorSystemSetup* setup,
const NKikimr::TAppData* appData)
{
+ TString filePathPrefix;
+
+ if (Config.HasMonitoringConfig()) {
+ filePathPrefix = Config.GetMonitoringConfig().GetMemAllocDumpPathPrefix();
+ }
+
IActor* monitorActor = CreateMemProfMonitor(
1, // seconds
- appData->Counters);
+ appData->Counters,
+ filePathPrefix);
setup->LocalServices.emplace_back(
MakeMemProfMonitorID(NodeId),
diff --git a/ydb/core/mon_alloc/monitor.cpp b/ydb/core/mon_alloc/monitor.cpp
index 4069bc5e9e9..652d5bdf4ab 100644
--- a/ydb/core/mon_alloc/monitor.cpp
+++ b/ydb/core/mon_alloc/monitor.cpp
@@ -412,6 +412,7 @@ namespace NKikimr {
const TDuration Interval;
const std::unique_ptr<IAllocMonitor> AllocMonitor;
+ const TString FilePathPrefix;
TInstant LogMemoryStatsTime = TInstant::Now() - TDumpLogConfig::DumpInterval;
@@ -422,9 +423,10 @@ namespace NKikimr {
return ACTORLIB_STATS;
}
- TMemProfMonitor(TDuration interval, std::unique_ptr<IAllocMonitor> allocMonitor)
+ TMemProfMonitor(TDuration interval, std::unique_ptr<IAllocMonitor> allocMonitor, const TString& filePathPrefix)
: Interval(interval)
, AllocMonitor(std::move(allocMonitor))
+ , FilePathPrefix(filePathPrefix)
{}
void Bootstrap(const TActorContext& ctx) {
@@ -436,6 +438,8 @@ namespace NKikimr {
return;
}
+ LOG_NOTICE_S(ctx, NKikimrServices::MEMORY_PROFILER, "Bootstrapped");
+
auto* indexPage = mon->RegisterIndexPage("memory", "Memory");
mon->RegisterActorPage(
indexPage, "statistics", "Statistics",
@@ -459,26 +463,38 @@ namespace NKikimr {
void LogMemoryStats(const TActorContext& ctx, size_t limit) noexcept {
LogMemoryStatsTime = TInstant::Now();
- TStringStream out;
- AllocMonitor->DumpForLog(out, limit);
- TVector<TString> split;
- Split(out.Str(), "\n", split);
- for (const auto& line : split) {
- LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, line);
+ if (FilePathPrefix) {
+ try {
+ TString name = LogMemoryStatsTime.ToStringUpToSeconds();
+ SubstGlobal(name, ':', '-');
+ TString fileName = FilePathPrefix + name + ".mem";
+ TFileOutput out(fileName);
+ AllocMonitor->DumpForLog(out, limit);
+ LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, "Memory stats saved to " + fileName);
+ } catch (const std::exception& err) {
+ LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, err.what());
+ }
+ } else {
+ TStringStream out;
+ AllocMonitor->DumpForLog(out, limit);
+ TVector<TString> split;
+ Split(out.Str(), "\n", split);
+ for (const auto& line : split) {
+ LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, line);
+ }
}
}
void LogMemoryStatsIfNeeded(const TActorContext& ctx) noexcept {
auto memoryUsage = TAllocState::GetMemoryUsage();
-
- if (IsDangerous && memoryUsage < TDumpLogConfig::RssUsageSoft) {
+ auto usage = memoryUsage.Usage();
+ LOG_DEBUG_S(ctx, NKikimrServices::MEMORY_PROFILER, memoryUsage.ToString());
+ if (IsDangerous && usage < TDumpLogConfig::RssUsageSoft) {
IsDangerous = false;
- } else if (!IsDangerous && memoryUsage > TDumpLogConfig::RssUsageHard) {
+ } else if (!IsDangerous && usage > TDumpLogConfig::RssUsageHard) {
if (TInstant::Now() - LogMemoryStatsTime > TDumpLogConfig::DumpInterval) {
IsDangerous = true;
- LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER,
- "RSS usage " << memoryUsage * 100. << "%");
LogMemoryStats(ctx, 256);
ctx.Schedule(TDumpLogConfig::RepeatInterval, new TEvDumpLogStats);
}
@@ -493,9 +509,7 @@ namespace NKikimr {
void HandleDump(TEvDumpLogStats::TPtr&, const TActorContext& ctx) noexcept {
if (IsDangerous) {
- auto memoryUsage = TAllocState::GetMemoryUsage();
- LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER,
- "RSS usage " << memoryUsage * 100. << "%");
+ LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, TAllocState::GetMemoryUsage().ToString());
LogMemoryStats(ctx, 256);
}
}
@@ -516,9 +530,10 @@ namespace NKikimr {
};
}
- IActor* CreateMemProfMonitor(ui32 intervalSec, TDynamicCountersPtr counters) {
+ IActor* CreateMemProfMonitor(ui32 intervalSec, TDynamicCountersPtr counters, const TString& filePathPrefix) {
return new TMemProfMonitor(
TDuration::Seconds(intervalSec),
- CreateAllocMonitor(GetServiceCounters(counters, "utils")));
+ CreateAllocMonitor(GetServiceCounters(counters, "utils")),
+ filePathPrefix);
}
}
diff --git a/ydb/core/mon_alloc/monitor.h b/ydb/core/mon_alloc/monitor.h
index 3209e44305a..52ade6297ad 100644
--- a/ydb/core/mon_alloc/monitor.h
+++ b/ydb/core/mon_alloc/monitor.h
@@ -46,5 +46,6 @@ namespace NKikimr {
NActors::IActor* CreateMemProfMonitor(
ui32 intervalSec,
- TIntrusivePtr<::NMonitoring::TDynamicCounters> counters);
+ TIntrusivePtr<::NMonitoring::TDynamicCounters> counters,
+ const TString& filePathPrefix = "");
}
diff --git a/ydb/core/mon_alloc/stats.cpp b/ydb/core/mon_alloc/stats.cpp
index c8e6844c17b..569806fc034 100644
--- a/ydb/core/mon_alloc/stats.cpp
+++ b/ydb/core/mon_alloc/stats.cpp
@@ -297,12 +297,12 @@ namespace NKikimr {
return AllocState->GetAllocatedMemoryEstimate();
}
- double TAllocState::GetMemoryUsage() {
+ TMemoryUsage TAllocState::GetMemoryUsage() {
NActors::TProcStat procStat;
procStat.Fill(getpid());
- if (!procStat.CGroupMemLim) {
- return 0;
- }
- return (double)procStat.AnonRss / procStat.CGroupMemLim;
+ return TMemoryUsage {
+ .AnonRss = procStat.AnonRss,
+ .CGroupLimit = procStat.CGroupMemLim
+ };
}
}
diff --git a/ydb/core/mon_alloc/stats.h b/ydb/core/mon_alloc/stats.h
index 0a54242bb07..e21c0eac3cd 100644
--- a/ydb/core/mon_alloc/stats.h
+++ b/ydb/core/mon_alloc/stats.h
@@ -21,10 +21,28 @@ namespace NKikimr {
virtual ui64 GetAllocatedMemoryEstimate() const = 0;
};
+ // Process anonymous RSS paired with the cgroup memory limit it is measured against.
+ struct TMemoryUsage {
+ ui64 AnonRss; // reported as bytes by ToString()
+ ui64 CGroupLimit; // reported as bytes by ToString(); 0 is treated as "no limit"
+
+ double Usage() const { // AnonRss / CGroupLimit as a fraction, or 0 when the limit is 0
+ return CGroupLimit ? static_cast<double>(AnonRss) / CGroupLimit : 0;
+ }
+
+ TString ToString() const {
+ // Test the limit, not the ratio: zero RSS under a real limit must still report the limit.
+ if (CGroupLimit) {
+ return TStringBuilder() << "RSS usage " << Usage() * 100. << "% (" << AnonRss << " of " << CGroupLimit << " bytes)";
+ }
+ return TStringBuilder() << "RSS usage " << AnonRss << " bytes";
+ }
+ };
+
struct TAllocState {
static std::unique_ptr<IAllocState> AllocState;
static ui64 GetAllocatedMemoryEstimate();
- static double GetMemoryUsage();
+ static TMemoryUsage GetMemoryUsage();
};
}
diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto
index 3b185fe9b49..5e9ef843637 100644
--- a/ydb/core/protos/config.proto
+++ b/ydb/core/protos/config.proto
@@ -512,6 +512,7 @@ message TMonitoringConfig {
optional string RedirectMainPageTo = 13 [default = "monitoring/"];
optional string MonitoringCertificate = 14;
optional string MonitoringCertificateFile = 15;
+ optional string MemAllocDumpPathPrefix = 16;
}
message TRestartsCountConfig {