diff options
author | hor911 <[email protected]> | 2023-02-15 21:12:06 +0300 |
---|---|---|
committer | hor911 <[email protected]> | 2023-02-15 21:12:06 +0300 |
commit | 6f3a7e5889e0ebcf5e02329a9a6bb14ae32dbc66 (patch) | |
tree | 7fdeb4d27197767c363ed49317b8cde123a10fca | |
parent | edf6e0cd45309faebb548c84879d12781b6f2fe2 (diff) |
Setting to dump mem alloc stack to files
-rw-r--r-- | ydb/core/driver_lib/run/kikimr_services_initializers.cpp | 9 | ||||
-rw-r--r-- | ydb/core/mon_alloc/monitor.cpp | 49 | ||||
-rw-r--r-- | ydb/core/mon_alloc/monitor.h | 3 | ||||
-rw-r--r-- | ydb/core/mon_alloc/stats.cpp | 10 | ||||
-rw-r--r-- | ydb/core/mon_alloc/stats.h | 20 | ||||
-rw-r--r-- | ydb/core/protos/config.proto | 1 |
6 files changed, 67 insertions, 25 deletions
diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index dc75e2ed254..9ce77bebbed 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -1974,9 +1974,16 @@ void TMemProfMonitorInitializer::InitializeServices( NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { + TString filePathPrefix; + + if (Config.HasMonitoringConfig()) { + filePathPrefix = Config.GetMonitoringConfig().GetMemAllocDumpPathPrefix(); + } + IActor* monitorActor = CreateMemProfMonitor( 1, // seconds - appData->Counters); + appData->Counters, + filePathPrefix); setup->LocalServices.emplace_back( MakeMemProfMonitorID(NodeId), diff --git a/ydb/core/mon_alloc/monitor.cpp b/ydb/core/mon_alloc/monitor.cpp index 4069bc5e9e9..652d5bdf4ab 100644 --- a/ydb/core/mon_alloc/monitor.cpp +++ b/ydb/core/mon_alloc/monitor.cpp @@ -412,6 +412,7 @@ namespace NKikimr { const TDuration Interval; const std::unique_ptr<IAllocMonitor> AllocMonitor; + const TString FilePathPrefix; TInstant LogMemoryStatsTime = TInstant::Now() - TDumpLogConfig::DumpInterval; @@ -422,9 +423,10 @@ namespace NKikimr { return ACTORLIB_STATS; } - TMemProfMonitor(TDuration interval, std::unique_ptr<IAllocMonitor> allocMonitor) + TMemProfMonitor(TDuration interval, std::unique_ptr<IAllocMonitor> allocMonitor, const TString& filePathPrefix) : Interval(interval) , AllocMonitor(std::move(allocMonitor)) + , FilePathPrefix(filePathPrefix) {} void Bootstrap(const TActorContext& ctx) { @@ -436,6 +438,8 @@ namespace NKikimr { return; } + LOG_NOTICE_S(ctx, NKikimrServices::MEMORY_PROFILER, "Bootstrapped"); + auto* indexPage = mon->RegisterIndexPage("memory", "Memory"); mon->RegisterActorPage( indexPage, "statistics", "Statistics", @@ -459,26 +463,38 @@ namespace NKikimr { void LogMemoryStats(const TActorContext& ctx, size_t limit) noexcept { LogMemoryStatsTime = TInstant::Now(); - TStringStream out; - AllocMonitor->DumpForLog(out, limit); - TVector<TString> split; - Split(out.Str(), "\n", split); - for (const auto& line : split) { - LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, line); + if (FilePathPrefix) { + try { + TString name = LogMemoryStatsTime.ToStringUpToSeconds(); + SubstGlobal(name, ':', '-'); + TString fileName = FilePathPrefix + name + ".mem"; + TFileOutput out(fileName); + AllocMonitor->DumpForLog(out, limit); + LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, "Memory stats saved to " + fileName); + } catch (const std::exception& err) { + LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, err.what()); + } + } else { + TStringStream out; + AllocMonitor->DumpForLog(out, limit); + TVector<TString> split; + Split(out.Str(), "\n", split); + for (const auto& line : split) { + LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, line); + } } } void LogMemoryStatsIfNeeded(const TActorContext& ctx) noexcept { auto memoryUsage = TAllocState::GetMemoryUsage(); - - if (IsDangerous && memoryUsage < TDumpLogConfig::RssUsageSoft) { + auto usage = memoryUsage.Usage(); + LOG_DEBUG_S(ctx, NKikimrServices::MEMORY_PROFILER, memoryUsage.ToString()); + if (IsDangerous && usage < TDumpLogConfig::RssUsageSoft) { IsDangerous = false; - } else if (!IsDangerous && memoryUsage > TDumpLogConfig::RssUsageHard) { + } else if (!IsDangerous && usage > TDumpLogConfig::RssUsageHard) { if (TInstant::Now() - LogMemoryStatsTime > TDumpLogConfig::DumpInterval) { IsDangerous = true; - LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, - "RSS usage " << memoryUsage * 100. << "%"); LogMemoryStats(ctx, 256); ctx.Schedule(TDumpLogConfig::RepeatInterval, new TEvDumpLogStats); } @@ -493,9 +509,7 @@ namespace NKikimr { void HandleDump(TEvDumpLogStats::TPtr&, const TActorContext& ctx) noexcept { if (IsDangerous) { - auto memoryUsage = TAllocState::GetMemoryUsage(); - LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, - "RSS usage " << memoryUsage * 100. << "%"); + LOG_WARN_S(ctx, NKikimrServices::MEMORY_PROFILER, TAllocState::GetMemoryUsage().ToString()); LogMemoryStats(ctx, 256); } } @@ -516,9 +530,10 @@ namespace NKikimr { }; } - IActor* CreateMemProfMonitor(ui32 intervalSec, TDynamicCountersPtr counters) { + IActor* CreateMemProfMonitor(ui32 intervalSec, TDynamicCountersPtr counters, const TString& filePathPrefix) { return new TMemProfMonitor( TDuration::Seconds(intervalSec), - CreateAllocMonitor(GetServiceCounters(counters, "utils"))); + CreateAllocMonitor(GetServiceCounters(counters, "utils")), + filePathPrefix); } } diff --git a/ydb/core/mon_alloc/monitor.h b/ydb/core/mon_alloc/monitor.h index 3209e44305a..52ade6297ad 100644 --- a/ydb/core/mon_alloc/monitor.h +++ b/ydb/core/mon_alloc/monitor.h @@ -46,5 +46,6 @@ namespace NKikimr { NActors::IActor* CreateMemProfMonitor( ui32 intervalSec, - TIntrusivePtr<::NMonitoring::TDynamicCounters> counters); + TIntrusivePtr<::NMonitoring::TDynamicCounters> counters, + const TString& filePathPrefix = ""); } diff --git a/ydb/core/mon_alloc/stats.cpp b/ydb/core/mon_alloc/stats.cpp index c8e6844c17b..569806fc034 100644 --- a/ydb/core/mon_alloc/stats.cpp +++ b/ydb/core/mon_alloc/stats.cpp @@ -297,12 +297,12 @@ namespace NKikimr { return AllocState->GetAllocatedMemoryEstimate(); } - double TAllocState::GetMemoryUsage() { + TMemoryUsage TAllocState::GetMemoryUsage() { NActors::TProcStat procStat; procStat.Fill(getpid()); - if (!procStat.CGroupMemLim) { - return 0; - } - return (double)procStat.AnonRss / procStat.CGroupMemLim; + return TMemoryUsage { + .AnonRss = procStat.AnonRss, + .CGroupLimit = procStat.CGroupMemLim + }; } } diff --git a/ydb/core/mon_alloc/stats.h b/ydb/core/mon_alloc/stats.h index 0a54242bb07..e21c0eac3cd 100644 --- a/ydb/core/mon_alloc/stats.h +++ b/ydb/core/mon_alloc/stats.h @@ -21,10 +21,28 @@ namespace NKikimr { virtual ui64 GetAllocatedMemoryEstimate() const = 0; }; + struct TMemoryUsage { + ui64 AnonRss; + ui64 CGroupLimit; + + double Usage() const { + return CGroupLimit ? static_cast<double>(AnonRss) / CGroupLimit : 0; + } + + TString ToString() const { + auto usage = Usage(); + if (usage) { + return TStringBuilder() << "RSS usage " << usage * 100. << "% (" << AnonRss << " of " << CGroupLimit << " bytes)"; + } else { + return TStringBuilder() << "RSS usage " << AnonRss << " bytes"; + } + } + }; + struct TAllocState { static std::unique_ptr<IAllocState> AllocState; static ui64 GetAllocatedMemoryEstimate(); - static double GetMemoryUsage(); + static TMemoryUsage GetMemoryUsage(); }; } diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index 3b185fe9b49..5e9ef843637 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -512,6 +512,7 @@ message TMonitoringConfig { optional string RedirectMainPageTo = 13 [default = "monitoring/"]; optional string MonitoringCertificate = 14; optional string MonitoringCertificateFile = 15; + optional string MemAllocDumpPathPrefix = 16; } message TRestartsCountConfig { |