diff options
author | capone212 <capone212@yandex-team.com> | 2024-08-22 23:17:10 +0300 |
---|---|---|
committer | capone212 <capone212@yandex-team.com> | 2024-08-22 23:28:31 +0300 |
commit | b874288489d25f18bcefad50de1844c95c428a00 (patch) | |
tree | 668a961b990ff4e32c87356c21033ca90827d208 | |
parent | 42ef08bcdd0e908143796ea97472d0abf107cbb8 (diff) | |
download | ydb-b874288489d25f18bcefad50de1844c95c428a00.tar.gz |
YT-22435: Using anonymous memory limit in tcmalloc
93d6930ce24f343ccd23c55e7264db80f2e0046b
-rw-r--r-- | yt/yt/core/misc/proc.cpp | 14 | ||||
-rw-r--r-- | yt/yt/core/misc/proc.h | 5 | ||||
-rw-r--r-- | yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp | 2 | ||||
-rw-r--r-- | yt/yt/library/profiling/resource_tracker/resource_tracker.cpp | 36 | ||||
-rw-r--r-- | yt/yt/library/profiling/resource_tracker/resource_tracker.h | 10 | ||||
-rw-r--r-- | yt/yt/library/program/config.cpp | 2 | ||||
-rw-r--r-- | yt/yt/library/program/config.h | 8 | ||||
-rw-r--r-- | yt/yt/library/program/helpers.cpp | 19 |
8 files changed, 82 insertions, 14 deletions
diff --git a/yt/yt/core/misc/proc.cpp b/yt/yt/core/misc/proc.cpp index 95e8a85ea5..6947e74465 100644 --- a/yt/yt/core/misc/proc.cpp +++ b/yt/yt/core/misc/proc.cpp @@ -523,6 +523,20 @@ TCgroupMemoryStat GetCgroupMemoryStat( #endif } +std::optional<i64> GetCgroupAnonymousMemoryLimit( + const TString& cgroupPath, + const TString& cgroupMountPoint) +{ +#ifdef _linux_ + TString path = cgroupMountPoint + "/memory" + cgroupPath + "/memory.anon.limit"; + auto content = Trim(TUnbufferedFileInput(path).ReadAll(), "\n"); + return FromString<i64>(content); +#else + Y_UNUSED(cgroupPath, cgroupMountPoint); + return {}; +#endif +} + THashMap<TString, i64> GetVmstat() { #ifdef _linux_ diff --git a/yt/yt/core/misc/proc.h b/yt/yt/core/misc/proc.h index 2ae656326a..be90a2a16d 100644 --- a/yt/yt/core/misc/proc.h +++ b/yt/yt/core/misc/proc.h @@ -104,6 +104,11 @@ TCgroupMemoryStat GetCgroupMemoryStat( const TString& cgroupPath, const TString& cgroupMountPoint = "/sys/fs/cgroup"); + +std::optional<i64> GetCgroupAnonymousMemoryLimit( + const TString& cgroupPath, + const TString& cgroupMountPoint = "/sys/fs/cgroup"); + THashMap<TString, i64> GetVmstat(); ui64 GetProcessCumulativeMajorPageFaults(int pid = -1); diff --git a/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp b/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp index 1828ada3c9..38e7f267a8 100644 --- a/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp +++ b/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp @@ -204,7 +204,7 @@ private: TString GetProfilePaths(const TString& timestamp) const { return Format( - "%v/oom_profile_paths_%v.pb.gz", + "%v/oom_profile_paths_%v.yson", Options_.HeapDumpDirectory, timestamp); } diff --git a/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp b/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp index 0b814383ac..09cf67a482 100644 --- a/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp +++ b/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp 
@@ -119,7 +119,10 @@ void TMemoryCgroupTracker::CollectSensors(ISensorWriter* writer) writer->AddGauge("/dirty", stat.Dirty); writer->AddGauge("/writeback", stat.Writeback); - TotalMemoryLimit.store(stat.HierarchicalMemoryLimit); + TotalMemoryLimit_.store(stat.HierarchicalMemoryLimit); + AnonymousMemoryLimit_.store(SafeGetAnonymousMemoryLimit( + group.Path, + stat.HierarchicalMemoryLimit)); return; } @@ -133,9 +136,31 @@ void TMemoryCgroupTracker::CollectSensors(ISensorWriter* writer) } } -i64 TMemoryCgroupTracker::GetTotalMemoryLimit() +i64 TMemoryCgroupTracker::SafeGetAnonymousMemoryLimit(const TString& cgroupPath, i64 totalMemoryLimit) { - return TotalMemoryLimit.load(); + try { + auto anonymousLimit = GetCgroupAnonymousMemoryLimit(cgroupPath); + auto result = anonymousLimit.value_or(totalMemoryLimit); + result = std::min(result, totalMemoryLimit); + return result != 0 ? result : totalMemoryLimit; + } catch (const std::exception& ex) { + if (!AnonymousLimitErrorLogged_) { + YT_LOG_INFO(ex, "Failed to collect cgroup anonymous memory limit"); + AnonymousLimitErrorLogged_ = true; + } + } + + return totalMemoryLimit; +} + +i64 TMemoryCgroupTracker::GetTotalMemoryLimit() const +{ + return TotalMemoryLimit_.load(); +} + +i64 TMemoryCgroupTracker::GetAnonymousMemoryLimit() const +{ + return AnonymousMemoryLimit_.load(); } TResourceTracker::TTimings TResourceTracker::TTimings::operator-(const TResourceTracker::TTimings& other) const @@ -424,6 +449,11 @@ i64 TResourceTracker::GetTotalMemoryLimit() return MemoryCgroupTracker_->GetTotalMemoryLimit(); } +i64 TResourceTracker::GetAnonymousMemoryLimit() +{ + return MemoryCgroupTracker_->GetAnonymousMemoryLimit(); +} + TResourceTrackerPtr GetResourceTracker() { return LeakyRefCountedSingleton<TResourceTracker>(); diff --git a/yt/yt/library/profiling/resource_tracker/resource_tracker.h b/yt/yt/library/profiling/resource_tracker/resource_tracker.h index 0bebe8a6c8..424c499ffc 100644 --- 
a/yt/yt/library/profiling/resource_tracker/resource_tracker.h +++ b/yt/yt/library/profiling/resource_tracker/resource_tracker.h @@ -34,12 +34,17 @@ class TMemoryCgroupTracker public: void CollectSensors(ISensorWriter* writer) override; - i64 GetTotalMemoryLimit(); + i64 GetTotalMemoryLimit() const; + i64 GetAnonymousMemoryLimit() const; private: bool CgroupErrorLogged_ = false; + bool AnonymousLimitErrorLogged_ = false; + + std::atomic<i64> TotalMemoryLimit_ = 0; + std::atomic<i64> AnonymousMemoryLimit_ = 0; - std::atomic<i64> TotalMemoryLimit = 0; + i64 SafeGetAnonymousMemoryLimit(const TString& cgroupPath, i64 totalMemoryLimit); }; //////////////////////////////////////////////////////////////////////////////// @@ -57,6 +62,7 @@ public: double GetCpuWait(); i64 GetTotalMemoryLimit(); + i64 GetAnonymousMemoryLimit(); void CollectSensors(ISensorWriter* writer) override; diff --git a/yt/yt/library/program/config.cpp b/yt/yt/library/program/config.cpp index a52015ca29..a8765946b8 100644 --- a/yt/yt/library/program/config.cpp +++ b/yt/yt/library/program/config.cpp @@ -12,6 +12,8 @@ void THeapSizeLimitConfig::Register(TRegistrar registrar) { registrar.Parameter("container_memory_ratio", &TThis::ContainerMemoryRatio) .Optional(); + registrar.Parameter("container_memory_margin", &TThis::ContainerMemoryMargin) + .Optional(); registrar.Parameter("hard", &TThis::Hard) .Default(false); registrar.Parameter("dump_memory_profile_on_violation", &TThis::DumpMemoryProfileOnViolation) diff --git a/yt/yt/library/program/config.h b/yt/yt/library/program/config.h index c91f6bac1e..5e3aea6b40 100644 --- a/yt/yt/library/program/config.h +++ b/yt/yt/library/program/config.h @@ -43,7 +43,13 @@ public: // If program heap size exceeds the limit tcmalloc is instructed to release memory to the kernel. std::optional<double> ContainerMemoryRatio; - //! If true tcmalloc crashes when system allocates more memory than #ContainerMemoryRatio. + //! 
Similar to #ContainerMemoryRatio, but is set in terms of absolute difference from + //! the container memory limit. + //! For example, if ContainerMemoryLimit=200Gb and ContainerMemoryMargin=1Gb + //! then tcmalloc limit will be 199Gb. + std::optional<double> ContainerMemoryMargin; + + //! If true tcmalloc crashes when system allocates more memory than #ContainerMemoryRatio/#ContainerMemoryMargin. bool Hard; bool DumpMemoryProfileOnViolation; diff --git a/yt/yt/library/program/helpers.cpp b/yt/yt/library/program/helpers.cpp index 606c1cc406..3b11d8bcb1 100644 --- a/yt/yt/library/program/helpers.cpp +++ b/yt/yt/library/program/helpers.cpp @@ -62,7 +62,7 @@ class TCMallocLimitsAdjuster public: void Adjust(const TTCMallocConfigPtr& config) { - i64 totalMemory = GetContainerMemoryLimit(); + i64 totalMemory = GetAnonymousMemoryLimit(); AdjustPageHeapLimit(totalMemory, config); AdjustAggressiveReleaseThreshold(totalMemory, config); SetupMemoryLimitHandler(config); @@ -120,27 +120,32 @@ private: } } - i64 GetContainerMemoryLimit() const + i64 GetAnonymousMemoryLimit() const { auto resourceTracker = NProfiling::GetResourceTracker(); if (!resourceTracker) { return 0; } - return resourceTracker->GetTotalMemoryLimit(); + return resourceTracker->GetAnonymousMemoryLimit(); } TAllocatorMemoryLimit ProposeHeapMemoryLimit(i64 totalMemory, const TTCMallocConfigPtr& config) const { - const auto& heapLimitConfig = config->HeapSizeLimit; + const auto& heapSizeConfig = config->HeapSizeLimit; - if (totalMemory == 0 || !heapLimitConfig->ContainerMemoryRatio) { + if (totalMemory == 0 || !heapSizeConfig->ContainerMemoryRatio && !heapSizeConfig->ContainerMemoryMargin) { return {}; } TAllocatorMemoryLimit proposed; - proposed.limit = *heapLimitConfig->ContainerMemoryRatio * totalMemory; - proposed.hard = heapLimitConfig->Hard; + proposed.hard = heapSizeConfig->Hard; + + if (heapSizeConfig->ContainerMemoryMargin) { + proposed.limit = totalMemory - *heapSizeConfig->ContainerMemoryMargin; + } 
else { + proposed.limit = *heapSizeConfig->ContainerMemoryRatio * totalMemory; + } return proposed; } |