aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: capone212 <capone212@yandex-team.com> 2024-08-22 23:17:10 +0300
committer: capone212 <capone212@yandex-team.com> 2024-08-22 23:28:31 +0300
commit: b874288489d25f18bcefad50de1844c95c428a00 (patch)
tree: 668a961b990ff4e32c87356c21033ca90827d208
parent: 42ef08bcdd0e908143796ea97472d0abf107cbb8 (diff)
download: ydb-b874288489d25f18bcefad50de1844c95c428a00.tar.gz
YT-22435: Using anonymous memory limit in tcmalloc
93d6930ce24f343ccd23c55e7264db80f2e0046b
-rw-r--r-- yt/yt/core/misc/proc.cpp 14
-rw-r--r-- yt/yt/core/misc/proc.h 5
-rw-r--r-- yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp 2
-rw-r--r-- yt/yt/library/profiling/resource_tracker/resource_tracker.cpp 36
-rw-r--r-- yt/yt/library/profiling/resource_tracker/resource_tracker.h 10
-rw-r--r-- yt/yt/library/program/config.cpp 2
-rw-r--r-- yt/yt/library/program/config.h 8
-rw-r--r-- yt/yt/library/program/helpers.cpp 19
8 files changed, 82 insertions, 14 deletions
diff --git a/yt/yt/core/misc/proc.cpp b/yt/yt/core/misc/proc.cpp
index 95e8a85ea5..6947e74465 100644
--- a/yt/yt/core/misc/proc.cpp
+++ b/yt/yt/core/misc/proc.cpp
@@ -523,6 +523,20 @@ TCgroupMemoryStat GetCgroupMemoryStat(
#endif
}
+//! Reads the anonymous memory limit of a memory cgroup from the file
+//! `<cgroupMountPoint>/memory<cgroupPath>/memory.anon.limit` and parses it as i64.
+//! On non-Linux builds the arguments are ignored and an empty optional is returned.
+//! NB: no error handling is done here; read and parse failures are presumably reported
+//! via exceptions (the caller in resource_tracker.cpp wraps this call in try/catch).
+std::optional<i64> GetCgroupAnonymousMemoryLimit(
+ const TString& cgroupPath,
+ const TString& cgroupMountPoint)
+{
+#ifdef _linux_
+ TString path = cgroupMountPoint + "/memory" + cgroupPath + "/memory.anon.limit";
+ // Drop newline characters so that the remaining text can be parsed as an integer.
+ auto content = Trim(TUnbufferedFileInput(path).ReadAll(), "\n");
+ return FromString<i64>(content);
+#else
+ Y_UNUSED(cgroupPath, cgroupMountPoint);
+ return {};
+#endif
+}
+
THashMap<TString, i64> GetVmstat()
{
#ifdef _linux_
diff --git a/yt/yt/core/misc/proc.h b/yt/yt/core/misc/proc.h
index 2ae656326a..be90a2a16d 100644
--- a/yt/yt/core/misc/proc.h
+++ b/yt/yt/core/misc/proc.h
@@ -104,6 +104,11 @@ TCgroupMemoryStat GetCgroupMemoryStat(
const TString& cgroupPath,
const TString& cgroupMountPoint = "/sys/fs/cgroup");
+
+//! Returns the value stored in the `memory.anon.limit` file of the memory cgroup
+//! located at #cgroupPath under #cgroupMountPoint.
+//! Returns an empty optional on non-Linux builds.
+std::optional<i64> GetCgroupAnonymousMemoryLimit(
+ const TString& cgroupPath,
+ const TString& cgroupMountPoint = "/sys/fs/cgroup");
+
THashMap<TString, i64> GetVmstat();
ui64 GetProcessCumulativeMajorPageFaults(int pid = -1);
diff --git a/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp b/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp
index 1828ada3c9..38e7f267a8 100644
--- a/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp
+++ b/yt/yt/library/oom/tcmalloc_memory_limit_handler.cpp
@@ -204,7 +204,7 @@ private:
+ //! Formats the file path `<HeapDumpDirectory>/oom_profile_paths_<timestamp>.yson`.
TString GetProfilePaths(const TString& timestamp) const
{
return Format(
- "%v/oom_profile_paths_%v.pb.gz",
+ "%v/oom_profile_paths_%v.yson",
Options_.HeapDumpDirectory,
timestamp);
}
diff --git a/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp b/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp
index 0b814383ac..09cf67a482 100644
--- a/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp
+++ b/yt/yt/library/profiling/resource_tracker/resource_tracker.cpp
@@ -119,7 +119,10 @@ void TMemoryCgroupTracker::CollectSensors(ISensorWriter* writer)
writer->AddGauge("/dirty", stat.Dirty);
writer->AddGauge("/writeback", stat.Writeback);
- TotalMemoryLimit.store(stat.HierarchicalMemoryLimit);
+ TotalMemoryLimit_.store(stat.HierarchicalMemoryLimit);
+ AnonymousMemoryLimit_.store(SafeGetAnonymousMemoryLimit(
+ group.Path,
+ stat.HierarchicalMemoryLimit));
return;
}
@@ -133,9 +136,31 @@ void TMemoryCgroupTracker::CollectSensors(ISensorWriter* writer)
}
}
-i64 TMemoryCgroupTracker::GetTotalMemoryLimit()
+//! Non-throwing (for std::exception) wrapper around GetCgroupAnonymousMemoryLimit.
+//! The returned value never exceeds #totalMemoryLimit; #totalMemoryLimit itself is returned
+//! when the anonymous limit is missing, is zero, or could not be read.
+i64 TMemoryCgroupTracker::SafeGetAnonymousMemoryLimit(const TString& cgroupPath, i64 totalMemoryLimit)
{
- return TotalMemoryLimit.load();
+ try {
+ auto anonymousLimit = GetCgroupAnonymousMemoryLimit(cgroupPath);
+ // No anonymous limit available: act as if only the total limit applied.
+ auto result = anonymousLimit.value_or(totalMemoryLimit);
+ // Never report more than the total limit.
+ result = std::min(result, totalMemoryLimit);
+ // A zero result is replaced by the total limit.
+ return result != 0 ? result : totalMemoryLimit;
+ } catch (const std::exception& ex) {
+ // Log the failure at most once; the flag is never reset in the visible code.
+ if (!AnonymousLimitErrorLogged_) {
+ YT_LOG_INFO(ex, "Failed to collect cgroup anonymous memory limit");
+ AnonymousLimitErrorLogged_ = true;
+ }
+ }
+
+ return totalMemoryLimit;
+}
+
+//! Returns the current value of the TotalMemoryLimit_ atomic.
+i64 TMemoryCgroupTracker::GetTotalMemoryLimit() const
+{
+ return TotalMemoryLimit_.load();
+}
+
+//! Returns the current value of the AnonymousMemoryLimit_ atomic.
+i64 TMemoryCgroupTracker::GetAnonymousMemoryLimit() const
+{
+ return AnonymousMemoryLimit_.load();
}
TResourceTracker::TTimings TResourceTracker::TTimings::operator-(const TResourceTracker::TTimings& other) const
@@ -424,6 +449,11 @@ i64 TResourceTracker::GetTotalMemoryLimit()
return MemoryCgroupTracker_->GetTotalMemoryLimit();
}
+//! Forwards to the anonymous memory limit kept by the memory cgroup tracker.
+i64 TResourceTracker::GetAnonymousMemoryLimit()
+{
+ return MemoryCgroupTracker_->GetAnonymousMemoryLimit();
+}
+
TResourceTrackerPtr GetResourceTracker()
{
return LeakyRefCountedSingleton<TResourceTracker>();
diff --git a/yt/yt/library/profiling/resource_tracker/resource_tracker.h b/yt/yt/library/profiling/resource_tracker/resource_tracker.h
index 0bebe8a6c8..424c499ffc 100644
--- a/yt/yt/library/profiling/resource_tracker/resource_tracker.h
+++ b/yt/yt/library/profiling/resource_tracker/resource_tracker.h
@@ -34,12 +34,17 @@ class TMemoryCgroupTracker
public:
void CollectSensors(ISensorWriter* writer) override;
- i64 GetTotalMemoryLimit();
+ i64 GetTotalMemoryLimit() const;
+ i64 GetAnonymousMemoryLimit() const;
private:
bool CgroupErrorLogged_ = false;
+ bool AnonymousLimitErrorLogged_ = false;
+
+ std::atomic<i64> TotalMemoryLimit_ = 0;
+ std::atomic<i64> AnonymousMemoryLimit_ = 0;
- std::atomic<i64> TotalMemoryLimit = 0;
+ i64 SafeGetAnonymousMemoryLimit(const TString& cgroupPath, i64 totalMemoryLimit);
};
////////////////////////////////////////////////////////////////////////////////
@@ -57,6 +62,7 @@ public:
double GetCpuWait();
i64 GetTotalMemoryLimit();
+ i64 GetAnonymousMemoryLimit();
void CollectSensors(ISensorWriter* writer) override;
diff --git a/yt/yt/library/program/config.cpp b/yt/yt/library/program/config.cpp
index a52015ca29..a8765946b8 100644
--- a/yt/yt/library/program/config.cpp
+++ b/yt/yt/library/program/config.cpp
@@ -12,6 +12,8 @@ void THeapSizeLimitConfig::Register(TRegistrar registrar)
{
registrar.Parameter("container_memory_ratio", &TThis::ContainerMemoryRatio)
.Optional();
+ registrar.Parameter("container_memory_margin", &TThis::ContainerMemoryMargin)
+ .Optional();
registrar.Parameter("hard", &TThis::Hard)
.Default(false);
registrar.Parameter("dump_memory_profile_on_violation", &TThis::DumpMemoryProfileOnViolation)
diff --git a/yt/yt/library/program/config.h b/yt/yt/library/program/config.h
index c91f6bac1e..5e3aea6b40 100644
--- a/yt/yt/library/program/config.h
+++ b/yt/yt/library/program/config.h
@@ -43,7 +43,13 @@ public:
// If program heap size exceeds the limit tcmalloc is instructed to release memory to the kernel.
std::optional<double> ContainerMemoryRatio;
- //! If true tcmalloc crashes when system allocates more memory than #ContainerMemoryRatio.
+ //! Similar to #ContainerMemoryRatio, but is set in terms of absolute difference from
+ //! the container memory limit.
+ //! For example, if ContainerMemoryLimit=200Gb and ContainerMemoryMargin=1Gb
+ //! then tcmalloc limit will be 199Gb.
+ std::optional<double> ContainerMemoryMargin;
+
+ //! If true tcmalloc crashes when system allocates more memory than #ContainerMemoryRatio/#ContainerMemoryMargin.
bool Hard;
bool DumpMemoryProfileOnViolation;
diff --git a/yt/yt/library/program/helpers.cpp b/yt/yt/library/program/helpers.cpp
index 606c1cc406..3b11d8bcb1 100644
--- a/yt/yt/library/program/helpers.cpp
+++ b/yt/yt/library/program/helpers.cpp
@@ -62,7 +62,7 @@ class TCMallocLimitsAdjuster
public:
void Adjust(const TTCMallocConfigPtr& config)
{
- i64 totalMemory = GetContainerMemoryLimit();
+ i64 totalMemory = GetAnonymousMemoryLimit();
AdjustPageHeapLimit(totalMemory, config);
AdjustAggressiveReleaseThreshold(totalMemory, config);
SetupMemoryLimitHandler(config);
@@ -120,27 +120,32 @@ private:
}
}
- i64 GetContainerMemoryLimit() const
+ //! Returns the anonymous memory limit reported by the resource tracker,
+ //! or 0 when no resource tracker is available.
+ i64 GetAnonymousMemoryLimit() const
{
auto resourceTracker = NProfiling::GetResourceTracker();
if (!resourceTracker) {
return 0;
}
- return resourceTracker->GetTotalMemoryLimit();
+ return resourceTracker->GetAnonymousMemoryLimit();
}
+ //! Computes the heap limit to propose to the allocator from #totalMemory and the
+ //! heap size limit config.
+ //! Returns a default-constructed TAllocatorMemoryLimit when #totalMemory is zero or
+ //! neither ContainerMemoryRatio nor ContainerMemoryMargin is configured, or when
+ //! the configured margin leaves no memory at all.
+ //! ContainerMemoryMargin takes precedence over ContainerMemoryRatio when both are set.
TAllocatorMemoryLimit ProposeHeapMemoryLimit(i64 totalMemory, const TTCMallocConfigPtr& config) const
{
- const auto& heapLimitConfig = config->HeapSizeLimit;
+ const auto& heapSizeConfig = config->HeapSizeLimit;
- if (totalMemory == 0 || !heapLimitConfig->ContainerMemoryRatio) {
+ if (totalMemory == 0 || (!heapSizeConfig->ContainerMemoryRatio && !heapSizeConfig->ContainerMemoryMargin)) {
return {};
}
TAllocatorMemoryLimit proposed;
- proposed.limit = *heapLimitConfig->ContainerMemoryRatio * totalMemory;
- proposed.hard = heapLimitConfig->Hard;
+ proposed.hard = heapSizeConfig->Hard;
+
+ if (heapSizeConfig->ContainerMemoryMargin) {
+ auto margin = *heapSizeConfig->ContainerMemoryMargin;
+ // A margin that is not strictly below the container limit (or is NaN) would yield
+ // a non-positive floating-point value; converting it into the integral limit is
+ // undefined behavior. Treat such a config the same way as a missing one.
+ // NOTE(review): assumes a default-constructed limit means "nothing to propose",
+ // as in the early return above -- confirm against the caller.
+ if (!(margin < static_cast<double>(totalMemory))) {
+ return {};
+ }
+ proposed.limit = totalMemory - margin;
+ } else {
+ proposed.limit = *heapSizeConfig->ContainerMemoryRatio * totalMemory;
+ }
return proposed;
}