author    Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit    1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/ytalloc/api/ytalloc.h
download  ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz

    intermediate changes
    ref:cde9a383711a11544ce7e107a78147fb96cc4029

Diffstat (limited to 'library/cpp/ytalloc/api/ytalloc.h')
 -rw-r--r--  library/cpp/ytalloc/api/ytalloc.h  416
 1 file changed, 416 insertions, 0 deletions
diff --git a/library/cpp/ytalloc/api/ytalloc.h b/library/cpp/ytalloc/api/ytalloc.h
new file mode 100644
index 0000000000..d942dde638
--- /dev/null
+++ b/library/cpp/ytalloc/api/ytalloc.h
@@ -0,0 +1,416 @@
+#pragma once
+
+#include <stddef.h>
+
+#include <library/cpp/yt/misc/enum.h>
+
+#include <util/system/types.h>
+
+#include <util/generic/size_literals.h>
+
+#include <util/datetime/base.h>
+
+namespace NYT::NYTAlloc {
+
+////////////////////////////////////////////////////////////////////////////////
+// Macros
+
+#if defined(_linux_) && \
+ !defined(_asan_enabled_) && \
+ !defined(_msan_enabled_) && \
+ !defined(_tsan_enabled_)
+ #define YT_ALLOC_ENABLED
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+// Constants
+
+constexpr int SmallRankCount = 23;
+constexpr int MinLargeRank = 15;
+constexpr int LargeRankCount = 30;
+constexpr size_t LargeAllocationSizeThreshold = 32_KB;
+constexpr size_t HugeAllocationSizeThreshold = 1ULL << (LargeRankCount - 1);
+constexpr size_t MaxAllocationSize = 1_TB;
+constexpr size_t PageSize = 4_KB;
+constexpr size_t RightReadableAreaSize = 16;
+
+////////////////////////////////////////////////////////////////////////////////
+// Allocation API
+
+// Allocates a chunk of memory of (at least) #size bytes.
+// The returned pointer is guaranteed to be 16-byte aligned.
+// Moreover, it is guaranteed that #RightReadableAreaSize bytes immediately following
+// the allocated chunk are readable (but may belong to another allocated chunk).
+// This enables eliminating some nasty corner cases in SIMD memory manipulations.
+void* Allocate(size_t size);
+
+// Allocates a chunk of memory of (at least) #size bytes.
+// The returned pointer is guaranteed to be 4K-byte aligned.
+// #size, however, need not be divisible by page size (but internally it will be rounded up).
+void* AllocatePageAligned(size_t size);
+
+// An optimized version of #Allocate with #Size being known at compile-time.
+template <size_t Size>
+void* AllocateConstSize();
+
+// Frees a chunk of memory previously allocated via Allocate functions.
+// Does nothing if #ptr is null.
+void Free(void* ptr);
+
+// Similar to #Free but assumes that #ptr is not null.
+void FreeNonNull(void* ptr);
+
+// Returns the size of the chunk pointed to by #ptr.
+// This size is not guaranteed to be exactly equal to #size passed to allocation functions
+// due to rounding; the returned size, however, is never less than the latter size.
+// If #ptr is null or we are unable to determine the allocation size, then 0 is returned.
+size_t GetAllocationSize(const void* ptr);
+
+// Returns the size of the chunk that will actually be allocated
+// when requesting an allocation of given #size. This is never less than #size.
+size_t GetAllocationSize(size_t size);
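+
+// Example (illustrative sketch): a minimal allocate/inspect/free cycle using the calls above;
+// assumes the library is linked in.
+//
+//     void* ptr = NYT::NYTAlloc::Allocate(100);               // >= 100 bytes, 16-byte aligned
+//     size_t actual = NYT::NYTAlloc::GetAllocationSize(ptr);  // >= 100 due to rounding
+//     NYT::NYTAlloc::Free(ptr);
+//
+//     void* page = NYT::NYTAlloc::AllocatePageAligned(100);   // 4K-aligned, rounded up internally
+//     NYT::NYTAlloc::Free(page);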
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory tagging API
+//
+// Each allocation can be tagged with a number (from 1 to MaxMemoryTag).
+// Setting this to NullMemoryTag disables tagging.
+// Internally, YTAlloc tracks the number of bytes used by each tag.
+//
+// Tagged small allocations are somewhat slower; large and huge allocations are not affected
+// (though for those the performance implications are negligible anyway).
+//
+// The current memory tag used for allocations is stored in TLS.
+
+using TMemoryTag = ui32;
+constexpr TMemoryTag NullMemoryTag = 0;
+constexpr TMemoryTag MaxMemoryTag = (1ULL << 22) - 1;
+
+// Updates the current tag value in TLS.
+void SetCurrentMemoryTag(TMemoryTag tag);
+
+// Returns the current tag value from TLS.
+TMemoryTag GetCurrentMemoryTag();
+
+// Returns the memory usage for a given tag.
+// The value is somewhat approximate and racy.
+size_t GetMemoryUsageForTag(TMemoryTag tag);
+
+// A batched version of GetMemoryUsageForTag.
+void GetMemoryUsageForTags(const TMemoryTag* tags, size_t count, size_t* results);
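+
+// Example (illustrative sketch): tag allocations made by a subsystem and read back its usage;
+// the tag value 42 is an arbitrary application-chosen constant.
+//
+//     constexpr TMemoryTag CacheTag = 42;
+//     SetCurrentMemoryTag(CacheTag);
+//     void* ptr = Allocate(1_KB);             // accounted against CacheTag
+//     SetCurrentMemoryTag(NullMemoryTag);     // stop tagging further allocations
+//     size_t used = GetMemoryUsageForTag(CacheTag);
+//     Free(ptr);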
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory zone API
+//
+// Each allocation is either in the "normal zone" or "undumpable zone".
+// The latter indicates that this memory region will be excluded from a coredump
+// should it happen.
+//
+// The current zone used for allocations is stored in TLS.
+
+// Memory zone is used to pass hint to the allocator.
+DEFINE_ENUM(EMemoryZone,
+ ((Unknown) (-1)) // not a valid zone
+ ((Normal) ( 0)) // default memory type
+ ((Undumpable) ( 1)) // memory is omitted from the core dump
+);
+
+// Updates the current zone in TLS.
+void SetCurrentMemoryZone(EMemoryZone zone);
+
+// Returns the current zone from TLS.
+EMemoryZone GetCurrentMemoryZone();
+
+// Returns the zone where #ptr resides;
+// EMemoryZone::Unknown indicates that #ptr is outside of any recognized memory zone.
+EMemoryZone GetAllocationMemoryZone(const void* ptr);
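+
+// Example (illustrative sketch): allocate a large buffer that should be excluded from
+// core dumps, then restore the default zone.
+//
+//     SetCurrentMemoryZone(EMemoryZone::Undumpable);
+//     void* buffer = Allocate(64_MB);
+//     SetCurrentMemoryZone(EMemoryZone::Normal);
+//     // GetAllocationMemoryZone(buffer) now returns EMemoryZone::Undumpable.
+//     Free(buffer);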
+
+////////////////////////////////////////////////////////////////////////////////
+// When a "timing event" (hiccup) occurs during an allocation,
+// YTAlloc records this event and captures the current fiber id.
+// The latter is provided externally by calling SetCurrentFiberId.
+//
+// This may be helpful to correlate various application-level timings
+// with internal events in YTAlloc.
+//
+// The current fiber id is stored in TLS.
+
+using TFiberId = ui64;
+
+// Updates the current fiber id in TLS.
+void SetCurrentFiberId(TFiberId id);
+
+// Returns the currently assigned fiber id from TLS.
+TFiberId GetCurrentFiberId();
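+
+// Example (illustrative sketch): a fiber scheduler propagates its own id on every switch
+// so that YTAlloc timing events can be correlated with it; fiberId is assumed to be
+// provided by the application.
+//
+//     SetCurrentFiberId(fiberId);   // on fiber switch-in
+//     // ... run the fiber ...
+//     SetCurrentFiberId(0);         // on fiber switch-out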
+
+////////////////////////////////////////////////////////////////////////////////
+// Logging
+
+DEFINE_ENUM(ELogEventSeverity,
+ (Debug)
+ (Info)
+ (Warning)
+ (Error)
+);
+
+struct TLogEvent
+{
+ ELogEventSeverity Severity;
+ TStringBuf Message;
+};
+
+using TLogHandler = void(*)(const TLogEvent& event);
+
+// Sets the handler to be invoked for each log event produced by YTAlloc.
+// Can be called multiple times (but calls to the previous incarnations of the handler
+// are racy).
+void EnableLogging(TLogHandler logHandler);
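+
+// Example (illustrative sketch): forward YTAlloc log events to stderr via util streams.
+//
+//     void HandleYTAllocLogEvent(const NYT::NYTAlloc::TLogEvent& event)
+//     {
+//         Cerr << "[YTAlloc] " << event.Message << Endl;
+//     }
+//
+//     NYT::NYTAlloc::EnableLogging(HandleYTAllocLogEvent);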
+
+////////////////////////////////////////////////////////////////////////////////
+// Backtraces
+
+using TBacktraceProvider = int(*)(void** frames, int maxFrames, int skipFrames);
+
+// Sets the provider used for collecting backtraces when allocation profiling
+// is turned ON. Can be called multiple times (but calls to the previous
+// incarnations of the provider are racy).
+void SetBacktraceProvider(TBacktraceProvider provider);
+
+using TBacktraceFormatter = TString(*)(const void* const* frames, int frameCount);
+
+// Sets the callback used for formatting backtraces during large arena mmap calls
+// to help detect memory leaks. Can be called multiple times (but calls to the
+// previous incarnations of the provider are racy).
+void SetBacktraceFormatter(TBacktraceFormatter provider);
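+
+// Example (illustrative sketch): a backtrace provider built on glibc backtrace() from
+// <execinfo.h>; a production provider would typically use a faster unwinder.
+//
+//     int CollectFrames(void** frames, int maxFrames, int skipFrames)
+//     {
+//         void* buffer[64];
+//         int count = ::backtrace(buffer, std::min(maxFrames + skipFrames, 64));
+//         int copied = 0;
+//         for (int i = skipFrames; i < count && copied < maxFrames; ++i) {
+//             frames[copied++] = buffer[i];
+//         }
+//         return copied;
+//     }
+//
+//     SetBacktraceProvider(CollectFrames);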
+
+////////////////////////////////////////////////////////////////////////////////
+// Misc
+
+//! Tries to mlock all opened file mappings of the current process.
+//! Typically invoked on application startup to lock all binaries in memory
+//! and prevent executable code and static data from being paged out,
+//! which would cause latency spikes.
+void MlockFileMappings(bool populate = true);
+
+////////////////////////////////////////////////////////////////////////////////
+// Configuration API
+
+// Calling this function enables periodic calls to madvise(ADV_STOCKPILE);
+// cf. https://st.yandex-team.ru/KERNEL-186
+void EnableStockpile();
+
+// Sets the interval between madvise(ADV_STOCKPILE) calls.
+// Only makes sense if stockpile was enabled.
+void SetStockpileInterval(TDuration value);
+
+// Sets the number of threads to be invoking madvise(ADV_STOCKPILE).
+// This call should be made before calling #EnableStockpile.
+void SetStockpileThreadCount(int value);
+
+// Sets the size passed to madvise(ADV_STOCKPILE) calls.
+// Only makes sense if stockpile was enabled.
+void SetStockpileSize(size_t value);
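+
+// Example (illustrative startup sequence; the values are made up):
+//
+//     SetStockpileThreadCount(4);                         // must precede EnableStockpile
+//     SetStockpileInterval(TDuration::MilliSeconds(10));
+//     SetStockpileSize(1_GB);
+//     EnableStockpile();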
+
+// For large blobs, YTAlloc keeps at least
+// LargeUnreclaimableCoeff * TotalLargeBytesUsed clamped to range
+// [MinLargeUnreclaimableBytes, MaxLargeUnreclaimableBytes]
+// bytes of pooled (unreclaimable) memory.
+void SetLargeUnreclaimableCoeff(double value);
+void SetMinLargeUnreclaimableBytes(size_t value);
+void SetMaxLargeUnreclaimableBytes(size_t value);
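+
+// E.g. (illustrative numbers): with a coefficient of 0.05, 10_GB of large bytes used,
+// a minimum of 128_MB and a maximum of 1_GB, the pooled amount is
+// clamp(0.05 * 10_GB, 128_MB, 1_GB) = 512_MB.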
+
+// When a syscall (mmap, munmap, or madvise) or an internal lock acquisition
+// takes longer than the configured time, a "timing event" is recorded.
+void SetTimingEventThreshold(TDuration value);
+
+// Toggles the global allocation profiling knob (OFF by default).
+// For profiled allocations, YTAlloc collects (see #SetBacktraceProvider) and aggregates their
+// backtraces.
+void SetAllocationProfilingEnabled(bool value);
+
+// Determines the fraction of allocations to be sampled for profiling.
+void SetAllocationProfilingSamplingRate(double rate);
+
+// Controls if small allocations of a given rank are profiled (OFF by default).
+void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value);
+
+// Controls if large allocations of a given rank are profiled (OFF by default).
+void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value);
+
+// Controls the depth of the backtraces to collect. Deeper backtraces
+// take more time and affect the program performance.
+void SetProfilingBacktraceDepth(int depth);
+
+// Controls the minimum number of bytes a certain backtrace must
+// allocate to appear in profiling reports.
+void SetMinProfilingBytesUsedToReport(size_t size);
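+
+// Example (illustrative sketch): sample roughly 1% of allocations, collect backtraces
+// of depth 10, and only report backtraces holding at least 1_MB.
+//
+//     SetAllocationProfilingSamplingRate(0.01);
+//     SetProfilingBacktraceDepth(10);
+//     SetMinProfilingBytesUsedToReport(1_MB);
+//     SetAllocationProfilingEnabled(true);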
+
+// If set to true (default), YTAlloc uses madvise with MADV_DONTNEED to release unused large blob pages
+// (slower but leads to more predictable RSS values);
+// if false, then MADV_FREE is used instead, if available
+// (faster, but RSS may get stuck arbitrarily higher than the actual usage as long
+// as no memory pressure is applied).
+void SetEnableEagerMemoryRelease(bool value);
+
+// If set to true, YTAlloc uses madvise with MADV_POPULATE to prefault freshly acquired pages.
+// Otherwise (this is the default), these pages are prefaulted with linear memory access.
+// See https://st.yandex-team.ru/KERNEL-185.
+void SetEnableMadvisePopulate(bool value);
+
+////////////////////////////////////////////////////////////////////////////////
+// Statistics API
+
+DEFINE_ENUM(EBasicCounter,
+ (BytesAllocated)
+ (BytesFreed)
+ (BytesUsed)
+);
+
+using ESystemCounter = EBasicCounter;
+using ESmallCounter = EBasicCounter;
+using ELargeCounter = EBasicCounter;
+using EUndumpableCounter = EBasicCounter;
+
+DEFINE_ENUM(ESmallArenaCounter,
+ (PagesMapped)
+ (BytesMapped)
+ (PagesCommitted)
+ (BytesCommitted)
+);
+
+DEFINE_ENUM(ELargeArenaCounter,
+ (BytesSpare)
+ (BytesOverhead)
+ (BlobsAllocated)
+ (BlobsFreed)
+ (BlobsUsed)
+ (BytesAllocated)
+ (BytesFreed)
+ (BytesUsed)
+ (ExtentsAllocated)
+ (PagesMapped)
+ (BytesMapped)
+ (PagesPopulated)
+ (BytesPopulated)
+ (PagesReleased)
+ (BytesReleased)
+ (PagesCommitted)
+ (BytesCommitted)
+ (OverheadBytesReclaimed)
+ (SpareBytesReclaimed)
+);
+
+DEFINE_ENUM(EHugeCounter,
+ (BytesAllocated)
+ (BytesFreed)
+ (BytesUsed)
+ (BlobsAllocated)
+ (BlobsFreed)
+ (BlobsUsed)
+);
+
+DEFINE_ENUM(ETotalCounter,
+ (BytesAllocated)
+ (BytesFreed)
+ (BytesUsed)
+ (BytesCommitted)
+ (BytesUnaccounted)
+);
+
+// Returns statistics for all user allocations.
+TEnumIndexedVector<ETotalCounter, ssize_t> GetTotalAllocationCounters();
+
+// Returns statistics for small allocations; these are included into total statistics.
+TEnumIndexedVector<ESmallCounter, ssize_t> GetSmallAllocationCounters();
+
+// Returns statistics for large allocations; these are included into total statistics.
+TEnumIndexedVector<ELargeCounter, ssize_t> GetLargeAllocationCounters();
+
+// Returns per-arena statistics for small allocations; these are included into total statistics.
+std::array<TEnumIndexedVector<ESmallArenaCounter, ssize_t>, SmallRankCount> GetSmallArenaAllocationCounters();
+
+// Returns per-arena statistics for large allocations; these are included into total statistics.
+std::array<TEnumIndexedVector<ELargeArenaCounter, ssize_t>, LargeRankCount> GetLargeArenaAllocationCounters();
+
+// Returns statistics for huge allocations; these are included into total statistics.
+TEnumIndexedVector<EHugeCounter, ssize_t> GetHugeAllocationCounters();
+
+// Returns statistics for all system allocations; these are not included into total statistics.
+TEnumIndexedVector<ESystemCounter, ssize_t> GetSystemAllocationCounters();
+
+// Returns statistics for undumpable allocations.
+TEnumIndexedVector<EUndumpableCounter, ssize_t> GetUndumpableAllocationCounters();
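+
+// Example (illustrative sketch): read the total number of bytes currently used.
+//
+//     auto totals = GetTotalAllocationCounters();
+//     ssize_t bytesUsed = totals[ETotalCounter::BytesUsed];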
+
+DEFINE_ENUM(ETimingEventType,
+ (Mmap)
+ (Munmap)
+ (MadvisePopulate)
+ (MadviseFree)
+ (MadviseDontNeed)
+ (Locking)
+ (Prefault)
+ (FilePrefault)
+);
+
+struct TTimingEventCounters
+{
+ // Number of events happened since start.
+ size_t Count = 0;
+ // Total size of memory blocks involved in these events (if applicable).
+ size_t Size = 0;
+};
+
+// Returns statistics for timing events happened since start.
+// See SetTimingEventThreshold.
+TEnumIndexedVector<ETimingEventType, TTimingEventCounters> GetTimingEventCounters();
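+
+// Example (illustrative sketch): count slow mmap calls observed so far.
+//
+//     auto timings = GetTimingEventCounters();
+//     size_t slowMmaps = timings[ETimingEventType::Mmap].Count;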
+
+////////////////////////////////////////////////////////////////////////////////
+
+// We never collect backtraces deeper than this limit.
+constexpr int MaxAllocationProfilingBacktraceDepth = 16;
+
+struct TBacktrace
+{
+ int FrameCount;
+ std::array<void*, MaxAllocationProfilingBacktraceDepth> Frames;
+};
+
+struct TProfiledAllocation
+{
+ TBacktrace Backtrace;
+ TEnumIndexedVector<EBasicCounter, ssize_t> Counters;
+};
+
+// Returns statistics for profiled allocations (available when allocation
+// profiling is ON). Allocations are grouped by backtrace; for each backtrace
+// we provide the counters indicating the number of allocated, freed, and used bytes.
+// To appear here, used bytes counter must be at least the value configured
+// via SetMinProfilingBytesUsedToReport.
+std::vector<TProfiledAllocation> GetProfiledAllocationStatistics();
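+
+// Example (illustrative sketch): dump profiled allocations; FormatFrames stands for any
+// formatter compatible with TBacktraceFormatter (hypothetical name).
+//
+//     for (const auto& allocation : GetProfiledAllocationStatistics()) {
+//         Cerr << FormatFrames(allocation.Backtrace.Frames.data(), allocation.Backtrace.FrameCount)
+//              << ": " << allocation.Counters[EBasicCounter::BytesUsed] << " bytes used" << Endl;
+//     }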
+
+////////////////////////////////////////////////////////////////////////////////
+
+//! An RAII guard for setting the current memory tag in a scope.
+class TMemoryTagGuard
+{
+public:
+ TMemoryTagGuard();
+ explicit TMemoryTagGuard(TMemoryTag tag);
+
+ TMemoryTagGuard(const TMemoryTagGuard& other) = delete;
+ TMemoryTagGuard(TMemoryTagGuard&& other);
+
+ ~TMemoryTagGuard();
+
+private:
+ bool Active_;
+ TMemoryTag PreviousTag_;
+};
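+
+// Example (illustrative sketch): tag all allocations made within a scope;
+// CacheTag is an application-chosen tag value.
+//
+//     {
+//         TMemoryTagGuard guard(CacheTag);
+//         void* ptr = Allocate(1_KB);     // accounted against CacheTag
+//         Free(ptr);
+//     } // the previous tag is restored here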
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT::NYTAlloc
+
+#define YT_ALLOC_INL_H_
+#include "ytalloc-inl.h"
+#undef YT_ALLOC_INL_H_