aboutsummaryrefslogblamecommitdiffstats
path: root/library/cpp/ytalloc/api/ytalloc.h
blob: 38879074ddadfde5612d10d164352e581d6d3801 (plain) (tree)
1
2
3
4
5
6
7



                                     
                                                         










                                                                                

                                 



















































                                                                                             











































































                                                                                      




                                                                                 





                                                                                
                                             





























































































































                                                                                                       
                                                                       
                                                                                      
                                                                       
                                                                                      
                                                                       
                                                                                                
                                                                                                             
                                                                                                
                                                                                                             
                                                                                     
                                                                     
                                                                                               
                                                                         
                                                 
                                                                                 







                             
                  










                                                                            
                                                                                   













                                                                                
                                                       









                                                                                     



                            
#pragma once

#include <stddef.h>

#include <library/cpp/yt/misc/enum.h>

#include <library/cpp/yt/containers/enum_indexed_array.h>

#include <util/system/types.h>

#include <util/generic/size_literals.h>

#include <util/datetime/base.h>

namespace NYT::NYTAlloc {

////////////////////////////////////////////////////////////////////////////////
// Macros

// YT_ALLOC_ENABLED is defined only when building for Linux (_linux_) with none of
// the sanitizer markers (_asan_enabled_, _msan_enabled_, _tsan_enabled_) set.
// NOTE(review): these platform/sanitizer macros are presumably supplied by the
// build system; nothing visible in this header defines them -- confirm.
// Being a preprocessor macro, it is not scoped by the enclosing namespace.
#if defined(_linux_) && \
    !defined(_asan_enabled_) && \
    !defined(_msan_enabled_) && \
    !defined(_tsan_enabled_)
    #define YT_ALLOC_ENABLED
#endif

////////////////////////////////////////////////////////////////////////////////
// Constants

// Number of small-allocation ranks; also the length of the array returned by
// GetSmallArenaAllocationCounters.
constexpr int SmallRankCount = 23;
// NOTE(review): 1 << MinLargeRank equals LargeAllocationSizeThreshold (assuming
// binary kilobytes); presumably the smallest rank served by large arenas -- confirm.
constexpr int MinLargeRank = 15;
// Number of large-allocation ranks; also the length of the array returned by
// GetLargeArenaAllocationCounters.
constexpr int LargeRankCount = 30;
// NOTE(review): presumably requests of at least this size are treated as large -- confirm.
constexpr size_t LargeAllocationSizeThreshold = 32_KB;
// Evaluates to 2^29 bytes (512 MiB) for LargeRankCount = 30.
// NOTE(review): presumably requests of at least this size are treated as huge -- confirm.
constexpr size_t HugeAllocationSizeThreshold = 1ULL << (LargeRankCount - 1);
// NOTE(review): presumably the upper bound on a single allocation request -- confirm.
constexpr size_t MaxAllocationSize = 1_TB;
// Page size assumed by the allocator; matches the 4K alignment documented
// for AllocatePageAligned.
constexpr size_t PageSize = 4_KB;
// Number of bytes immediately following a chunk returned by Allocate that are
// guaranteed to be readable (see Allocate).
constexpr size_t RightReadableAreaSize = 16;

////////////////////////////////////////////////////////////////////////////////
// Allocation API

// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 16-byte aligned.
// Moreover, it is guaranteed that #RightReadableAreaSize bytes immediately following
// the allocated chunk are readable (but may belong to another allocated chunk).
// This enables eliminating some nasty corner cases in SIMD memory manipulations.
void* Allocate(size_t size);

// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 4K-byte aligned.
// #size, however, need not be divisible by page size (but internally it will be rounded up).
void* AllocatePageAligned(size_t size);

// An optimized version of #Allocate with #Size being known at compile-time.
template <size_t Size>
void* AllocateConstSize();

// Frees a chunk of memory previously allocated via Allocate functions.
// Does nothing if #ptr is null.
void Free(void* ptr);

// Similar to #Free but assumes that #ptr is not null.
void FreeNonNull(void* ptr);

// Returns the size of the chunk pointed to by #ptr.
// This size is not guaranteed to be exactly equal to #size passed to allocation functions
// due to rounding; the returned size, however, is never less than the latter size.
// If #ptr is null or we are unable to determine the allocation size, then 0 is returned.
size_t GetAllocationSize(const void* ptr);

// Returns the size of the chunk that will actually be allocated
// when requesting an allocation of given #size. This is never less than #size.
size_t GetAllocationSize(size_t size);

////////////////////////////////////////////////////////////////////////////////
// Memory zone API
//
// Each allocation is either in the "normal zone" or "undumpable zone".
// The latter indicates that this memory region will be excluded from a coredump
// should it happen.
//
// The current zone used for allocations is stored in TLS.

// Memory zone is used to pass a hint to the allocator.
// The current value is kept in TLS (see SetCurrentMemoryZone/GetCurrentMemoryZone);
// the zone of an existing chunk is reported by GetAllocationMemoryZone.
DEFINE_ENUM(EMemoryZone,
    ((Unknown)    (-1)) // not a valid zone
    ((Normal)     ( 0)) // default memory type
    ((Undumpable) ( 1)) // memory is omitted from the core dump
);

// Updates the current zone in TLS.
void SetCurrentMemoryZone(EMemoryZone zone);

// Returns the current zone from TLS.
EMemoryZone GetCurrentMemoryZone();

// Returns the zone where #ptr resides;
// EMemoryZone::Unknown indicates that #ptr is outside of any recognized memory zone.
EMemoryZone GetAllocationMemoryZone(const void* ptr);

////////////////////////////////////////////////////////////////////////////////
// When a "timing event" (hiccup) occurs during an allocation,
// YTAlloc records this event and captures the current fiber id.
// The latter is provided externally by calling SetCurrentFiberId.
//
// This may be helpful to correlate various application-level timings
// with internal events in YTAlloc.
//
// The current fiber id is stored in TLS.

// Application-supplied fiber identifier that YTAlloc captures when it records
// a timing event.
using TFiberId = ui64;

// Updates the current fiber id in TLS.
void SetCurrentFiberId(TFiberId id);

// Returns the currently assigned fiber id from TLS.
TFiberId GetCurrentFiberId();

////////////////////////////////////////////////////////////////////////////////
// Logging

// Severity attached to each log event (see TLogEvent::Severity).
// Levels are declared from Debug through Error.
DEFINE_ENUM(ELogEventSeverity,
    (Debug)
    (Info)
    (Warning)
    (Error)
);

// A single log record passed to the handler registered via EnableLogging.
struct TLogEvent
{
    // How important the event is.
    ELogEventSeverity Severity;
    // Text of the event.
    // NOTE(review): TStringBuf is a non-owning view, so the text is presumably
    // only valid for the duration of the handler call -- confirm.
    TStringBuf Message;
};

// Plain function pointer invoked with each log event; see EnableLogging.
using TLogHandler = void(*)(const TLogEvent& event);

// Sets the handler to be invoked for each log event produced by YTAlloc.
// Can be called multiple times (but calls to the previous incarnations of the handler
// are racy).
void EnableLogging(TLogHandler logHandler);

////////////////////////////////////////////////////////////////////////////////
// Backtraces

// Plain function pointer used to capture the current call stack; see SetBacktraceProvider.
// NOTE(review): presumably fills #frames with up to #maxFrames return addresses,
// omitting the innermost #skipFrames, and returns the number of frames stored -- confirm.
using TBacktraceProvider = int(*)(void** frames, int maxFrames, int skipFrames);

// Sets the provider used for collecting backtraces when allocation profiling
// is turned ON. Can be called multiple times (but calls to the previous
// incarnations of the provider are racy).
void SetBacktraceProvider(TBacktraceProvider provider);

// Plain function pointer that renders #frameCount captured #frames as a string;
// see SetBacktraceFormatter.
using TBacktraceFormatter = TString(*)(const void* const* frames, int frameCount);

// Sets the callback used for formatting backtraces during large arena mmap calls
// to help detect memory leaks. Can be called multiple times (but calls to the
// previous incarnations of the formatter are racy).
void SetBacktraceFormatter(TBacktraceFormatter provider);

////////////////////////////////////////////////////////////////////////////////
// Misc

//! Tries to mlock all opened file mappings of the current process.
//! Typically invoked on application startup to lock all binaries in memory
//! and prevent executable code and static data from being paged out,
//! which would cause latency spikes.
void MlockFileMappings(bool populate = true);

////////////////////////////////////////////////////////////////////////////////
// Configuration API

// Calling this function enables periodic calls to madvise(ADV_STOCKPILE);
// cf. https://st.yandex-team.ru/KERNEL-186
void EnableStockpile();

// Sets the interval between madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled.
void SetStockpileInterval(TDuration value);

// Sets the number of threads to be invoking madvise(ADV_STOCKPILE).
// This call should be made before calling #EnableStockpile.
void SetStockpileThreadCount(int value);

// Sets the size passed to madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled.
void SetStockpileSize(size_t value);

// For large blobs, YTAlloc keeps at least
// LargeUnreclaimableCoeff * TotalLargeBytesUsed clamped to range
// [MinLargeUnreclaimableBytes, MaxLargeUnreclaimableBytes]
// bytes of pooled (unreclaimable) memory.
void SetLargeUnreclaimableCoeff(double value);
void SetMinLargeUnreclaimableBytes(size_t value);
void SetMaxLargeUnreclaimableBytes(size_t value);

// When a syscall (mmap, munmap, or madvise) or an internal lock acquisition
// takes longer than the configured time, a "timing event" is recorded.
void SetTimingEventThreshold(TDuration value);

// Toggles the global allocation profiling knob (OFF by default).
// For profiled allocations, YTAlloc collects (see #SetBacktraceProvider) and aggregates their
// backtraces.
void SetAllocationProfilingEnabled(bool value);

// Determines the fraction of allocations to be sampled for profiling.
void SetAllocationProfilingSamplingRate(double rate);

// Controls if small allocations of a given rank are profiled (OFF by default).
void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value);

// Controls if large allocations of a given rank are profiled (OFF by default).
void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value);

// Controls the depth of the backtraces to collect. Deeper backtraces
// take more time and affect the program performance.
void SetProfilingBacktraceDepth(int depth);

// Controls the minimum number of bytes a certain backtrace must
// allocate to appear in profiling reports.
void SetMinProfilingBytesUsedToReport(size_t size);

// If set to true (default), YTAlloc uses madvise with MADV_DONTNEED to release unused large blob pages
// (slower but leads to more predictable RSS values);
// if false then MADV_FREE is used instead, if available
// (faster but RSS may get stuck arbitrarily higher than the actual usage as long
// as no memory pressure is applied).
void SetEnableEagerMemoryRelease(bool value);

// If set to true, YTAlloc uses madvise with MADV_POPULATE to prefault freshly acquired pages.
// Otherwise (this is the default), these pages are prefaulted with linear memory access.
// See https://st.yandex-team.ru/KERNEL-185.
void SetEnableMadvisePopulate(bool value);

////////////////////////////////////////////////////////////////////////////////
// Statistics API

// Byte counters shared by several statistics families (see the aliases below)
// and by per-backtrace profiling (TProfiledAllocation::Counters).
// NOTE(review): BytesUsed is presumably BytesAllocated minus BytesFreed -- confirm.
DEFINE_ENUM(EBasicCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
);

// System, small, large, and undumpable statistics all use the basic counter set.
using ESystemCounter = EBasicCounter;
using ESmallCounter = EBasicCounter;
using ELargeCounter = EBasicCounter;
using EUndumpableCounter = EBasicCounter;

// Per-arena counters for small allocations; indexes each entry of the array
// returned by GetSmallArenaAllocationCounters.
DEFINE_ENUM(ESmallArenaCounter,
    (PagesMapped)
    (BytesMapped)
    (PagesCommitted)
    (BytesCommitted)
);

// Per-arena counters for large allocations; indexes each entry of the array
// returned by GetLargeArenaAllocationCounters.
DEFINE_ENUM(ELargeArenaCounter,
    (BytesSpare)
    (BytesOverhead)
    (BlobsAllocated)
    (BlobsFreed)
    (BlobsUsed)
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (ExtentsAllocated)
    (PagesMapped)
    (BytesMapped)
    (PagesPopulated)
    (BytesPopulated)
    (PagesReleased)
    (BytesReleased)
    (PagesCommitted)
    (BytesCommitted)
    (OverheadBytesReclaimed)
    (SpareBytesReclaimed)
);

// Counters for huge allocations (both bytes and blobs); indexes the result of
// GetHugeAllocationCounters.
DEFINE_ENUM(EHugeCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (BlobsAllocated)
    (BlobsFreed)
    (BlobsUsed)
);

// Counters aggregated over all user allocations; indexes the result of
// GetTotalAllocationCounters.
DEFINE_ENUM(ETotalCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (BytesCommitted)
    (BytesUnaccounted)
);

// Returns statistics for all user allocations.
TEnumIndexedArray<ETotalCounter, ssize_t> GetTotalAllocationCounters();

// Returns statistics for small allocations; these are included into total statistics.
TEnumIndexedArray<ESmallCounter, ssize_t> GetSmallAllocationCounters();

// Returns statistics for large allocations; these are included into total statistics.
TEnumIndexedArray<ELargeCounter, ssize_t> GetLargeAllocationCounters();

// Returns per-arena statistics for small allocations; these are included into total statistics.
std::array<TEnumIndexedArray<ESmallArenaCounter, ssize_t>, SmallRankCount> GetSmallArenaAllocationCounters();

// Returns per-arena statistics for large allocations; these are included into total statistics.
std::array<TEnumIndexedArray<ELargeArenaCounter, ssize_t>, LargeRankCount> GetLargeArenaAllocationCounters();

// Returns statistics for huge allocations; these are included into total statistics.
TEnumIndexedArray<EHugeCounter, ssize_t> GetHugeAllocationCounters();

// Returns statistics for all system allocations; these are not included into total statistics.
TEnumIndexedArray<ESystemCounter, ssize_t> GetSystemAllocationCounters();

// Returns statistics for undumpable allocations.
TEnumIndexedArray<EUndumpableCounter, ssize_t> GetUndumpableAllocationCounters();

// Kinds of timing events; indexes the result of GetTimingEventCounters.
// A timing event is recorded when a syscall (mmap, munmap, or madvise) or an
// internal lock acquisition takes longer than the threshold configured via
// SetTimingEventThreshold.
DEFINE_ENUM(ETimingEventType,
    (Mmap)
    (Munmap)
    (MadvisePopulate)
    (MadviseFree)
    (MadviseDontNeed)
    (Locking)
    (Prefault)
    (FilePrefault)
);

// Aggregated statistics for timing events of one type.
struct TTimingEventCounters
{
    // How many events have been recorded since start.
    size_t Count{0};
    // Cumulative size of the memory blocks these events involved (when applicable).
    size_t Size{0};
};

// Returns statistics for timing events that have happened since start.
// See SetTimingEventThreshold.
TEnumIndexedArray<ETimingEventType, TTimingEventCounters> GetTimingEventCounters();

////////////////////////////////////////////////////////////////////////////////

// We never collect backtraces deeper than this limit.
constexpr int MaxAllocationProfilingBacktraceDepth = 16;

// A captured call stack of bounded depth.
// Members carry default initializers so that a default-constructed instance is
// a well-defined empty backtrace (zero frames, all slots null) rather than
// holding indeterminate values; aggregate initialization keeps working.
struct TBacktrace
{
    // Number of frames stored in #Frames.
    // NOTE(review): presumably only the first #FrameCount slots are meaningful -- confirm.
    int FrameCount = 0;
    // Frame addresses; capacity is fixed at MaxAllocationProfilingBacktraceDepth.
    std::array<void*, MaxAllocationProfilingBacktraceDepth> Frames{};
};

// Profiling statistics aggregated for one distinct backtrace
// (an element of the vector returned by GetProfiledAllocationStatistics).
struct TProfiledAllocation
{
    // Call stack shared by the allocations in this group.
    TBacktrace Backtrace;
    // Allocated, freed, and used byte counters for this backtrace.
    TEnumIndexedArray<EBasicCounter, ssize_t> Counters;
};

// Returns statistics for profiled allocations (available when allocation
// profiling is ON). Allocations are grouped by backtrace; for each backtrace
// we provide the counters indicating the number of allocated, freed, and used bytes.
// To appear here, used bytes counter must be at least the value configured
// via SetMinProfilingBytesUsedToReport.
std::vector<TProfiledAllocation> GetProfiledAllocationStatistics();

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT::NYTAlloc

#define YT_ALLOC_INL_H_
#include "ytalloc-inl.h"
#undef YT_ALLOC_INL_H_