1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
|
#pragma once
#include <stddef.h>
#include <library/cpp/yt/misc/enum.h>
#include <library/cpp/yt/containers/enum_indexed_array.h>
#include <util/system/types.h>
#include <util/generic/size_literals.h>
#include <util/datetime/base.h>
namespace NYT::NYTAlloc {
////////////////////////////////////////////////////////////////////////////////
// Macros
// YT_ALLOC_ENABLED is defined only when _linux_ is defined and none of
// _asan_enabled_, _msan_enabled_, _tsan_enabled_ is defined.
// NOTE(review): no use of this macro is visible in this header; presumably other
// code checks it to decide whether YTAlloc is actually active -- confirm.
#if defined(_linux_) && \
!defined(_asan_enabled_) && \
!defined(_msan_enabled_) && \
!defined(_tsan_enabled_)
#define YT_ALLOC_ENABLED
#endif
////////////////////////////////////////////////////////////////////////////////
// Constants
// Number of small ranks; also the length of the array returned by
// #GetSmallArenaAllocationCounters.
constexpr int SmallRankCount = 23;
// NOTE(review): presumably the lowest rank served by large arenas
// (2^15 bytes equals #LargeAllocationSizeThreshold) -- confirm against the implementation.
constexpr int MinLargeRank = 15;
// Number of large ranks; also the length of the array returned by
// #GetLargeArenaAllocationCounters.
constexpr int LargeRankCount = 30;
// NOTE(review): presumably the size starting from which allocations are treated as large -- confirm.
constexpr size_t LargeAllocationSizeThreshold = 32_KB;
// Equals 2^(LargeRankCount - 1) bytes, i.e. 512 MB with the current #LargeRankCount.
// NOTE(review): presumably the size starting from which allocations are treated as huge -- confirm.
constexpr size_t HugeAllocationSizeThreshold = 1ULL << (LargeRankCount - 1);
// NOTE(review): presumably the largest size accepted by the allocation functions -- confirm.
constexpr size_t MaxAllocationSize = 1_TB;
// Page size (4 KB); #AllocatePageAligned returns 4K-aligned pointers and rounds sizes up to the page size.
constexpr size_t PageSize = 4_KB;
// Number of bytes immediately following a chunk returned by #Allocate that are guaranteed to be readable.
constexpr size_t RightReadableAreaSize = 16;
////////////////////////////////////////////////////////////////////////////////
// Allocation API
// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 16-byte aligned.
// Moreover, it is guaranteed that #RightReadableAreaSize bytes immediately following
// the allocated chunk are readable (but may belong to another allocated chunk).
// This eliminates some nasty corner cases in SIMD memory manipulations.
void* Allocate(size_t size);
// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 4K-byte aligned (cf. #PageSize).
// #size, however, need not be divisible by the page size (internally it is rounded up).
void* AllocatePageAligned(size_t size);
// An optimized version of #Allocate for the case when the requested size (#Size)
// is known at compile time.
// NOTE(review): the definition is not visible in this header; presumably it is provided
// by ytalloc-inl.h -- confirm.
template <size_t Size>
void* AllocateConstSize();
// Frees a chunk of memory previously allocated via one of the Allocate functions.
// Does nothing if #ptr is null.
void Free(void* ptr);
// Similar to #Free but assumes that #ptr is not null;
// the caller is responsible for ensuring this.
void FreeNonNull(void* ptr);
// Returns the size of the chunk pointed to by #ptr.
// Due to rounding, this size is not guaranteed to be exactly equal to the size passed
// to the allocation function; it is, however, never less than that size.
// Returns 0 if #ptr is null or the allocation size cannot be determined.
size_t GetAllocationSize(const void* ptr);
// Returns the size of the chunk that would actually be allocated
// for a request of the given #size. The result is never less than #size.
size_t GetAllocationSize(size_t size);
////////////////////////////////////////////////////////////////////////////////
// Memory zone API
//
// Each allocation resides either in the "normal zone" or in the "undumpable zone".
// The latter indicates that the memory region will be excluded from a coredump
// should one happen.
//
// The current zone used for allocations is stored in TLS.
// The memory zone serves as a hint passed to the allocator.
DEFINE_ENUM(EMemoryZone,
((Unknown) (-1)) // not a valid zone
((Normal) ( 0)) // default memory type
((Undumpable) ( 1)) // memory is omitted from the core dump
);
// Updates the current zone in TLS, i.e. the zone hint used for subsequent allocations.
void SetCurrentMemoryZone(EMemoryZone zone);
// Returns the current zone stored in TLS (see #SetCurrentMemoryZone).
EMemoryZone GetCurrentMemoryZone();
// Returns the zone where #ptr resides;
// EMemoryZone::Unknown indicates that #ptr is outside of any recognized memory zone.
EMemoryZone GetAllocationMemoryZone(const void* ptr);
////////////////////////////////////////////////////////////////////////////////
// When a "timing event" (hiccup) occurs during an allocation,
// YTAlloc records this event and captures the current fiber id.
// The latter is provided externally by calling #SetCurrentFiberId.
//
// This may be helpful for correlating various application-level timings
// with internal events in YTAlloc.
//
// The current fiber id is stored in TLS.
using TFiberId = ui64;
// Updates the current fiber id in TLS; this is the id captured when a timing event is recorded.
void SetCurrentFiberId(TFiberId id);
// Returns the currently assigned fiber id from TLS.
TFiberId GetCurrentFiberId();
////////////////////////////////////////////////////////////////////////////////
// Logging
// Severity attached to each log event (see #TLogEvent).
DEFINE_ENUM(ELogEventSeverity,
(Debug)
(Info)
(Warning)
(Error)
);
// A single log event delivered to the log handler (see #TLogHandler).
struct TLogEvent
{
// Severity of the event.
ELogEventSeverity Severity;
// Text of the event.
// NOTE(review): TStringBuf is presumably a non-owning view; a handler that needs the text
// after returning should probably copy it -- confirm.
TStringBuf Message;
};
// Signature of a log handler: a plain function pointer receiving the event by const reference.
using TLogHandler = void(*)(const TLogEvent& event);
// Sets the handler to be invoked for each log event produced by YTAlloc.
// Can be called multiple times (but calls to the previously installed handlers
// are racy).
void EnableLogging(TLogHandler logHandler);
////////////////////////////////////////////////////////////////////////////////
// Backtraces
// Signature of a backtrace provider: takes an array #frames, a count #maxFrames,
// and a count #skipFrames; returns an int.
// NOTE(review): presumably fills #frames with up to #maxFrames addresses, omitting the
// #skipFrames innermost frames, and returns the number of frames written -- confirm.
using TBacktraceProvider = int(*)(void** frames, int maxFrames, int skipFrames);
// Sets the provider used for collecting backtraces when allocation profiling
// is turned ON (see #SetAllocationProfilingEnabled). Can be called multiple times
// (but calls to the previously installed providers are racy).
void SetBacktraceProvider(TBacktraceProvider provider);
// Signature of a backtrace formatter: takes #frameCount frame pointers in #frames
// and returns their textual representation as a TString.
using TBacktraceFormatter = TString(*)(const void* const* frames, int frameCount);
// Sets the callback used for formatting backtraces during large arena mmap calls
// to help detect memory leaks. Can be called multiple times (but calls to the
// previously installed formatters are racy).
void SetBacktraceFormatter(TBacktraceFormatter provider);
////////////////////////////////////////////////////////////////////////////////
// Misc
//! Tries to mlock all opened file mappings of the current process.
//! Typically invoked on application startup to lock all binaries in memory
//! and prevent executable code and static data from being paged out,
//! which would cause latency spikes.
//! NOTE(review): the effect of #populate (true by default) is not documented here;
//! presumably it requests prefaulting of the locked mappings -- confirm.
void MlockFileMappings(bool populate = true);
////////////////////////////////////////////////////////////////////////////////
// Configuration API
// Calling this function enables periodic calls to madvise(ADV_STOCKPILE);
// cf. https://st.yandex-team.ru/KERNEL-186
// The calls are tuned via #SetStockpileInterval, #SetStockpileThreadCount, and #SetStockpileSize.
void EnableStockpile();
// Sets the interval between consecutive madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled (see #EnableStockpile).
void SetStockpileInterval(TDuration value);
// Sets the number of threads invoking madvise(ADV_STOCKPILE).
// This call must be made before calling #EnableStockpile.
void SetStockpileThreadCount(int value);
// Sets the size passed to madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled (see #EnableStockpile).
void SetStockpileSize(size_t value);
// For large blobs, YTAlloc keeps at least
// LargeUnreclaimableCoeff * TotalLargeBytesUsed clamped to range
// [MinLargeUnreclaimableBytes, MaxLargeUnreclaimableBytes]
// bytes of pooled (unreclaimable) memory.
// The three setters below configure the coefficient and the two clamping bounds, respectively.
void SetLargeUnreclaimableCoeff(double value);
void SetMinLargeUnreclaimableBytes(size_t value);
void SetMaxLargeUnreclaimableBytes(size_t value);
// When a syscall (mmap, munmap, or madvise) or an internal lock acquisition
// takes longer than the configured time, a "timing event" is recorded.
// Recorded events are reported by #GetTimingEventCounters.
void SetTimingEventThreshold(TDuration value);
// Toggles the global allocation profiling knob (OFF by default).
// For profiled allocations, YTAlloc collects backtraces (see #SetBacktraceProvider)
// and aggregates them; the result is available via #GetProfiledAllocationStatistics.
void SetAllocationProfilingEnabled(bool value);
// Determines the fraction of allocations to be sampled for profiling.
// NOTE(review): #rate is presumably expected to lie in [0, 1] -- confirm.
void SetAllocationProfilingSamplingRate(double rate);
// Controls whether small allocations of a given rank are profiled (OFF by default).
// NOTE(review): #rank is presumably expected to be less than #SmallRankCount -- confirm.
void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value);
// Controls whether large allocations of a given rank are profiled (OFF by default).
// NOTE(review): #rank is presumably expected to be less than #LargeRankCount -- confirm.
void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value);
// Controls the depth of the backtraces to collect. Deeper backtraces
// take more time to collect and affect the program performance.
// Backtraces deeper than #MaxAllocationProfilingBacktraceDepth are never collected.
void SetProfilingBacktraceDepth(int depth);
// Controls the minimum number of used bytes a certain backtrace must
// account for to appear in profiling reports (see #GetProfiledAllocationStatistics).
void SetMinProfilingBytesUsedToReport(size_t size);
// If set to true (default), YTAlloc uses madvise with MADV_DONTNEED to release unused large blob pages
// (slower but leads to more predictable RSS values);
// if false then MADV_FREE is used instead, if available
// (faster but RSS may get stuck arbitrarily higher than the actual usage as long
// as no memory pressure is applied).
void SetEnableEagerMemoryRelease(bool value);
// If set to true, YTAlloc uses madvise with MADV_POPULATE to prefault freshly acquired pages.
// Otherwise (this is the default), these pages are prefaulted with linear memory access.
// See https://st.yandex-team.ru/KERNEL-185.
void SetEnableMadvisePopulate(bool value);
////////////////////////////////////////////////////////////////////////////////
// Statistics API
// Basic byte counters: the number of bytes allocated, freed, and currently used.
// Reused for system, small, large, and undumpable statistics and for profiled allocations.
DEFINE_ENUM(EBasicCounter,
(BytesAllocated)
(BytesFreed)
(BytesUsed)
);
// System, small, large, and undumpable statistics all share the #EBasicCounter set.
using ESystemCounter = EBasicCounter;
using ESmallCounter = EBasicCounter;
using ELargeCounter = EBasicCounter;
using EUndumpableCounter = EBasicCounter;
// Per-arena counters for small allocations (see #GetSmallArenaAllocationCounters).
DEFINE_ENUM(ESmallArenaCounter,
(PagesMapped)
(BytesMapped)
(PagesCommitted)
(BytesCommitted)
);
// Per-arena counters for large allocations (see #GetLargeArenaAllocationCounters).
DEFINE_ENUM(ELargeArenaCounter,
(BytesSpare)
(BytesOverhead)
(BlobsAllocated)
(BlobsFreed)
(BlobsUsed)
(BytesAllocated)
(BytesFreed)
(BytesUsed)
(ExtentsAllocated)
(PagesMapped)
(BytesMapped)
(PagesPopulated)
(BytesPopulated)
(PagesReleased)
(BytesReleased)
(PagesCommitted)
(BytesCommitted)
(OverheadBytesReclaimed)
(SpareBytesReclaimed)
);
// Counters for huge allocations (see #GetHugeAllocationCounters):
// byte and blob counts for allocated, freed, and used memory.
DEFINE_ENUM(EHugeCounter,
(BytesAllocated)
(BytesFreed)
(BytesUsed)
(BlobsAllocated)
(BlobsFreed)
(BlobsUsed)
);
// Counters covering all user allocations (see #GetTotalAllocationCounters).
DEFINE_ENUM(ETotalCounter,
(BytesAllocated)
(BytesFreed)
(BytesUsed)
(BytesCommitted)
(BytesUnaccounted)
);
// Returns statistics for all user allocations.
TEnumIndexedArray<ETotalCounter, ssize_t> GetTotalAllocationCounters();
// Returns statistics for small allocations; these are included in the total statistics.
TEnumIndexedArray<ESmallCounter, ssize_t> GetSmallAllocationCounters();
// Returns statistics for large allocations; these are included in the total statistics.
TEnumIndexedArray<ELargeCounter, ssize_t> GetLargeAllocationCounters();
// Returns per-arena statistics for small allocations; these are included in the total statistics.
// The result has #SmallRankCount entries.
// NOTE(review): the array index is presumably the small rank -- confirm.
std::array<TEnumIndexedArray<ESmallArenaCounter, ssize_t>, SmallRankCount> GetSmallArenaAllocationCounters();
// Returns per-arena statistics for large allocations; these are included in the total statistics.
// The result has #LargeRankCount entries.
// NOTE(review): the array index is presumably the large rank -- confirm.
std::array<TEnumIndexedArray<ELargeArenaCounter, ssize_t>, LargeRankCount> GetLargeArenaAllocationCounters();
// Returns statistics for huge allocations; these are included in the total statistics.
TEnumIndexedArray<EHugeCounter, ssize_t> GetHugeAllocationCounters();
// Returns statistics for all system allocations; these are not included in the total statistics.
TEnumIndexedArray<ESystemCounter, ssize_t> GetSystemAllocationCounters();
// Returns statistics for undumpable allocations.
TEnumIndexedArray<EUndumpableCounter, ssize_t> GetUndumpableAllocationCounters();
// Kinds of timing events (see #SetTimingEventThreshold and #GetTimingEventCounters).
// Per #SetTimingEventThreshold, timing events stem from syscalls (mmap, munmap, madvise)
// and internal lock acquisitions.
// NOTE(review): Prefault and FilePrefault are not explained in this header -- confirm their meaning.
DEFINE_ENUM(ETimingEventType,
(Mmap)
(Munmap)
(MadvisePopulate)
(MadviseFree)
(MadviseDontNeed)
(Locking)
(Prefault)
(FilePrefault)
);
// Aggregated statistics for one kind of timing event; both counters start at zero.
struct TTimingEventCounters
{
    // How many events have occurred since start.
    size_t Count{0};
    // Cumulative size of the memory blocks involved in those events (if applicable).
    size_t Size{0};
};
// Returns statistics for the timing events that have happened since start,
// one #TTimingEventCounters entry per event type.
// See #SetTimingEventThreshold.
TEnumIndexedArray<ETimingEventType, TTimingEventCounters> GetTimingEventCounters();
////////////////////////////////////////////////////////////////////////////////
// We never collect backtraces deeper than this limit.
constexpr int MaxAllocationProfilingBacktraceDepth = 16;
// A backtrace of bounded depth (see #TProfiledAllocation).
// Members carry default initializers so that a default-constructed instance is
// an empty backtrace rather than holding indeterminate values; the struct remains
// an aggregate, so brace initialization keeps working.
struct TBacktrace
{
    // Number of frames in the backtrace; zero for a default-constructed instance.
    // NOTE(review): presumably the number of valid leading entries of #Frames -- confirm.
    int FrameCount = 0;
    // Frame addresses; all null for a default-constructed instance.
    std::array<void*, MaxAllocationProfilingBacktraceDepth> Frames = {};
};
// Statistics for one group of profiled allocations sharing the same backtrace
// (see #GetProfiledAllocationStatistics).
struct TProfiledAllocation
{
// The backtrace identifying the group.
TBacktrace Backtrace;
// Number of bytes allocated, freed, and used by the group.
TEnumIndexedArray<EBasicCounter, ssize_t> Counters;
};
// Returns statistics for profiled allocations (available when allocation
// profiling is ON; see #SetAllocationProfilingEnabled). Allocations are grouped by backtrace;
// for each backtrace we provide the counters indicating the number of allocated, freed, and used bytes.
// To appear here, the used bytes counter of a backtrace must be at least the value configured
// via #SetMinProfilingBytesUsedToReport.
std::vector<TProfiledAllocation> GetProfiledAllocationStatistics();
////////////////////////////////////////////////////////////////////////////////
} // namespace NYT::NYTAlloc
#define YT_ALLOC_INL_H_
#include "ytalloc-inl.h"
#undef YT_ALLOC_INL_H_
|