1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
|
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. There may be multiple
implementations of this (one could be the identity going directly to the OS,
another could be a simple cache etc), but the current one uses large "regions".
In contrast to the rest of mimalloc, the "regions" are shared between threads and
need to be accessed using atomic operations.
We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses
in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.
Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
simpler. Generally though that requires shrinking of huge pages, and may overuse
memory per thread. (and is not compatible with `sbrk`).
- Since the current regions are per-process, we need atomic operations to
claim blocks which may be contended
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
linearly. At what point will direct OS calls be faster? Is there a way to
do this better without adding too much complexity?
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
#include "bitmap.h"
// Internal raw OS interface
size_t _mi_os_large_page_size();
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
// arena.c
void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
// Region info
typedef union mi_region_info_u {
uintptr_t value;
struct {
bool valid; // initialized?
bool is_large:1; // allocated in fixed large/huge OS pages
bool is_pinned:1; // pinned memory cannot be decommitted
short numa_node; // the associated NUMA node (where -1 means no associated node)
} x;
} mi_region_info_t;
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
_Atomic(uintptr_t) info; // mi_region_info_t.value
_Atomic(void*) start; // start of the memory area
mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block
mi_bitmap_field_t reset; // track if reset per block
_Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
uintptr_t padding; // round to 8 fields
} mem_region_t;
// The region map
static mem_region_t regions[MI_REGION_MAX];
// Allocated regions
static _Atomic(uintptr_t) regions_count; // = 0;
/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/
// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
return _mi_divide_up(size, MI_SEGMENT_SIZE);
}
/*
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
return _mi_align_up(size, _mi_os_large_page_size());
}
*/
// Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
if (p==NULL) return false;
size_t count = mi_atomic_load_relaxed(®ions_count);
for (size_t i = 0; i < count; i++) {
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start);
if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
}
return false;
}
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
mi_assert_internal(start != NULL);
return (start + (bit_idx * MI_SEGMENT_SIZE));
}
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
size_t idx = region - regions;
mi_assert_internal(®ions[idx] == region);
return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}
static size_t mi_memid_create_from_arena(size_t arena_memid) {
return (arena_memid << 1) | 1;
}
static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
if ((id&1)==1) {
if (arena_memid != NULL) *arena_memid = (id>>1);
return true;
}
else {
size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
*bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
*region = ®ions[idx];
return false;
}
}
/* ----------------------------------------------------------------------------
Allocate a region is allocated from the OS (or an arena)
-----------------------------------------------------------------------------*/
static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// not out of regions yet?
if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false;
// try to allocate a fresh region from the OS
bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
bool region_large = (commit && allow_large);
bool is_zero = false;
bool is_pinned = false;
size_t arena_memid = 0;
void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, &arena_memid, tld);
if (start == NULL) return false;
mi_assert_internal(!(region_large && !allow_large));
mi_assert_internal(!region_large || region_commit);
// claim a fresh slot
const uintptr_t idx = mi_atomic_increment_acq_rel(®ions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement_acq_rel(®ions_count);
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats);
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
return false;
}
// allocated, initialize and claim the initial blocks
mem_region_t* r = ®ions[idx];
r->arena_memid = arena_memid;
mi_atomic_store_release(&r->in_use, (uintptr_t)0);
mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
mi_atomic_store_release(&r->reset, (uintptr_t)0);
*bit_idx = 0;
_mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_store_ptr_release(void,&r->start, start);
// and share it
mi_region_info_t info;
info.value = 0; // initialize the full union to zero
info.x.valid = true;
info.x.is_large = region_large;
info.x.is_pinned = is_pinned;
info.x.numa_node = (short)_mi_os_numa_node(tld);
mi_atomic_store_release(&r->info, info.value); // now make it available to others
*region = r;
return true;
}
/* ----------------------------------------------------------------------------
Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
// initialized at all?
mi_region_info_t info;
info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info);
if (info.value==0) return false;
// numa correct
if (numa_node >= 0) { // use negative numa node to always succeed
int rnode = info.x.numa_node;
if (rnode >= 0 && rnode != numa_node) return false;
}
// check allow-large
if (!allow_large && info.x.is_large) return false;
return true;
}
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// try all regions for a free slot
const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
mem_region_t* r = ®ions[idx];
// if this region suits our demand (numa node matches, large OS page matches)
if (mi_region_is_suitable(r, numa_node, allow_large)) {
// then try to atomically claim a segment(s) in this region
if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position
*region = r;
return true;
}
}
}
return false;
}
static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
mem_region_t* region;
mi_bitmap_index_t bit_idx;
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
// try to claim in existing regions
if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) {
// otherwise try to allocate a fresh region and claim in there
if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) {
// out of regions or memory
return NULL;
}
}
// ------------------------------------------------
// found a region and claimed `blocks` at `bit_idx`, initialize them now
mi_assert_internal(region != NULL);
mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx));
mi_region_info_t info;
info.value = mi_atomic_load_acquire(®ion->info);
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start);
mi_assert_internal(!(info.x.is_large && !*large));
mi_assert_internal(start != NULL);
*is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL);
*large = info.x.is_large;
*is_pinned = info.x.is_pinned;
*memid = mi_memid_create(region, bit_idx);
void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
// commit
if (*commit) {
// ensure commit
bool any_uncommitted;
_mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted);
if (any_uncommitted) {
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
bool commit_zero = false;
if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) {
// failed to commit! unclaim and return
mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx);
return NULL;
}
if (commit_zero) *is_zero = true;
}
}
else {
// no need to commit, but check if already fully committed
*commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx);
}
mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx));
// unreset reset blocks
if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) {
// some blocks are still reset
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx);
if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
bool reset_zero = false;
_mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
if (reset_zero) *is_zero = true;
}
}
mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx));
#if (MI_DEBUG>=2)
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
// and return the allocation
mi_assert_internal(p != NULL);
return p;
}
/* ----------------------------------------------------------------------------
Allocation
-----------------------------------------------------------------------------*/
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = 0;
*is_zero = false;
*is_pinned = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
if (size == 0) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
// allocate from regions if possible
void* p = NULL;
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
if (p == NULL) {
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
}
if (p == NULL) {
// and otherwise fall back to the OS
p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
}
if (p != NULL) {
mi_assert_internal((uintptr_t)p % alignment == 0);
#if (MI_DEBUG>=2)
if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
#endif
}
return p;
}
/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && tld != NULL);
if (p==NULL) return;
if (size==0) return;
size = _mi_align_up(size, _mi_os_page_size());
size_t arena_memid = 0;
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
_mi_arena_free(p, size, arena_memid, full_commit, tld->stats);
}
else {
// allocated in a region
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
const size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
mi_region_info_t info;
info.value = mi_atomic_load_acquire(®ion->info);
mi_assert_internal(info.value != 0);
void* blocks_start = mi_region_blocks_start(region, bit_idx);
mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
// committed?
if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
_mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL);
}
if (any_reset) {
// set the is_reset bits if any pages were reset
_mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL);
}
// reset the blocks to reduce the working set.
if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
&& (mi_option_is_enabled(mi_option_eager_commit) ||
mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
{
bool any_unreset;
_mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset);
if (any_unreset) {
_mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
}
}
// and unclaim
bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx);
mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed);
}
}
/* ----------------------------------------------------------------------------
collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use.
uintptr_t rcount = mi_atomic_load_relaxed(®ions_count);
for (size_t i = 0; i < rcount; i++) {
mem_region_t* region = ®ions[i];
if (mi_atomic_load_relaxed(®ion->info) != 0) {
// if no segments used, try to claim the whole region
uintptr_t m = mi_atomic_load_relaxed(®ion->in_use);
while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
if (m == 0) {
// on success, free the whole region
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start);
size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid);
uintptr_t commit = mi_atomic_load_relaxed(®ions[i].commit);
memset(®ions[i], 0, sizeof(mem_region_t));
// and release the whole region
mi_atomic_store_release(®ion->info, (uintptr_t)0);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_abandoned_await_readers(); // ensure no pending reads
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats);
}
}
}
}
}
/* ----------------------------------------------------------------------------
Other
-----------------------------------------------------------------------------*/
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
return _mi_os_reset(p, size, tld->stats);
}
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
return _mi_os_unreset(p, size, is_zero, tld->stats);
}
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
return _mi_os_commit(p, size, is_zero, tld->stats);
}
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
return _mi_os_decommit(p, size, tld->stats);
}
bool _mi_mem_protect(void* p, size_t size) {
return _mi_os_protect(p, size);
}
bool _mi_mem_unprotect(void* p, size_t size) {
return _mi_os_unprotect(p, size);
}
|