summaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/Objects/obmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tools/python3/Objects/obmalloc.c')
-rw-r--r--contrib/tools/python3/Objects/obmalloc.c1013
1 files changed, 931 insertions, 82 deletions
diff --git a/contrib/tools/python3/Objects/obmalloc.c b/contrib/tools/python3/Objects/obmalloc.c
index 9620a8fbb44..3fdc9d01f2d 100644
--- a/contrib/tools/python3/Objects/obmalloc.c
+++ b/contrib/tools/python3/Objects/obmalloc.c
@@ -2,13 +2,30 @@
#include "Python.h"
#include "pycore_code.h" // stats
-#include "pycore_pystate.h" // _PyInterpreterState_GET
-
+#include "pycore_object.h" // _PyDebugAllocatorStats() definition
#include "pycore_obmalloc.h"
+#include "pycore_pyerrors.h" // _Py_FatalErrorFormat()
#include "pycore_pymem.h"
+#include "pycore_pystate.h" // _PyInterpreterState_GET
+#include "pycore_obmalloc_init.h"
#include <stdlib.h> // malloc()
#include <stdbool.h>
+#ifdef WITH_MIMALLOC
+// Forward declarations of functions used in our mimalloc modifications
+static void _PyMem_mi_page_clear_qsbr(mi_page_t *page);
+static bool _PyMem_mi_page_is_safe_to_free(mi_page_t *page);
+static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force);
+static void _PyMem_mi_page_reclaimed(mi_page_t *page);
+static void _PyMem_mi_heap_collect_qsbr(mi_heap_t *heap);
+# include "pycore_mimalloc.h"
+# include "mimalloc/static.c"
+# include "mimalloc/internal.h" // for stats
+#endif
+
+#if defined(Py_GIL_DISABLED) && !defined(WITH_MIMALLOC)
+# error "Py_GIL_DISABLED requires WITH_MIMALLOC"
+#endif
#undef uint
#define uint pymem_uint
@@ -73,25 +90,272 @@ _PyMem_RawFree(void *Py_UNUSED(ctx), void *ptr)
free(ptr);
}
+#ifdef WITH_MIMALLOC
+
+static void
+_PyMem_mi_page_clear_qsbr(mi_page_t *page)
+{
+#ifdef Py_GIL_DISABLED
+ // Clear the QSBR goal and remove the page from the QSBR linked list.
+ page->qsbr_goal = 0;
+ if (page->qsbr_node.next != NULL) {
+ llist_remove(&page->qsbr_node);
+ }
+#endif
+}
+
+// Check if an empty, newly reclaimed page is safe to free now.
+static bool
+_PyMem_mi_page_is_safe_to_free(mi_page_t *page)
+{
+ assert(mi_page_all_free(page));
+#ifdef Py_GIL_DISABLED
+ assert(page->qsbr_node.next == NULL);
+ if (page->use_qsbr && page->qsbr_goal != 0) {
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ if (tstate == NULL) {
+ return false;
+ }
+ return _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal);
+ }
+#endif
+ return true;
+
+}
+
+#ifdef Py_GIL_DISABLED
+
+// If we are deferring collection of more than this amount of memory for
+// mimalloc pages, advance the write sequence. Advancing allows these
+// pages to be re-used in a different thread or for a different size class.
+#define QSBR_PAGE_MEM_LIMIT 4096*20
+
+// Return true if the global write sequence should be advanced for a mimalloc
+// page that is deferred from collection.
+static bool
+should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
+{
+ size_t bsize = mi_page_block_size(page);
+ size_t page_size = page->capacity*bsize;
+ if (page_size > QSBR_PAGE_MEM_LIMIT) {
+ qsbr->deferred_page_memory = 0;
+ return true;
+ }
+ qsbr->deferred_page_memory += page_size;
+ if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
+ qsbr->deferred_page_memory = 0;
+ return true;
+ }
+ return false;
+}
+#endif
+
+static bool
+_PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
+{
+#ifdef Py_GIL_DISABLED
+ assert(mi_page_all_free(page));
+ if (page->use_qsbr) {
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
+ if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
+ _PyMem_mi_page_clear_qsbr(page);
+ _mi_page_free(page, pq, force);
+ return true;
+ }
+
+ _PyMem_mi_page_clear_qsbr(page);
+ page->retire_expire = 0;
+
+ if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
+ page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
+ }
+ else {
+ page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
+ }
+
+ llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
+ return false;
+ }
+#endif
+ _mi_page_free(page, pq, force);
+ return true;
+}
+
+static void
+_PyMem_mi_page_reclaimed(mi_page_t *page)
+{
+#ifdef Py_GIL_DISABLED
+ assert(page->qsbr_node.next == NULL);
+ if (page->qsbr_goal != 0) {
+ if (mi_page_all_free(page)) {
+ assert(page->qsbr_node.next == NULL);
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
+ page->retire_expire = 0;
+ llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
+ }
+ else {
+ page->qsbr_goal = 0;
+ }
+ }
+#endif
+}
+
+static void
+_PyMem_mi_heap_collect_qsbr(mi_heap_t *heap)
+{
+#ifdef Py_GIL_DISABLED
+ if (!heap->page_use_qsbr) {
+ return;
+ }
+
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ struct llist_node *head = &tstate->mimalloc.page_list;
+ if (llist_empty(head)) {
+ return;
+ }
+
+ struct llist_node *node;
+ llist_for_each_safe(node, head) {
+ mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
+ if (!mi_page_all_free(page)) {
+ // We allocated from this page some point after the delayed free
+ _PyMem_mi_page_clear_qsbr(page);
+ continue;
+ }
+
+ if (!_Py_qsbr_poll(tstate->qsbr, page->qsbr_goal)) {
+ return;
+ }
+
+ _PyMem_mi_page_clear_qsbr(page);
+ _mi_page_free(page, mi_page_queue_of(page), false);
+ }
+#endif
+}
+
+void *
+_PyMem_MiMalloc(void *ctx, size_t size)
+{
+#ifdef Py_GIL_DISABLED
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
+ return mi_heap_malloc(heap, size);
+#else
+ return mi_malloc(size);
+#endif
+}
+
+void *
+_PyMem_MiCalloc(void *ctx, size_t nelem, size_t elsize)
+{
+#ifdef Py_GIL_DISABLED
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
+ return mi_heap_calloc(heap, nelem, elsize);
+#else
+ return mi_calloc(nelem, elsize);
+#endif
+}
+
+void *
+_PyMem_MiRealloc(void *ctx, void *ptr, size_t size)
+{
+#ifdef Py_GIL_DISABLED
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ mi_heap_t *heap = &tstate->mimalloc.heaps[_Py_MIMALLOC_HEAP_MEM];
+ return mi_heap_realloc(heap, ptr, size);
+#else
+ return mi_realloc(ptr, size);
+#endif
+}
+
+void
+_PyMem_MiFree(void *ctx, void *ptr)
+{
+ mi_free(ptr);
+}
+
+void *
+_PyObject_MiMalloc(void *ctx, size_t nbytes)
+{
+#ifdef Py_GIL_DISABLED
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ mi_heap_t *heap = tstate->mimalloc.current_object_heap;
+ return mi_heap_malloc(heap, nbytes);
+#else
+ return mi_malloc(nbytes);
+#endif
+}
+
+void *
+_PyObject_MiCalloc(void *ctx, size_t nelem, size_t elsize)
+{
+#ifdef Py_GIL_DISABLED
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ mi_heap_t *heap = tstate->mimalloc.current_object_heap;
+ return mi_heap_calloc(heap, nelem, elsize);
+#else
+ return mi_calloc(nelem, elsize);
+#endif
+}
+
+
+void *
+_PyObject_MiRealloc(void *ctx, void *ptr, size_t nbytes)
+{
+#ifdef Py_GIL_DISABLED
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ mi_heap_t *heap = tstate->mimalloc.current_object_heap;
+ return mi_heap_realloc(heap, ptr, nbytes);
+#else
+ return mi_realloc(ptr, nbytes);
+#endif
+}
+
+void
+_PyObject_MiFree(void *ctx, void *ptr)
+{
+ mi_free(ptr);
+}
+
+#endif // WITH_MIMALLOC
+
+
#define MALLOC_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree}
-#define PYRAW_ALLOC MALLOC_ALLOC
-/* the default object allocator */
+
+#ifdef WITH_MIMALLOC
+# define MIMALLOC_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree}
+# define MIMALLOC_OBJALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree}
+#endif
+
+/* the pymalloc allocator */
// The actual implementation is further down.
-#ifdef WITH_PYMALLOC
+#if defined(WITH_PYMALLOC)
void* _PyObject_Malloc(void *ctx, size_t size);
void* _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize);
void _PyObject_Free(void *ctx, void *p);
void* _PyObject_Realloc(void *ctx, void *ptr, size_t size);
# define PYMALLOC_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free}
+#endif // WITH_PYMALLOC
+
+#if defined(Py_GIL_DISABLED)
+// Py_GIL_DISABLED requires using mimalloc for "mem" and "obj" domains.
+# define PYRAW_ALLOC MALLOC_ALLOC
+# define PYMEM_ALLOC MIMALLOC_ALLOC
+# define PYOBJ_ALLOC MIMALLOC_OBJALLOC
+#elif defined(WITH_PYMALLOC)
+# define PYRAW_ALLOC MALLOC_ALLOC
+# define PYMEM_ALLOC PYMALLOC_ALLOC
# define PYOBJ_ALLOC PYMALLOC_ALLOC
#else
+# define PYRAW_ALLOC MALLOC_ALLOC
+# define PYMEM_ALLOC MALLOC_ALLOC
# define PYOBJ_ALLOC MALLOC_ALLOC
-#endif // WITH_PYMALLOC
+#endif
-#define PYMEM_ALLOC PYOBJ_ALLOC
/* the default debug allocators */
@@ -156,8 +420,16 @@ _PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
)
{
#ifdef MS_WINDOWS
+ /* Unlike free(), VirtualFree() does not special-case NULL to noop. */
+ if (ptr == NULL) {
+ return;
+ }
VirtualFree(ptr, 0, MEM_RELEASE);
#elif defined(ARENAS_USE_MMAP)
+ /* Unlike free(), munmap() does not special-case NULL to noop. */
+ if (ptr == NULL) {
+ return;
+ }
munmap(ptr, size);
#else
free(ptr);
@@ -258,13 +530,9 @@ int
_PyMem_SetDefaultAllocator(PyMemAllocatorDomain domain,
PyMemAllocatorEx *old_alloc)
{
- if (ALLOCATORS_MUTEX == NULL) {
- /* The runtime must be initializing. */
- return set_default_allocator_unlocked(domain, pydebug, old_alloc);
- }
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
int res = set_default_allocator_unlocked(domain, pydebug, old_alloc);
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
return res;
}
@@ -283,7 +551,7 @@ _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
else if (strcmp(name, "debug") == 0) {
*allocator = PYMEM_ALLOCATOR_DEBUG;
}
-#ifdef WITH_PYMALLOC
+#if defined(WITH_PYMALLOC) && !defined(Py_GIL_DISABLED)
else if (strcmp(name, "pymalloc") == 0) {
*allocator = PYMEM_ALLOCATOR_PYMALLOC;
}
@@ -291,12 +559,22 @@ _PyMem_GetAllocatorName(const char *name, PyMemAllocatorName *allocator)
*allocator = PYMEM_ALLOCATOR_PYMALLOC_DEBUG;
}
#endif
+#ifdef WITH_MIMALLOC
+ else if (strcmp(name, "mimalloc") == 0) {
+ *allocator = PYMEM_ALLOCATOR_MIMALLOC;
+ }
+ else if (strcmp(name, "mimalloc_debug") == 0) {
+ *allocator = PYMEM_ALLOCATOR_MIMALLOC_DEBUG;
+ }
+#endif
+#ifndef Py_GIL_DISABLED
else if (strcmp(name, "malloc") == 0) {
*allocator = PYMEM_ALLOCATOR_MALLOC;
}
else if (strcmp(name, "malloc_debug") == 0) {
*allocator = PYMEM_ALLOCATOR_MALLOC_DEBUG;
}
+#endif
else {
/* unknown allocator */
return -1;
@@ -317,12 +595,14 @@ set_up_allocators_unlocked(PyMemAllocatorName allocator)
(void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, pydebug, NULL);
(void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, pydebug, NULL);
(void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, pydebug, NULL);
+ _PyRuntime.allocators.is_debug_enabled = pydebug;
break;
case PYMEM_ALLOCATOR_DEBUG:
(void)set_default_allocator_unlocked(PYMEM_DOMAIN_RAW, 1, NULL);
(void)set_default_allocator_unlocked(PYMEM_DOMAIN_MEM, 1, NULL);
(void)set_default_allocator_unlocked(PYMEM_DOMAIN_OBJ, 1, NULL);
+ _PyRuntime.allocators.is_debug_enabled = 1;
break;
#ifdef WITH_PYMALLOC
@@ -336,9 +616,33 @@ set_up_allocators_unlocked(PyMemAllocatorName allocator)
set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &pymalloc);
- if (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG) {
+ int is_debug = (allocator == PYMEM_ALLOCATOR_PYMALLOC_DEBUG);
+ _PyRuntime.allocators.is_debug_enabled = is_debug;
+ if (is_debug) {
+ set_up_debug_hooks_unlocked();
+ }
+ break;
+ }
+#endif
+#ifdef WITH_MIMALLOC
+ case PYMEM_ALLOCATOR_MIMALLOC:
+ case PYMEM_ALLOCATOR_MIMALLOC_DEBUG:
+ {
+ PyMemAllocatorEx malloc_alloc = MALLOC_ALLOC;
+ set_allocator_unlocked(PYMEM_DOMAIN_RAW, &malloc_alloc);
+
+ PyMemAllocatorEx pymalloc = MIMALLOC_ALLOC;
+ set_allocator_unlocked(PYMEM_DOMAIN_MEM, &pymalloc);
+
+ PyMemAllocatorEx objmalloc = MIMALLOC_OBJALLOC;
+ set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &objmalloc);
+
+ int is_debug = (allocator == PYMEM_ALLOCATOR_MIMALLOC_DEBUG);
+ _PyRuntime.allocators.is_debug_enabled = is_debug;
+ if (is_debug) {
set_up_debug_hooks_unlocked();
}
+
break;
}
#endif
@@ -351,7 +655,9 @@ set_up_allocators_unlocked(PyMemAllocatorName allocator)
set_allocator_unlocked(PYMEM_DOMAIN_MEM, &malloc_alloc);
set_allocator_unlocked(PYMEM_DOMAIN_OBJ, &malloc_alloc);
- if (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG) {
+ int is_debug = (allocator == PYMEM_ALLOCATOR_MALLOC_DEBUG);
+ _PyRuntime.allocators.is_debug_enabled = is_debug;
+ if (is_debug) {
set_up_debug_hooks_unlocked();
}
break;
@@ -368,9 +674,9 @@ set_up_allocators_unlocked(PyMemAllocatorName allocator)
int
_PyMem_SetupAllocators(PyMemAllocatorName allocator)
{
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
int res = set_up_allocators_unlocked(allocator);
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
return res;
}
@@ -389,6 +695,10 @@ get_current_allocator_name_unlocked(void)
#ifdef WITH_PYMALLOC
PyMemAllocatorEx pymalloc = PYMALLOC_ALLOC;
#endif
+#ifdef WITH_MIMALLOC
+ PyMemAllocatorEx mimalloc = MIMALLOC_ALLOC;
+ PyMemAllocatorEx mimalloc_obj = MIMALLOC_OBJALLOC;
+#endif
if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
pymemallocator_eq(&_PyMem, &malloc_alloc) &&
@@ -404,6 +714,14 @@ get_current_allocator_name_unlocked(void)
return "pymalloc";
}
#endif
+#ifdef WITH_MIMALLOC
+ if (pymemallocator_eq(&_PyMem_Raw, &malloc_alloc) &&
+ pymemallocator_eq(&_PyMem, &mimalloc) &&
+ pymemallocator_eq(&_PyObject, &mimalloc_obj))
+ {
+ return "mimalloc";
+ }
+#endif
PyMemAllocatorEx dbg_raw = PYDBGRAW_ALLOC;
PyMemAllocatorEx dbg_mem = PYDBGMEM_ALLOC;
@@ -428,6 +746,14 @@ get_current_allocator_name_unlocked(void)
return "pymalloc_debug";
}
#endif
+#ifdef WITH_MIMALLOC
+ if (pymemallocator_eq(&_PyMem_Debug.raw.alloc, &malloc_alloc) &&
+ pymemallocator_eq(&_PyMem_Debug.mem.alloc, &mimalloc) &&
+ pymemallocator_eq(&_PyMem_Debug.obj.alloc, &mimalloc_obj))
+ {
+ return "mimalloc_debug";
+ }
+#endif
}
return NULL;
}
@@ -435,20 +761,20 @@ get_current_allocator_name_unlocked(void)
const char*
_PyMem_GetCurrentAllocatorName(void)
{
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
const char *name = get_current_allocator_name_unlocked();
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
return name;
}
-#ifdef WITH_PYMALLOC
-static int
+int
_PyMem_DebugEnabled(void)
{
- return (_PyObject.malloc == _PyMem_DebugMalloc);
+ return _PyRuntime.allocators.is_debug_enabled;
}
+#ifdef WITH_PYMALLOC
static int
_PyMem_PymallocEnabled(void)
{
@@ -459,7 +785,25 @@ _PyMem_PymallocEnabled(void)
return (_PyObject.malloc == _PyObject_Malloc);
}
}
+
+#ifdef WITH_MIMALLOC
+static int
+_PyMem_MimallocEnabled(void)
+{
+#ifdef Py_GIL_DISABLED
+ return 1;
+#else
+ if (_PyMem_DebugEnabled()) {
+ return (_PyMem_Debug.obj.alloc.malloc == _PyObject_MiMalloc);
+ }
+ else {
+ return (_PyObject.malloc == _PyObject_MiMalloc);
+ }
#endif
+}
+#endif // WITH_MIMALLOC
+
+#endif // WITH_PYMALLOC
static void
@@ -515,19 +859,15 @@ set_up_debug_hooks_unlocked(void)
set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_RAW);
set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_MEM);
set_up_debug_hooks_domain_unlocked(PYMEM_DOMAIN_OBJ);
+ _PyRuntime.allocators.is_debug_enabled = 1;
}
void
PyMem_SetupDebugHooks(void)
{
- if (ALLOCATORS_MUTEX == NULL) {
- /* The runtime must not be completely initialized yet. */
- set_up_debug_hooks_unlocked();
- return;
- }
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
set_up_debug_hooks_unlocked();
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
}
static void
@@ -563,53 +903,33 @@ set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
void
PyMem_GetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
- if (ALLOCATORS_MUTEX == NULL) {
- /* The runtime must not be completely initialized yet. */
- get_allocator_unlocked(domain, allocator);
- return;
- }
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
get_allocator_unlocked(domain, allocator);
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
}
void
PyMem_SetAllocator(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
{
- if (ALLOCATORS_MUTEX == NULL) {
- /* The runtime must not be completely initialized yet. */
- set_allocator_unlocked(domain, allocator);
- return;
- }
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
set_allocator_unlocked(domain, allocator);
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
}
void
PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator)
{
- if (ALLOCATORS_MUTEX == NULL) {
- /* The runtime must not be completely initialized yet. */
- *allocator = _PyObject_Arena;
- return;
- }
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
*allocator = _PyObject_Arena;
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
}
void
PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
{
- if (ALLOCATORS_MUTEX == NULL) {
- /* The runtime must not be completely initialized yet. */
- _PyObject_Arena = *allocator;
- return;
- }
- PyThread_acquire_lock(ALLOCATORS_MUTEX, WAIT_LOCK);
+ PyMutex_Lock(&ALLOCATORS_MUTEX);
_PyObject_Arena = *allocator;
- PyThread_release_lock(ALLOCATORS_MUTEX);
+ PyMutex_Unlock(&ALLOCATORS_MUTEX);
}
@@ -783,6 +1103,285 @@ _PyMem_Strdup(const char *str)
return copy;
}
+/***********************************************/
+/* Delayed freeing support for Py_GIL_DISABLED */
+/***********************************************/
+
+// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
+#define WORK_ITEMS_PER_CHUNK 254
+
+// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
+struct _mem_work_item {
+ uintptr_t ptr; // lowest bit tagged 1 for objects freed with PyObject_Free
+ uint64_t qsbr_goal;
+};
+
+// A fixed-size buffer of pointers to be freed
+struct _mem_work_chunk {
+ // Linked list node of chunks in queue
+ struct llist_node node;
+
+ Py_ssize_t rd_idx; // index of next item to read
+ Py_ssize_t wr_idx; // index of next item to write
+ struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
+};
+
+static void
+free_work_item(uintptr_t ptr)
+{
+ if (ptr & 0x01) {
+ PyObject_Free((char *)(ptr - 1));
+ }
+ else {
+ PyMem_Free((void *)ptr);
+ }
+}
+
+
+#ifdef Py_GIL_DISABLED
+
+// For deferred advance on free: the number of deferred items before advancing
+// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally
+// want to process a chunk before it overflows.
+#define QSBR_DEFERRED_LIMIT 127
+
+// If the deferred memory exceeds 1 MiB, advance the write sequence. This
+// helps limit memory usage due to QSBR delaying frees too long.
+#define QSBR_FREE_MEM_LIMIT 1024*1024
+
+// Return true if the global write sequence should be advanced for a deferred
+// memory free.
+static bool
+should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size)
+{
+ if (size > QSBR_FREE_MEM_LIMIT) {
+ qsbr->deferred_count = 0;
+ qsbr->deferred_memory = 0;
+ qsbr->should_process = true;
+ return true;
+ }
+ qsbr->deferred_count++;
+ qsbr->deferred_memory += size;
+ if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT ||
+ qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
+ qsbr->deferred_count = 0;
+ qsbr->deferred_memory = 0;
+ qsbr->should_process = true;
+ return true;
+ }
+ return false;
+}
+#endif
+
+static void
+free_delayed(uintptr_t ptr, size_t size)
+{
+#ifndef Py_GIL_DISABLED
+ free_work_item(ptr);
+#else
+ if (_PyRuntime.stoptheworld.world_stopped) {
+ // Free immediately if the world is stopped, including during
+ // interpreter shutdown.
+ free_work_item(ptr);
+ return;
+ }
+
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ struct llist_node *head = &tstate->mem_free_queue;
+
+ struct _mem_work_chunk *buf = NULL;
+ if (!llist_empty(head)) {
+ // Try to re-use the last buffer
+ buf = llist_data(head->prev, struct _mem_work_chunk, node);
+ if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+ // already full
+ buf = NULL;
+ }
+ }
+
+ if (buf == NULL) {
+ buf = PyMem_Calloc(1, sizeof(*buf));
+ if (buf != NULL) {
+ llist_insert_tail(head, &buf->node);
+ }
+ }
+
+ if (buf == NULL) {
+ // failed to allocate a buffer, free immediately
+ _PyEval_StopTheWorld(tstate->base.interp);
+ free_work_item(ptr);
+ _PyEval_StartTheWorld(tstate->base.interp);
+ return;
+ }
+
+ assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
+ uint64_t seq;
+ if (should_advance_qsbr_for_free(tstate->qsbr, size)) {
+ seq = _Py_qsbr_advance(tstate->qsbr->shared);
+ }
+ else {
+ seq = _Py_qsbr_shared_next(tstate->qsbr->shared);
+ }
+ buf->array[buf->wr_idx].ptr = ptr;
+ buf->array[buf->wr_idx].qsbr_goal = seq;
+ buf->wr_idx++;
+
+ if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+ // Normally the processing of delayed items is done from the eval
+ // breaker. Processing here is a safety measure to ensure too much
+ // work does not accumulate.
+ _PyMem_ProcessDelayed((PyThreadState *)tstate);
+ }
+#endif
+}
+
+void
+_PyMem_FreeDelayed(void *ptr, size_t size)
+{
+ assert(!((uintptr_t)ptr & 0x01));
+ if (ptr != NULL) {
+ free_delayed((uintptr_t)ptr, size);
+ }
+}
+
+void
+_PyObject_FreeDelayed(void *ptr)
+{
+ assert(!((uintptr_t)ptr & 0x01));
+ // We use 0 as the size since we don't have an easy way to know the
+ // actual size. If we are freeing many objects, the write sequence
+ // will be advanced due to QSBR_DEFERRED_LIMIT.
+ free_delayed(((uintptr_t)ptr)|0x01, 0);
+}
+
+static struct _mem_work_chunk *
+work_queue_first(struct llist_node *head)
+{
+ return llist_data(head->next, struct _mem_work_chunk, node);
+}
+
+static void
+process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
+ bool keep_empty)
+{
+ while (!llist_empty(head)) {
+ struct _mem_work_chunk *buf = work_queue_first(head);
+
+ while (buf->rd_idx < buf->wr_idx) {
+ struct _mem_work_item *item = &buf->array[buf->rd_idx];
+ if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) {
+ return;
+ }
+
+ free_work_item(item->ptr);
+ buf->rd_idx++;
+ }
+
+ assert(buf->rd_idx == buf->wr_idx);
+ if (keep_empty && buf->node.next == head) {
+ // Keep the last buffer in the queue to reduce re-allocations
+ buf->rd_idx = buf->wr_idx = 0;
+ return;
+ }
+
+ llist_remove(&buf->node);
+ PyMem_Free(buf);
+ }
+}
+
+static void
+process_interp_queue(struct _Py_mem_interp_free_queue *queue,
+ struct _qsbr_thread_state *qsbr)
+{
+ assert(PyMutex_IsLocked(&queue->mutex));
+ process_queue(&queue->head, qsbr, false);
+
+ int more_work = !llist_empty(&queue->head);
+ _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
+}
+
+static void
+maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
+ struct _qsbr_thread_state *qsbr)
+{
+ if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
+ return;
+ }
+
+ // Try to acquire the lock, but don't block if it's already held.
+ if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
+ process_interp_queue(queue, qsbr);
+ PyMutex_Unlock(&queue->mutex);
+ }
+}
+
+void
+_PyMem_ProcessDelayed(PyThreadState *tstate)
+{
+ PyInterpreterState *interp = tstate->interp;
+ _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+
+ tstate_impl->qsbr->should_process = false;
+
+ // Process thread-local work
+ process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);
+
+ // Process shared interpreter work
+ maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
+}
+
+void
+_PyMem_AbandonDelayed(PyThreadState *tstate)
+{
+ PyInterpreterState *interp = tstate->interp;
+ struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
+
+ if (llist_empty(queue)) {
+ return;
+ }
+
+ // Check if the queue contains one empty buffer
+ struct _mem_work_chunk *buf = work_queue_first(queue);
+ if (buf->rd_idx == buf->wr_idx) {
+ llist_remove(&buf->node);
+ PyMem_Free(buf);
+ assert(llist_empty(queue));
+ return;
+ }
+
+ PyMutex_Lock(&interp->mem_free_queue.mutex);
+
+ // Merge the thread's work queue into the interpreter's work queue.
+ llist_concat(&interp->mem_free_queue.head, queue);
+
+ // Process the merged queue now (see gh-130794).
+ _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ process_interp_queue(&interp->mem_free_queue, this_tstate->qsbr);
+
+ PyMutex_Unlock(&interp->mem_free_queue.mutex);
+
+ assert(llist_empty(queue)); // the thread's queue is now empty
+}
+
+void
+_PyMem_FiniDelayed(PyInterpreterState *interp)
+{
+ struct llist_node *head = &interp->mem_free_queue.head;
+ while (!llist_empty(head)) {
+ struct _mem_work_chunk *buf = work_queue_first(head);
+
+ while (buf->rd_idx < buf->wr_idx) {
+ // Free the remaining items immediately. There should be no other
+ // threads accessing the memory at this point during shutdown.
+ struct _mem_work_item *item = &buf->array[buf->rd_idx];
+ free_work_item(item->ptr);
+ buf->rd_idx++;
+ }
+
+ llist_remove(&buf->node);
+ PyMem_Free(buf);
+ }
+}
/**************************/
/* the "object" allocator */
@@ -852,6 +1451,13 @@ static int running_on_valgrind = -1;
typedef struct _obmalloc_state OMState;
+/* obmalloc state for main interpreter and shared by all interpreters without
+ * their own obmalloc state. By not explicitly initalizing this structure, it
+ * will be allocated in the BSS which is a small performance win. The radix
+ * tree arrays are fairly large but are sparsely used. */
+static struct _obmalloc_state obmalloc_state_main;
+static bool obmalloc_state_initialized;
+
static inline int
has_own_state(PyInterpreterState *interp)
{
@@ -864,10 +1470,8 @@ static inline OMState *
get_state(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
- if (!has_own_state(interp)) {
- interp = _PyInterpreterState_Main();
- }
- return &interp->obmalloc;
+ assert(interp->obmalloc != NULL); // otherwise not initialized or freed
+ return interp->obmalloc;
}
// These macros all rely on a local "state" variable.
@@ -882,9 +1486,51 @@ get_state(void)
#define narenas_highwater (state->mgmt.narenas_highwater)
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
+#ifdef WITH_MIMALLOC
+static bool count_blocks(
+ const mi_heap_t* heap, const mi_heap_area_t* area,
+ void* block, size_t block_size, void* allocated_blocks)
+{
+ *(size_t *)allocated_blocks += area->used;
+ return 1;
+}
+
+static Py_ssize_t
+get_mimalloc_allocated_blocks(PyInterpreterState *interp)
+{
+ size_t allocated_blocks = 0;
+#ifdef Py_GIL_DISABLED
+ for (PyThreadState *t = interp->threads.head; t != NULL; t = t->next) {
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t;
+ for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
+ mi_heap_t *heap = &tstate->mimalloc.heaps[i];
+ mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
+ }
+ }
+
+ mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
+ for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) {
+ _mi_abandoned_pool_visit_blocks(pool, tag, false, &count_blocks,
+ &allocated_blocks);
+ }
+#else
+ // TODO(sgross): this only counts the current thread's blocks.
+ mi_heap_t *heap = mi_heap_get_default();
+ mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks);
+#endif
+ return allocated_blocks;
+}
+#endif
+
Py_ssize_t
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
{
+#ifdef WITH_MIMALLOC
+ if (_PyMem_MimallocEnabled()) {
+ return get_mimalloc_allocated_blocks(interp);
+ }
+#endif
+
#ifdef Py_DEBUG
assert(has_own_state(interp));
#else
@@ -893,7 +1539,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
"the interpreter doesn't have its own allocator");
}
#endif
- OMState *state = &interp->obmalloc;
+ OMState *state = interp->obmalloc;
+
+ if (state == NULL) {
+ return 0;
+ }
Py_ssize_t n = raw_allocated_blocks;
/* add up allocated blocks for used pools */
@@ -915,19 +1565,36 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
return n;
}
+static void free_obmalloc_arenas(PyInterpreterState *interp);
+
void
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
{
- if (has_own_state(interp)) {
+#ifdef WITH_MIMALLOC
+ if (_PyMem_MimallocEnabled()) {
+ return;
+ }
+#endif
+ if (has_own_state(interp) && interp->obmalloc != NULL) {
Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
assert(has_own_state(interp) || leaked == 0);
interp->runtime->obmalloc.interpreter_leaks += leaked;
+ if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) {
+ // free the obmalloc arenas and radix tree nodes. If leaked > 0
+ // then some of the memory allocated by obmalloc has not been
+ // freed. It might be safe to free the arenas in that case but
+ // it's possible that extension modules are still using that
+ // memory. So, it is safer to not free and to leak. Perhaps there
+ // should be warning when this happens. It should be possible to
+ // use a tool like "-fsanitize=address" to track down these leaks.
+ free_obmalloc_arenas(interp);
+ }
}
}
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
-/* We preserve the number of blockss leaked during runtime finalization,
+/* We preserve the number of blocks leaked during runtime finalization,
so they can be reported if the runtime is initialized again. */
// XXX We don't lose any information by dropping this,
// so we should consider doing so.
@@ -961,6 +1628,7 @@ get_num_global_allocated_blocks(_PyRuntimeState *runtime)
}
}
else {
+ _PyEval_StopTheWorldAll(&_PyRuntime);
HEAD_LOCK(runtime);
PyInterpreterState *interp = PyInterpreterState_Head();
assert(interp != NULL);
@@ -980,6 +1648,7 @@ get_num_global_allocated_blocks(_PyRuntimeState *runtime)
}
}
HEAD_UNLOCK(runtime);
+ _PyEval_StartTheWorldAll(&_PyRuntime);
#ifdef Py_DEBUG
assert(got_main);
#endif
@@ -2034,6 +2703,33 @@ write_size_t(void *p, size_t n)
}
}
+static void
+fill_mem_debug(debug_alloc_api_t *api, void *data, int c, size_t nbytes,
+ bool is_alloc)
+{
+#ifdef Py_GIL_DISABLED
+ if (api->api_id == 'o') {
+ // Don't overwrite the first few bytes of a PyObject allocation in the
+ // free-threaded build
+ _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+ size_t debug_offset;
+ if (is_alloc) {
+ debug_offset = tstate->mimalloc.current_object_heap->debug_offset;
+ }
+ else {
+ char *alloc = (char *)data - 2*SST; // start of the allocation
+ debug_offset = _mi_ptr_page(alloc)->debug_offset;
+ }
+ debug_offset -= 2*SST; // account for pymalloc extra bytes
+ if (debug_offset < nbytes) {
+ memset((char *)data + debug_offset, c, nbytes - debug_offset);
+ }
+ return;
+ }
+#endif
+ memset(data, c, nbytes);
+}
+
/* Let S = sizeof(size_t). The debug malloc asks for 4 * S extra bytes and
fills them with useful stuff, here calling the underlying malloc's result p:
@@ -2110,7 +2806,7 @@ _PyMem_DebugRawAlloc(int use_calloc, void *ctx, size_t nbytes)
memset(p + SST + 1, PYMEM_FORBIDDENBYTE, SST-1);
if (nbytes > 0 && !use_calloc) {
- memset(data, PYMEM_CLEANBYTE, nbytes);
+ fill_mem_debug(api, data, PYMEM_CLEANBYTE, nbytes, true);
}
/* at tail, write pad (SST bytes) and serialno (SST bytes) */
@@ -2158,8 +2854,9 @@ _PyMem_DebugRawFree(void *ctx, void *p)
_PyMem_DebugCheckAddress(__func__, api->api_id, p);
nbytes = read_size_t(q);
- nbytes += PYMEM_DEBUG_EXTRA_BYTES;
- memset(q, PYMEM_DEADBYTE, nbytes);
+ nbytes += PYMEM_DEBUG_EXTRA_BYTES - 2*SST;
+ memset(q, PYMEM_DEADBYTE, 2*SST);
+ fill_mem_debug(api, p, PYMEM_DEADBYTE, nbytes, false);
api->alloc.free(api->alloc.ctx, q);
}
@@ -2179,7 +2876,6 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
size_t total; /* 2 * SST + nbytes + 2 * SST */
size_t original_nbytes;
#define ERASED_SIZE 64
- uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
_PyMem_DebugCheckAddress(__func__, api->api_id, p);
@@ -2196,9 +2892,11 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
#ifdef PYMEM_DEBUG_SERIALNO
size_t block_serialno = read_size_t(tail + SST);
#endif
+#ifndef Py_GIL_DISABLED
/* Mark the header, the trailer, ERASED_SIZE bytes at the begin and
ERASED_SIZE bytes at the end as dead and save the copy of erased bytes.
*/
+ uint8_t save[2*ERASED_SIZE]; /* A copy of erased bytes. */
if (original_nbytes <= sizeof(save)) {
memcpy(save, data, original_nbytes);
memset(data - 2 * SST, PYMEM_DEADBYTE,
@@ -2211,6 +2909,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
memset(tail - ERASED_SIZE, PYMEM_DEADBYTE,
ERASED_SIZE + PYMEM_DEBUG_EXTRA_BYTES - 2 * SST);
}
+#endif
/* Resize and add decorations. */
r = (uint8_t *)api->alloc.realloc(api->alloc.ctx, head, total);
@@ -2238,6 +2937,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
write_size_t(tail + SST, block_serialno);
#endif
+#ifndef Py_GIL_DISABLED
/* Restore saved bytes. */
if (original_nbytes <= sizeof(save)) {
memcpy(data, save, Py_MIN(nbytes, original_nbytes));
@@ -2250,6 +2950,7 @@ _PyMem_DebugRawRealloc(void *ctx, void *p, size_t nbytes)
Py_MIN(nbytes - i, ERASED_SIZE));
}
}
+#endif
if (r == NULL) {
return NULL;
@@ -2511,9 +3212,96 @@ _PyDebugAllocatorStats(FILE *out,
(void)printone(out, buf2, num_blocks * sizeof_block);
}
+// Return true if the obmalloc state structure is heap allocated,
+// by PyMem_RawCalloc(). For the main interpreter, this structure
+// allocated in the BSS. Allocating that way gives some memory savings
+// and a small performance win (at least on a demand paged OS). On
+// 64-bit platforms, the obmalloc structure is 256 kB. Most of that
+// memory is for the arena_map_top array. Since normally only one entry
+// of that array is used, only one page of resident memory is actually
+// used, rather than the full 256 kB.
+bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp)
+{
+#if WITH_PYMALLOC
+ return interp->obmalloc && interp->obmalloc != &obmalloc_state_main;
+#else
+ return false;
+#endif
+}
+
+#ifdef WITH_PYMALLOC
+static void
+init_obmalloc_pools(PyInterpreterState *interp)
+{
+ // initialize the obmalloc->pools structure. This must be done
+ // before the obmalloc alloc/free functions can be called.
+ poolp temp[OBMALLOC_USED_POOLS_SIZE] =
+ _obmalloc_pools_INIT(interp->obmalloc->pools);
+ memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp));
+}
+#endif /* WITH_PYMALLOC */
+
+int _PyMem_init_obmalloc(PyInterpreterState *interp)
+{
+#ifdef WITH_PYMALLOC
+ /* Initialize obmalloc, but only for subinterpreters,
+ since the main interpreter is initialized statically. */
+ if (_Py_IsMainInterpreter(interp)
+ || _PyInterpreterState_HasFeature(interp,
+ Py_RTFLAGS_USE_MAIN_OBMALLOC)) {
+ interp->obmalloc = &obmalloc_state_main;
+ if (!obmalloc_state_initialized) {
+ init_obmalloc_pools(interp);
+ obmalloc_state_initialized = true;
+ }
+ } else {
+ interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state));
+ if (interp->obmalloc == NULL) {
+ return -1;
+ }
+ init_obmalloc_pools(interp);
+ }
+#endif /* WITH_PYMALLOC */
+ return 0; // success
+}
+
#ifdef WITH_PYMALLOC
+static void
+free_obmalloc_arenas(PyInterpreterState *interp)
+{
+ OMState *state = interp->obmalloc;
+ for (uint i = 0; i < maxarenas; ++i) {
+ // free each obmalloc memory arena
+ struct arena_object *ao = &allarenas[i];
+ _PyObject_Arena.free(_PyObject_Arena.ctx,
+ (void *)ao->address, ARENA_SIZE);
+ }
+ // free the array containing pointers to all arenas
+ PyMem_RawFree(allarenas);
+#if WITH_PYMALLOC_RADIX_TREE
+#ifdef USE_INTERIOR_NODES
+ // Free the middle and bottom nodes of the radix tree. These are allocated
+ // by arena_map_mark_used() but not freed when arenas are freed.
+ for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) {
+ arena_map_mid_t *mid = arena_map_root.ptrs[i1];
+ if (mid == NULL) {
+ continue;
+ }
+ for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) {
+ arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2];
+ if (bot == NULL) {
+ continue;
+ }
+ PyMem_RawFree(bot);
+ }
+ PyMem_RawFree(mid);
+ }
+#endif
+#endif
+}
+
#ifdef Py_DEBUG
/* Is target in the list? The list is traversed via the nextpool pointers.
* The list may be NULL-terminated, or circular. Return 1 if target is in
@@ -2535,19 +3323,55 @@ pool_is_in_list(const poolp target, poolp list)
}
#endif
-/* Print summary info to "out" about the state of pymalloc's structures.
- * In Py_DEBUG mode, also perform some expensive internal consistency
- * checks.
- *
- * Return 0 if the memory debug hooks are not installed or no statistics was
- * written into out, return 1 otherwise.
- */
-int
-_PyObject_DebugMallocStats(FILE *out)
+#ifdef WITH_MIMALLOC
+struct _alloc_stats {
+ size_t allocated_blocks;
+ size_t allocated_bytes;
+ size_t allocated_with_overhead;
+ size_t bytes_reserved;
+ size_t bytes_committed;
+};
+
+static bool _collect_alloc_stats(
+ const mi_heap_t* heap, const mi_heap_area_t* area,
+ void* block, size_t block_size, void* arg)
+{
+ struct _alloc_stats *stats = (struct _alloc_stats *)arg;
+ stats->allocated_blocks += area->used;
+ stats->allocated_bytes += area->used * area->block_size;
+ stats->allocated_with_overhead += area->used * area->full_block_size;
+ stats->bytes_reserved += area->reserved;
+ stats->bytes_committed += area->committed;
+ return 1;
+}
+
+static void
+py_mimalloc_print_stats(FILE *out)
+{
+ fprintf(out, "Small block threshold = %zd, in %u size classes.\n",
+ MI_SMALL_OBJ_SIZE_MAX, MI_BIN_HUGE);
+ fprintf(out, "Medium block threshold = %zd\n",
+ MI_MEDIUM_OBJ_SIZE_MAX);
+ fprintf(out, "Large object max size = %zd\n",
+ MI_LARGE_OBJ_SIZE_MAX);
+
+ mi_heap_t *heap = mi_heap_get_default();
+ struct _alloc_stats stats;
+ memset(&stats, 0, sizeof(stats));
+ mi_heap_visit_blocks(heap, false, &_collect_alloc_stats, &stats);
+
+ fprintf(out, " Allocated Blocks: %zd\n", stats.allocated_blocks);
+ fprintf(out, " Allocated Bytes: %zd\n", stats.allocated_bytes);
+ fprintf(out, " Allocated Bytes w/ Overhead: %zd\n", stats.allocated_with_overhead);
+ fprintf(out, " Bytes Reserved: %zd\n", stats.bytes_reserved);
+ fprintf(out, " Bytes Committed: %zd\n", stats.bytes_committed);
+}
+#endif
+
+
+static void
+pymalloc_print_stats(FILE *out)
{
- if (!_PyMem_PymallocEnabled()) {
- return 0;
- }
OMState *state = get_state();
uint i;
@@ -2700,7 +3524,32 @@ _PyObject_DebugMallocStats(FILE *out)
#endif
#endif
- return 1;
+}
+
+/* Print summary info to "out" about the state of pymalloc's structures.
+ * In Py_DEBUG mode, also perform some expensive internal consistency
+ * checks.
+ *
+ * Return 0 if the memory debug hooks are not installed or no statistics was
+ * written into out, return 1 otherwise.
+ */
+int
+_PyObject_DebugMallocStats(FILE *out)
+{
+#ifdef WITH_MIMALLOC
+ if (_PyMem_MimallocEnabled()) {
+ py_mimalloc_print_stats(out);
+ return 1;
+ }
+ else
+#endif
+ if (_PyMem_PymallocEnabled()) {
+ pymalloc_print_stats(out);
+ return 1;
+ }
+ else {
+ return 0;
+ }
}
#endif /* #ifdef WITH_PYMALLOC */