summaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/Python/perf_trampoline.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tools/python3/Python/perf_trampoline.c')
-rw-r--r--contrib/tools/python3/Python/perf_trampoline.c207
1 files changed, 174 insertions, 33 deletions
diff --git a/contrib/tools/python3/Python/perf_trampoline.c b/contrib/tools/python3/Python/perf_trampoline.c
index ea9dc83dd0f..7ef9eca11f5 100644
--- a/contrib/tools/python3/Python/perf_trampoline.c
+++ b/contrib/tools/python3/Python/perf_trampoline.c
@@ -130,7 +130,7 @@ any DWARF information available for them).
*/
#include "Python.h"
-#include "pycore_ceval.h"
+#include "pycore_ceval.h" // _PyPerf_Callbacks
#include "pycore_frame.h"
#include "pycore_interp.h"
@@ -140,9 +140,11 @@ any DWARF information available for them).
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
-#include <sys/mman.h>
+#include <sys/mman.h> // mmap()
#include <sys/types.h>
-#include <unistd.h>
+#include <unistd.h> // sysconf()
+#include <sys/time.h> // gettimeofday()
+
#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
#define PY_HAVE_INVALIDATE_ICACHE
@@ -184,15 +186,61 @@ struct code_arena_st {
*prev; // Pointer to the arena or NULL if this is the first arena.
};
+#define CODE_ALIGNMENT 32
+
typedef struct code_arena_st code_arena_t;
typedef struct trampoline_api_st trampoline_api_t;
+enum perf_trampoline_type {
+ PERF_TRAMPOLINE_UNSET = 0,
+ PERF_TRAMPOLINE_TYPE_MAP = 1,
+ PERF_TRAMPOLINE_TYPE_JITDUMP = 2,
+};
+
#define perf_status _PyRuntime.ceval.perf.status
#define extra_code_index _PyRuntime.ceval.perf.extra_code_index
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file
+#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
+#define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
+#define prev_eval_frame _PyRuntime.ceval.perf.prev_eval_frame
+#define trampoline_refcount _PyRuntime.ceval.perf.trampoline_refcount
+#define code_watcher_id _PyRuntime.ceval.perf.code_watcher_id
+
+static void free_code_arenas(void);
+static void
+perf_trampoline_reset_state(void)
+{
+ free_code_arenas();
+ if (code_watcher_id >= 0) {
+ PyCode_ClearWatcher(code_watcher_id);
+ code_watcher_id = -1;
+ }
+ extra_code_index = -1;
+}
+
+static int
+perf_trampoline_code_watcher(PyCodeEvent event, PyCodeObject *co)
+{
+ if (event != PY_CODE_EVENT_DESTROY) {
+ return 0;
+ }
+ if (extra_code_index == -1) {
+ return 0;
+ }
+ py_trampoline f = NULL;
+ int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
+ if (ret != 0 || f == NULL) {
+ return 0;
+ }
+ trampoline_refcount--;
+ if (trampoline_refcount == 0) {
+ perf_trampoline_reset_state();
+ }
+ return 0;
+}
static void
perf_map_write_entry(void *state, const void *code_addr,
@@ -220,6 +268,8 @@ static void*
perf_map_init_state(void)
{
PyUnstable_PerfMapState_Init();
+ trampoline_api.code_padding = 0;
+ perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP;
return NULL;
}
@@ -236,6 +286,30 @@ _PyPerf_Callbacks _Py_perfmap_callbacks = {
&perf_map_free_state,
};
+
+static size_t round_up(int64_t value, int64_t multiple) {
+ if (multiple == 0) {
+ // Avoid division by zero
+ return value;
+ }
+
+ int64_t remainder = value % multiple;
+ if (remainder == 0) {
+ // Value is already a multiple of 'multiple'
+ return value;
+ }
+
+ // Calculate the difference to the next multiple
+ int64_t difference = multiple - remainder;
+
+ // Add the difference to the value
+ int64_t rounded_up_value = value + difference;
+
+ return rounded_up_value;
+}
+
+// TRAMPOLINE MANAGEMENT API
+
static int
new_code_arena(void)
{
@@ -249,14 +323,16 @@ new_code_arena(void)
0); // offset (not used here)
if (memory == MAP_FAILED) {
PyErr_SetFromErrno(PyExc_OSError);
- _PyErr_WriteUnraisableMsg(
- "Failed to create new mmap for perf trampoline", NULL);
+ PyErr_FormatUnraisable("Failed to create new mmap for perf trampoline");
perf_status = PERF_STATUS_FAILED;
return -1;
}
void *start = &_Py_trampoline_func_start;
void *end = &_Py_trampoline_func_end;
size_t code_size = end - start;
+ size_t unaligned_size = code_size + trampoline_api.code_padding;
+ size_t chunk_size = round_up(unaligned_size, CODE_ALIGNMENT);
+ assert(chunk_size % CODE_ALIGNMENT == 0);
// TODO: Check the effect of alignment of the code chunks. Initial investigation
// showed that this has no effect on performance in x86-64 or aarch64 and the current
// version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
@@ -265,18 +341,17 @@ new_code_arena(void)
// measurable performance improvement by rounding trampolines up to 32-bit
// or 64-bit alignment.
- size_t n_copies = mem_size / code_size;
+ size_t n_copies = mem_size / chunk_size;
for (size_t i = 0; i < n_copies; i++) {
- memcpy(memory + i * code_size, start, code_size * sizeof(char));
+ memcpy(memory + i * chunk_size, start, code_size * sizeof(char));
}
// Some systems may prevent us from creating executable code on the fly.
int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
if (res == -1) {
PyErr_SetFromErrno(PyExc_OSError);
munmap(memory, mem_size);
- _PyErr_WriteUnraisableMsg(
- "Failed to set mmap for perf trampoline to PROT_READ | PROT_EXEC",
- NULL);
+ PyErr_FormatUnraisable("Failed to set mmap for perf trampoline to "
+ "PROT_READ | PROT_EXEC");
return -1;
}
@@ -290,8 +365,7 @@ new_code_arena(void)
if (new_arena == NULL) {
PyErr_NoMemory();
munmap(memory, mem_size);
- _PyErr_WriteUnraisableMsg("Failed to allocate new code arena struct",
- NULL);
+ PyErr_FormatUnraisable("Failed to allocate new code arena struct for perf trampoline");
return -1;
}
@@ -323,16 +397,20 @@ static inline py_trampoline
code_arena_new_code(code_arena_t *code_arena)
{
py_trampoline trampoline = (py_trampoline)code_arena->current_addr;
- code_arena->size_left -= code_arena->code_size;
- code_arena->current_addr += code_arena->code_size;
+ size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding,
+ CODE_ALIGNMENT);
+ assert(total_code_size % CODE_ALIGNMENT == 0);
+ code_arena->size_left -= total_code_size;
+ code_arena->current_addr += total_code_size;
return trampoline;
}
static inline py_trampoline
compile_trampoline(void)
{
+ size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16);
if ((perf_code_arena == NULL) ||
- (perf_code_arena->size_left <= perf_code_arena->code_size)) {
+ (perf_code_arena->size_left <= total_code_size)) {
if (new_code_arena() < 0) {
return NULL;
}
@@ -349,7 +427,7 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
perf_status == PERF_STATUS_NO_INIT) {
goto default_eval;
}
- PyCodeObject *co = frame->f_code;
+ PyCodeObject *co = _PyFrame_GetCode(frame);
py_trampoline f = NULL;
assert(extra_code_index != -1);
int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
@@ -364,6 +442,7 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
perf_code_arena->code_size, co);
_PyCode_SetExtra((PyObject *)co, extra_code_index,
(void *)new_trampoline);
+ trampoline_refcount++;
f = new_trampoline;
}
assert(f != NULL);
@@ -374,6 +453,27 @@ default_eval:
}
#endif // PY_HAVE_PERF_TRAMPOLINE
+int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
+{
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+ py_trampoline f = NULL;
+ assert(extra_code_index != -1);
+ int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f);
+ if (ret != 0 || f == NULL) {
+ py_trampoline new_trampoline = compile_trampoline();
+ if (new_trampoline == NULL) {
+ return 0;
+ }
+ trampoline_api.write_state(trampoline_api.state, new_trampoline,
+ perf_code_arena->code_size, co);
+ trampoline_refcount++;
+ return _PyCode_SetExtra((PyObject *)co, extra_code_index,
+ (void *)new_trampoline);
+ }
+#endif // PY_HAVE_PERF_TRAMPOLINE
+ return 0;
+}
+
int
_PyIsPerfTrampolineActive(void)
{
@@ -416,18 +516,15 @@ _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks)
return 0;
}
-void _PyPerfTrampoline_FreeArenas(void) {
-#ifdef PY_HAVE_PERF_TRAMPOLINE
- free_code_arenas();
-#endif
- return;
-}
-
int
_PyPerfTrampoline_Init(int activate)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
PyThreadState *tstate = _PyThreadState_GET();
+ if (code_watcher_id == 0) {
+ // Initialize to -1 since 0 is a valid watcher ID
+ code_watcher_id = -1;
+ }
if (tstate->interp->eval_frame &&
tstate->interp->eval_frame != py_trampoline_evaluator) {
PyErr_SetString(PyExc_RuntimeError,
@@ -441,9 +538,6 @@ _PyPerfTrampoline_Init(int activate)
}
else {
tstate->interp->eval_frame = py_trampoline_evaluator;
- if (new_code_arena() < 0) {
- return -1;
- }
extra_code_index = _PyEval_RequestCodeExtraIndex(NULL);
if (extra_code_index == -1) {
return -1;
@@ -451,6 +545,16 @@ _PyPerfTrampoline_Init(int activate)
if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) {
trampoline_api.state = trampoline_api.init_state();
}
+ if (new_code_arena() < 0) {
+ return -1;
+ }
+ code_watcher_id = PyCode_AddWatcher(perf_trampoline_code_watcher);
+ if (code_watcher_id < 0) {
+ PyErr_FormatUnraisable("Failed to register code watcher for perf trampoline");
+ free_code_arenas();
+ return -1;
+ }
+ trampoline_refcount = 1; // Base refcount held by the system
perf_status = PERF_STATUS_OK;
}
#endif
@@ -470,9 +574,28 @@ _PyPerfTrampoline_Fini(void)
}
if (perf_status == PERF_STATUS_OK) {
trampoline_api.free_state(trampoline_api.state);
+ perf_trampoline_type = PERF_TRAMPOLINE_UNSET;
}
- extra_code_index = -1;
+
+ // Prevent new trampolines from being created
perf_status = PERF_STATUS_NO_INIT;
+
+ // Decrement base refcount. If refcount reaches 0, all code objects are already
+ // dead so clean up now. Otherwise, watcher remains active to clean up when last
+ // code object dies; extra_code_index stays valid so watcher can identify them.
+ trampoline_refcount--;
+ if (trampoline_refcount == 0) {
+ perf_trampoline_reset_state();
+ }
+#endif
+ return 0;
+}
+
+int
+PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable){
+#ifdef PY_HAVE_PERF_TRAMPOLINE
+ persist_after_fork = enable;
+ return persist_after_fork;
#endif
return 0;
}
@@ -481,12 +604,30 @@ PyStatus
_PyPerfTrampoline_AfterFork_Child(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
- // Restart trampoline in file in child.
- int was_active = _PyIsPerfTrampolineActive();
- _PyPerfTrampoline_Fini();
- PyUnstable_PerfMapState_Fini();
- if (was_active) {
- _PyPerfTrampoline_Init(1);
+ if (persist_after_fork) {
+ if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) {
+ return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map.");
+ }
+ _PyPerfTrampoline_Fini();
+ char filename[256];
+ pid_t parent_pid = getppid();
+ snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
+ if (PyUnstable_CopyPerfMapFile(filename) != 0) {
+ return PyStatus_Error("Failed to copy perf map file.");
+ }
+ } else {
+ // Restart trampoline in file in child.
+ int was_active = _PyIsPerfTrampolineActive();
+ _PyPerfTrampoline_Fini();
+ if (was_active) {
+ // After fork, Fini may leave the old code watcher registered
+ // if trampolined code objects from the parent still exist
+ // (trampoline_refcount > 0). Clear it unconditionally before
+ // Init registers a new one, to prevent two watchers sharing
+ // the same globals and double-decrementing trampoline_refcount.
+ perf_trampoline_reset_state();
+ _PyPerfTrampoline_Init(1);
+ }
}
#endif
return PyStatus_Ok();