author    shadchin <shadchin@yandex-team.com>  2024-02-12 07:53:52 +0300
committer shadchin <shadchin@yandex-team.com>  2024-02-12 08:07:36 +0300
commit    ce1b7ca3171f9158180640c6a02a74b4afffedea (patch)
tree      e47c1e8391b1b0128262c1e9b1e6ed4c8fff2348 /contrib/tools/python3/src/Python/ceval_macros.h
parent    57350d96f030db90f220ce50ee591d5c5d403df7 (diff)
download  ydb-ce1b7ca3171f9158180640c6a02a74b4afffedea.tar.gz
Update Python from 3.11.8 to 3.12.2
Diffstat (limited to 'contrib/tools/python3/src/Python/ceval_macros.h')
-rw-r--r--  contrib/tools/python3/src/Python/ceval_macros.h  344
1 file changed, 344 insertions(+), 0 deletions(-)
diff --git a/contrib/tools/python3/src/Python/ceval_macros.h b/contrib/tools/python3/src/Python/ceval_macros.h
new file mode 100644
index 0000000000..fccf9088cb
--- /dev/null
+++ b/contrib/tools/python3/src/Python/ceval_macros.h
@@ -0,0 +1,344 @@
+// Macros needed by ceval.c and bytecodes.c
+
+/* Computed GOTOs, or
+ the-optimization-commonly-but-improperly-known-as-"threaded code"
+ using gcc's labels-as-values extension
+ (http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).
+
+ The traditional bytecode evaluation loop uses a "switch" statement, which
+ decent compilers will optimize as a single indirect branch instruction
+ combined with a lookup table of jump addresses. However, since the
+ indirect jump instruction is shared by all opcodes, the CPU will have a
+ hard time making the right prediction for where to jump next (actually,
+ it will always be wrong except in the uncommon case of a sequence of
+ several identical opcodes).
+
+ "Threaded code" in contrast, uses an explicit jump table and an explicit
+ indirect jump instruction at the end of each opcode. Since the jump
+ instruction is at a different address for each opcode, the CPU will make a
+ separate prediction for each of these instructions, which is equivalent to
+ predicting the second opcode of each opcode pair. These predictions have
+ a much better chance of being correct, especially in small bytecode loops.
+
+ A mispredicted branch on a modern CPU flushes the whole pipeline and
+ can cost several CPU cycles (depending on the pipeline depth),
+ and potentially many more instructions (depending on the pipeline width).
+ A correctly predicted branch, however, is nearly free.
+
+ At the time of this writing, the "threaded code" version is up to 15-20%
+ faster than the normal "switch" version, depending on the compiler and the
+ CPU architecture.
+
+ NOTE: care must be taken that the compiler doesn't try to "optimize" the
+ indirect jumps by sharing them between all opcodes. Such optimizations
+ can be disabled on gcc by using the -fno-gcse flag (or possibly
+ -fno-crossjumping).
+*/
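
/* For illustration: a minimal threaded-code interpreter with three toy
   opcodes, assuming a gcc-compatible compiler that supports the
   labels-as-values extension described above. A sketch of the technique
   only, not CPython's actual loop (which lives in ceval.c): */

#include <stdio.h>

static int run(const unsigned char *code) {
    static const void *targets[] = { &&op_inc, &&op_dec, &&op_halt };
    int acc = 0;
    const unsigned char *pc = code;
    goto *targets[*pc++];           /* initial dispatch */
op_inc:
    acc++;
    goto *targets[*pc++];           /* each opcode ends in its own jump */
op_dec:
    acc--;
    goto *targets[*pc++];
op_halt:
    return acc;
}

int main(void) {
    const unsigned char prog[] = { 0, 0, 1, 2 };  /* inc, inc, dec, halt */
    printf("%d\n", run(prog));                    /* prints 1 */
    return 0;
}
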
+
+/* Use macros rather than inline functions, to make it as clear as possible
+ * to the C compiler that the tracing check is a simple test then branch.
+ * We want to be sure that the compiler knows this before it generates
+ * the CFG.
+ */
+
+#ifdef WITH_DTRACE
+#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
+#else
+#define OR_DTRACE_LINE
+#endif
+
+#ifdef HAVE_COMPUTED_GOTOS
+ #ifndef USE_COMPUTED_GOTOS
+ #define USE_COMPUTED_GOTOS 1
+ #endif
+#else
+ #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
+ #error "Computed gotos are not supported on this compiler."
+ #endif
+ #undef USE_COMPUTED_GOTOS
+ #define USE_COMPUTED_GOTOS 0
+#endif
+
+#ifdef Py_STATS
+#define INSTRUCTION_START(op) \
+ do { \
+ frame->prev_instr = next_instr++; \
+ OPCODE_EXE_INC(op); \
+ if (_py_stats) _py_stats->opcode_stats[lastopcode].pair_count[op]++; \
+ lastopcode = op; \
+ } while (0)
+#else
+#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++)
+#endif
+
+#if USE_COMPUTED_GOTOS
+# define TARGET(op) TARGET_##op: INSTRUCTION_START(op);
+# define DISPATCH_GOTO() goto *opcode_targets[opcode]
+#else
+# define TARGET(op) case op: TARGET_##op: INSTRUCTION_START(op);
+# define DISPATCH_GOTO() goto dispatch_opcode
+#endif
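
/* For reference, a sketch of what the macros above expand to, using
   LOAD_FAST as an arbitrary example. With computed gotos, each opcode
   body begins at a plain label and ends in its own indirect jump:

       TARGET_LOAD_FAST: INSTRUCTION_START(LOAD_FAST);
       ...
       goto *opcode_targets[opcode];

   Without them, the body sits in a switch and dispatch funnels through
   one shared label:

       case LOAD_FAST: TARGET_LOAD_FAST: INSTRUCTION_START(LOAD_FAST);
       ...
       goto dispatch_opcode;
*/
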
+
+/* PRE_DISPATCH_GOTO() emits low-level trace output (lltrace) if enabled; otherwise a no-op */
+#ifdef LLTRACE
+#define PRE_DISPATCH_GOTO() if (lltrace) { \
+ lltrace_instruction(frame, stack_pointer, next_instr); }
+#else
+#define PRE_DISPATCH_GOTO() ((void)0)
+#endif
+
+
+/* Do interpreter dispatch accounting for tracing and instrumentation */
+#define DISPATCH() \
+ { \
+ NEXTOPARG(); \
+ PRE_DISPATCH_GOTO(); \
+ DISPATCH_GOTO(); \
+ }
+
+#define DISPATCH_SAME_OPARG() \
+ { \
+ opcode = next_instr->op.code; \
+ PRE_DISPATCH_GOTO(); \
+ DISPATCH_GOTO(); \
+ }
+
+#define DISPATCH_INLINED(NEW_FRAME) \
+ do { \
+ assert(tstate->interp->eval_frame == NULL); \
+ _PyFrame_SetStackPointer(frame, stack_pointer); \
+ frame->prev_instr = next_instr - 1; \
+ (NEW_FRAME)->previous = frame; \
+ frame = cframe.current_frame = (NEW_FRAME); \
+ CALL_STAT_INC(inlined_py_calls); \
+ goto start_frame; \
+ } while (0)
+
+#define CHECK_EVAL_BREAKER() \
+ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
+ if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) { \
+ goto handle_eval_breaker; \
+ }
+
+
+/* Tuple access macros */
+
+#ifndef Py_DEBUG
+#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
+#else
+static inline PyObject *
+GETITEM(PyObject *v, Py_ssize_t i) {
+ assert(PyTuple_Check(v));
+ assert(i >= 0);
+ assert(i < PyTuple_GET_SIZE(v));
+ return PyTuple_GET_ITEM(v, i);
+}
+#endif
+
+/* Code access macros */
+
+/* The integer overflow is checked by an assertion below. */
+#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(frame->f_code)))
+#define NEXTOPARG() do { \
+ _Py_CODEUNIT word = *next_instr; \
+ opcode = word.op.code; \
+ oparg = word.op.arg; \
+ } while (0)
+#define JUMPTO(x) (next_instr = _PyCode_CODE(frame->f_code) + (x))
+#define JUMPBY(x) (next_instr += (x))
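
/* For orientation: NEXTOPARG() decodes one 16-bit code unit. In 3.12 the
   unit is approximately the union below (see pycore_code.h for the real
   _Py_CODEUNIT), so word.op.code and word.op.arg are each one byte and
   inline-cache entries can reuse the same 16-bit slots. Sketch only: */
typedef union {
    uint16_t cache;     /* whole-unit view, used for inline-cache entries */
    struct {
        uint8_t code;   /* opcode */
        uint8_t arg;    /* oparg */
    } op;
} _Py_CODEUNIT_sketch;
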
+
+/* Opcode prediction macros
+ Some opcodes tend to come in pairs, making it possible to
+ predict the second opcode when the first is run. For example,
+ COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.
+
+ Verifying the prediction costs a single high-speed test of a register
+ variable against a constant. If the pairing was good, then the
+ processor's own internal branch prediction has a high likelihood of
+ success, resulting in a nearly zero-overhead transition to the
+ next opcode. A successful prediction saves a trip through the eval-loop
+ including its unpredictable switch-case branch. Combined with the
+ processor's internal branch prediction, a successful PREDICT has the
+ effect of making the two opcodes run as if they were a single new opcode
+ with the bodies combined.
+
+ If collecting opcode statistics, you can either keep the predictions
+ turned on and interpret the results as if some opcodes had been
+ combined, or turn predictions off so that the opcode frequency
+ counters update for both opcodes.
+
+ Opcode prediction is disabled with threaded code, since the latter allows
+ the CPU to record separate branch prediction information for each
+ opcode.
+
+*/
+
+#define PREDICT_ID(op) PRED_##op
+
+#if USE_COMPUTED_GOTOS
+#define PREDICT(op) if (0) goto PREDICT_ID(op)
+#else
+#define PREDICT(next_op) \
+ do { \
+ _Py_CODEUNIT word = *next_instr; \
+ opcode = word.op.code; \
+ if (opcode == next_op) { \
+ oparg = word.op.arg; \
+ INSTRUCTION_START(next_op); \
+ goto PREDICT_ID(next_op); \
+ } \
+ } while(0)
+#endif
+#define PREDICTED(op) PREDICT_ID(op):
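
/* A sketch of the prediction protocol in the switch-based build, modeled
   on the COMPARE_OP / POP_JUMP_IF_FALSE pairing mentioned above
   (simplified; not the literal 3.12 handlers): */

TARGET(COMPARE_OP) {
    /* ... pop two operands, push the boolean result ... */
    PREDICT(POP_JUMP_IF_FALSE);    /* on a hit, jump straight to the body below */
    DISPATCH();
}

TARGET(POP_JUMP_IF_FALSE) {
    PREDICTED(POP_JUMP_IF_FALSE);  /* the label PREDICT() jumps to */
    /* ... pop the condition and branch ... */
    DISPATCH();
}
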
+
+
+/* Stack manipulation macros */
+
+/* The stack can grow at most MAXINT deep, as co_nlocals and
+ co_stacksize are ints. */
+#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
+#define STACK_SIZE() (frame->f_code->co_stacksize)
+#define EMPTY() (STACK_LEVEL() == 0)
+#define TOP() (stack_pointer[-1])
+#define SECOND() (stack_pointer[-2])
+#define THIRD() (stack_pointer[-3])
+#define FOURTH() (stack_pointer[-4])
+#define PEEK(n) (stack_pointer[-(n)])
+#define POKE(n, v) (stack_pointer[-(n)] = (v))
+#define SET_TOP(v) (stack_pointer[-1] = (v))
+#define SET_SECOND(v) (stack_pointer[-2] = (v))
+#define BASIC_STACKADJ(n) (stack_pointer += n)
+#define BASIC_PUSH(v) (*stack_pointer++ = (v))
+#define BASIC_POP() (*--stack_pointer)
+
+#ifdef Py_DEBUG
+#define PUSH(v) do { \
+ BASIC_PUSH(v); \
+ assert(STACK_LEVEL() <= STACK_SIZE()); \
+ } while (0)
+#define POP() (assert(STACK_LEVEL() > 0), BASIC_POP())
+#define STACK_GROW(n) do { \
+ assert(n >= 0); \
+ BASIC_STACKADJ(n); \
+ assert(STACK_LEVEL() <= STACK_SIZE()); \
+ } while (0)
+#define STACK_SHRINK(n) do { \
+ assert(n >= 0); \
+ assert(STACK_LEVEL() >= n); \
+ BASIC_STACKADJ(-(n)); \
+ } while (0)
+#else
+#define PUSH(v) BASIC_PUSH(v)
+#define POP() BASIC_POP()
+#define STACK_GROW(n) BASIC_STACKADJ(n)
+#define STACK_SHRINK(n) BASIC_STACKADJ(-(n))
+#endif
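
/* A sketch of a hypothetical two-operand handler built from these macros;
   the opcode name and helper are illustrative, not a real 3.12 instruction: */

TARGET(BINARY_OP_SKETCH) {
    PyObject *right = PEEK(1);
    PyObject *left = PEEK(2);
    PyObject *res = hypothetical_binary_op(left, right);  /* made-up helper */
    Py_DECREF(left);
    Py_DECREF(right);
    STACK_SHRINK(1);   /* two inputs consumed, one slot kept for the result */
    SET_TOP(res);      /* overwrites what used to be `left` */
    if (res == NULL) {
        goto error;    /* NULL result signals an exception is set */
    }
    DISPATCH();
}
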
+
+/* Local variable macros */
+
+#define GETLOCAL(i) (frame->localsplus[i])
+
+/* The SETLOCAL() macro must not DECREF the local variable in-place and
+ then store the new value; it must copy the old value to a temporary
+ value, then store the new value, and then DECREF the temporary value.
+ This is because it is possible that during the DECREF the frame is
+ accessed by other code (e.g. a __del__ method or gc.collect()) and the
+ variable would be pointing to already-freed memory. */
+#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \
+ GETLOCAL(i) = value; \
+ Py_XDECREF(tmp); } while (0)
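
/* For contrast, a sketch of the unsafe ordering the comment above rules
   out: the DECREF can run a __del__ that sees GETLOCAL(i) while it still
   points at the dying object. */
#define SETLOCAL_UNSAFE(i, value) do { Py_XDECREF(GETLOCAL(i)); \
                                       GETLOCAL(i) = value; } while (0)
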
+
+#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)
+
+#ifdef Py_STATS
+#define UPDATE_MISS_STATS(INSTNAME) \
+ do { \
+ STAT_INC(opcode, miss); \
+ STAT_INC((INSTNAME), miss); \
+ /* The counter is always the first cache entry: */ \
+ if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
+ STAT_INC((INSTNAME), deopt); \
+ } \
+ else { \
+ /* This is about to be (incorrectly) incremented: */ \
+ STAT_DEC((INSTNAME), deferred); \
+ } \
+ } while (0)
+#else
+#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
+#endif
+
+#define DEOPT_IF(COND, INSTNAME) \
+ if ((COND)) { \
+ /* This is only a single jump on release builds! */ \
+ UPDATE_MISS_STATS((INSTNAME)); \
+ assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
+ GO_TO_INSTRUCTION(INSTNAME); \
+ }
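
/* A sketch of how a specialized instruction uses this guard, modeled on
   the float-add specialization (simplified; not the literal 3.12 body): */

TARGET(BINARY_OP_ADD_FLOAT) {
    PyObject *right = PEEK(1);
    PyObject *left = PEEK(2);
    DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP);   /* failed guard falls */
    DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP);  /* back to generic op */
    /* ... fast float path ... */
    DISPATCH();
}
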
+
+
+#define GLOBALS() frame->f_globals
+#define BUILTINS() frame->f_builtins
+#define LOCALS() frame->f_locals
+
+#define DTRACE_FUNCTION_ENTRY() \
+ if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
+ dtrace_function_entry(frame); \
+ }
+
+#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
+ (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)
+
+#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
+ (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))
+
+#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
+ do { \
+ assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
+ (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
+ } while (0);
+
+#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
+ do { \
+ assert(!ADAPTIVE_COUNTER_IS_MAX((COUNTER))); \
+ (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
+ } while (0);
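
/* A worked example, assuming ADAPTIVE_BACKOFF_BITS == 4 (its value in
   3.12's pycore_code.h): a counter of 0x35 encodes count 0x35 >> 4 == 3
   with backoff state 0x5 in the low bits. Decrementing subtracts
   1 << 4 == 16, giving 0x25 (count 2, backoff bits untouched); the
   counter "is zero" once every bit above the backoff field is spent. */
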
+
+#define NAME_ERROR_MSG "name '%.200s' is not defined"
+
+#define KWNAMES_LEN() \
+ (kwnames == NULL ? 0 : ((int)PyTuple_GET_SIZE(kwnames)))
+
+#define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dval, result) \
+do { \
+ if (Py_REFCNT(left) == 1) { \
+ ((PyFloatObject *)left)->ob_fval = (dval); \
+ _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);\
+ result = (left); \
+ } \
+ else if (Py_REFCNT(right) == 1) {\
+ ((PyFloatObject *)right)->ob_fval = (dval); \
+ _Py_DECREF_NO_DEALLOC(left); \
+ result = (right); \
+ }\
+ else { \
+ result = PyFloat_FromDouble(dval); \
+ if ((result) == NULL) goto error; \
+ _Py_DECREF_NO_DEALLOC(left); \
+ _Py_DECREF_NO_DEALLOC(right); \
+ } \
+} while (0)
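
/* A sketch of a call site, modeled on the float-add fast path: when an
   operand's refcount is 1 it is about to die anyway, so its PyFloatObject
   is recycled to hold the result instead of allocating a fresh float. */
double dres = ((PyFloatObject *)left)->ob_fval +
              ((PyFloatObject *)right)->ob_fval;
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res);
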
+
+// If a trace function sets a new f_lineno and
+// *then* raises, we use the destination when searching
+// for an exception handler, displaying the traceback, and so on.
+#define INSTRUMENTED_JUMP(src, dest, event) \
+do { \
+ _PyFrame_SetStackPointer(frame, stack_pointer); \
+ next_instr = _Py_call_instrumentation_jump(tstate, event, frame, src, dest); \
+ stack_pointer = _PyFrame_GetStackPointer(frame); \
+ if (next_instr == NULL) { \
+ next_instr = (dest)+1; \
+ goto error; \
+ } \
+} while (0);