diff options
Diffstat (limited to 'contrib/tools/python3/Python/assemble.c')
| -rw-r--r-- | contrib/tools/python3/Python/assemble.c | 241 |
1 files changed, 199 insertions, 42 deletions
diff --git a/contrib/tools/python3/Python/assemble.c b/contrib/tools/python3/Python/assemble.c index 8789d8ef978..35453277dd8 100644 --- a/contrib/tools/python3/Python/assemble.c +++ b/contrib/tools/python3/Python/assemble.c @@ -1,11 +1,12 @@ -#include <stdbool.h> - #include "Python.h" -#include "pycore_code.h" // write_location_entry_start() +#include "pycore_code.h" // write_location_entry_start() #include "pycore_compile.h" -#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros -#include "pycore_pymem.h" // _PyMem_IsPtrFreed() +#include "pycore_instruction_sequence.h" +#include "pycore_opcode_utils.h" // IS_BACKWARDS_JUMP_OPCODE +#include "pycore_opcode_metadata.h" // is_pseudo_target, _PyOpcode_Caches +#include "pycore_symtable.h" // _Py_SourceLocation +#include <stdbool.h> #define DEFAULT_CODE_SIZE 128 #define DEFAULT_LNOTAB_SIZE 16 @@ -17,13 +18,13 @@ #define ERROR -1 #define RETURN_IF_ERROR(X) \ - if ((X) == -1) { \ + if ((X) < 0) { \ return ERROR; \ } -typedef _PyCompilerSrcLocation location; -typedef _PyCompile_Instruction instruction; -typedef _PyCompile_InstructionSequence instr_sequence; +typedef _Py_SourceLocation location; +typedef _PyInstruction instruction; +typedef _PyInstructionSequence instr_sequence; static inline bool same_location(location a, location b) @@ -34,6 +35,18 @@ same_location(location a, location b) a.end_col_offset == b.end_col_offset; } +static int +instr_size(instruction *instr) +{ + int opcode = instr->i_opcode; + int oparg = instr->i_oparg; + assert(!IS_PSEUDO_INSTR(opcode)); + assert(OPCODE_HAS_ARG(opcode) || oparg == 0); + int extended_args = (0xFFFFFF < oparg) + (0xFFFF < oparg) + (0xFF < oparg); + int caches = _PyOpcode_Caches[opcode]; + return extended_args + 1 + caches; +} + struct assembler { PyObject *a_bytecode; /* bytes containing bytecode */ int a_offset; /* offset into bytecode */ @@ -118,7 +131,8 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb) static int assemble_emit_exception_table_entry(struct assembler *a, int start, int end, - _PyCompile_ExceptHandlerInfo *handler) + int handler_offset, + _PyExceptHandlerInfo *handler) { Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table); if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) { @@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end, } int size = end-start; assert(end > start); - int target = handler->h_offset; + int target = handler_offset; int depth = handler->h_startdepth - 1; if (handler->h_preserve_lasti > 0) { depth -= 1; @@ -144,24 +158,31 @@ static int assemble_exception_table(struct assembler *a, instr_sequence *instrs) { int ioffset = 0; - _PyCompile_ExceptHandlerInfo handler; - handler.h_offset = -1; + _PyExceptHandlerInfo handler; + handler.h_label = -1; + handler.h_startdepth = -1; handler.h_preserve_lasti = -1; int start = -1; for (int i = 0; i < instrs->s_used; i++) { instruction *instr = &instrs->s_instrs[i]; - if (instr->i_except_handler_info.h_offset != handler.h_offset) { - if (handler.h_offset >= 0) { + if (instr->i_except_handler_info.h_label != handler.h_label) { + if (handler.h_label >= 0) { + int handler_offset = instrs->s_instrs[handler.h_label].i_offset; RETURN_IF_ERROR( - assemble_emit_exception_table_entry(a, start, ioffset, &handler)); + assemble_emit_exception_table_entry(a, start, ioffset, + handler_offset, + &handler)); } start = ioffset; handler = instr->i_except_handler_info; } - ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg); + ioffset += instr_size(instr); } - if (handler.h_offset >= 0) { - RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler)); + if (handler.h_label >= 0) { + int handler_offset = instrs->s_instrs[handler.h_label].i_offset; + RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, + handler_offset, + &handler)); } return SUCCESS; } @@ -269,17 +290,15 @@ write_location_info_entry(struct assembler* a, location loc, int isize) assert(len > THEORETICAL_MAX_ENTRY_SIZE); RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, len*2)); } - if (loc.lineno < 0) { + if (loc.lineno == NO_LOCATION.lineno) { write_location_info_none(a, isize); return SUCCESS; } int line_delta = loc.lineno - a->a_lineno; int column = loc.col_offset; int end_column = loc.end_col_offset; - assert(column >= -1); - assert(end_column >= -1); if (column < 0 || end_column < 0) { - if (loc.end_lineno == loc.lineno || loc.end_lineno == -1) { + if (loc.end_lineno == loc.lineno || loc.end_lineno < 0) { write_location_info_no_column(a, isize, line_delta); a->a_lineno = loc.lineno; return SUCCESS; @@ -328,7 +347,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs, loc = instr->i_loc; size = 0; } - size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg); + size += instr_size(instr); } RETURN_IF_ERROR(assemble_emit_location(a, loc, size)); return SUCCESS; @@ -338,9 +357,9 @@ static void write_instr(_Py_CODEUNIT *codestr, instruction *instr, int ilen) { int opcode = instr->i_opcode; - assert(!IS_PSEUDO_OPCODE(opcode)); + assert(!IS_PSEUDO_INSTR(opcode)); int oparg = instr->i_oparg; - assert(HAS_ARG(opcode) || oparg == 0); + assert(OPCODE_HAS_ARG(opcode) || oparg == 0); int caches = _PyOpcode_Caches[opcode]; switch (ilen - caches) { case 4: @@ -384,7 +403,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr) Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); _Py_CODEUNIT *code; - int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg); + int size = instr_size(instr); if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) { if (len > PY_SSIZE_T_MAX / 2) { return ERROR; @@ -427,13 +446,17 @@ static PyObject * dict_keys_inorder(PyObject *dict, Py_ssize_t offset) { PyObject *tuple, *k, *v; - Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict); + Py_ssize_t pos = 0, size = PyDict_GET_SIZE(dict); tuple = PyTuple_New(size); if (tuple == NULL) return NULL; while (PyDict_Next(dict, &pos, &k, &v)) { - i = PyLong_AS_LONG(v); + Py_ssize_t i = PyLong_AsSsize_t(v); + if (i == -1 && PyErr_Occurred()) { + Py_DECREF(tuple); + return NULL; + } assert((i - offset) < size); assert((i - offset) >= 0); PyTuple_SET_ITEM(tuple, i - offset, Py_NewRef(k)); @@ -445,52 +468,76 @@ dict_keys_inorder(PyObject *dict, Py_ssize_t offset) extern void _Py_set_localsplus_info(int, PyObject *, unsigned char, PyObject *, PyObject *); -static void +static int compute_localsplus_info(_PyCompile_CodeUnitMetadata *umd, int nlocalsplus, PyObject *names, PyObject *kinds) { PyObject *k, *v; Py_ssize_t pos = 0; while (PyDict_Next(umd->u_varnames, &pos, &k, &v)) { - int offset = (int)PyLong_AS_LONG(v); + int offset = PyLong_AsInt(v); + if (offset == -1 && PyErr_Occurred()) { + return ERROR; + } assert(offset >= 0); assert(offset < nlocalsplus); + // For now we do not distinguish arg kinds. _PyLocals_Kind kind = CO_FAST_LOCAL; - if (PyDict_Contains(umd->u_fasthidden, k)) { + int has_key = PyDict_Contains(umd->u_fasthidden, k); + RETURN_IF_ERROR(has_key); + if (has_key) { kind |= CO_FAST_HIDDEN; } - if (PyDict_GetItem(umd->u_cellvars, k) != NULL) { + + has_key = PyDict_Contains(umd->u_cellvars, k); + RETURN_IF_ERROR(has_key); + if (has_key) { kind |= CO_FAST_CELL; } + _Py_set_localsplus_info(offset, k, kind, names, kinds); } int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames); // This counter mirrors the fix done in fix_cell_offsets(). - int numdropped = 0; + int numdropped = 0, cellvar_offset = -1; pos = 0; while (PyDict_Next(umd->u_cellvars, &pos, &k, &v)) { - if (PyDict_GetItem(umd->u_varnames, k) != NULL) { + int has_name = PyDict_Contains(umd->u_varnames, k); + RETURN_IF_ERROR(has_name); + if (has_name) { // Skip cells that are already covered by locals. numdropped += 1; continue; } - int offset = (int)PyLong_AS_LONG(v); - assert(offset >= 0); - offset += nlocals - numdropped; - assert(offset < nlocalsplus); - _Py_set_localsplus_info(offset, k, CO_FAST_CELL, names, kinds); + + cellvar_offset = PyLong_AsInt(v); + if (cellvar_offset == -1 && PyErr_Occurred()) { + return ERROR; + } + assert(cellvar_offset >= 0); + cellvar_offset += nlocals - numdropped; + assert(cellvar_offset < nlocalsplus); + _Py_set_localsplus_info(cellvar_offset, k, CO_FAST_CELL, names, kinds); } pos = 0; while (PyDict_Next(umd->u_freevars, &pos, &k, &v)) { - int offset = (int)PyLong_AS_LONG(v); + int offset = PyLong_AsInt(v); + if (offset == -1 && PyErr_Occurred()) { + return ERROR; + } assert(offset >= 0); offset += nlocals - numdropped; assert(offset < nlocalsplus); + /* XXX If the assertion below fails it is most likely because a freevar + was added to u_freevars with the wrong index due to not taking into + account cellvars already present, see gh-128632. */ + assert(offset > cellvar_offset); _Py_set_localsplus_info(offset, k, CO_FAST_FREE, names, kinds); } + return SUCCESS; } static PyCodeObject * @@ -535,7 +582,10 @@ makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_ if (localspluskinds == NULL) { goto error; } - compute_localsplus_info(umd, nlocalsplus, localsplusnames, localspluskinds); + if (compute_localsplus_info(umd, nlocalsplus, + localsplusnames, localspluskinds) == ERROR) { + goto error; + } struct _PyCodeConstructor con = { .filename = filename, @@ -584,12 +634,119 @@ error: return co; } +static int +resolve_jump_offsets(instr_sequence *instrs) +{ + /* Compute the size of each instruction and fixup jump args. + * Replace instruction index with position in bytecode. + */ + + for (int i = 0; i < instrs->s_used; i++) { + instruction *instr = &instrs->s_instrs[i]; + if (OPCODE_HAS_JUMP(instr->i_opcode)) { + instr->i_target = instr->i_oparg; + } + } + + int extended_arg_recompile; + + do { + int totsize = 0; + for (int i = 0; i < instrs->s_used; i++) { + instruction *instr = &instrs->s_instrs[i]; + instr->i_offset = totsize; + int isize = instr_size(instr); + totsize += isize; + } + extended_arg_recompile = 0; + + int offset = 0; + for (int i = 0; i < instrs->s_used; i++) { + instruction *instr = &instrs->s_instrs[i]; + int isize = instr_size(instr); + /* jump offsets are computed relative to + * the instruction pointer after fetching + * the jump instruction. + */ + offset += isize; + if (OPCODE_HAS_JUMP(instr->i_opcode)) { + instruction *target = &instrs->s_instrs[instr->i_target]; + instr->i_oparg = target->i_offset; + if (instr->i_oparg < offset) { + assert(IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode)); + instr->i_oparg = offset - instr->i_oparg; + } + else { + assert(!IS_BACKWARDS_JUMP_OPCODE(instr->i_opcode)); + instr->i_oparg = instr->i_oparg - offset; + } + if (instr_size(instr) != isize) { + extended_arg_recompile = 1; + } + } + } + /* XXX: This is an awful hack that could hurt performance, but + on the bright side it should work until we come up + with a better solution. + + The issue is that in the first loop instr_size() is + called, and it requires i_oparg be set appropriately. + There is a bootstrap problem because i_oparg is + calculated in the second loop above. + + So we loop until we stop seeing new EXTENDED_ARGs. + The only EXTENDED_ARGs that could be popping up are + ones in jump instructions. So this should converge + fairly quickly. + */ + } while (extended_arg_recompile); + return SUCCESS; +} + +static int +resolve_unconditional_jumps(instr_sequence *instrs) +{ + /* Resolve directions of unconditional jumps */ + + for (int i = 0; i < instrs->s_used; i++) { + instruction *instr = &instrs->s_instrs[i]; + bool is_forward = (instr->i_oparg > i); + switch(instr->i_opcode) { + case JUMP: + assert(is_pseudo_target(JUMP, JUMP_FORWARD)); + assert(is_pseudo_target(JUMP, JUMP_BACKWARD)); + instr->i_opcode = is_forward ? JUMP_FORWARD : JUMP_BACKWARD; + break; + case JUMP_NO_INTERRUPT: + assert(is_pseudo_target(JUMP_NO_INTERRUPT, JUMP_FORWARD)); + assert(is_pseudo_target(JUMP_NO_INTERRUPT, JUMP_BACKWARD_NO_INTERRUPT)); + instr->i_opcode = is_forward ? + JUMP_FORWARD : JUMP_BACKWARD_NO_INTERRUPT; + break; + default: + if (OPCODE_HAS_JUMP(instr->i_opcode) && + IS_PSEUDO_INSTR(instr->i_opcode)) { + Py_UNREACHABLE(); + } + } + } + return SUCCESS; +} PyCodeObject * _PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache, PyObject *consts, int maxdepth, instr_sequence *instrs, int nlocalsplus, int code_flags, PyObject *filename) { + if (_PyInstructionSequence_ApplyLabelMap(instrs) < 0) { + return NULL; + } + if (resolve_unconditional_jumps(instrs) < 0) { + return NULL; + } + if (resolve_jump_offsets(instrs) < 0) { + return NULL; + } PyCodeObject *co = NULL; struct assembler a; |
