diff options
author | shadchin <shadchin@yandex-team.com> | 2024-02-12 07:53:52 +0300 |
---|---|---|
committer | shadchin <shadchin@yandex-team.com> | 2024-02-12 08:07:36 +0300 |
commit | ce1b7ca3171f9158180640c6a02a74b4afffedea (patch) | |
tree | e47c1e8391b1b0128262c1e9b1e6ed4c8fff2348 /contrib/tools/python3/src/Python/assemble.c | |
parent | 57350d96f030db90f220ce50ee591d5c5d403df7 (diff) | |
download | ydb-ce1b7ca3171f9158180640c6a02a74b4afffedea.tar.gz |
Update Python from 3.11.8 to 3.12.2
Diffstat (limited to 'contrib/tools/python3/src/Python/assemble.c')
-rw-r--r-- | contrib/tools/python3/src/Python/assemble.c | 603 |
1 files changed, 603 insertions, 0 deletions
diff --git a/contrib/tools/python3/src/Python/assemble.c b/contrib/tools/python3/src/Python/assemble.c new file mode 100644 index 0000000000..8789d8ef97 --- /dev/null +++ b/contrib/tools/python3/src/Python/assemble.c @@ -0,0 +1,603 @@ +#include <stdbool.h> + +#include "Python.h" +#include "pycore_code.h" // write_location_entry_start() +#include "pycore_compile.h" +#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros +#include "pycore_pymem.h" // _PyMem_IsPtrFreed() + + +#define DEFAULT_CODE_SIZE 128 +#define DEFAULT_LNOTAB_SIZE 16 +#define DEFAULT_CNOTAB_SIZE 32 + +#undef SUCCESS +#undef ERROR +#define SUCCESS 0 +#define ERROR -1 + +#define RETURN_IF_ERROR(X) \ + if ((X) == -1) { \ + return ERROR; \ + } + +typedef _PyCompilerSrcLocation location; +typedef _PyCompile_Instruction instruction; +typedef _PyCompile_InstructionSequence instr_sequence; + +static inline bool +same_location(location a, location b) +{ + return a.lineno == b.lineno && + a.end_lineno == b.end_lineno && + a.col_offset == b.col_offset && + a.end_col_offset == b.end_col_offset; +} + +struct assembler { + PyObject *a_bytecode; /* bytes containing bytecode */ + int a_offset; /* offset into bytecode */ + PyObject *a_except_table; /* bytes containing exception table */ + int a_except_table_off; /* offset into exception table */ + /* Location Info */ + int a_lineno; /* lineno of last emitted instruction */ + PyObject* a_linetable; /* bytes containing location info */ + int a_location_off; /* offset of last written location info frame */ +}; + +static int +assemble_init(struct assembler *a, int firstlineno) +{ + memset(a, 0, sizeof(struct assembler)); + a->a_lineno = firstlineno; + a->a_linetable = NULL; + a->a_location_off = 0; + a->a_except_table = NULL; + a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE); + if (a->a_bytecode == NULL) { + goto error; + } + a->a_linetable = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE); + if (a->a_linetable == NULL) { + goto error; + } + a->a_except_table = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); + if (a->a_except_table == NULL) { + goto error; + } + return SUCCESS; +error: + Py_XDECREF(a->a_bytecode); + Py_XDECREF(a->a_linetable); + Py_XDECREF(a->a_except_table); + return ERROR; +} + +static void +assemble_free(struct assembler *a) +{ + Py_XDECREF(a->a_bytecode); + Py_XDECREF(a->a_linetable); + Py_XDECREF(a->a_except_table); +} + +static inline void +write_except_byte(struct assembler *a, int byte) { + unsigned char *p = (unsigned char *) PyBytes_AS_STRING(a->a_except_table); + p[a->a_except_table_off++] = byte; +} + +#define CONTINUATION_BIT 64 + +static void +assemble_emit_exception_table_item(struct assembler *a, int value, int msb) +{ + assert ((msb | 128) == 128); + assert(value >= 0 && value < (1 << 30)); + if (value >= 1 << 24) { + write_except_byte(a, (value >> 24) | CONTINUATION_BIT | msb); + msb = 0; + } + if (value >= 1 << 18) { + write_except_byte(a, ((value >> 18)&0x3f) | CONTINUATION_BIT | msb); + msb = 0; + } + if (value >= 1 << 12) { + write_except_byte(a, ((value >> 12)&0x3f) | CONTINUATION_BIT | msb); + msb = 0; + } + if (value >= 1 << 6) { + write_except_byte(a, ((value >> 6)&0x3f) | CONTINUATION_BIT | msb); + msb = 0; + } + write_except_byte(a, (value&0x3f) | msb); +} + +/* See Objects/exception_handling_notes.txt for details of layout */ +#define MAX_SIZE_OF_ENTRY 20 + +static int +assemble_emit_exception_table_entry(struct assembler *a, int start, int end, + _PyCompile_ExceptHandlerInfo *handler) +{ + Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table); + if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) { + RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, len * 2)); + } + int size = end-start; + assert(end > start); + int target = handler->h_offset; + int depth = handler->h_startdepth - 1; + if (handler->h_preserve_lasti > 0) { + depth -= 1; + } + assert(depth >= 0); + int depth_lasti = (depth<<1) | handler->h_preserve_lasti; + assemble_emit_exception_table_item(a, start, (1<<7)); + assemble_emit_exception_table_item(a, size, 0); + assemble_emit_exception_table_item(a, target, 0); + assemble_emit_exception_table_item(a, depth_lasti, 0); + return SUCCESS; +} + +static int +assemble_exception_table(struct assembler *a, instr_sequence *instrs) +{ + int ioffset = 0; + _PyCompile_ExceptHandlerInfo handler; + handler.h_offset = -1; + handler.h_preserve_lasti = -1; + int start = -1; + for (int i = 0; i < instrs->s_used; i++) { + instruction *instr = &instrs->s_instrs[i]; + if (instr->i_except_handler_info.h_offset != handler.h_offset) { + if (handler.h_offset >= 0) { + RETURN_IF_ERROR( + assemble_emit_exception_table_entry(a, start, ioffset, &handler)); + } + start = ioffset; + handler = instr->i_except_handler_info; + } + ioffset += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg); + } + if (handler.h_offset >= 0) { + RETURN_IF_ERROR(assemble_emit_exception_table_entry(a, start, ioffset, &handler)); + } + return SUCCESS; +} + + +/* Code location emitting code. See locations.md for a description of the format. */ + +#define MSB 0x80 + +static void +write_location_byte(struct assembler* a, int val) +{ + PyBytes_AS_STRING(a->a_linetable)[a->a_location_off] = val&255; + a->a_location_off++; +} + + +static uint8_t * +location_pointer(struct assembler* a) +{ + return (uint8_t *)PyBytes_AS_STRING(a->a_linetable) + + a->a_location_off; +} + +static void +write_location_first_byte(struct assembler* a, int code, int length) +{ + a->a_location_off += write_location_entry_start( + location_pointer(a), code, length); +} + +static void +write_location_varint(struct assembler* a, unsigned int val) +{ + uint8_t *ptr = location_pointer(a); + a->a_location_off += write_varint(ptr, val); +} + + +static void +write_location_signed_varint(struct assembler* a, int val) +{ + uint8_t *ptr = location_pointer(a); + a->a_location_off += write_signed_varint(ptr, val); +} + +static void +write_location_info_short_form(struct assembler* a, int length, int column, int end_column) +{ + assert(length > 0 && length <= 8); + int column_low_bits = column & 7; + int column_group = column >> 3; + assert(column < 80); + assert(end_column >= column); + assert(end_column - column < 16); + write_location_first_byte(a, PY_CODE_LOCATION_INFO_SHORT0 + column_group, length); + write_location_byte(a, (column_low_bits << 4) | (end_column - column)); +} + +static void +write_location_info_oneline_form(struct assembler* a, int length, int line_delta, int column, int end_column) +{ + assert(length > 0 && length <= 8); + assert(line_delta >= 0 && line_delta < 3); + assert(column < 128); + assert(end_column < 128); + write_location_first_byte(a, PY_CODE_LOCATION_INFO_ONE_LINE0 + line_delta, length); + write_location_byte(a, column); + write_location_byte(a, end_column); +} + +static void +write_location_info_long_form(struct assembler* a, location loc, int length) +{ + assert(length > 0 && length <= 8); + write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length); + write_location_signed_varint(a, loc.lineno - a->a_lineno); + assert(loc.end_lineno >= loc.lineno); + write_location_varint(a, loc.end_lineno - loc.lineno); + write_location_varint(a, loc.col_offset + 1); + write_location_varint(a, loc.end_col_offset + 1); +} + +static void +write_location_info_none(struct assembler* a, int length) +{ + write_location_first_byte(a, PY_CODE_LOCATION_INFO_NONE, length); +} + +static void +write_location_info_no_column(struct assembler* a, int length, int line_delta) +{ + write_location_first_byte(a, PY_CODE_LOCATION_INFO_NO_COLUMNS, length); + write_location_signed_varint(a, line_delta); +} + +#define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */ + + +static int +write_location_info_entry(struct assembler* a, location loc, int isize) +{ + Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable); + if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) { + assert(len > THEORETICAL_MAX_ENTRY_SIZE); + RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, len*2)); + } + if (loc.lineno < 0) { + write_location_info_none(a, isize); + return SUCCESS; + } + int line_delta = loc.lineno - a->a_lineno; + int column = loc.col_offset; + int end_column = loc.end_col_offset; + assert(column >= -1); + assert(end_column >= -1); + if (column < 0 || end_column < 0) { + if (loc.end_lineno == loc.lineno || loc.end_lineno == -1) { + write_location_info_no_column(a, isize, line_delta); + a->a_lineno = loc.lineno; + return SUCCESS; + } + } + else if (loc.end_lineno == loc.lineno) { + if (line_delta == 0 && column < 80 && end_column - column < 16 && end_column >= column) { + write_location_info_short_form(a, isize, column, end_column); + return SUCCESS; + } + if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) { + write_location_info_oneline_form(a, isize, line_delta, column, end_column); + a->a_lineno = loc.lineno; + return SUCCESS; + } + } + write_location_info_long_form(a, loc, isize); + a->a_lineno = loc.lineno; + return SUCCESS; +} + +static int +assemble_emit_location(struct assembler* a, location loc, int isize) +{ + if (isize == 0) { + return SUCCESS; + } + while (isize > 8) { + RETURN_IF_ERROR(write_location_info_entry(a, loc, 8)); + isize -= 8; + } + return write_location_info_entry(a, loc, isize); +} + +static int +assemble_location_info(struct assembler *a, instr_sequence *instrs, + int firstlineno) +{ + a->a_lineno = firstlineno; + location loc = NO_LOCATION; + int size = 0; + for (int i = 0; i < instrs->s_used; i++) { + instruction *instr = &instrs->s_instrs[i]; + if (!same_location(loc, instr->i_loc)) { + RETURN_IF_ERROR(assemble_emit_location(a, loc, size)); + loc = instr->i_loc; + size = 0; + } + size += _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg); + } + RETURN_IF_ERROR(assemble_emit_location(a, loc, size)); + return SUCCESS; +} + +static void +write_instr(_Py_CODEUNIT *codestr, instruction *instr, int ilen) +{ + int opcode = instr->i_opcode; + assert(!IS_PSEUDO_OPCODE(opcode)); + int oparg = instr->i_oparg; + assert(HAS_ARG(opcode) || oparg == 0); + int caches = _PyOpcode_Caches[opcode]; + switch (ilen - caches) { + case 4: + codestr->op.code = EXTENDED_ARG; + codestr->op.arg = (oparg >> 24) & 0xFF; + codestr++; + /* fall through */ + case 3: + codestr->op.code = EXTENDED_ARG; + codestr->op.arg = (oparg >> 16) & 0xFF; + codestr++; + /* fall through */ + case 2: + codestr->op.code = EXTENDED_ARG; + codestr->op.arg = (oparg >> 8) & 0xFF; + codestr++; + /* fall through */ + case 1: + codestr->op.code = opcode; + codestr->op.arg = oparg & 0xFF; + codestr++; + break; + default: + Py_UNREACHABLE(); + } + while (caches--) { + codestr->op.code = CACHE; + codestr->op.arg = 0; + codestr++; + } +} + +/* assemble_emit_instr() + Extend the bytecode with a new instruction. + Update lnotab if necessary. +*/ + +static int +assemble_emit_instr(struct assembler *a, instruction *instr) +{ + Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); + _Py_CODEUNIT *code; + + int size = _PyCompile_InstrSize(instr->i_opcode, instr->i_oparg); + if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) { + if (len > PY_SSIZE_T_MAX / 2) { + return ERROR; + } + RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, len * 2)); + } + code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; + a->a_offset += size; + write_instr(code, instr, size); + return SUCCESS; +} + +static int +assemble_emit(struct assembler *a, instr_sequence *instrs, + int first_lineno, PyObject *const_cache) +{ + RETURN_IF_ERROR(assemble_init(a, first_lineno)); + + for (int i = 0; i < instrs->s_used; i++) { + instruction *instr = &instrs->s_instrs[i]; + RETURN_IF_ERROR(assemble_emit_instr(a, instr)); + } + + RETURN_IF_ERROR(assemble_location_info(a, instrs, a->a_lineno)); + + RETURN_IF_ERROR(assemble_exception_table(a, instrs)); + + RETURN_IF_ERROR(_PyBytes_Resize(&a->a_except_table, a->a_except_table_off)); + RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_except_table)); + + RETURN_IF_ERROR(_PyBytes_Resize(&a->a_linetable, a->a_location_off)); + RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_linetable)); + + RETURN_IF_ERROR(_PyBytes_Resize(&a->a_bytecode, a->a_offset * sizeof(_Py_CODEUNIT))); + RETURN_IF_ERROR(_PyCompile_ConstCacheMergeOne(const_cache, &a->a_bytecode)); + return SUCCESS; +} + +static PyObject * +dict_keys_inorder(PyObject *dict, Py_ssize_t offset) +{ + PyObject *tuple, *k, *v; + Py_ssize_t i, pos = 0, size = PyDict_GET_SIZE(dict); + + tuple = PyTuple_New(size); + if (tuple == NULL) + return NULL; + while (PyDict_Next(dict, &pos, &k, &v)) { + i = PyLong_AS_LONG(v); + assert((i - offset) < size); + assert((i - offset) >= 0); + PyTuple_SET_ITEM(tuple, i - offset, Py_NewRef(k)); + } + return tuple; +} + +// This is in codeobject.c. +extern void _Py_set_localsplus_info(int, PyObject *, unsigned char, + PyObject *, PyObject *); + +static void +compute_localsplus_info(_PyCompile_CodeUnitMetadata *umd, int nlocalsplus, + PyObject *names, PyObject *kinds) +{ + PyObject *k, *v; + Py_ssize_t pos = 0; + while (PyDict_Next(umd->u_varnames, &pos, &k, &v)) { + int offset = (int)PyLong_AS_LONG(v); + assert(offset >= 0); + assert(offset < nlocalsplus); + // For now we do not distinguish arg kinds. + _PyLocals_Kind kind = CO_FAST_LOCAL; + if (PyDict_Contains(umd->u_fasthidden, k)) { + kind |= CO_FAST_HIDDEN; + } + if (PyDict_GetItem(umd->u_cellvars, k) != NULL) { + kind |= CO_FAST_CELL; + } + _Py_set_localsplus_info(offset, k, kind, names, kinds); + } + int nlocals = (int)PyDict_GET_SIZE(umd->u_varnames); + + // This counter mirrors the fix done in fix_cell_offsets(). + int numdropped = 0; + pos = 0; + while (PyDict_Next(umd->u_cellvars, &pos, &k, &v)) { + if (PyDict_GetItem(umd->u_varnames, k) != NULL) { + // Skip cells that are already covered by locals. + numdropped += 1; + continue; + } + int offset = (int)PyLong_AS_LONG(v); + assert(offset >= 0); + offset += nlocals - numdropped; + assert(offset < nlocalsplus); + _Py_set_localsplus_info(offset, k, CO_FAST_CELL, names, kinds); + } + + pos = 0; + while (PyDict_Next(umd->u_freevars, &pos, &k, &v)) { + int offset = (int)PyLong_AS_LONG(v); + assert(offset >= 0); + offset += nlocals - numdropped; + assert(offset < nlocalsplus); + _Py_set_localsplus_info(offset, k, CO_FAST_FREE, names, kinds); + } +} + +static PyCodeObject * +makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_cache, + PyObject *constslist, int maxdepth, int nlocalsplus, int code_flags, + PyObject *filename) +{ + PyCodeObject *co = NULL; + PyObject *names = NULL; + PyObject *consts = NULL; + PyObject *localsplusnames = NULL; + PyObject *localspluskinds = NULL; + names = dict_keys_inorder(umd->u_names, 0); + if (!names) { + goto error; + } + if (_PyCompile_ConstCacheMergeOne(const_cache, &names) < 0) { + goto error; + } + + consts = PyList_AsTuple(constslist); /* PyCode_New requires a tuple */ + if (consts == NULL) { + goto error; + } + if (_PyCompile_ConstCacheMergeOne(const_cache, &consts) < 0) { + goto error; + } + + assert(umd->u_posonlyargcount < INT_MAX); + assert(umd->u_argcount < INT_MAX); + assert(umd->u_kwonlyargcount < INT_MAX); + int posonlyargcount = (int)umd->u_posonlyargcount; + int posorkwargcount = (int)umd->u_argcount; + assert(INT_MAX - posonlyargcount - posorkwargcount > 0); + int kwonlyargcount = (int)umd->u_kwonlyargcount; + + localsplusnames = PyTuple_New(nlocalsplus); + if (localsplusnames == NULL) { + goto error; + } + localspluskinds = PyBytes_FromStringAndSize(NULL, nlocalsplus); + if (localspluskinds == NULL) { + goto error; + } + compute_localsplus_info(umd, nlocalsplus, localsplusnames, localspluskinds); + + struct _PyCodeConstructor con = { + .filename = filename, + .name = umd->u_name, + .qualname = umd->u_qualname ? umd->u_qualname : umd->u_name, + .flags = code_flags, + + .code = a->a_bytecode, + .firstlineno = umd->u_firstlineno, + .linetable = a->a_linetable, + + .consts = consts, + .names = names, + + .localsplusnames = localsplusnames, + .localspluskinds = localspluskinds, + + .argcount = posonlyargcount + posorkwargcount, + .posonlyargcount = posonlyargcount, + .kwonlyargcount = kwonlyargcount, + + .stacksize = maxdepth, + + .exceptiontable = a->a_except_table, + }; + + if (_PyCode_Validate(&con) < 0) { + goto error; + } + + if (_PyCompile_ConstCacheMergeOne(const_cache, &localsplusnames) < 0) { + goto error; + } + con.localsplusnames = localsplusnames; + + co = _PyCode_New(&con); + if (co == NULL) { + goto error; + } + +error: + Py_XDECREF(names); + Py_XDECREF(consts); + Py_XDECREF(localsplusnames); + Py_XDECREF(localspluskinds); + return co; +} + + +PyCodeObject * +_PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *umd, PyObject *const_cache, + PyObject *consts, int maxdepth, instr_sequence *instrs, + int nlocalsplus, int code_flags, PyObject *filename) +{ + PyCodeObject *co = NULL; + + struct assembler a; + int res = assemble_emit(&a, instrs, umd->u_firstlineno, const_cache); + if (res == SUCCESS) { + co = makecode(umd, &a, const_cache, consts, maxdepth, nlocalsplus, + code_flags, filename); + } + assemble_free(&a); + return co; +} |