author | shadchin <shadchin@yandex-team.ru> | 2022-04-18 12:39:32 +0300
committer | shadchin <shadchin@yandex-team.ru> | 2022-04-18 12:39:32 +0300
commit | d4be68e361f4258cf0848fc70018dfe37a2acc24 (patch)
tree | 153e294cd97ac8b5d7a989612704a0c1f58e8ad4 /contrib/tools/python3/src/Python/compile.c
parent | 260c02f5ccf242d9d9b8a873afaf6588c00237d6 (diff)
download | ydb-d4be68e361f4258cf0848fc70018dfe37a2acc24.tar.gz
IGNIETFERRO-1816 Update Python 3 from 3.9.12 to 3.10.4
ref:9f96be6d02ee8044fdd6f124b799b270c20ce641
Diffstat (limited to 'contrib/tools/python3/src/Python/compile.c')
-rw-r--r-- | contrib/tools/python3/src/Python/compile.c | 3200
1 file changed, 2517 insertions, 683 deletions
diff --git a/contrib/tools/python3/src/Python/compile.c b/contrib/tools/python3/src/Python/compile.c index 28003b66bd4..f012406c066 100644 --- a/contrib/tools/python3/src/Python/compile.c +++ b/contrib/tools/python3/src/Python/compile.c @@ -1,7 +1,7 @@ /* * This file compiles an abstract syntax tree (AST) into Python bytecode. * - * The primary entry point is PyAST_Compile(), which returns a + * The primary entry point is _PyAST_Compile(), which returns a * PyCodeObject. The compiler makes several passes to build the code * object: * 1. Checks for future statements. See future.c @@ -9,7 +9,7 @@ * 3. Generate code for basic blocks. See compiler_mod() in this file. * 4. Assemble the basic blocks into final code. See assemble() in * this file. - * 5. Optimize the byte code (peephole optimizations). See peephole.c + * 5. Optimize the byte code (peephole optimizations). * * Note that compiler_mod() suggests module, but the module ast type * (mod_ty) has cases for expressions and interactive statements. @@ -21,14 +21,19 @@ * objects. */ +#include <stdbool.h> + #include "Python.h" +#include "pycore_ast.h" // _PyAST_GetDocString() +#include "pycore_compile.h" // _PyFuture_FromAST() +#include "pycore_pymem.h" // _PyMem_IsPtrFreed() +#include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_symtable.h" // PySTEntryObject + +#define NEED_OPCODE_JUMP_TABLES +#include "opcode.h" // EXTENDED_ARG +#include "wordcode_helpers.h" // instrsize() -#include "Python-ast.h" -#include "ast.h" -#include "code.h" -#include "symtable.h" -#include "opcode.h" -#include "wordcode_helpers.h" #define DEFAULT_BLOCK_SIZE 16 #define DEFAULT_BLOCKS 8 @@ -40,19 +45,67 @@ #define COMP_SETCOMP 2 #define COMP_DICTCOMP 3 +/* A soft limit for stack use, to avoid excessive + * memory use for large constants, etc. + * + * The value 30 is plucked out of thin air. + * Code that could use more stack than this is + * rare, so the exact value is unimportant. + */ +#define STACK_USE_GUIDELINE 30 + +/* If we exceed this limit, it should + * be considered a compiler bug. + * Currently it should be impossible + * to exceed STACK_USE_GUIDELINE * 100, + * as 100 is the maximum parse depth. + * For performance reasons we will + * want to reduce this to a + * few hundred in the future. + * + * NOTE: Whatever MAX_ALLOWED_STACK_USE is + * set to, it should never restrict what Python + * we can write, just how we compile it. + */ +#define MAX_ALLOWED_STACK_USE (STACK_USE_GUIDELINE * 100) + #define IS_TOP_LEVEL_AWAIT(c) ( \ (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \ && (c->u->u_ste->ste_type == ModuleBlock)) struct instr { - unsigned i_jabs : 1; - unsigned i_jrel : 1; unsigned char i_opcode; int i_oparg; struct basicblock_ *i_target; /* target block (if jump instruction) */ int i_lineno; }; +#define LOG_BITS_PER_INT 5 +#define MASK_LOW_LOG_BITS 31 + +static inline int +is_bit_set_in_table(uint32_t *table, int bitindex) { + /* Is the relevant bit set in the relevant word? */ + /* 256 bits fit into 8 32-bits words. + * Word is indexed by (bitindex>>ln(size of int in bits)). + * Bit within word is the low bits of bitindex. 
+ */ + uint32_t word = table[bitindex >> LOG_BITS_PER_INT]; + return (word >> (bitindex & MASK_LOW_LOG_BITS)) & 1; +} + +static inline int +is_relative_jump(struct instr *i) +{ + return is_bit_set_in_table(_PyOpcode_RelativeJump, i->i_opcode); +} + +static inline int +is_jump(struct instr *i) +{ + return is_bit_set_in_table(_PyOpcode_Jump, i->i_opcode); +} + typedef struct basicblock_ { /* Each basicblock in a compilation unit is linked via b_list in the reverse order that the block are allocated. b_list points to the next @@ -67,10 +120,16 @@ typedef struct basicblock_ { /* If b_next is non-NULL, it is a pointer to the next block reached by normal control flow. */ struct basicblock_ *b_next; - /* b_seen is used to perform a DFS of basicblocks. */ - unsigned b_seen : 1; /* b_return is true if a RETURN_VALUE opcode is inserted. */ unsigned b_return : 1; + /* Number of predecssors that a block has. */ + int b_predecessors; + /* Basic block has no fall through (it ends with a return, raise or jump) */ + unsigned b_nofallthrough : 1; + /* Basic block exits scope (it ends with a return or raise) */ + unsigned b_exit : 1; + /* Used by compiler passes to mark whether they have visited a basic block. */ + unsigned b_visited : 1; /* depth of stack upon entry of block, computed by stackdepth() */ int b_startdepth; /* instruction offset for block, computed by assemble_jump_offsets() */ @@ -85,7 +144,8 @@ compiler IR. */ enum fblocktype { WHILE_LOOP, FOR_LOOP, TRY_EXCEPT, FINALLY_TRY, FINALLY_END, - WITH, ASYNC_WITH, HANDLER_CLEANUP, POP_VALUE, EXCEPTION_HANDLER }; + WITH, ASYNC_WITH, HANDLER_CLEANUP, POP_VALUE, EXCEPTION_HANDLER, + ASYNC_COMPREHENSION_GENERATOR }; struct fblockinfo { enum fblocktype fb_type; @@ -141,6 +201,8 @@ struct compiler_unit { int u_firstlineno; /* the first lineno of the block */ int u_lineno; /* the lineno for the current stmt */ int u_col_offset; /* the offset of the current stmt */ + int u_end_lineno; /* the end line of the current stmt */ + int u_end_col_offset; /* the end offset of the current stmt */ }; /* This struct captures the global state of a compilation. @@ -164,12 +226,6 @@ struct compiler { int c_optimize; /* optimization level */ int c_interactive; /* true if in interactive mode */ int c_nestlevel; - int c_do_not_emit_bytecode; /* The compiler won't emit any bytecode - if this value is different from zero. - This can be used to temporarily visit - nodes without emitting bytecode to - check only errors. */ - PyObject *c_const_cache; /* Python dict holding all constants, including names tuple */ struct compiler_unit *u; /* compiler state for current block */ @@ -177,14 +233,40 @@ struct compiler { PyArena *c_arena; /* pointer to memory allocation arena */ }; +typedef struct { + // A list of strings corresponding to name captures. It is used to track: + // - Repeated name assignments in the same pattern. + // - Different name assignments in alternatives. + // - The order of name assignments in alternatives. + PyObject *stores; + // If 0, any name captures against our subject will raise. + int allow_irrefutable; + // An array of blocks to jump to on failure. Jumping to fail_pop[i] will pop + // i items off of the stack. The end result looks like this (with each block + // falling through to the next): + // fail_pop[4]: POP_TOP + // fail_pop[3]: POP_TOP + // fail_pop[2]: POP_TOP + // fail_pop[1]: POP_TOP + // fail_pop[0]: NOP + basicblock **fail_pop; + // The current length of fail_pop. 
+ Py_ssize_t fail_pop_size; + // The number of items on top of the stack that need to *stay* on top of the + // stack. Variable captures go beneath these. All of them will be popped on + // failure. + Py_ssize_t on_top; +} pattern_context; + static int compiler_enter_scope(struct compiler *, identifier, int, void *, int); static void compiler_free(struct compiler *); static basicblock *compiler_new_block(struct compiler *); static int compiler_next_instr(basicblock *); static int compiler_addop(struct compiler *, int); static int compiler_addop_i(struct compiler *, int, Py_ssize_t); -static int compiler_addop_j(struct compiler *, int, basicblock *, int); -static int compiler_error(struct compiler *, const char *); +static int compiler_addop_j(struct compiler *, int, basicblock *); +static int compiler_addop_j_noline(struct compiler *, int, basicblock *); +static int compiler_error(struct compiler *, const char *, ...); static int compiler_warn(struct compiler *, const char *, ...); static int compiler_nameop(struct compiler *, identifier, expr_context_ty); @@ -198,30 +280,35 @@ static int compiler_subscript(struct compiler *, expr_ty); static int compiler_slice(struct compiler *, expr_ty); static int inplace_binop(operator_ty); -static int are_all_items_const(asdl_seq *, Py_ssize_t, Py_ssize_t); -static int expr_constant(expr_ty); +static int are_all_items_const(asdl_expr_seq *, Py_ssize_t, Py_ssize_t); + static int compiler_with(struct compiler *, stmt_ty, int); static int compiler_async_with(struct compiler *, stmt_ty, int); static int compiler_async_for(struct compiler *, stmt_ty); static int compiler_call_helper(struct compiler *c, int n, - asdl_seq *args, - asdl_seq *keywords); + asdl_expr_seq *args, + asdl_keyword_seq *keywords); static int compiler_try_except(struct compiler *, stmt_ty); static int compiler_set_qualname(struct compiler *); static int compiler_sync_comprehension_generator( struct compiler *c, - asdl_seq *generators, int gen_index, + asdl_comprehension_seq *generators, int gen_index, int depth, expr_ty elt, expr_ty val, int type); static int compiler_async_comprehension_generator( struct compiler *c, - asdl_seq *generators, int gen_index, + asdl_comprehension_seq *generators, int gen_index, int depth, expr_ty elt, expr_ty val, int type); +static int compiler_pattern(struct compiler *, pattern_ty, pattern_context *); +static int compiler_match(struct compiler *, stmt_ty); +static int compiler_pattern_subpattern(struct compiler *, pattern_ty, + pattern_context *); + static PyCodeObject *assemble(struct compiler *, int addNone); static PyObject *__doc__, *__annotations__; @@ -315,8 +402,8 @@ compiler_init(struct compiler *c) } PyCodeObject * -PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags, - int optimize, PyArena *arena) +_PyAST_Compile(mod_ty mod, PyObject *filename, PyCompilerFlags *flags, + int optimize, PyArena *arena) { struct compiler c; PyCodeObject *co = NULL; @@ -338,7 +425,7 @@ PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags, Py_INCREF(filename); c.c_filename = filename; c.c_arena = arena; - c.c_future = PyFuture_FromASTObject(mod, filename); + c.c_future = _PyFuture_FromAST(mod, filename); if (c.c_future == NULL) goto finally; if (!flags) { @@ -350,7 +437,6 @@ PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags, c.c_flags = flags; c.c_optimize = (optimize == -1) ? 
_Py_GetConfig()->optimization_level : optimize; c.c_nestlevel = 0; - c.c_do_not_emit_bytecode = 0; _PyASTOptimizeState state; state.optimize = c.c_optimize; @@ -360,7 +446,7 @@ PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags, goto finally; } - c.c_st = PySymtable_BuildObject(mod, filename, c.c_future); + c.c_st = _PySymtable_Build(mod, filename, c.c_future); if (c.c_st == NULL) { if (!PyErr_Occurred()) PyErr_SetString(PyExc_SystemError, "no symtable"); @@ -375,41 +461,11 @@ PyAST_CompileObject(mod_ty mod, PyObject *filename, PyCompilerFlags *flags, return co; } -PyCodeObject * -PyAST_CompileEx(mod_ty mod, const char *filename_str, PyCompilerFlags *flags, - int optimize, PyArena *arena) -{ - PyObject *filename; - PyCodeObject *co; - filename = PyUnicode_DecodeFSDefault(filename_str); - if (filename == NULL) - return NULL; - co = PyAST_CompileObject(mod, filename, flags, optimize, arena); - Py_DECREF(filename); - return co; - -} - -PyCodeObject * -PyNode_Compile(struct _node *n, const char *filename) -{ - PyCodeObject *co = NULL; - mod_ty mod; - PyArena *arena = PyArena_New(); - if (!arena) - return NULL; - mod = PyAST_FromNode(n, NULL, filename, arena); - if (mod) - co = PyAST_Compile(mod, filename, NULL, arena); - PyArena_Free(arena); - return co; -} - static void compiler_free(struct compiler *c) { if (c->c_st) - PySymtable_Free(c->c_st); + _PySymtable_Free(c->c_st); if (c->c_future) PyObject_Free(c->c_future); Py_XDECREF(c->c_filename); @@ -480,8 +536,8 @@ dictbytype(PyObject *src, int scope_type, int flag, Py_ssize_t offset) /* XXX this should probably be a macro in symtable.h */ long vi; k = PyList_GET_ITEM(sorted_keys, key_i); - v = PyDict_GetItem(src, k); - assert(PyLong_Check(v)); + v = PyDict_GetItemWithError(src, k); + assert(v && PyLong_Check(v)); vi = PyLong_AS_LONG(v); scope = (vi >> SCOPE_OFFSET) & SCOPE_MASK; @@ -511,12 +567,10 @@ compiler_unit_check(struct compiler_unit *u) { basicblock *block; for (block = u->u_blocks; block != NULL; block = block->b_list) { - assert((uintptr_t)block != 0xcbcbcbcbU); - assert((uintptr_t)block != 0xfbfbfbfbU); - assert((uintptr_t)block != 0xdbdbdbdbU); + assert(!_PyMem_IsPtrFreed(block)); if (block->b_instr != NULL) { assert(block->b_ialloc > 0); - assert(block->b_iused > 0); + assert(block->b_iused >= 0); assert(block->b_ialloc >= block->b_iused); } else { @@ -594,7 +648,7 @@ compiler_enter_scope(struct compiler *c, identifier name, compiler_unit_free(u); return 0; } - res = PyDict_SetItem(u->u_cellvars, name, _PyLong_Zero); + res = PyDict_SetItem(u->u_cellvars, name, _PyLong_GetZero()); if (res < 0) { compiler_unit_free(u); return 0; @@ -613,6 +667,8 @@ compiler_enter_scope(struct compiler *c, identifier name, u->u_firstlineno = lineno; u->u_lineno = 0; u->u_col_offset = 0; + u->u_end_lineno = 0; + u->u_end_col_offset = 0; u->u_consts = PyDict_New(); if (!u->u_consts) { compiler_unit_free(u); @@ -658,25 +714,30 @@ compiler_enter_scope(struct compiler *c, identifier name, static void compiler_exit_scope(struct compiler *c) { - Py_ssize_t n; - PyObject *capsule; + // Don't call PySequence_DelItem() with an exception raised + PyObject *exc_type, *exc_val, *exc_tb; + PyErr_Fetch(&exc_type, &exc_val, &exc_tb); c->c_nestlevel--; compiler_unit_free(c->u); /* Restore c->u to the parent unit. 
*/ - n = PyList_GET_SIZE(c->c_stack) - 1; + Py_ssize_t n = PyList_GET_SIZE(c->c_stack) - 1; if (n >= 0) { - capsule = PyList_GET_ITEM(c->c_stack, n); + PyObject *capsule = PyList_GET_ITEM(c->c_stack, n); c->u = (struct compiler_unit *)PyCapsule_GetPointer(capsule, CAPSULE_NAME); assert(c->u); /* we are deleting from a list so this really shouldn't fail */ - if (PySequence_DelItem(c->c_stack, n) < 0) - Py_FatalError("compiler_exit_scope()"); + if (PySequence_DelItem(c->c_stack, n) < 0) { + _PyErr_WriteUnraisableMsg("on removing the last compiler " + "stack item", NULL); + } compiler_unit_check(c->u); } - else + else { c->u = NULL; + } + PyErr_Restore(exc_type, exc_val, exc_tb); } static int @@ -707,7 +768,7 @@ compiler_set_qualname(struct compiler *c) mangled = _Py_Mangle(parent->u_private, u->u_name); if (!mangled) return 0; - scope = PyST_GetScope(parent->u_ste, mangled); + scope = _PyST_GetScope(parent->u_ste, mangled); Py_DECREF(mangled); assert(scope != GLOBAL_IMPLICIT); if (scope == GLOBAL_EXPLICIT) @@ -798,6 +859,29 @@ compiler_use_next_block(struct compiler *c, basicblock *block) return block; } +static basicblock * +compiler_copy_block(struct compiler *c, basicblock *block) +{ + /* Cannot copy a block if it has a fallthrough, since + * a block can only have one fallthrough predecessor. + */ + assert(block->b_nofallthrough); + basicblock *result = compiler_new_block(c); + if (result == NULL) { + return NULL; + } + for (int i = 0; i < block->b_iused; i++) { + int n = compiler_next_instr(result); + if (n < 0) { + return NULL; + } + result->b_instr[n] = block->b_instr[i]; + } + result->b_exit = block->b_exit; + result->b_nofallthrough = 1; + return result; +} + /* Returns the offset of the next instruction in the current block's b_instr array. Resizes the b_instr as necessary. Returns -1 on failure. @@ -855,7 +939,9 @@ compiler_next_instr(basicblock *b) #define SET_LOC(c, x) \ (c)->u->u_lineno = (x)->lineno; \ - (c)->u->u_col_offset = (x)->col_offset; + (c)->u->u_col_offset = (x)->col_offset; \ + (c)->u->u_end_lineno = (x)->end_lineno; \ + (c)->u->u_end_col_offset = (x)->end_col_offset; /* Return the stack effect of opcode with argument oparg. @@ -866,8 +952,6 @@ compiler_next_instr(basicblock *b) * 1 -- when jump * -1 -- maximal */ -/* XXX Make the stack effect of WITH_CLEANUP_START and - WITH_CLEANUP_FINISH deterministic. 
*/ static int stack_effect(int opcode, int oparg, int jump) { @@ -1111,11 +1195,25 @@ stack_effect(int opcode, int oparg, int jump) return 1; case LIST_TO_TUPLE: return 0; + case GEN_START: + return -1; case LIST_EXTEND: case SET_UPDATE: case DICT_MERGE: case DICT_UPDATE: return -1; + case COPY_DICT_WITHOUT_KEYS: + return 0; + case MATCH_CLASS: + return -1; + case GET_LEN: + case MATCH_MAPPING: + case MATCH_SEQUENCE: + return 1; + case MATCH_KEYS: + return 2; + case ROT_N: + return 0; default: return PY_INVALID_STACK_EFFECT; } @@ -1139,15 +1237,12 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg) */ static int -compiler_addop(struct compiler *c, int opcode) +compiler_addop_line(struct compiler *c, int opcode, int line) { basicblock *b; struct instr *i; int off; assert(!HAS_ARG(opcode)); - if (c->c_do_not_emit_bytecode) { - return 1; - } off = compiler_next_instr(c->u->u_curblock); if (off < 0) return 0; @@ -1157,10 +1252,23 @@ compiler_addop(struct compiler *c, int opcode) i->i_oparg = 0; if (opcode == RETURN_VALUE) b->b_return = 1; - i->i_lineno = c->u->u_lineno; + i->i_lineno = line; return 1; } +static int +compiler_addop(struct compiler *c, int opcode) +{ + return compiler_addop_line(c, opcode, c->u->u_lineno); +} + +static int +compiler_addop_noline(struct compiler *c, int opcode) +{ + return compiler_addop_line(c, opcode, -1); +} + + static Py_ssize_t compiler_add_o(PyObject *dict, PyObject *o) { @@ -1301,10 +1409,6 @@ merge_consts_recursive(struct compiler *c, PyObject *o) static Py_ssize_t compiler_add_const(struct compiler *c, PyObject *o) { - if (c->c_do_not_emit_bytecode) { - return 0; - } - PyObject *key = merge_consts_recursive(c, o); if (key == NULL) { return -1; @@ -1318,10 +1422,6 @@ compiler_add_const(struct compiler *c, PyObject *o) static int compiler_addop_load_const(struct compiler *c, PyObject *o) { - if (c->c_do_not_emit_bytecode) { - return 1; - } - Py_ssize_t arg = compiler_add_const(c, o); if (arg < 0) return 0; @@ -1332,10 +1432,6 @@ static int compiler_addop_o(struct compiler *c, int opcode, PyObject *dict, PyObject *o) { - if (c->c_do_not_emit_bytecode) { - return 1; - } - Py_ssize_t arg = compiler_add_o(dict, o); if (arg < 0) return 0; @@ -1348,10 +1444,6 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, { Py_ssize_t arg; - if (c->c_do_not_emit_bytecode) { - return 1; - } - PyObject *mangled = _Py_Mangle(c->u->u_private, o); if (!mangled) return 0; @@ -1367,15 +1459,11 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, */ static int -compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg) +compiler_addop_i_line(struct compiler *c, int opcode, Py_ssize_t oparg, int lineno) { struct instr *i; int off; - if (c->c_do_not_emit_bytecode) { - return 1; - } - /* oparg value is unsigned, but a signed C int is usually used to store it in the C code (like Python/ceval.c). 
@@ -1392,36 +1480,51 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg) i = &c->u->u_curblock->b_instr[off]; i->i_opcode = opcode; i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - i->i_lineno = c->u->u_lineno; + i->i_lineno = lineno; return 1; } static int -compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) +compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg) { - struct instr *i; - int off; + return compiler_addop_i_line(c, opcode, oparg, c->u->u_lineno); +} - if (c->c_do_not_emit_bytecode) { - return 1; - } +static int +compiler_addop_i_noline(struct compiler *c, int opcode, Py_ssize_t oparg) +{ + return compiler_addop_i_line(c, opcode, oparg, -1); +} +static int add_jump_to_block(basicblock *b, int opcode, int lineno, basicblock *target) +{ assert(HAS_ARG(opcode)); assert(b != NULL); - off = compiler_next_instr(c->u->u_curblock); - if (off < 0) + assert(target != NULL); + + int off = compiler_next_instr(b); + struct instr *i = &b->b_instr[off]; + if (off < 0) { return 0; - i = &c->u->u_curblock->b_instr[off]; + } i->i_opcode = opcode; - i->i_target = b; - if (absolute) - i->i_jabs = 1; - else - i->i_jrel = 1; - i->i_lineno = c->u->u_lineno; + i->i_target = target; + i->i_lineno = lineno; return 1; } +static int +compiler_addop_j(struct compiler *c, int opcode, basicblock *b) +{ + return add_jump_to_block(c->u->u_curblock, opcode, c->u->u_lineno, b); +} + +static int +compiler_addop_j_noline(struct compiler *c, int opcode, basicblock *b) +{ + return add_jump_to_block(c->u->u_curblock, opcode, -1, b); +} + /* NEXT_BLOCK() creates an implicit jump from the current block to the new block. @@ -1438,6 +1541,11 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) return 0; \ } +#define ADDOP_NOLINE(C, OP) { \ + if (!compiler_addop_noline((C), (OP))) \ + return 0; \ +} + #define ADDOP_IN_SCOPE(C, OP) { \ if (!compiler_addop((C), (OP))) { \ compiler_exit_scope(c); \ @@ -1464,12 +1572,14 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) } #define ADDOP_O(C, OP, O, TYPE) { \ + assert((OP) != LOAD_CONST); /* use ADDOP_LOAD_CONST */ \ if (!compiler_addop_o((C), (OP), (C)->u->u_ ## TYPE, (O))) \ return 0; \ } /* Same as ADDOP_O, but steals a reference. */ #define ADDOP_N(C, OP, O, TYPE) { \ + assert((OP) != LOAD_CONST); /* use ADDOP_LOAD_CONST_NEW */ \ if (!compiler_addop_o((C), (OP), (C)->u->u_ ## TYPE, (O))) { \ Py_DECREF((O)); \ return 0; \ @@ -1487,16 +1597,23 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) return 0; \ } -#define ADDOP_JABS(C, OP, O) { \ - if (!compiler_addop_j((C), (OP), (O), 1)) \ +#define ADDOP_I_NOLINE(C, OP, O) { \ + if (!compiler_addop_i_noline((C), (OP), (O))) \ return 0; \ } -#define ADDOP_JREL(C, OP, O) { \ - if (!compiler_addop_j((C), (OP), (O), 0)) \ +#define ADDOP_JUMP(C, OP, O) { \ + if (!compiler_addop_j((C), (OP), (O))) \ return 0; \ } +/* Add a jump with no line number. + * Used for artificial jumps that have no corresponding + * token in the source code. 
*/ +#define ADDOP_JUMP_NOLINE(C, OP, O) { \ + if (!compiler_addop_j_noline((C), (OP), (O))) \ + return 0; \ +} #define ADDOP_COMPARE(C, CMP) { \ if (!compiler_addcompare((C), (cmpop_ty)(CMP))) \ @@ -1526,7 +1643,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) #define VISIT_SEQ(C, TYPE, SEQ) { \ int _i; \ - asdl_seq *seq = (SEQ); /* avoid variable capture */ \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \ TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, _i); \ if (!compiler_visit_ ## TYPE((C), elt)) \ @@ -1536,7 +1653,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) #define VISIT_SEQ_IN_SCOPE(C, TYPE, SEQ) { \ int _i; \ - asdl_seq *seq = (SEQ); /* avoid variable capture */ \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ for (_i = 0; _i < asdl_seq_LEN(seq); _i++) { \ TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, _i); \ if (!compiler_visit_ ## TYPE((C), elt)) { \ @@ -1546,21 +1663,15 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) } \ } -/* These macros allows to check only for errors and not emmit bytecode - * while visiting nodes. -*/ - -#define BEGIN_DO_NOT_EMIT_BYTECODE { \ - c->c_do_not_emit_bytecode++; - -#define END_DO_NOT_EMIT_BYTECODE \ - c->c_do_not_emit_bytecode--; \ -} +#define RETURN_IF_FALSE(X) \ + if (!(X)) { \ + return 0; \ + } /* Search if variable annotations are present statically in a block. */ static int -find_ann(asdl_seq *stmts) +find_ann(asdl_stmt_seq *stmts) { int i, j, res = 0; stmt_ty st; @@ -1646,7 +1757,7 @@ compiler_pop_fblock(struct compiler *c, enum fblocktype t, basicblock *b) static int compiler_call_exit_with_nones(struct compiler *c) { - ADDOP_O(c, LOAD_CONST, Py_None, consts); + ADDOP_LOAD_CONST(c, Py_None); ADDOP(c, DUP_TOP); ADDOP(c, DUP_TOP); ADDOP_I(c, CALL_FUNCTION, 3); @@ -1665,6 +1776,7 @@ compiler_unwind_fblock(struct compiler *c, struct fblockinfo *info, switch (info->fb_type) { case WHILE_LOOP: case EXCEPTION_HANDLER: + case ASYNC_COMPREHENSION_GENERATOR: return 1; case FOR_LOOP: @@ -1680,19 +1792,22 @@ compiler_unwind_fblock(struct compiler *c, struct fblockinfo *info, return 1; case FINALLY_TRY: + /* This POP_BLOCK gets the line number of the unwinding statement */ ADDOP(c, POP_BLOCK); if (preserve_tos) { if (!compiler_push_fblock(c, POP_VALUE, NULL, NULL, NULL)) { return 0; } } - /* Emit the finally block, restoring the line number when done */ - int saved_lineno = c->u->u_lineno; + /* Emit the finally block */ VISIT_SEQ(c, stmt, info->fb_datum); - c->u->u_lineno = saved_lineno; if (preserve_tos) { compiler_pop_fblock(c, POP_VALUE, NULL); } + /* The finally block should appear to execute after the + * statement causing the unwinding, so make the unwinding + * instruction artificial */ + c->u->u_lineno = -1; return 1; case FINALLY_END: @@ -1710,6 +1825,7 @@ compiler_unwind_fblock(struct compiler *c, struct fblockinfo *info, case WITH: case ASYNC_WITH: + SET_LOC(c, (stmt_ty)info->fb_datum); ADDOP(c, POP_BLOCK); if (preserve_tos) { ADDOP(c, ROT_TWO); @@ -1723,6 +1839,10 @@ compiler_unwind_fblock(struct compiler *c, struct fblockinfo *info, ADDOP(c, YIELD_FROM); } ADDOP(c, POP_TOP); + /* The exit block should appear to execute after the + * statement causing the unwinding, so make the unwinding + * instruction artificial */ + c->u->u_lineno = -1; return 1; case HANDLER_CLEANUP: @@ -1778,7 +1898,7 @@ compiler_unwind_fblock_stack(struct compiler *c, int 
preserve_tos, struct fblock and for annotations. */ static int -compiler_body(struct compiler *c, asdl_seq *stmts) +compiler_body(struct compiler *c, asdl_stmt_seq *stmts) { int i = 0; stmt_ty st; @@ -1827,7 +1947,7 @@ compiler_mod(struct compiler *c, mod_ty mod) return NULL; } /* Use 0 for firstlineno initially, will fixup in assemble(). */ - if (!compiler_enter_scope(c, module, COMPILER_SCOPE_MODULE, mod, 0)) + if (!compiler_enter_scope(c, module, COMPILER_SCOPE_MODULE, mod, 1)) return NULL; switch (mod->kind) { case Module_kind: @@ -1841,8 +1961,7 @@ compiler_mod(struct compiler *c, mod_ty mod) ADDOP(c, SETUP_ANNOTATIONS); } c->c_interactive = 1; - VISIT_SEQ_IN_SCOPE(c, stmt, - mod->v.Interactive.body); + VISIT_SEQ_IN_SCOPE(c, stmt, mod->v.Interactive.body); break; case Expression_kind: VISIT_IN_SCOPE(c, expr, mod->v.Expression.body); @@ -1871,19 +1990,17 @@ get_ref_type(struct compiler *c, PyObject *name) if (c->u->u_scope_type == COMPILER_SCOPE_CLASS && _PyUnicode_EqualToASCIIString(name, "__class__")) return CELL; - scope = PyST_GetScope(c->u->u_ste, name); + scope = _PyST_GetScope(c->u->u_ste, name); if (scope == 0) { - _Py_FatalErrorFormat(__func__, - "unknown scope for %.100s in %.100s(%s)\n" - "symbols: %s\nlocals: %s\nglobals: %s", - PyUnicode_AsUTF8(name), - PyUnicode_AsUTF8(c->u->u_name), - PyUnicode_AsUTF8(PyObject_Repr(c->u->u_ste->ste_id)), - PyUnicode_AsUTF8(PyObject_Repr(c->u->u_ste->ste_symbols)), - PyUnicode_AsUTF8(PyObject_Repr(c->u->u_varnames)), - PyUnicode_AsUTF8(PyObject_Repr(c->u->u_names))); + PyErr_Format(PyExc_SystemError, + "_PyST_GetScope(name=%R) failed: " + "unknown scope in unit %S (%R); " + "symbols: %R; locals: %R; globals: %R", + name, + c->u->u_name, c->u->u_ste->ste_id, + c->u->u_ste->ste_symbols, c->u->u_varnames, c->u->u_names); + return -1; } - return scope; } @@ -1891,14 +2008,15 @@ static int compiler_lookup_arg(PyObject *dict, PyObject *name) { PyObject *v; - v = PyDict_GetItem(dict, name); + v = PyDict_GetItemWithError(dict, name); if (v == NULL) return -1; return PyLong_AS_LONG(v); } static int -compiler_make_closure(struct compiler *c, PyCodeObject *co, Py_ssize_t flags, PyObject *qualname) +compiler_make_closure(struct compiler *c, PyCodeObject *co, Py_ssize_t flags, + PyObject *qualname) { Py_ssize_t i, free = PyCode_GetNumFree(co); if (qualname == NULL) @@ -1910,7 +2028,6 @@ compiler_make_closure(struct compiler *c, PyCodeObject *co, Py_ssize_t flags, Py LOAD_DEREF but LOAD_CLOSURE is needed. */ PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i); - int arg, reftype; /* Special case: If a class contains a method with a free variable that has the same name as a method, @@ -1918,20 +2035,27 @@ compiler_make_closure(struct compiler *c, PyCodeObject *co, Py_ssize_t flags, Py class. It should be handled by the closure, as well as by the normal name lookup logic. 
*/ - reftype = get_ref_type(c, name); - if (reftype == CELL) + int reftype = get_ref_type(c, name); + if (reftype == -1) { + return 0; + } + int arg; + if (reftype == CELL) { arg = compiler_lookup_arg(c->u->u_cellvars, name); - else /* (reftype == FREE) */ + } + else { arg = compiler_lookup_arg(c->u->u_freevars, name); + } if (arg == -1) { - _Py_FatalErrorFormat(__func__, - "lookup %s in %s %d %d\n" - "freevars of %s: %s\n", - PyUnicode_AsUTF8(PyObject_Repr(name)), - PyUnicode_AsUTF8(c->u->u_name), - reftype, arg, - PyUnicode_AsUTF8(co->co_name), - PyUnicode_AsUTF8(PyObject_Repr(co->co_freevars))); + PyErr_Format(PyExc_SystemError, + "compiler_lookup_arg(name=%R) with reftype=%d failed in %S; " + "freevars of code %S: %R", + name, + reftype, + c->u->u_name, + co->co_name, + co->co_freevars); + return 0; } ADDOP_I(c, LOAD_CLOSURE, arg); } @@ -1945,7 +2069,7 @@ compiler_make_closure(struct compiler *c, PyCodeObject *co, Py_ssize_t flags, Py } static int -compiler_decorators(struct compiler *c, asdl_seq* decos) +compiler_decorators(struct compiler *c, asdl_expr_seq* decos) { int i; @@ -1959,8 +2083,8 @@ compiler_decorators(struct compiler *c, asdl_seq* decos) } static int -compiler_visit_kwonlydefaults(struct compiler *c, asdl_seq *kwonlyargs, - asdl_seq *kw_defaults) +compiler_visit_kwonlydefaults(struct compiler *c, asdl_arg_seq *kwonlyargs, + asdl_expr_seq *kw_defaults) { /* Push a dict of keyword-only default values. @@ -2024,31 +2148,32 @@ compiler_visit_annexpr(struct compiler *c, expr_ty annotation) static int compiler_visit_argannotation(struct compiler *c, identifier id, - expr_ty annotation, PyObject *names) + expr_ty annotation, Py_ssize_t *annotations_len) { - if (annotation) { - PyObject *mangled; - if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) { - VISIT(c, annexpr, annotation) - } - else { - VISIT(c, expr, annotation); - } - mangled = _Py_Mangle(c->u->u_private, id); - if (!mangled) - return 0; - if (PyList_Append(names, mangled) < 0) { - Py_DECREF(mangled); - return 0; - } - Py_DECREF(mangled); + if (!annotation) { + return 1; } + + PyObject *mangled = _Py_Mangle(c->u->u_private, id); + if (!mangled) { + return 0; + } + ADDOP_LOAD_CONST(c, mangled); + Py_DECREF(mangled); + + if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) { + VISIT(c, annexpr, annotation) + } + else { + VISIT(c, expr, annotation); + } + *annotations_len += 2; return 1; } static int -compiler_visit_argannotations(struct compiler *c, asdl_seq* args, - PyObject *names) +compiler_visit_argannotations(struct compiler *c, asdl_arg_seq* args, + Py_ssize_t *annotations_len) { int i; for (i = 0; i < asdl_seq_LEN(args); i++) { @@ -2057,7 +2182,7 @@ compiler_visit_argannotations(struct compiler *c, asdl_seq* args, c, arg->arg, arg->annotation, - names)) + annotations_len)) return 0; } return 1; @@ -2067,58 +2192,44 @@ static int compiler_visit_annotations(struct compiler *c, arguments_ty args, expr_ty returns) { - /* Push arg annotation dict. + /* Push arg annotation names and values. The expressions are evaluated out-of-order wrt the source code. - Return 0 on error, -1 if no dict pushed, 1 if a dict is pushed. + Return 0 on error, -1 if no annotations pushed, 1 if a annotations is pushed. 
*/ static identifier return_str; - PyObject *names; - Py_ssize_t len; - names = PyList_New(0); - if (!names) - return 0; + Py_ssize_t annotations_len = 0; - if (!compiler_visit_argannotations(c, args->args, names)) - goto error; - if (!compiler_visit_argannotations(c, args->posonlyargs, names)) - goto error; + if (!compiler_visit_argannotations(c, args->args, &annotations_len)) + return 0; + if (!compiler_visit_argannotations(c, args->posonlyargs, &annotations_len)) + return 0; if (args->vararg && args->vararg->annotation && !compiler_visit_argannotation(c, args->vararg->arg, - args->vararg->annotation, names)) - goto error; - if (!compiler_visit_argannotations(c, args->kwonlyargs, names)) - goto error; + args->vararg->annotation, &annotations_len)) + return 0; + if (!compiler_visit_argannotations(c, args->kwonlyargs, &annotations_len)) + return 0; if (args->kwarg && args->kwarg->annotation && !compiler_visit_argannotation(c, args->kwarg->arg, - args->kwarg->annotation, names)) - goto error; + args->kwarg->annotation, &annotations_len)) + return 0; if (!return_str) { return_str = PyUnicode_InternFromString("return"); if (!return_str) - goto error; + return 0; } - if (!compiler_visit_argannotation(c, return_str, returns, names)) { - goto error; + if (!compiler_visit_argannotation(c, return_str, returns, &annotations_len)) { + return 0; } - len = PyList_GET_SIZE(names); - if (len) { - PyObject *keytuple = PyList_AsTuple(names); - Py_DECREF(names); - ADDOP_LOAD_CONST_NEW(c, keytuple); - ADDOP_I(c, BUILD_CONST_KEY_MAP, len); + if (annotations_len) { + ADDOP_I(c, BUILD_TUPLE, annotations_len); return 1; } - else { - Py_DECREF(names); - return -1; - } -error: - Py_DECREF(names); - return 0; + return -1; } static int @@ -2159,6 +2270,10 @@ forbidden_name(struct compiler *c, identifier name, expr_context_ty ctx) compiler_error(c, "cannot assign to __debug__"); return 1; } + if (ctx == Del && _PyUnicode_EqualToASCIIString(name, "__debug__")) { + compiler_error(c, "cannot delete __debug__"); + return 1; + } return 0; } @@ -2173,7 +2288,7 @@ compiler_check_debug_one_arg(struct compiler *c, arg_ty arg) } static int -compiler_check_debug_args_seq(struct compiler *c, asdl_seq *args) +compiler_check_debug_args_seq(struct compiler *c, asdl_arg_seq *args) { if (args != NULL) { for (Py_ssize_t i = 0, n = asdl_seq_LEN(args); i < n; i++) { @@ -2208,8 +2323,8 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async) arguments_ty args; expr_ty returns; identifier name; - asdl_seq* decos; - asdl_seq *body; + asdl_expr_seq* decos; + asdl_stmt_seq *body; Py_ssize_t i, funcflags; int annotations; int scope_type; @@ -2277,7 +2392,9 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async) c->u->u_argcount = asdl_seq_LEN(args->args); c->u->u_posonlyargcount = asdl_seq_LEN(args->posonlyargs); c->u->u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs); - VISIT_SEQ_IN_SCOPE(c, stmt, body); + for (i = docstring ? 
1 : 0; i < asdl_seq_LEN(body); i++) { + VISIT_IN_SCOPE(c, stmt, (stmt_ty)asdl_seq_GET(body, i)); + } co = assemble(c, 1); qualname = c->u->u_qualname; Py_INCREF(qualname); @@ -2288,7 +2405,11 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async) return 0; } - compiler_make_closure(c, co, funcflags, qualname); + if (!compiler_make_closure(c, co, funcflags, qualname)) { + Py_DECREF(qualname); + Py_DECREF(co); + return 0; + } Py_DECREF(qualname); Py_DECREF(co); @@ -2306,7 +2427,7 @@ compiler_class(struct compiler *c, stmt_ty s) PyCodeObject *co; PyObject *str; int i, firstlineno; - asdl_seq* decos = s->v.ClassDef.decorator_list; + asdl_expr_seq *decos = s->v.ClassDef.decorator_list; if (!compiler_decorators(c, decos)) return 0; @@ -2366,6 +2487,8 @@ compiler_class(struct compiler *c, stmt_ty s) compiler_exit_scope(c); return 0; } + /* The following code is artificial */ + c->u->u_lineno = -1; /* Return __classcell__ if it is referenced, otherwise return None */ if (c->u->u_ste->ste_needs_class_closure) { /* Store __classcell__ into class namespace & return it */ @@ -2410,16 +2533,17 @@ compiler_class(struct compiler *c, stmt_ty s) ADDOP(c, LOAD_BUILD_CLASS); /* 3. load a function (or closure) made from the code object */ - compiler_make_closure(c, co, 0, NULL); + if (!compiler_make_closure(c, co, 0, NULL)) { + Py_DECREF(co); + return 0; + } Py_DECREF(co); /* 4. load class name */ ADDOP_LOAD_CONST(c, s->v.ClassDef.name); /* 5. generate the rest of the code for the call */ - if (!compiler_call_helper(c, 2, - s->v.ClassDef.bases, - s->v.ClassDef.keywords)) + if (!compiler_call_helper(c, 2, s->v.ClassDef.bases, s->v.ClassDef.keywords)) return 0; /* 6. apply decorators */ @@ -2527,7 +2651,7 @@ compiler_jump_if(struct compiler *c, expr_ty e, basicblock *next, int cond) /* fallback to general implementation */ break; case BoolOp_kind: { - asdl_seq *s = e->v.BoolOp.values; + asdl_expr_seq *s = e->v.BoolOp.values; Py_ssize_t i, n = asdl_seq_LEN(s) - 1; assert(n >= 0); int cond2 = e->v.BoolOp.op == Or; @@ -2559,7 +2683,7 @@ compiler_jump_if(struct compiler *c, expr_ty e, basicblock *next, int cond) return 0; if (!compiler_jump_if(c, e->v.IfExp.body, next, cond)) return 0; - ADDOP_JREL(c, JUMP_FORWARD, end); + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end); compiler_use_next_block(c, next2); if (!compiler_jump_if(c, e->v.IfExp.orelse, next, cond)) return 0; @@ -2582,20 +2706,21 @@ compiler_jump_if(struct compiler *c, expr_ty e, basicblock *next, int cond) ADDOP(c, DUP_TOP); ADDOP(c, ROT_THREE); ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, i)); - ADDOP_JABS(c, POP_JUMP_IF_FALSE, cleanup); + ADDOP_JUMP(c, POP_JUMP_IF_FALSE, cleanup); NEXT_BLOCK(c); } VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, n)); ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, n)); - ADDOP_JABS(c, cond ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next); + ADDOP_JUMP(c, cond ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next); + NEXT_BLOCK(c); basicblock *end = compiler_new_block(c); if (end == NULL) return 0; - ADDOP_JREL(c, JUMP_FORWARD, end); + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end); compiler_use_next_block(c, cleanup); ADDOP(c, POP_TOP); if (!cond) { - ADDOP_JREL(c, JUMP_FORWARD, next); + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, next); } compiler_use_next_block(c, end); return 1; @@ -2610,7 +2735,8 @@ compiler_jump_if(struct compiler *c, expr_ty e, basicblock *next, int cond) /* general implementation */ VISIT(c, expr, e); - ADDOP_JABS(c, cond ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next); + ADDOP_JUMP(c, cond ? 
POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE, next); + NEXT_BLOCK(c); return 1; } @@ -2629,7 +2755,7 @@ compiler_ifexp(struct compiler *c, expr_ty e) if (!compiler_jump_if(c, e->v.IfExp.test, next, 0)) return 0; VISIT(c, expr, e->v.IfExp.body); - ADDOP_JREL(c, JUMP_FORWARD, end); + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end); compiler_use_next_block(c, next); VISIT(c, expr, e->v.IfExp.orelse); compiler_use_next_block(c, end); @@ -2683,10 +2809,16 @@ compiler_lambda(struct compiler *c, expr_ty e) qualname = c->u->u_qualname; Py_INCREF(qualname); compiler_exit_scope(c); - if (co == NULL) + if (co == NULL) { + Py_DECREF(qualname); return 0; + } - compiler_make_closure(c, co, funcflags, qualname); + if (!compiler_make_closure(c, co, funcflags, qualname)) { + Py_DECREF(qualname); + Py_DECREF(co); + return 0; + } Py_DECREF(qualname); Py_DECREF(co); @@ -2697,48 +2829,28 @@ static int compiler_if(struct compiler *c, stmt_ty s) { basicblock *end, *next; - int constant; assert(s->kind == If_kind); end = compiler_new_block(c); - if (end == NULL) + if (end == NULL) { return 0; - - constant = expr_constant(s->v.If.test); - /* constant = 0: "if 0" - * constant = 1: "if 1", "if 2", ... - * constant = -1: rest */ - if (constant == 0) { - BEGIN_DO_NOT_EMIT_BYTECODE - VISIT_SEQ(c, stmt, s->v.If.body); - END_DO_NOT_EMIT_BYTECODE - if (s->v.If.orelse) { - VISIT_SEQ(c, stmt, s->v.If.orelse); - } - } else if (constant == 1) { - VISIT_SEQ(c, stmt, s->v.If.body); - if (s->v.If.orelse) { - BEGIN_DO_NOT_EMIT_BYTECODE - VISIT_SEQ(c, stmt, s->v.If.orelse); - END_DO_NOT_EMIT_BYTECODE - } - } else { - if (asdl_seq_LEN(s->v.If.orelse)) { - next = compiler_new_block(c); - if (next == NULL) - return 0; - } - else { - next = end; - } - if (!compiler_jump_if(c, s->v.If.test, next, 0)) { + } + if (asdl_seq_LEN(s->v.If.orelse)) { + next = compiler_new_block(c); + if (next == NULL) { return 0; } - VISIT_SEQ(c, stmt, s->v.If.body); - if (asdl_seq_LEN(s->v.If.orelse)) { - ADDOP_JREL(c, JUMP_FORWARD, end); - compiler_use_next_block(c, next); - VISIT_SEQ(c, stmt, s->v.If.orelse); - } + } + else { + next = end; + } + if (!compiler_jump_if(c, s->v.If.test, next, 0)) { + return 0; + } + VISIT_SEQ(c, stmt, s->v.If.body); + if (asdl_seq_LEN(s->v.If.orelse)) { + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end); + compiler_use_next_block(c, next); + VISIT_SEQ(c, stmt, s->v.If.orelse); } compiler_use_next_block(c, end); return 1; @@ -2747,12 +2859,13 @@ compiler_if(struct compiler *c, stmt_ty s) static int compiler_for(struct compiler *c, stmt_ty s) { - basicblock *start, *cleanup, *end; + basicblock *start, *body, *cleanup, *end; start = compiler_new_block(c); + body = compiler_new_block(c); cleanup = compiler_new_block(c); end = compiler_new_block(c); - if (start == NULL || end == NULL || cleanup == NULL) { + if (start == NULL || body == NULL || end == NULL || cleanup == NULL) { return 0; } if (!compiler_push_fblock(c, FOR_LOOP, start, end, NULL)) { @@ -2761,10 +2874,13 @@ compiler_for(struct compiler *c, stmt_ty s) VISIT(c, expr, s->v.For.iter); ADDOP(c, GET_ITER); compiler_use_next_block(c, start); - ADDOP_JREL(c, FOR_ITER, cleanup); + ADDOP_JUMP(c, FOR_ITER, cleanup); + compiler_use_next_block(c, body); VISIT(c, expr, s->v.For.target); VISIT_SEQ(c, stmt, s->v.For.body); - ADDOP_JABS(c, JUMP_ABSOLUTE, start); + /* Mark jump as artificial */ + c->u->u_lineno = -1; + ADDOP_JUMP(c, JUMP_ABSOLUTE, start); compiler_use_next_block(c, cleanup); compiler_pop_fblock(c, FOR_LOOP, start); @@ -2800,7 +2916,7 @@ compiler_async_for(struct compiler *c, stmt_ty s) return 
0; } /* SETUP_FINALLY to guard the __anext__ call */ - ADDOP_JREL(c, SETUP_FINALLY, except); + ADDOP_JUMP(c, SETUP_FINALLY, except); ADDOP(c, GET_ANEXT); ADDOP_LOAD_CONST(c, Py_None); ADDOP(c, YIELD_FROM); @@ -2809,18 +2925,16 @@ compiler_async_for(struct compiler *c, stmt_ty s) /* Success block for __anext__ */ VISIT(c, expr, s->v.AsyncFor.target); VISIT_SEQ(c, stmt, s->v.AsyncFor.body); - ADDOP_JABS(c, JUMP_ABSOLUTE, start); + ADDOP_JUMP(c, JUMP_ABSOLUTE, start); compiler_pop_fblock(c, FOR_LOOP, start); /* Except block for __anext__ */ compiler_use_next_block(c, except); - /* We don't want to trace the END_ASYNC_FOR, so make sure - * that it has the same lineno as the following instruction. */ - if (asdl_seq_LEN(s->v.For.orelse)) { - SET_LOC(c, (stmt_ty)asdl_seq_GET(s->v.For.orelse, 0)); - } + /* Use same line number as the iterator, + * as the END_ASYNC_FOR succeeds the `for`, not the body. */ + SET_LOC(c, s->v.AsyncFor.iter); ADDOP(c, END_ASYNC_FOR); /* `else` block */ @@ -2834,63 +2948,35 @@ compiler_async_for(struct compiler *c, stmt_ty s) static int compiler_while(struct compiler *c, stmt_ty s) { - basicblock *loop, *orelse, *end, *anchor = NULL; - int constant = expr_constant(s->v.While.test); - - if (constant == 0) { - BEGIN_DO_NOT_EMIT_BYTECODE - // Push a dummy block so the VISIT_SEQ knows that we are - // inside a while loop so it can correctly evaluate syntax - // errors. - if (!compiler_push_fblock(c, WHILE_LOOP, NULL, NULL, NULL)) { - return 0; - } - VISIT_SEQ(c, stmt, s->v.While.body); - // Remove the dummy block now that is not needed. - compiler_pop_fblock(c, WHILE_LOOP, NULL); - END_DO_NOT_EMIT_BYTECODE - if (s->v.While.orelse) { - VISIT_SEQ(c, stmt, s->v.While.orelse); - } - return 1; - } + basicblock *loop, *body, *end, *anchor = NULL; loop = compiler_new_block(c); + body = compiler_new_block(c); + anchor = compiler_new_block(c); end = compiler_new_block(c); - if (constant == -1) { - anchor = compiler_new_block(c); - if (anchor == NULL) - return 0; - } - if (loop == NULL || end == NULL) + if (loop == NULL || body == NULL || anchor == NULL || end == NULL) { return 0; - if (s->v.While.orelse) { - orelse = compiler_new_block(c); - if (orelse == NULL) - return 0; } - else - orelse = NULL; - compiler_use_next_block(c, loop); - if (!compiler_push_fblock(c, WHILE_LOOP, loop, end, NULL)) + if (!compiler_push_fblock(c, WHILE_LOOP, loop, end, NULL)) { + return 0; + } + if (!compiler_jump_if(c, s->v.While.test, anchor, 0)) { return 0; - if (constant == -1) { - if (!compiler_jump_if(c, s->v.While.test, anchor, 0)) - return 0; } - VISIT_SEQ(c, stmt, s->v.While.body); - ADDOP_JABS(c, JUMP_ABSOLUTE, loop); - /* XXX should the two POP instructions be in a separate block - if there is no else clause ? - */ + compiler_use_next_block(c, body); + VISIT_SEQ(c, stmt, s->v.While.body); + SET_LOC(c, s); + if (!compiler_jump_if(c, s->v.While.test, body, 1)) { + return 0; + } - if (constant == -1) - compiler_use_next_block(c, anchor); compiler_pop_fblock(c, WHILE_LOOP, loop); - if (orelse != NULL) /* what if orelse is just pass? 
*/ + compiler_use_next_block(c, anchor); + if (s->v.While.orelse) { VISIT_SEQ(c, stmt, s->v.While.orelse); + } compiler_use_next_block(c, end); return 1; @@ -2911,16 +2997,28 @@ compiler_return(struct compiler *c, stmt_ty s) } if (preserve_tos) { VISIT(c, expr, s->v.Return.value); + } else { + /* Emit instruction with line number for return value */ + if (s->v.Return.value != NULL) { + SET_LOC(c, s->v.Return.value); + ADDOP(c, NOP); + } + } + if (s->v.Return.value == NULL || s->v.Return.value->lineno != s->lineno) { + SET_LOC(c, s); + ADDOP(c, NOP); } + if (!compiler_unwind_fblock_stack(c, preserve_tos, NULL)) return 0; if (s->v.Return.value == NULL) { ADDOP_LOAD_CONST(c, Py_None); } else if (!preserve_tos) { - VISIT(c, expr, s->v.Return.value); + ADDOP_LOAD_CONST(c, s->v.Return.value->v.Constant.value); } ADDOP(c, RETURN_VALUE); + NEXT_BLOCK(c); return 1; } @@ -2929,6 +3027,8 @@ static int compiler_break(struct compiler *c) { struct fblockinfo *loop = NULL; + /* Emit instruction with line number */ + ADDOP(c, NOP); if (!compiler_unwind_fblock_stack(c, 0, &loop)) { return 0; } @@ -2938,7 +3038,8 @@ compiler_break(struct compiler *c) if (!compiler_unwind_fblock(c, loop, 0)) { return 0; } - ADDOP_JABS(c, JUMP_ABSOLUTE, loop->fb_exit); + ADDOP_JUMP(c, JUMP_ABSOLUTE, loop->fb_exit); + NEXT_BLOCK(c); return 1; } @@ -2946,13 +3047,16 @@ static int compiler_continue(struct compiler *c) { struct fblockinfo *loop = NULL; + /* Emit instruction with line number */ + ADDOP(c, NOP); if (!compiler_unwind_fblock_stack(c, 0, &loop)) { return 0; } if (loop == NULL) { return compiler_error(c, "'continue' not properly in loop"); } - ADDOP_JABS(c, JUMP_ABSOLUTE, loop->fb_block); + ADDOP_JUMP(c, JUMP_ABSOLUTE, loop->fb_block); + NEXT_BLOCK(c) return 1; } @@ -2998,7 +3102,7 @@ compiler_try_finally(struct compiler *c, stmt_ty s) return 0; /* `try` block */ - ADDOP_JREL(c, SETUP_FINALLY, end); + ADDOP_JUMP(c, SETUP_FINALLY, end); compiler_use_next_block(c, body); if (!compiler_push_fblock(c, FINALLY_TRY, body, end, s->v.Try.finalbody)) return 0; @@ -3009,17 +3113,17 @@ compiler_try_finally(struct compiler *c, stmt_ty s) else { VISIT_SEQ(c, stmt, s->v.Try.body); } - ADDOP(c, POP_BLOCK); + ADDOP_NOLINE(c, POP_BLOCK); compiler_pop_fblock(c, FINALLY_TRY, body); VISIT_SEQ(c, stmt, s->v.Try.finalbody); - ADDOP_JREL(c, JUMP_FORWARD, exit); + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, exit); /* `finally` block */ compiler_use_next_block(c, end); if (!compiler_push_fblock(c, FINALLY_END, end, NULL, NULL)) return 0; VISIT_SEQ(c, stmt, s->v.Try.finalbody); compiler_pop_fblock(c, FINALLY_END, end); - ADDOP(c, RERAISE); + ADDOP_I(c, RERAISE, 0); compiler_use_next_block(c, exit); return 1; } @@ -3066,14 +3170,14 @@ compiler_try_except(struct compiler *c, stmt_ty s) end = compiler_new_block(c); if (body == NULL || except == NULL || orelse == NULL || end == NULL) return 0; - ADDOP_JREL(c, SETUP_FINALLY, except); + ADDOP_JUMP(c, SETUP_FINALLY, except); compiler_use_next_block(c, body); if (!compiler_push_fblock(c, TRY_EXCEPT, body, NULL, NULL)) return 0; VISIT_SEQ(c, stmt, s->v.Try.body); - ADDOP(c, POP_BLOCK); compiler_pop_fblock(c, TRY_EXCEPT, body); - ADDOP_JREL(c, JUMP_FORWARD, orelse); + ADDOP_NOLINE(c, POP_BLOCK); + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, orelse); n = asdl_seq_LEN(s->v.Try.handlers); compiler_use_next_block(c, except); /* Runtime will push a block here, so we need to account for that */ @@ -3082,16 +3186,17 @@ compiler_try_except(struct compiler *c, stmt_ty s) for (i = 0; i < n; i++) { excepthandler_ty handler = 
(excepthandler_ty)asdl_seq_GET( s->v.Try.handlers, i); + SET_LOC(c, handler); if (!handler->v.ExceptHandler.type && i < n-1) return compiler_error(c, "default 'except:' must be last"); - SET_LOC(c, handler); except = compiler_new_block(c); if (except == NULL) return 0; if (handler->v.ExceptHandler.type) { ADDOP(c, DUP_TOP); VISIT(c, expr, handler->v.ExceptHandler.type); - ADDOP_JABS(c, JUMP_IF_NOT_EXC_MATCH, except); + ADDOP_JUMP(c, JUMP_IF_NOT_EXC_MATCH, except); + NEXT_BLOCK(c); } ADDOP(c, POP_TOP); if (handler->v.ExceptHandler.name) { @@ -3118,7 +3223,7 @@ compiler_try_except(struct compiler *c, stmt_ty s) */ /* second try: */ - ADDOP_JREL(c, SETUP_FINALLY, cleanup_end); + ADDOP_JUMP(c, SETUP_FINALLY, cleanup_end); compiler_use_next_block(c, cleanup_body); if (!compiler_push_fblock(c, HANDLER_CLEANUP, cleanup_body, NULL, handler->v.ExceptHandler.name)) return 0; @@ -3126,23 +3231,25 @@ compiler_try_except(struct compiler *c, stmt_ty s) /* second # body */ VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body); compiler_pop_fblock(c, HANDLER_CLEANUP, cleanup_body); + /* name = None; del name; # Mark as artificial */ + c->u->u_lineno = -1; ADDOP(c, POP_BLOCK); ADDOP(c, POP_EXCEPT); - /* name = None; del name */ ADDOP_LOAD_CONST(c, Py_None); compiler_nameop(c, handler->v.ExceptHandler.name, Store); compiler_nameop(c, handler->v.ExceptHandler.name, Del); - ADDOP_JREL(c, JUMP_FORWARD, end); + ADDOP_JUMP(c, JUMP_FORWARD, end); /* except: */ compiler_use_next_block(c, cleanup_end); - /* name = None; del name */ + /* name = None; del name; # Mark as artificial */ + c->u->u_lineno = -1; ADDOP_LOAD_CONST(c, Py_None); compiler_nameop(c, handler->v.ExceptHandler.name, Store); compiler_nameop(c, handler->v.ExceptHandler.name, Del); - ADDOP(c, RERAISE); + ADDOP_I(c, RERAISE, 1); } else { basicblock *cleanup_body; @@ -3158,13 +3265,16 @@ compiler_try_except(struct compiler *c, stmt_ty s) return 0; VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body); compiler_pop_fblock(c, HANDLER_CLEANUP, cleanup_body); + c->u->u_lineno = -1; ADDOP(c, POP_EXCEPT); - ADDOP_JREL(c, JUMP_FORWARD, end); + ADDOP_JUMP(c, JUMP_FORWARD, end); } compiler_use_next_block(c, except); } compiler_pop_fblock(c, EXCEPTION_HANDLER, NULL); - ADDOP(c, RERAISE); + /* Mark as artificial */ + c->u->u_lineno = -1; + ADDOP_I(c, RERAISE, 0); compiler_use_next_block(c, orelse); VISIT_SEQ(c, stmt, s->v.Try.orelse); compiler_use_next_block(c, end); @@ -3232,11 +3342,12 @@ compiler_import(struct compiler *c, stmt_ty s) */ Py_ssize_t i, n = asdl_seq_LEN(s->v.Import.names); + PyObject *zero = _PyLong_GetZero(); // borrowed reference for (i = 0; i < n; i++) { alias_ty alias = (alias_ty)asdl_seq_GET(s->v.Import.names, i); int r; - ADDOP_LOAD_CONST(c, _PyLong_Zero); + ADDOP_LOAD_CONST(c, zero); ADDOP_LOAD_CONST(c, Py_None); ADDOP_NAME(c, IMPORT_NAME, alias->name, names); @@ -3334,10 +3445,12 @@ compiler_assert(struct compiler *c, stmt_ty s) { basicblock *end; - if (c->c_optimize) - return 1; - if (s->v.Assert.test->kind == Tuple_kind && - asdl_seq_LEN(s->v.Assert.test->v.Tuple.elts) > 0) + /* Always emit a warning if the test is a non-zero length tuple */ + if ((s->v.Assert.test->kind == Tuple_kind && + asdl_seq_LEN(s->v.Assert.test->v.Tuple.elts) > 0) || + (s->v.Assert.test->kind == Constant_kind && + PyTuple_Check(s->v.Assert.test->v.Constant.value) && + PyTuple_Size(s->v.Assert.test->v.Constant.value) > 0)) { if (!compiler_warn(c, "assertion is always true, " "perhaps remove parentheses?")) @@ -3345,6 +3458,8 @@ compiler_assert(struct compiler *c, stmt_ty 
s) return 0; } } + if (c->c_optimize) + return 1; end = compiler_new_block(c); if (end == NULL) return 0; @@ -3371,10 +3486,13 @@ compiler_visit_stmt_expr(struct compiler *c, expr_ty value) if (value->kind == Constant_kind) { /* ignore constant statement */ + ADDOP(c, NOP); return 1; } VISIT(c, expr, value); + /* Mark POP_TOP as artificial */ + c->u->u_lineno = -1; ADDOP(c, POP_TOP); return 1; } @@ -3417,6 +3535,8 @@ compiler_visit_stmt(struct compiler *c, stmt_ty s) return compiler_while(c, s); case If_kind: return compiler_if(c, s); + case Match_kind: + return compiler_match(c, s); case Raise_kind: n = 0; if (s->v.Raise.exc) { @@ -3428,6 +3548,7 @@ compiler_visit_stmt(struct compiler *c, stmt_ty s) } } ADDOP_I(c, RAISE_VARARGS, (int)n); + NEXT_BLOCK(c); break; case Try_kind: return compiler_try(c, s); @@ -3443,6 +3564,7 @@ compiler_visit_stmt(struct compiler *c, stmt_ty s) case Expr_kind: return compiler_visit_stmt_expr(c, s->v.Expr.value); case Pass_kind: + ADDOP(c, NOP); break; case Break_kind: return compiler_break(c); @@ -3577,7 +3699,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx) op = 0; optype = OP_NAME; - scope = PyST_GetScope(c->u->u_ste, mangled); + scope = _PyST_GetScope(c->u->u_ste, mangled); switch (scope) { case FREE: dict = c->u->u_freevars; @@ -3654,7 +3776,7 @@ compiler_boolop(struct compiler *c, expr_ty e) basicblock *end; int jumpi; Py_ssize_t i, n; - asdl_seq *s; + asdl_expr_seq *s; assert(e->kind == BoolOp_kind); if (e->v.BoolOp.op == And) @@ -3669,7 +3791,12 @@ compiler_boolop(struct compiler *c, expr_ty e) assert(n >= 0); for (i = 0; i < n; ++i) { VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i)); - ADDOP_JABS(c, jumpi, end); + ADDOP_JUMP(c, jumpi, end); + basicblock *next = compiler_new_block(c); + if (next == NULL) { + return 0; + } + compiler_use_next_block(c, next); } VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n)); compiler_use_next_block(c, end); @@ -3677,18 +3804,17 @@ compiler_boolop(struct compiler *c, expr_ty e) } static int -starunpack_helper(struct compiler *c, asdl_seq *elts, int pushed, +starunpack_helper(struct compiler *c, asdl_expr_seq *elts, int pushed, int build, int add, int extend, int tuple) { Py_ssize_t n = asdl_seq_LEN(elts); - Py_ssize_t i, seen_star = 0; if (n > 2 && are_all_items_const(elts, 0, n)) { PyObject *folded = PyTuple_New(n); if (folded == NULL) { return 0; } PyObject *val; - for (i = 0; i < n; i++) { + for (Py_ssize_t i = 0; i < n; i++) { val = ((expr_ty)asdl_seq_GET(elts, i))->v.Constant.value; Py_INCREF(val); PyTuple_SET_ITEM(folded, i, val); @@ -3709,38 +3835,16 @@ starunpack_helper(struct compiler *c, asdl_seq *elts, int pushed, return 1; } - for (i = 0; i < n; i++) { + int big = n+pushed > STACK_USE_GUIDELINE; + int seen_star = 0; + for (Py_ssize_t i = 0; i < n; i++) { expr_ty elt = asdl_seq_GET(elts, i); if (elt->kind == Starred_kind) { seen_star = 1; } } - if (seen_star) { - seen_star = 0; - for (i = 0; i < n; i++) { - expr_ty elt = asdl_seq_GET(elts, i); - if (elt->kind == Starred_kind) { - if (seen_star == 0) { - ADDOP_I(c, build, i+pushed); - seen_star = 1; - } - VISIT(c, expr, elt->v.Starred.value); - ADDOP_I(c, extend, 1); - } - else { - VISIT(c, expr, elt); - if (seen_star) { - ADDOP_I(c, add, 1); - } - } - } - assert(seen_star); - if (tuple) { - ADDOP(c, LIST_TO_TUPLE); - } - } - else { - for (i = 0; i < n; i++) { + if (!seen_star && !big) { + for (Py_ssize_t i = 0; i < n; i++) { expr_ty elt = asdl_seq_GET(elts, i); VISIT(c, expr, elt); } @@ -3749,17 +3853,43 @@ starunpack_helper(struct 
compiler *c, asdl_seq *elts, int pushed, } else { ADDOP_I(c, build, n+pushed); } + return 1; + } + int sequence_built = 0; + if (big) { + ADDOP_I(c, build, pushed); + sequence_built = 1; + } + for (Py_ssize_t i = 0; i < n; i++) { + expr_ty elt = asdl_seq_GET(elts, i); + if (elt->kind == Starred_kind) { + if (sequence_built == 0) { + ADDOP_I(c, build, i+pushed); + sequence_built = 1; + } + VISIT(c, expr, elt->v.Starred.value); + ADDOP_I(c, extend, 1); + } + else { + VISIT(c, expr, elt); + if (sequence_built) { + ADDOP_I(c, add, 1); + } + } + } + assert(sequence_built); + if (tuple) { + ADDOP(c, LIST_TO_TUPLE); } return 1; } static int -assignment_helper(struct compiler *c, asdl_seq *elts) +unpack_helper(struct compiler *c, asdl_expr_seq *elts) { Py_ssize_t n = asdl_seq_LEN(elts); - Py_ssize_t i; int seen_star = 0; - for (i = 0; i < n; i++) { + for (Py_ssize_t i = 0; i < n; i++) { expr_ty elt = asdl_seq_GET(elts, i); if (elt->kind == Starred_kind && !seen_star) { if ((i >= (1 << 8)) || @@ -3778,7 +3908,15 @@ assignment_helper(struct compiler *c, asdl_seq *elts) if (!seen_star) { ADDOP_I(c, UNPACK_SEQUENCE, n); } - for (i = 0; i < n; i++) { + return 1; +} + +static int +assignment_helper(struct compiler *c, asdl_expr_seq *elts) +{ + Py_ssize_t n = asdl_seq_LEN(elts); + RETURN_IF_FALSE(unpack_helper(c, elts)); + for (Py_ssize_t i = 0; i < n; i++) { expr_ty elt = asdl_seq_GET(elts, i); VISIT(c, expr, elt->kind != Starred_kind ? elt : elt->v.Starred.value); } @@ -3788,7 +3926,7 @@ assignment_helper(struct compiler *c, asdl_seq *elts) static int compiler_list(struct compiler *c, expr_ty e) { - asdl_seq *elts = e->v.List.elts; + asdl_expr_seq *elts = e->v.List.elts; if (e->v.List.ctx == Store) { return assignment_helper(c, elts); } @@ -3804,7 +3942,7 @@ compiler_list(struct compiler *c, expr_ty e) static int compiler_tuple(struct compiler *c, expr_ty e) { - asdl_seq *elts = e->v.Tuple.elts; + asdl_expr_seq *elts = e->v.Tuple.elts; if (e->v.Tuple.ctx == Store) { return assignment_helper(c, elts); } @@ -3825,7 +3963,7 @@ compiler_set(struct compiler *c, expr_ty e) } static int -are_all_items_const(asdl_seq *seq, Py_ssize_t begin, Py_ssize_t end) +are_all_items_const(asdl_expr_seq *seq, Py_ssize_t begin, Py_ssize_t end) { Py_ssize_t i; for (i = begin; i < end; i++) { @@ -3841,7 +3979,8 @@ compiler_subdict(struct compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end { Py_ssize_t i, n = end - begin; PyObject *keys, *key; - if (n > 1 && are_all_items_const(e->v.Dict.keys, begin, end)) { + int big = n*2 > STACK_USE_GUIDELINE; + if (n > 1 && !big && are_all_items_const(e->v.Dict.keys, begin, end)) { for (i = begin; i < end; i++) { VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i)); } @@ -3856,12 +3995,19 @@ compiler_subdict(struct compiler *c, expr_ty e, Py_ssize_t begin, Py_ssize_t end } ADDOP_LOAD_CONST_NEW(c, keys); ADDOP_I(c, BUILD_CONST_KEY_MAP, n); + return 1; } - else { - for (i = begin; i < end; i++) { - VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.keys, i)); - VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i)); + if (big) { + ADDOP_I(c, BUILD_MAP, 0); + } + for (i = begin; i < end; i++) { + VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.keys, i)); + VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Dict.values, i)); + if (big) { + ADDOP_I(c, MAP_ADD, 1); } + } + if (!big) { ADDOP_I(c, BUILD_MAP, n); } return 1; @@ -3897,7 +4043,7 @@ compiler_dict(struct compiler *c, expr_ty e) ADDOP_I(c, DICT_UPDATE, 1); } else { - if (elements == 0xFFFF) { + if (elements*2 > STACK_USE_GUIDELINE) { 
if (!compiler_subdict(c, e, i - elements, i + 1)) { return 0; } @@ -3952,7 +4098,7 @@ compiler_compare(struct compiler *c, expr_ty e) ADDOP(c, DUP_TOP); ADDOP(c, ROT_THREE); ADDOP_COMPARE(c, asdl_seq_GET(e->v.Compare.ops, i)); - ADDOP_JABS(c, JUMP_IF_FALSE_OR_POP, cleanup); + ADDOP_JUMP(c, JUMP_IF_FALSE_OR_POP, cleanup); NEXT_BLOCK(c); } VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, n)); @@ -3960,7 +4106,7 @@ compiler_compare(struct compiler *c, expr_ty e) basicblock *end = compiler_new_block(c); if (end == NULL) return 0; - ADDOP_JREL(c, JUMP_FORWARD, end); + ADDOP_JUMP_NOLINE(c, JUMP_FORWARD, end); compiler_use_next_block(c, cleanup); ADDOP(c, ROT_TWO); ADDOP(c, POP_TOP); @@ -4088,16 +4234,20 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) { Py_ssize_t argsl, i; expr_ty meth = e->v.Call.func; - asdl_seq *args = e->v.Call.args; + asdl_expr_seq *args = e->v.Call.args; /* Check that the call node is an attribute access, and that the call doesn't have keyword parameters. */ if (meth->kind != Attribute_kind || meth->v.Attribute.ctx != Load || - asdl_seq_LEN(e->v.Call.keywords)) + asdl_seq_LEN(e->v.Call.keywords)) { return -1; - - /* Check that there are no *varargs types of arguments. */ + } + /* Check that there aren't too many arguments */ argsl = asdl_seq_LEN(args); + if (argsl >= STACK_USE_GUIDELINE) { + return -1; + } + /* Check that there are no *varargs types of arguments. */ for (i = 0; i < argsl; i++) { expr_ty elt = asdl_seq_GET(args, i); if (elt->kind == Starred_kind) { @@ -4107,14 +4257,17 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) /* Alright, we can optimize the code. */ VISIT(c, expr, meth->v.Attribute.value); + int old_lineno = c->u->u_lineno; + c->u->u_lineno = meth->end_lineno; ADDOP_NAME(c, LOAD_METHOD, meth->v.Attribute.attr, names); VISIT_SEQ(c, expr, e->v.Call.args); ADDOP_I(c, CALL_METHOD, asdl_seq_LEN(e->v.Call.args)); + c->u->u_lineno = old_lineno; return 1; } static int -validate_keywords(struct compiler *c, asdl_seq *keywords) +validate_keywords(struct compiler *c, asdl_keyword_seq *keywords) { Py_ssize_t nkeywords = asdl_seq_LEN(keywords); for (Py_ssize_t i = 0; i < nkeywords; i++) { @@ -4128,13 +4281,8 @@ validate_keywords(struct compiler *c, asdl_seq *keywords) for (Py_ssize_t j = i + 1; j < nkeywords; j++) { keyword_ty other = ((keyword_ty)asdl_seq_GET(keywords, j)); if (other->arg && !PyUnicode_Compare(key->arg, other->arg)) { - PyObject *msg = PyUnicode_FromFormat("keyword argument repeated: %U", key->arg); - if (msg == NULL) { - return -1; - } - c->u->u_col_offset = other->col_offset; - compiler_error(c, PyUnicode_AsUTF8(msg)); - Py_DECREF(msg); + SET_LOC(c, other); + compiler_error(c, "keyword argument repeated: %U", key->arg); return -1; } } @@ -4161,9 +4309,29 @@ compiler_call(struct compiler *c, expr_ty e) static int compiler_joined_str(struct compiler *c, expr_ty e) { - VISIT_SEQ(c, expr, e->v.JoinedStr.values); - if (asdl_seq_LEN(e->v.JoinedStr.values) != 1) - ADDOP_I(c, BUILD_STRING, asdl_seq_LEN(e->v.JoinedStr.values)); + + Py_ssize_t value_count = asdl_seq_LEN(e->v.JoinedStr.values); + if (value_count > STACK_USE_GUIDELINE) { + ADDOP_LOAD_CONST_NEW(c, _PyUnicode_FromASCII("", 0)); + PyObject *join = _PyUnicode_FromASCII("join", 4); + if (join == NULL) { + return 0; + } + ADDOP_NAME(c, LOAD_METHOD, join, names); + Py_DECREF(join); + ADDOP_I(c, BUILD_LIST, 0); + for (Py_ssize_t i = 0; i < asdl_seq_LEN(e->v.JoinedStr.values); i++) { + VISIT(c, expr, asdl_seq_GET(e->v.JoinedStr.values, i)); + ADDOP_I(c, 
LIST_APPEND, 1); + } + ADDOP_I(c, CALL_METHOD, 1); + } + else { + VISIT_SEQ(c, expr, e->v.JoinedStr.values); + if (asdl_seq_LEN(e->v.JoinedStr.values) != 1) { + ADDOP_I(c, BUILD_STRING, asdl_seq_LEN(e->v.JoinedStr.values)); + } + } return 1; } @@ -4214,13 +4382,14 @@ compiler_formatted_value(struct compiler *c, expr_ty e) } static int -compiler_subkwargs(struct compiler *c, asdl_seq *keywords, Py_ssize_t begin, Py_ssize_t end) +compiler_subkwargs(struct compiler *c, asdl_keyword_seq *keywords, Py_ssize_t begin, Py_ssize_t end) { Py_ssize_t i, n = end - begin; keyword_ty kw; PyObject *keys, *key; assert(n > 0); - if (n > 1) { + int big = n*2 > STACK_USE_GUIDELINE; + if (n > 1 && !big) { for (i = begin; i < end; i++) { kw = asdl_seq_GET(keywords, i); VISIT(c, expr, kw->value); @@ -4236,14 +4405,20 @@ compiler_subkwargs(struct compiler *c, asdl_seq *keywords, Py_ssize_t begin, Py_ } ADDOP_LOAD_CONST_NEW(c, keys); ADDOP_I(c, BUILD_CONST_KEY_MAP, n); + return 1; } - else { - /* a for loop only executes once */ - for (i = begin; i < end; i++) { - kw = asdl_seq_GET(keywords, i); - ADDOP_LOAD_CONST(c, kw->arg); - VISIT(c, expr, kw->value); + if (big) { + ADDOP_I_NOLINE(c, BUILD_MAP, 0); + } + for (i = begin; i < end; i++) { + kw = asdl_seq_GET(keywords, i); + ADDOP_LOAD_CONST(c, kw->arg); + VISIT(c, expr, kw->value); + if (big) { + ADDOP_I_NOLINE(c, MAP_ADD, 1); } + } + if (!big) { ADDOP_I(c, BUILD_MAP, n); } return 1; @@ -4253,8 +4428,8 @@ compiler_subkwargs(struct compiler *c, asdl_seq *keywords, Py_ssize_t begin, Py_ static int compiler_call_helper(struct compiler *c, int n, /* Args already pushed */ - asdl_seq *args, - asdl_seq *keywords) + asdl_expr_seq *args, + asdl_keyword_seq *keywords) { Py_ssize_t i, nseen, nelts, nkwelts; @@ -4265,6 +4440,9 @@ compiler_call_helper(struct compiler *c, nelts = asdl_seq_LEN(args); nkwelts = asdl_seq_LEN(keywords); + if (nelts + nkwelts*2 > STACK_USE_GUIDELINE) { + goto ex_call; + } for (i = 0; i < nelts; i++) { expr_ty elt = asdl_seq_GET(args, i); if (elt->kind == Starred_kind) { @@ -4379,7 +4557,7 @@ ex_call: static int compiler_comprehension_generator(struct compiler *c, - asdl_seq *generators, int gen_index, + asdl_comprehension_seq *generators, int gen_index, int depth, expr_ty elt, expr_ty val, int type) { @@ -4396,7 +4574,7 @@ compiler_comprehension_generator(struct compiler *c, static int compiler_sync_comprehension_generator(struct compiler *c, - asdl_seq *generators, int gen_index, + asdl_comprehension_seq *generators, int gen_index, int depth, expr_ty elt, expr_ty val, int type) { @@ -4428,7 +4606,7 @@ compiler_sync_comprehension_generator(struct compiler *c, /* Fast path for the temporary variable assignment idiom: for y in [f(x)] */ - asdl_seq *elts; + asdl_expr_seq *elts; switch (gen->iter->kind) { case List_kind: elts = gen->iter->v.List.elts; @@ -4454,7 +4632,7 @@ compiler_sync_comprehension_generator(struct compiler *c, if (start) { depth++; compiler_use_next_block(c, start); - ADDOP_JREL(c, FOR_ITER, anchor); + ADDOP_JUMP(c, FOR_ITER, anchor); NEXT_BLOCK(c); } VISIT(c, expr, gen->target); @@ -4506,7 +4684,7 @@ compiler_sync_comprehension_generator(struct compiler *c, } compiler_use_next_block(c, if_cleanup); if (start) { - ADDOP_JABS(c, JUMP_ABSOLUTE, start); + ADDOP_JUMP(c, JUMP_ABSOLUTE, start); compiler_use_next_block(c, anchor); } @@ -4515,7 +4693,7 @@ compiler_sync_comprehension_generator(struct compiler *c, static int compiler_async_comprehension_generator(struct compiler *c, - asdl_seq *generators, int gen_index, + 
asdl_comprehension_seq *generators, int gen_index, int depth, expr_ty elt, expr_ty val, int type) { @@ -4544,8 +4722,13 @@ compiler_async_comprehension_generator(struct compiler *c, } compiler_use_next_block(c, start); + /* Runtime will push a block here, so we need to account for that */ + if (!compiler_push_fblock(c, ASYNC_COMPREHENSION_GENERATOR, start, + NULL, NULL)) { + return 0; + } - ADDOP_JREL(c, SETUP_FINALLY, except); + ADDOP_JUMP(c, SETUP_FINALLY, except); ADDOP(c, GET_ANEXT); ADDOP_LOAD_CONST(c, Py_None); ADDOP(c, YIELD_FROM); @@ -4596,7 +4779,9 @@ compiler_async_comprehension_generator(struct compiler *c, } } compiler_use_next_block(c, if_cleanup); - ADDOP_JABS(c, JUMP_ABSOLUTE, start); + ADDOP_JUMP(c, JUMP_ABSOLUTE, start); + + compiler_pop_fblock(c, ASYNC_COMPREHENSION_GENERATOR, start); compiler_use_next_block(c, except); ADDOP(c, END_ASYNC_FOR); @@ -4606,7 +4791,7 @@ compiler_async_comprehension_generator(struct compiler *c, static int compiler_comprehension(struct compiler *c, expr_ty e, int type, - identifier name, asdl_seq *generators, expr_ty elt, + identifier name, asdl_comprehension_seq *generators, expr_ty elt, expr_ty val) { PyCodeObject *co = NULL; @@ -4624,6 +4809,7 @@ compiler_comprehension(struct compiler *c, expr_ty e, int type, { goto error; } + SET_LOC(c, e); is_async_generator = c->u->u_ste->ste_coroutine; @@ -4672,8 +4858,9 @@ compiler_comprehension(struct compiler *c, expr_ty e, int type, if (co == NULL) goto error; - if (!compiler_make_closure(c, co, 0, qualname)) + if (!compiler_make_closure(c, co, 0, qualname)) { goto error; + } Py_DECREF(qualname); Py_DECREF(co); @@ -4778,22 +4965,14 @@ compiler_visit_keyword(struct compiler *c, keyword_ty k) */ static int -expr_constant(expr_ty e) -{ - if (e->kind == Constant_kind) { - return PyObject_IsTrue(e->v.Constant.value); - } - return -1; -} - -static int compiler_with_except_finish(struct compiler *c) { basicblock *exit; exit = compiler_new_block(c); if (exit == NULL) return 0; - ADDOP_JABS(c, POP_JUMP_IF_TRUE, exit); - ADDOP(c, RERAISE); + ADDOP_JUMP(c, POP_JUMP_IF_TRUE, exit); + NEXT_BLOCK(c); + ADDOP_I(c, RERAISE, 1); compiler_use_next_block(c, exit); ADDOP(c, POP_TOP); ADDOP(c, POP_TOP); @@ -4854,11 +5033,11 @@ compiler_async_with(struct compiler *c, stmt_ty s, int pos) ADDOP_LOAD_CONST(c, Py_None); ADDOP(c, YIELD_FROM); - ADDOP_JREL(c, SETUP_ASYNC_WITH, final); + ADDOP_JUMP(c, SETUP_ASYNC_WITH, final); /* SETUP_ASYNC_WITH pushes a finally block. */ compiler_use_next_block(c, block); - if (!compiler_push_fblock(c, ASYNC_WITH, block, final, NULL)) { + if (!compiler_push_fblock(c, ASYNC_WITH, block, final, s)) { return 0; } @@ -4884,19 +5063,19 @@ compiler_async_with(struct compiler *c, stmt_ty s, int pos) /* For successful outcome: * call __exit__(None, None, None) */ + SET_LOC(c, s); if(!compiler_call_exit_with_nones(c)) return 0; ADDOP(c, GET_AWAITABLE); - ADDOP_O(c, LOAD_CONST, Py_None, consts); + ADDOP_LOAD_CONST(c, Py_None); ADDOP(c, YIELD_FROM); ADDOP(c, POP_TOP); - ADDOP_JABS(c, JUMP_ABSOLUTE, exit); + ADDOP_JUMP(c, JUMP_ABSOLUTE, exit); /* For exceptional outcome: */ compiler_use_next_block(c, final); - ADDOP(c, WITH_EXCEPT_START); ADDOP(c, GET_AWAITABLE); ADDOP_LOAD_CONST(c, Py_None); @@ -4946,11 +5125,11 @@ compiler_with(struct compiler *c, stmt_ty s, int pos) /* Evaluate EXPR */ VISIT(c, expr, item->context_expr); /* Will push bound __exit__ */ - ADDOP_JREL(c, SETUP_WITH, final); + ADDOP_JUMP(c, SETUP_WITH, final); /* SETUP_WITH pushes a finally block. 
*/ compiler_use_next_block(c, block); - if (!compiler_push_fblock(c, WITH, block, final, NULL)) { + if (!compiler_push_fblock(c, WITH, block, final, s)) { return 0; } @@ -4969,6 +5148,9 @@ compiler_with(struct compiler *c, stmt_ty s, int pos) else if (!compiler_with(c, s, pos)) return 0; + + /* Mark all following code as artificial */ + c->u->u_lineno = -1; ADDOP(c, POP_BLOCK); compiler_pop_fblock(c, WITH, block); @@ -4977,14 +5159,14 @@ compiler_with(struct compiler *c, stmt_ty s, int pos) /* For successful outcome: * call __exit__(None, None, None) */ + SET_LOC(c, s); if (!compiler_call_exit_with_nones(c)) return 0; ADDOP(c, POP_TOP); - ADDOP_JREL(c, JUMP_FORWARD, exit); + ADDOP_JUMP(c, JUMP_FORWARD, exit); /* For exceptional outcome: */ compiler_use_next_block(c, final); - ADDOP(c, WITH_EXCEPT_START); compiler_with_except_finish(c); @@ -5084,12 +5266,21 @@ compiler_visit_expr1(struct compiler *c, expr_ty e) VISIT(c, expr, e->v.Attribute.value); switch (e->v.Attribute.ctx) { case Load: + { + int old_lineno = c->u->u_lineno; + c->u->u_lineno = e->end_lineno; ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names); + c->u->u_lineno = old_lineno; break; + } case Store: - if (forbidden_name(c, e->v.Attribute.attr, e->v.Attribute.ctx)) + if (forbidden_name(c, e->v.Attribute.attr, e->v.Attribute.ctx)) { return 0; + } + int old_lineno = c->u->u_lineno; + c->u->u_lineno = e->end_lineno; ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names); + c->u->u_lineno = old_lineno; break; case Del: ADDOP_NAME(c, DELETE_ATTR, e->v.Attribute.attr, names); @@ -5127,11 +5318,15 @@ static int compiler_visit_expr(struct compiler *c, expr_ty e) { int old_lineno = c->u->u_lineno; + int old_end_lineno = c->u->u_end_lineno; int old_col_offset = c->u->u_col_offset; + int old_end_col_offset = c->u->u_end_col_offset; SET_LOC(c, e); int res = compiler_visit_expr1(c, e); c->u->u_lineno = old_lineno; + c->u->u_end_lineno = old_end_lineno; c->u->u_col_offset = old_col_offset; + c->u->u_end_col_offset = old_end_col_offset; return res; } @@ -5142,14 +5337,19 @@ compiler_augassign(struct compiler *c, stmt_ty s) expr_ty e = s->v.AugAssign.target; int old_lineno = c->u->u_lineno; + int old_end_lineno = c->u->u_end_lineno; int old_col_offset = c->u->u_col_offset; + int old_end_col_offset = c->u->u_end_col_offset; SET_LOC(c, e); switch (e->kind) { case Attribute_kind: VISIT(c, expr, e->v.Attribute.value); ADDOP(c, DUP_TOP); + int old_lineno = c->u->u_lineno; + c->u->u_lineno = e->end_lineno; ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names); + c->u->u_lineno = old_lineno; break; case Subscript_kind: VISIT(c, expr, e->v.Subscript.value); @@ -5169,7 +5369,9 @@ compiler_augassign(struct compiler *c, stmt_ty s) } c->u->u_lineno = old_lineno; + c->u->u_end_lineno = old_end_lineno; c->u->u_col_offset = old_col_offset; + c->u->u_end_col_offset = old_end_col_offset; VISIT(c, expr, s->v.AugAssign.value); ADDOP(c, inplace_binop(s->v.AugAssign.op)); @@ -5178,6 +5380,7 @@ compiler_augassign(struct compiler *c, stmt_ty s) switch (e->kind) { case Attribute_kind: + c->u->u_lineno = e->end_lineno; ADDOP(c, ROT_TWO); ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names); break; @@ -5204,6 +5407,12 @@ check_ann_expr(struct compiler *c, expr_ty e) static int check_annotation(struct compiler *c, stmt_ty s) { + /* Annotations of complex targets does not produce anything + under annotations future */ + if (c->c_future->ff_features & CO_FUTURE_ANNOTATIONS) { + return 1; + } + /* Annotations are only evaluated in a module or class. 
*/ if (c->u->u_scope_type == COMPILER_SCOPE_MODULE || c->u->u_scope_type == COMPILER_SCOPE_CLASS) { @@ -5230,7 +5439,7 @@ check_ann_subscr(struct compiler *c, expr_ty e) return 1; case Tuple_kind: { /* extended slice */ - asdl_seq *elts = e->v.Tuple.elts; + asdl_expr_seq *elts = e->v.Tuple.elts; Py_ssize_t i, n = asdl_seq_LEN(elts); for (i = 0; i < n; i++) { if (!check_ann_subscr(c, asdl_seq_GET(elts, i))) { @@ -5310,28 +5519,35 @@ compiler_annassign(struct compiler *c, stmt_ty s) */ static int -compiler_error(struct compiler *c, const char *errstr) +compiler_error(struct compiler *c, const char *format, ...) { - PyObject *loc; - PyObject *u = NULL, *v = NULL; - - loc = PyErr_ProgramTextObject(c->c_filename, c->u->u_lineno); - if (!loc) { + va_list vargs; +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + PyObject *msg = PyUnicode_FromFormatV(format, vargs); + va_end(vargs); + if (msg == NULL) { + return 0; + } + PyObject *loc = PyErr_ProgramTextObject(c->c_filename, c->u->u_lineno); + if (loc == NULL) { Py_INCREF(Py_None); loc = Py_None; } - u = Py_BuildValue("(OiiO)", c->c_filename, c->u->u_lineno, - c->u->u_col_offset + 1, loc); - if (!u) - goto exit; - v = Py_BuildValue("(zO)", errstr, u); - if (!v) + PyObject *args = Py_BuildValue("O(OiiOii)", msg, c->c_filename, + c->u->u_lineno, c->u->u_col_offset + 1, loc, + c->u->u_end_lineno, c->u->u_end_col_offset + 1); + Py_DECREF(msg); + if (args == NULL) { goto exit; - PyErr_SetObject(PyExc_SyntaxError, v); + } + PyErr_SetObject(PyExc_SyntaxError, args); exit: Py_DECREF(loc); - Py_XDECREF(u); - Py_XDECREF(v); + Py_XDECREF(args); return 0; } @@ -5426,6 +5642,812 @@ compiler_slice(struct compiler *c, expr_ty s) return 1; } + +// PEP 634: Structural Pattern Matching + +// To keep things simple, all compiler_pattern_* and pattern_helper_* routines +// follow the convention of consuming TOS (the subject for the given pattern) +// and calling jump_to_fail_pop on failure (no match). + +// When calling into these routines, it's important that pc->on_top be kept +// updated to reflect the current number of items that we are using on the top +// of the stack: they will be popped on failure, and any name captures will be +// stored *underneath* them on success. This lets us defer all names stores +// until the *entire* pattern matches. + +#define WILDCARD_CHECK(N) \ + ((N)->kind == MatchAs_kind && !(N)->v.MatchAs.name) + +#define WILDCARD_STAR_CHECK(N) \ + ((N)->kind == MatchStar_kind && !(N)->v.MatchStar.name) + +// Limit permitted subexpressions, even if the parser & AST validator let them through +#define MATCH_VALUE_EXPR(N) \ + ((N)->kind == Constant_kind || (N)->kind == Attribute_kind) + +// Allocate or resize pc->fail_pop to allow for n items to be popped on failure. +static int +ensure_fail_pop(struct compiler *c, pattern_context *pc, Py_ssize_t n) +{ + Py_ssize_t size = n + 1; + if (size <= pc->fail_pop_size) { + return 1; + } + Py_ssize_t needed = sizeof(basicblock*) * size; + basicblock **resized = PyObject_Realloc(pc->fail_pop, needed); + if (resized == NULL) { + PyErr_NoMemory(); + return 0; + } + pc->fail_pop = resized; + while (pc->fail_pop_size < size) { + basicblock *new_block; + RETURN_IF_FALSE(new_block = compiler_new_block(c)); + pc->fail_pop[pc->fail_pop_size++] = new_block; + } + return 1; +} + +// Use op to jump to the correct fail_pop block. 
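
[Illustrative aside, not part of the diff] The fail_pop machinery keeps one basic block per possible number of items that may need popping when a pattern fails to match. emit_and_reset_fail_pop lays the blocks out back-to-back, each popping one item and falling through to the next lower block, so jumping to fail_pop[k] ends up executing exactly k POP_TOPs before reaching the common "no match" block. A minimal C sketch of that invariant, with a toy counter in place of real basicblocks:

#include <assert.h>
#include <stdio.h>

/* Toy model: "jump" to level k, then walk the fall-through chain
 * fail_pop[k] -> fail_pop[k-1] -> ... -> fail_pop[0], popping once per block. */
static int run_fail_pop(int k, int stack_depth) {
    for (int level = k; level > 0; level--) {
        stack_depth--;                 /* one POP_TOP per fail_pop block */
    }
    return stack_depth;                /* fail_pop[0] leaves the stack as-is */
}

int main(void) {
    for (int k = 0; k <= 3; k++) {
        int depth = run_fail_pop(k, 10);
        assert(depth == 10 - k);
        printf("fail_pop[%d]: popped %d item(s)\n", k, 10 - depth);
    }
    return 0;
}
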
+static int +jump_to_fail_pop(struct compiler *c, pattern_context *pc, int op) +{ + // Pop any items on the top of the stack, plus any objects we were going to + // capture on success: + Py_ssize_t pops = pc->on_top + PyList_GET_SIZE(pc->stores); + RETURN_IF_FALSE(ensure_fail_pop(c, pc, pops)); + ADDOP_JUMP(c, op, pc->fail_pop[pops]); + NEXT_BLOCK(c); + return 1; +} + +// Build all of the fail_pop blocks and reset fail_pop. +static int +emit_and_reset_fail_pop(struct compiler *c, pattern_context *pc) +{ + if (!pc->fail_pop_size) { + assert(pc->fail_pop == NULL); + NEXT_BLOCK(c); + return 1; + } + while (--pc->fail_pop_size) { + compiler_use_next_block(c, pc->fail_pop[pc->fail_pop_size]); + if (!compiler_addop(c, POP_TOP)) { + pc->fail_pop_size = 0; + PyObject_Free(pc->fail_pop); + pc->fail_pop = NULL; + return 0; + } + } + compiler_use_next_block(c, pc->fail_pop[0]); + PyObject_Free(pc->fail_pop); + pc->fail_pop = NULL; + return 1; +} + +static int +compiler_error_duplicate_store(struct compiler *c, identifier n) +{ + return compiler_error(c, "multiple assignments to name %R in pattern", n); +} + +static int +pattern_helper_store_name(struct compiler *c, identifier n, pattern_context *pc) +{ + if (n == NULL) { + ADDOP(c, POP_TOP); + return 1; + } + if (forbidden_name(c, n, Store)) { + return 0; + } + // Can't assign to the same name twice: + int duplicate = PySequence_Contains(pc->stores, n); + if (duplicate < 0) { + return 0; + } + if (duplicate) { + return compiler_error_duplicate_store(c, n); + } + // Rotate this object underneath any items we need to preserve: + ADDOP_I(c, ROT_N, pc->on_top + PyList_GET_SIZE(pc->stores) + 1); + return !PyList_Append(pc->stores, n); +} + + +static int +pattern_unpack_helper(struct compiler *c, asdl_pattern_seq *elts) +{ + Py_ssize_t n = asdl_seq_LEN(elts); + int seen_star = 0; + for (Py_ssize_t i = 0; i < n; i++) { + pattern_ty elt = asdl_seq_GET(elts, i); + if (elt->kind == MatchStar_kind && !seen_star) { + if ((i >= (1 << 8)) || + (n-i-1 >= (INT_MAX >> 8))) + return compiler_error(c, + "too many expressions in " + "star-unpacking sequence pattern"); + ADDOP_I(c, UNPACK_EX, (i + ((n-i-1) << 8))); + seen_star = 1; + } + else if (elt->kind == MatchStar_kind) { + return compiler_error(c, + "multiple starred expressions in sequence pattern"); + } + } + if (!seen_star) { + ADDOP_I(c, UNPACK_SEQUENCE, n); + } + return 1; +} + +static int +pattern_helper_sequence_unpack(struct compiler *c, asdl_pattern_seq *patterns, + Py_ssize_t star, pattern_context *pc) +{ + RETURN_IF_FALSE(pattern_unpack_helper(c, patterns)); + Py_ssize_t size = asdl_seq_LEN(patterns); + // We've now got a bunch of new subjects on the stack. They need to remain + // there after each subpattern match: + pc->on_top += size; + for (Py_ssize_t i = 0; i < size; i++) { + // One less item to keep track of each time we loop through: + pc->on_top--; + pattern_ty pattern = asdl_seq_GET(patterns, i); + RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc)); + } + return 1; +} + +// Like pattern_helper_sequence_unpack, but uses BINARY_SUBSCR instead of +// UNPACK_SEQUENCE / UNPACK_EX. This is more efficient for patterns with a +// starred wildcard like [first, *_] / [first, *_, last] / [*_, last] / etc. 
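
[Illustrative aside, not part of the diff] For a sequence pattern whose star is a wildcard, the helper below indexes into the subject directly instead of unpacking it: items before the star use index i, while items after it use len(subject) - (size - i), computed at run time with GET_LEN and BINARY_SUBTRACT so the subject never has to support negative indexing. A small self-contained C check of that arithmetic, using a 5-element subject and the pattern shape [first, *_, last]:

#include <stdio.h>

int main(void) {
    int subject[] = {10, 20, 30, 40, 50};
    int len = 5;
    int size = 3;          /* the pattern [first, *_, last] has 3 sub-patterns */
    int star = 1;          /* index of the starred wildcard */
    for (int i = 0; i < size; i++) {
        if (i == star)
            continue;                                  /* wildcard: nothing to load */
        int idx = (i < star) ? i : len - (size - i);   /* nonnegative by construction */
        printf("sub-pattern %d matches subject[%d] = %d\n", i, idx, subject[idx]);
    }
    return 0;
}
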
+static int +pattern_helper_sequence_subscr(struct compiler *c, asdl_pattern_seq *patterns, + Py_ssize_t star, pattern_context *pc) +{ + // We need to keep the subject around for extracting elements: + pc->on_top++; + Py_ssize_t size = asdl_seq_LEN(patterns); + for (Py_ssize_t i = 0; i < size; i++) { + pattern_ty pattern = asdl_seq_GET(patterns, i); + if (WILDCARD_CHECK(pattern)) { + continue; + } + if (i == star) { + assert(WILDCARD_STAR_CHECK(pattern)); + continue; + } + ADDOP(c, DUP_TOP); + if (i < star) { + ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(i)); + } + else { + // The subject may not support negative indexing! Compute a + // nonnegative index: + ADDOP(c, GET_LEN); + ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size - i)); + ADDOP(c, BINARY_SUBTRACT); + } + ADDOP(c, BINARY_SUBSCR); + RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc)); + } + // Pop the subject, we're done with it: + pc->on_top--; + ADDOP(c, POP_TOP); + return 1; +} + +// Like compiler_pattern, but turn off checks for irrefutability. +static int +compiler_pattern_subpattern(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + int allow_irrefutable = pc->allow_irrefutable; + pc->allow_irrefutable = 1; + RETURN_IF_FALSE(compiler_pattern(c, p, pc)); + pc->allow_irrefutable = allow_irrefutable; + return 1; +} + +static int +compiler_pattern_as(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchAs_kind); + if (p->v.MatchAs.pattern == NULL) { + // An irrefutable match: + if (!pc->allow_irrefutable) { + if (p->v.MatchAs.name) { + const char *e = "name capture %R makes remaining patterns unreachable"; + return compiler_error(c, e, p->v.MatchAs.name); + } + const char *e = "wildcard makes remaining patterns unreachable"; + return compiler_error(c, e); + } + return pattern_helper_store_name(c, p->v.MatchAs.name, pc); + } + // Need to make a copy for (possibly) storing later: + pc->on_top++; + ADDOP(c, DUP_TOP); + RETURN_IF_FALSE(compiler_pattern(c, p->v.MatchAs.pattern, pc)); + // Success! 
Store it: + pc->on_top--; + RETURN_IF_FALSE(pattern_helper_store_name(c, p->v.MatchAs.name, pc)); + return 1; +} + +static int +compiler_pattern_star(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchStar_kind); + RETURN_IF_FALSE(pattern_helper_store_name(c, p->v.MatchStar.name, pc)); + return 1; +} + +static int +validate_kwd_attrs(struct compiler *c, asdl_identifier_seq *attrs, asdl_pattern_seq* patterns) +{ + // Any errors will point to the pattern rather than the arg name as the + // parser is only supplying identifiers rather than Name or keyword nodes + Py_ssize_t nattrs = asdl_seq_LEN(attrs); + for (Py_ssize_t i = 0; i < nattrs; i++) { + identifier attr = ((identifier)asdl_seq_GET(attrs, i)); + SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, i))); + if (forbidden_name(c, attr, Store)) { + return -1; + } + for (Py_ssize_t j = i + 1; j < nattrs; j++) { + identifier other = ((identifier)asdl_seq_GET(attrs, j)); + if (!PyUnicode_Compare(attr, other)) { + SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, j))); + compiler_error(c, "attribute name repeated in class pattern: %U", attr); + return -1; + } + } + } + return 0; +} + +static int +compiler_pattern_class(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchClass_kind); + asdl_pattern_seq *patterns = p->v.MatchClass.patterns; + asdl_identifier_seq *kwd_attrs = p->v.MatchClass.kwd_attrs; + asdl_pattern_seq *kwd_patterns = p->v.MatchClass.kwd_patterns; + Py_ssize_t nargs = asdl_seq_LEN(patterns); + Py_ssize_t nattrs = asdl_seq_LEN(kwd_attrs); + Py_ssize_t nkwd_patterns = asdl_seq_LEN(kwd_patterns); + if (nattrs != nkwd_patterns) { + // AST validator shouldn't let this happen, but if it does, + // just fail, don't crash out of the interpreter + const char * e = "kwd_attrs (%d) / kwd_patterns (%d) length mismatch in class pattern"; + return compiler_error(c, e, nattrs, nkwd_patterns); + } + if (INT_MAX < nargs || INT_MAX < nargs + nattrs - 1) { + const char *e = "too many sub-patterns in class pattern %R"; + return compiler_error(c, e, p->v.MatchClass.cls); + } + if (nattrs) { + RETURN_IF_FALSE(!validate_kwd_attrs(c, kwd_attrs, kwd_patterns)); + SET_LOC(c, p); + } + VISIT(c, expr, p->v.MatchClass.cls); + PyObject *attr_names; + RETURN_IF_FALSE(attr_names = PyTuple_New(nattrs)); + Py_ssize_t i; + for (i = 0; i < nattrs; i++) { + PyObject *name = asdl_seq_GET(kwd_attrs, i); + Py_INCREF(name); + PyTuple_SET_ITEM(attr_names, i, name); + } + ADDOP_LOAD_CONST_NEW(c, attr_names); + ADDOP_I(c, MATCH_CLASS, nargs); + // TOS is now a tuple of (nargs + nattrs) attributes. Preserve it: + pc->on_top++; + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + for (i = 0; i < nargs + nattrs; i++) { + pattern_ty pattern; + if (i < nargs) { + // Positional: + pattern = asdl_seq_GET(patterns, i); + } + else { + // Keyword: + pattern = asdl_seq_GET(kwd_patterns, i - nargs); + } + if (WILDCARD_CHECK(pattern)) { + continue; + } + // Get the i-th attribute, and match it against the i-th pattern: + ADDOP(c, DUP_TOP); + ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(i)); + ADDOP(c, BINARY_SUBSCR); + RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc)); + } + // Success! 
Pop the tuple of attributes: + pc->on_top--; + ADDOP(c, POP_TOP); + return 1; +} + +static int +compiler_pattern_mapping(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchMapping_kind); + asdl_expr_seq *keys = p->v.MatchMapping.keys; + asdl_pattern_seq *patterns = p->v.MatchMapping.patterns; + Py_ssize_t size = asdl_seq_LEN(keys); + Py_ssize_t npatterns = asdl_seq_LEN(patterns); + if (size != npatterns) { + // AST validator shouldn't let this happen, but if it does, + // just fail, don't crash out of the interpreter + const char * e = "keys (%d) / patterns (%d) length mismatch in mapping pattern"; + return compiler_error(c, e, size, npatterns); + } + // We have a double-star target if "rest" is set + PyObject *star_target = p->v.MatchMapping.rest; + // We need to keep the subject on top during the mapping and length checks: + pc->on_top++; + ADDOP(c, MATCH_MAPPING); + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + if (!size && !star_target) { + // If the pattern is just "{}", we're done! Pop the subject: + pc->on_top--; + ADDOP(c, POP_TOP); + return 1; + } + if (size) { + // If the pattern has any keys in it, perform a length check: + ADDOP(c, GET_LEN); + ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size)); + ADDOP_COMPARE(c, GtE); + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + } + if (INT_MAX < size - 1) { + return compiler_error(c, "too many sub-patterns in mapping pattern"); + } + // Collect all of the keys into a tuple for MATCH_KEYS and + // COPY_DICT_WITHOUT_KEYS. They can either be dotted names or literals: + + // Maintaining a set of Constant_kind kind keys allows us to raise a + // SyntaxError in the case of duplicates. + PyObject *seen = PySet_New(NULL); + if (seen == NULL) { + return 0; + } + + // NOTE: goto error on failure in the loop below to avoid leaking `seen` + for (Py_ssize_t i = 0; i < size; i++) { + expr_ty key = asdl_seq_GET(keys, i); + if (key == NULL) { + const char *e = "can't use NULL keys in MatchMapping " + "(set 'rest' parameter instead)"; + SET_LOC(c, ((pattern_ty) asdl_seq_GET(patterns, i))); + compiler_error(c, e); + goto error; + } + + if (key->kind == Constant_kind) { + int in_seen = PySet_Contains(seen, key->v.Constant.value); + if (in_seen < 0) { + goto error; + } + if (in_seen) { + const char *e = "mapping pattern checks duplicate key (%R)"; + compiler_error(c, e, key->v.Constant.value); + goto error; + } + if (PySet_Add(seen, key->v.Constant.value)) { + goto error; + } + } + + else if (key->kind != Attribute_kind) { + const char *e = "mapping pattern keys may only match literals and attribute lookups"; + compiler_error(c, e); + goto error; + } + if (!compiler_visit_expr(c, key)) { + goto error; + } + } + + // all keys have been checked; there are no duplicates + Py_DECREF(seen); + + ADDOP_I(c, BUILD_TUPLE, size); + ADDOP(c, MATCH_KEYS); + // There's now a tuple of keys and a tuple of values on top of the subject: + pc->on_top += 2; + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + // So far so good. Use that tuple of values on the stack to match + // sub-patterns against: + for (Py_ssize_t i = 0; i < size; i++) { + pattern_ty pattern = asdl_seq_GET(patterns, i); + if (WILDCARD_CHECK(pattern)) { + continue; + } + ADDOP(c, DUP_TOP); + ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(i)); + ADDOP(c, BINARY_SUBSCR); + RETURN_IF_FALSE(compiler_pattern_subpattern(c, pattern, pc)); + } + // If we get this far, it's a match! 
We're done with the tuple of values, + // and whatever happens next should consume the tuple of keys underneath it: + pc->on_top -= 2; + ADDOP(c, POP_TOP); + if (star_target) { + // If we have a starred name, bind a dict of remaining items to it: + ADDOP(c, COPY_DICT_WITHOUT_KEYS); + RETURN_IF_FALSE(pattern_helper_store_name(c, star_target, pc)); + } + else { + // Otherwise, we don't care about this tuple of keys anymore: + ADDOP(c, POP_TOP); + } + // Pop the subject: + pc->on_top--; + ADDOP(c, POP_TOP); + return 1; + +error: + Py_DECREF(seen); + return 0; +} + +static int +compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchOr_kind); + basicblock *end; + RETURN_IF_FALSE(end = compiler_new_block(c)); + Py_ssize_t size = asdl_seq_LEN(p->v.MatchOr.patterns); + assert(size > 1); + // We're going to be messing with pc. Keep the original info handy: + pattern_context old_pc = *pc; + Py_INCREF(pc->stores); + // control is the list of names bound by the first alternative. It is used + // for checking different name bindings in alternatives, and for correcting + // the order in which extracted elements are placed on the stack. + PyObject *control = NULL; + // NOTE: We can't use returning macros anymore! goto error on error. + for (Py_ssize_t i = 0; i < size; i++) { + pattern_ty alt = asdl_seq_GET(p->v.MatchOr.patterns, i); + SET_LOC(c, alt); + PyObject *pc_stores = PyList_New(0); + if (pc_stores == NULL) { + goto error; + } + Py_SETREF(pc->stores, pc_stores); + // An irrefutable sub-pattern must be last, if it is allowed at all: + pc->allow_irrefutable = (i == size - 1) && old_pc.allow_irrefutable; + pc->fail_pop = NULL; + pc->fail_pop_size = 0; + pc->on_top = 0; + if (!compiler_addop(c, DUP_TOP) || !compiler_pattern(c, alt, pc)) { + goto error; + } + // Success! + Py_ssize_t nstores = PyList_GET_SIZE(pc->stores); + if (!i) { + // This is the first alternative, so save its stores as a "control" + // for the others (they can't bind a different set of names, and + // might need to be reordered): + assert(control == NULL); + control = pc->stores; + Py_INCREF(control); + } + else if (nstores != PyList_GET_SIZE(control)) { + goto diff; + } + else if (nstores) { + // There were captures. Check to see if we differ from control: + Py_ssize_t icontrol = nstores; + while (icontrol--) { + PyObject *name = PyList_GET_ITEM(control, icontrol); + Py_ssize_t istores = PySequence_Index(pc->stores, name); + if (istores < 0) { + PyErr_Clear(); + goto diff; + } + if (icontrol != istores) { + // Reorder the names on the stack to match the order of the + // names in control. There's probably a better way of doing + // this; the current solution is potentially very + // inefficient when each alternative subpattern binds lots + // of names in different orders. It's fine for reasonable + // cases, though. 
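
[Illustrative aside, not part of the diff] When a later alternative of an or-pattern binds the same names as the first alternative but in a different order, the compiler reorders its pc->stores list to match the "control" order, and mirrors every step on the value stack with ROT_N so the captured values move in lockstep. The pure list manipulation, re-enacted on single-character names in a small standalone C program (control and stores are assumed to hold the same names):

#include <stdio.h>
#include <string.h>

static void reorder(char *stores, const char *control) {
    int n = (int)strlen(control);
    for (int icontrol = n - 1; icontrol >= 0; icontrol--) {
        int istores = (int)(strchr(stores, control[icontrol]) - stores);
        if (istores == icontrol)
            continue;                       /* already in the right place */
        int rotations = istores + 1;
        int insert_at = icontrol - istores;
        char rotated[16];
        memcpy(rotated, stores, (size_t)rotations);             /* rotated = stores[:rotations] */
        memmove(stores, stores + rotations,                     /* del stores[:rotations]       */
                (size_t)(n - rotations));
        memmove(stores + insert_at + rotations,                 /* open a gap at insert_at      */
                stores + insert_at,
                (size_t)(n - rotations - insert_at));
        memcpy(stores + insert_at, rotated, (size_t)rotations); /* splice the slice back in     */
        /* the real compiler emits ROT_N(icontrol + 1) once per rotation here */
    }
}

int main(void) {
    char stores[] = "bca";       /* this alternative bound b, c, a ...      */
    reorder(stores, "abc");      /* ... but the first alternative bound a, b, c */
    printf("%s\n", stores);      /* prints "abc" */
    return 0;
}
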
+ assert(istores < icontrol); + Py_ssize_t rotations = istores + 1; + // Perform the same rotation on pc->stores: + PyObject *rotated = PyList_GetSlice(pc->stores, 0, + rotations); + if (rotated == NULL || + PyList_SetSlice(pc->stores, 0, rotations, NULL) || + PyList_SetSlice(pc->stores, icontrol - istores, + icontrol - istores, rotated)) + { + Py_XDECREF(rotated); + goto error; + } + Py_DECREF(rotated); + // That just did: + // rotated = pc_stores[:rotations] + // del pc_stores[:rotations] + // pc_stores[icontrol-istores:icontrol-istores] = rotated + // Do the same thing to the stack, using several ROT_Ns: + while (rotations--) { + if (!compiler_addop_i(c, ROT_N, icontrol + 1)) { + goto error; + } + } + } + } + } + assert(control); + if (!compiler_addop_j(c, JUMP_FORWARD, end) || + !compiler_next_block(c) || + !emit_and_reset_fail_pop(c, pc)) + { + goto error; + } + } + Py_DECREF(pc->stores); + *pc = old_pc; + Py_INCREF(pc->stores); + // Need to NULL this for the PyObject_Free call in the error block. + old_pc.fail_pop = NULL; + // No match. Pop the remaining copy of the subject and fail: + if (!compiler_addop(c, POP_TOP) || !jump_to_fail_pop(c, pc, JUMP_FORWARD)) { + goto error; + } + compiler_use_next_block(c, end); + Py_ssize_t nstores = PyList_GET_SIZE(control); + // There's a bunch of stuff on the stack between any where the new stores + // are and where they need to be: + // - The other stores. + // - A copy of the subject. + // - Anything else that may be on top of the stack. + // - Any previous stores we've already stashed away on the stack. + Py_ssize_t nrots = nstores + 1 + pc->on_top + PyList_GET_SIZE(pc->stores); + for (Py_ssize_t i = 0; i < nstores; i++) { + // Rotate this capture to its proper place on the stack: + if (!compiler_addop_i(c, ROT_N, nrots)) { + goto error; + } + // Update the list of previous stores with this new name, checking for + // duplicates: + PyObject *name = PyList_GET_ITEM(control, i); + int dupe = PySequence_Contains(pc->stores, name); + if (dupe < 0) { + goto error; + } + if (dupe) { + compiler_error_duplicate_store(c, name); + goto error; + } + if (PyList_Append(pc->stores, name)) { + goto error; + } + } + Py_DECREF(old_pc.stores); + Py_DECREF(control); + // NOTE: Returning macros are safe again. + // Pop the copy of the subject: + ADDOP(c, POP_TOP); + return 1; +diff: + compiler_error(c, "alternative patterns bind different names"); +error: + PyObject_Free(old_pc.fail_pop); + Py_DECREF(old_pc.stores); + Py_XDECREF(control); + return 0; +} + + +static int +compiler_pattern_sequence(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchSequence_kind); + asdl_pattern_seq *patterns = p->v.MatchSequence.patterns; + Py_ssize_t size = asdl_seq_LEN(patterns); + Py_ssize_t star = -1; + int only_wildcard = 1; + int star_wildcard = 0; + // Find a starred name, if it exists. 
There may be at most one: + for (Py_ssize_t i = 0; i < size; i++) { + pattern_ty pattern = asdl_seq_GET(patterns, i); + if (pattern->kind == MatchStar_kind) { + if (star >= 0) { + const char *e = "multiple starred names in sequence pattern"; + return compiler_error(c, e); + } + star_wildcard = WILDCARD_STAR_CHECK(pattern); + only_wildcard &= star_wildcard; + star = i; + continue; + } + only_wildcard &= WILDCARD_CHECK(pattern); + } + // We need to keep the subject on top during the sequence and length checks: + pc->on_top++; + ADDOP(c, MATCH_SEQUENCE); + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + if (star < 0) { + // No star: len(subject) == size + ADDOP(c, GET_LEN); + ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size)); + ADDOP_COMPARE(c, Eq); + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + } + else if (size > 1) { + // Star: len(subject) >= size - 1 + ADDOP(c, GET_LEN); + ADDOP_LOAD_CONST_NEW(c, PyLong_FromSsize_t(size - 1)); + ADDOP_COMPARE(c, GtE); + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + } + // Whatever comes next should consume the subject: + pc->on_top--; + if (only_wildcard) { + // Patterns like: [] / [_] / [_, _] / [*_] / [_, *_] / [_, _, *_] / etc. + ADDOP(c, POP_TOP); + } + else if (star_wildcard) { + RETURN_IF_FALSE(pattern_helper_sequence_subscr(c, patterns, star, pc)); + } + else { + RETURN_IF_FALSE(pattern_helper_sequence_unpack(c, patterns, star, pc)); + } + return 1; +} + +static int +compiler_pattern_value(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchValue_kind); + expr_ty value = p->v.MatchValue.value; + if (!MATCH_VALUE_EXPR(value)) { + const char *e = "patterns may only match literals and attribute lookups"; + return compiler_error(c, e); + } + VISIT(c, expr, value); + ADDOP_COMPARE(c, Eq); + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + return 1; +} + +static int +compiler_pattern_singleton(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + assert(p->kind == MatchSingleton_kind); + ADDOP_LOAD_CONST(c, p->v.MatchSingleton.value); + ADDOP_COMPARE(c, Is); + RETURN_IF_FALSE(jump_to_fail_pop(c, pc, POP_JUMP_IF_FALSE)); + return 1; +} + +static int +compiler_pattern(struct compiler *c, pattern_ty p, pattern_context *pc) +{ + SET_LOC(c, p); + switch (p->kind) { + case MatchValue_kind: + return compiler_pattern_value(c, p, pc); + case MatchSingleton_kind: + return compiler_pattern_singleton(c, p, pc); + case MatchSequence_kind: + return compiler_pattern_sequence(c, p, pc); + case MatchMapping_kind: + return compiler_pattern_mapping(c, p, pc); + case MatchClass_kind: + return compiler_pattern_class(c, p, pc); + case MatchStar_kind: + return compiler_pattern_star(c, p, pc); + case MatchAs_kind: + return compiler_pattern_as(c, p, pc); + case MatchOr_kind: + return compiler_pattern_or(c, p, pc); + } + // AST validator shouldn't let this happen, but if it does, + // just fail, don't crash out of the interpreter + const char *e = "invalid match pattern node in AST (kind=%d)"; + return compiler_error(c, e, p->kind); +} + +static int +compiler_match_inner(struct compiler *c, stmt_ty s, pattern_context *pc) +{ + VISIT(c, expr, s->v.Match.subject); + basicblock *end; + RETURN_IF_FALSE(end = compiler_new_block(c)); + Py_ssize_t cases = asdl_seq_LEN(s->v.Match.cases); + assert(cases > 0); + match_case_ty m = asdl_seq_GET(s->v.Match.cases, cases - 1); + int has_default = WILDCARD_CHECK(m->pattern) && 1 < cases; + for (Py_ssize_t i = 0; i < cases - has_default; i++) { + 
m = asdl_seq_GET(s->v.Match.cases, i); + SET_LOC(c, m->pattern); + // Only copy the subject if we're *not* on the last case: + if (i != cases - has_default - 1) { + ADDOP(c, DUP_TOP); + } + RETURN_IF_FALSE(pc->stores = PyList_New(0)); + // Irrefutable cases must be either guarded, last, or both: + pc->allow_irrefutable = m->guard != NULL || i == cases - 1; + pc->fail_pop = NULL; + pc->fail_pop_size = 0; + pc->on_top = 0; + // NOTE: Can't use returning macros here (they'll leak pc->stores)! + if (!compiler_pattern(c, m->pattern, pc)) { + Py_DECREF(pc->stores); + return 0; + } + assert(!pc->on_top); + // It's a match! Store all of the captured names (they're on the stack). + Py_ssize_t nstores = PyList_GET_SIZE(pc->stores); + for (Py_ssize_t n = 0; n < nstores; n++) { + PyObject *name = PyList_GET_ITEM(pc->stores, n); + if (!compiler_nameop(c, name, Store)) { + Py_DECREF(pc->stores); + return 0; + } + } + Py_DECREF(pc->stores); + // NOTE: Returning macros are safe again. + if (m->guard) { + RETURN_IF_FALSE(ensure_fail_pop(c, pc, 0)); + RETURN_IF_FALSE(compiler_jump_if(c, m->guard, pc->fail_pop[0], 0)); + } + // Success! Pop the subject off, we're done with it: + if (i != cases - has_default - 1) { + ADDOP(c, POP_TOP); + } + VISIT_SEQ(c, stmt, m->body); + ADDOP_JUMP(c, JUMP_FORWARD, end); + // If the pattern fails to match, we want the line number of the + // cleanup to be associated with the failed pattern, not the last line + // of the body + SET_LOC(c, m->pattern); + RETURN_IF_FALSE(emit_and_reset_fail_pop(c, pc)); + } + if (has_default) { + // A trailing "case _" is common, and lets us save a bit of redundant + // pushing and popping in the loop above: + m = asdl_seq_GET(s->v.Match.cases, cases - 1); + SET_LOC(c, m->pattern); + if (cases == 1) { + // No matches. Done with the subject: + ADDOP(c, POP_TOP); + } + else { + // Show line coverage for default case (it doesn't create bytecode) + ADDOP(c, NOP); + } + if (m->guard) { + RETURN_IF_FALSE(compiler_jump_if(c, m->guard, end, 0)); + } + VISIT_SEQ(c, stmt, m->body); + } + compiler_use_next_block(c, end); + return 1; +} + +static int +compiler_match(struct compiler *c, stmt_ty s) +{ + pattern_context pc; + pc.fail_pop = NULL; + int result = compiler_match_inner(c, s, &pc); + PyObject_Free(pc.fail_pop); + return result; +} + +#undef WILDCARD_CHECK +#undef WILDCARD_STAR_CHECK + /* End of the compiler section, beginning of the assembler section */ /* do depth-first search of basic block graph, starting with block. @@ -5438,39 +6460,14 @@ struct assembler { PyObject *a_bytecode; /* string containing bytecode */ int a_offset; /* offset into bytecode */ int a_nblocks; /* number of reachable blocks */ - basicblock **a_postorder; /* list of blocks in dfs postorder */ PyObject *a_lnotab; /* string containing lnotab */ int a_lnotab_off; /* offset into lnotab */ - int a_lineno; /* last lineno of emitted instruction */ - int a_lineno_off; /* bytecode offset of last lineno */ + int a_prevlineno; /* lineno of last emitted line in line table */ + int a_lineno; /* lineno of last emitted instruction */ + int a_lineno_start; /* bytecode start offset of current lineno */ + basicblock *a_entry; }; -static void -dfs(struct compiler *c, basicblock *b, struct assembler *a, int end) -{ - int i, j; - - /* Get rid of recursion for normal control flow. - Since the number of blocks is limited, unused space in a_postorder - (from a_nblocks to end) can be used as a stack for still not ordered - blocks. 
*/ - for (j = end; b && !b->b_seen; b = b->b_next) { - b->b_seen = 1; - assert(a->a_nblocks < j); - a->a_postorder[--j] = b; - } - while (j < end) { - b = a->a_postorder[j++]; - for (i = 0; i < b->b_iused; i++) { - struct instr *instr = &b->b_instr[i]; - if (instr->i_jrel || instr->i_jabs) - dfs(c, instr->i_target, a, j); - } - assert(a->a_nblocks < j); - a->a_postorder[a->a_nblocks++] = b; - } -} - Py_LOCAL_INLINE(void) stackdepth_push(basicblock ***sp, basicblock *b, int depth) { @@ -5496,8 +6493,7 @@ stackdepth(struct compiler *c) entryblock = b; nblocks++; } - if (!entryblock) - return 0; + assert(entryblock!= NULL); stack = (basicblock **)PyObject_Malloc(sizeof(basicblock *) * nblocks); if (!stack) { PyErr_NoMemory(); @@ -5505,7 +6501,11 @@ stackdepth(struct compiler *c) } sp = stack; - stackdepth_push(&sp, entryblock, 0); + if (c->u->u_ste->ste_generator || c->u->u_ste->ste_coroutine) { + stackdepth_push(&sp, entryblock, 1); + } else { + stackdepth_push(&sp, entryblock, 0); + } while (sp != stack) { b = *--sp; int depth = b->b_startdepth; @@ -5515,15 +6515,17 @@ stackdepth(struct compiler *c) struct instr *instr = &b->b_instr[i]; int effect = stack_effect(instr->i_opcode, instr->i_oparg, 0); if (effect == PY_INVALID_STACK_EFFECT) { - _Py_FatalErrorFormat(__func__, - "opcode = %d", instr->i_opcode); + PyErr_Format(PyExc_SystemError, + "compiler stack_effect(opcode=%d, arg=%i) failed", + instr->i_opcode, instr->i_oparg); + return -1; } int new_depth = depth + effect; if (new_depth > maxdepth) { maxdepth = new_depth; } assert(depth >= 0); /* invalid code or bug in stackdepth() */ - if (instr->i_jrel || instr->i_jabs) { + if (is_jump(instr)) { effect = stack_effect(instr->i_opcode, instr->i_oparg, 1); assert(effect != PY_INVALID_STACK_EFFECT); int target_depth = depth + effect; @@ -5546,6 +6548,7 @@ stackdepth(struct compiler *c) } } if (next != NULL) { + assert(b->b_nofallthrough == 0); stackdepth_push(&sp, next, depth); } } @@ -5557,24 +6560,25 @@ static int assemble_init(struct assembler *a, int nblocks, int firstlineno) { memset(a, 0, sizeof(struct assembler)); - a->a_lineno = firstlineno; + a->a_prevlineno = a->a_lineno = firstlineno; + a->a_lnotab = NULL; a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE); - if (!a->a_bytecode) - return 0; + if (a->a_bytecode == NULL) { + goto error; + } a->a_lnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); - if (!a->a_lnotab) - return 0; - if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) { - PyErr_NoMemory(); - return 0; + if (a->a_lnotab == NULL) { + goto error; } - a->a_postorder = (basicblock **)PyObject_Malloc( - sizeof(basicblock *) * nblocks); - if (!a->a_postorder) { + if ((size_t)nblocks > SIZE_MAX / sizeof(basicblock *)) { PyErr_NoMemory(); - return 0; + goto error; } return 1; +error: + Py_XDECREF(a->a_bytecode); + Py_XDECREF(a->a_lnotab); + return 0; } static void @@ -5582,8 +6586,6 @@ assemble_free(struct assembler *a) { Py_XDECREF(a->a_bytecode); Py_XDECREF(a->a_lnotab); - if (a->a_postorder) - PyObject_Free(a->a_postorder); } static int @@ -5597,114 +6599,81 @@ blocksize(basicblock *b) return size; } -/* Appends a pair to the end of the line number table, a_lnotab, representing - the instruction's bytecode offset and line number. See - Objects/lnotab_notes.txt for the description of the line number table. 
*/ - static int -assemble_lnotab(struct assembler *a, struct instr *i) +assemble_emit_linetable_pair(struct assembler *a, int bdelta, int ldelta) { - int d_bytecode, d_lineno; - Py_ssize_t len; - unsigned char *lnotab; - - d_lineno = i->i_lineno - a->a_lineno; - if (d_lineno == 0) { - return 1; + Py_ssize_t len = PyBytes_GET_SIZE(a->a_lnotab); + if (a->a_lnotab_off + 2 >= len) { + if (_PyBytes_Resize(&a->a_lnotab, len * 2) < 0) + return 0; } + unsigned char *lnotab = (unsigned char *) PyBytes_AS_STRING(a->a_lnotab); + lnotab += a->a_lnotab_off; + a->a_lnotab_off += 2; + *lnotab++ = bdelta; + *lnotab++ = ldelta; + return 1; +} - d_bytecode = (a->a_offset - a->a_lineno_off) * sizeof(_Py_CODEUNIT); - assert(d_bytecode >= 0); +/* Appends a range to the end of the line number table. See + * Objects/lnotab_notes.txt for the description of the line number table. */ - if (d_bytecode > 255) { - int j, nbytes, ncodes = d_bytecode / 255; - nbytes = a->a_lnotab_off + 2 * ncodes; - len = PyBytes_GET_SIZE(a->a_lnotab); - if (nbytes >= len) { - if ((len <= INT_MAX / 2) && (len * 2 < nbytes)) - len = nbytes; - else if (len <= INT_MAX / 2) - len *= 2; - else { - PyErr_NoMemory(); +static int +assemble_line_range(struct assembler *a) +{ + int ldelta, bdelta; + bdelta = (a->a_offset - a->a_lineno_start) * sizeof(_Py_CODEUNIT); + if (bdelta == 0) { + return 1; + } + if (a->a_lineno < 0) { + ldelta = -128; + } + else { + ldelta = a->a_lineno - a->a_prevlineno; + a->a_prevlineno = a->a_lineno; + while (ldelta > 127) { + if (!assemble_emit_linetable_pair(a, 0, 127)) { return 0; } - if (_PyBytes_Resize(&a->a_lnotab, len) < 0) - return 0; - } - lnotab = (unsigned char *) - PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off; - for (j = 0; j < ncodes; j++) { - *lnotab++ = 255; - *lnotab++ = 0; + ldelta -= 127; } - d_bytecode -= ncodes * 255; - a->a_lnotab_off += ncodes * 2; - } - assert(0 <= d_bytecode && d_bytecode <= 255); - - if (d_lineno < -128 || 127 < d_lineno) { - int j, nbytes, ncodes, k; - if (d_lineno < 0) { - k = -128; - /* use division on positive numbers */ - ncodes = (-d_lineno) / 128; - } - else { - k = 127; - ncodes = d_lineno / 127; - } - d_lineno -= ncodes * k; - assert(ncodes >= 1); - nbytes = a->a_lnotab_off + 2 * ncodes; - len = PyBytes_GET_SIZE(a->a_lnotab); - if (nbytes >= len) { - if ((len <= INT_MAX / 2) && len * 2 < nbytes) - len = nbytes; - else if (len <= INT_MAX / 2) - len *= 2; - else { - PyErr_NoMemory(); + while (ldelta < -127) { + if (!assemble_emit_linetable_pair(a, 0, -127)) { return 0; } - if (_PyBytes_Resize(&a->a_lnotab, len) < 0) - return 0; + ldelta += 127; } - lnotab = (unsigned char *) - PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off; - *lnotab++ = d_bytecode; - *lnotab++ = k; - d_bytecode = 0; - for (j = 1; j < ncodes; j++) { - *lnotab++ = 0; - *lnotab++ = k; - } - a->a_lnotab_off += ncodes * 2; } - assert(-128 <= d_lineno && d_lineno <= 127); - - len = PyBytes_GET_SIZE(a->a_lnotab); - if (a->a_lnotab_off + 2 >= len) { - if (_PyBytes_Resize(&a->a_lnotab, len * 2) < 0) + assert(-128 <= ldelta && ldelta < 128); + while (bdelta > 254) { + if (!assemble_emit_linetable_pair(a, 254, ldelta)) { return 0; + } + ldelta = a->a_lineno < 0 ? 
-128 : 0; + bdelta -= 254; + } + if (!assemble_emit_linetable_pair(a, bdelta, ldelta)) { + return 0; } - lnotab = (unsigned char *) - PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off; + a->a_lineno_start = a->a_offset; + return 1; +} - a->a_lnotab_off += 2; - if (d_bytecode) { - *lnotab++ = d_bytecode; - *lnotab++ = d_lineno; +static int +assemble_lnotab(struct assembler *a, struct instr *i) +{ + if (i->i_lineno == a->a_lineno) { + return 1; } - else { /* First line of a block; def stmt, etc. */ - *lnotab++ = 0; - *lnotab++ = d_lineno; + if (!assemble_line_range(a)) { + return 0; } a->a_lineno = i->i_lineno; - a->a_lineno_off = a->a_offset; return 1; } + /* assemble_emit() Extend the bytecode with a new instruction. Update lnotab if necessary. @@ -5734,6 +6703,31 @@ assemble_emit(struct assembler *a, struct instr *i) } static void +normalize_jumps(struct assembler *a) +{ + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + b->b_visited = 0; + } + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + b->b_visited = 1; + if (b->b_iused == 0) { + continue; + } + struct instr *last = &b->b_instr[b->b_iused-1]; + if (last->i_opcode == JUMP_ABSOLUTE) { + if (last->i_target->b_visited == 0) { + last->i_opcode = JUMP_FORWARD; + } + } + if (last->i_opcode == JUMP_FORWARD) { + if (last->i_target->b_visited == 1) { + last->i_opcode = JUMP_ABSOLUTE; + } + } + } +} + +static void assemble_jump_offsets(struct assembler *a, struct compiler *c) { basicblock *b; @@ -5744,8 +6738,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c) Replace block pointer with position in bytecode. */ do { totsize = 0; - for (i = a->a_nblocks - 1; i >= 0; i--) { - b = a->a_postorder[i]; + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { bsize = blocksize(b); b->b_offset = totsize; totsize += bsize; @@ -5761,12 +6754,11 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c) the jump instruction. */ bsize += isize; - if (instr->i_jabs || instr->i_jrel) { + if (is_jump(instr)) { instr->i_oparg = instr->i_target->b_offset; - if (instr->i_jrel) { + if (is_relative_jump(instr)) { instr->i_oparg -= bsize; } - instr->i_oparg *= sizeof(_Py_CODEUNIT); if (instrsize(instr->i_oparg) != isize) { extended_arg_recompile = 1; } @@ -5821,7 +6813,7 @@ consts_dict_keys_inorder(PyObject *dict) return NULL; while (PyDict_Next(dict, &pos, &k, &v)) { i = PyLong_AS_LONG(v); - /* The keys of the dictionary can be tuples wrapping a contant. + /* The keys of the dictionary can be tuples wrapping a constant. * (see compiler_add_o and _PyCode_ConstantKey). In that case * the object we want is always second. */ if (PyTuple_CheckExact(k)) { @@ -5868,14 +6860,12 @@ compute_code_flags(struct compiler *c) return flags; } -// Merge *tuple* with constant cache. +// Merge *obj* with constant cache. // Unlike merge_consts_recursive(), this function doesn't work recursively. static int -merge_const_tuple(struct compiler *c, PyObject **tuple) +merge_const_one(struct compiler *c, PyObject **obj) { - assert(PyTuple_CheckExact(*tuple)); - - PyObject *key = _PyCode_ConstantKey(*tuple); + PyObject *key = _PyCode_ConstantKey(*obj); if (key == NULL) { return 0; } @@ -5886,40 +6876,40 @@ merge_const_tuple(struct compiler *c, PyObject **tuple) if (t == NULL) { return 0; } - if (t == key) { // tuple is new constant. + if (t == key) { // obj is new constant. 
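
[Illustrative aside, not part of the diff] merge_const_one generalizes the old merge_const_tuple so that any constant, including the bytecode and line-table bytes produced later in assemble(), can be deduplicated through the per-compile constant cache. The idea, reduced to a toy interning table keyed by value equality (the real cache is a Python dict keyed by _PyCode_ConstantKey(), which also keeps e.g. 0 and 0.0 distinct):

#include <stdio.h>
#include <string.h>

static const char *cache[16];
static int cache_len = 0;

static const char *merge_const(const char *obj) {
    for (int i = 0; i < cache_len; i++) {
        if (strcmp(cache[i], obj) == 0)
            return cache[i];          /* equal value seen before: reuse the canonical object */
    }
    cache[cache_len++] = obj;         /* obj is new: it becomes the canonical object */
    return obj;
}

int main(void) {
    char a[] = "spam";
    char b[] = "spam";                /* equal contents, distinct storage */
    const char *ca = merge_const(a);
    const char *cb = merge_const(b);
    printf("same object after merge: %s\n", ca == cb ? "yes" : "no");
    return 0;
}
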
return 1; } - PyObject *u = PyTuple_GET_ITEM(t, 1); - Py_INCREF(u); - Py_DECREF(*tuple); - *tuple = u; + if (PyTuple_CheckExact(t)) { + // t is still borrowed reference + t = PyTuple_GET_ITEM(t, 1); + } + + Py_INCREF(t); + Py_DECREF(*obj); + *obj = t; return 1; } static PyCodeObject * -makecode(struct compiler *c, struct assembler *a) +makecode(struct compiler *c, struct assembler *a, PyObject *consts) { - PyObject *tmp; PyCodeObject *co = NULL; - PyObject *consts = NULL; PyObject *names = NULL; PyObject *varnames = NULL; PyObject *name = NULL; PyObject *freevars = NULL; PyObject *cellvars = NULL; - PyObject *bytecode = NULL; Py_ssize_t nlocals; int nlocals_int; int flags; int posorkeywordargcount, posonlyargcount, kwonlyargcount, maxdepth; - consts = consts_dict_keys_inorder(c->u->u_consts); names = dict_keys_inorder(c->u->u_names, 0); varnames = dict_keys_inorder(c->u->u_varnames, 0); - if (!consts || !names || !varnames) + if (!names || !varnames) { goto error; - + } cellvars = dict_keys_inorder(c->u->u_cellvars, 0); if (!cellvars) goto error; @@ -5927,10 +6917,10 @@ makecode(struct compiler *c, struct assembler *a) if (!freevars) goto error; - if (!merge_const_tuple(c, &names) || - !merge_const_tuple(c, &varnames) || - !merge_const_tuple(c, &cellvars) || - !merge_const_tuple(c, &freevars)) + if (!merge_const_one(c, &names) || + !merge_const_one(c, &varnames) || + !merge_const_one(c, &cellvars) || + !merge_const_one(c, &freevars)) { goto error; } @@ -5943,16 +6933,12 @@ makecode(struct compiler *c, struct assembler *a) if (flags < 0) goto error; - bytecode = PyCode_Optimize(a->a_bytecode, consts, names, a->a_lnotab); - if (!bytecode) - goto error; - - tmp = PyList_AsTuple(consts); /* PyCode_New requires a tuple */ - if (!tmp) + consts = PyList_AsTuple(consts); /* PyCode_New requires a tuple */ + if (consts == NULL) { goto error; - Py_DECREF(consts); - consts = tmp; - if (!merge_const_tuple(c, &consts)) { + } + if (!merge_const_one(c, &consts)) { + Py_DECREF(consts); goto error; } @@ -5961,21 +6947,28 @@ makecode(struct compiler *c, struct assembler *a) kwonlyargcount = Py_SAFE_DOWNCAST(c->u->u_kwonlyargcount, Py_ssize_t, int); maxdepth = stackdepth(c); if (maxdepth < 0) { + Py_DECREF(consts); + goto error; + } + if (maxdepth > MAX_ALLOWED_STACK_USE) { + PyErr_Format(PyExc_SystemError, + "excessive stack use: stack is %d deep", + maxdepth); + Py_DECREF(consts); goto error; } co = PyCode_NewWithPosOnlyArgs(posonlyargcount+posorkeywordargcount, posonlyargcount, kwonlyargcount, nlocals_int, - maxdepth, flags, bytecode, consts, names, + maxdepth, flags, a->a_bytecode, consts, names, varnames, freevars, cellvars, c->c_filename, c->u->u_name, c->u->u_firstlineno, a->a_lnotab); + Py_DECREF(consts); error: - Py_XDECREF(consts); Py_XDECREF(names); Py_XDECREF(varnames); Py_XDECREF(name); Py_XDECREF(freevars); Py_XDECREF(cellvars); - Py_XDECREF(bytecode); return co; } @@ -5983,10 +6976,11 @@ makecode(struct compiler *c, struct assembler *a) /* For debugging purposes only */ #if 0 static void -dump_instr(const struct instr *i) +dump_instr(struct instr *i) { - const char *jrel = i->i_jrel ? "jrel " : ""; - const char *jabs = i->i_jabs ? "jabs " : ""; + const char *jrel = (is_relative_jump(i)) ? "jrel " : ""; + const char *jabs = (is_jump(i) && !is_relative_jump(i))? "jabs " : ""; + char arg[128]; *arg = '\0'; @@ -6000,10 +6994,9 @@ dump_instr(const struct instr *i) static void dump_basicblock(const basicblock *b) { - const char *seen = b->b_seen ? "seen " : ""; const char *b_return = b->b_return ? 
"return " : ""; - fprintf(stderr, "used: %d, depth: %d, offset: %d %s%s\n", - b->b_iused, b->b_startdepth, b->b_offset, seen, b_return); + fprintf(stderr, "used: %d, depth: %d, offset: %d %s\n", + b->b_iused, b->b_startdepth, b->b_offset, b_return); if (b->b_instr) { int i; for (i = 0; i < b->b_iused; i++) { @@ -6014,69 +7007,910 @@ dump_basicblock(const basicblock *b) } #endif + +static int +normalize_basic_block(basicblock *bb); + +static int +optimize_cfg(struct compiler *c, struct assembler *a, PyObject *consts); + +static int +trim_unused_consts(struct compiler *c, struct assembler *a, PyObject *consts); + +/* Duplicates exit BBs, so that line numbers can be propagated to them */ +static int +duplicate_exits_without_lineno(struct compiler *c); + +static int +extend_block(basicblock *bb); + +static int +insert_generator_prefix(struct compiler *c, basicblock *entryblock) { + + int flags = compute_code_flags(c); + if (flags < 0) { + return -1; + } + int kind; + if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { + if (flags & CO_COROUTINE) { + kind = 1; + } + else if (flags & CO_ASYNC_GENERATOR) { + kind = 2; + } + else { + kind = 0; + } + } + else { + return 0; + } + if (compiler_next_instr(entryblock) < 0) { + return -1; + } + for (int i = entryblock->b_iused-1; i > 0; i--) { + entryblock->b_instr[i] = entryblock->b_instr[i-1]; + } + entryblock->b_instr[0].i_opcode = GEN_START; + entryblock->b_instr[0].i_oparg = kind; + entryblock->b_instr[0].i_lineno = -1; + entryblock->b_instr[0].i_target = NULL; + return 0; +} + +/* Make sure that all returns have a line number, even if early passes + * have failed to propagate a correct line number. + * The resulting line number may not be correct according to PEP 626, + * but should be "good enough", and no worse than in older versions. */ +static void +guarantee_lineno_for_exits(struct assembler *a, int firstlineno) { + int lineno = firstlineno; + assert(lineno > 0); + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + if (b->b_iused == 0) { + continue; + } + struct instr *last = &b->b_instr[b->b_iused-1]; + if (last->i_lineno < 0) { + if (last->i_opcode == RETURN_VALUE) { + for (int i = 0; i < b->b_iused; i++) { + assert(b->b_instr[i].i_lineno < 0); + + b->b_instr[i].i_lineno = lineno; + } + } + } + else { + lineno = last->i_lineno; + } + } +} + +static void +propagate_line_numbers(struct assembler *a); + static PyCodeObject * assemble(struct compiler *c, int addNone) { basicblock *b, *entryblock; struct assembler a; - int i, j, nblocks; + int j, nblocks; PyCodeObject *co = NULL; + PyObject *consts = NULL; /* Make sure every block that falls off the end returns None. XXX NEXT_BLOCK() isn't quite right, because if the last block ends with a jump or return b_next shouldn't set. */ if (!c->u->u_curblock->b_return) { - NEXT_BLOCK(c); + c->u->u_lineno = -1; if (addNone) ADDOP_LOAD_CONST(c, Py_None); ADDOP(c, RETURN_VALUE); } + for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) { + if (normalize_basic_block(b)) { + return NULL; + } + } + + for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) { + if (extend_block(b)) { + return NULL; + } + } + nblocks = 0; entryblock = NULL; for (b = c->u->u_blocks; b != NULL; b = b->b_list) { nblocks++; entryblock = b; } + assert(entryblock != NULL); + + if (insert_generator_prefix(c, entryblock)) { + goto error; + } /* Set firstlineno if it wasn't explicitly set. 
*/ if (!c->u->u_firstlineno) { - if (entryblock && entryblock->b_instr && entryblock->b_instr->i_lineno) + if (entryblock->b_instr && entryblock->b_instr->i_lineno) c->u->u_firstlineno = entryblock->b_instr->i_lineno; - else + else c->u->u_firstlineno = 1; } + if (!assemble_init(&a, nblocks, c->u->u_firstlineno)) goto error; - dfs(c, entryblock, &a, nblocks); + a.a_entry = entryblock; + a.a_nblocks = nblocks; + + consts = consts_dict_keys_inorder(c->u->u_consts); + if (consts == NULL) { + goto error; + } + + if (optimize_cfg(c, &a, consts)) { + goto error; + } + if (duplicate_exits_without_lineno(c)) { + return NULL; + } + if (trim_unused_consts(c, &a, consts)) { + goto error; + } + propagate_line_numbers(&a); + guarantee_lineno_for_exits(&a, c->u->u_firstlineno); + + /* Order of basic blocks must have been determined by now */ + normalize_jumps(&a); /* Can't modify the bytecode after computing jump offsets. */ assemble_jump_offsets(&a, c); - /* Emit code in reverse postorder from dfs. */ - for (i = a.a_nblocks - 1; i >= 0; i--) { - b = a.a_postorder[i]; + /* Emit code. */ + for(b = entryblock; b != NULL; b = b->b_next) { for (j = 0; j < b->b_iused; j++) if (!assemble_emit(&a, &b->b_instr[j])) goto error; } + if (!assemble_line_range(&a)) { + return 0; + } - if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) + if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) { + goto error; + } + if (!merge_const_one(c, &a.a_lnotab)) { + goto error; + } + if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0) { goto error; - if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0) + } + if (!merge_const_one(c, &a.a_bytecode)) { goto error; + } - co = makecode(c, &a); + co = makecode(c, &a, consts); error: + Py_XDECREF(consts); assemble_free(&a); return co; } -#undef PyAST_Compile -PyCodeObject * -PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags, - PyArena *arena) +/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n + with LOAD_CONST (c1, c2, ... cn). + The consts table must still be in list form so that the + new constant (c1, c2, ... cn) can be appended. + Called with codestr pointing to the first LOAD_CONST. 
+*/ +static int +fold_tuple_on_constants(struct compiler *c, + struct instr *inst, + int n, PyObject *consts) +{ + /* Pre-conditions */ + assert(PyList_CheckExact(consts)); + assert(inst[n].i_opcode == BUILD_TUPLE); + assert(inst[n].i_oparg == n); + + for (int i = 0; i < n; i++) { + if (inst[i].i_opcode != LOAD_CONST) { + return 0; + } + } + + /* Buildup new tuple of constants */ + PyObject *newconst = PyTuple_New(n); + if (newconst == NULL) { + return -1; + } + for (int i = 0; i < n; i++) { + int arg = inst[i].i_oparg; + PyObject *constant = PyList_GET_ITEM(consts, arg); + Py_INCREF(constant); + PyTuple_SET_ITEM(newconst, i, constant); + } + if (merge_const_one(c, &newconst) == 0) { + Py_DECREF(newconst); + return -1; + } + + Py_ssize_t index; + for (index = 0; index < PyList_GET_SIZE(consts); index++) { + if (PyList_GET_ITEM(consts, index) == newconst) { + break; + } + } + if (index == PyList_GET_SIZE(consts)) { + if ((size_t)index >= (size_t)INT_MAX - 1) { + Py_DECREF(newconst); + PyErr_SetString(PyExc_OverflowError, "too many constants"); + return -1; + } + if (PyList_Append(consts, newconst)) { + Py_DECREF(newconst); + return -1; + } + } + Py_DECREF(newconst); + for (int i = 0; i < n; i++) { + inst[i].i_opcode = NOP; + } + inst[n].i_opcode = LOAD_CONST; + inst[n].i_oparg = (int)index; + return 0; +} + + +// Eliminate n * ROT_N(n). +static void +fold_rotations(struct instr *inst, int n) +{ + for (int i = 0; i < n; i++) { + int rot; + switch (inst[i].i_opcode) { + case ROT_N: + rot = inst[i].i_oparg; + break; + case ROT_FOUR: + rot = 4; + break; + case ROT_THREE: + rot = 3; + break; + case ROT_TWO: + rot = 2; + break; + default: + return; + } + if (rot != n) { + return; + } + } + for (int i = 0; i < n; i++) { + inst[i].i_opcode = NOP; + } +} + +// Attempt to eliminate jumps to jumps by updating inst to jump to +// target->i_target using the provided opcode. Return whether or not the +// optimization was successful. +static bool +jump_thread(struct instr *inst, struct instr *target, int opcode) +{ + assert(is_jump(inst)); + assert(is_jump(target)); + // bpo-45773: If inst->i_target == target->i_target, then nothing actually + // changes (and we fall into an infinite loop): + if (inst->i_lineno == target->i_lineno && + inst->i_target != target->i_target) + { + inst->i_target = target->i_target; + inst->i_opcode = opcode; + return true; + } + return false; +} + +/* Maximum size of basic block that should be copied in optimizer */ +#define MAX_COPY_SIZE 4 + +/* Optimization */ +static int +optimize_basic_block(struct compiler *c, basicblock *bb, PyObject *consts) +{ + assert(PyList_CheckExact(consts)); + struct instr nop; + nop.i_opcode = NOP; + struct instr *target; + for (int i = 0; i < bb->b_iused; i++) { + struct instr *inst = &bb->b_instr[i]; + int oparg = inst->i_oparg; + int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0; + if (is_jump(inst)) { + /* Skip over empty basic blocks. 
*/ + while (inst->i_target->b_iused == 0) { + inst->i_target = inst->i_target->b_next; + } + target = &inst->i_target->b_instr[0]; + } + else { + target = &nop; + } + switch (inst->i_opcode) { + /* Remove LOAD_CONST const; conditional jump */ + case LOAD_CONST: + { + PyObject* cnt; + int is_true; + int jump_if_true; + switch(nextop) { + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + cnt = PyList_GET_ITEM(consts, oparg); + is_true = PyObject_IsTrue(cnt); + if (is_true == -1) { + goto error; + } + inst->i_opcode = NOP; + jump_if_true = nextop == POP_JUMP_IF_TRUE; + if (is_true == jump_if_true) { + bb->b_instr[i+1].i_opcode = JUMP_ABSOLUTE; + bb->b_nofallthrough = 1; + } + else { + bb->b_instr[i+1].i_opcode = NOP; + } + break; + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + cnt = PyList_GET_ITEM(consts, oparg); + is_true = PyObject_IsTrue(cnt); + if (is_true == -1) { + goto error; + } + jump_if_true = nextop == JUMP_IF_TRUE_OR_POP; + if (is_true == jump_if_true) { + bb->b_instr[i+1].i_opcode = JUMP_ABSOLUTE; + bb->b_nofallthrough = 1; + } + else { + inst->i_opcode = NOP; + bb->b_instr[i+1].i_opcode = NOP; + } + break; + } + break; + } + + /* Try to fold tuples of constants. + Skip over BUILD_SEQN 1 UNPACK_SEQN 1. + Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2. + Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */ + case BUILD_TUPLE: + if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) { + switch(oparg) { + case 1: + inst->i_opcode = NOP; + bb->b_instr[i+1].i_opcode = NOP; + break; + case 2: + inst->i_opcode = ROT_TWO; + bb->b_instr[i+1].i_opcode = NOP; + break; + case 3: + inst->i_opcode = ROT_THREE; + bb->b_instr[i+1].i_opcode = ROT_TWO; + } + break; + } + if (i >= oparg) { + if (fold_tuple_on_constants(c, inst-oparg, oparg, consts)) { + goto error; + } + } + break; + + /* Simplify conditional jump to conditional jump where the + result of the first test implies the success of a similar + test or the failure of the opposite test. + Arises in code like: + "a and b or c" + "(a and b) and c" + "(a or b) or c" + "(a or b) and c" + x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_FALSE_OR_POP z + --> x:JUMP_IF_FALSE_OR_POP z + x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_TRUE_OR_POP z + --> x:POP_JUMP_IF_FALSE y+1 + where y+1 is the instruction following the second test. 
+ */ + case JUMP_IF_FALSE_OR_POP: + switch (target->i_opcode) { + case POP_JUMP_IF_FALSE: + i -= jump_thread(inst, target, POP_JUMP_IF_FALSE); + break; + case JUMP_ABSOLUTE: + case JUMP_FORWARD: + case JUMP_IF_FALSE_OR_POP: + i -= jump_thread(inst, target, JUMP_IF_FALSE_OR_POP); + break; + case JUMP_IF_TRUE_OR_POP: + case POP_JUMP_IF_TRUE: + if (inst->i_lineno == target->i_lineno) { + // We don't need to bother checking for loops here, + // since a block's b_next cannot point to itself: + assert(inst->i_target != inst->i_target->b_next); + inst->i_opcode = POP_JUMP_IF_FALSE; + inst->i_target = inst->i_target->b_next; + --i; + } + break; + } + break; + case JUMP_IF_TRUE_OR_POP: + switch (target->i_opcode) { + case POP_JUMP_IF_TRUE: + i -= jump_thread(inst, target, POP_JUMP_IF_TRUE); + break; + case JUMP_ABSOLUTE: + case JUMP_FORWARD: + case JUMP_IF_TRUE_OR_POP: + i -= jump_thread(inst, target, JUMP_IF_TRUE_OR_POP); + break; + case JUMP_IF_FALSE_OR_POP: + case POP_JUMP_IF_FALSE: + if (inst->i_lineno == target->i_lineno) { + // We don't need to bother checking for loops here, + // since a block's b_next cannot point to itself: + assert(inst->i_target != inst->i_target->b_next); + inst->i_opcode = POP_JUMP_IF_TRUE; + inst->i_target = inst->i_target->b_next; + --i; + } + break; + } + break; + case POP_JUMP_IF_FALSE: + switch (target->i_opcode) { + case JUMP_ABSOLUTE: + case JUMP_FORWARD: + i -= jump_thread(inst, target, POP_JUMP_IF_FALSE); + } + break; + case POP_JUMP_IF_TRUE: + switch (target->i_opcode) { + case JUMP_ABSOLUTE: + case JUMP_FORWARD: + i -= jump_thread(inst, target, POP_JUMP_IF_TRUE); + } + break; + case JUMP_ABSOLUTE: + case JUMP_FORWARD: + switch (target->i_opcode) { + case JUMP_ABSOLUTE: + case JUMP_FORWARD: + i -= jump_thread(inst, target, JUMP_ABSOLUTE); + } + break; + case FOR_ITER: + if (target->i_opcode == JUMP_FORWARD) { + i -= jump_thread(inst, target, FOR_ITER); + } + break; + case ROT_N: + switch (oparg) { + case 0: + case 1: + inst->i_opcode = NOP; + continue; + case 2: + inst->i_opcode = ROT_TWO; + break; + case 3: + inst->i_opcode = ROT_THREE; + break; + case 4: + inst->i_opcode = ROT_FOUR; + break; + } + if (i >= oparg - 1) { + fold_rotations(inst - oparg + 1, oparg); + } + break; + } + } + return 0; +error: + return -1; +} + +/* If this block ends with an unconditional jump to an exit block, + * then remove the jump and extend this block with the target. + */ +static int +extend_block(basicblock *bb) { + if (bb->b_iused == 0) { + return 0; + } + struct instr *last = &bb->b_instr[bb->b_iused-1]; + if (last->i_opcode != JUMP_ABSOLUTE && last->i_opcode != JUMP_FORWARD) { + return 0; + } + if (last->i_target->b_exit && last->i_target->b_iused <= MAX_COPY_SIZE) { + basicblock *to_copy = last->i_target; + last->i_opcode = NOP; + for (int i = 0; i < to_copy->b_iused; i++) { + int index = compiler_next_instr(bb); + if (index < 0) { + return -1; + } + bb->b_instr[index] = to_copy->b_instr[i]; + } + bb->b_exit = 1; + } + return 0; +} + +static void +clean_basic_block(basicblock *bb, int prev_lineno) { + /* Remove NOPs when legal to do so. */ + int dest = 0; + for (int src = 0; src < bb->b_iused; src++) { + int lineno = bb->b_instr[src].i_lineno; + if (bb->b_instr[src].i_opcode == NOP) { + /* Eliminate no-op if it doesn't have a line number */ + if (lineno < 0) { + continue; + } + /* or, if the previous instruction had the same line number. 
*/ + if (prev_lineno == lineno) { + continue; + } + /* or, if the next instruction has same line number or no line number */ + if (src < bb->b_iused - 1) { + int next_lineno = bb->b_instr[src+1].i_lineno; + if (next_lineno < 0 || next_lineno == lineno) { + bb->b_instr[src+1].i_lineno = lineno; + continue; + } + } + else { + basicblock* next = bb->b_next; + while (next && next->b_iused == 0) { + next = next->b_next; + } + /* or if last instruction in BB and next BB has same line number */ + if (next) { + if (lineno == next->b_instr[0].i_lineno) { + continue; + } + } + } + + } + if (dest != src) { + bb->b_instr[dest] = bb->b_instr[src]; + } + dest++; + prev_lineno = lineno; + } + assert(dest <= bb->b_iused); + bb->b_iused = dest; +} + +static int +normalize_basic_block(basicblock *bb) { + /* Mark blocks as exit and/or nofallthrough. + Raise SystemError if CFG is malformed. */ + for (int i = 0; i < bb->b_iused; i++) { + switch(bb->b_instr[i].i_opcode) { + case RETURN_VALUE: + case RAISE_VARARGS: + case RERAISE: + bb->b_exit = 1; + bb->b_nofallthrough = 1; + break; + case JUMP_ABSOLUTE: + case JUMP_FORWARD: + bb->b_nofallthrough = 1; + /* fall through */ + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + case FOR_ITER: + if (i != bb->b_iused-1) { + PyErr_SetString(PyExc_SystemError, "malformed control flow graph."); + return -1; + } + /* Skip over empty basic blocks. */ + while (bb->b_instr[i].i_target->b_iused == 0) { + bb->b_instr[i].i_target = bb->b_instr[i].i_target->b_next; + } + + } + } + return 0; +} + +static int +mark_reachable(struct assembler *a) { + basicblock **stack, **sp; + sp = stack = (basicblock **)PyObject_Malloc(sizeof(basicblock *) * a->a_nblocks); + if (stack == NULL) { + return -1; + } + a->a_entry->b_predecessors = 1; + *sp++ = a->a_entry; + while (sp > stack) { + basicblock *b = *(--sp); + if (b->b_next && !b->b_nofallthrough) { + if (b->b_next->b_predecessors == 0) { + *sp++ = b->b_next; + } + b->b_next->b_predecessors++; + } + for (int i = 0; i < b->b_iused; i++) { + basicblock *target; + if (is_jump(&b->b_instr[i])) { + target = b->b_instr[i].i_target; + if (target->b_predecessors == 0) { + *sp++ = target; + } + target->b_predecessors++; + } + } + } + PyObject_Free(stack); + return 0; +} + +static void +eliminate_empty_basic_blocks(basicblock *entry) { + /* Eliminate empty blocks */ + for (basicblock *b = entry; b != NULL; b = b->b_next) { + basicblock *next = b->b_next; + if (next) { + while (next->b_iused == 0 && next->b_next) { + next = next->b_next; + } + b->b_next = next; + } + } + for (basicblock *b = entry; b != NULL; b = b->b_next) { + if (b->b_iused == 0) { + continue; + } + if (is_jump(&b->b_instr[b->b_iused-1])) { + basicblock *target = b->b_instr[b->b_iused-1].i_target; + while (target->b_iused == 0) { + target = target->b_next; + } + b->b_instr[b->b_iused-1].i_target = target; + } + } +} + + +/* If an instruction has no line number, but it's predecessor in the BB does, + * then copy the line number. If a successor block has no line number, and only + * one predecessor, then inherit the line number. + * This ensures that all exit blocks (with one predecessor) receive a line number. + * Also reduces the size of the line number table, + * but has no impact on the generated line number events. 
+ */ +static void +propagate_line_numbers(struct assembler *a) { + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + if (b->b_iused == 0) { + continue; + } + int prev_lineno = -1; + for (int i = 0; i < b->b_iused; i++) { + if (b->b_instr[i].i_lineno < 0) { + b->b_instr[i].i_lineno = prev_lineno; + } + else { + prev_lineno = b->b_instr[i].i_lineno; + } + } + if (!b->b_nofallthrough && b->b_next->b_predecessors == 1) { + assert(b->b_next->b_iused); + if (b->b_next->b_instr[0].i_lineno < 0) { + b->b_next->b_instr[0].i_lineno = prev_lineno; + } + } + if (is_jump(&b->b_instr[b->b_iused-1])) { + switch (b->b_instr[b->b_iused-1].i_opcode) { + /* Note: Only actual jumps, not exception handlers */ + case SETUP_ASYNC_WITH: + case SETUP_WITH: + case SETUP_FINALLY: + continue; + } + basicblock *target = b->b_instr[b->b_iused-1].i_target; + if (target->b_predecessors == 1) { + if (target->b_instr[0].i_lineno < 0) { + target->b_instr[0].i_lineno = prev_lineno; + } + } + } + } +} + +/* Perform optimizations on a control flow graph. + The consts object should still be in list form to allow new constants + to be appended. + + All transformations keep the code size the same or smaller. + For those that reduce size, the gaps are initially filled with + NOPs. Later those NOPs are removed. +*/ + +static int +optimize_cfg(struct compiler *c, struct assembler *a, PyObject *consts) +{ + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + if (optimize_basic_block(c, b, consts)) { + return -1; + } + clean_basic_block(b, -1); + assert(b->b_predecessors == 0); + } + for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) { + if (extend_block(b)) { + return -1; + } + } + if (mark_reachable(a)) { + return -1; + } + /* Delete unreachable instructions */ + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + if (b->b_predecessors == 0) { + b->b_iused = 0; + b->b_nofallthrough = 0; + } + } + basicblock *pred = NULL; + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + int prev_lineno = -1; + if (pred && pred->b_iused) { + prev_lineno = pred->b_instr[pred->b_iused-1].i_lineno; + } + clean_basic_block(b, prev_lineno); + pred = b->b_nofallthrough ? NULL : b; + } + eliminate_empty_basic_blocks(a->a_entry); + /* Delete jump instructions made redundant by previous step. If a non-empty + block ends with a jump instruction, check if the next non-empty block + reached through normal flow control is the target of that jump. If it + is, then the jump instruction is redundant and can be deleted. + */ + int maybe_empty_blocks = 0; + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + if (b->b_iused > 0) { + struct instr *b_last_instr = &b->b_instr[b->b_iused - 1]; + if (b_last_instr->i_opcode == JUMP_ABSOLUTE || + b_last_instr->i_opcode == JUMP_FORWARD) { + if (b_last_instr->i_target == b->b_next) { + assert(b->b_next->b_iused); + b->b_nofallthrough = 0; + b_last_instr->i_opcode = NOP; + clean_basic_block(b, -1); + maybe_empty_blocks = 1; + } + } + } + } + if (maybe_empty_blocks) { + eliminate_empty_basic_blocks(a->a_entry); + } + return 0; +} + +// Remove trailing unused constants. +static int +trim_unused_consts(struct compiler *c, struct assembler *a, PyObject *consts) +{ + assert(PyList_CheckExact(consts)); + + // The first constant may be docstring; keep it always. 
+ int max_const_index = 0; + for (basicblock *b = a->a_entry; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + if (b->b_instr[i].i_opcode == LOAD_CONST && + b->b_instr[i].i_oparg > max_const_index) { + max_const_index = b->b_instr[i].i_oparg; + } + } + } + if (max_const_index+1 < PyList_GET_SIZE(consts)) { + //fprintf(stderr, "removing trailing consts: max=%d, size=%d\n", + // max_const_index, (int)PyList_GET_SIZE(consts)); + if (PyList_SetSlice(consts, max_const_index+1, + PyList_GET_SIZE(consts), NULL) < 0) { + return 1; + } + } + return 0; +} + +static inline int +is_exit_without_lineno(basicblock *b) { + return b->b_exit && b->b_instr[0].i_lineno < 0; +} + +/* PEP 626 mandates that the f_lineno of a frame is correct + * after a frame terminates. It would be prohibitively expensive + * to continuously update the f_lineno field at runtime, + * so we make sure that all exiting instruction (raises and returns) + * have a valid line number, allowing us to compute f_lineno lazily. + * We can do this by duplicating the exit blocks without line number + * so that none have more than one predecessor. We can then safely + * copy the line number from the sole predecessor block. + */ +static int +duplicate_exits_without_lineno(struct compiler *c) +{ + /* Copy all exit blocks without line number that are targets of a jump. + */ + for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) { + if (b->b_iused > 0 && is_jump(&b->b_instr[b->b_iused-1])) { + switch (b->b_instr[b->b_iused-1].i_opcode) { + /* Note: Only actual jumps, not exception handlers */ + case SETUP_ASYNC_WITH: + case SETUP_WITH: + case SETUP_FINALLY: + continue; + } + basicblock *target = b->b_instr[b->b_iused-1].i_target; + if (is_exit_without_lineno(target) && target->b_predecessors > 1) { + basicblock *new_target = compiler_copy_block(c, target); + if (new_target == NULL) { + return -1; + } + new_target->b_instr[0].i_lineno = b->b_instr[b->b_iused-1].i_lineno; + b->b_instr[b->b_iused-1].i_target = new_target; + target->b_predecessors--; + new_target->b_predecessors = 1; + new_target->b_next = target->b_next; + target->b_next = new_target; + } + } + } + /* Eliminate empty blocks */ + for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) { + while (b->b_next && b->b_next->b_iused == 0) { + b->b_next = b->b_next->b_next; + } + } + /* Any remaining reachable exit blocks without line number can only be reached by + * fall through, and thus can only have a single predecessor */ + for (basicblock *b = c->u->u_blocks; b != NULL; b = b->b_list) { + if (!b->b_nofallthrough && b->b_next && b->b_iused > 0) { + if (is_exit_without_lineno(b->b_next)) { + assert(b->b_next->b_iused > 0); + b->b_next->b_instr[0].i_lineno = b->b_instr[b->b_iused-1].i_lineno; + } + } + } + return 0; +} + + +/* Retained for API compatibility. + * Optimization is now done in optimize_cfg */ + +PyObject * +PyCode_Optimize(PyObject *code, PyObject* Py_UNUSED(consts), + PyObject *Py_UNUSED(names), PyObject *Py_UNUSED(lnotab_obj)) { - return PyAST_CompileEx(mod, filename, flags, -1, arena); + Py_INCREF(code); + return code; }
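With the old peephole pass folded into optimize_cfg above, PyCode_Optimize is kept only for API compatibility and now returns its input unchanged. As a reading aid, the following is a minimal, self-contained sketch of one of the CFG-level rewrites introduced by this patch, the constant-tuple folding done by fold_tuple_on_constants: a run of LOAD_CONST instructions followed by BUILD_TUPLE n is rewritten to NOPs plus a single LOAD_CONST of the pre-built tuple. The instr layout, the opcode values, and the way the folded constant is "appended" to the constants table are simplified stand-ins for illustration; the real pass works on the structures defined in compile.c and registers the new tuple through merge_const_one.

/* Simplified model only -- not the CPython code from the diff above. */
#include <stdio.h>

enum { NOP, LOAD_CONST, BUILD_TUPLE };   /* stand-in opcode values */

struct instr { int opcode; int oparg; };

/* If instruction i is BUILD_TUPLE n and the n preceding instructions are all
 * LOAD_CONST, blank them out with NOPs and rewrite the BUILD_TUPLE into a
 * LOAD_CONST of a freshly "appended" constant (here just a new index).
 * Returns 1 if the rewrite happened, 0 otherwise. */
static int
fold_tuple(struct instr *code, int i, int *nconsts)
{
    int n = code[i].oparg;
    if (code[i].opcode != BUILD_TUPLE || i < n)
        return 0;
    for (int j = i - n; j < i; j++)
        if (code[j].opcode != LOAD_CONST)
            return 0;                    /* operands are not all constants */
    int folded = (*nconsts)++;           /* index of the new tuple constant */
    for (int j = i - n; j < i; j++)
        code[j].opcode = NOP;            /* leave gaps; NOPs are cleaned later */
    code[i].opcode = LOAD_CONST;
    code[i].oparg = folded;
    return 1;
}

int
main(void)
{
    /* LOAD_CONST 0; LOAD_CONST 1; BUILD_TUPLE 2  ->  NOP; NOP; LOAD_CONST 2 */
    struct instr code[] = {{LOAD_CONST, 0}, {LOAD_CONST, 1}, {BUILD_TUPLE, 2}};
    int nconsts = 2;
    fold_tuple(code, 2, &nconsts);
    for (int i = 0; i < 3; i++)
        printf("opcode=%d oparg=%d\n", code[i].opcode, code[i].oparg);
    return 0;
}

Filling the vacated slots with NOPs rather than shifting instructions mirrors the comment in optimize_cfg above: the transformations never grow the code, and the temporary NOPs are removed afterwards by clean_basic_block.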