path: root/contrib/tools/python3/src/Python/ast.c
diff options
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/tools/python3/src/Python/ast.c
intermediate changes
Diffstat (limited to 'contrib/tools/python3/src/Python/ast.c')
1 files changed, 5858 insertions, 0 deletions
diff --git a/contrib/tools/python3/src/Python/ast.c b/contrib/tools/python3/src/Python/ast.c
new file mode 100644
index 0000000000..6dd7059263
--- /dev/null
+++ b/contrib/tools/python3/src/Python/ast.c
@@ -0,0 +1,5858 @@
+/*
+ * This file includes functions to transform a concrete syntax tree (CST) to
+ * an abstract syntax tree (AST). The main function is PyAST_FromNode().
+ *
+ */
+#include "Python.h"
+#include "Python-ast.h"
+#include "node.h"
+#include "ast.h"
+#include "token.h"
+#include "pythonrun.h"
+#include <assert.h>
+#include <stdbool.h>
+#define MAXLEVEL 200 /* Max parentheses level */
+/* Forward declarations for the AST validation helpers defined below; they
+ * call each other recursively, so they must be declared before first use. */
+static int validate_stmts(asdl_seq *);
+static int validate_exprs(asdl_seq *, expr_context_ty, int);
+static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
+static int validate_stmt(stmt_ty);
+static int validate_expr(expr_ty, expr_context_ty);
+/* Check that a Name node's identifier is not one of the constant names
+ * "None", "True" or "False".
+ *
+ * Returns 1 if the name is acceptable; otherwise sets ValueError and
+ * returns 0. */
+static int
+validate_name(PyObject *name)
+{
+    assert(PyUnicode_Check(name));
+    /* The table is NULL-terminated: the loop below stops on the sentinel,
+     * so it must be present or the scan would run past the array. */
+    static const char * const forbidden[] = {
+        "None",
+        "True",
+        "False",
+        NULL
+    };
+    for (int i = 0; forbidden[i] != NULL; i++) {
+        if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
+            PyErr_Format(PyExc_ValueError, "Name node can't be used with '%s' constant", forbidden[i]);
+            return 0;
+        }
+    }
+    return 1;
+}
+/* Validate the generator clauses of a comprehension.
+ *
+ * An empty generator list is rejected with ValueError.  For each clause the
+ * target is validated in Store context, the iterable in Load context and
+ * every `if` condition in Load context (NULL entries not allowed).
+ * Returns 1 on success, 0 with an exception set on failure. */
+static int
+validate_comprehension(asdl_seq *gens)
+{
+    Py_ssize_t i;
+    if (!asdl_seq_LEN(gens)) {
+        PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
+        return 0;
+    }
+    for (i = 0; i < asdl_seq_LEN(gens); i++) {
+        comprehension_ty comp = asdl_seq_GET(gens, i);
+        if (!validate_expr(comp->target, Store) ||
+            !validate_expr(comp->iter, Load) ||
+            !validate_exprs(comp->ifs, Load, 0))
+            return 0;
+    }
+    return 1;
+}
+/* Validate the value expression of every keyword in `keywords` in Load
+ * context.  Returns 1 if all are valid, 0 as soon as one fails. */
+static int
+validate_keywords(asdl_seq *keywords)
+{
+    Py_ssize_t i;
+    for (i = 0; i < asdl_seq_LEN(keywords); i++) {
+        if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
+            return 0;
+    }
+    return 1;
+}
+/* Validate the annotation (when present) of every arg node in `args` in
+ * Load context.  Args without an annotation are accepted as-is.
+ * Returns 1 on success, 0 on the first invalid annotation. */
+static int
+validate_args(asdl_seq *args)
+{
+    Py_ssize_t i;
+    for (i = 0; i < asdl_seq_LEN(args); i++) {
+        arg_ty arg = asdl_seq_GET(args, i);
+        if (arg->annotation && !validate_expr(arg->annotation, Load))
+            return 0;
+    }
+    return 1;
+}
+/* Map an expression context to its printable name ("Load", "Store" or
+ * "Del") for use in error messages. */
+static const char *
+expr_context_name(expr_context_ty ctx)
+{
+    switch (ctx) {
+    case Load:
+        return "Load";
+    case Store:
+        return "Store";
+    case Del:
+        return "Del";
+    default:
+        /* Every context handled by this validator is covered above; mark
+         * the fall-through explicitly rather than returning garbage. */
+        Py_UNREACHABLE();
+    }
+}
+/* Validate an `arguments` node (function or lambda parameter list).
+ *
+ * Checks the annotations of positional-only, regular, *vararg, keyword-only
+ * and **kwarg parameters, then the consistency of the defaults:
+ *   - there may not be more positional defaults than positional parameters;
+ *   - kw_defaults must have exactly one slot per keyword-only parameter
+ *     (slots may be NULL, hence null_ok=1 for kw_defaults).
+ * Returns 1 on success, 0 with an exception set on failure. */
+static int
+validate_arguments(arguments_ty args)
+{
+    if (!validate_args(args->posonlyargs) || !validate_args(args->args)) {
+        return 0;
+    }
+    if (args->vararg && args->vararg->annotation
+        && !validate_expr(args->vararg->annotation, Load)) {
+        return 0;
+    }
+    if (!validate_args(args->kwonlyargs))
+        return 0;
+    if (args->kwarg && args->kwarg->annotation
+        && !validate_expr(args->kwarg->annotation, Load)) {
+        return 0;
+    }
+    if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
+        PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
+        return 0;
+    }
+    if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
+        PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
+                        "kw_defaults on arguments");
+        return 0;
+    }
+    return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
+}
+/* Check that `value` is an object allowed inside a Constant node.
+ *
+ * Accepted: None, Ellipsis, exact int/float/complex/str/bytes, bool, and
+ * exact tuples or frozensets whose items are themselves valid constants
+ * (checked recursively).
+ * Returns 1 if valid.  Otherwise returns 0; a TypeError naming the
+ * offending type is set unless another exception is already pending. */
+static int
+validate_constant(PyObject *value)
+{
+    if (value == Py_None || value == Py_Ellipsis)
+        return 1;
+    if (PyLong_CheckExact(value)
+            || PyFloat_CheckExact(value)
+            || PyComplex_CheckExact(value)
+            || PyBool_Check(value)
+            || PyUnicode_CheckExact(value)
+            || PyBytes_CheckExact(value))
+        return 1;
+    if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
+        PyObject *it;
+        it = PyObject_GetIter(value);
+        if (it == NULL)
+            return 0;
+        while (1) {
+            PyObject *item = PyIter_Next(it);
+            if (item == NULL) {
+                /* NULL means either exhaustion or an iteration error. */
+                if (PyErr_Occurred()) {
+                    Py_DECREF(it);
+                    return 0;
+                }
+                break;
+            }
+            if (!validate_constant(item)) {
+                Py_DECREF(it);
+                Py_DECREF(item);
+                return 0;
+            }
+            Py_DECREF(item);
+        }
+        Py_DECREF(it);
+        return 1;
+    }
+    /* Do not clobber an error raised by a nested call. */
+    if (!PyErr_Occurred()) {
+        PyErr_Format(PyExc_TypeError,
+                     "got an invalid type in Constant: %s",
+                     _PyType_Name(Py_TYPE(value)));
+    }
+    return 0;
+}
+/* Validate a single expression node against the expected context `ctx`.
+ *
+ * Phase 1 checks the context: node kinds that carry a ctx field (Attribute,
+ * Subscript, Starred, Name, List, Tuple) must have ctx equal to the expected
+ * one; every other kind is only legal in Load context.
+ * Phase 2 validates the node's children according to its kind.
+ * Returns 1 if valid, 0 with an exception set otherwise. */
+static int
+validate_expr(expr_ty exp, expr_context_ty ctx)
+{
+    int check_ctx = 1;
+    expr_context_ty actual_ctx;
+    /* First check expression context. */
+    switch (exp->kind) {
+    case Attribute_kind:
+        actual_ctx = exp->v.Attribute.ctx;
+        break;
+    case Subscript_kind:
+        actual_ctx = exp->v.Subscript.ctx;
+        break;
+    case Starred_kind:
+        actual_ctx = exp->v.Starred.ctx;
+        break;
+    case Name_kind:
+        if (!validate_name(exp->v.Name.id)) {
+            return 0;
+        }
+        actual_ctx = exp->v.Name.ctx;
+        break;
+    case List_kind:
+        actual_ctx = exp->v.List.ctx;
+        break;
+    case Tuple_kind:
+        actual_ctx = exp->v.Tuple.ctx;
+        break;
+    default:
+        if (ctx != Load) {
+            PyErr_Format(PyExc_ValueError, "expression which can't be "
+                         "assigned to in %s context", expr_context_name(ctx));
+            return 0;
+        }
+        check_ctx = 0;
+        /* set actual_ctx to prevent gcc warning */
+        actual_ctx = 0;
+    }
+    if (check_ctx && actual_ctx != ctx) {
+        PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
+                     expr_context_name(ctx), expr_context_name(actual_ctx));
+        return 0;
+    }
+    /* Now validate expression. */
+    switch (exp->kind) {
+    case BoolOp_kind:
+        if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
+            PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
+            return 0;
+        }
+        return validate_exprs(exp->v.BoolOp.values, Load, 0);
+    case BinOp_kind:
+        return validate_expr(exp->v.BinOp.left, Load) &&
+            validate_expr(exp->v.BinOp.right, Load);
+    case UnaryOp_kind:
+        return validate_expr(exp->v.UnaryOp.operand, Load);
+    case Lambda_kind:
+        return validate_arguments(exp->v.Lambda.args) &&
+            validate_expr(exp->v.Lambda.body, Load);
+    case IfExp_kind:
+        return validate_expr(exp->v.IfExp.test, Load) &&
+            validate_expr(exp->v.IfExp.body, Load) &&
+            validate_expr(exp->v.IfExp.orelse, Load);
+    case Dict_kind:
+        if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Dict doesn't have the same number of keys as values");
+            return 0;
+        }
+        /* null_ok=1 for keys expressions to allow dict unpacking to work in
+           dict literals, i.e. ``{**{a:b}}`` */
+        return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
+            validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
+    case Set_kind:
+        return validate_exprs(exp->v.Set.elts, Load, 0);
+#define COMP(NAME) \
+        case NAME ## _kind: \
+            return validate_comprehension(exp->v.NAME.generators) && \
+                validate_expr(exp->v.NAME.elt, Load);
+    COMP(ListComp)
+    COMP(SetComp)
+    COMP(GeneratorExp)
+#undef COMP
+    case DictComp_kind:
+        return validate_comprehension(exp->v.DictComp.generators) &&
+            validate_expr(exp->v.DictComp.key, Load) &&
+            validate_expr(exp->v.DictComp.value, Load);
+    case Yield_kind:
+        return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
+    case YieldFrom_kind:
+        return validate_expr(exp->v.YieldFrom.value, Load);
+    case Await_kind:
+        return validate_expr(exp->v.Await.value, Load);
+    case Compare_kind:
+        if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
+            PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
+            return 0;
+        }
+        if (asdl_seq_LEN(exp->v.Compare.comparators) !=
+            asdl_seq_LEN(exp->v.Compare.ops)) {
+            PyErr_SetString(PyExc_ValueError, "Compare has a different number "
+                            "of comparators and operands");
+            return 0;
+        }
+        return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
+            validate_expr(exp->v.Compare.left, Load);
+    case Call_kind:
+        return validate_expr(exp->v.Call.func, Load) &&
+            validate_exprs(exp->v.Call.args, Load, 0) &&
+            validate_keywords(exp->v.Call.keywords);
+    case Constant_kind:
+        if (!validate_constant(exp->v.Constant.value)) {
+            return 0;
+        }
+        return 1;
+    case JoinedStr_kind:
+        return validate_exprs(exp->v.JoinedStr.values, Load, 0);
+    case FormattedValue_kind:
+        if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
+            return 0;
+        if (exp->v.FormattedValue.format_spec)
+            return validate_expr(exp->v.FormattedValue.format_spec, Load);
+        return 1;
+    case Attribute_kind:
+        return validate_expr(exp->v.Attribute.value, Load);
+    case Subscript_kind:
+        return validate_expr(exp->v.Subscript.slice, Load) &&
+            validate_expr(exp->v.Subscript.value, Load);
+    case Starred_kind:
+        /* The starred target inherits the context of the Starred node. */
+        return validate_expr(exp->v.Starred.value, ctx);
+    case Slice_kind:
+        return (!exp->v.Slice.lower || validate_expr(exp->v.Slice.lower, Load)) &&
+            (!exp->v.Slice.upper || validate_expr(exp->v.Slice.upper, Load)) &&
+            (!exp->v.Slice.step || validate_expr(exp->v.Slice.step, Load));
+    case List_kind:
+        return validate_exprs(exp->v.List.elts, ctx, 0);
+    case Tuple_kind:
+        return validate_exprs(exp->v.Tuple.elts, ctx, 0);
+    case NamedExpr_kind:
+        return validate_expr(exp->v.NamedExpr.value, Load);
+    /* This last case doesn't have any checking. */
+    case Name_kind:
+        return 1;
+    }
+    /* Reached only for an expression kind not listed above. */
+    PyErr_SetString(PyExc_SystemError, "unexpected expression");
+    return 0;
+}
+/* Require `seq` to contain at least one element.
+ *
+ * `what` names the field and `owner` the node type; both are used only to
+ * build the ValueError message "empty <what> on <owner>".
+ * Returns 1 if non-empty, 0 with ValueError set otherwise. */
+static int
+validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
+{
+    if (asdl_seq_LEN(seq))
+        return 1;
+    PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
+    return 0;
+}
+/* Validate the target list of an Assign (ctx == Store) or Delete
+ * (ctx == Del) statement: it must be non-empty and every target must be a
+ * valid expression in context `ctx`. */
+static int
+validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
+{
+    return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
+        validate_exprs(targets, ctx, 0);
+}
+/* Validate a statement body that must not be empty; `owner` is the node
+ * type name used in the error message. */
+static int
+validate_body(asdl_seq *body, const char *owner)
+{
+    return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
+}
+/* Validate a single statement node, dispatching on its kind and recursively
+ * validating contained expressions and nested statement lists.
+ *
+ * Returns 1 if the statement is valid, 0 with an exception set otherwise
+ * (SystemError for an unknown statement kind). */
+static int
+validate_stmt(stmt_ty stmt)
+{
+    Py_ssize_t i;
+    switch (stmt->kind) {
+    case FunctionDef_kind:
+        return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
+            validate_arguments(stmt->v.FunctionDef.args) &&
+            validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
+            (!stmt->v.FunctionDef.returns ||
+             validate_expr(stmt->v.FunctionDef.returns, Load));
+    case ClassDef_kind:
+        return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
+            validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
+            validate_keywords(stmt->v.ClassDef.keywords) &&
+            validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
+    case Return_kind:
+        return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
+    case Delete_kind:
+        return validate_assignlist(stmt->v.Delete.targets, Del);
+    case Assign_kind:
+        return validate_assignlist(stmt->v.Assign.targets, Store) &&
+            validate_expr(stmt->v.Assign.value, Load);
+    case AugAssign_kind:
+        return validate_expr(stmt->v.AugAssign.target, Store) &&
+            validate_expr(stmt->v.AugAssign.value, Load);
+    case AnnAssign_kind:
+        /* The "simple" flag is only legal when the target is a Name. */
+        if (stmt->v.AnnAssign.target->kind != Name_kind &&
+            stmt->v.AnnAssign.simple) {
+            PyErr_SetString(PyExc_TypeError,
+                            "AnnAssign with simple non-Name target");
+            return 0;
+        }
+        return validate_expr(stmt->v.AnnAssign.target, Store) &&
+               (!stmt->v.AnnAssign.value ||
+                validate_expr(stmt->v.AnnAssign.value, Load)) &&
+               validate_expr(stmt->v.AnnAssign.annotation, Load);
+    case For_kind:
+        return validate_expr(stmt->v.For.target, Store) &&
+            validate_expr(stmt->v.For.iter, Load) &&
+            validate_body(stmt->v.For.body, "For") &&
+            validate_stmts(stmt->v.For.orelse);
+    case AsyncFor_kind:
+        return validate_expr(stmt->v.AsyncFor.target, Store) &&
+            validate_expr(stmt->v.AsyncFor.iter, Load) &&
+            validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
+            validate_stmts(stmt->v.AsyncFor.orelse);
+    case While_kind:
+        return validate_expr(stmt->v.While.test, Load) &&
+            validate_body(stmt->v.While.body, "While") &&
+            validate_stmts(stmt->v.While.orelse);
+    case If_kind:
+        return validate_expr(stmt->v.If.test, Load) &&
+            validate_body(stmt->v.If.body, "If") &&
+            validate_stmts(stmt->v.If.orelse);
+    case With_kind:
+        if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
+            return 0;
+        for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
+            withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
+            if (!validate_expr(item->context_expr, Load) ||
+                (item->optional_vars && !validate_expr(item->optional_vars, Store)))
+                return 0;
+        }
+        return validate_body(stmt->v.With.body, "With");
+    case AsyncWith_kind:
+        if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
+            return 0;
+        for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
+            withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
+            if (!validate_expr(item->context_expr, Load) ||
+                (item->optional_vars && !validate_expr(item->optional_vars, Store)))
+                return 0;
+        }
+        return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
+    case Raise_kind:
+        if (stmt->v.Raise.exc) {
+            return validate_expr(stmt->v.Raise.exc, Load) &&
+                (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
+        }
+        /* A cause without an exception is rejected. */
+        if (stmt->v.Raise.cause) {
+            PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
+            return 0;
+        }
+        return 1;
+    case Try_kind:
+        if (!validate_body(stmt->v.Try.body, "Try"))
+            return 0;
+        if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
+            !asdl_seq_LEN(stmt->v.Try.finalbody)) {
+            PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
+            return 0;
+        }
+        if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
+            asdl_seq_LEN(stmt->v.Try.orelse)) {
+            PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
+            return 0;
+        }
+        for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
+            excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
+            if ((handler->v.ExceptHandler.type &&
+                 !validate_expr(handler->v.ExceptHandler.type, Load)) ||
+                !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
+                return 0;
+        }
+        return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
+                validate_stmts(stmt->v.Try.finalbody)) &&
+            (!asdl_seq_LEN(stmt->v.Try.orelse) ||
+             validate_stmts(stmt->v.Try.orelse));
+    case Assert_kind:
+        return validate_expr(stmt->v.Assert.test, Load) &&
+            (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
+    case Import_kind:
+        return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
+    case ImportFrom_kind:
+        if (stmt->v.ImportFrom.level < 0) {
+            PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
+            return 0;
+        }
+        return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
+    case Global_kind:
+        return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
+    case Nonlocal_kind:
+        return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
+    case Expr_kind:
+        return validate_expr(stmt->v.Expr.value, Load);
+    case AsyncFunctionDef_kind:
+        return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
+            validate_arguments(stmt->v.AsyncFunctionDef.args) &&
+            validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
+            (!stmt->v.AsyncFunctionDef.returns ||
+             validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
+    case Pass_kind:
+    case Break_kind:
+    case Continue_kind:
+        return 1;
+    default:
+        PyErr_SetString(PyExc_SystemError, "unexpected statement");
+        return 0;
+    }
+}
+/* Validate every statement in `seq`.  A NULL entry is rejected with
+ * ValueError.  Returns 1 if all statements are valid, else 0. */
+static int
+validate_stmts(asdl_seq *seq)
+{
+    Py_ssize_t i;
+    for (i = 0; i < asdl_seq_LEN(seq); i++) {
+        stmt_ty stmt = asdl_seq_GET(seq, i);
+        if (stmt) {
+            if (!validate_stmt(stmt))
+                return 0;
+        }
+        else {
+            PyErr_SetString(PyExc_ValueError,
+                            "None disallowed in statement list");
+            return 0;
+        }
+    }
+    return 1;
+}
+/* Validate every expression in `exprs` against context `ctx`.
+ *
+ * NULL entries are skipped when `null_ok` is non-zero and rejected with
+ * ValueError otherwise.  Returns 1 if all entries are valid, else 0. */
+static int
+validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
+{
+    Py_ssize_t i;
+    for (i = 0; i < asdl_seq_LEN(exprs); i++) {
+        expr_ty expr = asdl_seq_GET(exprs, i);
+        if (expr) {
+            if (!validate_expr(expr, ctx))
+                return 0;
+        }
+        else if (!null_ok) {
+            PyErr_SetString(PyExc_ValueError,
+                            "None disallowed in expression list");
+            return 0;
+        }
+    }
+    return 1;
+}
+/* Validate a whole module AST.
+ *
+ * Module and Interactive bodies are validated as statement lists; an
+ * Expression body is validated as a Load-context expression.  Any other
+ * module kind sets SystemError.
+ * Returns 1 if the tree is valid, 0 with an exception set otherwise. */
+int
+PyAST_Validate(mod_ty mod)
+{
+    int res = 0;
+    switch (mod->kind) {
+    case Module_kind:
+        res = validate_stmts(mod->v.Module.body);
+        break;
+    case Interactive_kind:
+        res = validate_stmts(mod->v.Interactive.body);
+        break;
+    case Expression_kind:
+        res = validate_expr(mod->v.Expression.body, Load);
+        break;
+    default:
+        PyErr_SetString(PyExc_SystemError, "impossible module node");
+        res = 0;
+        break;
+    }
+    return res;
+}
+/* This is done here, so defines like "test" don't interfere with AST use above. */
+#include "grammar.h"
+#include "parsetok.h"
+#include "graminit.h"
+/* Data structure used internally: per-conversion state threaded through
+ * every ast_for_*() helper. */
+struct compiling {
+    PyArena *c_arena; /* Arena for allocating memory. */
+    PyObject *c_filename; /* filename */
+    PyObject *c_normalize; /* Normalization function from unicodedata. */
+    int c_feature_version; /* Latest minor version of Python for allowed features */
+};
+/* Forward declarations for the CST-to-AST conversion helpers. */
+static asdl_seq *seq_for_testlist(struct compiling *, const node *);
+static expr_ty ast_for_expr(struct compiling *, const node *);
+static stmt_ty ast_for_stmt(struct compiling *, const node *);
+static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
+static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
+ expr_context_ty);
+static expr_ty ast_for_testlist(struct compiling *, const node *);
+static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
+static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
+static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
+/* Note different signature for ast_for_call */
+static expr_ty ast_for_call(struct compiling *, const node *, expr_ty,
+ const node *, const node *, const node *);
+static PyObject *parsenumber(struct compiling *, const char *);
+static expr_ty parsestrplus(struct compiling *, const node *n);
+static void get_last_end_pos(asdl_seq *, int *, int *);
+/* Comprehension kind selectors; presumably consumed by the comprehension
+ * builders outside this view -- TODO confirm. */
+#define COMP_GENEXP 0
+#define COMP_LISTCOMP 1
+#define COMP_SETCOMP 2
+/* Look up unicodedata.normalize and cache it in c->c_normalize.
+ *
+ * Returns 1 on success.  Returns 0 (exception set by the failing call) if
+ * the module cannot be imported or lacks the attribute. */
+static int
+init_normalization(struct compiling *c)
+{
+    PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
+    if (!m)
+        return 0;
+    c->c_normalize = PyObject_GetAttrString(m, "normalize");
+    /* The module reference is no longer needed once the attribute is held. */
+    Py_DECREF(m);
+    if (!c->c_normalize)
+        return 0;
+    return 1;
+}
+/* Build an interned identifier object from the UTF-8 C string `n`.
+ *
+ * Non-ASCII identifiers are normalized to NFKC through the lazily
+ * initialized c->c_normalize callable.  The resulting string is interned
+ * and registered with c->c_arena.
+ * Returns the identifier, or NULL with an exception set on failure. */
+static identifier
+new_identifier(const char *n, struct compiling *c)
+{
+    PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
+    if (!id)
+        return NULL;
+    /* PyUnicode_DecodeUTF8 should always return a ready string. */
+    assert(PyUnicode_IS_READY(id));
+    /* Check whether there are non-ASCII characters in the
+       identifier; if so, normalize to NFKC. */
+    if (!PyUnicode_IS_ASCII(id)) {
+        PyObject *id2;
+        if (!c->c_normalize && !init_normalization(c)) {
+            Py_DECREF(id);
+            return NULL;
+        }
+        PyObject *form = PyUnicode_InternFromString("NFKC");
+        if (form == NULL) {
+            Py_DECREF(id);
+            return NULL;
+        }
+        PyObject *args[2] = {form, id};
+        id2 = _PyObject_FastCall(c->c_normalize, args, 2);
+        Py_DECREF(id);
+        Py_DECREF(form);
+        if (!id2)
+            return NULL;
+        /* Guard against a replaced normalize() returning a non-string. */
+        if (!PyUnicode_Check(id2)) {
+            PyErr_Format(PyExc_TypeError,
+                         "unicodedata.normalize() must return a string, not "
+                         "%.200s",
+                         _PyType_Name(Py_TYPE(id2)));
+            Py_DECREF(id2);
+            return NULL;
+        }
+        id = id2;
+    }
+    PyUnicode_InternInPlace(&id);
+    if (PyArena_AddPyObject(c->c_arena, id) < 0) {
+        Py_DECREF(id);
+        return NULL;
+    }
+    return id;
+}
+/* Convenience wrapper: identifier from a node's string; requires a
+ * `struct compiling *c` in scope. */
+#define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
+/* Raise a SyntaxError located at node `n`.
+ *
+ * `errmsg` is a PyUnicode_FromFormatV-style format string followed by its
+ * arguments.  The exception value is (message, (filename, lineno,
+ * col_offset + 1, source_line_or_None)).
+ * Always returns 0, so callers can `return ast_error(...)` from functions
+ * that report failure as 0. */
+static int
+ast_error(struct compiling *c, const node *n, const char *errmsg, ...)
+{
+    PyObject *value, *errstr, *loc, *tmp;
+    va_list va;
+    va_start(va, errmsg);
+    errstr = PyUnicode_FromFormatV(errmsg, va);
+    va_end(va);
+    if (!errstr) {
+        return 0;
+    }
+    loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
+    if (!loc) {
+        /* Source line unavailable: report None as the text. */
+        Py_INCREF(Py_None);
+        loc = Py_None;
+    }
+    /* The "N" format unit hands the reference to `loc` over to the tuple. */
+    tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset + 1, loc);
+    if (!tmp) {
+        Py_DECREF(errstr);
+        return 0;
+    }
+    value = PyTuple_Pack(2, errstr, tmp);
+    Py_DECREF(errstr);
+    Py_DECREF(tmp);
+    if (value) {
+        PyErr_SetObject(PyExc_SyntaxError, value);
+        Py_DECREF(value);
+    }
+    return 0;
+}
+/* num_stmts() returns number of contained statements.
+ Use this routine to determine how big a sequence is needed for
+ the statements in a parse tree. Its raison d'etre is this bit of
+ grammar:
+ stmt: simple_stmt | compound_stmt
+ simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+ A simple_stmt can contain multiple small_stmt elements joined
+ by semicolons. If the arg is a simple_stmt, the number of
+ small_stmt elements is returned.
+static string
+new_type_comment(const char *s, struct compiling *c)
+ PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
+ if (res == NULL)
+ return NULL;
+ if (PyArena_AddPyObject(c->c_arena, res) < 0) {
+ Py_DECREF(res);
+ return NULL;
+ }
+ return res;
+#define NEW_TYPE_COMMENT(n) new_type_comment(STR(n), c)
+/* Count the statements contained in CST node `n`.
+ *
+ * Handles single_input, file_input, stmt, compound_stmt, simple_stmt, suite
+ * and func_body_suite nodes; any other node type is a fatal error. */
+static int
+num_stmts(const node *n)
+{
+    int i, l;
+    node *ch;
+    switch (TYPE(n)) {
+        case single_input:
+            if (TYPE(CHILD(n, 0)) == NEWLINE)
+                return 0;
+            else
+                return num_stmts(CHILD(n, 0));
+        case file_input:
+            l = 0;
+            for (i = 0; i < NCH(n); i++) {
+                ch = CHILD(n, i);
+                if (TYPE(ch) == stmt)
+                    l += num_stmts(ch);
+            }
+            return l;
+        case stmt:
+            return num_stmts(CHILD(n, 0));
+        case compound_stmt:
+            return 1;
+        case simple_stmt:
+            return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
+        case suite:
+        case func_body_suite:
+            /* func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT */
+            /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
+            if (NCH(n) == 1)
+                return num_stmts(CHILD(n, 0));
+            else {
+                /* Skip NEWLINE INDENT, plus TYPE_COMMENT NEWLINE if present. */
+                i = 2;
+                l = 0;
+                if (TYPE(CHILD(n, 1)) == TYPE_COMMENT)
+                    i += 2;
+                /* Stop before the trailing DEDENT. */
+                for (; i < (NCH(n) - 1); i++)
+                    l += num_stmts(CHILD(n, i));
+                return l;
+            }
+        default: {
+            _Py_FatalErrorFormat(__func__, "Non-statement found: %d %d",
+                                 TYPE(n), NCH(n));
+        }
+    }
+    /* The default branch reports a fatal error; mark the end of this
+     * non-void function as unreachable. */
+    Py_UNREACHABLE();
+}
+/* Transform the CST rooted at node * to the appropriate AST
+ *
+ * `n` may be a file_input, eval_input, single_input or func_type_input node
+ * (optionally wrapped in an encoding_decl).  `filename` is a borrowed
+ * reference used for error reporting; all AST nodes are allocated in
+ * `arena`.  When `flags` requests PyCF_ONLY_AST, flags->cf_feature_version
+ * selects the accepted feature level; otherwise PY_MINOR_VERSION is used.
+ * Returns the module node, or NULL with an exception set on failure.
+*/
+mod_ty
+PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
+                     PyObject *filename, PyArena *arena)
+{
+    int i, j, k, num;
+    asdl_seq *stmts = NULL;
+    asdl_seq *type_ignores = NULL;
+    stmt_ty s;
+    node *ch;
+    struct compiling c;
+    mod_ty res = NULL;
+    asdl_seq *argtypes = NULL;
+    expr_ty ret, arg;
+    c.c_arena = arena;
+    /* borrowed reference */
+    c.c_filename = filename;
+    c.c_normalize = NULL;
+    c.c_feature_version = flags && (flags->cf_flags & PyCF_ONLY_AST) ?
+        flags->cf_feature_version : PY_MINOR_VERSION;
+    if (TYPE(n) == encoding_decl)
+        n = CHILD(n, 0);
+    /* k is the next free slot in `stmts` for file_input. */
+    k = 0;
+    switch (TYPE(n)) {
+        case file_input:
+            stmts = _Py_asdl_seq_new(num_stmts(n), arena);
+            if (!stmts)
+                goto out;
+            /* The last child is the ENDMARKER, handled separately below. */
+            for (i = 0; i < NCH(n) - 1; i++) {
+                ch = CHILD(n, i);
+                if (TYPE(ch) == NEWLINE)
+                    continue;
+                REQ(ch, stmt);
+                num = num_stmts(ch);
+                if (num == 1) {
+                    s = ast_for_stmt(&c, ch);
+                    if (!s)
+                        goto out;
+                    asdl_seq_SET(stmts, k++, s);
+                }
+                else {
+                    ch = CHILD(ch, 0);
+                    REQ(ch, simple_stmt);
+                    /* Small statements sit at even child indices. */
+                    for (j = 0; j < num; j++) {
+                        s = ast_for_stmt(&c, CHILD(ch, j * 2));
+                        if (!s)
+                            goto out;
+                        asdl_seq_SET(stmts, k++, s);
+                    }
+                }
+            }
+            /* Type ignores are stored under the ENDMARKER in file_input. */
+            ch = CHILD(n, NCH(n) - 1);
+            num = NCH(ch);
+            type_ignores = _Py_asdl_seq_new(num, arena);
+            if (!type_ignores)
+                goto out;
+            for (i = 0; i < num; i++) {
+                string type_comment = new_type_comment(STR(CHILD(ch, i)), &c);
+                if (!type_comment)
+                    goto out;
+                type_ignore_ty ti = TypeIgnore(LINENO(CHILD(ch, i)), type_comment, arena);
+                if (!ti)
+                    goto out;
+                asdl_seq_SET(type_ignores, i, ti);
+            }
+            res = Module(stmts, type_ignores, arena);
+            break;
+        case eval_input: {
+            expr_ty testlist_ast;
+            /* XXX Why not comp_for here? */
+            testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
+            if (!testlist_ast)
+                goto out;
+            res = Expression(testlist_ast, arena);
+            break;
+        }
+        case single_input:
+            if (TYPE(CHILD(n, 0)) == NEWLINE) {
+                /* An empty interactive line becomes a single Pass. */
+                stmts = _Py_asdl_seq_new(1, arena);
+                if (!stmts)
+                    goto out;
+                asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
+                                            n->n_end_lineno, n->n_end_col_offset,
+                                            arena));
+                if (!asdl_seq_GET(stmts, 0))
+                    goto out;
+                res = Interactive(stmts, arena);
+            }
+            else {
+                n = CHILD(n, 0);
+                num = num_stmts(n);
+                stmts = _Py_asdl_seq_new(num, arena);
+                if (!stmts)
+                    goto out;
+                if (num == 1) {
+                    s = ast_for_stmt(&c, n);
+                    if (!s)
+                        goto out;
+                    asdl_seq_SET(stmts, 0, s);
+                }
+                else {
+                    /* Only a simple_stmt can contain multiple statements. */
+                    REQ(n, simple_stmt);
+                    for (i = 0; i < NCH(n); i += 2) {
+                        if (TYPE(CHILD(n, i)) == NEWLINE)
+                            break;
+                        s = ast_for_stmt(&c, CHILD(n, i));
+                        if (!s)
+                            goto out;
+                        asdl_seq_SET(stmts, i / 2, s);
+                    }
+                }
+                res = Interactive(stmts, arena);
+            }
+            break;
+        case func_type_input:
+            n = CHILD(n, 0);
+            REQ(n, func_type);
+            if (TYPE(CHILD(n, 1)) == typelist) {
+                ch = CHILD(n, 1);
+                /* this is overly permissive -- we don't pay any attention to
+                 * stars on the args -- just parse them into an ordered list */
+                num = 0;
+                for (i = 0; i < NCH(ch); i++) {
+                    if (TYPE(CHILD(ch, i)) == test) {
+                        num++;
+                    }
+                }
+                argtypes = _Py_asdl_seq_new(num, arena);
+                if (!argtypes)
+                    goto out;
+                j = 0;
+                for (i = 0; i < NCH(ch); i++) {
+                    if (TYPE(CHILD(ch, i)) == test) {
+                        arg = ast_for_expr(&c, CHILD(ch, i));
+                        if (!arg)
+                            goto out;
+                        asdl_seq_SET(argtypes, j++, arg);
+                    }
+                }
+            }
+            else {
+                argtypes = _Py_asdl_seq_new(0, arena);
+                if (!argtypes)
+                    goto out;
+            }
+            /* The return type is the last child of func_type. */
+            ret = ast_for_expr(&c, CHILD(n, NCH(n) - 1));
+            if (!ret)
+                goto out;
+            res = FunctionType(argtypes, ret, arena);
+            break;
+        default:
+            PyErr_Format(PyExc_SystemError,
+                         "invalid node %d for PyAST_FromNode", TYPE(n));
+            goto out;
+    }
+ out:
+    /* Release the cached unicodedata.normalize reference, if any. */
+    if (c.c_normalize) {
+        Py_DECREF(c.c_normalize);
+    }
+    return res;
+}
+/* Same as PyAST_FromNodeObject(), but takes the filename as a C string
+ * decoded with the filesystem default encoding.
+ * Returns NULL if the filename cannot be decoded or conversion fails. */
+mod_ty
+PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
+               PyArena *arena)
+{
+    mod_ty mod;
+    PyObject *filename;
+    filename = PyUnicode_DecodeFSDefault(filename_str);
+    if (filename == NULL)
+        return NULL;
+    mod = PyAST_FromNodeObject(n, flags, filename, arena);
+    Py_DECREF(filename);
+    return mod;
+}
+/* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
+ *
+ * `n` is the operator token node.  '@' is rejected with a SyntaxError when
+ * the feature version is below 3.5.  Returns (operator_ty)0 for an
+ * unrecognised token or on error.
+*/
+static operator_ty
+get_operator(struct compiling *c, const node *n)
+{
+    switch (TYPE(n)) {
+        case VBAR:
+            return BitOr;
+        case CIRCUMFLEX:
+            return BitXor;
+        case AMPER:
+            return BitAnd;
+        case LEFTSHIFT:
+            return LShift;
+        case RIGHTSHIFT:
+            return RShift;
+        case PLUS:
+            return Add;
+        case MINUS:
+            return Sub;
+        case STAR:
+            return Mult;
+        case AT:
+            if (c->c_feature_version < 5) {
+                ast_error(c, n,
+                          "The '@' operator is only supported in Python 3.5 and greater");
+                return (operator_ty)0;
+            }
+            return MatMult;
+        case SLASH:
+            return Div;
+        case DOUBLESLASH:
+            return FloorDiv;
+        case PERCENT:
+            return Mod;
+        default:
+            return (operator_ty)0;
+    }
+}
+/* Names that may never be assignment targets.  The first three entries are
+ * the constant names; the table is NULL-terminated because its consumer
+ * scans until it reaches a null pointer. */
+static const char * const FORBIDDEN[] = {
+    "None",
+    "True",
+    "False",
+    "__debug__",
+    NULL
+};
+/* Check whether `name` is a forbidden assignment target.
+ *
+ * With full_checks == 0 the first three FORBIDDEN entries are skipped.
+ * The scan expects FORBIDDEN to end with a NULL pointer.
+ * Returns 1 (after raising a SyntaxError at node `n`) if the name is
+ * forbidden, 0 otherwise -- note the inverted sense compared with the
+ * validate_*() helpers. */
+static int
+forbidden_name(struct compiling *c, identifier name, const node *n,
+               int full_checks)
+{
+    assert(PyUnicode_Check(name));
+    const char * const *p = FORBIDDEN;
+    if (!full_checks) {
+        /* In most cases, the parser will protect True, False, and None
+           from being assign to. */
+        p += 3;
+    }
+    for (; *p; p++) {
+        if (_PyUnicode_EqualToASCIIString(name, *p)) {
+            ast_error(c, n, "cannot assign to %U", name);
+            return 1;
+        }
+    }
+    return 0;
+}
+/* Overwrite the source location of `e`: the start position comes from node
+ * `n`, the end position from node `end`.  A NULL `e` is passed through
+ * unchanged, so the call can wrap a constructor that may fail.
+ * Returns `e`. */
+static expr_ty
+copy_location(expr_ty e, const node *n, const node *end)
+{
+    if (e) {
+        e->lineno = LINENO(n);
+        e->col_offset = n->n_col_offset;
+        e->end_lineno = end->n_end_lineno;
+        e->end_col_offset = end->n_end_col_offset;
+    }
+    return e;
+}
+/* Return a human-readable description of the kind of expression `e`, for
+ * use in "cannot assign to ..." / "cannot delete ..." messages.
+ *
+ * For an unknown kind, sets SystemError and returns NULL. */
+static const char *
+get_expr_name(expr_ty e)
+{
+    switch (e->kind) {
+        case Attribute_kind:
+            return "attribute";
+        case Subscript_kind:
+            return "subscript";
+        case Starred_kind:
+            return "starred";
+        case Name_kind:
+            return "name";
+        case List_kind:
+            return "list";
+        case Tuple_kind:
+            return "tuple";
+        case Lambda_kind:
+            return "lambda";
+        case Call_kind:
+            return "function call";
+        case BoolOp_kind:
+        case BinOp_kind:
+        case UnaryOp_kind:
+            return "operator";
+        case GeneratorExp_kind:
+            return "generator expression";
+        case Yield_kind:
+        case YieldFrom_kind:
+            return "yield expression";
+        case Await_kind:
+            return "await expression";
+        case ListComp_kind:
+            return "list comprehension";
+        case SetComp_kind:
+            return "set comprehension";
+        case DictComp_kind:
+            return "dict comprehension";
+        case Dict_kind:
+            return "dict display";
+        case Set_kind:
+            return "set display";
+        case JoinedStr_kind:
+        case FormattedValue_kind:
+            return "f-string expression";
+        case Constant_kind: {
+            /* Singleton constants are reported by name, others as "literal". */
+            PyObject *value = e->v.Constant.value;
+            if (value == Py_None) {
+                return "None";
+            }
+            if (value == Py_False) {
+                return "False";
+            }
+            if (value == Py_True) {
+                return "True";
+            }
+            if (value == Py_Ellipsis) {
+                return "Ellipsis";
+            }
+            return "literal";
+        }
+        case Compare_kind:
+            return "comparison";
+        case IfExp_kind:
+            return "conditional expression";
+        case NamedExpr_kind:
+            return "named expression";
+        default:
+            PyErr_Format(PyExc_SystemError,
+                         "unexpected expression in assignment %d (line %d)",
+                         e->kind, e->lineno);
+            return NULL;
+    }
+}
+/* Set the context ctx for expr_ty e, recursively traversing e.
+   Only sets context for expr kinds that "can appear in assignment context"
+   (according to ../Parser/Python.asdl).  For other expr kinds, it sets
+   an appropriate syntax error and returns false.
+   `n` is the CST node used to locate any error that is raised.
+*/
+static int
+set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
+{
+    /* Elements to recurse into when e is a List or Tuple. */
+    asdl_seq *s = NULL;
+    /* Expressions in an augmented assignment have a Store context. */
+    switch (e->kind) {
+        case Attribute_kind:
+            e->v.Attribute.ctx = ctx;
+            if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
+                return 0;
+            break;
+        case Subscript_kind:
+            e->v.Subscript.ctx = ctx;
+            break;
+        case Starred_kind:
+            e->v.Starred.ctx = ctx;
+            if (!set_context(c, e->v.Starred.value, ctx, n))
+                return 0;
+            break;
+        case Name_kind:
+            if (ctx == Store) {
+                if (forbidden_name(c, e->v.Name.id, n, 0))
+                    return 0; /* forbidden_name() calls ast_error() */
+            }
+            e->v.Name.ctx = ctx;
+            break;
+        case List_kind:
+            e->v.List.ctx = ctx;
+            s = e->v.List.elts;
+            break;
+        case Tuple_kind:
+            e->v.Tuple.ctx = ctx;
+            s = e->v.Tuple.elts;
+            break;
+        default: {
+            /* A NULL name means get_expr_name() already set an error. */
+            const char *expr_name = get_expr_name(e);
+            if (expr_name != NULL) {
+                ast_error(c, n, "cannot %s %s",
+                          ctx == Store ? "assign to" : "delete",
+                          expr_name);
+            }
+            return 0;
+        }
+    }
+    /* If the LHS is a list or tuple, we need to set the assignment
+       context for all the contained elements.
+    */
+    if (s) {
+        Py_ssize_t i;
+        for (i = 0; i < asdl_seq_LEN(s); i++) {
+            if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
+                return 0;
+        }
+    }
+    return 1;
+}
+/* Map an augassign node to the corresponding binary operator.
+ *
+ * The operator is identified from the first one or two characters of the
+ * token text.  '@=' is rejected with a SyntaxError when the feature version
+ * is below 3.5.  Returns (operator_ty)0 with an exception set on error. */
+static operator_ty
+ast_for_augassign(struct compiling *c, const node *n)
+{
+    REQ(n, augassign);
+    n = CHILD(n, 0);
+    switch (STR(n)[0]) {
+        case '+':
+            return Add;
+        case '-':
+            return Sub;
+        case '/':
+            /* Second character distinguishes the two division operators. */
+            if (STR(n)[1] == '/')
+                return FloorDiv;
+            else
+                return Div;
+        case '%':
+            return Mod;
+        case '<':
+            return LShift;
+        case '>':
+            return RShift;
+        case '&':
+            return BitAnd;
+        case '^':
+            return BitXor;
+        case '|':
+            return BitOr;
+        case '*':
+            /* Second character distinguishes power from multiplication. */
+            if (STR(n)[1] == '*')
+                return Pow;
+            else
+                return Mult;
+        case '@':
+            if (c->c_feature_version < 5) {
+                ast_error(c, n,
+                          "The '@' operator is only supported in Python 3.5 and greater");
+                return (operator_ty)0;
+            }
+            return MatMult;
+        default:
+            PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
+            return (operator_ty)0;
+    }
+}
+/* Map a comp_op node to the corresponding comparison operator.
+ *
+ * One-child nodes are a single operator token or the keyword "in"/"is";
+ * two-child nodes are "not in" or "is not".
+ * Returns (cmpop_ty)0 with SystemError set for anything else. */
+static cmpop_ty
+ast_for_comp_op(struct compiling *c, const node *n)
+{
+    /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
+               |'is' 'not'
+    */
+    REQ(n, comp_op);
+    if (NCH(n) == 1) {
+        n = CHILD(n, 0);
+        switch (TYPE(n)) {
+            case LESS:
+                return Lt;
+            case GREATER:
+                return Gt;
+            case EQEQUAL:                       /* == */
+                return Eq;
+            case LESSEQUAL:
+                return LtE;
+            case GREATEREQUAL:
+                return GtE;
+            case NOTEQUAL:
+                return NotEq;
+            case NAME:
+                if (strcmp(STR(n), "in") == 0)
+                    return In;
+                if (strcmp(STR(n), "is") == 0)
+                    return Is;
+                /* fall through */
+            default:
+                PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
+                             STR(n));
+                return (cmpop_ty)0;
+        }
+    }
+    else if (NCH(n) == 2) {
+        /* handle "not in" and "is not" */
+        switch (TYPE(CHILD(n, 0))) {
+            case NAME:
+                /* "not in": the second token is "in". */
+                if (strcmp(STR(CHILD(n, 1)), "in") == 0)
+                    return NotIn;
+                /* "is not": the first token is "is". */
+                if (strcmp(STR(CHILD(n, 0)), "is") == 0)
+                    return IsNot;
+                /* fall through */
+            default:
+                PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
+                             STR(CHILD(n, 0)), STR(CHILD(n, 1)));
+                return (cmpop_ty)0;
+        }
+    }
+    PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
+                 NCH(n));
+    return (cmpop_ty)0;
+}
+static asdl_seq *
+seq_for_testlist(struct compiling *c, const node *n)
+ /* testlist: test (',' test)* [',']
+ testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
+ */
+ asdl_seq *seq;
+ expr_ty expression;
+ int i;
+ assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
+ seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
+ if (!seq)
+ return NULL;
+ for (i = 0; i < NCH(n); i += 2) {
+ const node *ch = CHILD(n, i);
+ assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr || TYPE(ch) == namedexpr_test);
+ expression = ast_for_expr(c, ch);
+ if (!expression)
+ return NULL;
+ assert(i / 2 < seq->size);
+ asdl_seq_SET(seq, i / 2, expression);
+ }
+ return seq;
+static arg_ty
+ast_for_arg(struct compiling *c, const node *n)
+ identifier name;
+ expr_ty annotation = NULL;
+ node *ch;
+ arg_ty ret;
+ assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
+ ch = CHILD(n, 0);
+ name = NEW_IDENTIFIER(ch);
+ if (!name)
+ return NULL;
+ if (forbidden_name(c, name, ch, 0))
+ return NULL;
+ if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
+ annotation = ast_for_expr(c, CHILD(n, 2));
+ if (!annotation)
+ return NULL;
+ }
+ ret = arg(name, annotation, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ if (!ret)
+ return NULL;
+ return ret;
+/* returns -1 if failed to handle keyword only arguments
+ returns new position to keep processing if successful
+ (',' tfpdef ['=' test])*
+ ^^^
+ start pointing here
+ */
+static int
+handle_keywordonly_args(struct compiling *c, const node *n, int start,
+ asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
+ PyObject *argname;
+ node *ch;
+ expr_ty expression, annotation;
+ arg_ty arg = NULL;
+ int i = start;
+ int j = 0; /* index for kwdefaults and kwonlyargs */
+ if (kwonlyargs == NULL) {
+ ast_error(c, CHILD(n, start), "named arguments must follow bare *");
+ return -1;
+ }
+ assert(kwdefaults != NULL);
+ while (i < NCH(n)) {
+ ch = CHILD(n, i);
+ switch (TYPE(ch)) {
+ case vfpdef:
+ case tfpdef:
+ if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
+ expression = ast_for_expr(c, CHILD(n, i + 2));
+ if (!expression)
+ goto error;
+ asdl_seq_SET(kwdefaults, j, expression);
+ i += 2; /* '=' and test */
+ }
+ else { /* setting NULL if no default value exists */
+ asdl_seq_SET(kwdefaults, j, NULL);
+ }
+ if (NCH(ch) == 3) {
+ /* ch is NAME ':' test */
+ annotation = ast_for_expr(c, CHILD(ch, 2));
+ if (!annotation)
+ goto error;
+ }
+ else {
+ annotation = NULL;
+ }
+ ch = CHILD(ch, 0);
+ argname = NEW_IDENTIFIER(ch);
+ if (!argname)
+ goto error;
+ if (forbidden_name(c, argname, ch, 0))
+ goto error;
+ arg = arg(argname, annotation, NULL, LINENO(ch), ch->n_col_offset,
+ ch->n_end_lineno, ch->n_end_col_offset,
+ c->c_arena);
+ if (!arg)
+ goto error;
+ asdl_seq_SET(kwonlyargs, j++, arg);
+ i += 1; /* the name */
+ if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
+ i += 1; /* the comma, if present */
+ break;
+ /* arg will be equal to the last argument processed */
+ arg->type_comment = NEW_TYPE_COMMENT(ch);
+ if (!arg->type_comment)
+ goto error;
+ i += 1;
+ break;
+ return i;
+ default:
+ ast_error(c, ch, "unexpected node");
+ goto error;
+ }
+ }
+ return i;
+ error:
+ return -1;
+/* Create AST for argument list. */
+static arguments_ty
+ast_for_arguments(struct compiling *c, const node *n)
+ /* This function handles both typedargslist (function definition)
+ and varargslist (lambda definition).
+ parameters: '(' [typedargslist] ')'
+ The following definition for typedarglist is equivalent to this set of rules:
+ arguments = argument (',' [TYPE_COMMENT] argument)*
+ argument = tfpdef ['=' test]
+ kwargs = '**' tfpdef [','] [TYPE_COMMENT]
+ args = '*' [tfpdef]
+ kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [','
+ [TYPE_COMMENT] [kwargs]])
+ args_kwonly_kwargs = args kwonly_kwargs | kwargs
+ poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [','
+ [TYPE_COMMENT] [args_kwonly_kwargs]])
+ typedargslist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
+ typedarglist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT]
+ typedargslist_no_posonly]])|(typedargslist_no_posonly)"
+ typedargslist: ( (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])*
+ ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] ( ','
+ [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
+ [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
+ [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
+ [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
+ (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
+ '**' tfpdef [','] [TYPE_COMMENT]]] ) | (tfpdef ['=' test] (','
+ [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ '*'
+ [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [','
+ [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) | '**' tfpdef [',']
+ [TYPE_COMMENT]]]) | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])*
+ (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) |
+ '**' tfpdef [','] [TYPE_COMMENT]))
+ tfpdef: NAME [':' test]
+ The following definition for varargslist is equivalent to this set of rules:
+ arguments = argument (',' argument )*
+ argument = vfpdef ['=' test]
+ kwargs = '**' vfpdef [',']
+ args = '*' [vfpdef]
+ kwonly_kwargs = (',' argument )* [',' [kwargs]]
+ args_kwonly_kwargs = args kwonly_kwargs | kwargs
+ poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
+ vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
+ varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] |
+ (vararglist_no_posonly)
+ varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['='
+ test] (',' vfpdef ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [','
+ ['**' vfpdef [',']]] | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])*
+ [',' ['**' vfpdef [',']]] | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef
+ ['=' test])* [',' [ '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+ | '**' vfpdef [',']]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef
+ [',']]] | '**' vfpdef [','])
+ vfpdef: NAME
+ */
+ int i, j, k, l, nposonlyargs=0, nposargs = 0, nkwonlyargs = 0;
+ int nposdefaults = 0, found_default = 0;
+ asdl_seq *posonlyargs, *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
+ arg_ty vararg = NULL, kwarg = NULL;
+ arg_ty arg = NULL;
+ node *ch;
+ if (TYPE(n) == parameters) {
+ if (NCH(n) == 2) /* () as argument list */
+ return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
+ n = CHILD(n, 1);
+ }
+ assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
+ /* First count the number of positional args & defaults. The
+ variable i is the loop index for this for loop and the next.
+ The next loop picks up where the first leaves off.
+ */
+ for (i = 0; i < NCH(n); i++) {
+ ch = CHILD(n, i);
+ if (TYPE(ch) == STAR) {
+ /* skip star */
+ i++;
+ if (i < NCH(n) && /* skip argument following star */
+ (TYPE(CHILD(n, i)) == tfpdef ||
+ TYPE(CHILD(n, i)) == vfpdef)) {
+ i++;
+ }
+ break;
+ }
+ if (TYPE(ch) == DOUBLESTAR) break;
+ if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
+ if (TYPE(ch) == EQUAL) nposdefaults++;
+ if (TYPE(ch) == SLASH ) {
+ nposonlyargs = nposargs;
+ nposargs = 0;
+ }
+ }
+ /* count the number of keyword only args &
+ defaults for keyword only args */
+ for ( ; i < NCH(n); ++i) {
+ ch = CHILD(n, i);
+ if (TYPE(ch) == DOUBLESTAR) break;
+ if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
+ }
+ posonlyargs = (nposonlyargs ? _Py_asdl_seq_new(nposonlyargs, c->c_arena) : NULL);
+ if (!posonlyargs && nposonlyargs) {
+ return NULL;
+ }
+ posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
+ if (!posargs && nposargs)
+ return NULL;
+ kwonlyargs = (nkwonlyargs ?
+ _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
+ if (!kwonlyargs && nkwonlyargs)
+ return NULL;
+ posdefaults = (nposdefaults ?
+ _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
+ if (!posdefaults && nposdefaults)
+ return NULL;
+ /* The length of kwonlyargs and kwdefaults are same
+ since we set NULL as default for keyword only argument w/o default
+ - we have sequence data structure, but no dictionary */
+ kwdefaults = (nkwonlyargs ?
+ _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
+ if (!kwdefaults && nkwonlyargs)
+ return NULL;
+ /* tfpdef: NAME [':' test]
+ vfpdef: NAME
+ */
+ i = 0;
+ j = 0; /* index for defaults */
+ k = 0; /* index for args */
+ l = 0; /* index for posonlyargs */
+ while (i < NCH(n)) {
+ ch = CHILD(n, i);
+ switch (TYPE(ch)) {
+ case tfpdef:
+ case vfpdef:
+ /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
+ anything other than EQUAL or a comma? */
+ /* XXX Should NCH(n) check be made a separate check? */
+ if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
+ expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
+ if (!expression)
+ return NULL;
+ assert(posdefaults != NULL);
+ asdl_seq_SET(posdefaults, j++, expression);
+ i += 2;
+ found_default = 1;
+ }
+ else if (found_default) {
+ ast_error(c, n,
+ "non-default argument follows default argument");
+ return NULL;
+ }
+ arg = ast_for_arg(c, ch);
+ if (!arg)
+ return NULL;
+ if (l < nposonlyargs) {
+ asdl_seq_SET(posonlyargs, l++, arg);
+ } else {
+ asdl_seq_SET(posargs, k++, arg);
+ }
+ i += 1; /* the name */
+ if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
+ i += 1; /* the comma, if present */
+ break;
+ case SLASH:
+ /* Advance the slash and the comma. If there are more names
+ * after the slash there will be a comma so we are advancing
+ * the correct number of nodes. If the slash is the last item,
+ * we will be advancing an extra token but then * i > NCH(n)
+ * and the enclosing while will finish correctly. */
+ i += 2;
+ break;
+ case STAR:
+ if (i+1 >= NCH(n) ||
+ (i+2 == NCH(n) && (TYPE(CHILD(n, i+1)) == COMMA
+ || TYPE(CHILD(n, i+1)) == TYPE_COMMENT))) {
+ ast_error(c, CHILD(n, i),
+ "named arguments must follow bare *");
+ return NULL;
+ }
+ ch = CHILD(n, i+1); /* tfpdef or COMMA */
+ if (TYPE(ch) == COMMA) {
+ int res = 0;
+ i += 2; /* now follows keyword only arguments */
+ if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
+ ast_error(c, CHILD(n, i),
+ "bare * has associated type comment");
+ return NULL;
+ }
+ res = handle_keywordonly_args(c, n, i,
+ kwonlyargs, kwdefaults);
+ if (res == -1) return NULL;
+ i = res; /* res has new position to process */
+ }
+ else {
+ vararg = ast_for_arg(c, ch);
+ if (!vararg)
+ return NULL;
+ i += 2; /* the star and the name */
+ if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
+ i += 1; /* the comma, if present */
+ if (i < NCH(n) && TYPE(CHILD(n, i)) == TYPE_COMMENT) {
+ vararg->type_comment = NEW_TYPE_COMMENT(CHILD(n, i));
+ if (!vararg->type_comment)
+ return NULL;
+ i += 1;
+ }
+ if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
+ || TYPE(CHILD(n, i)) == vfpdef)) {
+ int res = 0;
+ res = handle_keywordonly_args(c, n, i,
+ kwonlyargs, kwdefaults);
+ if (res == -1) return NULL;
+ i = res; /* res has new position to process */
+ }
+ }
+ break;
+ ch = CHILD(n, i+1); /* tfpdef */
+ assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
+ kwarg = ast_for_arg(c, ch);
+ if (!kwarg)
+ return NULL;
+ i += 2; /* the double star and the name */
+ if (i < NCH(n) && TYPE(CHILD(n, i)) == COMMA)
+ i += 1; /* the comma, if present */
+ break;
+ assert(i);
+ if (kwarg)
+ arg = kwarg;
+ /* arg will be equal to the last argument processed */
+ arg->type_comment = NEW_TYPE_COMMENT(ch);
+ if (!arg->type_comment)
+ return NULL;
+ i += 1;
+ break;
+ default:
+ PyErr_Format(PyExc_SystemError,
+ "unexpected node in varargslist: %d @ %d",
+ TYPE(ch), i);
+ return NULL;
+ }
+ }
+ return arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
+static expr_ty
+ast_for_decorator(struct compiling *c, const node *n)
+ /* decorator: '@' namedexpr_test NEWLINE */
+ REQ(n, decorator);
+ REQ(CHILD(n, 0), AT);
+ return ast_for_expr(c, CHILD(n, 1));
+static asdl_seq*
+ast_for_decorators(struct compiling *c, const node *n)
+ asdl_seq* decorator_seq;
+ expr_ty d;
+ int i;
+ REQ(n, decorators);
+ decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
+ if (!decorator_seq)
+ return NULL;
+ for (i = 0; i < NCH(n); i++) {
+ d = ast_for_decorator(c, CHILD(n, i));
+ if (!d)
+ return NULL;
+ asdl_seq_SET(decorator_seq, i, d);
+ }
+ return decorator_seq;
+static stmt_ty
+ast_for_funcdef_impl(struct compiling *c, const node *n0,
+ asdl_seq *decorator_seq, bool is_async)
+ /* funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite */
+ const node * const n = is_async ? CHILD(n0, 1) : n0;
+ identifier name;
+ arguments_ty args;
+ asdl_seq *body;
+ expr_ty returns = NULL;
+ int name_i = 1;
+ int end_lineno, end_col_offset;
+ node *tc;
+ string type_comment = NULL;
+ if (is_async && c->c_feature_version < 5) {
+ ast_error(c, n,
+ "Async functions are only supported in Python 3.5 and greater");
+ return NULL;
+ }
+ REQ(n, funcdef);
+ name = NEW_IDENTIFIER(CHILD(n, name_i));
+ if (!name)
+ return NULL;
+ if (forbidden_name(c, name, CHILD(n, name_i), 0))
+ return NULL;
+ args = ast_for_arguments(c, CHILD(n, name_i + 1));
+ if (!args)
+ return NULL;
+ if (TYPE(CHILD(n, name_i+2)) == RARROW) {
+ returns = ast_for_expr(c, CHILD(n, name_i + 3));
+ if (!returns)
+ return NULL;
+ name_i += 2;
+ }
+ if (TYPE(CHILD(n, name_i + 3)) == TYPE_COMMENT) {
+ type_comment = NEW_TYPE_COMMENT(CHILD(n, name_i + 3));
+ if (!type_comment)
+ return NULL;
+ name_i += 1;
+ }
+ body = ast_for_suite(c, CHILD(n, name_i + 3));
+ if (!body)
+ return NULL;
+ get_last_end_pos(body, &end_lineno, &end_col_offset);
+ if (NCH(CHILD(n, name_i + 3)) > 1) {
+ /* Check if the suite has a type comment in it. */
+ tc = CHILD(CHILD(n, name_i + 3), 1);
+ if (TYPE(tc) == TYPE_COMMENT) {
+ if (type_comment != NULL) {
+ ast_error(c, n, "Cannot have two type comments on def");
+ return NULL;
+ }
+ type_comment = NEW_TYPE_COMMENT(tc);
+ if (!type_comment)
+ return NULL;
+ }
+ }
+ if (is_async)
+ return AsyncFunctionDef(name, args, body, decorator_seq, returns, type_comment,
+ LINENO(n0), n0->n_col_offset, end_lineno, end_col_offset, c->c_arena);
+ else
+ return FunctionDef(name, args, body, decorator_seq, returns, type_comment,
+ LINENO(n), n->n_col_offset, end_lineno, end_col_offset, c->c_arena);
+static stmt_ty
+ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
+ /* async_funcdef: ASYNC funcdef */
+ REQ(n, async_funcdef);
+ REQ(CHILD(n, 0), ASYNC);
+ REQ(CHILD(n, 1), funcdef);
+ return ast_for_funcdef_impl(c, n, decorator_seq,
+ true /* is_async */);
+static stmt_ty
+ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
+ /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
+ return ast_for_funcdef_impl(c, n, decorator_seq,
+ false /* is_async */);
+static stmt_ty
+ast_for_async_stmt(struct compiling *c, const node *n)
+ /* async_stmt: ASYNC (funcdef | with_stmt | for_stmt) */
+ REQ(n, async_stmt);
+ REQ(CHILD(n, 0), ASYNC);
+ switch (TYPE(CHILD(n, 1))) {
+ case funcdef:
+ return ast_for_funcdef_impl(c, n, NULL,
+ true /* is_async */);
+ case with_stmt:
+ return ast_for_with_stmt(c, n,
+ true /* is_async */);
+ case for_stmt:
+ return ast_for_for_stmt(c, n,
+ true /* is_async */);
+ default:
+ PyErr_Format(PyExc_SystemError,
+ "invalid async stament: %s",
+ STR(CHILD(n, 1)));
+ return NULL;
+ }
+static stmt_ty
+ast_for_decorated(struct compiling *c, const node *n)
+ /* decorated: decorators (classdef | funcdef | async_funcdef) */
+ stmt_ty thing = NULL;
+ asdl_seq *decorator_seq = NULL;
+ REQ(n, decorated);
+ decorator_seq = ast_for_decorators(c, CHILD(n, 0));
+ if (!decorator_seq)
+ return NULL;
+ assert(TYPE(CHILD(n, 1)) == funcdef ||
+ TYPE(CHILD(n, 1)) == async_funcdef ||
+ TYPE(CHILD(n, 1)) == classdef);
+ if (TYPE(CHILD(n, 1)) == funcdef) {
+ thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
+ } else if (TYPE(CHILD(n, 1)) == classdef) {
+ thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
+ } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
+ thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
+ }
+ return thing;
+static expr_ty
+ast_for_namedexpr(struct compiling *c, const node *n)
+ /* namedexpr_test: test [':=' test]
+ argument: ( test [comp_for] |
+ test ':=' test |
+ test '=' test |
+ '**' test |
+ '*' test )
+ */
+ expr_ty target, value;
+ target = ast_for_expr(c, CHILD(n, 0));
+ if (!target)
+ return NULL;
+ value = ast_for_expr(c, CHILD(n, 2));
+ if (!value)
+ return NULL;
+ if (target->kind != Name_kind) {
+ const char *expr_name = get_expr_name(target);
+ if (expr_name != NULL) {
+ ast_error(c, n, "cannot use assignment expressions with %s", expr_name);
+ }
+ return NULL;
+ }
+ if (!set_context(c, target, Store, n))
+ return NULL;
+ return NamedExpr(target, value, LINENO(n), n->n_col_offset, n->n_end_lineno,
+ n->n_end_col_offset, c->c_arena);
+static expr_ty
+ast_for_lambdef(struct compiling *c, const node *n)
+ /* lambdef: 'lambda' [varargslist] ':' test
+ lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
+ arguments_ty args;
+ expr_ty expression;
+ if (NCH(n) == 3) {
+ args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
+ if (!args)
+ return NULL;
+ expression = ast_for_expr(c, CHILD(n, 2));
+ if (!expression)
+ return NULL;
+ }
+ else {
+ args = ast_for_arguments(c, CHILD(n, 1));
+ if (!args)
+ return NULL;
+ expression = ast_for_expr(c, CHILD(n, 3));
+ if (!expression)
+ return NULL;
+ }
+ return Lambda(args, expression, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+static expr_ty
+ast_for_ifexpr(struct compiling *c, const node *n)
+ /* test: or_test 'if' or_test 'else' test */
+ expr_ty expression, body, orelse;
+ assert(NCH(n) == 5);
+ body = ast_for_expr(c, CHILD(n, 0));
+ if (!body)
+ return NULL;
+ expression = ast_for_expr(c, CHILD(n, 2));
+ if (!expression)
+ return NULL;
+ orelse = ast_for_expr(c, CHILD(n, 4));
+ if (!orelse)
+ return NULL;
+ return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset,
+ c->c_arena);
+ Count the number of 'for' loops in a comprehension.
+ Helper for ast_for_comprehension().
+static int
+count_comp_fors(struct compiling *c, const node *n)
+ int n_fors = 0;
+ count_comp_for:
+ n_fors++;
+ REQ(n, comp_for);
+ if (NCH(n) == 2) {
+ REQ(CHILD(n, 0), ASYNC);
+ n = CHILD(n, 1);
+ }
+ else if (NCH(n) == 1) {
+ n = CHILD(n, 0);
+ }
+ else {
+ goto error;
+ }
+ if (NCH(n) == (5)) {
+ n = CHILD(n, 4);
+ }
+ else {
+ return n_fors;
+ }
+ count_comp_iter:
+ REQ(n, comp_iter);
+ n = CHILD(n, 0);
+ if (TYPE(n) == comp_for)
+ goto count_comp_for;
+ else if (TYPE(n) == comp_if) {
+ if (NCH(n) == 3) {
+ n = CHILD(n, 2);
+ goto count_comp_iter;
+ }
+ else
+ return n_fors;
+ }
+ error:
+ /* Should never be reached */
+ PyErr_SetString(PyExc_SystemError,
+ "logic error in count_comp_fors");
+ return -1;
+/* Count the number of 'if' statements in a comprehension.
+ Helper for ast_for_comprehension().
+static int
+count_comp_ifs(struct compiling *c, const node *n)
+ int n_ifs = 0;
+ while (1) {
+ REQ(n, comp_iter);
+ if (TYPE(CHILD(n, 0)) == comp_for)
+ return n_ifs;
+ n = CHILD(n, 0);
+ REQ(n, comp_if);
+ n_ifs++;
+ if (NCH(n) == 2)
+ return n_ifs;
+ n = CHILD(n, 2);
+ }
+static asdl_seq *
+ast_for_comprehension(struct compiling *c, const node *n)
+ int i, n_fors;
+ asdl_seq *comps;
+ n_fors = count_comp_fors(c, n);
+ if (n_fors == -1)
+ return NULL;
+ comps = _Py_asdl_seq_new(n_fors, c->c_arena);
+ if (!comps)
+ return NULL;
+ for (i = 0; i < n_fors; i++) {
+ comprehension_ty comp;
+ asdl_seq *t;
+ expr_ty expression, first;
+ node *for_ch;
+ node *sync_n;
+ int is_async = 0;
+ REQ(n, comp_for);
+ if (NCH(n) == 2) {
+ is_async = 1;
+ REQ(CHILD(n, 0), ASYNC);
+ sync_n = CHILD(n, 1);
+ }
+ else {
+ sync_n = CHILD(n, 0);
+ }
+ REQ(sync_n, sync_comp_for);
+ /* Async comprehensions only allowed in Python 3.6 and greater */
+ if (is_async && c->c_feature_version < 6) {
+ ast_error(c, n,
+ "Async comprehensions are only supported in Python 3.6 and greater");
+ return NULL;
+ }
+ for_ch = CHILD(sync_n, 1);
+ t = ast_for_exprlist(c, for_ch, Store);
+ if (!t)
+ return NULL;
+ expression = ast_for_expr(c, CHILD(sync_n, 3));
+ if (!expression)
+ return NULL;
+ /* Check the # of children rather than the length of t, since
+ (x for x, in ...) has 1 element in t, but still requires a Tuple. */
+ first = (expr_ty)asdl_seq_GET(t, 0);
+ if (NCH(for_ch) == 1)
+ comp = comprehension(first, expression, NULL,
+ is_async, c->c_arena);
+ else
+ comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset,
+ for_ch->n_end_lineno, for_ch->n_end_col_offset,
+ c->c_arena),
+ expression, NULL, is_async, c->c_arena);
+ if (!comp)
+ return NULL;
+ if (NCH(sync_n) == 5) {
+ int j, n_ifs;
+ asdl_seq *ifs;
+ n = CHILD(sync_n, 4);
+ n_ifs = count_comp_ifs(c, n);
+ if (n_ifs == -1)
+ return NULL;
+ ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
+ if (!ifs)
+ return NULL;
+ for (j = 0; j < n_ifs; j++) {
+ REQ(n, comp_iter);
+ n = CHILD(n, 0);
+ REQ(n, comp_if);
+ expression = ast_for_expr(c, CHILD(n, 1));
+ if (!expression)
+ return NULL;
+ asdl_seq_SET(ifs, j, expression);
+ if (NCH(n) == 3)
+ n = CHILD(n, 2);
+ }
+ /* on exit, must guarantee that n is a comp_for */
+ if (TYPE(n) == comp_iter)
+ n = CHILD(n, 0);
+ comp->ifs = ifs;
+ }
+ asdl_seq_SET(comps, i, comp);
+ }
+ return comps;
+static expr_ty
+ast_for_itercomp(struct compiling *c, const node *n, int type)
+ /* testlist_comp: (test|star_expr)
+ * ( comp_for | (',' (test|star_expr))* [','] ) */
+ expr_ty elt;
+ asdl_seq *comps;
+ node *ch;
+ assert(NCH(n) > 1);
+ ch = CHILD(n, 0);
+ elt = ast_for_expr(c, ch);
+ if (!elt)
+ return NULL;
+ if (elt->kind == Starred_kind) {
+ ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
+ return NULL;
+ }
+ comps = ast_for_comprehension(c, CHILD(n, 1));
+ if (!comps)
+ return NULL;
+ if (type == COMP_GENEXP)
+ return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ else if (type == COMP_LISTCOMP)
+ return ListComp(elt, comps, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ else if (type == COMP_SETCOMP)
+ return SetComp(elt, comps, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ else
+ /* Should never happen */
+ return NULL;
+/* Fills in the key, value pair corresponding to the dict element. In case
+ * of an unpacking, key is NULL. *i is advanced by the number of ast
+ * elements. Iff successful, nonzero is returned.
+ */
+static int
+ast_for_dictelement(struct compiling *c, const node *n, int *i,
+ expr_ty *key, expr_ty *value)
+ expr_ty expression;
+ if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
+ assert(NCH(n) - *i >= 2);
+ expression = ast_for_expr(c, CHILD(n, *i + 1));
+ if (!expression)
+ return 0;
+ *key = NULL;
+ *value = expression;
+ *i += 2;
+ }
+ else {
+ assert(NCH(n) - *i >= 3);
+ expression = ast_for_expr(c, CHILD(n, *i));
+ if (!expression)
+ return 0;
+ *key = expression;
+ REQ(CHILD(n, *i + 1), COLON);
+ expression = ast_for_expr(c, CHILD(n, *i + 2));
+ if (!expression)
+ return 0;
+ *value = expression;
+ *i += 3;
+ }
+ return 1;
+static expr_ty
+ast_for_dictcomp(struct compiling *c, const node *n)
+ expr_ty key, value;
+ asdl_seq *comps;
+ int i = 0;
+ if (!ast_for_dictelement(c, n, &i, &key, &value))
+ return NULL;
+ assert(key);
+ assert(NCH(n) - i >= 1);
+ comps = ast_for_comprehension(c, CHILD(n, i));
+ if (!comps)
+ return NULL;
+ return DictComp(key, value, comps, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+static expr_ty
+ast_for_dictdisplay(struct compiling *c, const node *n)
+ int i;
+ int j;
+ int size;
+ asdl_seq *keys, *values;
+ size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
+ keys = _Py_asdl_seq_new(size, c->c_arena);
+ if (!keys)
+ return NULL;
+ values = _Py_asdl_seq_new(size, c->c_arena);
+ if (!values)
+ return NULL;
+ j = 0;
+ for (i = 0; i < NCH(n); i++) {
+ expr_ty key, value;
+ if (!ast_for_dictelement(c, n, &i, &key, &value))
+ return NULL;
+ asdl_seq_SET(keys, j, key);
+ asdl_seq_SET(values, j, value);
+ j++;
+ }
+ keys->size = j;
+ values->size = j;
+ return Dict(keys, values, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+static expr_ty
+ast_for_genexp(struct compiling *c, const node *n)
+ assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
+ return ast_for_itercomp(c, n, COMP_GENEXP);
+static expr_ty
+ast_for_listcomp(struct compiling *c, const node *n)
+ assert(TYPE(n) == (testlist_comp));
+ return ast_for_itercomp(c, n, COMP_LISTCOMP);
+static expr_ty
+ast_for_setcomp(struct compiling *c, const node *n)
+ assert(TYPE(n) == (dictorsetmaker));
+ return ast_for_itercomp(c, n, COMP_SETCOMP);
+static expr_ty
+ast_for_setdisplay(struct compiling *c, const node *n)
+ int i;
+ int size;
+ asdl_seq *elts;
+ assert(TYPE(n) == (dictorsetmaker));
+ size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
+ elts = _Py_asdl_seq_new(size, c->c_arena);
+ if (!elts)
+ return NULL;
+ for (i = 0; i < NCH(n); i += 2) {
+ expr_ty expression;
+ expression = ast_for_expr(c, CHILD(n, i));
+ if (!expression)
+ return NULL;
+ asdl_seq_SET(elts, i / 2, expression);
+ }
+ return Set(elts, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+static expr_ty
+ast_for_atom(struct compiling *c, const node *n)
+ /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
+ | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
+ | '...' | 'None' | 'True' | 'False'
+ */
+ node *ch = CHILD(n, 0);
+ switch (TYPE(ch)) {
+ case NAME: {
+ PyObject *name;
+ const char *s = STR(ch);
+ size_t len = strlen(s);
+ if (len >= 4 && len <= 5) {
+ if (!strcmp(s, "None"))
+ return Constant(Py_None, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ if (!strcmp(s, "True"))
+ return Constant(Py_True, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ if (!strcmp(s, "False"))
+ return Constant(Py_False, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ name = new_identifier(s, c);
+ if (!name)
+ return NULL;
+ /* All names start in Load context, but may later be changed. */
+ return Name(name, Load, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ case STRING: {
+ expr_ty str = parsestrplus(c, n);
+ if (!str) {
+ const char *errtype = NULL;
+ if (PyErr_ExceptionMatches(PyExc_UnicodeError))
+ errtype = "unicode error";
+ else if (PyErr_ExceptionMatches(PyExc_ValueError))
+ errtype = "value error";
+ if (errtype) {
+ PyObject *type, *value, *tback, *errstr;
+ PyErr_Fetch(&type, &value, &tback);
+ errstr = PyObject_Str(value);
+ if (errstr) {
+ ast_error(c, n, "(%s) %U", errtype, errstr);
+ Py_DECREF(errstr);
+ }
+ else {
+ PyErr_Clear();
+ ast_error(c, n, "(%s) unknown error", errtype);
+ }
+ Py_DECREF(type);
+ Py_XDECREF(value);
+ Py_XDECREF(tback);
+ }
+ return NULL;
+ }
+ return str;
+ }
+ case NUMBER: {
+ PyObject *pynum;
+ /* Underscores in numeric literals are only allowed in Python 3.6 or greater */
+ /* Check for underscores here rather than in parse_number so we can report a line number on error */
+ if (c->c_feature_version < 6 && strchr(STR(ch), '_') != NULL) {
+ ast_error(c, ch,
+ "Underscores in numeric literals are only supported in Python 3.6 and greater");
+ return NULL;
+ }
+ pynum = parsenumber(c, STR(ch));
+ if (!pynum)
+ return NULL;
+ if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
+ Py_DECREF(pynum);
+ return NULL;
+ }
+ return Constant(pynum, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ case ELLIPSIS: /* Ellipsis */
+ return Constant(Py_Ellipsis, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ case LPAR: /* some parenthesized expressions */
+ ch = CHILD(n, 1);
+ if (TYPE(ch) == RPAR)
+ return Tuple(NULL, Load, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ if (TYPE(ch) == yield_expr)
+ return ast_for_expr(c, ch);
+ /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
+ if (NCH(ch) == 1) {
+ return ast_for_testlist(c, ch);
+ }
+ if (TYPE(CHILD(ch, 1)) == comp_for) {
+ return copy_location(ast_for_genexp(c, ch), n, n);
+ }
+ else {
+ return copy_location(ast_for_testlist(c, ch), n, n);
+ }
+ case LSQB: /* list (or list comprehension) */
+ ch = CHILD(n, 1);
+ if (TYPE(ch) == RSQB)
+ return List(NULL, Load, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ REQ(ch, testlist_comp);
+ if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
+ asdl_seq *elts = seq_for_testlist(c, ch);
+ if (!elts)
+ return NULL;
+ return List(elts, Load, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ else {
+ return copy_location(ast_for_listcomp(c, ch), n, n);
+ }
+ case LBRACE: {
+ /* dictorsetmaker: ( ((test ':' test | '**' test)
+ * (comp_for | (',' (test ':' test | '**' test))* [','])) |
+ * ((test | '*' test)
+ * (comp_for | (',' (test | '*' test))* [','])) ) */
+ expr_ty res;
+ ch = CHILD(n, 1);
+ if (TYPE(ch) == RBRACE) {
+ /* It's an empty dict. */
+ return Dict(NULL, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ else {
+ int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
+ if (NCH(ch) == 1 ||
+ (NCH(ch) > 1 &&
+ TYPE(CHILD(ch, 1)) == COMMA)) {
+ /* It's a set display. */
+ res = ast_for_setdisplay(c, ch);
+ }
+ else if (NCH(ch) > 1 &&
+ TYPE(CHILD(ch, 1)) == comp_for) {
+ /* It's a set comprehension. */
+ res = ast_for_setcomp(c, ch);
+ }
+ else if (NCH(ch) > 3 - is_dict &&
+ TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
+ /* It's a dictionary comprehension. */
+ if (is_dict) {
+ ast_error(c, n,
+ "dict unpacking cannot be used in dict comprehension");
+ return NULL;
+ }
+ res = ast_for_dictcomp(c, ch);
+ }
+ else {
+ /* It's a dictionary display. */
+ res = ast_for_dictdisplay(c, ch);
+ }
+ return copy_location(res, n, n);
+ }
+ }
+ default:
+ PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
+ return NULL;
+ }
+static expr_ty
+ast_for_slice(struct compiling *c, const node *n)
+{
+    /*
+       subscript: test | [test] ':' [test] [sliceop]
+       sliceop: ':' [test]
+
+       Converts one 'subscript' node.  A subscript that is just a single
+       test yields that expression unchanged; anything else yields a Slice
+       node whose lower/upper/step parts are NULL when absent.
+       Returns NULL if converting any part fails.
+    */
+    expr_ty lower = NULL, upper = NULL, step = NULL;
+    node *first, *last;
+    int upper_pos;
+
+    REQ(n, subscript);
+
+    first = CHILD(n, 0);
+    if (TYPE(first) == test) {
+        if (NCH(n) == 1) {
+            /* Plain index, not a slice. */
+            return ast_for_expr(c, first);
+        }
+        lower = ast_for_expr(c, first);
+        if (lower == NULL)
+            return NULL;
+    }
+
+    /* The upper bound, when present, directly follows the first ':' --
+       child 1 if the subscript starts with the colon, child 2 otherwise. */
+    upper_pos = (TYPE(first) == COLON) ? 1 : 2;
+    if (NCH(n) > upper_pos) {
+        node *candidate = CHILD(n, upper_pos);
+        if (TYPE(candidate) == test) {
+            upper = ast_for_expr(c, candidate);
+            if (upper == NULL)
+                return NULL;
+        }
+    }
+
+    /* A trailing sliceop with more than the bare ':' may carry the step. */
+    last = CHILD(n, NCH(n) - 1);
+    if (TYPE(last) == sliceop && NCH(last) != 1) {
+        node *step_node = CHILD(last, 1);
+        if (TYPE(step_node) == test) {
+            step = ast_for_expr(c, step_node);
+            if (step == NULL)
+                return NULL;
+        }
+    }
+
+    return Slice(lower, upper, step, LINENO(n), n->n_col_offset,
+                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+static expr_ty
+ast_for_binop(struct compiling *c, const node *n)
+{
+    /* Handles a chain of binary operators sharing one precedence level.
+       "A op B op C" is represented left-associatively as
+           BinOp(BinOp(A, op, B), op, C).
+       Operands sit at even child indices, operators at odd ones.  Every
+       BinOp starts where n starts and ends at its own right operand.
+       Returns NULL on error.
+    */
+    expr_ty left, right, acc;
+    operator_ty op;
+    const node *right_node;
+    int pair, npairs;
+
+    left = ast_for_expr(c, CHILD(n, 0));
+    if (left == NULL)
+        return NULL;
+
+    right_node = CHILD(n, 2);
+    right = ast_for_expr(c, right_node);
+    if (right == NULL)
+        return NULL;
+
+    op = get_operator(c, CHILD(n, 1));
+    if (!op)
+        return NULL;
+
+    acc = BinOp(left, op, right, LINENO(n), n->n_col_offset,
+                right_node->n_end_lineno, right_node->n_end_col_offset,
+                c->c_arena);
+    if (acc == NULL)
+        return NULL;
+
+    /* Fold each remaining (operator, operand) pair onto the accumulator. */
+    npairs = (NCH(n) - 1) / 2;
+    for (pair = 1; pair < npairs; pair++) {
+        const node *op_node = CHILD(n, 2 * pair + 1);
+
+        right_node = CHILD(n, 2 * pair + 2);
+        op = get_operator(c, op_node);
+        if (!op)
+            return NULL;
+
+        right = ast_for_expr(c, right_node);
+        if (right == NULL)
+            return NULL;
+
+        acc = BinOp(acc, op, right,
+                    LINENO(n), n->n_col_offset,
+                    right_node->n_end_lineno,
+                    right_node->n_end_col_offset,
+                    c->c_arena);
+        if (acc == NULL)
+            return NULL;
+    }
+    return acc;
+}
+static expr_ty
+ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr, const node *start)
+{
+    /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+       subscriptlist: subscript (',' subscript)* [',']
+       subscript: test | [test] ':' [test] [sliceop]
+
+       Applies one trailer to left_expr and returns the resulting Call,
+       Attribute or Subscript node.  The result starts at 'start' and ends
+       where the trailer n ends.  Returns NULL on error.
+    */
+    const node *subs;
+    expr_ty index;
+    int opener;
+
+    REQ(n, trailer);
+    opener = TYPE(CHILD(n, 0));
+
+    if (opener == LPAR) {
+        if (NCH(n) != 2) {
+            return ast_for_call(c, CHILD(n, 1), left_expr,
+                                start, CHILD(n, 0), CHILD(n, 2));
+        }
+        /* "()" -- a call without arguments. */
+        return Call(left_expr, NULL, NULL, LINENO(start), start->n_col_offset,
+                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+    }
+
+    if (opener == DOT) {
+        PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
+        if (attr_id == NULL)
+            return NULL;
+        return Attribute(left_expr, attr_id, Load,
+                         LINENO(start), start->n_col_offset,
+                         n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+    }
+
+    /* Only the subscript form is left. */
+    REQ(CHILD(n, 0), LSQB);
+    REQ(CHILD(n, 2), RSQB);
+    subs = CHILD(n, 1);
+
+    if (NCH(subs) == 1) {
+        index = ast_for_slice(c, CHILD(subs, 0));
+        if (index == NULL)
+            return NULL;
+    }
+    else {
+        /* Several comma-separated subscripts become a Tuple index. */
+        int k;
+        asdl_seq *elts = _Py_asdl_seq_new((NCH(subs) + 1) / 2, c->c_arena);
+        if (elts == NULL)
+            return NULL;
+        for (k = 0; k < NCH(subs); k += 2) {
+            expr_ty item = ast_for_slice(c, CHILD(subs, k));
+            if (item == NULL)
+                return NULL;
+            asdl_seq_SET(elts, k / 2, item);
+        }
+        index = Tuple(elts, Load, LINENO(subs), subs->n_col_offset,
+                      subs->n_end_lineno, subs->n_end_col_offset,
+                      c->c_arena);
+        if (index == NULL)
+            return NULL;
+    }
+
+    return Subscript(left_expr, index, Load,
+                     LINENO(start), start->n_col_offset,
+                     n->n_end_lineno, n->n_end_col_offset,
+                     c->c_arena);
+}
+static expr_ty
+ast_for_factor(struct compiling *c, const node *n)
+{
+    /* factor: ('+'|'-'|'~') factor | power
+
+       Called for the prefixed form only: child 0 is the sign token and
+       child 1 the operand.  The operand is converted first, then wrapped
+       in the matching UnaryOp.  Returns NULL on error; an unknown sign
+       token sets SystemError.
+    */
+    unaryop_ty op;
+    int sign;
+    expr_ty operand;
+
+    operand = ast_for_expr(c, CHILD(n, 1));
+    if (operand == NULL)
+        return NULL;
+
+    sign = TYPE(CHILD(n, 0));
+    if (sign == PLUS) {
+        op = UAdd;
+    }
+    else if (sign == MINUS) {
+        op = USub;
+    }
+    else if (sign == TILDE) {
+        op = Invert;
+    }
+    else {
+        PyErr_Format(PyExc_SystemError, "unhandled factor: %d", sign);
+        return NULL;
+    }
+
+    return UnaryOp(op, operand, LINENO(n), n->n_col_offset,
+                   n->n_end_lineno, n->n_end_col_offset,
+                   c->c_arena);
+}
+static expr_ty
+ast_for_atom_expr(struct compiling *c, const node *n)
+{
+    /* atom_expr: [AWAIT] atom trailer*
+
+       Converts the atom, applies each following trailer left to right,
+       and wraps the result in Await when the node starts with AWAIT.
+       Await is rejected when c->c_feature_version is below 5.
+       Returns NULL on error.
+    */
+    const node *atom_node;
+    expr_ty result;
+    int nch, pos;
+    int has_await = 0;
+
+    REQ(n, atom_expr);
+    nch = NCH(n);
+
+    if (TYPE(CHILD(n, 0)) == AWAIT) {
+        if (c->c_feature_version < 5) {
+            ast_error(c, n,
+                      "Await expressions are only supported in Python 3.5 and greater");
+            return NULL;
+        }
+        has_await = 1;
+        assert(nch > 1);
+    }
+
+    /* The atom is child 0, or child 1 when an AWAIT token precedes it. */
+    atom_node = CHILD(n, has_await);
+    result = ast_for_atom(c, atom_node);
+    if (result == NULL)
+        return NULL;
+    if (nch == 1)
+        return result;
+
+    /* Apply trailers until the children run out or a non-trailer shows up. */
+    pos = has_await + 1;
+    while (pos < nch && TYPE(CHILD(n, pos)) == trailer) {
+        result = ast_for_trailer(c, CHILD(n, pos), result, atom_node);
+        if (result == NULL)
+            return NULL;
+        pos++;
+    }
+
+    if (!has_await)
+        return result;
+
+    /* there was an 'await' */
+    return Await(result, LINENO(n), n->n_col_offset,
+                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+static expr_ty
+ast_for_power(struct compiling *c, const node *n)
+{
+    /* power: atom_expr ['**' factor]
+
+       Converts the base; when the last child is a factor, it is the
+       exponent and the result is BinOp(base, Pow, exponent).
+       Returns NULL on error.
+    */
+    const node *tail;
+    expr_ty base, exponent;
+
+    REQ(n, power);
+
+    base = ast_for_atom_expr(c, CHILD(n, 0));
+    if (base == NULL)
+        return NULL;
+    if (NCH(n) == 1)
+        return base;
+
+    tail = CHILD(n, NCH(n) - 1);
+    if (TYPE(tail) != factor)
+        return base;
+
+    exponent = ast_for_expr(c, tail);
+    if (exponent == NULL)
+        return NULL;
+
+    return BinOp(base, Pow, exponent, LINENO(n), n->n_col_offset,
+                 n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+static expr_ty
+ast_for_starred(struct compiling *c, const node *n)
+{
+    /* Converts a star_expr node: child 1 is the starred expression.
+       Returns the Starred node, or NULL on error. */
+    expr_ty value;
+
+    REQ(n, star_expr);
+
+    value = ast_for_expr(c, CHILD(n, 1));
+    if (value == NULL)
+        return NULL;
+
+    /* Load is only the initial context; it is changed later. */
+    return Starred(value, Load, LINENO(n), n->n_col_offset,
+                   n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+/* Do not name a variable 'expr'! Will cause a compile error. */
+static expr_ty
+ast_for_expr(struct compiling *c, const node *n)
+ /* handle the full range of simple expressions
+ namedexpr_test: test [':=' test]
+ test: or_test ['if' or_test 'else' test] | lambdef
+ test_nocond: or_test | lambdef_nocond
+ or_test: and_test ('or' and_test)*
+ and_test: not_test ('and' not_test)*
+ not_test: 'not' not_test | comparison
+ comparison: expr (comp_op expr)*
+ expr: xor_expr ('|' xor_expr)*
+ xor_expr: and_expr ('^' and_expr)*
+ and_expr: shift_expr ('&' shift_expr)*
+ shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+ arith_expr: term (('+'|'-') term)*
+ term: factor (('*'|'@'|'/'|'%'|'//') factor)*
+ factor: ('+'|'-'|'~') factor | power
+ power: atom_expr ['**' factor]
+ atom_expr: [AWAIT] atom trailer*
+ yield_expr: 'yield' [yield_arg]
+ Converts the CST node n into an expr_ty by switching on TYPE(n).
+ A node that only wraps a single child is unwrapped by pointing n at
+ that child and jumping back to 'loop', so chains of single-child nodes
+ are walked iteratively rather than by recursion.
+ Returns NULL on failure; a node type with no case sets SystemError.
+ */
+ asdl_seq *seq;
+ int i;
+ loop: /* re-entered each time n is replaced by its only child */
+ switch (TYPE(n)) {
+ case namedexpr_test:
+ if (NCH(n) == 3)
+ return ast_for_namedexpr(c, n);
+ /* Fallthrough */
+ case test:
+ case test_nocond:
+ if (TYPE(CHILD(n, 0)) == lambdef ||
+ TYPE(CHILD(n, 0)) == lambdef_nocond)
+ return ast_for_lambdef(c, CHILD(n, 0));
+ else if (NCH(n) > 1)
+ return ast_for_ifexpr(c, n);
+ /* Fallthrough */
+ case or_test:
+ case and_test:
+ if (NCH(n) == 1) {
+ n = CHILD(n, 0);
+ goto loop;
+ }
+ seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); /* one slot per operand */
+ if (!seq)
+ return NULL;
+ for (i = 0; i < NCH(n); i += 2) { /* operands are the even-indexed children */
+ expr_ty e = ast_for_expr(c, CHILD(n, i));
+ if (!e)
+ return NULL;
+ asdl_seq_SET(seq, i / 2, e);
+ }
+ if (!strcmp(STR(CHILD(n, 1)), "and")) /* child 1 is the first operator keyword */
+ return BoolOp(And, seq, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset,
+ c->c_arena);
+ assert(!strcmp(STR(CHILD(n, 1)), "or"));
+ return BoolOp(Or, seq, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ case not_test:
+ if (NCH(n) == 1) {
+ n = CHILD(n, 0);
+ goto loop;
+ }
+ else {
+ expr_ty expression = ast_for_expr(c, CHILD(n, 1));
+ if (!expression)
+ return NULL;
+ return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset,
+ c->c_arena);
+ }
+ case comparison:
+ if (NCH(n) == 1) {
+ n = CHILD(n, 0);
+ goto loop;
+ }
+ else {
+ expr_ty expression;
+ asdl_int_seq *ops;
+ asdl_seq *cmps;
+ ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena); /* one entry per comp_op */
+ if (!ops)
+ return NULL;
+ cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena); /* one entry per right-hand operand */
+ if (!cmps) {
+ return NULL;
+ }
+ for (i = 1; i < NCH(n); i += 2) { /* child i is the operator, child i + 1 its operand */
+ cmpop_ty newoperator;
+ newoperator = ast_for_comp_op(c, CHILD(n, i));
+ if (!newoperator) {
+ return NULL;
+ }
+ expression = ast_for_expr(c, CHILD(n, i + 1));
+ if (!expression) {
+ return NULL;
+ }
+ asdl_seq_SET(ops, i / 2, newoperator);
+ asdl_seq_SET(cmps, i / 2, expression);
+ }
+ expression = ast_for_expr(c, CHILD(n, 0)); /* the left-most operand is converted last */
+ if (!expression) {
+ return NULL;
+ }
+ return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ case star_expr:
+ return ast_for_starred(c, n);
+ /* The next five cases all handle BinOps. The main body of code
+ is the same in each case, but the switch turned inside out to
+ reuse the code for each type of operator.
+ */
+ case expr:
+ case xor_expr:
+ case and_expr:
+ case shift_expr:
+ case arith_expr:
+ case term:
+ if (NCH(n) == 1) {
+ n = CHILD(n, 0);
+ goto loop;
+ }
+ return ast_for_binop(c, n);
+ case yield_expr: {
+ node *an = NULL;
+ node *en = NULL;
+ int is_from = 0;
+ expr_ty exp = NULL; /* stays NULL for a bare 'yield' */
+ if (NCH(n) > 1)
+ an = CHILD(n, 1); /* yield_arg */
+ if (an) {
+ en = CHILD(an, NCH(an) - 1); /* last child of yield_arg holds the value */
+ if (NCH(an) == 2) { /* two children in yield_arg: handled as 'yield from' */
+ is_from = 1;
+ exp = ast_for_expr(c, en);
+ }
+ else
+ exp = ast_for_testlist(c, en);
+ if (!exp)
+ return NULL;
+ }
+ if (is_from)
+ return YieldFrom(exp, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ return Yield(exp, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ case factor:
+ if (NCH(n) == 1) {
+ n = CHILD(n, 0);
+ goto loop;
+ }
+ return ast_for_factor(c, n);
+ case power:
+ return ast_for_power(c, n);
+ default:
+ PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
+ return NULL;
+ }
+ /* should never get here unless if error is set */
+ return NULL;
+static expr_ty
+ast_for_call(struct compiling *c, const node *n, expr_ty func,
+             const node *start, const node *maybegenbeg, const node *closepar)
+{
+    /*
+      arglist: argument (',' argument)* [',']
+      argument: ( test [comp_for] | test ':=' test | '*' test |
+                  test '=' test | '**' test )
+
+      Builds the Call node for 'func' applied to the arguments in n.
+
+      start       - node giving the start position of the Call.
+      maybegenbeg - node giving the start position of a lone generator
+                    expression argument; when NULL, a generator
+                    expression argument is reported as "invalid syntax".
+      closepar    - node giving the end position of the Call (and of a
+                    lone generator expression argument).
+
+      Returns NULL with an error set on failure.
+    */
+    int i, nargs, nkeywords;
+    int ndoublestars;
+    asdl_seq *args;
+    asdl_seq *keywords;
+
+    REQ(n, arglist);
+
+    /* First pass: count positional and keyword arguments so both
+       sequences can be allocated with their final size. */
+    nargs = 0;
+    nkeywords = 0;
+    for (i = 0; i < NCH(n); i++) {
+        node *ch = CHILD(n, i);
+        if (TYPE(ch) == argument) {
+            if (NCH(ch) == 1)
+                nargs++;
+            else if (TYPE(CHILD(ch, 1)) == comp_for) {
+                nargs++;
+                if (!maybegenbeg) {
+                    ast_error(c, ch, "invalid syntax");
+                    return NULL;
+                }
+                if (NCH(n) > 1) {
+                    ast_error(c, ch, "Generator expression must be parenthesized");
+                    return NULL;
+                }
+            }
+            else if (TYPE(CHILD(ch, 0)) == STAR)
+                nargs++;
+            else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
+                nargs++;
+            }
+            else
+                /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
+                nkeywords++;
+        }
+    }
+
+    args = _Py_asdl_seq_new(nargs, c->c_arena);
+    if (!args)
+        return NULL;
+    keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
+    if (!keywords)
+        return NULL;
+
+    /* Second pass: convert the arguments and enforce ordering rules. */
+    nargs = 0;  /* positional arguments + iterable argument unpackings */
+    nkeywords = 0;  /* keyword arguments + keyword argument unpackings */
+    ndoublestars = 0;  /* just keyword argument unpackings */
+    for (i = 0; i < NCH(n); i++) {
+        node *ch = CHILD(n, i);
+        if (TYPE(ch) == argument) {
+            expr_ty e;
+            node *chch = CHILD(ch, 0);
+            if (NCH(ch) == 1) {
+                /* a positional argument */
+                if (nkeywords) {
+                    if (ndoublestars) {
+                        ast_error(c, chch,
+                                  "positional argument follows "
+                                  "keyword argument unpacking");
+                    }
+                    else {
+                        ast_error(c, chch,
+                                  "positional argument follows "
+                                  "keyword argument");
+                    }
+                    return NULL;
+                }
+                e = ast_for_expr(c, chch);
+                if (!e)
+                    return NULL;
+                asdl_seq_SET(args, nargs++, e);
+            }
+            else if (TYPE(chch) == STAR) {
+                /* an iterable argument unpacking */
+                expr_ty starred;
+                if (ndoublestars) {
+                    ast_error(c, chch,
+                              "iterable argument unpacking follows "
+                              "keyword argument unpacking");
+                    return NULL;
+                }
+                e = ast_for_expr(c, CHILD(ch, 1));
+                if (!e)
+                    return NULL;
+                starred = Starred(e, Load, LINENO(chch),
+                                  chch->n_col_offset,
+                                  e->end_lineno, e->end_col_offset,
+                                  c->c_arena);
+                if (!starred)
+                    return NULL;
+                asdl_seq_SET(args, nargs++, starred);
+            }
+            else if (TYPE(chch) == DOUBLESTAR) {
+                /* a keyword argument unpacking */
+                keyword_ty kw;
+                /* Step over the child that follows this argument; per the
+                   arglist grammar that can only be a ',' separator. */
+                i++;
+                e = ast_for_expr(c, CHILD(ch, 1));
+                if (!e)
+                    return NULL;
+                kw = keyword(NULL, e, chch->n_lineno, chch->n_col_offset,
+                             e->end_lineno, e->end_col_offset, c->c_arena);
+                /* Fail here instead of storing a NULL keyword in the
+                   sequence, matching the named-keyword branch below. */
+                if (!kw)
+                    return NULL;
+                asdl_seq_SET(keywords, nkeywords++, kw);
+                ndoublestars++;
+            }
+            else if (TYPE(CHILD(ch, 1)) == comp_for) {
+                /* the lone generator expression */
+                e = copy_location(ast_for_genexp(c, ch), maybegenbeg, closepar);
+                if (!e)
+                    return NULL;
+                asdl_seq_SET(args, nargs++, e);
+            }
+            else if (TYPE(CHILD(ch, 1)) == COLONEQUAL) {
+                /* treat colon equal as positional argument */
+                if (nkeywords) {
+                    if (ndoublestars) {
+                        ast_error(c, chch,
+                                  "positional argument follows "
+                                  "keyword argument unpacking");
+                    }
+                    else {
+                        ast_error(c, chch,
+                                  "positional argument follows "
+                                  "keyword argument");
+                    }
+                    return NULL;
+                }
+                e = ast_for_namedexpr(c, ch);
+                if (!e)
+                    return NULL;
+                asdl_seq_SET(args, nargs++, e);
+            }
+            else {
+                /* a keyword argument */
+                keyword_ty kw;
+                identifier key;
+
+                // To remain LL(1), the grammar accepts any test (basically, any
+                // expression) in the keyword slot of a call site.  So, we need
+                // to manually enforce that the keyword is a NAME here.
+                static const int name_tree[] = {
+                    test,
+                    or_test,
+                    and_test,
+                    not_test,
+                    comparison,
+                    expr,
+                    xor_expr,
+                    and_expr,
+                    shift_expr,
+                    arith_expr,
+                    term,
+                    factor,
+                    power,
+                    atom_expr,
+                    atom,
+                    0,
+                };
+                node *expr_node = chch;
+                /* Descend through the single-child wrappers listed above;
+                   a plain identifier ends at a NAME token.  The index has
+                   its own name so it does not shadow the outer 'i'. */
+                for (int depth = 0; name_tree[depth]; depth++) {
+                    if (TYPE(expr_node) != name_tree[depth])
+                        break;
+                    if (NCH(expr_node) != 1)
+                        break;
+                    expr_node = CHILD(expr_node, 0);
+                }
+                if (TYPE(expr_node) != NAME) {
+                    ast_error(c, chch,
+                              "expression cannot contain assignment, "
+                              "perhaps you meant \"==\"?");
+                    return NULL;
+                }
+                key = new_identifier(STR(expr_node), c);
+                if (key == NULL) {
+                    return NULL;
+                }
+                if (forbidden_name(c, key, chch, 1)) {
+                    return NULL;
+                }
+                e = ast_for_expr(c, CHILD(ch, 2));
+                if (!e)
+                    return NULL;
+                kw = keyword(key, e, chch->n_lineno, chch->n_col_offset,
+                             e->end_lineno, e->end_col_offset, c->c_arena);
+                if (!kw)
+                    return NULL;
+                asdl_seq_SET(keywords, nkeywords++, kw);
+            }
+        }
+    }
+
+    return Call(func, args, keywords, LINENO(start), start->n_col_offset,
+                closepar->n_end_lineno, closepar->n_end_col_offset, c->c_arena);
+}
+static expr_ty
+ast_for_testlist(struct compiling *c, const node* n)
+{
+    /* testlist_comp: test (comp_for | (',' test)* [',']) */
+    /* testlist: test (',' test)* [','] */
+    /* Accepts testlist, testlist_star_expr, or a testlist_comp that is
+       not a comprehension.  A single child yields that expression;
+       several children yield a Tuple.  Returns NULL on error. */
+    assert(NCH(n) > 0);
+    if (TYPE(n) != testlist_comp) {
+        assert(TYPE(n) == testlist ||
+               TYPE(n) == testlist_star_expr);
+    }
+    else if (NCH(n) > 1) {
+        assert(TYPE(CHILD(n, 1)) != comp_for);
+    }
+
+    if (NCH(n) != 1) {
+        asdl_seq *elts = seq_for_testlist(c, n);
+        if (elts == NULL)
+            return NULL;
+        return Tuple(elts, Load, LINENO(n), n->n_col_offset,
+                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+    }
+    return ast_for_expr(c, CHILD(n, 0));
+}
+static stmt_ty
+ast_for_expr_stmt(struct compiling *c, const node *n)
+ REQ(n, expr_stmt);
+ /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
+ [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
+ annassign: ':' test ['=' (yield_expr|testlist)]
+ testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+ augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+ '<<=' | '>>=' | '**=' | '//=')
+ test: ... here starts the operator precedence dance
+ Builds the statement for an expr_stmt node. Depending on the children
+ of n the result is a bare Expr, an AugAssign, an AnnAssign, or an
+ Assign (possibly with several '=' targets and a trailing type comment).
+ Note that the annassign branch below tests the value node for
+ testlist_star_expr rather than testlist.
+ Returns NULL on error.
+ */
+ int num = NCH(n);
+ if (num == 1) { /* a bare expression statement */
+ expr_ty e = ast_for_testlist(c, CHILD(n, 0));
+ if (!e)
+ return NULL;
+ return Expr(e, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ else if (TYPE(CHILD(n, 1)) == augassign) {
+ expr_ty expr1, expr2;
+ operator_ty newoperator;
+ node *ch = CHILD(n, 0);
+ expr1 = ast_for_testlist(c, ch);
+ if (!expr1)
+ return NULL;
+ /* Augmented assignments can only have a name, a subscript, or an
+ attribute on the left, though, so we have to explicitly check for
+ those. */
+ switch (expr1->kind) {
+ case Name_kind:
+ case Attribute_kind:
+ case Subscript_kind:
+ break;
+ default:
+ ast_error(c, ch, "'%s' is an illegal expression for augmented assignment",
+ get_expr_name(expr1));
+ return NULL;
+ }
+ /* set_context checks that most expressions are not the left side. */
+ if(!set_context(c, expr1, Store, ch)) {
+ return NULL;
+ }
+ ch = CHILD(n, 2); /* right-hand side of the augmented assignment */
+ if (TYPE(ch) == testlist)
+ expr2 = ast_for_testlist(c, ch);
+ else
+ expr2 = ast_for_expr(c, ch);
+ if (!expr2)
+ return NULL;
+ newoperator = ast_for_augassign(c, CHILD(n, 1));
+ if (!newoperator)
+ return NULL;
+ return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ else if (TYPE(CHILD(n, 1)) == annassign) {
+ expr_ty expr1, expr2, expr3;
+ node *ch = CHILD(n, 0);
+ node *deep, *ann = CHILD(n, 1);
+ int simple = 1; /* cleared below for parenthesized or non-Name targets */
+ /* AnnAssigns are only allowed in Python 3.6 or greater */
+ if (c->c_feature_version < 6) {
+ ast_error(c, ch,
+ "Variable annotation syntax is only supported in Python 3.6 and greater");
+ return NULL;
+ }
+ /* we keep track of parens to qualify (x) as expression not name */
+ deep = ch;
+ while (NCH(deep) == 1) { /* descend through single-child wrappers */
+ deep = CHILD(deep, 0);
+ }
+ if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
+ simple = 0;
+ }
+ expr1 = ast_for_testlist(c, ch);
+ if (!expr1) {
+ return NULL;
+ }
+ switch (expr1->kind) { /* only Name, Attribute and Subscript targets are accepted */
+ case Name_kind:
+ if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
+ return NULL;
+ }
+ expr1->v.Name.ctx = Store;
+ break;
+ case Attribute_kind:
+ if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
+ return NULL;
+ }
+ expr1->v.Attribute.ctx = Store;
+ break;
+ case Subscript_kind:
+ expr1->v.Subscript.ctx = Store;
+ break;
+ case List_kind:
+ ast_error(c, ch,
+ "only single target (not list) can be annotated");
+ return NULL;
+ case Tuple_kind:
+ ast_error(c, ch,
+ "only single target (not tuple) can be annotated");
+ return NULL;
+ default:
+ ast_error(c, ch,
+ "illegal target for annotation");
+ return NULL;
+ }
+ if (expr1->kind != Name_kind) {
+ simple = 0;
+ }
+ ch = CHILD(ann, 1); /* the annotation expression */
+ expr2 = ast_for_expr(c, ch);
+ if (!expr2) {
+ return NULL;
+ }
+ if (NCH(ann) == 2) { /* annotation only, no assigned value */
+ return AnnAssign(expr1, expr2, NULL, simple,
+ LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ else {
+ ch = CHILD(ann, 3); /* the value after '=' */
+ if (TYPE(ch) == testlist_star_expr) {
+ expr3 = ast_for_testlist(c, ch);
+ }
+ else {
+ expr3 = ast_for_expr(c, ch);
+ }
+ if (!expr3) {
+ return NULL;
+ }
+ return AnnAssign(expr1, expr2, expr3, simple,
+ LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ }
+ else {
+ int i, nch_minus_type, has_type_comment;
+ asdl_seq *targets;
+ node *value;
+ expr_ty expression;
+ string type_comment;
+ /* a normal assignment */
+ REQ(CHILD(n, 1), EQUAL);
+ has_type_comment = TYPE(CHILD(n, num - 1)) == TYPE_COMMENT; /* 1 when the last child is a TYPE_COMMENT */
+ nch_minus_type = num - has_type_comment;
+ targets = _Py_asdl_seq_new(nch_minus_type / 2, c->c_arena);
+ if (!targets)
+ return NULL;
+ for (i = 0; i < nch_minus_type - 2; i += 2) { /* even-indexed children before the final value are targets */
+ expr_ty e;
+ node *ch = CHILD(n, i);
+ if (TYPE(ch) == yield_expr) {
+ ast_error(c, ch, "assignment to yield expression not possible");
+ return NULL;
+ }
+ e = ast_for_testlist(c, ch);
+ if (!e)
+ return NULL;
+ /* set context to assign */
+ if (!set_context(c, e, Store, CHILD(n, i)))
+ return NULL;
+ asdl_seq_SET(targets, i / 2, e);
+ }
+ value = CHILD(n, nch_minus_type - 1); /* the last child that is not a type comment */
+ if (TYPE(value) == testlist_star_expr)
+ expression = ast_for_testlist(c, value);
+ else
+ expression = ast_for_expr(c, value);
+ if (!expression)
+ return NULL;
+ if (has_type_comment) {
+ type_comment = NEW_TYPE_COMMENT(CHILD(n, nch_minus_type));
+ if (!type_comment)
+ return NULL;
+ }
+ else
+ type_comment = NULL;
+ return Assign(targets, expression, type_comment, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+static asdl_seq *
+ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
+{
+    /* Converts the expressions of an exprlist node (the even-indexed
+       children) into a new sequence.  When 'context' is non-zero, it is
+       applied to every expression through set_context().
+       Returns NULL on error. */
+    asdl_seq *result;
+    int pos;
+
+    REQ(n, exprlist);
+
+    result = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
+    if (result == NULL)
+        return NULL;
+
+    for (pos = 0; pos < NCH(n); pos += 2) {
+        const node *item = CHILD(n, pos);
+        expr_ty e = ast_for_expr(c, item);
+        if (e == NULL)
+            return NULL;
+        /* Store first, then adjust the context, as the callers expect. */
+        asdl_seq_SET(result, pos / 2, e);
+        if (context && !set_context(c, e, context, item))
+            return NULL;
+    }
+    return result;
+}
+static stmt_ty
+ast_for_del_stmt(struct compiling *c, const node *n)
+{
+    /* del_stmt: 'del' exprlist
+
+       Converts the exprlist with the Del context and wraps it in a
+       Delete statement.  Returns NULL on error. */
+    asdl_seq *targets;
+
+    REQ(n, del_stmt);
+
+    targets = ast_for_exprlist(c, CHILD(n, 1), Del);
+    if (targets == NULL)
+        return NULL;
+
+    return Delete(targets, LINENO(n), n->n_col_offset,
+                  n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+static stmt_ty
+ast_for_flow_stmt(struct compiling *c, const node *n)
+{
+    /*
+      flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
+                 | yield_stmt
+      break_stmt: 'break'
+      continue_stmt: 'continue'
+      return_stmt: 'return' [testlist]
+      yield_stmt: yield_expr
+      yield_expr: 'yield' [yield_arg]
+      raise_stmt: 'raise' [test ['from' test]]
+
+      Every resulting statement takes its position from the flow_stmt
+      node n itself.  Returns NULL on error; an unexpected child type
+      sets SystemError.
+    */
+    const node *inner;
+    int lineno, col_offset, end_lineno, end_col_offset;
+
+    REQ(n, flow_stmt);
+
+    inner = CHILD(n, 0);
+    lineno = LINENO(n);
+    col_offset = n->n_col_offset;
+    end_lineno = n->n_end_lineno;
+    end_col_offset = n->n_end_col_offset;
+
+    switch (TYPE(inner)) {
+    case break_stmt:
+        return Break(lineno, col_offset,
+                     end_lineno, end_col_offset, c->c_arena);
+    case continue_stmt:
+        return Continue(lineno, col_offset,
+                        end_lineno, end_col_offset, c->c_arena);
+    case yield_stmt: {
+        /* will reduce to yield_expr */
+        expr_ty yielded = ast_for_expr(c, CHILD(inner, 0));
+        if (yielded == NULL)
+            return NULL;
+        return Expr(yielded, lineno, col_offset,
+                    end_lineno, end_col_offset, c->c_arena);
+    }
+    case return_stmt: {
+        expr_ty value = NULL;
+        if (NCH(inner) != 1) {
+            value = ast_for_testlist(c, CHILD(inner, 1));
+            if (value == NULL)
+                return NULL;
+        }
+        return Return(value, lineno, col_offset,
+                      end_lineno, end_col_offset, c->c_arena);
+    }
+    case raise_stmt:
+        if (NCH(inner) >= 2) {
+            expr_ty cause = NULL;
+            expr_ty exc = ast_for_expr(c, CHILD(inner, 1));
+            if (exc == NULL)
+                return NULL;
+            /* Four children: the last one is the cause expression. */
+            if (NCH(inner) == 4) {
+                cause = ast_for_expr(c, CHILD(inner, 3));
+                if (cause == NULL)
+                    return NULL;
+            }
+            return Raise(exc, cause, lineno, col_offset,
+                         end_lineno, end_col_offset, c->c_arena);
+        }
+        if (NCH(inner) == 1) {
+            /* bare 'raise' */
+            return Raise(NULL, NULL, lineno, col_offset,
+                         end_lineno, end_col_offset, c->c_arena);
+        }
+        /* fall through */
+    default:
+        PyErr_Format(PyExc_SystemError,
+                     "unexpected flow_stmt: %d", TYPE(inner));
+        return NULL;
+    }
+}
+static alias_ty
+alias_for_import_name(struct compiling *c, const node *n, int store)
+ /*
+ import_as_name: NAME ['as' NAME]
+ dotted_as_name: dotted_name ['as' NAME]
+ dotted_name: NAME ('.' NAME)*
+ Builds the alias node for one imported name. n may be an
+ import_as_name, dotted_as_name, dotted_name or STAR node.
+ 'store' gates some of the forbidden_name() checks:
+ - import_as_name: the 'as' name is checked only when store is set;
+ without an 'as' part the name itself is always checked.
+ - dotted_as_name: with an 'as' part the dotted name is converted with
+ store == 0 and the 'as' name is always checked; with a single child
+ the function continues on that child with store unchanged.
+ - dotted_name: a single NAME is checked only when store is set; a
+ multi-part name is joined with '.' and not checked.
+ Returns NULL on error; an unexpected node type sets SystemError.
+ */
+ identifier str, name;
+ loop: /* re-entered when a dotted_as_name has a single child */
+ switch (TYPE(n)) {
+ case import_as_name: {
+ node *name_node = CHILD(n, 0);
+ str = NULL; /* stays NULL when there is no 'as' part */
+ name = NEW_IDENTIFIER(name_node);
+ if (!name)
+ return NULL;
+ if (NCH(n) == 3) {
+ node *str_node = CHILD(n, 2);
+ str = NEW_IDENTIFIER(str_node);
+ if (!str)
+ return NULL;
+ if (store && forbidden_name(c, str, str_node, 0))
+ return NULL;
+ }
+ else {
+ if (forbidden_name(c, name, name_node, 0))
+ return NULL;
+ }
+ return alias(name, str, c->c_arena);
+ }
+ case dotted_as_name:
+ if (NCH(n) == 1) {
+ n = CHILD(n, 0);
+ goto loop;
+ }
+ else {
+ node *asname_node = CHILD(n, 2);
+ alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
+ if (!a)
+ return NULL;
+ assert(!a->asname);
+ a->asname = NEW_IDENTIFIER(asname_node);
+ if (!a->asname)
+ return NULL;
+ if (forbidden_name(c, a->asname, asname_node, 0))
+ return NULL;
+ return a;
+ }
+ case dotted_name:
+ if (NCH(n) == 1) {
+ node *name_node = CHILD(n, 0);
+ name = NEW_IDENTIFIER(name_node);
+ if (!name)
+ return NULL;
+ if (store && forbidden_name(c, name, name_node, 0))
+ return NULL;
+ return alias(name, NULL, c->c_arena);
+ }
+ else {
+ /* Create a string of the form "a.b.c" */
+ int i;
+ size_t len;
+ char *s;
+ PyObject *uni;
+ len = 0;
+ for (i = 0; i < NCH(n); i += 2)
+ /* length of string plus one for the dot */
+ len += strlen(STR(CHILD(n, i))) + 1;
+ len--; /* the last name doesn't have a dot */
+ str = PyBytes_FromStringAndSize(NULL, len); /* temporary bytes buffer, filled in below */
+ if (!str)
+ return NULL;
+ s = PyBytes_AS_STRING(str);
+ if (!s)
+ return NULL;
+ for (i = 0; i < NCH(n); i += 2) { /* NAME tokens are the even-indexed children */
+ char *sch = STR(CHILD(n, i));
+ strcpy(s, STR(CHILD(n, i)));
+ s += strlen(sch);
+ *s++ = '.'; /* the dot written after the last name is undone below */
+ }
+ --s;
+ *s = '\0';
+ uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
+ PyBytes_GET_SIZE(str),
+ NULL);
+ Py_DECREF(str); /* the bytes buffer is released whether or not decoding worked */
+ if (!uni)
+ return NULL;
+ str = uni;
+ PyUnicode_InternInPlace(&str);
+ if (PyArena_AddPyObject(c->c_arena, str) < 0) { /* on failure this function still holds the reference */
+ Py_DECREF(str);
+ return NULL;
+ }
+ return alias(str, NULL, c->c_arena);
+ }
+ case STAR:
+ str = PyUnicode_InternFromString("*");
+ if (!str)
+ return NULL;
+ if (PyArena_AddPyObject(c->c_arena, str) < 0) { /* on failure this function still holds the reference */
+ Py_DECREF(str);
+ return NULL;
+ }
+ return alias(str, NULL, c->c_arena);
+ default:
+ PyErr_Format(PyExc_SystemError,
+ "unexpected import name: %d", TYPE(n));
+ return NULL;
+ }
+static stmt_ty
+ast_for_import_stmt(struct compiling *c, const node *n)
+ /*
+ import_stmt: import_name | import_from
+ import_name: 'import' dotted_as_names
+ import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
+ 'import' ('*' | '(' import_as_names ')' | import_as_names)
+ Builds an Import or ImportFrom statement. lineno and col_offset are
+ read from the import_stmt node up front because n is re-pointed at
+ inner nodes while walking. For ImportFrom, ndots counts the leading
+ dots of a relative import and modname stays NULL when no dotted_name
+ is present.
+ Returns NULL on error; a first child that is neither import_name nor
+ import_from sets SystemError.
+ */
+ int lineno;
+ int col_offset;
+ int i;
+ asdl_seq *aliases;
+ REQ(n, import_stmt);
+ lineno = LINENO(n);
+ col_offset = n->n_col_offset;
+ n = CHILD(n, 0); /* import_name or import_from */
+ if (TYPE(n) == import_name) {
+ n = CHILD(n, 1);
+ REQ(n, dotted_as_names);
+ aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
+ if (!aliases)
+ return NULL;
+ for (i = 0; i < NCH(n); i += 2) { /* names are the even-indexed children */
+ alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
+ if (!import_alias)
+ return NULL;
+ asdl_seq_SET(aliases, i / 2, import_alias);
+ }
+ // Even though n is modified above, the end position is not changed
+ return Import(aliases, lineno, col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ else if (TYPE(n) == import_from) {
+ int n_children;
+ int idx, ndots = 0;
+ const node *n_copy = n; /* keeps the import_from node for the end position */
+ alias_ty mod = NULL;
+ identifier modname = NULL;
+ /* Count the number of dots (for relative imports) and check for the
+ optional module name */
+ for (idx = 1; idx < NCH(n); idx++) {
+ if (TYPE(CHILD(n, idx)) == dotted_name) {
+ mod = alias_for_import_name(c, CHILD(n, idx), 0);
+ if (!mod)
+ return NULL;
+ idx++; /* step past the dotted_name */
+ break;
+ } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
+ /* three consecutive dots are tokenized as one ELLIPSIS */
+ ndots += 3;
+ continue;
+ } else if (TYPE(CHILD(n, idx)) != DOT) {
+ break;
+ }
+ ndots++; /* a single DOT token */
+ }
+ idx++; /* skip over the 'import' keyword */
+ switch (TYPE(CHILD(n, idx))) {
+ case STAR:
+ /* from ... import * */
+ n = CHILD(n, idx);
+ n_children = 1;
+ break;
+ case LPAR:
+ /* from ... import (x, y, z) */
+ n = CHILD(n, idx + 1); /* the import_as_names inside the parentheses */
+ n_children = NCH(n);
+ break;
+ case import_as_names:
+ /* from ... import x, y, z */
+ n = CHILD(n, idx);
+ n_children = NCH(n);
+ if (n_children % 2 == 0) { /* an even child count means the list ends with ',' */
+ ast_error(c, n,
+ "trailing comma not allowed without"
+ " surrounding parentheses");
+ return NULL;
+ }
+ break;
+ default:
+ ast_error(c, n, "Unexpected node-type in from-import");
+ return NULL;
+ }
+ aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
+ if (!aliases)
+ return NULL;
+ /* handle "from ... import *" special b/c there's no children */
+ if (TYPE(n) == STAR) {
+ alias_ty import_alias = alias_for_import_name(c, n, 1);
+ if (!import_alias)
+ return NULL;
+ asdl_seq_SET(aliases, 0, import_alias);
+ }
+ else {
+ for (i = 0; i < NCH(n); i += 2) { /* names are the even-indexed children */
+ alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
+ if (!import_alias)
+ return NULL;
+ asdl_seq_SET(aliases, i / 2, import_alias);
+ }
+ }
+ if (mod != NULL)
+ modname = mod->name;
+ return ImportFrom(modname, aliases, ndots, lineno, col_offset,
+ n_copy->n_end_lineno, n_copy->n_end_col_offset,
+ c->c_arena);
+ }
+ PyErr_Format(PyExc_SystemError,
+ "unknown import statement: starts with command '%s'",
+ STR(CHILD(n, 0)));
+ return NULL;
+static stmt_ty
+ast_for_global_stmt(struct compiling *c, const node *n)
+{
+    /* global_stmt: 'global' NAME (',' NAME)*
+
+       Collects the declared names (the odd-indexed children) into a
+       Global statement.  Returns NULL on error. */
+    asdl_seq *names;
+    int pos, slot;
+
+    REQ(n, global_stmt);
+
+    names = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
+    if (names == NULL)
+        return NULL;
+
+    slot = 0;
+    for (pos = 1; pos < NCH(n); pos += 2) {
+        identifier ident = NEW_IDENTIFIER(CHILD(n, pos));
+        if (ident == NULL)
+            return NULL;
+        asdl_seq_SET(names, slot, ident);
+        slot++;
+    }
+
+    return Global(names, LINENO(n), n->n_col_offset,
+                  n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+static stmt_ty
+ast_for_nonlocal_stmt(struct compiling *c, const node *n)
+{
+    /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+
+       Collects the declared names (the odd-indexed children) into a
+       Nonlocal statement.  Returns NULL on error. */
+    asdl_seq *names;
+    int pos, slot;
+
+    REQ(n, nonlocal_stmt);
+
+    names = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
+    if (names == NULL)
+        return NULL;
+
+    slot = 0;
+    for (pos = 1; pos < NCH(n); pos += 2) {
+        identifier ident = NEW_IDENTIFIER(CHILD(n, pos));
+        if (ident == NULL)
+            return NULL;
+        asdl_seq_SET(names, slot, ident);
+        slot++;
+    }
+
+    return Nonlocal(names, LINENO(n), n->n_col_offset,
+                    n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+static stmt_ty
+ast_for_assert_stmt(struct compiling *c, const node *n)
+{
+    /* assert_stmt: 'assert' test [',' test]
+
+       Child 1 is the condition and child 3, when present, the message.
+       Returns NULL on error; any child count other than 2 or 4 sets
+       SystemError. */
+    expr_ty condition;
+    expr_ty message = NULL;
+
+    REQ(n, assert_stmt);
+
+    if (NCH(n) != 2 && NCH(n) != 4) {
+        PyErr_Format(PyExc_SystemError,
+                     "improper number of parts to 'assert' statement: %d",
+                     NCH(n));
+        return NULL;
+    }
+
+    condition = ast_for_expr(c, CHILD(n, 1));
+    if (condition == NULL)
+        return NULL;
+
+    if (NCH(n) == 4) {
+        message = ast_for_expr(c, CHILD(n, 3));
+        if (message == NULL)
+            return NULL;
+    }
+
+    return Assert(condition, message, LINENO(n), n->n_col_offset,
+                  n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+}
+static asdl_seq *
+ast_for_suite(struct compiling *c, const node *n)
+ /* suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT
+ Flattens a suite (or func_body_suite) node into one sequence of
+ statements. The sequence is sized with num_stmts(n) and filled in
+ source order; the final assert checks that every slot was filled.
+ Returns NULL if converting any statement fails.
+ */
+ asdl_seq *seq;
+ stmt_ty s;
+ int i, total, num, end, pos = 0;
+ node *ch;
+ if (TYPE(n) != func_body_suite) {
+ REQ(n, suite);
+ }
+ total = num_stmts(n);
+ seq = _Py_asdl_seq_new(total, c->c_arena);
+ if (!seq)
+ return NULL;
+ if (TYPE(CHILD(n, 0)) == simple_stmt) { /* one-line suite */
+ n = CHILD(n, 0);
+ /* simple_stmt always ends with a NEWLINE,
+ and may have a trailing SEMI
+ */
+ end = NCH(n) - 1;
+ if (TYPE(CHILD(n, end - 1)) == SEMI)
+ end--;
+ /* loop by 2 to skip semi-colons */
+ for (i = 0; i < end; i += 2) {
+ ch = CHILD(n, i);
+ s = ast_for_stmt(c, ch);
+ if (!s)
+ return NULL;
+ asdl_seq_SET(seq, pos++, s);
+ }
+ }
+ else {
+ i = 2; /* skip NEWLINE and INDENT */
+ if (TYPE(CHILD(n, 1)) == TYPE_COMMENT) {
+ i += 2; /* also skip TYPE_COMMENT and the NEWLINE after it */
+ }
+ for (; i < (NCH(n) - 1); i++) { /* stops before the last child (DEDENT) */
+ ch = CHILD(n, i);
+ REQ(ch, stmt);
+ num = num_stmts(ch);
+ if (num == 1) {
+ /* small_stmt or compound_stmt with only one child */
+ s = ast_for_stmt(c, ch);
+ if (!s)
+ return NULL;
+ asdl_seq_SET(seq, pos++, s);
+ }
+ else {
+ int j;
+ ch = CHILD(ch, 0); /* several statements on one line: walk the simple_stmt */
+ REQ(ch, simple_stmt);
+ for (j = 0; j < NCH(ch); j += 2) {
+ /* statement terminates with a semi-colon ';' */
+ if (NCH(CHILD(ch, j)) == 0) {
+ assert((j + 1) == NCH(ch));
+ break;
+ }
+ s = ast_for_stmt(c, CHILD(ch, j));
+ if (!s)
+ return NULL;
+ asdl_seq_SET(seq, pos++, s);
+ }
+ }
+ }
+ }
+ assert(pos == seq->size);
+ return seq;
+static void
+get_last_end_pos(asdl_seq *s, int *end_lineno, int *end_col_offset)
+{
+    /* Writes the end position of the last statement in s to
+       *end_lineno and *end_col_offset. */
+    const Py_ssize_t count = asdl_seq_LEN(s);
+    stmt_ty tail;
+
+    // There must be no empty suites.
+    assert(count > 0);
+
+    tail = asdl_seq_GET(s, count - 1);
+    *end_lineno = tail->end_lineno;
+    *end_col_offset = tail->end_col_offset;
+}
+static stmt_ty
+ast_for_if_stmt(struct compiling *c, const node *n)
+ /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
+ ['else' ':' suite]
+ */
+ char *s;
+ int end_lineno, end_col_offset;
+ REQ(n, if_stmt);
+ if (NCH(n) == 4) {
+ expr_ty expression;
+ asdl_seq *suite_seq;
+ expression = ast_for_expr(c, CHILD(n, 1));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, CHILD(n, 3));
+ if (!suite_seq)
+ return NULL;
+ get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
+ return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ s = STR(CHILD(n, 4));
+ /* s[2], the third character in the string, will be
+ 's' for el_s_e, or
+ 'i' for el_i_f
+ */
+ if (s[2] == 's') {
+ expr_ty expression;
+ asdl_seq *seq1, *seq2;
+ expression = ast_for_expr(c, CHILD(n, 1));
+ if (!expression)
+ return NULL;
+ seq1 = ast_for_suite(c, CHILD(n, 3));
+ if (!seq1)
+ return NULL;
+ seq2 = ast_for_suite(c, CHILD(n, 6));
+ if (!seq2)
+ return NULL;
+ get_last_end_pos(seq2, &end_lineno, &end_col_offset);
+ return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ else if (s[2] == 'i') {
+ int i, n_elif, has_else = 0;
+ expr_ty expression;
+ asdl_seq *suite_seq;
+ asdl_seq *orelse = NULL;
+ n_elif = NCH(n) - 4;
+ /* must reference the child n_elif+1 since 'else' token is third,
+ not fourth, child from the end. */
+ if (TYPE(CHILD(n, (n_elif + 1))) == NAME
+ && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
+ has_else = 1;
+ n_elif -= 3;
+ }
+ n_elif /= 4;
+ if (has_else) {
+ asdl_seq *suite_seq2;
+ orelse = _Py_asdl_seq_new(1, c->c_arena);
+ if (!orelse)
+ return NULL;
+ expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
+ if (!suite_seq)
+ return NULL;
+ suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
+ if (!suite_seq2)
+ return NULL;
+ get_last_end_pos(suite_seq2, &end_lineno, &end_col_offset);
+ asdl_seq_SET(orelse, 0,
+ If(expression, suite_seq, suite_seq2,
+ LINENO(CHILD(n, NCH(n) - 7)),
+ CHILD(n, NCH(n) - 7)->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena));
+ /* the just-created orelse handled the last elif */
+ n_elif--;
+ }
+ for (i = 0; i < n_elif; i++) {
+ int off = 5 + (n_elif - i - 1) * 4;
+ asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
+ if (!newobj)
+ return NULL;
+ expression = ast_for_expr(c, CHILD(n, off));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, CHILD(n, off + 2));
+ if (!suite_seq)
+ return NULL;
+ if (orelse != NULL) {
+ get_last_end_pos(orelse, &end_lineno, &end_col_offset);
+ } else {
+ get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
+ }
+ asdl_seq_SET(newobj, 0,
+ If(expression, suite_seq, orelse,
+ LINENO(CHILD(n, off - 1)),
+ CHILD(n, off - 1)->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena));
+ orelse = newobj;
+ }
+ expression = ast_for_expr(c, CHILD(n, 1));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, CHILD(n, 3));
+ if (!suite_seq)
+ return NULL;
+ get_last_end_pos(orelse, &end_lineno, &end_col_offset);
+ return If(expression, suite_seq, orelse,
+ LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ PyErr_Format(PyExc_SystemError,
+ "unexpected token in 'if' statement: %s", s);
+ return NULL;
+static stmt_ty
+ast_for_while_stmt(struct compiling *c, const node *n)
+ /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
+ REQ(n, while_stmt);
+ int end_lineno, end_col_offset;
+ if (NCH(n) == 4) {
+ expr_ty expression;
+ asdl_seq *suite_seq;
+ expression = ast_for_expr(c, CHILD(n, 1));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, CHILD(n, 3));
+ if (!suite_seq)
+ return NULL;
+ get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
+ return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ else if (NCH(n) == 7) {
+ expr_ty expression;
+ asdl_seq *seq1, *seq2;
+ expression = ast_for_expr(c, CHILD(n, 1));
+ if (!expression)
+ return NULL;
+ seq1 = ast_for_suite(c, CHILD(n, 3));
+ if (!seq1)
+ return NULL;
+ seq2 = ast_for_suite(c, CHILD(n, 6));
+ if (!seq2)
+ return NULL;
+ get_last_end_pos(seq2, &end_lineno, &end_col_offset);
+ return While(expression, seq1, seq2, LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ PyErr_Format(PyExc_SystemError,
+ "wrong number of tokens for 'while' statement: %d",
+ NCH(n));
+ return NULL;
+static stmt_ty
+ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
+ const node * const n = is_async ? CHILD(n0, 1) : n0;
+ asdl_seq *_target, *seq = NULL, *suite_seq;
+ expr_ty expression;
+ expr_ty target, first;
+ const node *node_target;
+ int end_lineno, end_col_offset;
+ int has_type_comment;
+ string type_comment;
+ if (is_async && c->c_feature_version < 5) {
+ ast_error(c, n,
+ "Async for loops are only supported in Python 3.5 and greater");
+ return NULL;
+ }
+ /* for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] */
+ REQ(n, for_stmt);
+ has_type_comment = TYPE(CHILD(n, 5)) == TYPE_COMMENT;
+ if (NCH(n) == 9 + has_type_comment) {
+ seq = ast_for_suite(c, CHILD(n, 8 + has_type_comment));
+ if (!seq)
+ return NULL;
+ }
+ node_target = CHILD(n, 1);
+ _target = ast_for_exprlist(c, node_target, Store);
+ if (!_target)
+ return NULL;
+ /* Check the # of children rather than the length of _target, since
+ for x, in ... has 1 element in _target, but still requires a Tuple. */
+ first = (expr_ty)asdl_seq_GET(_target, 0);
+ if (NCH(node_target) == 1)
+ target = first;
+ else
+ target = Tuple(_target, Store, first->lineno, first->col_offset,
+ node_target->n_end_lineno, node_target->n_end_col_offset,
+ c->c_arena);
+ expression = ast_for_testlist(c, CHILD(n, 3));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, CHILD(n, 5 + has_type_comment));
+ if (!suite_seq)
+ return NULL;
+ if (seq != NULL) {
+ get_last_end_pos(seq, &end_lineno, &end_col_offset);
+ } else {
+ get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
+ }
+ if (has_type_comment) {
+ type_comment = NEW_TYPE_COMMENT(CHILD(n, 5));
+ if (!type_comment)
+ return NULL;
+ }
+ else
+ type_comment = NULL;
+ if (is_async)
+ return AsyncFor(target, expression, suite_seq, seq, type_comment,
+ LINENO(n0), n0->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ else
+ return For(target, expression, suite_seq, seq, type_comment,
+ LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+static excepthandler_ty
+ast_for_except_clause(struct compiling *c, const node *exc, node *body)
+ /* except_clause: 'except' [test ['as' test]] */
+ int end_lineno, end_col_offset;
+ REQ(exc, except_clause);
+ REQ(body, suite);
+ if (NCH(exc) == 1) {
+ asdl_seq *suite_seq = ast_for_suite(c, body);
+ if (!suite_seq)
+ return NULL;
+ get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
+ return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
+ exc->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ else if (NCH(exc) == 2) {
+ expr_ty expression;
+ asdl_seq *suite_seq;
+ expression = ast_for_expr(c, CHILD(exc, 1));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, body);
+ if (!suite_seq)
+ return NULL;
+ get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
+ return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
+ exc->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ else if (NCH(exc) == 4) {
+ asdl_seq *suite_seq;
+ expr_ty expression;
+ identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
+ if (!e)
+ return NULL;
+ if (forbidden_name(c, e, CHILD(exc, 3), 0))
+ return NULL;
+ expression = ast_for_expr(c, CHILD(exc, 1));
+ if (!expression)
+ return NULL;
+ suite_seq = ast_for_suite(c, body);
+ if (!suite_seq)
+ return NULL;
+ get_last_end_pos(suite_seq, &end_lineno, &end_col_offset);
+ return ExceptHandler(expression, e, suite_seq, LINENO(exc),
+ exc->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ PyErr_Format(PyExc_SystemError,
+ "wrong number of children for 'except' clause: %d",
+ NCH(exc));
+ return NULL;
+static stmt_ty
+ast_for_try_stmt(struct compiling *c, const node *n)
+ const int nch = NCH(n);
+ int end_lineno, end_col_offset, n_except = (nch - 3)/3;
+ asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
+ excepthandler_ty last_handler;
+ REQ(n, try_stmt);
+ body = ast_for_suite(c, CHILD(n, 2));
+ if (body == NULL)
+ return NULL;
+ if (TYPE(CHILD(n, nch - 3)) == NAME) {
+ if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
+ if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
+ /* we can assume it's an "else",
+ because nch >= 9 for try-else-finally and
+ it would otherwise have a type of except_clause */
+ orelse = ast_for_suite(c, CHILD(n, nch - 4));
+ if (orelse == NULL)
+ return NULL;
+ n_except--;
+ }
+ finally = ast_for_suite(c, CHILD(n, nch - 1));
+ if (finally == NULL)
+ return NULL;
+ n_except--;
+ }
+ else {
+ /* we can assume it's an "else",
+ otherwise it would have a type of except_clause */
+ orelse = ast_for_suite(c, CHILD(n, nch - 1));
+ if (orelse == NULL)
+ return NULL;
+ n_except--;
+ }
+ }
+ else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
+ ast_error(c, n, "malformed 'try' statement");
+ return NULL;
+ }
+ if (n_except > 0) {
+ int i;
+ /* process except statements to create a try ... except */
+ handlers = _Py_asdl_seq_new(n_except, c->c_arena);
+ if (handlers == NULL)
+ return NULL;
+ for (i = 0; i < n_except; i++) {
+ excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
+ CHILD(n, 5 + i * 3));
+ if (!e)
+ return NULL;
+ asdl_seq_SET(handlers, i, e);
+ }
+ }
+ assert(finally != NULL || asdl_seq_LEN(handlers));
+ if (finally != NULL) {
+ // finally is always last
+ get_last_end_pos(finally, &end_lineno, &end_col_offset);
+ } else if (orelse != NULL) {
+ // otherwise else is last
+ get_last_end_pos(orelse, &end_lineno, &end_col_offset);
+ } else {
+ // inline the get_last_end_pos logic due to layout mismatch
+ last_handler = (excepthandler_ty) asdl_seq_GET(handlers, n_except - 1);
+ end_lineno = last_handler->end_lineno;
+ end_col_offset = last_handler->end_col_offset;
+ }
+ return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+/* with_item: test ['as' expr] */
+static withitem_ty
+ast_for_with_item(struct compiling *c, const node *n)
+ expr_ty context_expr, optional_vars = NULL;
+ REQ(n, with_item);
+ context_expr = ast_for_expr(c, CHILD(n, 0));
+ if (!context_expr)
+ return NULL;
+ if (NCH(n) == 3) {
+ optional_vars = ast_for_expr(c, CHILD(n, 2));
+ if (!optional_vars) {
+ return NULL;
+ }
+ if (!set_context(c, optional_vars, Store, n)) {
+ return NULL;
+ }
+ }
+ return withitem(context_expr, optional_vars, c->c_arena);
+/* with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite */
+static stmt_ty
+ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
+ const node * const n = is_async ? CHILD(n0, 1) : n0;
+ int i, n_items, nch_minus_type, has_type_comment, end_lineno, end_col_offset;
+ asdl_seq *items, *body;
+ string type_comment;
+ if (is_async && c->c_feature_version < 5) {
+ ast_error(c, n,
+ "Async with statements are only supported in Python 3.5 and greater");
+ return NULL;
+ }
+ REQ(n, with_stmt);
+ has_type_comment = TYPE(CHILD(n, NCH(n) - 2)) == TYPE_COMMENT;
+ nch_minus_type = NCH(n) - has_type_comment;
+ n_items = (nch_minus_type - 2) / 2;
+ items = _Py_asdl_seq_new(n_items, c->c_arena);
+ if (!items)
+ return NULL;
+ for (i = 1; i < nch_minus_type - 2; i += 2) {
+ withitem_ty item = ast_for_with_item(c, CHILD(n, i));
+ if (!item)
+ return NULL;
+ asdl_seq_SET(items, (i - 1) / 2, item);
+ }
+ body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
+ if (!body)
+ return NULL;
+ get_last_end_pos(body, &end_lineno, &end_col_offset);
+ if (has_type_comment) {
+ type_comment = NEW_TYPE_COMMENT(CHILD(n, NCH(n) - 2));
+ if (!type_comment)
+ return NULL;
+ }
+ else
+ type_comment = NULL;
+ if (is_async)
+ return AsyncWith(items, body, type_comment, LINENO(n0), n0->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ else
+ return With(items, body, type_comment, LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+static stmt_ty
+ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
+ /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
+ PyObject *classname;
+ asdl_seq *s;
+ expr_ty call;
+ int end_lineno, end_col_offset;
+ REQ(n, classdef);
+ if (NCH(n) == 4) { /* class NAME ':' suite */
+ s = ast_for_suite(c, CHILD(n, 3));
+ if (!s)
+ return NULL;
+ get_last_end_pos(s, &end_lineno, &end_col_offset);
+ classname = NEW_IDENTIFIER(CHILD(n, 1));
+ if (!classname)
+ return NULL;
+ if (forbidden_name(c, classname, CHILD(n, 3), 0))
+ return NULL;
+ return ClassDef(classname, NULL, NULL, s, decorator_seq,
+ LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
+ s = ast_for_suite(c, CHILD(n, 5));
+ if (!s)
+ return NULL;
+ get_last_end_pos(s, &end_lineno, &end_col_offset);
+ classname = NEW_IDENTIFIER(CHILD(n, 1));
+ if (!classname)
+ return NULL;
+ if (forbidden_name(c, classname, CHILD(n, 3), 0))
+ return NULL;
+ return ClassDef(classname, NULL, NULL, s, decorator_seq,
+ LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+ }
+ /* class NAME '(' arglist ')' ':' suite */
+ /* build up a fake Call node so we can extract its pieces */
+ {
+ PyObject *dummy_name;
+ expr_ty dummy;
+ dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
+ if (!dummy_name)
+ return NULL;
+ dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset,
+ CHILD(n, 1)->n_end_lineno, CHILD(n, 1)->n_end_col_offset,
+ c->c_arena);
+ call = ast_for_call(c, CHILD(n, 3), dummy,
+ CHILD(n, 1), NULL, CHILD(n, 4));
+ if (!call)
+ return NULL;
+ }
+ s = ast_for_suite(c, CHILD(n, 6));
+ if (!s)
+ return NULL;
+ get_last_end_pos(s, &end_lineno, &end_col_offset);
+ classname = NEW_IDENTIFIER(CHILD(n, 1));
+ if (!classname)
+ return NULL;
+ if (forbidden_name(c, classname, CHILD(n, 1), 0))
+ return NULL;
+ return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
+ decorator_seq, LINENO(n), n->n_col_offset,
+ end_lineno, end_col_offset, c->c_arena);
+static stmt_ty
+ast_for_stmt(struct compiling *c, const node *n)
+ if (TYPE(n) == stmt) {
+ assert(NCH(n) == 1);
+ n = CHILD(n, 0);
+ }
+ if (TYPE(n) == simple_stmt) {
+ assert(num_stmts(n) == 1);
+ n = CHILD(n, 0);
+ }
+ if (TYPE(n) == small_stmt) {
+ n = CHILD(n, 0);
+ /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
+ | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
+ */
+ switch (TYPE(n)) {
+ case expr_stmt:
+ return ast_for_expr_stmt(c, n);
+ case del_stmt:
+ return ast_for_del_stmt(c, n);
+ case pass_stmt:
+ return Pass(LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ case flow_stmt:
+ return ast_for_flow_stmt(c, n);
+ case import_stmt:
+ return ast_for_import_stmt(c, n);
+ case global_stmt:
+ return ast_for_global_stmt(c, n);
+ case nonlocal_stmt:
+ return ast_for_nonlocal_stmt(c, n);
+ case assert_stmt:
+ return ast_for_assert_stmt(c, n);
+ default:
+ PyErr_Format(PyExc_SystemError,
+ "unhandled small_stmt: TYPE=%d NCH=%d\n",
+ TYPE(n), NCH(n));
+ return NULL;
+ }
+ }
+ else {
+ /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
+ | funcdef | classdef | decorated | async_stmt
+ */
+ node *ch = CHILD(n, 0);
+ REQ(n, compound_stmt);
+ switch (TYPE(ch)) {
+ case if_stmt:
+ return ast_for_if_stmt(c, ch);
+ case while_stmt:
+ return ast_for_while_stmt(c, ch);
+ case for_stmt:
+ return ast_for_for_stmt(c, ch, 0);
+ case try_stmt:
+ return ast_for_try_stmt(c, ch);
+ case with_stmt:
+ return ast_for_with_stmt(c, ch, 0);
+ case funcdef:
+ return ast_for_funcdef(c, ch, NULL);
+ case classdef:
+ return ast_for_classdef(c, ch, NULL);
+ case decorated:
+ return ast_for_decorated(c, ch);
+ case async_stmt:
+ return ast_for_async_stmt(c, ch);
+ default:
+ PyErr_Format(PyExc_SystemError,
+ "unhandled compound_stmt: TYPE=%d NCH=%d\n",
+ TYPE(n), NCH(n));
+ return NULL;
+ }
+ }
+static PyObject *
+parsenumber_raw(struct compiling *c, const char *s)
+ const char *end;
+ long x;
+ double dx;
+ Py_complex compl;
+ int imflag;
+ assert(s != NULL);
+ errno = 0;
+ end = s + strlen(s) - 1;
+ imflag = *end == 'j' || *end == 'J';
+ if (s[0] == '0') {
+ x = (long) PyOS_strtoul(s, (char **)&end, 0);
+ if (x < 0 && errno == 0) {
+ return PyLong_FromString(s, (char **)0, 0);
+ }
+ }
+ else
+ x = PyOS_strtol(s, (char **)&end, 0);
+ if (*end == '\0') {
+ if (errno != 0)
+ return PyLong_FromString(s, (char **)0, 0);
+ return PyLong_FromLong(x);
+ }
+ /* XXX Huge floats may silently fail */
+ if (imflag) {
+ compl.real = 0.;
+ compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
+ if (compl.imag == -1.0 && PyErr_Occurred())
+ return NULL;
+ return PyComplex_FromCComplex(compl);
+ }
+ else
+ {
+ dx = PyOS_string_to_double(s, NULL, NULL);
+ if (dx == -1.0 && PyErr_Occurred())
+ return NULL;
+ return PyFloat_FromDouble(dx);
+ }
+static PyObject *
+parsenumber(struct compiling *c, const char *s)
+ char *dup, *end;
+ PyObject *res = NULL;
+ assert(s != NULL);
+ if (strchr(s, '_') == NULL) {
+ return parsenumber_raw(c, s);
+ }
+ /* Create a duplicate without underscores. */
+ dup = PyMem_Malloc(strlen(s) + 1);
+ if (dup == NULL) {
+ return PyErr_NoMemory();
+ }
+ end = dup;
+ for (; *s; s++) {
+ if (*s != '_') {
+ *end++ = *s;
+ }
+ }
+ *end = '\0';
+ res = parsenumber_raw(c, dup);
+ PyMem_Free(dup);
+ return res;
+static PyObject *
+decode_utf8(struct compiling *c, const char **sPtr, const char *end)
+ const char *s, *t;
+ t = s = *sPtr;
+ /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
+ while (s < end && (*s & 0x80)) s++;
+ *sPtr = s;
+ return PyUnicode_DecodeUTF8(t, s - t, NULL);
+static int
+warn_invalid_escape_sequence(struct compiling *c, const node *n,
+ unsigned char first_invalid_escape_char)
+ PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
+ first_invalid_escape_char);
+ if (msg == NULL) {
+ return -1;
+ }
+ if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
+ c->c_filename, LINENO(n),
+ NULL, NULL) < 0)
+ {
+ if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
+ /* Replace the DeprecationWarning exception with a SyntaxError
+ to get a more accurate error report */
+ PyErr_Clear();
+ ast_error(c, n, "%U", msg);
+ }
+ Py_DECREF(msg);
+ return -1;
+ }
+ Py_DECREF(msg);
+ return 0;
+static PyObject *
+decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
+ size_t len)
+ PyObject *v, *u;
+ char *buf;
+ char *p;
+ const char *end;
+ /* check for integer overflow */
+ if (len > SIZE_MAX / 6)
+ return NULL;
+ /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+ "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+ u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
+ if (u == NULL)
+ return NULL;
+ p = buf = PyBytes_AsString(u);
+ end = s + len;
+ while (s < end) {
+ if (*s == '\\') {
+ *p++ = *s++;
+ if (s >= end || *s & 0x80) {
+ strcpy(p, "u005c");
+ p += 5;
+ if (s >= end)
+ break;
+ }
+ }
+ if (*s & 0x80) { /* XXX inefficient */
+ PyObject *w;
+ int kind;
+ const void *data;
+ Py_ssize_t len, i;
+ w = decode_utf8(c, &s, end);
+ if (w == NULL) {
+ Py_DECREF(u);
+ return NULL;
+ }
+ kind = PyUnicode_KIND(w);
+ data = PyUnicode_DATA(w);
+ len = PyUnicode_GET_LENGTH(w);
+ for (i = 0; i < len; i++) {
+ Py_UCS4 chr = PyUnicode_READ(kind, data, i);
+ sprintf(p, "\\U%08x", chr);
+ p += 10;
+ }
+ /* Should be impossible to overflow */
+ assert(p - buf <= PyBytes_GET_SIZE(u));
+ Py_DECREF(w);
+ } else {
+ *p++ = *s++;
+ }
+ }
+ len = p - buf;
+ s = buf;
+ const char *first_invalid_escape;
+ v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
+ if (v != NULL && first_invalid_escape != NULL) {
+ if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
+ /* We have not decref u before because first_invalid_escape points
+ inside u. */
+ Py_XDECREF(u);
+ Py_DECREF(v);
+ return NULL;
+ }
+ }
+ Py_XDECREF(u);
+ return v;
+static PyObject *
+decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
+ size_t len)
+ const char *first_invalid_escape;
+ PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
+ &first_invalid_escape);
+ if (result == NULL)
+ return NULL;
+ if (first_invalid_escape != NULL) {
+ if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
+ Py_DECREF(result);
+ return NULL;
+ }
+ }
+ return result;
+/* Shift locations for the given node and all its children by adding `lineno`
+ and `col_offset` to existing locations. */
+static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
+ n->n_col_offset = n->n_col_offset + col_offset;
+ n->n_end_col_offset = n->n_end_col_offset + col_offset;
+ for (int i = 0; i < NCH(n); ++i) {
+ if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
+ /* Shifting column offsets unnecessary if there's been newlines. */
+ col_offset = 0;
+ }
+ fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
+ }
+ n->n_lineno = n->n_lineno + lineno;
+ n->n_end_lineno = n->n_end_lineno + lineno;
+/* Fix locations for the given node and its children.
+ `parent` is the enclosing node.
+ `n` is the node which locations are going to be fixed relative to parent.
+ `expr_str` is the child node's string representation, including braces.
+static void
+fstring_fix_node_location(const node *parent, node *n, char *expr_str)
+ char *substr = NULL;
+ char *start;
+ int lines = LINENO(parent) - 1;
+ int cols = parent->n_col_offset;
+ /* Find the full fstring to fix location information in `n`. */
+ while (parent && parent->n_type != STRING)
+ parent = parent->n_child;
+ if (parent && parent->n_str) {
+ substr = strstr(parent->n_str, expr_str);
+ if (substr) {
+ start = substr;
+ while (start > parent->n_str) {
+ if (start[0] == '\n')
+ break;
+ start--;
+ }
+ cols += (int)(substr - start);
+ /* adjust the start based on the number of newlines encountered
+ before the f-string expression */
+ for (char* p = parent->n_str; p < substr; p++) {
+ if (*p == '\n') {
+ lines++;
+ }
+ }
+ }
+ }
+ fstring_shift_node_locations(n, lines, cols);
+/* Compile this expression in to an expr_ty. Add parens around the
+ expression, in order to allow leading spaces in the expression. */
+static expr_ty
+fstring_compile_expr(const char *expr_start, const char *expr_end,
+ struct compiling *c, const node *n)
+ node *mod_n;
+ mod_ty mod;
+ char *str;
+ Py_ssize_t len;
+ const char *s;
+ assert(expr_end >= expr_start);
+ assert(*(expr_start-1) == '{');
+ assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':' ||
+ *expr_end == '=');
+ /* If the substring is all whitespace, it's an error. We need to catch this
+ here, and not when we call PyParser_SimpleParseStringFlagsFilename,
+ because turning the expression '' in to '()' would go from being invalid
+ to valid. */
+ for (s = expr_start; s != expr_end; s++) {
+ char c = *s;
+ /* The Python parser ignores only the following whitespace
+ characters (\r already is converted to \n). */
+ if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
+ break;
+ }
+ }
+ if (s == expr_end) {
+ ast_error(c, n, "f-string: empty expression not allowed");
+ return NULL;
+ }
+ len = expr_end - expr_start;
+ /* Allocate 3 extra bytes: open paren, close paren, null byte. */
+ str = PyMem_Malloc(len + 3);
+ if (str == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ str[0] = '(';
+ memcpy(str+1, expr_start, len);
+ str[len+1] = ')';
+ str[len+2] = 0;
+ PyCompilerFlags cf = _PyCompilerFlags_INIT;
+ cf.cf_flags = PyCF_ONLY_AST;
+ mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
+ Py_eval_input, 0);
+ if (!mod_n) {
+ PyMem_Free(str);
+ return NULL;
+ }
+ /* Reuse str to find the correct column offset. */
+ str[0] = '{';
+ str[len+1] = '}';
+ fstring_fix_node_location(n, mod_n, str);
+ mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
+ PyMem_Free(str);
+ PyNode_Free(mod_n);
+ if (!mod)
+ return NULL;
+ return mod->v.Expression.body;
+/* Return -1 on error.
+ Return 0 if we reached the end of the literal.
+ Return 1 if we haven't reached the end of the literal, but we want
+ the caller to process the literal up to this point. Used for
+ doubled braces.
+static int
+fstring_find_literal(const char **str, const char *end, int raw,
+ PyObject **literal, int recurse_lvl,
+ struct compiling *c, const node *n)
+ /* Get any literal string. It ends when we hit an un-doubled left
+ brace (which isn't part of a unicode name escape such as
+ "\N{EULER CONSTANT}"), or the end of the string. */
+ const char *s = *str;
+ const char *literal_start = s;
+ int result = 0;
+ assert(*literal == NULL);
+ while (s < end) {
+ char ch = *s++;
+ if (!raw && ch == '\\' && s < end) {
+ ch = *s++;
+ if (ch == 'N') {
+ if (s < end && *s++ == '{') {
+ while (s < end && *s++ != '}') {
+ }
+ continue;
+ }
+ break;
+ }
+ if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
+ return -1;
+ }
+ }
+ if (ch == '{' || ch == '}') {
+ /* Check for doubled braces, but only at the top level. If
+ we checked at every level, then f'{0:{3}}' would fail
+ with the two closing braces. */
+ if (recurse_lvl == 0) {
+ if (s < end && *s == ch) {
+ /* We're going to tell the caller that the literal ends
+ here, but that they should continue scanning. But also
+ skip over the second brace when we resume scanning. */
+ *str = s + 1;
+ result = 1;
+ goto done;
+ }
+ /* Where a single '{' is the start of a new expression, a
+ single '}' is not allowed. */
+ if (ch == '}') {
+ *str = s - 1;
+ ast_error(c, n, "f-string: single '}' is not allowed");
+ return -1;
+ }
+ }
+ /* We're either at a '{', which means we're starting another
+ expression; or a '}', which means we're at the end of this
+ f-string (for a nested format_spec). */
+ s--;
+ break;
+ }
+ }
+ *str = s;
+ assert(s <= end);
+ assert(s == end || *s == '{' || *s == '}');
+ if (literal_start != s) {
+ if (raw)
+ *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
+ s - literal_start,
+ else
+ *literal = decode_unicode_with_escapes(c, n, literal_start,
+ s - literal_start);
+ if (!*literal)
+ return -1;
+ }
+ return result;
+/* Forward declaration because parsing is recursive. */
+static expr_ty
+fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
+ struct compiling *c, const node *n);
+/* Parse the f-string at *str, ending at end. We know *str starts an
+ expression (so it must be a '{'). Returns the FormattedValue node, which
+ includes the expression, conversion character, format_spec expression, and
+ optionally the text of the expression (if = is used).
+ Note that I don't do a perfect job here: I don't make sure that a
+ closing brace doesn't match an opening paren, for example. It
+ doesn't need to error on all invalid expressions, just correctly
+ find the end of all valid ones. Any errors inside the expression
+ will be caught when we parse it later.
+ *expression is set to the expression. For an '=' "debug" expression,
+ *expr_text is set to the debug text (the original text of the expression,
+ including the '=' and any whitespace around it, as a string object). If
+ not a debug expression, *expr_text set to NULL. */
+static int
+fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
+ PyObject **expr_text, expr_ty *expression,
+ struct compiling *c, const node *n)
+ /* Return -1 on error, else 0. */
+ const char *expr_start;
+ const char *expr_end;
+ expr_ty simple_expression;
+ expr_ty format_spec = NULL; /* Optional format specifier. */
+ int conversion = -1; /* The conversion char. Use default if not
+ specified, or !r if using = and no format
+ spec. */
+ /* 0 if we're not in a string, else the quote char we're trying to
+ match (single or double quote). */
+ char quote_char = 0;
+ /* If we're inside a string, 1=normal, 3=triple-quoted. */
+ int string_type = 0;
+ /* Keep track of nesting level for braces/parens/brackets in
+ expressions. */
+ Py_ssize_t nested_depth = 0;
+ char parenstack[MAXLEVEL];
+ *expr_text = NULL;
+ /* Can only nest one level deep. */
+ if (recurse_lvl >= 2) {
+ ast_error(c, n, "f-string: expressions nested too deeply");
+ goto error;
+ }
+ /* The first char must be a left brace, or we wouldn't have gotten
+ here. Skip over it. */
+ assert(**str == '{');
+ *str += 1;
+ expr_start = *str;
+ for (; *str < end; (*str)++) {
+ char ch;
+ /* Loop invariants. */
+ assert(nested_depth >= 0);
+ assert(*str >= expr_start && *str < end);
+ if (quote_char)
+ assert(string_type == 1 || string_type == 3);
+ else
+ assert(string_type == 0);
+ ch = **str;
+ /* Nowhere inside an expression is a backslash allowed. */
+ if (ch == '\\') {
+ /* Error: can't include a backslash character, inside
+ parens or strings or not. */
+ ast_error(c, n,
+ "f-string expression part "
+ "cannot include a backslash");
+ goto error;
+ }
+ if (quote_char) {
+ /* We're inside a string. See if we're at the end. */
+ /* This code needs to implement the same non-error logic
+ as tok_get from tokenizer.c, at the letter_quote
+ label. To actually share that code would be a
+ nightmare. But, it's unlikely to change and is small,
+ so duplicate it here. Note we don't need to catch all
+ of the errors, since they'll be caught when parsing the
+ expression. We just need to match the non-error
+ cases. Thus we can ignore \n in single-quoted strings,
+ for example. Or non-terminated strings. */
+ if (ch == quote_char) {
+ /* Does this match the string_type (single or triple
+ quoted)? */
+ if (string_type == 3) {
+ if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
+ /* We're at the end of a triple quoted string. */
+ *str += 2;
+ string_type = 0;
+ quote_char = 0;
+ continue;
+ }
+ } else {
+ /* We're at the end of a normal string. */
+ quote_char = 0;
+ string_type = 0;
+ continue;
+ }
+ }
+ } else if (ch == '\'' || ch == '"') {
+ /* Is this a triple quoted string? */
+ if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
+ string_type = 3;
+ *str += 2;
+ } else {
+ /* Start of a normal string. */
+ string_type = 1;
+ }
+ /* Start looking for the end of the string. */
+ quote_char = ch;
+ } else if (ch == '[' || ch == '{' || ch == '(') {
+ if (nested_depth >= MAXLEVEL) {
+ ast_error(c, n, "f-string: too many nested parenthesis");
+ goto error;
+ }
+ parenstack[nested_depth] = ch;
+ nested_depth++;
+ } else if (ch == '#') {
+ /* Error: can't include a comment character, inside parens
+ or not. */
+ ast_error(c, n, "f-string expression part cannot include '#'");
+ goto error;
+ } else if (nested_depth == 0 &&
+ (ch == '!' || ch == ':' || ch == '}' ||
+ ch == '=' || ch == '>' || ch == '<')) {
+ /* See if there's a next character. */
+ if (*str+1 < end) {
+ char next = *(*str+1);
+ /* For "!=". since '=' is not an allowed conversion character,
+ nothing is lost in this test. */
+ if ((ch == '!' && next == '=') || /* != */
+ (ch == '=' && next == '=') || /* == */
+ (ch == '<' && next == '=') || /* <= */
+ (ch == '>' && next == '=') /* >= */
+ ) {
+ *str += 1;
+ continue;
+ }
+ /* Don't get out of the loop for these, if they're single
+ chars (not part of 2-char tokens). If by themselves, they
+ don't end an expression (unlike say '!'). */
+ if (ch == '>' || ch == '<') {
+ continue;
+ }
+ }
+ /* Normal way out of this loop. */
+ break;
+ } else if (ch == ']' || ch == '}' || ch == ')') {
+ if (!nested_depth) {
+ ast_error(c, n, "f-string: unmatched '%c'", ch);
+ goto error;
+ }
+ nested_depth--;
+ int opening = parenstack[nested_depth];
+ if (!((opening == '(' && ch == ')') ||
+ (opening == '[' && ch == ']') ||
+ (opening == '{' && ch == '}')))
+ {
+ ast_error(c, n,
+ "f-string: closing parenthesis '%c' "
+ "does not match opening parenthesis '%c'",
+ ch, opening);
+ goto error;
+ }
+ } else {
+ /* Just consume this char and loop around. */
+ }
+ }
+ expr_end = *str;
+ /* If we leave this loop in a string or with mismatched parens, we
+ don't care. We'll get a syntax error when compiling the
+ expression. But, we can produce a better error message, so
+ let's just do that.*/
+ if (quote_char) {
+ ast_error(c, n, "f-string: unterminated string");
+ goto error;
+ }
+ if (nested_depth) {
+ int opening = parenstack[nested_depth - 1];
+ ast_error(c, n, "f-string: unmatched '%c'", opening);
+ goto error;
+ }
+ if (*str >= end)
+ goto unexpected_end_of_string;
+ /* Compile the expression as soon as possible, so we show errors
+ related to the expression before errors related to the
+ conversion or format_spec. */
+ simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
+ if (!simple_expression)
+ goto error;
+ /* Check for =, which puts the text value of the expression in
+ expr_text. */
+ if (**str == '=') {
+ if (c->c_feature_version < 8) {
+ ast_error(c, n,
+ "f-string: self documenting expressions are "
+ "only supported in Python 3.8 and greater");
+ goto error;
+ }
+ *str += 1;
+ /* Skip over ASCII whitespace. No need to test for end of string
+ here, since we know there's at least a trailing quote somewhere
+ ahead. */
+ while (Py_ISSPACE(**str)) {
+ *str += 1;
+ }
+ /* Set *expr_text to the text of the expression. */
+ *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
+ if (!*expr_text) {
+ goto error;
+ }
+ }
+ /* Check for a conversion char, if present. */
+ if (**str == '!') {
+ *str += 1;
+ if (*str >= end)
+ goto unexpected_end_of_string;
+ conversion = **str;
+ *str += 1;
+ /* Validate the conversion. */
+ if (!(conversion == 's' || conversion == 'r' || conversion == 'a')) {
+ ast_error(c, n,
+ "f-string: invalid conversion character: "
+ "expected 's', 'r', or 'a'");
+ goto error;
+ }
+ }
+ /* Check for the format spec, if present. */
+ if (*str >= end)
+ goto unexpected_end_of_string;
+ if (**str == ':') {
+ *str += 1;
+ if (*str >= end)
+ goto unexpected_end_of_string;
+ /* Parse the format spec. */
+ format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
+ if (!format_spec)
+ goto error;
+ }
+ if (*str >= end || **str != '}')
+ goto unexpected_end_of_string;
+ /* We're at a right brace. Consume it. */
+ assert(*str < end);
+ assert(**str == '}');
+ *str += 1;
+ /* If we're in = mode (detected by non-NULL expr_text), and have no format
+ spec and no explicit conversion, set the conversion to 'r'. */
+ if (*expr_text && format_spec == NULL && conversion == -1) {
+ conversion = 'r';
+ }
+ /* And now create the FormattedValue node that represents this
+ entire expression with the conversion and format spec. */
+ *expression = FormattedValue(simple_expression, conversion,
+ format_spec, LINENO(n),
+ n->n_col_offset, n->n_end_lineno,
+ n->n_end_col_offset, c->c_arena);
+ if (!*expression)
+ goto error;
+ return 0;
+ ast_error(c, n, "f-string: expecting '}'");
+ /* Falls through to error. */
+ Py_XDECREF(*expr_text);
+ return -1;
+/* Return -1 on error.
+ Return 0 if we have a literal (possibly zero length) and an
+ expression (zero length if at the end of the string).
+ Return 1 if we have a literal, but no expression, and we want the
+ caller to call us again. This is used to deal with doubled
+ braces.
+ When called multiple times on the string 'a{{b{0}c', this function
+ will return:
+ 1. the literal 'a{' with no expression, and a return value
+ of 1. Despite the fact that there's no expression, the return
+ value of 1 means we're not finished yet.
+ 2. the literal 'b' and the expression '0', with a return value of
+ 0. The fact that there's an expression means we're not finished.
+ 3. the literal 'c' with no expression and a return value of 0. The
+ combination of the return value of 0 with no expression means
+ we're finished. */
+static int
+fstring_find_literal_and_expr(const char **str, const char *end, int raw,
+ int recurse_lvl, PyObject **literal,
+ PyObject **expr_text, expr_ty *expression,
+ struct compiling *c, const node *n) /* Fetch the next literal run
+ and, when one follows, the next replacement expression. On entry
+ *literal and *expression must both be NULL (asserted below). */
+ int result; /* Status of the literal scan: negative on error, else 0 or 1. */
+ assert(*literal == NULL && *expression == NULL);
+ /* Get any literal string. */
+ result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
+ if (result < 0)
+ goto error;
+ assert(result == 0 || result == 1);
+ if (result == 1)
+ /* We have a literal, but don't look at the expression. */
+ return 1;
+ if (*str >= end || **str == '}')
+ /* We're at the end of the string or the end of a nested
+ f-string: no expression. The top-level error case where we
+ expect to be at the end of the string but we're at a '}' is
+ handled later. */
+ return 0;
+ /* We must now be the start of an expression, on a '{'. */
+ assert(**str == '{');
+ if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
+ expression, c, n) < 0)
+ goto error;
+ return 0;
+ Py_CLEAR(*literal); /* Failure exit: drop any literal already produced
+ so *literal is NULL again when -1 is returned. NOTE(review): the
+ 'error:' label that the gotos above target is not visible in this
+ listing -- confirm against the real file. */
+ return -1;
+typedef struct {
+ /* Incrementally build an array of expr_ty, to be used in an
+ asdl_seq. Cache some small but reasonably sized number of
+ expr_ty's, and then after that start dynamically allocating,
+ doubling the number allocated each time. Note that the f-string
+ f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
+ Constant for the literal 'a'. So you add expr_ty's about twice as
+ fast as you add expressions in an f-string. */
+ Py_ssize_t allocated; /* Number we've allocated. */
+ Py_ssize_t size; /* Number we've used. ExprList_Dealloc() sets
+ this to -1 to mark the list as dead. */
+ expr_ty *p; /* Pointer to the memory we're actually
+ using. Will point to 'data' until we
+ start dynamically allocating. */
+ expr_ty data[EXPRLIST_N_CACHED]; /* Inline storage that p refers to
+ until the first heap allocation. */
+} ExprList;
+#ifdef NDEBUG /* With NDEBUG defined the check is an empty macro. */
+#define ExprList_check_invariants(l)
+static void
+ExprList_check_invariants(ExprList *l) /* Assert-only consistency check;
+ it never modifies l. NOTE(review): the preprocessor lines that should
+ separate the macro above from this function (#else / #endif) are not
+ visible in this listing -- confirm against the real file. */
+ /* Check our invariants. Make sure this object is "live", and
+ hasn't been deallocated. */
+ assert(l->size >= 0);
+ assert(l->p != NULL);
+ if (l->size <= EXPRLIST_N_CACHED) /* A list this small must still be
+ using its inline array. */
+ assert(l->data == l->p);
+static void
+ExprList_Init(ExprList *l) /* Put l into its empty state: no elements,
+ capacity EXPRLIST_N_CACHED, storage backed by the inline array. */
+ l->allocated = EXPRLIST_N_CACHED;
+ l->size = 0;
+ /* Until we start allocating dynamically, p points to data. */
+ l->p = l->data;
+ ExprList_check_invariants(l);
+/* Append exp to the end of the list, doubling the backing store when
+   it is full.  The first growth copies the inline 'data' array to a
+   heap buffer; later growths realloc that buffer.
+   Returns 0 on success.  On allocation failure returns -1 with
+   MemoryError set; the list is left unchanged and still valid, so the
+   caller's usual ExprList_Dealloc() cleanup releases it exactly once. */
+static int
+ExprList_Append(ExprList *l, expr_ty exp)
+{
+    ExprList_check_invariants(l);
+    if (l->size >= l->allocated) {
+        /* We need to alloc (or realloc) the memory. */
+        Py_ssize_t new_size = l->allocated * 2;
+        /* Grow into a temporary so that l->p is only replaced once the
+           allocation is known to have succeeded. */
+        expr_ty *grown;
+        /* See if we've ever allocated anything dynamically. */
+        if (l->p == l->data) {
+            Py_ssize_t i;
+            /* We're still using the cached data.  Switch to
+               alloc-ing. */
+            grown = PyMem_Malloc(sizeof(expr_ty) * new_size);
+            if (grown != NULL) {
+                /* Copy the cached data into the new buffer. */
+                for (i = 0; i < l->size; i++)
+                    grown[i] = l->data[i];
+            }
+        } else {
+            /* Just realloc.  On failure the old buffer is untouched
+               and stays owned by the list. */
+            grown = PyMem_Realloc(l->p, sizeof(expr_ty) * new_size);
+        }
+        if (grown == NULL) {
+            /* Report the failure instead of returning -1 with no
+               exception set. */
+            PyErr_NoMemory();
+            return -1;
+        }
+        l->p = grown;
+        l->allocated = new_size;
+        assert(l->allocated == 2 * l->size);
+    }
+    l->p[l->size++] = exp;
+    ExprList_check_invariants(l);
+    return 0;
+}
+static void
+ExprList_Dealloc(ExprList *l) /* Release any heap buffer owned by l and
+ mark the list as dead. The inline array is never freed. */
+ ExprList_check_invariants(l);
+ /* If there's been an error, or we've never dynamically allocated,
+ do nothing. NOTE(review): the invariant check above asserts
+ l->p != NULL, so in assert-enabled builds the !l->p case aborts
+ before reaching this test. */
+ if (!l->p || l->p == l->data) {
+ /* Do nothing. */
+ } else {
+ /* We have dynamically allocated. Free the memory. */
+ PyMem_Free(l->p);
+ }
+ l->p = NULL; /* From here on the list fails ExprList_check_invariants(). */
+ l->size = -1;
+static asdl_seq *
+ExprList_Finish(ExprList *l, PyArena *arena) /* Copy the collected
+ expressions into a new asdl_seq of length l->size allocated from
+ arena, then release l. Returns the sequence, or NULL if the sequence
+ could not be created. l is deallocated in both cases. */
+ asdl_seq *seq;
+ ExprList_check_invariants(l);
+ /* Allocate the asdl_seq and copy the expressions in to it. */
+ seq = _Py_asdl_seq_new(l->size, arena);
+ if (seq) {
+ Py_ssize_t i;
+ for (i = 0; i < l->size; i++)
+ asdl_seq_SET(seq, i, l->p[i]);
+ }
+ ExprList_Dealloc(l); /* Runs whether or not seq was created. */
+ return seq;
+/* The FstringParser is designed to add a mix of strings and
+ f-strings, and concat them together as needed. Ultimately, it
+ generates an expr_ty. */
+typedef struct {
+ PyObject *last_str; /* Literal text accumulated so far and not yet
+ turned into a Constant node; NULL when there
+ is none. */
+ ExprList expr_list; /* Nodes collected so far, in order. */
+ int fmode; /* Set to 1 by FstringParser_ConcatFstring(); when
+ still 0, FstringParser_Finish() returns a plain
+ string Constant. */
+} FstringParser;
+#ifdef NDEBUG /* With NDEBUG defined the check is an empty macro. */
+#define FstringParser_check_invariants(state)
+static void
+FstringParser_check_invariants(FstringParser *state) /* Assert-only
+ consistency check: last_str, when set, is an exact str object, and
+ the embedded expression list satisfies its own invariants.
+ NOTE(review): the #else / #endif lines that should separate the macro
+ above from this function are not visible in this listing. */
+ if (state->last_str)
+ assert(PyUnicode_CheckExact(state->last_str));
+ ExprList_check_invariants(&state->expr_list);
+static void
+FstringParser_Init(FstringParser *state) /* Reset state: no pending
+ literal, not in f-string mode, empty expression list. */
+ state->last_str = NULL;
+ state->fmode = 0;
+ ExprList_Init(&state->expr_list);
+ FstringParser_check_invariants(state);
+static void
+FstringParser_Dealloc(FstringParser *state) /* Release the pending
+ literal (if any) and the expression list. */
+ FstringParser_check_invariants(state);
+ Py_XDECREF(state->last_str); /* last_str may legitimately be NULL. */
+ ExprList_Dealloc(&state->expr_list);
+/* Constants for the following */
+static PyObject *u_kind; /* Interned "u" string, created on first use by
+ make_kind(). */
+/* Compute 'kind' field for string Constant (either 'u' or None).
+ Returns the "u" string object when the first STRING token under n
+ starts with a lowercase 'u', otherwise NULL. NULL is also returned
+ on failure; the caller in this file tells the two apart with
+ PyErr_Occurred(). */
+static PyObject *
+make_kind(struct compiling *c, const node *n)
+ char *s = NULL;
+ PyObject *kind = NULL;
+ /* Find the first string literal, if any */
+ while (TYPE(n) != STRING) {
+ if (NCH(n) == 0)
+ return NULL;
+ n = CHILD(n, 0);
+ }
+ /* If it starts with 'u', return a PyUnicode "u" string.
+ NOTE(review): only lowercase 'u' is tested here, while parsestr()
+ also accepts an uppercase 'U' prefix -- confirm this is intended. */
+ s = STR(n);
+ if (s && *s == 'u') {
+ if (!u_kind) {
+ u_kind = PyUnicode_InternFromString("u");
+ if (!u_kind)
+ return NULL;
+ }
+ kind = u_kind;
+ if (PyArena_AddPyObject(c->c_arena, kind) < 0) {
+ return NULL;
+ }
+ Py_INCREF(kind); /* Extra reference taken after registering kind with
+ the arena; u_kind keeps its own. */
+ }
+ return kind;
+/* Make a Constant node, but decref the PyUnicode object being added.
+ *str is always reset to NULL, on success and on failure alike, so the
+ caller no longer holds the string afterwards. Returns the new node,
+ or NULL on error. */
+static expr_ty
+make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
+ PyObject *s = *str;
+ PyObject *kind = NULL;
+ *str = NULL;
+ assert(PyUnicode_CheckExact(s));
+ if (PyArena_AddPyObject(c->c_arena, s) < 0) {
+ Py_DECREF(s); /* The arena did not take s, so drop it here. */
+ return NULL;
+ }
+ kind = make_kind(c, n);
+ if (kind == NULL && PyErr_Occurred()) /* NULL alone just means "no kind". */
+ return NULL;
+ return Constant(s, kind, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+/* Add a non-f-string (that is, a regular literal string). str is
+ decref'd. The text is merged into state->last_str; an empty str is
+ simply discarded. Returns 0 on success, -1 if the concatenation
+ failed (state->last_str is NULL in that case). */
+static int
+FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
+ FstringParser_check_invariants(state);
+ assert(PyUnicode_CheckExact(str));
+ if (PyUnicode_GET_LENGTH(str) == 0) {
+ Py_DECREF(str);
+ return 0;
+ }
+ if (!state->last_str) {
+ /* We didn't have a string before, so just remember this one. */
+ state->last_str = str;
+ } else {
+ /* Concatenate this with the previous string. */
+ PyUnicode_AppendAndDel(&state->last_str, str);
+ if (!state->last_str)
+ return -1;
+ }
+ FstringParser_check_invariants(state);
+ return 0;
+/* Parse an f-string. The f-string is in *str to end, with no
+ 'f' or quotes. Literal pieces are merged into state->last_str and
+ every expression found is appended to state->expr_list, preceded by
+ a Constant node for any literal text pending at that point. Sets
+ state->fmode. Returns 0 on success, -1 on error. */
+static int
+FstringParser_ConcatFstring(FstringParser *state, const char **str,
+ const char *end, int raw, int recurse_lvl,
+ struct compiling *c, const node *n)
+ FstringParser_check_invariants(state);
+ state->fmode = 1;
+ /* Parse the f-string. */
+ while (1) {
+ PyObject *literal = NULL;
+ PyObject *expr_text = NULL;
+ expr_ty expression = NULL;
+ /* If there's a zero length literal in front of the
+ expression, literal will be NULL. If we're at the end of
+ the f-string, expression will be NULL (unless result == 1,
+ see below). */
+ int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
+ &literal, &expr_text,
+ &expression, c, n);
+ if (result < 0)
+ return -1;
+ /* Add the literal, if any. */
+ if (literal && FstringParser_ConcatAndDel(state, literal) < 0) {
+ Py_XDECREF(expr_text); /* Not yet handed to the state object. */
+ return -1;
+ }
+ /* Add the expr_text, if any. */
+ if (expr_text && FstringParser_ConcatAndDel(state, expr_text) < 0) {
+ return -1;
+ }
+ /* We've dealt with the literal and expr_text, their ownership has
+ been transferred to the state object. Don't look at them again. */
+ /* See if we should just loop around to get the next literal
+ and expression, while ignoring the expression this
+ time. This is used for un-doubling braces, as an
+ optimization. */
+ if (result == 1)
+ continue;
+ if (!expression)
+ /* We're done with this f-string. */
+ break;
+ /* We know we have an expression. Convert any existing string
+ to a Constant node. */
+ if (!state->last_str) {
+ /* Do nothing. No previous literal. */
+ } else {
+ /* Convert the existing last_str literal to a Constant node.
+ NOTE(review): the local 'str' below shadows the 'str'
+ parameter for the rest of this block. */
+ expr_ty str = make_str_node_and_del(&state->last_str, c, n);
+ if (!str || ExprList_Append(&state->expr_list, str) < 0)
+ return -1;
+ }
+ if (ExprList_Append(&state->expr_list, expression) < 0)
+ return -1;
+ }
+ /* If recurse_lvl is zero, then we must be at the end of the
+ string. Otherwise, we must be at a right brace. */
+ if (recurse_lvl == 0 && *str < end-1) {
+ ast_error(c, n, "f-string: unexpected end of string");
+ return -1;
+ }
+ if (recurse_lvl != 0 && **str != '}') {
+ ast_error(c, n, "f-string: expecting '}'");
+ return -1;
+ }
+ FstringParser_check_invariants(state);
+ return 0;
+/* Convert the partial state reflected in last_str and expr_list to an
+   expr_ty.  The expr_ty can be a Constant, or a JoinedStr.
+   When no f-string was ever added (fmode is 0) the result is a single
+   string Constant, empty if nothing was accumulated.  Otherwise any
+   pending literal becomes the last element of the expression list and
+   the list is wrapped in a JoinedStr.
+   Returns NULL on error.  On the error paths taken before the
+   expression list is handed to ExprList_Finish(), state is released
+   here via FstringParser_Dealloc(). */
+static expr_ty
+FstringParser_Finish(FstringParser *state, struct compiling *c,
+                     const node *n)
+{
+    asdl_seq *seq;
+    FstringParser_check_invariants(state);
+    /* If we're just a constant string with no expressions, return
+       that. */
+    if (!state->fmode) {
+        assert(!state->expr_list.size);
+        if (!state->last_str) {
+            /* Create a zero length string. */
+            state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
+            if (!state->last_str)
+                goto error;
+        }
+        return make_str_node_and_del(&state->last_str, c, n);
+    }
+    /* Create a Constant node out of last_str, if needed.  It will be the
+       last node in our expression list. */
+    if (state->last_str) {
+        expr_ty str = make_str_node_and_del(&state->last_str, c, n);
+        if (!str || ExprList_Append(&state->expr_list, str) < 0)
+            goto error;
+    }
+    /* This has already been freed. */
+    assert(state->last_str == NULL);
+    seq = ExprList_Finish(&state->expr_list, c->c_arena);
+    if (!seq) {
+        /* ExprList_Finish() releases the list even when it fails, and
+           last_str is NULL here, so nothing is left to free.  Going
+           through FstringParser_Dealloc() would deallocate the list a
+           second time and violate its invariant checks. */
+        return NULL;
+    }
+    return JoinedStr(seq, LINENO(n), n->n_col_offset,
+                     n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+error:
+    FstringParser_Dealloc(state);
+    return NULL;
+}
+/* Given an f-string (with no 'f' or quotes) that's in *str and ends
+ at end, parse it into an expr_ty. Return NULL on error. Adjust
+ str to point past the parsed portion. Uses a private FstringParser
+ that lives only for the duration of this call. */
+static expr_ty
+fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
+ struct compiling *c, const node *n)
+ FstringParser state;
+ FstringParser_Init(&state);
+ if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
+ c, n) < 0) {
+ FstringParser_Dealloc(&state); /* Parsing failed: release the partial state here. */
+ return NULL;
+ }
+ return FstringParser_Finish(&state, c, n);
+/* n is a Python string literal, including the bracketing quote
+ characters, and r, b, u, &/or f prefixes (if any), and embedded
+ escape sequences (if any). parsestr parses it, and sets *result to
+ the decoded Python string object. If the string is an f-string,
+ *result is left NULL and *fstr and *fstrlen are set to the start and
+ length of the still-unparsed string body instead. Return 0 if no
+ errors occurred, -1 otherwise. */
+static int
+parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
+ PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
+ size_t len;
+ const char *s = STR(n);
+ int quote = Py_CHARMASK(*s); /* First byte of the token: a prefix letter
+ or the opening quote. */
+ int fmode = 0;
+ *bytesmode = 0;
+ *rawmode = 0;
+ *result = NULL;
+ *fstr = NULL;
+ if (Py_ISALPHA(quote)) {
+ while (!*bytesmode || !*rawmode) { /* Consume prefix letters; stops
+ at the first non-prefix byte or
+ once both flags are set. */
+ if (quote == 'b' || quote == 'B') {
+ quote = *++s;
+ *bytesmode = 1;
+ }
+ else if (quote == 'u' || quote == 'U') {
+ quote = *++s;
+ }
+ else if (quote == 'r' || quote == 'R') {
+ quote = *++s;
+ *rawmode = 1;
+ }
+ else if (quote == 'f' || quote == 'F') {
+ quote = *++s;
+ fmode = 1;
+ }
+ else {
+ break;
+ }
+ }
+ }
+ /* fstrings are only allowed in Python 3.6 and greater */
+ if (fmode && c->c_feature_version < 6) {
+ ast_error(c, n, "Format strings are only supported in Python 3.6 and greater");
+ return -1;
+ }
+ if (fmode && *bytesmode) {
+ PyErr_BadInternalCall();
+ return -1;
+ }
+ if (quote != '\'' && quote != '\"') {
+ PyErr_BadInternalCall();
+ return -1;
+ }
+ /* Skip the leading quote char. */
+ s++;
+ len = strlen(s);
+ if (len > INT_MAX) {
+ PyErr_SetString(PyExc_OverflowError,
+ "string to parse is too long");
+ return -1;
+ }
+ if (s[--len] != quote) {
+ /* Last quote char must match the first. NOTE(review): len is a
+ size_t, so the decrement above relies on the token holding at
+ least its closing quote (len >= 1) -- confirm the tokenizer
+ guarantees this. */
+ PyErr_BadInternalCall();
+ return -1;
+ }
+ if (len >= 4 && s[0] == quote && s[1] == quote) {
+ /* A triple quoted string. We've already skipped one quote at
+ the start and one at the end of the string. Now skip the
+ two at the start. */
+ s += 2;
+ len -= 2;
+ /* And check that the last two match. */
+ if (s[--len] != quote || s[--len] != quote) {
+ PyErr_BadInternalCall();
+ return -1;
+ }
+ }
+ if (fmode) {
+ /* Just return the bytes. The caller will parse the resulting
+ string. */
+ *fstr = s;
+ *fstrlen = len;
+ return 0;
+ }
+ /* Not an f-string. */
+ /* Avoid invoking escape decoding routines if possible: a string
+ with no backslash is handled as if it were raw. */
+ *rawmode = *rawmode || strchr(s, '\\') == NULL;
+ if (*bytesmode) {
+ /* Disallow non-ASCII characters. */
+ const char *ch;
+ for (ch = s; *ch; ch++) {
+ if (Py_CHARMASK(*ch) >= 0x80) {
+ ast_error(c, n,
+ "bytes can only contain ASCII "
+ "literal characters.");
+ return -1;
+ }
+ }
+ if (*rawmode)
+ *result = PyBytes_FromStringAndSize(s, len);
+ else
+ *result = decode_bytes_with_escapes(c, n, s, len);
+ } else {
+ if (*rawmode)
+ *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
+ else
+ *result = decode_unicode_with_escapes(c, n, s, len);
+ }
+ return *result == NULL ? -1 : 0; /* A NULL result means the decode step failed. */
+/* Accepts a STRING+ atom, and produces an expr_ty node. Run through
+ each STRING atom, and process it as needed. For bytes, just
+ concatenate them together, and the result will be a Constant node. For
+ normal strings and f-strings, concatenate them together. The result
+ will be a Constant node if there were no f-strings; a FormattedValue
+ node if there's just an f-string (with no leading or trailing
+ literals), or a JoinedStr node if there are multiple f-strings or
+ any literals involved.
+ NOTE(review): the non-bytes result comes from FstringParser_Finish(),
+ which in the code visible here returns either a Constant or a
+ JoinedStr -- confirm the FormattedValue case described above.
+ Returns NULL on error. */
+static expr_ty
+parsestrplus(struct compiling *c, const node *n)
+ int bytesmode = 0;
+ PyObject *bytes_str = NULL;
+ int i;
+ FstringParser state;
+ FstringParser_Init(&state);
+ for (i = 0; i < NCH(n); i++) {
+ int this_bytesmode;
+ int this_rawmode;
+ PyObject *s;
+ const char *fstr;
+ Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
+ if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
+ &fstr, &fstrlen) != 0)
+ goto error;
+ /* Check that we're not mixing bytes with unicode. */
+ if (i != 0 && bytesmode != this_bytesmode) {
+ ast_error(c, n, "cannot mix bytes and nonbytes literals");
+ /* s is NULL if the current string part is an f-string. */
+ Py_XDECREF(s);
+ goto error;
+ }
+ bytesmode = this_bytesmode;
+ if (fstr != NULL) {
+ int result;
+ assert(s == NULL && !bytesmode);
+ /* This is an f-string. Parse and concatenate it. */
+ result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
+ this_rawmode, 0, c, n);
+ if (result < 0)
+ goto error;
+ } else {
+ /* A string or byte string. */
+ assert(s != NULL && fstr == NULL);
+ assert(bytesmode ? PyBytes_CheckExact(s) :
+ PyUnicode_CheckExact(s));
+ if (bytesmode) {
+ /* For bytes, concat as we go. */
+ if (i == 0) {
+ /* First time, just remember this value. */
+ bytes_str = s;
+ } else {
+ PyBytes_ConcatAndDel(&bytes_str, s);
+ if (!bytes_str)
+ goto error;
+ }
+ } else {
+ /* This is a regular string. Concatenate it. */
+ if (FstringParser_ConcatAndDel(&state, s) < 0)
+ goto error;
+ }
+ }
+ }
+ if (bytesmode) {
+ /* Just return the bytes object and we're done. */
+ if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
+ goto error;
+ return Constant(bytes_str, NULL, LINENO(n), n->n_col_offset,
+ n->n_end_lineno, n->n_end_col_offset, c->c_arena);
+ }
+ /* We're not a bytes string, bytes_str should never have been set. */
+ assert(bytes_str == NULL);
+ return FstringParser_Finish(&state, c, n);
+ Py_XDECREF(bytes_str); /* Failure exit: release whatever was accumulated.
+ NOTE(review): the 'error:' label targeted by the gotos above is
+ not visible in this listing -- confirm against the real file. */
+ FstringParser_Dealloc(&state);
+ return NULL;
+PyObject *
+_PyAST_GetDocString(asdl_seq *body) /* Return the docstring object of a
+ statement list, or NULL when there is none. The docstring is the
+ value of the first statement when that statement is an expression
+ statement holding an exact-str Constant. No reference count is
+ changed here, so the returned pointer is the one stored in the
+ node. */
+ if (!asdl_seq_LEN(body)) {
+ return NULL;
+ }
+ stmt_ty st = (stmt_ty)asdl_seq_GET(body, 0);
+ if (st->kind != Expr_kind) {
+ return NULL;
+ }
+ expr_ty e = st->v.Expr.value;
+ if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
+ return e->v.Constant.value;
+ }
+ return NULL;