summary | refs | log | tree | commit | diff | stats
path: root/contrib/tools/python3/src/Parser/tokenizer.c
diff options
context:
space:
mode:
author: shadchin <[email protected]> 2022-02-10 16:44:39 +0300
committer: Daniil Cherednik <[email protected]> 2022-02-10 16:44:39 +0300
commit: e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree: 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/tools/python3/src/Parser/tokenizer.c
parent: 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Parser/tokenizer.c')
-rw-r--r-- contrib/tools/python3/src/Parser/tokenizer.c 678
1 files changed, 339 insertions, 339 deletions
diff --git a/contrib/tools/python3/src/Parser/tokenizer.c b/contrib/tools/python3/src/Parser/tokenizer.c
index 4bda4986f5d..41bfdb853c7 100644
--- a/contrib/tools/python3/src/Parser/tokenizer.c
+++ b/contrib/tools/python3/src/Parser/tokenizer.c
@@ -1,7 +1,7 @@
/* Tokenizer implementation */
-#define PY_SSIZE_T_CLEAN
+#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include <ctype.h>
@@ -42,9 +42,9 @@ static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);
-/* Spaces in this constant are treated as "zero or more spaces or tabs" when
- tokenizing. */
-static const char* type_comment_prefix = "# type: ";
+/* Spaces in this constant are treated as "zero or more spaces or tabs" when
+ tokenizing. */
+static const char* type_comment_prefix = "# type: ";
/* Create and initialize a new tok_state structure */
@@ -55,9 +55,9 @@ tok_new(void)
sizeof(struct tok_state));
if (tok == NULL)
return NULL;
- tok->buf = tok->cur = tok->inp = NULL;
- tok->start = NULL;
- tok->end = NULL;
+ tok->buf = tok->cur = tok->inp = NULL;
+ tok->start = NULL;
+ tok->end = NULL;
tok->done = E_OK;
tok->fp = NULL;
tok->input = NULL;
@@ -80,13 +80,13 @@ tok_new(void)
tok->filename = NULL;
tok->decoding_readline = NULL;
tok->decoding_buffer = NULL;
- tok->type_comments = 0;
+ tok->type_comments = 0;
+
+ tok->async_hacks = 0;
+ tok->async_def = 0;
+ tok->async_def_indent = 0;
+ tok->async_def_nl = 0;
- tok->async_hacks = 0;
- tok->async_def = 0;
- tok->async_def_indent = 0;
- tok->async_def_nl = 0;
-
return tok;
}
@@ -109,9 +109,9 @@ error_ret(struct tok_state *tok) /* XXX */
tok->decoding_erred = 1;
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
PyMem_FREE(tok->buf);
- tok->buf = tok->cur = tok->inp = NULL;
- tok->start = NULL;
- tok->end = NULL;
+ tok->buf = tok->cur = tok->inp = NULL;
+ tok->start = NULL;
+ tok->end = NULL;
tok->done = E_DECODE;
return NULL; /* as if it were EOF */
}
@@ -548,7 +548,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
"Non-UTF-8 code starting with '\\x%.2x' "
"in file %U on line %i, "
"but no encoding declared; "
- "see https://python.org/dev/peps/pep-0263/ for details",
+ "see https://python.org/dev/peps/pep-0263/ for details",
badchar, tok->filename, tok->lineno + 1);
return error_ret(tok);
}
@@ -649,14 +649,14 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
}
*current = '\0';
final_length = current - buf + 1;
- if (final_length < needed_length && final_length) {
+ if (final_length < needed_length && final_length) {
/* should never fail */
- char* result = PyMem_REALLOC(buf, final_length);
- if (result == NULL) {
- PyMem_FREE(buf);
- }
- buf = result;
- }
+ char* result = PyMem_REALLOC(buf, final_length);
+ if (result == NULL) {
+ PyMem_FREE(buf);
+ }
+ buf = result;
+ }
return buf;
}
@@ -664,11 +664,11 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
Look for encoding declarations inside STR, and record them
inside TOK. */
-static char *
+static char *
decode_str(const char *input, int single, struct tok_state *tok)
{
PyObject* utf8 = NULL;
- char *str;
+ char *str;
const char *s;
const char *newl[2] = {NULL, NULL};
int lineno = 0;
@@ -726,18 +726,18 @@ struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
- char *decoded;
-
+ char *decoded;
+
if (tok == NULL)
return NULL;
- decoded = decode_str(str, exec_input, tok);
- if (decoded == NULL) {
+ decoded = decode_str(str, exec_input, tok);
+ if (decoded == NULL) {
PyTokenizer_Free(tok);
return NULL;
}
- tok->buf = tok->cur = tok->inp = decoded;
- tok->end = decoded;
+ tok->buf = tok->cur = tok->inp = decoded;
+ tok->end = decoded;
return tok;
}
@@ -745,18 +745,18 @@ struct tok_state *
PyTokenizer_FromUTF8(const char *str, int exec_input)
{
struct tok_state *tok = tok_new();
- char *translated;
+ char *translated;
if (tok == NULL)
return NULL;
- tok->input = translated = translate_newlines(str, exec_input, tok);
- if (translated == NULL) {
+ tok->input = translated = translate_newlines(str, exec_input, tok);
+ if (translated == NULL) {
PyTokenizer_Free(tok);
return NULL;
}
tok->decoding_state = STATE_RAW;
tok->read_coding_spec = 1;
tok->enc = NULL;
- tok->str = translated;
+ tok->str = translated;
tok->encoding = (char *)PyMem_MALLOC(6);
if (!tok->encoding) {
PyTokenizer_Free(tok);
@@ -764,8 +764,8 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
}
strcpy(tok->encoding, "utf-8");
- tok->buf = tok->cur = tok->inp = translated;
- tok->end = translated;
+ tok->buf = tok->cur = tok->inp = translated;
+ tok->end = translated;
return tok;
}
@@ -815,7 +815,7 @@ PyTokenizer_Free(struct tok_state *tok)
if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf);
if (tok->input)
- PyMem_FREE(tok->input);
+ PyMem_FREE(tok->input);
PyMem_FREE(tok);
}
@@ -890,7 +890,7 @@ tok_nextc(struct tok_state *tok)
size_t start = tok->start - tok->buf;
size_t oldlen = tok->cur - tok->buf;
size_t newlen = oldlen + strlen(newtok);
- Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
+ Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
char *buf = tok->buf;
buf = (char *)PyMem_REALLOC(buf, newlen+1);
tok->lineno++;
@@ -903,7 +903,7 @@ tok_nextc(struct tok_state *tok)
}
tok->buf = buf;
tok->cur = tok->buf + oldlen;
- tok->multi_line_start = tok->buf + cur_multi_line_start;
+ tok->multi_line_start = tok->buf + cur_multi_line_start;
tok->line_start = tok->cur;
strcpy(tok->buf + oldlen, newtok);
PyMem_FREE(newtok);
@@ -962,7 +962,7 @@ tok_nextc(struct tok_state *tok)
while (!done) {
Py_ssize_t curstart = tok->start == NULL ? -1 :
tok->start - tok->buf;
- Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
+ Py_ssize_t cur_multi_line_start = tok->multi_line_start - tok->buf;
Py_ssize_t curvalid = tok->inp - tok->buf;
Py_ssize_t newsize = curvalid + BUFSIZ;
char *newbuf = tok->buf;
@@ -975,7 +975,7 @@ tok_nextc(struct tok_state *tok)
}
tok->buf = newbuf;
tok->cur = tok->buf + cur;
- tok->multi_line_start = tok->buf + cur_multi_line_start;
+ tok->multi_line_start = tok->buf + cur_multi_line_start;
tok->line_start = tok->cur;
tok->inp = tok->buf + curvalid;
tok->end = tok->buf + newsize;
@@ -991,8 +991,8 @@ tok_nextc(struct tok_state *tok)
return EOF;
/* Last line does not end in \n,
fake one */
- if (tok->inp[-1] != '\n')
- strcpy(tok->inp, "\n");
+ if (tok->inp[-1] != '\n')
+ strcpy(tok->inp, "\n");
}
tok->inp = strchr(tok->inp, '\0');
done = tok->inp[-1] == '\n';
@@ -1027,59 +1027,59 @@ static void
tok_backup(struct tok_state *tok, int c)
{
if (c != EOF) {
- if (--tok->cur < tok->buf) {
- Py_FatalError("tokenizer beginning of buffer");
- }
- if (*tok->cur != c) {
+ if (--tok->cur < tok->buf) {
+ Py_FatalError("tokenizer beginning of buffer");
+ }
+ if (*tok->cur != c) {
*tok->cur = c;
- }
+ }
}
}
-static int
-syntaxerror(struct tok_state *tok, const char *format, ...)
+static int
+syntaxerror(struct tok_state *tok, const char *format, ...)
{
- PyObject *errmsg, *errtext, *args;
- va_list vargs;
-#ifdef HAVE_STDARG_PROTOTYPES
- va_start(vargs, format);
-#else
- va_start(vargs);
-#endif
- errmsg = PyUnicode_FromFormatV(format, vargs);
- va_end(vargs);
- if (!errmsg) {
- goto error;
+ PyObject *errmsg, *errtext, *args;
+ va_list vargs;
+#ifdef HAVE_STDARG_PROTOTYPES
+ va_start(vargs, format);
+#else
+ va_start(vargs);
+#endif
+ errmsg = PyUnicode_FromFormatV(format, vargs);
+ va_end(vargs);
+ if (!errmsg) {
+ goto error;
}
- errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start,
- "replace");
- if (!errtext) {
- goto error;
- }
- int offset = (int)PyUnicode_GET_LENGTH(errtext);
- Py_ssize_t line_len = strcspn(tok->line_start, "\n");
- if (line_len != tok->cur - tok->line_start) {
- Py_DECREF(errtext);
- errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len,
- "replace");
- }
- if (!errtext) {
- goto error;
- }
+ errtext = PyUnicode_DecodeUTF8(tok->line_start, tok->cur - tok->line_start,
+ "replace");
+ if (!errtext) {
+ goto error;
+ }
+ int offset = (int)PyUnicode_GET_LENGTH(errtext);
+ Py_ssize_t line_len = strcspn(tok->line_start, "\n");
+ if (line_len != tok->cur - tok->line_start) {
+ Py_DECREF(errtext);
+ errtext = PyUnicode_DecodeUTF8(tok->line_start, line_len,
+ "replace");
+ }
+ if (!errtext) {
+ goto error;
+ }
- args = Py_BuildValue("(O(OiiN))", errmsg,
- tok->filename, tok->lineno, offset, errtext);
- if (args) {
- PyErr_SetObject(PyExc_SyntaxError, args);
- Py_DECREF(args);
+ args = Py_BuildValue("(O(OiiN))", errmsg,
+ tok->filename, tok->lineno, offset, errtext);
+ if (args) {
+ PyErr_SetObject(PyExc_SyntaxError, args);
+ Py_DECREF(args);
}
-error:
- Py_XDECREF(errmsg);
- tok->done = E_ERROR;
- return ERRORTOKEN;
+error:
+ Py_XDECREF(errmsg);
+ tok->done = E_ERROR;
+ return ERRORTOKEN;
}
static int
@@ -1100,50 +1100,50 @@ verify_identifier(struct tok_state *tok)
if (tok->decoding_erred)
return 0;
s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
- if (s == NULL) {
+ if (s == NULL) {
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
- tok->done = E_DECODE;
- }
- else {
+ tok->done = E_DECODE;
+ }
+ else {
tok->done = E_ERROR;
}
return 0;
}
- Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
- if (invalid < 0) {
- Py_DECREF(s);
- tok->done = E_ERROR;
- return 0;
- }
- assert(PyUnicode_GET_LENGTH(s) > 0);
- if (invalid < PyUnicode_GET_LENGTH(s)) {
- Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
- if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
- /* Determine the offset in UTF-8 encoded input */
- Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
- if (s != NULL) {
- Py_SETREF(s, PyUnicode_AsUTF8String(s));
- }
- if (s == NULL) {
- tok->done = E_ERROR;
- return 0;
- }
- tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
- }
- Py_DECREF(s);
- // PyUnicode_FromFormatV() does not support %X
- char hex[9];
- (void)PyOS_snprintf(hex, sizeof(hex), "%04X", ch);
- if (Py_UNICODE_ISPRINTABLE(ch)) {
- syntaxerror(tok, "invalid character '%c' (U+%s)", ch, hex);
- }
- else {
- syntaxerror(tok, "invalid non-printable character U+%s", hex);
- }
- return 0;
- }
+ Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
+ if (invalid < 0) {
+ Py_DECREF(s);
+ tok->done = E_ERROR;
+ return 0;
+ }
+ assert(PyUnicode_GET_LENGTH(s) > 0);
+ if (invalid < PyUnicode_GET_LENGTH(s)) {
+ Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
+ if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
+ /* Determine the offset in UTF-8 encoded input */
+ Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
+ if (s != NULL) {
+ Py_SETREF(s, PyUnicode_AsUTF8String(s));
+ }
+ if (s == NULL) {
+ tok->done = E_ERROR;
+ return 0;
+ }
+ tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
+ }
+ Py_DECREF(s);
+ // PyUnicode_FromFormatV() does not support %X
+ char hex[9];
+ (void)PyOS_snprintf(hex, sizeof(hex), "%04X", ch);
+ if (Py_UNICODE_ISPRINTABLE(ch)) {
+ syntaxerror(tok, "invalid character '%c' (U+%s)", ch, hex);
+ }
+ else {
+ syntaxerror(tok, "invalid non-printable character U+%s", hex);
+ }
+ return 0;
+ }
Py_DECREF(s);
- return 1;
+ return 1;
}
static int
@@ -1161,7 +1161,7 @@ tok_decimal_tail(struct tok_state *tok)
c = tok_nextc(tok);
if (!isdigit(c)) {
tok_backup(tok, c);
- syntaxerror(tok, "invalid decimal literal");
+ syntaxerror(tok, "invalid decimal literal");
return 0;
}
}
@@ -1171,7 +1171,7 @@ tok_decimal_tail(struct tok_state *tok)
/* Get next token, after space stripping etc. */
static int
-tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
+tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
{
int c;
int blankline, nonascii;
@@ -1203,9 +1203,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
}
tok_backup(tok, c);
- if (c == '#' || c == '\n' || c == '\\') {
+ if (c == '#' || c == '\n' || c == '\\') {
/* Lines with only whitespace and/or comments
- and/or a line continuation character
+ and/or a line continuation character
shouldn't affect the indentation and are
not passed to the parser as NEWLINE tokens,
except *totally* empty lines in interactive
@@ -1213,12 +1213,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
if (col == 0 && c == '\n' && tok->prompt != NULL) {
blankline = 0; /* Let it through */
}
- else if (tok->prompt != NULL && tok->lineno == 1) {
- /* In interactive mode, if the first line contains
- only spaces and/or a comment, let it through. */
- blankline = 0;
- col = altcol = 0;
- }
+ else if (tok->prompt != NULL && tok->lineno == 1) {
+ /* In interactive mode, if the first line contains
+ only spaces and/or a comment, let it through. */
+ blankline = 0;
+ col = altcol = 0;
+ }
else {
blankline = 1; /* Ignore completely */
}
@@ -1279,31 +1279,31 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
}
- /* Peek ahead at the next character */
- c = tok_nextc(tok);
- tok_backup(tok, c);
- /* Check if we are closing an async function */
- if (tok->async_def
- && !blankline
- /* Due to some implementation artifacts of type comments,
- * a TYPE_COMMENT at the start of a function won't set an
- * indentation level and it will produce a NEWLINE after it.
- * To avoid spuriously ending an async function due to this,
- * wait until we have some non-newline char in front of us. */
- && c != '\n'
- && tok->level == 0
- /* There was a NEWLINE after ASYNC DEF,
- so we're past the signature. */
- && tok->async_def_nl
- /* Current indentation level is less than where
- the async function was defined */
- && tok->async_def_indent >= tok->indent)
- {
- tok->async_def = 0;
- tok->async_def_indent = 0;
- tok->async_def_nl = 0;
- }
-
+ /* Peek ahead at the next character */
+ c = tok_nextc(tok);
+ tok_backup(tok, c);
+ /* Check if we are closing an async function */
+ if (tok->async_def
+ && !blankline
+ /* Due to some implementation artifacts of type comments,
+ * a TYPE_COMMENT at the start of a function won't set an
+ * indentation level and it will produce a NEWLINE after it.
+ * To avoid spuriously ending an async function due to this,
+ * wait until we have some non-newline char in front of us. */
+ && c != '\n'
+ && tok->level == 0
+ /* There was a NEWLINE after ASYNC DEF,
+ so we're past the signature. */
+ && tok->async_def_nl
+ /* Current indentation level is less than where
+ the async function was defined */
+ && tok->async_def_indent >= tok->indent)
+ {
+ tok->async_def = 0;
+ tok->async_def_indent = 0;
+ tok->async_def_nl = 0;
+ }
+
again:
tok->start = NULL;
/* Skip spaces */
@@ -1314,63 +1314,63 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Set start of current token */
tok->start = tok->cur - 1;
- /* Skip comment, unless it's a type comment */
+ /* Skip comment, unless it's a type comment */
if (c == '#') {
- const char *prefix, *p, *type_start;
-
+ const char *prefix, *p, *type_start;
+
while (c != EOF && c != '\n') {
c = tok_nextc(tok);
}
-
- if (tok->type_comments) {
- p = tok->start;
- prefix = type_comment_prefix;
- while (*prefix && p < tok->cur) {
- if (*prefix == ' ') {
- while (*p == ' ' || *p == '\t') {
- p++;
- }
- } else if (*prefix == *p) {
- p++;
- } else {
- break;
- }
-
- prefix++;
- }
-
- /* This is a type comment if we matched all of type_comment_prefix. */
- if (!*prefix) {
- int is_type_ignore = 1;
- const char *ignore_end = p + 6;
- tok_backup(tok, c); /* don't eat the newline or EOF */
-
- type_start = p;
-
- /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
- * or anything ASCII and non-alphanumeric. */
- is_type_ignore = (
- tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
- && !(tok->cur > ignore_end
- && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
-
- if (is_type_ignore) {
- *p_start = ignore_end;
- *p_end = tok->cur;
-
- /* If this type ignore is the only thing on the line, consume the newline also. */
- if (blankline) {
- tok_nextc(tok);
- tok->atbol = 1;
- }
- return TYPE_IGNORE;
- } else {
- *p_start = type_start; /* after type_comment_prefix */
- *p_end = tok->cur;
- return TYPE_COMMENT;
- }
- }
- }
+
+ if (tok->type_comments) {
+ p = tok->start;
+ prefix = type_comment_prefix;
+ while (*prefix && p < tok->cur) {
+ if (*prefix == ' ') {
+ while (*p == ' ' || *p == '\t') {
+ p++;
+ }
+ } else if (*prefix == *p) {
+ p++;
+ } else {
+ break;
+ }
+
+ prefix++;
+ }
+
+ /* This is a type comment if we matched all of type_comment_prefix. */
+ if (!*prefix) {
+ int is_type_ignore = 1;
+ const char *ignore_end = p + 6;
+ tok_backup(tok, c); /* don't eat the newline or EOF */
+
+ type_start = p;
+
+ /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
+ * or anything ASCII and non-alphanumeric. */
+ is_type_ignore = (
+ tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
+ && !(tok->cur > ignore_end
+ && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
+
+ if (is_type_ignore) {
+ *p_start = ignore_end;
+ *p_end = tok->cur;
+
+ /* If this type ignore is the only thing on the line, consume the newline also. */
+ if (blankline) {
+ tok_nextc(tok);
+ tok->atbol = 1;
+ }
+ return TYPE_IGNORE;
+ } else {
+ *p_start = type_start; /* after type_comment_prefix */
+ *p_end = tok->cur;
+ return TYPE_COMMENT;
+ }
+ }
+ }
}
/* Check for EOF and errors now */
@@ -1417,55 +1417,55 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
if (nonascii && !verify_identifier(tok)) {
return ERRORTOKEN;
}
-
+
*p_start = tok->start;
*p_end = tok->cur;
- /* async/await parsing block. */
- if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
- /* May be an 'async' or 'await' token. For Python 3.7 or
- later we recognize them unconditionally. For Python
- 3.5 or 3.6 we recognize 'async' in front of 'def', and
- either one inside of 'async def'. (Technically we
- shouldn't recognize these at all for 3.4 or earlier,
- but there's no *valid* Python 3.4 code that would be
- rejected, and async functions will be rejected in a
- later phase.) */
- if (!tok->async_hacks || tok->async_def) {
- /* Always recognize the keywords. */
- if (memcmp(tok->start, "async", 5) == 0) {
- return ASYNC;
- }
- if (memcmp(tok->start, "await", 5) == 0) {
- return AWAIT;
- }
- }
- else if (memcmp(tok->start, "async", 5) == 0) {
- /* The current token is 'async'.
- Look ahead one token to see if that is 'def'. */
-
- struct tok_state ahead_tok;
- const char *ahead_tok_start = NULL;
- const char *ahead_tok_end = NULL;
- int ahead_tok_kind;
-
- memcpy(&ahead_tok, tok, sizeof(ahead_tok));
- ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
- &ahead_tok_end);
-
- if (ahead_tok_kind == NAME
- && ahead_tok.cur - ahead_tok.start == 3
- && memcmp(ahead_tok.start, "def", 3) == 0)
- {
- /* The next token is going to be 'def', so instead of
- returning a plain NAME token, return ASYNC. */
- tok->async_def_indent = tok->indent;
- tok->async_def = 1;
- return ASYNC;
- }
- }
- }
-
+ /* async/await parsing block. */
+ if (tok->cur - tok->start == 5 && tok->start[0] == 'a') {
+ /* May be an 'async' or 'await' token. For Python 3.7 or
+ later we recognize them unconditionally. For Python
+ 3.5 or 3.6 we recognize 'async' in front of 'def', and
+ either one inside of 'async def'. (Technically we
+ shouldn't recognize these at all for 3.4 or earlier,
+ but there's no *valid* Python 3.4 code that would be
+ rejected, and async functions will be rejected in a
+ later phase.) */
+ if (!tok->async_hacks || tok->async_def) {
+ /* Always recognize the keywords. */
+ if (memcmp(tok->start, "async", 5) == 0) {
+ return ASYNC;
+ }
+ if (memcmp(tok->start, "await", 5) == 0) {
+ return AWAIT;
+ }
+ }
+ else if (memcmp(tok->start, "async", 5) == 0) {
+ /* The current token is 'async'.
+ Look ahead one token to see if that is 'def'. */
+
+ struct tok_state ahead_tok;
+ const char *ahead_tok_start = NULL;
+ const char *ahead_tok_end = NULL;
+ int ahead_tok_kind;
+
+ memcpy(&ahead_tok, tok, sizeof(ahead_tok));
+ ahead_tok_kind = tok_get(&ahead_tok, &ahead_tok_start,
+ &ahead_tok_end);
+
+ if (ahead_tok_kind == NAME
+ && ahead_tok.cur - ahead_tok.start == 3
+ && memcmp(ahead_tok.start, "def", 3) == 0)
+ {
+ /* The next token is going to be 'def', so instead of
+ returning a plain NAME token, return ASYNC. */
+ tok->async_def_indent = tok->indent;
+ tok->async_def = 1;
+ return ASYNC;
+ }
+ }
+ }
+
return NAME;
}
@@ -1478,11 +1478,11 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
tok->cont_line = 0;
- if (tok->async_def) {
- /* We're somewhere inside an 'async def' function, and
- we've encountered a NEWLINE after its signature. */
- tok->async_def_nl = 1;
- }
+ if (tok->async_def) {
+ /* We're somewhere inside an 'async def' function, and
+ we've encountered a NEWLINE after its signature. */
+ tok->async_def_nl = 1;
+ }
return NEWLINE;
}
@@ -1525,7 +1525,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
if (!isxdigit(c)) {
tok_backup(tok, c);
- return syntaxerror(tok, "invalid hexadecimal literal");
+ return syntaxerror(tok, "invalid hexadecimal literal");
}
do {
c = tok_nextc(tok);
@@ -1541,22 +1541,22 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
if (c < '0' || c >= '8') {
tok_backup(tok, c);
- if (isdigit(c)) {
- return syntaxerror(tok,
- "invalid digit '%c' in octal literal", c);
- }
- else {
- return syntaxerror(tok, "invalid octal literal");
- }
+ if (isdigit(c)) {
+ return syntaxerror(tok,
+ "invalid digit '%c' in octal literal", c);
+ }
+ else {
+ return syntaxerror(tok, "invalid octal literal");
+ }
}
do {
c = tok_nextc(tok);
} while ('0' <= c && c < '8');
} while (c == '_');
- if (isdigit(c)) {
- return syntaxerror(tok,
- "invalid digit '%c' in octal literal", c);
- }
+ if (isdigit(c)) {
+ return syntaxerror(tok,
+ "invalid digit '%c' in octal literal", c);
+ }
}
else if (c == 'b' || c == 'B') {
/* Binary */
@@ -1567,22 +1567,22 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
if (c != '0' && c != '1') {
tok_backup(tok, c);
- if (isdigit(c)) {
- return syntaxerror(tok,
- "invalid digit '%c' in binary literal", c);
- }
- else {
- return syntaxerror(tok, "invalid binary literal");
- }
+ if (isdigit(c)) {
+ return syntaxerror(tok,
+ "invalid digit '%c' in binary literal", c);
+ }
+ else {
+ return syntaxerror(tok, "invalid binary literal");
+ }
}
do {
c = tok_nextc(tok);
} while (c == '0' || c == '1');
} while (c == '_');
- if (isdigit(c)) {
- return syntaxerror(tok,
- "invalid digit '%c' in binary literal", c);
- }
+ if (isdigit(c)) {
+ return syntaxerror(tok,
+ "invalid digit '%c' in binary literal", c);
+ }
}
else {
int nonzero = 0;
@@ -1593,7 +1593,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
if (!isdigit(c)) {
tok_backup(tok, c);
- return syntaxerror(tok, "invalid decimal literal");
+ return syntaxerror(tok, "invalid decimal literal");
}
}
if (c != '0') {
@@ -1621,10 +1621,10 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
else if (nonzero) {
/* Old-style octal: now disallowed. */
tok_backup(tok, c);
- return syntaxerror(tok,
- "leading zeros in decimal integer "
- "literals are not permitted; "
- "use an 0o prefix for octal integers");
+ return syntaxerror(tok,
+ "leading zeros in decimal integer "
+ "literals are not permitted; "
+ "use an 0o prefix for octal integers");
}
}
}
@@ -1657,7 +1657,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
c = tok_nextc(tok);
if (!isdigit(c)) {
tok_backup(tok, c);
- return syntaxerror(tok, "invalid decimal literal");
+ return syntaxerror(tok, "invalid decimal literal");
}
} else if (!isdigit(c)) {
tok_backup(tok, c);
@@ -1691,13 +1691,13 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
int quote_size = 1; /* 1 or 3 */
int end_quote_size = 0;
- /* Nodes of type STRING, especially multi line strings
- must be handled differently in order to get both
- the starting line number and the column offset right.
- (cf. issue 16806) */
- tok->first_lineno = tok->lineno;
- tok->multi_line_start = tok->line_start;
-
+ /* Nodes of type STRING, especially multi line strings
+ must be handled differently in order to get both
+ the starting line number and the column offset right.
+ (cf. issue 16806) */
+ tok->first_lineno = tok->lineno;
+ tok->multi_line_start = tok->line_start;
+
/* Find the quote size and start of string */
c = tok_nextc(tok);
if (c == quote) {
@@ -1754,14 +1754,14 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
tok->done = E_LINECONT;
return ERRORTOKEN;
}
- c = tok_nextc(tok);
- if (c == EOF) {
- tok->done = E_EOF;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- } else {
- tok_backup(tok, c);
- }
+ c = tok_nextc(tok);
+ if (c == EOF) {
+ tok->done = E_EOF;
+ tok->cur = tok->inp;
+ return ERRORTOKEN;
+ } else {
+ tok_backup(tok, c);
+ }
tok->cont_line = 1;
goto again; /* Read next line */
}
@@ -1791,38 +1791,38 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
case '(':
case '[':
case '{':
- if (tok->level >= MAXLEVEL) {
- return syntaxerror(tok, "too many nested parentheses");
- }
- tok->parenstack[tok->level] = c;
- tok->parenlinenostack[tok->level] = tok->lineno;
+ if (tok->level >= MAXLEVEL) {
+ return syntaxerror(tok, "too many nested parentheses");
+ }
+ tok->parenstack[tok->level] = c;
+ tok->parenlinenostack[tok->level] = tok->lineno;
tok->level++;
break;
case ')':
case ']':
case '}':
- if (!tok->level) {
- return syntaxerror(tok, "unmatched '%c'", c);
- }
+ if (!tok->level) {
+ return syntaxerror(tok, "unmatched '%c'", c);
+ }
tok->level--;
- int opening = tok->parenstack[tok->level];
- if (!((opening == '(' && c == ')') ||
- (opening == '[' && c == ']') ||
- (opening == '{' && c == '}')))
- {
- if (tok->parenlinenostack[tok->level] != tok->lineno) {
- return syntaxerror(tok,
- "closing parenthesis '%c' does not match "
- "opening parenthesis '%c' on line %d",
- c, opening, tok->parenlinenostack[tok->level]);
- }
- else {
- return syntaxerror(tok,
- "closing parenthesis '%c' does not match "
- "opening parenthesis '%c'",
- c, opening);
- }
- }
+ int opening = tok->parenstack[tok->level];
+ if (!((opening == '(' && c == ')') ||
+ (opening == '[' && c == ']') ||
+ (opening == '{' && c == '}')))
+ {
+ if (tok->parenlinenostack[tok->level] != tok->lineno) {
+ return syntaxerror(tok,
+ "closing parenthesis '%c' does not match "
+ "opening parenthesis '%c' on line %d",
+ c, opening, tok->parenlinenostack[tok->level]);
+ }
+ else {
+ return syntaxerror(tok,
+ "closing parenthesis '%c' does not match "
+ "opening parenthesis '%c'",
+ c, opening);
+ }
+ }
break;
}
@@ -1833,7 +1833,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
}
int
-PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
+PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
{
int result = tok_get(tok, p_start, p_end);
if (tok->decoding_erred) {
@@ -1858,9 +1858,9 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{
struct tok_state *tok;
FILE *fp;
- const char *p_start = NULL;
- const char *p_end = NULL;
- char *encoding = NULL;
+ const char *p_start = NULL;
+ const char *p_end = NULL;
+ char *encoding = NULL;
fd = _Py_dup(fd);
if (fd < 0) {
@@ -1895,7 +1895,7 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
if (tok->encoding) {
encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
if (encoding)
- strcpy(encoding, tok->encoding);
+ strcpy(encoding, tok->encoding);
}
PyTokenizer_Free(tok);
return encoding;