Update Python from 3.11.8 to 3.12.2

author: shadchin <shadchin@yandex-team.com> 2024-02-12 07:53:52 +0300
committer: shadchin <shadchin@yandex-team.com> 2024-02-12 08:07:36 +0300
commit: ce1b7ca3171f9158180640c6a02a74b4afffedea (patch)
tree: e47c1e8391b1b0128262c1e9b1e6ed4c8fff2348 /contrib/tools/python3/src/Parser/pegen_errors.c
parent: 57350d96f030db90f220ce50ee591d5c5d403df7 (diff)
download: ydb-ce1b7ca3171f9158180640c6a02a74b4afffedea.tar.gz
1 files changed, 26 insertions, 34 deletions
diff --git a/contrib/tools/python3/src/Parser/pegen_errors.c b/contrib/tools/python3/src/Parser/pegen_errors.c
index ccb0d37406..cefec5d275 100644
--- a/contrib/tools/python3/src/Parser/pegen_errors.c
+++ b/contrib/tools/python3/src/Parser/pegen_errors.c
@@ -66,6 +66,7 @@ _Pypegen_tokenizer_error(Parser *p)
     const char *msg = NULL;
     PyObject* errtype = PyExc_SyntaxError;
     Py_ssize_t col_offset = -1;
+    p->error_indicator = 1;
     switch (p->tok->done) {
         case E_TOKEN:
             msg = "invalid token";
@@ -168,11 +169,11 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
     Py_ssize_t current_err_line = current_token->lineno;
 
     int ret = 0;
+    struct token new_token;
+    _PyToken_Init(&new_token);
 
     for (;;) {
-        const char *start;
-        const char *end;
-        switch (_PyTokenizer_Get(p->tok, &start, &end)) {
+        switch (_PyTokenizer_Get(p->tok, &new_token)) {
             case ERRORTOKEN:
                 if (PyErr_Occurred()) {
                     ret = -1;
@@ -197,7 +198,11 @@ _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
 
 
 exit:
-    if (PyErr_Occurred()) {
+    _PyToken_Free(&new_token);
+    // If we're in an f-string, we want the syntax error in the expression part
+    // to propagate, so that tokenizer errors (like expecting '}') that happen afterwards
+    // do not swallow it.
+    if (PyErr_Occurred() && p->tok->tok_mode_stack_index <= 0) {
         Py_XDECREF(value);
         Py_XDECREF(type);
         Py_XDECREF(traceback);
@@ -210,7 +215,7 @@ exit:
 // PARSER ERRORS
 
 void *
-_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
+_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...)
 {
     // Bail out if we already have an error set.
     if (p->error_indicator && PyErr_Occurred()) {
@@ -223,8 +228,13 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
         va_end(va);
         return NULL;
     }
-
-    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
+    if (use_mark && p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
+        p->error_indicator = 1;
+        return NULL;
+    }
+    Token *t = p->known_err_token != NULL
+                   ? p->known_err_token
+                   : p->tokens[use_mark ? p->mark : p->fill - 1];
     Py_ssize_t col_offset;
     Py_ssize_t end_col_offset = -1;
     if (t->col_offset == -1) {
@@ -257,7 +267,7 @@ get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
      * (multi-line) statement are stored in p->tok->interactive_src_start.
      * If not, we're parsing from a string, which means that the whole source
      * is stored in p->tok->str. */
-    assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp == stdin);
+    assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp != NULL);
 
     char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
     if (cur_line == NULL) {
@@ -316,21 +326,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         end_col_offset = p->tok->cur - p->tok->line_start;
     }
 
-    if (p->start_rule == Py_fstring_input) {
-        const char *fstring_msg = "f-string: ";
-        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
-
-        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
-        if (!new_errmsg) {
-            return (void *) PyErr_NoMemory();
-        }
-
-        // Copy both strings into new buffer
-        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
-        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
-        new_errmsg[len] = 0;
-        errmsg = new_errmsg;
-    }
     errstr = PyUnicode_FromFormatV(errmsg, va);
     if (!errstr) {
         goto error;
@@ -369,11 +364,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         }
     }
 
-    if (p->start_rule == Py_fstring_input) {
-        col_offset -= p->starting_col_offset;
-        end_col_offset -= p->starting_col_offset;
-    }
-
     Py_ssize_t col_number = col_offset;
     Py_ssize_t end_col_number = end_col_offset;
 
@@ -404,17 +394,11 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
 
     Py_DECREF(errstr);
     Py_DECREF(value);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
     return NULL;
 
 error:
     Py_XDECREF(errstr);
     Py_XDECREF(error_line);
-    if (p->start_rule == Py_fstring_input) {
-        PyMem_Free((void *)errmsg);
-    }
     return NULL;
 }
 
@@ -460,3 +444,11 @@ _Pypegen_set_syntax_error(Parser* p, Token* last_token) {
     // generic SyntaxError we just raised if errors are found.
     _PyPegen_tokenize_full_source_to_check_for_errors(p);
 }
+
+void
+_Pypegen_stack_overflow(Parser *p)
+{
+    p->error_indicator = 1;
+    PyErr_SetString(PyExc_MemoryError,
+        "Parser stack overflowed - Python source too complex to parse");
+}
author	shadchin <shadchin@yandex-team.com>	2024-02-12 07:53:52 +0300
committer	shadchin <shadchin@yandex-team.com>	2024-02-12 08:07:36 +0300
commit	ce1b7ca3171f9158180640c6a02a74b4afffedea (patch)
tree	e47c1e8391b1b0128262c1e9b1e6ed4c8fff2348 /contrib/tools/python3/src/Parser/pegen_errors.c
parent	57350d96f030db90f220ce50ee591d5c5d403df7 (diff)
download	ydb-ce1b7ca3171f9158180640c6a02a74b4afffedea.tar.gz