diff options
author | shadchin <[email protected]> | 2025-06-13 00:05:26 +0300 |
---|---|---|
committer | shadchin <[email protected]> | 2025-06-13 00:35:30 +0300 |
commit | 796b9088366b10b4cd42885101fc20c0b5709b07 (patch) | |
tree | f287eacb0b95ffd7cabf95b16cafb4788645dc38 /contrib/tools/python3/Parser/string_parser.c | |
parent | c72bca862651e507d2ff4980ef7f4ff7267a7227 (diff) |
Update Python 3 to 3.12.10
commit_hash:dd2398e159fe1d72ea6b12da52fccc933a41a785
Diffstat (limited to 'contrib/tools/python3/Parser/string_parser.c')
-rw-r--r-- | contrib/tools/python3/Parser/string_parser.c | 57 |
1 file changed, 48 insertions, 9 deletions
```diff
diff --git a/contrib/tools/python3/Parser/string_parser.c b/contrib/tools/python3/Parser/string_parser.c
index 164f715e153..8607885f2e4 100644
--- a/contrib/tools/python3/Parser/string_parser.c
+++ b/contrib/tools/python3/Parser/string_parser.c
@@ -1,15 +1,15 @@
-#include <stdbool.h>
-
 #include <Python.h>
 
 #include "tokenizer.h"
 #include "pegen.h"
 #include "string_parser.h"
 
+#include <stdbool.h>
+
 //// STRING HANDLING FUNCTIONS ////
 
 static int
-warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
+warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t)
 {
     if (p->call_invalid_rules) {
         // Do not report warnings if we are in the second pass of the parser
@@ -38,8 +38,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
     else {
         category = PyExc_DeprecationWarning;
     }
+
+    // Calculate the lineno and the col_offset of the invalid escape sequence
+    const char *start = buffer;
+    const char *end = first_invalid_escape;
+    int lineno = t->lineno;
+    int col_offset = t->col_offset;
+    while (start < end) {
+        if (*start == '\n') {
+            lineno++;
+            col_offset = 0;
+        }
+        else {
+            col_offset++;
+        }
+        start++;
+    }
+
+    // Count the number of quotes in the token
+    char first_quote = 0;
+    if (lineno == t->lineno) {
+        int quote_count = 0;
+        char* tok = PyBytes_AsString(t->bytes);
+        for (int i = 0; i < PyBytes_Size(t->bytes); i++) {
+            if (tok[i] == '\'' || tok[i] == '\"') {
+                if (quote_count == 0) {
+                    first_quote = tok[i];
+                }
+                if (tok[i] == first_quote) {
+                    quote_count++;
+                }
+            } else {
+                break;
+            }
+        }
+
+        col_offset += quote_count;
+    }
+
     if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
-                                 t->lineno, NULL, NULL) < 0) {
+                                 lineno, NULL, NULL) < 0) {
         if (PyErr_ExceptionMatches(category)) {
             /* Replace the Syntax/DeprecationWarning exception with a SyntaxError
                to get a more accurate error report */
@@ -50,11 +88,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
                error location, if p->known_err_token is not set. */
             p->known_err_token = t;
             if (octal) {
-                RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
-                                   first_invalid_escape);
+                RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
+                    "invalid octal escape sequence '\\%.3s'", first_invalid_escape);
             }
             else {
-                RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
+                RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1,
+                    "invalid escape sequence '\\%c'", c);
             }
         }
         Py_DECREF(msg);
@@ -148,7 +187,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
     // HACK: later we can simply pass the line no, since we don't preserve the tokens
     // when we are decoding the string but we preserve the line numbers.
     if (v != NULL && first_invalid_escape != NULL && t != NULL) {
-        if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
+        if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) {
             /* We have not decref u before because first_invalid_escape points
                inside u. */
             Py_XDECREF(u);
@@ -170,7 +209,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
     }
 
     if (first_invalid_escape != NULL) {
-        if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
+        if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) {
             Py_DECREF(result);
             return NULL;
         }
```