intermediate changes

ref:102662f6c42fba80d7bfd4a328124cbb4294be48
author: arcadia-devtools <arcadia-devtools@yandex-team.ru> 2022-03-18 09:10:23 +0300
committer: arcadia-devtools <arcadia-devtools@yandex-team.ru> 2022-03-18 09:10:23 +0300
commit: fef2b3a8ed5955b63c71e8e541a5acf2e393925a (patch)
tree: e55d2882d5c2c71561a0aa89158ec174d81f92fd /contrib/tools/python3/src/Parser/pegen/parse_string.c
parent: 2acc0fc3cdc40434ea286f2fac62386e3fd9c19d (diff)
download: ydb-fef2b3a8ed5955b63c71e8e541a5acf2e393925a.tar.gz
1 files changed, 24 insertions, 12 deletions
diff --git a/contrib/tools/python3/src/Parser/pegen/parse_string.c b/contrib/tools/python3/src/Parser/pegen/parse_string.c
index f1df2c46a6..15a132b4e0 100644
--- a/contrib/tools/python3/src/Parser/pegen/parse_string.c
+++ b/contrib/tools/python3/src/Parser/pegen/parse_string.c
@@ -444,12 +444,23 @@ fstring_find_literal(Parser *p, const char **str, const char *end, int raw,
         if (!raw && ch == '\\' && s < end) {
             ch = *s++;
             if (ch == 'N') {
+                /* We need to look at and skip matching braces for "\N{name}"
+                   sequences because otherwise we'll think the opening '{'
+                   starts an expression, which is not the case with "\N".
+                   Keep looking for either a matched '{' '}' pair, or the end
+                   of the string. */
+
                 if (s < end && *s++ == '{') {
                     while (s < end && *s++ != '}') {
                     }
                     continue;
                 }
-                break;
+
+                /* This is an invalid "\N" sequence, since it's a "\N" not
+                   followed by a "{".  Just keep parsing this literal.  This
+                   error will be caught later by
+                   decode_unicode_with_escapes(). */
+                continue;
             }
             if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
                 return -1;
@@ -493,7 +504,8 @@ done:
             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
                                                     s - literal_start,
                                                     NULL, NULL);
-        } else {
+        }
+        else {
             *literal = decode_unicode_with_escapes(p, literal_start,
                                                    s - literal_start, t);
         }
@@ -656,12 +668,12 @@ fstring_find_expr(Parser *p, const char **str, const char *end, int raw, int rec
                     *str += 1;
                     continue;
                 }
-                /* Don't get out of the loop for these, if they're single
-                   chars (not part of 2-char tokens). If by themselves, they
-                   don't end an expression (unlike say '!'). */
-                if (ch == '>' || ch == '<') {
-                    continue;
-                }
+            }
+            /* Don't get out of the loop for these, if they're single
+               chars (not part of 2-char tokens). If by themselves, they
+               don't end an expression (unlike say '!'). */
+            if (ch == '>' || ch == '<') {
+                continue;
             }
 
             /* Normal way out of this loop. */
@@ -688,10 +700,10 @@ fstring_find_expr(Parser *p, const char **str, const char *end, int raw, int rec
         }
     }
     expr_end = *str;
-    /* If we leave this loop in a string or with mismatched parens, we
-       don't care. We'll get a syntax error when compiling the
-       expression. But, we can produce a better error message, so
-       let's just do that.*/
+    /* If we leave the above loop in a string or with mismatched parens, we
+       don't really care. We'll get a syntax error when compiling the
+       expression. But, we can produce a better error message, so let's just
+       do that. */
     if (quote_char) {
         RAISE_SYNTAX_ERROR("f-string: unterminated string");
         goto error;
author	arcadia-devtools <arcadia-devtools@yandex-team.ru>	2022-03-18 09:10:23 +0300
committer	arcadia-devtools <arcadia-devtools@yandex-team.ru>	2022-03-18 09:10:23 +0300
commit	fef2b3a8ed5955b63c71e8e541a5acf2e393925a (patch)
tree	e55d2882d5c2c71561a0aa89158ec174d81f92fd /contrib/tools/python3/src/Parser/pegen/parse_string.c
parent	2acc0fc3cdc40434ea286f2fac62386e3fd9c19d (diff)
download	ydb-fef2b3a8ed5955b63c71e8e541a5acf2e393925a.tar.gz