Library import 16 (#2433)

Co-authored-by: robot-piglet <[email protected]> Co-authored-by: deshevoy <[email protected]> Co-authored-by: robot-contrib <[email protected]> Co-authored-by: thegeorg <[email protected]> Co-authored-by: robot-ya-builder <[email protected]> Co-authored-by: svidyuk <[email protected]> Co-authored-by: shadchin <[email protected]> Co-authored-by: robot-ratatosk <[email protected]> Co-authored-by: innokentii <[email protected]> Co-authored-by: arkady-e1ppa <[email protected]> Co-authored-by: snermolaev <[email protected]> Co-authored-by: dimdim11 <[email protected]> Co-authored-by: kickbutt <[email protected]> Co-authored-by: abdullinsaid <[email protected]> Co-authored-by: korsunandrei <[email protected]> Co-authored-by: petrk <[email protected]> Co-authored-by: miroslav2 <[email protected]> Co-authored-by: serjflint <[email protected]> Co-authored-by: akhropov <[email protected]> Co-authored-by: prettyboy <[email protected]> Co-authored-by: ilikepugs <[email protected]> Co-authored-by: hiddenpath <[email protected]> Co-authored-by: mikhnenko <[email protected]> Co-authored-by: spreis <[email protected]> Co-authored-by: andreyshspb <[email protected]> Co-authored-by: dimaandreev <[email protected]> Co-authored-by: rashid <[email protected]> Co-authored-by: robot-ydb-importer <[email protected]> Co-authored-by: r-vetrov <[email protected]> Co-authored-by: ypodlesov <[email protected]> Co-authored-by: zaverden <[email protected]> Co-authored-by: vpozdyayev <[email protected]> Co-authored-by: robot-cozmo <[email protected]> Co-authored-by: v-korovin <[email protected]> Co-authored-by: arikon <[email protected]> Co-authored-by: khoden <[email protected]> Co-authored-by: psydmm <[email protected]> Co-authored-by: robot-javacom <[email protected]> Co-authored-by: dtorilov <[email protected]> Co-authored-by: sennikovmv <[email protected]> Co-authored-by: hcpp <[email protected]>
author: AlexSm <[email protected]> 2024-03-05 10:40:59 +0100
committer: GitHub <[email protected]> 2024-03-05 12:40:59 +0300
commit: 1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree: 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/src/Parser/string_parser.c
parent: ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
1 files changed, 0 insertions, 274 deletions
diff --git a/contrib/tools/python3/src/Parser/string_parser.c b/contrib/tools/python3/src/Parser/string_parser.c
deleted file mode 100644
index 65c320c2173..00000000000
--- a/contrib/tools/python3/src/Parser/string_parser.c
+++ /dev/null
@@ -1,274 +0,0 @@
-#include <stdbool.h>
-
-#include <Python.h>
-
-#include "tokenizer.h"
-#include "pegen.h"
-#include "string_parser.h"
-
-//// STRING HANDLING FUNCTIONS ////
-
-static int
-warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t)
-{
-    if (p->call_invalid_rules) {
-        // Do not report warnings if we are in the second pass of the parser
-        // to avoid showing the warning twice.
-        return 0;
-    }
-    unsigned char c = *first_invalid_escape;
-    if ((t->type == FSTRING_MIDDLE || t->type == FSTRING_END) && (c == '{' || c == '}')) {  // in this case the tokenizer has already emitted a warning,
-                                                                                            // see tokenizer.c:warn_invalid_escape_sequence
-        return 0;
-    }
-
-    int octal = ('4' <= c && c <= '7');
-    PyObject *msg =
-        octal
-        ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
-                               first_invalid_escape)
-        : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
-    if (msg == NULL) {
-        return -1;
-    }
-    PyObject *category;
-    if (p->feature_version >= 12) {
-        category = PyExc_SyntaxWarning;
-    }
-    else {
-        category = PyExc_DeprecationWarning;
-    }
-    if (PyErr_WarnExplicitObject(category, msg, p->tok->filename,
-                                 t->lineno, NULL, NULL) < 0) {
-        if (PyErr_ExceptionMatches(category)) {
-            /* Replace the Syntax/DeprecationWarning exception with a SyntaxError
-               to get a more accurate error report */
-            PyErr_Clear();
-
-            /* This is needed, in order for the SyntaxError to point to the token t,
-               since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
-               error location, if p->known_err_token is not set. */
-            p->known_err_token = t;
-            if (octal) {
-                RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
-                                   first_invalid_escape);
-            }
-            else {
-                RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
-            }
-        }
-        Py_DECREF(msg);
-        return -1;
-    }
-    Py_DECREF(msg);
-    return 0;
-}
-
-static PyObject *
-decode_utf8(const char **sPtr, const char *end)
-{
-    const char *s;
-    const char *t;
-    t = s = *sPtr;
-    while (s < end && (*s & 0x80)) {
-        s++;
-    }
-    *sPtr = s;
-    return PyUnicode_DecodeUTF8(t, s - t, NULL);
-}
-
-static PyObject *
-decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
-{
-    PyObject *v;
-    PyObject *u;
-    char *buf;
-    char *p;
-    const char *end;
-
-    /* check for integer overflow */
-    if (len > SIZE_MAX / 6) {
-        return NULL;
-    }
-    /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
-       "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
-    u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
-    if (u == NULL) {
-        return NULL;
-    }
-    p = buf = PyBytes_AsString(u);
-    if (p == NULL) {
-        return NULL;
-    }
-    end = s + len;
-    while (s < end) {
-        if (*s == '\\') {
-            *p++ = *s++;
-            if (s >= end || *s & 0x80) {
-                strcpy(p, "u005c");
-                p += 5;
-                if (s >= end) {
-                    break;
-                }
-            }
-        }
-        if (*s & 0x80) {
-            PyObject *w;
-            int kind;
-            const void *data;
-            Py_ssize_t w_len;
-            Py_ssize_t i;
-            w = decode_utf8(&s, end);
-            if (w == NULL) {
-                Py_DECREF(u);
-                return NULL;
-            }
-            kind = PyUnicode_KIND(w);
-            data = PyUnicode_DATA(w);
-            w_len = PyUnicode_GET_LENGTH(w);
-            for (i = 0; i < w_len; i++) {
-                Py_UCS4 chr = PyUnicode_READ(kind, data, i);
-                sprintf(p, "\\U%08x", chr);
-                p += 10;
-            }
-            /* Should be impossible to overflow */
-            assert(p - buf <= PyBytes_GET_SIZE(u));
-            Py_DECREF(w);
-        }
-        else {
-            *p++ = *s++;
-        }
-    }
-    len = p - buf;
-    s = buf;
-
-    const char *first_invalid_escape;
-    v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL, &first_invalid_escape);
-
-    // HACK: later we can simply pass the line no, since we don't preserve the tokens
-    // when we are decoding the string but we preserve the line numbers.
-    if (v != NULL && first_invalid_escape != NULL && t != NULL) {
-        if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) {
-            /* We have not decref u before because first_invalid_escape points
-               inside u. */
-            Py_XDECREF(u);
-            Py_DECREF(v);
-            return NULL;
-        }
-    }
-    Py_XDECREF(u);
-    return v;
-}
-
-static PyObject *
-decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
-{
-    const char *first_invalid_escape;
-    PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
-    if (result == NULL) {
-        return NULL;
-    }
-
-    if (first_invalid_escape != NULL) {
-        if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) {
-            Py_DECREF(result);
-            return NULL;
-        }
-    }
-    return result;
-}
-
-PyObject *
-_PyPegen_decode_string(Parser *p, int raw, const char *s, size_t len, Token *t)
-{
-    if (raw) {
-        return PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
-    }
-    return decode_unicode_with_escapes(p, s, len, t);
-}
-
-/* s must include the bracketing quote characters, and r, b &/or f prefixes
-    (if any), and embedded escape sequences (if any). (f-strings are handled by the parser)
-   _PyPegen_parse_string parses it, and returns the decoded Python string object. */
-PyObject *
-_PyPegen_parse_string(Parser *p, Token *t)
-{
-    const char *s = PyBytes_AsString(t->bytes);
-    if (s == NULL) {
-        return NULL;
-    }
-
-    size_t len;
-    int quote = Py_CHARMASK(*s);
-    int bytesmode = 0;
-    int rawmode = 0;
-
-    if (Py_ISALPHA(quote)) {
-        while (!bytesmode || !rawmode) {
-            if (quote == 'b' || quote == 'B') {
-                quote =(unsigned char)*++s;
-                bytesmode = 1;
-            }
-            else if (quote == 'u' || quote == 'U') {
-                quote = (unsigned char)*++s;
-            }
-            else if (quote == 'r' || quote == 'R') {
-                quote = (unsigned char)*++s;
-                rawmode = 1;
-            }
-            else {
-                break;
-            }
-        }
-    }
-
-    if (quote != '\'' && quote != '\"') {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-    /* Skip the leading quote char. */
-    s++;
-    len = strlen(s);
-    if (len > INT_MAX) {
-        PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
-        return NULL;
-    }
-    if (s[--len] != quote) {
-        /* Last quote char must match the first. */
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-    if (len >= 4 && s[0] == quote && s[1] == quote) {
-        /* A triple quoted string. We've already skipped one quote at
-           the start and one at the end of the string. Now skip the
-           two at the start. */
-        s += 2;
-        len -= 2;
-        /* And check that the last two match. */
-        if (s[--len] != quote || s[--len] != quote) {
-            PyErr_BadInternalCall();
-            return NULL;
-        }
-    }
-
-    /* Avoid invoking escape decoding routines if possible. */
-    rawmode = rawmode || strchr(s, '\\') == NULL;
-    if (bytesmode) {
-        /* Disallow non-ASCII characters. */
-        const char *ch;
-        for (ch = s; *ch; ch++) {
-            if (Py_CHARMASK(*ch) >= 0x80) {
-                RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
-                                   t,
-                                   "bytes can only contain ASCII "
-                                   "literal characters");
-                return NULL;
-            }
-        }
-        if (rawmode) {
-            return PyBytes_FromStringAndSize(s, len);
-        }
-        return decode_bytes_with_escapes(p, s, len, t);
-    }
-    return _PyPegen_decode_string(p, rawmode, s, len, t);
-}
author	AlexSm <[email protected]>	2024-03-05 10:40:59 +0100
committer	GitHub <[email protected]>	2024-03-05 12:40:59 +0300
commit	1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree	07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/src/Parser/string_parser.c
parent	ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)