diff options
| author | orivej <[email protected]> | 2022-02-10 16:44:49 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:44:49 +0300 | 
| commit | 718c552901d703c502ccbefdfc3c9028d608b947 (patch) | |
| tree | 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/tools/python3/src/Objects/stringlib | |
| parent | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Objects/stringlib')
21 files changed, 4334 insertions, 4334 deletions
| diff --git a/contrib/tools/python3/src/Objects/stringlib/asciilib.h b/contrib/tools/python3/src/Objects/stringlib/asciilib.h index e69a2c076e3..90ba2aa0157 100644 --- a/contrib/tools/python3/src/Objects/stringlib/asciilib.h +++ b/contrib/tools/python3/src/Objects/stringlib/asciilib.h @@ -1,26 +1,26 @@ -/* this is sort of a hack.  there's at least one place (formatting -   floats) where some stringlib code takes a different path if it's -   compiled as unicode. */ -#define STRINGLIB_IS_UNICODE     1 - -#define FASTSEARCH               asciilib_fastsearch -#define STRINGLIB(F)             asciilib_##F -#define STRINGLIB_OBJECT         PyUnicodeObject -#define STRINGLIB_SIZEOF_CHAR    1 -#define STRINGLIB_MAX_CHAR       0x7Fu -#define STRINGLIB_CHAR           Py_UCS1 -#define STRINGLIB_TYPE_NAME      "unicode" -#define STRINGLIB_PARSE_CODE     "U" -#define STRINGLIB_EMPTY          unicode_empty -#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE -#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK -#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL -#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL -#define STRINGLIB_STR            PyUnicode_1BYTE_DATA -#define STRINGLIB_LEN            PyUnicode_GET_LENGTH +/* this is sort of a hack.  there's at least one place (formatting  +   floats) where some stringlib code takes a different path if it's  +   compiled as unicode. */  +#define STRINGLIB_IS_UNICODE     1  +  +#define FASTSEARCH               asciilib_fastsearch  +#define STRINGLIB(F)             asciilib_##F  +#define STRINGLIB_OBJECT         PyUnicodeObject  +#define STRINGLIB_SIZEOF_CHAR    1  +#define STRINGLIB_MAX_CHAR       0x7Fu  +#define STRINGLIB_CHAR           Py_UCS1  +#define STRINGLIB_TYPE_NAME      "unicode"  +#define STRINGLIB_PARSE_CODE     "U"  +#define STRINGLIB_EMPTY          unicode_empty  +#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE  +#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK  +#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL  +#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL  +#define STRINGLIB_STR            PyUnicode_1BYTE_DATA  +#define STRINGLIB_LEN            PyUnicode_GET_LENGTH   #define STRINGLIB_NEW(STR,LEN)   _PyUnicode_FromASCII((const char*)(STR),(LEN)) -#define STRINGLIB_CHECK          PyUnicode_Check -#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact - -#define STRINGLIB_TOSTR          PyObject_Str -#define STRINGLIB_TOASCII        PyObject_ASCII +#define STRINGLIB_CHECK          PyUnicode_Check  +#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact  +  +#define STRINGLIB_TOSTR          PyObject_Str  +#define STRINGLIB_TOASCII        PyObject_ASCII  diff --git a/contrib/tools/python3/src/Objects/stringlib/codecs.h b/contrib/tools/python3/src/Objects/stringlib/codecs.h index 9b2a29ba3b8..dc2f8d2967b 100644 --- a/contrib/tools/python3/src/Objects/stringlib/codecs.h +++ b/contrib/tools/python3/src/Objects/stringlib/codecs.h @@ -1,825 +1,825 @@ -/* stringlib: codec implementations */ - -#if !STRINGLIB_IS_UNICODE -# error "codecs.h is specific to Unicode" -#endif - +/* stringlib: codec implementations */  +  +#if !STRINGLIB_IS_UNICODE  +# error "codecs.h is specific to Unicode"  +#endif  +   #include "pycore_byteswap.h"      // _Py_bswap32() -/* Mask to quickly check whether a C 'long' contains a -   non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define ASCII_CHAR_MASK 0x8080808080808080UL -#elif (SIZEOF_LONG == 4) -# define ASCII_CHAR_MASK 0x80808080UL -#else -# error C 'long' size should be either 4 or 8! -#endif - -/* 10xxxxxx */ -#define IS_CONTINUATION_BYTE(ch) ((ch) >= 0x80 && (ch) < 0xC0) - -Py_LOCAL_INLINE(Py_UCS4) -STRINGLIB(utf8_decode)(const char **inptr, const char *end, -                       STRINGLIB_CHAR *dest, -                       Py_ssize_t *outpos) -{ -    Py_UCS4 ch; -    const char *s = *inptr; -    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); -    STRINGLIB_CHAR *p = dest + *outpos; - -    while (s < end) { -        ch = (unsigned char)*s; - -        if (ch < 0x80) { -            /* Fast path for runs of ASCII characters. Given that common UTF-8 -               input will consist of an overwhelming majority of ASCII -               characters, we try to optimize for this case by checking -               as many characters as a C 'long' can contain. -               First, check if we can do an aligned read, as most CPUs have -               a penalty for unaligned reads. -            */ -            if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) { -                /* Help register allocation */ -                const char *_s = s; -                STRINGLIB_CHAR *_p = p; -                while (_s < aligned_end) { -                    /* Read a whole long at a time (either 4 or 8 bytes), -                       and do a fast unrolled copy if it only contains ASCII -                       characters. */ +/* Mask to quickly check whether a C 'long' contains a  +   non-ASCII, UTF8-encoded char. */  +#if (SIZEOF_LONG == 8)  +# define ASCII_CHAR_MASK 0x8080808080808080UL  +#elif (SIZEOF_LONG == 4)  +# define ASCII_CHAR_MASK 0x80808080UL  +#else  +# error C 'long' size should be either 4 or 8!  +#endif  +  +/* 10xxxxxx */  +#define IS_CONTINUATION_BYTE(ch) ((ch) >= 0x80 && (ch) < 0xC0)  +  +Py_LOCAL_INLINE(Py_UCS4)  +STRINGLIB(utf8_decode)(const char **inptr, const char *end,  +                       STRINGLIB_CHAR *dest,  +                       Py_ssize_t *outpos)  +{  +    Py_UCS4 ch;  +    const char *s = *inptr;  +    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);  +    STRINGLIB_CHAR *p = dest + *outpos;  +  +    while (s < end) {  +        ch = (unsigned char)*s;  +  +        if (ch < 0x80) {  +            /* Fast path for runs of ASCII characters. Given that common UTF-8  +               input will consist of an overwhelming majority of ASCII  +               characters, we try to optimize for this case by checking  +               as many characters as a C 'long' can contain.  +               First, check if we can do an aligned read, as most CPUs have  +               a penalty for unaligned reads.  +            */  +            if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {  +                /* Help register allocation */  +                const char *_s = s;  +                STRINGLIB_CHAR *_p = p;  +                while (_s < aligned_end) {  +                    /* Read a whole long at a time (either 4 or 8 bytes),  +                       and do a fast unrolled copy if it only contains ASCII  +                       characters. */                       unsigned long value = *(const unsigned long *) _s; -                    if (value & ASCII_CHAR_MASK) -                        break; -#if PY_LITTLE_ENDIAN -                    _p[0] = (STRINGLIB_CHAR)(value & 0xFFu); -                    _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu); -                    _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu); -                    _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu); -# if SIZEOF_LONG == 8 -                    _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu); -                    _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu); -                    _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu); -                    _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu); -# endif -#else -# if SIZEOF_LONG == 8 -                    _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu); -                    _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu); -                    _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu); -                    _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu); -                    _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu); -                    _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu); -                    _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu); -                    _p[7] = (STRINGLIB_CHAR)(value & 0xFFu); -# else -                    _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu); -                    _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu); -                    _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu); -                    _p[3] = (STRINGLIB_CHAR)(value & 0xFFu); -# endif -#endif -                    _s += SIZEOF_LONG; -                    _p += SIZEOF_LONG; -                } -                s = _s; -                p = _p; -                if (s == end) -                    break; -                ch = (unsigned char)*s; -            } -            if (ch < 0x80) { -                s++; -                *p++ = ch; -                continue; -            } -        } - -        if (ch < 0xE0) { -            /* \xC2\x80-\xDF\xBF -- 0080-07FF */ -            Py_UCS4 ch2; -            if (ch < 0xC2) { -                /* invalid sequence -                \x80-\xBF -- continuation byte -                \xC0-\xC1 -- fake 0000-007F */ -                goto InvalidStart; -            } -            if (end - s < 2) { -                /* unexpected end of data: the caller will decide whether -                   it's an error or not */ -                break; -            } -            ch2 = (unsigned char)s[1]; -            if (!IS_CONTINUATION_BYTE(ch2)) -                /* invalid continuation byte */ -                goto InvalidContinuation1; -            ch = (ch << 6) + ch2 - -                 ((0xC0 << 6) + 0x80); -            assert ((ch > 0x007F) && (ch <= 0x07FF)); -            s += 2; -            if (STRINGLIB_MAX_CHAR <= 0x007F || -                (STRINGLIB_MAX_CHAR < 0x07FF && ch > STRINGLIB_MAX_CHAR)) -                /* Out-of-range */ -                goto Return; -            *p++ = ch; -            continue; -        } - -        if (ch < 0xF0) { -            /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */ -            Py_UCS4 ch2, ch3; -            if (end - s < 3) { -                /* unexpected end of data: the caller will decide whether -                   it's an error or not */ -                if (end - s < 2) -                    break; -                ch2 = (unsigned char)s[1]; -                if (!IS_CONTINUATION_BYTE(ch2) || -                    (ch2 < 0xA0 ? ch == 0xE0 : ch == 0xED)) -                    /* for clarification see comments below */ -                    goto InvalidContinuation1; -                break; -            } -            ch2 = (unsigned char)s[1]; -            ch3 = (unsigned char)s[2]; -            if (!IS_CONTINUATION_BYTE(ch2)) { -                /* invalid continuation byte */ -                goto InvalidContinuation1; -            } -            if (ch == 0xE0) { -                if (ch2 < 0xA0) -                    /* invalid sequence -                       \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */ -                    goto InvalidContinuation1; -            } else if (ch == 0xED && ch2 >= 0xA0) { -                /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF -                   will result in surrogates in range D800-DFFF. Surrogates are -                   not valid UTF-8 so they are rejected. +                    if (value & ASCII_CHAR_MASK)  +                        break;  +#if PY_LITTLE_ENDIAN  +                    _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);  +                    _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  +                    _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  +                    _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  +# if SIZEOF_LONG == 8  +                    _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  +                    _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  +                    _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  +                    _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  +# endif  +#else  +# if SIZEOF_LONG == 8  +                    _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);  +                    _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);  +                    _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);  +                    _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);  +                    _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  +                    _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  +                    _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  +                    _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);  +# else  +                    _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);  +                    _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);  +                    _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);  +                    _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);  +# endif  +#endif  +                    _s += SIZEOF_LONG;  +                    _p += SIZEOF_LONG;  +                }  +                s = _s;  +                p = _p;  +                if (s == end)  +                    break;  +                ch = (unsigned char)*s;  +            }  +            if (ch < 0x80) {  +                s++;  +                *p++ = ch;  +                continue;  +            }  +        }  +  +        if (ch < 0xE0) {  +            /* \xC2\x80-\xDF\xBF -- 0080-07FF */  +            Py_UCS4 ch2;  +            if (ch < 0xC2) {  +                /* invalid sequence  +                \x80-\xBF -- continuation byte  +                \xC0-\xC1 -- fake 0000-007F */  +                goto InvalidStart;  +            }  +            if (end - s < 2) {  +                /* unexpected end of data: the caller will decide whether  +                   it's an error or not */  +                break;  +            }  +            ch2 = (unsigned char)s[1];  +            if (!IS_CONTINUATION_BYTE(ch2))  +                /* invalid continuation byte */  +                goto InvalidContinuation1;  +            ch = (ch << 6) + ch2 -  +                 ((0xC0 << 6) + 0x80);  +            assert ((ch > 0x007F) && (ch <= 0x07FF));  +            s += 2;  +            if (STRINGLIB_MAX_CHAR <= 0x007F ||  +                (STRINGLIB_MAX_CHAR < 0x07FF && ch > STRINGLIB_MAX_CHAR))  +                /* Out-of-range */  +                goto Return;  +            *p++ = ch;  +            continue;  +        }  +  +        if (ch < 0xF0) {  +            /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */  +            Py_UCS4 ch2, ch3;  +            if (end - s < 3) {  +                /* unexpected end of data: the caller will decide whether  +                   it's an error or not */  +                if (end - s < 2)  +                    break;  +                ch2 = (unsigned char)s[1];  +                if (!IS_CONTINUATION_BYTE(ch2) ||  +                    (ch2 < 0xA0 ? ch == 0xE0 : ch == 0xED))  +                    /* for clarification see comments below */  +                    goto InvalidContinuation1;  +                break;  +            }  +            ch2 = (unsigned char)s[1];  +            ch3 = (unsigned char)s[2];  +            if (!IS_CONTINUATION_BYTE(ch2)) {  +                /* invalid continuation byte */  +                goto InvalidContinuation1;  +            }  +            if (ch == 0xE0) {  +                if (ch2 < 0xA0)  +                    /* invalid sequence  +                       \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-0800 */  +                    goto InvalidContinuation1;  +            } else if (ch == 0xED && ch2 >= 0xA0) {  +                /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF  +                   will result in surrogates in range D800-DFFF. Surrogates are  +                   not valid UTF-8 so they are rejected.                      See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf -                   (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ -                goto InvalidContinuation1; -            } -            if (!IS_CONTINUATION_BYTE(ch3)) { -                /* invalid continuation byte */ -                goto InvalidContinuation2; -            } -            ch = (ch << 12) + (ch2 << 6) + ch3 - -                 ((0xE0 << 12) + (0x80 << 6) + 0x80); -            assert ((ch > 0x07FF) && (ch <= 0xFFFF)); -            s += 3; -            if (STRINGLIB_MAX_CHAR <= 0x07FF || -                (STRINGLIB_MAX_CHAR < 0xFFFF && ch > STRINGLIB_MAX_CHAR)) -                /* Out-of-range */ -                goto Return; -            *p++ = ch; -            continue; -        } - -        if (ch < 0xF5) { -            /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */ -            Py_UCS4 ch2, ch3, ch4; -            if (end - s < 4) { -                /* unexpected end of data: the caller will decide whether -                   it's an error or not */ -                if (end - s < 2) -                    break; -                ch2 = (unsigned char)s[1]; -                if (!IS_CONTINUATION_BYTE(ch2) || -                    (ch2 < 0x90 ? ch == 0xF0 : ch == 0xF4)) -                    /* for clarification see comments below */ -                    goto InvalidContinuation1; -                if (end - s < 3) -                    break; -                ch3 = (unsigned char)s[2]; -                if (!IS_CONTINUATION_BYTE(ch3)) -                    goto InvalidContinuation2; -                break; -            } -            ch2 = (unsigned char)s[1]; -            ch3 = (unsigned char)s[2]; -            ch4 = (unsigned char)s[3]; -            if (!IS_CONTINUATION_BYTE(ch2)) { -                /* invalid continuation byte */ -                goto InvalidContinuation1; -            } -            if (ch == 0xF0) { -                if (ch2 < 0x90) -                    /* invalid sequence -                       \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */ -                    goto InvalidContinuation1; -            } else if (ch == 0xF4 && ch2 >= 0x90) { -                /* invalid sequence +                   (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */  +                goto InvalidContinuation1;  +            }  +            if (!IS_CONTINUATION_BYTE(ch3)) {  +                /* invalid continuation byte */  +                goto InvalidContinuation2;  +            }  +            ch = (ch << 12) + (ch2 << 6) + ch3 -  +                 ((0xE0 << 12) + (0x80 << 6) + 0x80);  +            assert ((ch > 0x07FF) && (ch <= 0xFFFF));  +            s += 3;  +            if (STRINGLIB_MAX_CHAR <= 0x07FF ||  +                (STRINGLIB_MAX_CHAR < 0xFFFF && ch > STRINGLIB_MAX_CHAR))  +                /* Out-of-range */  +                goto Return;  +            *p++ = ch;  +            continue;  +        }  +  +        if (ch < 0xF5) {  +            /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */  +            Py_UCS4 ch2, ch3, ch4;  +            if (end - s < 4) {  +                /* unexpected end of data: the caller will decide whether  +                   it's an error or not */  +                if (end - s < 2)  +                    break;  +                ch2 = (unsigned char)s[1];  +                if (!IS_CONTINUATION_BYTE(ch2) ||  +                    (ch2 < 0x90 ? ch == 0xF0 : ch == 0xF4))  +                    /* for clarification see comments below */  +                    goto InvalidContinuation1;  +                if (end - s < 3)  +                    break;  +                ch3 = (unsigned char)s[2];  +                if (!IS_CONTINUATION_BYTE(ch3))  +                    goto InvalidContinuation2;  +                break;  +            }  +            ch2 = (unsigned char)s[1];  +            ch3 = (unsigned char)s[2];  +            ch4 = (unsigned char)s[3];  +            if (!IS_CONTINUATION_BYTE(ch2)) {  +                /* invalid continuation byte */  +                goto InvalidContinuation1;  +            }  +            if (ch == 0xF0) {  +                if (ch2 < 0x90)  +                    /* invalid sequence  +                       \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */  +                    goto InvalidContinuation1;  +            } else if (ch == 0xF4 && ch2 >= 0x90) {  +                /* invalid sequence                      \xF4\x90\x80\x80- -- 110000- overflow */ -                goto InvalidContinuation1; -            } -            if (!IS_CONTINUATION_BYTE(ch3)) { -                /* invalid continuation byte */ -                goto InvalidContinuation2; -            } -            if (!IS_CONTINUATION_BYTE(ch4)) { -                /* invalid continuation byte */ -                goto InvalidContinuation3; -            } -            ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 - -                 ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80); -            assert ((ch > 0xFFFF) && (ch <= 0x10FFFF)); -            s += 4; -            if (STRINGLIB_MAX_CHAR <= 0xFFFF || -                (STRINGLIB_MAX_CHAR < 0x10FFFF && ch > STRINGLIB_MAX_CHAR)) -                /* Out-of-range */ -                goto Return; -            *p++ = ch; -            continue; -        } -        goto InvalidStart; -    } -    ch = 0; -Return: -    *inptr = s; -    *outpos = p - dest; -    return ch; -InvalidStart: -    ch = 1; -    goto Return; -InvalidContinuation1: -    ch = 2; -    goto Return; -InvalidContinuation2: -    ch = 3; -    goto Return; -InvalidContinuation3: -    ch = 4; -    goto Return; -} - -#undef ASCII_CHAR_MASK - - -/* UTF-8 encoder specialized for a Unicode kind to avoid the slow -   PyUnicode_READ() macro. Delete some parts of the code depending on the kind: -   UCS-1 strings don't need to handle surrogates for example. */ +                goto InvalidContinuation1;  +            }  +            if (!IS_CONTINUATION_BYTE(ch3)) {  +                /* invalid continuation byte */  +                goto InvalidContinuation2;  +            }  +            if (!IS_CONTINUATION_BYTE(ch4)) {  +                /* invalid continuation byte */  +                goto InvalidContinuation3;  +            }  +            ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -  +                 ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);  +            assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));  +            s += 4;  +            if (STRINGLIB_MAX_CHAR <= 0xFFFF ||  +                (STRINGLIB_MAX_CHAR < 0x10FFFF && ch > STRINGLIB_MAX_CHAR))  +                /* Out-of-range */  +                goto Return;  +            *p++ = ch;  +            continue;  +        }  +        goto InvalidStart;  +    }  +    ch = 0;  +Return:  +    *inptr = s;  +    *outpos = p - dest;  +    return ch;  +InvalidStart:  +    ch = 1;  +    goto Return;  +InvalidContinuation1:  +    ch = 2;  +    goto Return;  +InvalidContinuation2:  +    ch = 3;  +    goto Return;  +InvalidContinuation3:  +    ch = 4;  +    goto Return;  +}  +  +#undef ASCII_CHAR_MASK  +  +  +/* UTF-8 encoder specialized for a Unicode kind to avoid the slow  +   PyUnicode_READ() macro. Delete some parts of the code depending on the kind:  +   UCS-1 strings don't need to handle surrogates for example. */   Py_LOCAL_INLINE(char *)  STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,                          PyObject *unicode,                          const STRINGLIB_CHAR *data, -                        Py_ssize_t size, +                        Py_ssize_t size,                           _Py_error_handler error_handler, -                        const char *errors) -{ -    Py_ssize_t i;                /* index into data of next input character */ -    char *p;                     /* next free byte in output buffer */ -#if STRINGLIB_SIZEOF_CHAR > 1 -    PyObject *error_handler_obj = NULL; -    PyObject *exc = NULL; -    PyObject *rep = NULL; -#endif -#if STRINGLIB_SIZEOF_CHAR == 1 -    const Py_ssize_t max_char_size = 2; -#elif STRINGLIB_SIZEOF_CHAR == 2 -    const Py_ssize_t max_char_size = 3; -#else /*  STRINGLIB_SIZEOF_CHAR == 4 */ -    const Py_ssize_t max_char_size = 4; -#endif - -    assert(size >= 0); -    if (size > PY_SSIZE_T_MAX / max_char_size) { -        /* integer overflow */ +                        const char *errors)  +{  +    Py_ssize_t i;                /* index into data of next input character */  +    char *p;                     /* next free byte in output buffer */  +#if STRINGLIB_SIZEOF_CHAR > 1  +    PyObject *error_handler_obj = NULL;  +    PyObject *exc = NULL;  +    PyObject *rep = NULL;  +#endif  +#if STRINGLIB_SIZEOF_CHAR == 1  +    const Py_ssize_t max_char_size = 2;  +#elif STRINGLIB_SIZEOF_CHAR == 2  +    const Py_ssize_t max_char_size = 3;  +#else /*  STRINGLIB_SIZEOF_CHAR == 4 */  +    const Py_ssize_t max_char_size = 4;  +#endif  +  +    assert(size >= 0);  +    if (size > PY_SSIZE_T_MAX / max_char_size) {  +        /* integer overflow */           PyErr_NoMemory();          return NULL; -    } - +    }  +       _PyBytesWriter_Init(writer);      p = _PyBytesWriter_Alloc(writer, size * max_char_size); -    if (p == NULL) -        return NULL; - -    for (i = 0; i < size;) { -        Py_UCS4 ch = data[i++]; - -        if (ch < 0x80) { -            /* Encode ASCII */ -            *p++ = (char) ch; - -        } -        else -#if STRINGLIB_SIZEOF_CHAR > 1 -        if (ch < 0x0800) -#endif -        { -            /* Encode Latin-1 */ -            *p++ = (char)(0xc0 | (ch >> 6)); -            *p++ = (char)(0x80 | (ch & 0x3f)); -        } -#if STRINGLIB_SIZEOF_CHAR > 1 -        else if (Py_UNICODE_IS_SURROGATE(ch)) { -            Py_ssize_t startpos, endpos, newpos; -            Py_ssize_t k; -            if (error_handler == _Py_ERROR_UNKNOWN) { +    if (p == NULL)  +        return NULL;  +  +    for (i = 0; i < size;) {  +        Py_UCS4 ch = data[i++];  +  +        if (ch < 0x80) {  +            /* Encode ASCII */  +            *p++ = (char) ch;  +  +        }  +        else  +#if STRINGLIB_SIZEOF_CHAR > 1  +        if (ch < 0x0800)  +#endif  +        {  +            /* Encode Latin-1 */  +            *p++ = (char)(0xc0 | (ch >> 6));  +            *p++ = (char)(0x80 | (ch & 0x3f));  +        }  +#if STRINGLIB_SIZEOF_CHAR > 1  +        else if (Py_UNICODE_IS_SURROGATE(ch)) {  +            Py_ssize_t startpos, endpos, newpos;  +            Py_ssize_t k;  +            if (error_handler == _Py_ERROR_UNKNOWN) {                   error_handler = _Py_GetErrorHandler(errors); -            } - -            startpos = i-1; -            endpos = startpos+1; - -            while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos])) -                endpos++; - -            /* Only overallocate the buffer if it's not the last write */ +            }  +  +            startpos = i-1;  +            endpos = startpos+1;  +  +            while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))  +                endpos++;  +  +            /* Only overallocate the buffer if it's not the last write */               writer->overallocate = (endpos < size); - -            switch (error_handler) -            { -            case _Py_ERROR_REPLACE: -                memset(p, '?', endpos - startpos); -                p += (endpos - startpos); -                /* fall through */ -            case _Py_ERROR_IGNORE: -                i += (endpos - startpos - 1); -                break; - -            case _Py_ERROR_SURROGATEPASS: -                for (k=startpos; k<endpos; k++) { -                    ch = data[k]; -                    *p++ = (char)(0xe0 | (ch >> 12)); -                    *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); -                    *p++ = (char)(0x80 | (ch & 0x3f)); -                } -                i += (endpos - startpos - 1); -                break; - -            case _Py_ERROR_BACKSLASHREPLACE: -                /* subtract preallocated bytes */ +  +            switch (error_handler)  +            {  +            case _Py_ERROR_REPLACE:  +                memset(p, '?', endpos - startpos);  +                p += (endpos - startpos);  +                /* fall through */  +            case _Py_ERROR_IGNORE:  +                i += (endpos - startpos - 1);  +                break;  +  +            case _Py_ERROR_SURROGATEPASS:  +                for (k=startpos; k<endpos; k++) {  +                    ch = data[k];  +                    *p++ = (char)(0xe0 | (ch >> 12));  +                    *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  +                    *p++ = (char)(0x80 | (ch & 0x3f));  +                }  +                i += (endpos - startpos - 1);  +                break;  +  +            case _Py_ERROR_BACKSLASHREPLACE:  +                /* subtract preallocated bytes */                   writer->min_size -= max_char_size * (endpos - startpos);                  p = backslashreplace(writer, p, -                                     unicode, startpos, endpos); -                if (p == NULL) -                    goto error; -                i += (endpos - startpos - 1); -                break; - -            case _Py_ERROR_XMLCHARREFREPLACE: -                /* subtract preallocated bytes */ +                                     unicode, startpos, endpos);  +                if (p == NULL)  +                    goto error;  +                i += (endpos - startpos - 1);  +                break;  +  +            case _Py_ERROR_XMLCHARREFREPLACE:  +                /* subtract preallocated bytes */                   writer->min_size -= max_char_size * (endpos - startpos);                  p = xmlcharrefreplace(writer, p, -                                      unicode, startpos, endpos); -                if (p == NULL) -                    goto error; -                i += (endpos - startpos - 1); -                break; - -            case _Py_ERROR_SURROGATEESCAPE: -                for (k=startpos; k<endpos; k++) { -                    ch = data[k]; -                    if (!(0xDC80 <= ch && ch <= 0xDCFF)) -                        break; -                    *p++ = (char)(ch & 0xff); -                } -                if (k >= endpos) { -                    i += (endpos - startpos - 1); -                    break; -                } -                startpos = k; -                assert(startpos < endpos); -                /* fall through */ -            default: -                rep = unicode_encode_call_errorhandler( -                      errors, &error_handler_obj, "utf-8", "surrogates not allowed", -                      unicode, &exc, startpos, endpos, &newpos); -                if (!rep) -                    goto error; - -                /* subtract preallocated bytes */ +                                      unicode, startpos, endpos);  +                if (p == NULL)  +                    goto error;  +                i += (endpos - startpos - 1);  +                break;  +  +            case _Py_ERROR_SURROGATEESCAPE:  +                for (k=startpos; k<endpos; k++) {  +                    ch = data[k];  +                    if (!(0xDC80 <= ch && ch <= 0xDCFF))  +                        break;  +                    *p++ = (char)(ch & 0xff);  +                }  +                if (k >= endpos) {  +                    i += (endpos - startpos - 1);  +                    break;  +                }  +                startpos = k;  +                assert(startpos < endpos);  +                /* fall through */  +            default:  +                rep = unicode_encode_call_errorhandler(  +                      errors, &error_handler_obj, "utf-8", "surrogates not allowed",  +                      unicode, &exc, startpos, endpos, &newpos);  +                if (!rep)  +                    goto error;  +  +                /* subtract preallocated bytes */                   writer->min_size -= max_char_size * (newpos - startpos); - -                if (PyBytes_Check(rep)) { +  +                if (PyBytes_Check(rep)) {                       p = _PyBytesWriter_WriteBytes(writer, p, -                                                  PyBytes_AS_STRING(rep), -                                                  PyBytes_GET_SIZE(rep)); -                } -                else { -                    /* rep is unicode */ -                    if (PyUnicode_READY(rep) < 0) -                        goto error; - -                    if (!PyUnicode_IS_ASCII(rep)) { -                        raise_encode_exception(&exc, "utf-8", unicode, -                                               startpos, endpos, -                                               "surrogates not allowed"); -                        goto error; -                    } - +                                                  PyBytes_AS_STRING(rep),  +                                                  PyBytes_GET_SIZE(rep));  +                }  +                else {  +                    /* rep is unicode */  +                    if (PyUnicode_READY(rep) < 0)  +                        goto error;  +  +                    if (!PyUnicode_IS_ASCII(rep)) {  +                        raise_encode_exception(&exc, "utf-8", unicode,  +                                               startpos, endpos,  +                                               "surrogates not allowed");  +                        goto error;  +                    }  +                       p = _PyBytesWriter_WriteBytes(writer, p, -                                                  PyUnicode_DATA(rep), -                                                  PyUnicode_GET_LENGTH(rep)); -                } - -                if (p == NULL) -                    goto error; -                Py_CLEAR(rep); - -                i = newpos; -            } - -            /* If overallocation was disabled, ensure that it was the last -               write. Otherwise, we missed an optimization */ +                                                  PyUnicode_DATA(rep),  +                                                  PyUnicode_GET_LENGTH(rep));  +                }  +  +                if (p == NULL)  +                    goto error;  +                Py_CLEAR(rep);  +  +                i = newpos;  +            }  +  +            /* If overallocation was disabled, ensure that it was the last  +               write. Otherwise, we missed an optimization */               assert(writer->overallocate || i == size); -        } -        else -#if STRINGLIB_SIZEOF_CHAR > 2 -        if (ch < 0x10000) -#endif -        { -            *p++ = (char)(0xe0 | (ch >> 12)); -            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); -            *p++ = (char)(0x80 | (ch & 0x3f)); -        } -#if STRINGLIB_SIZEOF_CHAR > 2 -        else /* ch >= 0x10000 */ -        { -            assert(ch <= MAX_UNICODE); -            /* Encode UCS4 Unicode ordinals */ -            *p++ = (char)(0xf0 | (ch >> 18)); -            *p++ = (char)(0x80 | ((ch >> 12) & 0x3f)); -            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); -            *p++ = (char)(0x80 | (ch & 0x3f)); -        } -#endif /* STRINGLIB_SIZEOF_CHAR > 2 */ -#endif /* STRINGLIB_SIZEOF_CHAR > 1 */ -    } - -#if STRINGLIB_SIZEOF_CHAR > 1 -    Py_XDECREF(error_handler_obj); -    Py_XDECREF(exc); -#endif +        }  +        else  +#if STRINGLIB_SIZEOF_CHAR > 2  +        if (ch < 0x10000)  +#endif  +        {  +            *p++ = (char)(0xe0 | (ch >> 12));  +            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  +            *p++ = (char)(0x80 | (ch & 0x3f));  +        }  +#if STRINGLIB_SIZEOF_CHAR > 2  +        else /* ch >= 0x10000 */  +        {  +            assert(ch <= MAX_UNICODE);  +            /* Encode UCS4 Unicode ordinals */  +            *p++ = (char)(0xf0 | (ch >> 18));  +            *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));  +            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));  +            *p++ = (char)(0x80 | (ch & 0x3f));  +        }  +#endif /* STRINGLIB_SIZEOF_CHAR > 2 */  +#endif /* STRINGLIB_SIZEOF_CHAR > 1 */  +    }  +  +#if STRINGLIB_SIZEOF_CHAR > 1  +    Py_XDECREF(error_handler_obj);  +    Py_XDECREF(exc);  +#endif       return p; - -#if STRINGLIB_SIZEOF_CHAR > 1 - error: -    Py_XDECREF(rep); -    Py_XDECREF(error_handler_obj); -    Py_XDECREF(exc); -    return NULL; -#endif -} - -/* The pattern for constructing UCS2-repeated masks. */ -#if SIZEOF_LONG == 8 -# define UCS2_REPEAT_MASK 0x0001000100010001ul -#elif SIZEOF_LONG == 4 -# define UCS2_REPEAT_MASK 0x00010001ul -#else -# error C 'long' size should be either 4 or 8! -#endif - -/* The mask for fast checking. */ -#if STRINGLIB_SIZEOF_CHAR == 1 -/* The mask for fast checking of whether a C 'long' contains a -   non-ASCII or non-Latin1 UTF16-encoded characters. */ -# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR)) -#else -/* The mask for fast checking of whether a C 'long' may contain -   UTF16-encoded surrogate characters. This is an efficient heuristic, -   assuming that non-surrogate characters with a code point >= 0x8000 are -   rare in most input. -*/ -# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u) -#endif -/* The mask for fast byte-swapping. */ -#define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu) -/* Swap bytes. */ -#define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \ -                                 (((value) & STRIPPED_MASK) << 8)) - -Py_LOCAL_INLINE(Py_UCS4) -STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, -                        STRINGLIB_CHAR *dest, Py_ssize_t *outpos, -                        int native_ordering) -{ -    Py_UCS4 ch; -    const unsigned char *aligned_end = -            (const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG); -    const unsigned char *q = *inptr; -    STRINGLIB_CHAR *p = dest + *outpos; -    /* Offsets from q for retrieving byte pairs in the right order. */ -#if PY_LITTLE_ENDIAN -    int ihi = !!native_ordering, ilo = !native_ordering; -#else -    int ihi = !native_ordering, ilo = !!native_ordering; -#endif -    --e; - -    while (q < e) { -        Py_UCS4 ch2; -        /* First check for possible aligned read of a C 'long'. Unaligned -           reads are more expensive, better to defer to another iteration. */ -        if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) { -            /* Fast path for runs of in-range non-surrogate chars. */ -            const unsigned char *_q = q; -            while (_q < aligned_end) { +  +#if STRINGLIB_SIZEOF_CHAR > 1  + error:  +    Py_XDECREF(rep);  +    Py_XDECREF(error_handler_obj);  +    Py_XDECREF(exc);  +    return NULL;  +#endif  +}  +  +/* The pattern for constructing UCS2-repeated masks. */  +#if SIZEOF_LONG == 8  +# define UCS2_REPEAT_MASK 0x0001000100010001ul  +#elif SIZEOF_LONG == 4  +# define UCS2_REPEAT_MASK 0x00010001ul  +#else  +# error C 'long' size should be either 4 or 8!  +#endif  +  +/* The mask for fast checking. */  +#if STRINGLIB_SIZEOF_CHAR == 1  +/* The mask for fast checking of whether a C 'long' contains a  +   non-ASCII or non-Latin1 UTF16-encoded characters. */  +# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))  +#else  +/* The mask for fast checking of whether a C 'long' may contain  +   UTF16-encoded surrogate characters. This is an efficient heuristic,  +   assuming that non-surrogate characters with a code point >= 0x8000 are  +   rare in most input.  +*/  +# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u)  +#endif  +/* The mask for fast byte-swapping. */  +#define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu)  +/* Swap bytes. */  +#define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \  +                                 (((value) & STRIPPED_MASK) << 8))  +  +Py_LOCAL_INLINE(Py_UCS4)  +STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,  +                        STRINGLIB_CHAR *dest, Py_ssize_t *outpos,  +                        int native_ordering)  +{  +    Py_UCS4 ch;  +    const unsigned char *aligned_end =  +            (const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);  +    const unsigned char *q = *inptr;  +    STRINGLIB_CHAR *p = dest + *outpos;  +    /* Offsets from q for retrieving byte pairs in the right order. */  +#if PY_LITTLE_ENDIAN  +    int ihi = !!native_ordering, ilo = !native_ordering;  +#else  +    int ihi = !native_ordering, ilo = !!native_ordering;  +#endif  +    --e;  +  +    while (q < e) {  +        Py_UCS4 ch2;  +        /* First check for possible aligned read of a C 'long'. Unaligned  +           reads are more expensive, better to defer to another iteration. */  +        if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {  +            /* Fast path for runs of in-range non-surrogate chars. */  +            const unsigned char *_q = q;  +            while (_q < aligned_end) {                   unsigned long block = * (const unsigned long *) _q; -                if (native_ordering) { -                    /* Can use buffer directly */ -                    if (block & FAST_CHAR_MASK) -                        break; -                } -                else { -                    /* Need to byte-swap */ -                    if (block & SWAB(FAST_CHAR_MASK)) -                        break; -#if STRINGLIB_SIZEOF_CHAR == 1 -                    block >>= 8; -#else -                    block = SWAB(block); -#endif -                } -#if PY_LITTLE_ENDIAN -# if SIZEOF_LONG == 4 -                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); -                p[1] = (STRINGLIB_CHAR)(block >> 16); -# elif SIZEOF_LONG == 8 -                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); -                p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu); -                p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu); -                p[3] = (STRINGLIB_CHAR)(block >> 48); -# endif -#else -# if SIZEOF_LONG == 4 -                p[0] = (STRINGLIB_CHAR)(block >> 16); -                p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu); -# elif SIZEOF_LONG == 8 -                p[0] = (STRINGLIB_CHAR)(block >> 48); -                p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu); -                p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu); -                p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu); -# endif -#endif -                _q += SIZEOF_LONG; -                p += SIZEOF_LONG / 2; -            } -            q = _q; -            if (q >= e) -                break; -        } - -        ch = (q[ihi] << 8) | q[ilo]; -        q += 2; -        if (!Py_UNICODE_IS_SURROGATE(ch)) { -#if STRINGLIB_SIZEOF_CHAR < 2 -            if (ch > STRINGLIB_MAX_CHAR) -                /* Out-of-range */ -                goto Return; -#endif -            *p++ = (STRINGLIB_CHAR)ch; -            continue; -        } - -        /* UTF-16 code pair: */ +                if (native_ordering) {  +                    /* Can use buffer directly */  +                    if (block & FAST_CHAR_MASK)  +                        break;  +                }  +                else {  +                    /* Need to byte-swap */  +                    if (block & SWAB(FAST_CHAR_MASK))  +                        break;  +#if STRINGLIB_SIZEOF_CHAR == 1  +                    block >>= 8;  +#else  +                    block = SWAB(block);  +#endif  +                }  +#if PY_LITTLE_ENDIAN  +# if SIZEOF_LONG == 4  +                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  +                p[1] = (STRINGLIB_CHAR)(block >> 16);  +# elif SIZEOF_LONG == 8  +                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);  +                p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  +                p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  +                p[3] = (STRINGLIB_CHAR)(block >> 48);  +# endif  +#else  +# if SIZEOF_LONG == 4  +                p[0] = (STRINGLIB_CHAR)(block >> 16);  +                p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);  +# elif SIZEOF_LONG == 8  +                p[0] = (STRINGLIB_CHAR)(block >> 48);  +                p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);  +                p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);  +                p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);  +# endif  +#endif  +                _q += SIZEOF_LONG;  +                p += SIZEOF_LONG / 2;  +            }  +            q = _q;  +            if (q >= e)  +                break;  +        }  +  +        ch = (q[ihi] << 8) | q[ilo];  +        q += 2;  +        if (!Py_UNICODE_IS_SURROGATE(ch)) {  +#if STRINGLIB_SIZEOF_CHAR < 2  +            if (ch > STRINGLIB_MAX_CHAR)  +                /* Out-of-range */  +                goto Return;  +#endif  +            *p++ = (STRINGLIB_CHAR)ch;  +            continue;  +        }  +  +        /* UTF-16 code pair: */           if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))              goto IllegalEncoding; -        if (q >= e) -            goto UnexpectedEnd; -        ch2 = (q[ihi] << 8) | q[ilo]; -        q += 2; -        if (!Py_UNICODE_IS_LOW_SURROGATE(ch2)) -            goto IllegalSurrogate; -        ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2); -#if STRINGLIB_SIZEOF_CHAR < 4 -        /* Out-of-range */ -        goto Return; -#else -        *p++ = (STRINGLIB_CHAR)ch; -#endif -    } -    ch = 0; -Return: -    *inptr = q; -    *outpos = p - dest; -    return ch; -UnexpectedEnd: -    ch = 1; -    goto Return; -IllegalEncoding: -    ch = 2; -    goto Return; -IllegalSurrogate: -    ch = 3; -    goto Return; -} -#undef UCS2_REPEAT_MASK -#undef FAST_CHAR_MASK -#undef STRIPPED_MASK -#undef SWAB - - -#if STRINGLIB_MAX_CHAR >= 0x80 -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in, -                        Py_ssize_t len, -                        unsigned short **outptr, -                        int native_ordering) -{ -    unsigned short *out = *outptr; -    const STRINGLIB_CHAR *end = in + len; -#if STRINGLIB_SIZEOF_CHAR == 1 -    if (native_ordering) { -        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); -        while (in < unrolled_end) { -            out[0] = in[0]; -            out[1] = in[1]; -            out[2] = in[2]; -            out[3] = in[3]; -            in += 4; out += 4; -        } -        while (in < end) { -            *out++ = *in++; -        } -    } else { -# define SWAB2(CH)  ((CH) << 8) /* high byte is zero */ -        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); -        while (in < unrolled_end) { -            out[0] = SWAB2(in[0]); -            out[1] = SWAB2(in[1]); -            out[2] = SWAB2(in[2]); -            out[3] = SWAB2(in[3]); -            in += 4; out += 4; -        } -        while (in < end) { -            Py_UCS4 ch = *in++; -            *out++ = SWAB2((Py_UCS2)ch); -        } -#undef SWAB2 -    } -    *outptr = out; -    return len; -#else -    if (native_ordering) { -#if STRINGLIB_MAX_CHAR < 0x10000 -        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); -        while (in < unrolled_end) { -            /* check if any character is a surrogate character */ -            if (((in[0] ^ 0xd800) & -                 (in[1] ^ 0xd800) & -                 (in[2] ^ 0xd800) & -                 (in[3] ^ 0xd800) & 0xf800) == 0) -                break; -            out[0] = in[0]; -            out[1] = in[1]; -            out[2] = in[2]; -            out[3] = in[3]; -            in += 4; out += 4; -        } -#endif -        while (in < end) { -            Py_UCS4 ch; -            ch = *in++; -            if (ch < 0xd800) -                *out++ = ch; -            else if (ch < 0xe000) -                /* reject surrogate characters (U+D800-U+DFFF) */ -                goto fail; -#if STRINGLIB_MAX_CHAR >= 0x10000 -            else if (ch >= 0x10000) { -                out[0] = Py_UNICODE_HIGH_SURROGATE(ch); -                out[1] = Py_UNICODE_LOW_SURROGATE(ch); -                out += 2; -            } -#endif -            else -                *out++ = ch; -        } -    } else { -#define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8)) -#if STRINGLIB_MAX_CHAR < 0x10000 -        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); -        while (in < unrolled_end) { -            /* check if any character is a surrogate character */ -            if (((in[0] ^ 0xd800) & -                 (in[1] ^ 0xd800) & -                 (in[2] ^ 0xd800) & -                 (in[3] ^ 0xd800) & 0xf800) == 0) -                break; -            out[0] = SWAB2(in[0]); -            out[1] = SWAB2(in[1]); -            out[2] = SWAB2(in[2]); -            out[3] = SWAB2(in[3]); -            in += 4; out += 4; -        } -#endif -        while (in < end) { -            Py_UCS4 ch = *in++; -            if (ch < 0xd800) -                *out++ = SWAB2((Py_UCS2)ch); -            else if (ch < 0xe000) -                /* reject surrogate characters (U+D800-U+DFFF) */ -                goto fail; -#if STRINGLIB_MAX_CHAR >= 0x10000 -            else if (ch >= 0x10000) { -                Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch); -                Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch); -                out[0] = SWAB2(ch1); -                out[1] = SWAB2(ch2); -                out += 2; -            } -#endif -            else -                *out++ = SWAB2((Py_UCS2)ch); -        } -#undef SWAB2 -    } -    *outptr = out; -    return len; -  fail: -    *outptr = out; -    return len - (end - in + 1); -#endif -} - +        if (q >= e)  +            goto UnexpectedEnd;  +        ch2 = (q[ihi] << 8) | q[ilo];  +        q += 2;  +        if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))  +            goto IllegalSurrogate;  +        ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);  +#if STRINGLIB_SIZEOF_CHAR < 4  +        /* Out-of-range */  +        goto Return;  +#else  +        *p++ = (STRINGLIB_CHAR)ch;  +#endif  +    }  +    ch = 0;  +Return:  +    *inptr = q;  +    *outpos = p - dest;  +    return ch;  +UnexpectedEnd:  +    ch = 1;  +    goto Return;  +IllegalEncoding:  +    ch = 2;  +    goto Return;  +IllegalSurrogate:  +    ch = 3;  +    goto Return;  +}  +#undef UCS2_REPEAT_MASK  +#undef FAST_CHAR_MASK  +#undef STRIPPED_MASK  +#undef SWAB  +  +  +#if STRINGLIB_MAX_CHAR >= 0x80  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,  +                        Py_ssize_t len,  +                        unsigned short **outptr,  +                        int native_ordering)  +{  +    unsigned short *out = *outptr;  +    const STRINGLIB_CHAR *end = in + len;  +#if STRINGLIB_SIZEOF_CHAR == 1  +    if (native_ordering) {  +        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  +        while (in < unrolled_end) {  +            out[0] = in[0];  +            out[1] = in[1];  +            out[2] = in[2];  +            out[3] = in[3];  +            in += 4; out += 4;  +        }  +        while (in < end) {  +            *out++ = *in++;  +        }  +    } else {  +# define SWAB2(CH)  ((CH) << 8) /* high byte is zero */  +        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  +        while (in < unrolled_end) {  +            out[0] = SWAB2(in[0]);  +            out[1] = SWAB2(in[1]);  +            out[2] = SWAB2(in[2]);  +            out[3] = SWAB2(in[3]);  +            in += 4; out += 4;  +        }  +        while (in < end) {  +            Py_UCS4 ch = *in++;  +            *out++ = SWAB2((Py_UCS2)ch);  +        }  +#undef SWAB2  +    }  +    *outptr = out;  +    return len;  +#else  +    if (native_ordering) {  +#if STRINGLIB_MAX_CHAR < 0x10000  +        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  +        while (in < unrolled_end) {  +            /* check if any character is a surrogate character */  +            if (((in[0] ^ 0xd800) &  +                 (in[1] ^ 0xd800) &  +                 (in[2] ^ 0xd800) &  +                 (in[3] ^ 0xd800) & 0xf800) == 0)  +                break;  +            out[0] = in[0];  +            out[1] = in[1];  +            out[2] = in[2];  +            out[3] = in[3];  +            in += 4; out += 4;  +        }  +#endif  +        while (in < end) {  +            Py_UCS4 ch;  +            ch = *in++;  +            if (ch < 0xd800)  +                *out++ = ch;  +            else if (ch < 0xe000)  +                /* reject surrogate characters (U+D800-U+DFFF) */  +                goto fail;  +#if STRINGLIB_MAX_CHAR >= 0x10000  +            else if (ch >= 0x10000) {  +                out[0] = Py_UNICODE_HIGH_SURROGATE(ch);  +                out[1] = Py_UNICODE_LOW_SURROGATE(ch);  +                out += 2;  +            }  +#endif  +            else  +                *out++ = ch;  +        }  +    } else {  +#define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))  +#if STRINGLIB_MAX_CHAR < 0x10000  +        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  +        while (in < unrolled_end) {  +            /* check if any character is a surrogate character */  +            if (((in[0] ^ 0xd800) &  +                 (in[1] ^ 0xd800) &  +                 (in[2] ^ 0xd800) &  +                 (in[3] ^ 0xd800) & 0xf800) == 0)  +                break;  +            out[0] = SWAB2(in[0]);  +            out[1] = SWAB2(in[1]);  +            out[2] = SWAB2(in[2]);  +            out[3] = SWAB2(in[3]);  +            in += 4; out += 4;  +        }  +#endif  +        while (in < end) {  +            Py_UCS4 ch = *in++;  +            if (ch < 0xd800)  +                *out++ = SWAB2((Py_UCS2)ch);  +            else if (ch < 0xe000)  +                /* reject surrogate characters (U+D800-U+DFFF) */  +                goto fail;  +#if STRINGLIB_MAX_CHAR >= 0x10000  +            else if (ch >= 0x10000) {  +                Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);  +                Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);  +                out[0] = SWAB2(ch1);  +                out[1] = SWAB2(ch2);  +                out += 2;  +            }  +#endif  +            else  +                *out++ = SWAB2((Py_UCS2)ch);  +        }  +#undef SWAB2  +    }  +    *outptr = out;  +    return len;  +  fail:  +    *outptr = out;  +    return len - (end - in + 1);  +#endif  +}  +   static inline uint32_t  STRINGLIB(SWAB4)(STRINGLIB_CHAR ch)  {      uint32_t word = ch; -#if STRINGLIB_SIZEOF_CHAR == 1 +#if STRINGLIB_SIZEOF_CHAR == 1       /* high bytes are zero */      return (word << 24); -#elif STRINGLIB_SIZEOF_CHAR == 2 +#elif STRINGLIB_SIZEOF_CHAR == 2       /* high bytes are zero */      return ((word & 0x00FFu) << 24) | ((word & 0xFF00u) << 8); -#else +#else       return _Py_bswap32(word); -#endif +#endif   } -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in, -                        Py_ssize_t len, +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,  +                        Py_ssize_t len,                           uint32_t **outptr, -                        int native_ordering) -{ +                        int native_ordering)  +{       uint32_t *out = *outptr; -    const STRINGLIB_CHAR *end = in + len; -    if (native_ordering) { -        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); -        while (in < unrolled_end) { -#if STRINGLIB_SIZEOF_CHAR > 1 -            /* check if any character is a surrogate character */ -            if (((in[0] ^ 0xd800) & -                 (in[1] ^ 0xd800) & -                 (in[2] ^ 0xd800) & -                 (in[3] ^ 0xd800) & 0xf800) == 0) -                break; -#endif -            out[0] = in[0]; -            out[1] = in[1]; -            out[2] = in[2]; -            out[3] = in[3]; -            in += 4; out += 4; -        } -        while (in < end) { -            Py_UCS4 ch; -            ch = *in++; -#if STRINGLIB_SIZEOF_CHAR > 1 -            if (Py_UNICODE_IS_SURROGATE(ch)) { -                /* reject surrogate characters (U+D800-U+DFFF) */ -                goto fail; -            } -#endif -            *out++ = ch; -        } -    } else { -        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4); -        while (in < unrolled_end) { -#if STRINGLIB_SIZEOF_CHAR > 1 -            /* check if any character is a surrogate character */ -            if (((in[0] ^ 0xd800) & -                 (in[1] ^ 0xd800) & -                 (in[2] ^ 0xd800) & -                 (in[3] ^ 0xd800) & 0xf800) == 0) -                break; -#endif +    const STRINGLIB_CHAR *end = in + len;  +    if (native_ordering) {  +        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  +        while (in < unrolled_end) {  +#if STRINGLIB_SIZEOF_CHAR > 1  +            /* check if any character is a surrogate character */  +            if (((in[0] ^ 0xd800) &  +                 (in[1] ^ 0xd800) &  +                 (in[2] ^ 0xd800) &  +                 (in[3] ^ 0xd800) & 0xf800) == 0)  +                break;  +#endif  +            out[0] = in[0];  +            out[1] = in[1];  +            out[2] = in[2];  +            out[3] = in[3];  +            in += 4; out += 4;  +        }  +        while (in < end) {  +            Py_UCS4 ch;  +            ch = *in++;  +#if STRINGLIB_SIZEOF_CHAR > 1  +            if (Py_UNICODE_IS_SURROGATE(ch)) {  +                /* reject surrogate characters (U+D800-U+DFFF) */  +                goto fail;  +            }  +#endif  +            *out++ = ch;  +        }  +    } else {  +        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);  +        while (in < unrolled_end) {  +#if STRINGLIB_SIZEOF_CHAR > 1  +            /* check if any character is a surrogate character */  +            if (((in[0] ^ 0xd800) &  +                 (in[1] ^ 0xd800) &  +                 (in[2] ^ 0xd800) &  +                 (in[3] ^ 0xd800) & 0xf800) == 0)  +                break;  +#endif               out[0] = STRINGLIB(SWAB4)(in[0]);              out[1] = STRINGLIB(SWAB4)(in[1]);              out[2] = STRINGLIB(SWAB4)(in[2]);              out[3] = STRINGLIB(SWAB4)(in[3]); -            in += 4; out += 4; -        } -        while (in < end) { -            Py_UCS4 ch = *in++; -#if STRINGLIB_SIZEOF_CHAR > 1 -            if (Py_UNICODE_IS_SURROGATE(ch)) { -                /* reject surrogate characters (U+D800-U+DFFF) */ -                goto fail; -            } -#endif +            in += 4; out += 4;  +        }  +        while (in < end) {  +            Py_UCS4 ch = *in++;  +#if STRINGLIB_SIZEOF_CHAR > 1  +            if (Py_UNICODE_IS_SURROGATE(ch)) {  +                /* reject surrogate characters (U+D800-U+DFFF) */  +                goto fail;  +            }  +#endif               *out++ = STRINGLIB(SWAB4)(ch); -        } -    } -    *outptr = out; -    return len; -#if STRINGLIB_SIZEOF_CHAR > 1 -  fail: -    *outptr = out; -    return len - (end - in + 1); -#endif -} - -#endif +        }  +    }  +    *outptr = out;  +    return len;  +#if STRINGLIB_SIZEOF_CHAR > 1  +  fail:  +    *outptr = out;  +    return len - (end - in + 1);  +#endif  +}  +  +#endif  diff --git a/contrib/tools/python3/src/Objects/stringlib/count.h b/contrib/tools/python3/src/Objects/stringlib/count.h index f48500bf561..794224d0151 100644 --- a/contrib/tools/python3/src/Objects/stringlib/count.h +++ b/contrib/tools/python3/src/Objects/stringlib/count.h @@ -1,27 +1,27 @@ -/* stringlib: count implementation */ - -#ifndef STRINGLIB_FASTSEARCH_H -#error must include "stringlib/fastsearch.h" before including this module -#endif - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, -                Py_ssize_t maxcount) -{ -    Py_ssize_t count; - -    if (str_len < 0) -        return 0; /* start > len(str) */ -    if (sub_len == 0) -        return (str_len < maxcount) ? str_len + 1 : maxcount; - -    count = FASTSEARCH(str, str_len, sub, sub_len, maxcount, FAST_COUNT); - -    if (count < 0) -        return 0; /* no match */ - -    return count; -} - - +/* stringlib: count implementation */  +  +#ifndef STRINGLIB_FASTSEARCH_H  +#error must include "stringlib/fastsearch.h" before including this module  +#endif  +  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,  +                Py_ssize_t maxcount)  +{  +    Py_ssize_t count;  +  +    if (str_len < 0)  +        return 0; /* start > len(str) */  +    if (sub_len == 0)  +        return (str_len < maxcount) ? str_len + 1 : maxcount;  +  +    count = FASTSEARCH(str, str_len, sub, sub_len, maxcount, FAST_COUNT);  +  +    if (count < 0)  +        return 0; /* no match */  +  +    return count;  +}  +  +  diff --git a/contrib/tools/python3/src/Objects/stringlib/ctype.h b/contrib/tools/python3/src/Objects/stringlib/ctype.h index 9b319b07d11..466624dac7a 100644 --- a/contrib/tools/python3/src/Objects/stringlib/ctype.h +++ b/contrib/tools/python3/src/Objects/stringlib/ctype.h @@ -1,116 +1,116 @@ -#if STRINGLIB_IS_UNICODE -# error "ctype.h only compatible with byte-wise strings" -#endif - +#if STRINGLIB_IS_UNICODE  +# error "ctype.h only compatible with byte-wise strings"  +#endif  +   #include "pycore_bytes_methods.h" - -static PyObject* +  +static PyObject*   stringlib_isspace(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - -static PyObject* +{  +    return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +static PyObject*   stringlib_isalpha(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - -static PyObject* +{  +    return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +static PyObject*   stringlib_isalnum(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - -static PyObject* +{  +    return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +static PyObject*   stringlib_isascii(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_isascii(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - -static PyObject* +{  +    return _Py_bytes_isascii(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +static PyObject*   stringlib_isdigit(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - -static PyObject* +{  +    return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +static PyObject*   stringlib_islower(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - -static PyObject* +{  +    return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +static PyObject*   stringlib_isupper(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - -static PyObject* +{  +    return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +static PyObject*   stringlib_istitle(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - - -/* functions that return a new object partially translated by ctype funcs: */ - -static PyObject* +{  +    return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +  +  +/* functions that return a new object partially translated by ctype funcs: */  +  +static PyObject*   stringlib_lower(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    PyObject* newobj; -    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); -    if (!newobj) -            return NULL; -    _Py_bytes_lower(STRINGLIB_STR(newobj), STRINGLIB_STR(self), -                 STRINGLIB_LEN(self)); -    return newobj; -} - -static PyObject* +{  +    PyObject* newobj;  +    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));  +    if (!newobj)  +            return NULL;  +    _Py_bytes_lower(STRINGLIB_STR(newobj), STRINGLIB_STR(self),  +                 STRINGLIB_LEN(self));  +    return newobj;  +}  +  +static PyObject*   stringlib_upper(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    PyObject* newobj; -    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); -    if (!newobj) -            return NULL; -    _Py_bytes_upper(STRINGLIB_STR(newobj), STRINGLIB_STR(self), -                 STRINGLIB_LEN(self)); -    return newobj; -} - -static PyObject* +{  +    PyObject* newobj;  +    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));  +    if (!newobj)  +            return NULL;  +    _Py_bytes_upper(STRINGLIB_STR(newobj), STRINGLIB_STR(self),  +                 STRINGLIB_LEN(self));  +    return newobj;  +}  +  +static PyObject*   stringlib_title(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    PyObject* newobj; -    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); -    if (!newobj) -            return NULL; -    _Py_bytes_title(STRINGLIB_STR(newobj), STRINGLIB_STR(self), -                 STRINGLIB_LEN(self)); -    return newobj; -} - -static PyObject* +{  +    PyObject* newobj;  +    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));  +    if (!newobj)  +            return NULL;  +    _Py_bytes_title(STRINGLIB_STR(newobj), STRINGLIB_STR(self),  +                 STRINGLIB_LEN(self));  +    return newobj;  +}  +  +static PyObject*   stringlib_capitalize(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    PyObject* newobj; -    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); -    if (!newobj) -            return NULL; -    _Py_bytes_capitalize(STRINGLIB_STR(newobj), STRINGLIB_STR(self), -                      STRINGLIB_LEN(self)); -    return newobj; -} - -static PyObject* +{  +    PyObject* newobj;  +    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));  +    if (!newobj)  +            return NULL;  +    _Py_bytes_capitalize(STRINGLIB_STR(newobj), STRINGLIB_STR(self),  +                      STRINGLIB_LEN(self));  +    return newobj;  +}  +  +static PyObject*   stringlib_swapcase(PyObject *self, PyObject *Py_UNUSED(ignored)) -{ -    PyObject* newobj; -    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self)); -    if (!newobj) -            return NULL; -    _Py_bytes_swapcase(STRINGLIB_STR(newobj), STRINGLIB_STR(self), -                    STRINGLIB_LEN(self)); -    return newobj; -} +{  +    PyObject* newobj;  +    newobj = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));  +    if (!newobj)  +            return NULL;  +    _Py_bytes_swapcase(STRINGLIB_STR(newobj), STRINGLIB_STR(self),  +                    STRINGLIB_LEN(self));  +    return newobj;  +}  diff --git a/contrib/tools/python3/src/Objects/stringlib/eq.h b/contrib/tools/python3/src/Objects/stringlib/eq.h index 9c1058b86cb..4efd5181575 100644 --- a/contrib/tools/python3/src/Objects/stringlib/eq.h +++ b/contrib/tools/python3/src/Objects/stringlib/eq.h @@ -1,25 +1,25 @@ -/* Fast unicode equal function optimized for dictobject.c and setobject.c */ - -/* Return 1 if two unicode objects are equal, 0 if not. - * unicode_eq() is called when the hash of two unicode objects is equal. - */ -Py_LOCAL_INLINE(int) -unicode_eq(PyObject *aa, PyObject *bb) -{ +/* Fast unicode equal function optimized for dictobject.c and setobject.c */  +  +/* Return 1 if two unicode objects are equal, 0 if not.  + * unicode_eq() is called when the hash of two unicode objects is equal.  + */  +Py_LOCAL_INLINE(int)  +unicode_eq(PyObject *aa, PyObject *bb)  +{       assert(PyUnicode_Check(aa));      assert(PyUnicode_Check(bb));      assert(PyUnicode_IS_READY(aa));      assert(PyUnicode_IS_READY(bb)); -    PyUnicodeObject *a = (PyUnicodeObject *)aa; -    PyUnicodeObject *b = (PyUnicodeObject *)bb; - -    if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b)) -        return 0; -    if (PyUnicode_GET_LENGTH(a) == 0) -        return 1; -    if (PyUnicode_KIND(a) != PyUnicode_KIND(b)) -        return 0; -    return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b), -                  PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0; -} +    PyUnicodeObject *a = (PyUnicodeObject *)aa;  +    PyUnicodeObject *b = (PyUnicodeObject *)bb;  +  +    if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b))  +        return 0;  +    if (PyUnicode_GET_LENGTH(a) == 0)  +        return 1;  +    if (PyUnicode_KIND(a) != PyUnicode_KIND(b))  +        return 0;  +    return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b),  +                  PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0;  +}  diff --git a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h index 56a4467d353..5ed40b34694 100644 --- a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h +++ b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h @@ -1,283 +1,283 @@ -/* stringlib: fastsearch implementation */ - -#define STRINGLIB_FASTSEARCH_H - -/* fast search/count implementation, based on a mix between boyer- -   moore and horspool, with a few more bells and whistles on the top. -   for some more background, see: http://effbot.org/zone/stringlib.htm */ - -/* note: fastsearch may access s[n], which isn't a problem when using -   Python's ordinary string types, but may cause problems if you're -   using this code in other contexts.  also, the count mode returns -1 -   if there cannot possible be a match in the target string, and 0 if -   it has actually checked for matches, but didn't find any.  callers -   beware! */ - -#define FAST_COUNT 0 -#define FAST_SEARCH 1 -#define FAST_RSEARCH 2 - -#if LONG_BIT >= 128 -#define STRINGLIB_BLOOM_WIDTH 128 -#elif LONG_BIT >= 64 -#define STRINGLIB_BLOOM_WIDTH 64 -#elif LONG_BIT >= 32 -#define STRINGLIB_BLOOM_WIDTH 32 -#else -#error "LONG_BIT is smaller than 32" -#endif - -#define STRINGLIB_BLOOM_ADD(mask, ch) \ -    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) -#define STRINGLIB_BLOOM(mask, ch)     \ -    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) - -#if STRINGLIB_SIZEOF_CHAR == 1 -#  define MEMCHR_CUT_OFF 15 -#else -#  define MEMCHR_CUT_OFF 40 -#endif - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) -{ -    const STRINGLIB_CHAR *p, *e; - -    p = s; -    e = s + n; -    if (n > MEMCHR_CUT_OFF) { -#if STRINGLIB_SIZEOF_CHAR == 1 -        p = memchr(s, ch, n); -        if (p != NULL) -            return (p - s); -        return -1; -#else +/* stringlib: fastsearch implementation */  +  +#define STRINGLIB_FASTSEARCH_H  +  +/* fast search/count implementation, based on a mix between boyer-  +   moore and horspool, with a few more bells and whistles on the top.  +   for some more background, see: http://effbot.org/zone/stringlib.htm */  +  +/* note: fastsearch may access s[n], which isn't a problem when using  +   Python's ordinary string types, but may cause problems if you're  +   using this code in other contexts.  also, the count mode returns -1  +   if there cannot possible be a match in the target string, and 0 if  +   it has actually checked for matches, but didn't find any.  callers  +   beware! */  +  +#define FAST_COUNT 0  +#define FAST_SEARCH 1  +#define FAST_RSEARCH 2  +  +#if LONG_BIT >= 128  +#define STRINGLIB_BLOOM_WIDTH 128  +#elif LONG_BIT >= 64  +#define STRINGLIB_BLOOM_WIDTH 64  +#elif LONG_BIT >= 32  +#define STRINGLIB_BLOOM_WIDTH 32  +#else  +#error "LONG_BIT is smaller than 32"  +#endif  +  +#define STRINGLIB_BLOOM_ADD(mask, ch) \  +    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))  +#define STRINGLIB_BLOOM(mask, ch)     \  +    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))  +  +#if STRINGLIB_SIZEOF_CHAR == 1  +#  define MEMCHR_CUT_OFF 15  +#else  +#  define MEMCHR_CUT_OFF 40  +#endif  +  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)  +{  +    const STRINGLIB_CHAR *p, *e;  +  +    p = s;  +    e = s + n;  +    if (n > MEMCHR_CUT_OFF) {  +#if STRINGLIB_SIZEOF_CHAR == 1  +        p = memchr(s, ch, n);  +        if (p != NULL)  +            return (p - s);  +        return -1;  +#else           /* use memchr if we can choose a needle without too many likely -           false positives */ -        const STRINGLIB_CHAR *s1, *e1; -        unsigned char needle = ch & 0xff; -        /* If looking for a multiple of 256, we'd have too -           many false positives looking for the '\0' byte in UCS2 -           and UCS4 representations. */ -        if (needle != 0) { -            do { -                void *candidate = memchr(p, needle, -                                         (e - p) * sizeof(STRINGLIB_CHAR)); -                if (candidate == NULL) -                    return -1; -                s1 = p; -                p = (const STRINGLIB_CHAR *) -                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); -                if (*p == ch) -                    return (p - s); -                /* False positive */ -                p++; -                if (p - s1 > MEMCHR_CUT_OFF) -                    continue; -                if (e - p <= MEMCHR_CUT_OFF) -                    break; -                e1 = p + MEMCHR_CUT_OFF; -                while (p != e1) { -                    if (*p == ch) -                        return (p - s); -                    p++; -                } -            } -            while (e - p > MEMCHR_CUT_OFF); -        } -#endif -    } -    while (p < e) { -        if (*p == ch) -            return (p - s); -        p++; -    } -    return -1; -} - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) -{ -    const STRINGLIB_CHAR *p; -#ifdef HAVE_MEMRCHR -    /* memrchr() is a GNU extension, available since glibc 2.1.91. -       it doesn't seem as optimized as memchr(), but is still quite -       faster than our hand-written loop below */ - -    if (n > MEMCHR_CUT_OFF) { -#if STRINGLIB_SIZEOF_CHAR == 1 -        p = memrchr(s, ch, n); -        if (p != NULL) -            return (p - s); -        return -1; -#else +           false positives */  +        const STRINGLIB_CHAR *s1, *e1;  +        unsigned char needle = ch & 0xff;  +        /* If looking for a multiple of 256, we'd have too  +           many false positives looking for the '\0' byte in UCS2  +           and UCS4 representations. */  +        if (needle != 0) {  +            do {  +                void *candidate = memchr(p, needle,  +                                         (e - p) * sizeof(STRINGLIB_CHAR));  +                if (candidate == NULL)  +                    return -1;  +                s1 = p;  +                p = (const STRINGLIB_CHAR *)  +                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));  +                if (*p == ch)  +                    return (p - s);  +                /* False positive */  +                p++;  +                if (p - s1 > MEMCHR_CUT_OFF)  +                    continue;  +                if (e - p <= MEMCHR_CUT_OFF)  +                    break;  +                e1 = p + MEMCHR_CUT_OFF;  +                while (p != e1) {  +                    if (*p == ch)  +                        return (p - s);  +                    p++;  +                }  +            }  +            while (e - p > MEMCHR_CUT_OFF);  +        }  +#endif  +    }  +    while (p < e) {  +        if (*p == ch)  +            return (p - s);  +        p++;  +    }  +    return -1;  +}  +  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)  +{  +    const STRINGLIB_CHAR *p;  +#ifdef HAVE_MEMRCHR  +    /* memrchr() is a GNU extension, available since glibc 2.1.91.  +       it doesn't seem as optimized as memchr(), but is still quite  +       faster than our hand-written loop below */  +  +    if (n > MEMCHR_CUT_OFF) {  +#if STRINGLIB_SIZEOF_CHAR == 1  +        p = memrchr(s, ch, n);  +        if (p != NULL)  +            return (p - s);  +        return -1;  +#else           /* use memrchr if we can choose a needle without too many likely -           false positives */ -        const STRINGLIB_CHAR *s1; -        Py_ssize_t n1; -        unsigned char needle = ch & 0xff; -        /* If looking for a multiple of 256, we'd have too -           many false positives looking for the '\0' byte in UCS2 -           and UCS4 representations. */ -        if (needle != 0) { -            do { -                void *candidate = memrchr(s, needle, -                                          n * sizeof(STRINGLIB_CHAR)); -                if (candidate == NULL) -                    return -1; -                n1 = n; -                p = (const STRINGLIB_CHAR *) -                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); -                n = p - s; -                if (*p == ch) -                    return n; -                /* False positive */ -                if (n1 - n > MEMCHR_CUT_OFF) -                    continue; -                if (n <= MEMCHR_CUT_OFF) -                    break; -                s1 = p - MEMCHR_CUT_OFF; -                while (p > s1) { -                    p--; -                    if (*p == ch) -                        return (p - s); -                } -                n = p - s; -            } -            while (n > MEMCHR_CUT_OFF); -        } -#endif -    } -#endif  /* HAVE_MEMRCHR */ -    p = s + n; -    while (p > s) { -        p--; -        if (*p == ch) -            return (p - s); -    } -    return -1; -} - -#undef MEMCHR_CUT_OFF - -Py_LOCAL_INLINE(Py_ssize_t) -FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, -           const STRINGLIB_CHAR* p, Py_ssize_t m, -           Py_ssize_t maxcount, int mode) -{ -    unsigned long mask; -    Py_ssize_t skip, count = 0; -    Py_ssize_t i, j, mlast, w; - -    w = n - m; - -    if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) -        return -1; - -    /* look for special cases */ -    if (m <= 1) { -        if (m <= 0) -            return -1; -        /* use special case for 1-character strings */ -        if (mode == FAST_SEARCH) -            return STRINGLIB(find_char)(s, n, p[0]); -        else if (mode == FAST_RSEARCH) -            return STRINGLIB(rfind_char)(s, n, p[0]); -        else {  /* FAST_COUNT */ -            for (i = 0; i < n; i++) -                if (s[i] == p[0]) { -                    count++; -                    if (count == maxcount) -                        return maxcount; -                } -            return count; -        } -    } - -    mlast = m - 1; -    skip = mlast - 1; -    mask = 0; - -    if (mode != FAST_RSEARCH) { -        const STRINGLIB_CHAR *ss = s + m - 1; -        const STRINGLIB_CHAR *pp = p + m - 1; - -        /* create compressed boyer-moore delta 1 table */ - -        /* process pattern[:-1] */ -        for (i = 0; i < mlast; i++) { -            STRINGLIB_BLOOM_ADD(mask, p[i]); -            if (p[i] == p[mlast]) -                skip = mlast - i - 1; -        } -        /* process pattern[-1] outside the loop */ -        STRINGLIB_BLOOM_ADD(mask, p[mlast]); - -        for (i = 0; i <= w; i++) { -            /* note: using mlast in the skip path slows things down on x86 */ -            if (ss[i] == pp[0]) { -                /* candidate match */ -                for (j = 0; j < mlast; j++) -                    if (s[i+j] != p[j]) -                        break; -                if (j == mlast) { -                    /* got a match! */ -                    if (mode != FAST_COUNT) -                        return i; -                    count++; -                    if (count == maxcount) -                        return maxcount; -                    i = i + mlast; -                    continue; -                } -                /* miss: check if next character is part of pattern */ -                if (!STRINGLIB_BLOOM(mask, ss[i+1])) -                    i = i + m; -                else -                    i = i + skip; -            } else { -                /* skip: check if next character is part of pattern */ -                if (!STRINGLIB_BLOOM(mask, ss[i+1])) -                    i = i + m; -            } -        } -    } else {    /* FAST_RSEARCH */ - -        /* create compressed boyer-moore delta 1 table */ - -        /* process pattern[0] outside the loop */ -        STRINGLIB_BLOOM_ADD(mask, p[0]); -        /* process pattern[:0:-1] */ -        for (i = mlast; i > 0; i--) { -            STRINGLIB_BLOOM_ADD(mask, p[i]); -            if (p[i] == p[0]) -                skip = i - 1; -        } - -        for (i = w; i >= 0; i--) { -            if (s[i] == p[0]) { -                /* candidate match */ -                for (j = mlast; j > 0; j--) -                    if (s[i+j] != p[j]) -                        break; -                if (j == 0) -                    /* got a match! */ -                    return i; -                /* miss: check if previous character is part of pattern */ -                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) -                    i = i - m; -                else -                    i = i - skip; -            } else { -                /* skip: check if previous character is part of pattern */ -                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) -                    i = i - m; -            } -        } -    } - -    if (mode != FAST_COUNT) -        return -1; -    return count; -} - +           false positives */  +        const STRINGLIB_CHAR *s1;  +        Py_ssize_t n1;  +        unsigned char needle = ch & 0xff;  +        /* If looking for a multiple of 256, we'd have too  +           many false positives looking for the '\0' byte in UCS2  +           and UCS4 representations. */  +        if (needle != 0) {  +            do {  +                void *candidate = memrchr(s, needle,  +                                          n * sizeof(STRINGLIB_CHAR));  +                if (candidate == NULL)  +                    return -1;  +                n1 = n;  +                p = (const STRINGLIB_CHAR *)  +                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));  +                n = p - s;  +                if (*p == ch)  +                    return n;  +                /* False positive */  +                if (n1 - n > MEMCHR_CUT_OFF)  +                    continue;  +                if (n <= MEMCHR_CUT_OFF)  +                    break;  +                s1 = p - MEMCHR_CUT_OFF;  +                while (p > s1) {  +                    p--;  +                    if (*p == ch)  +                        return (p - s);  +                }  +                n = p - s;  +            }  +            while (n > MEMCHR_CUT_OFF);  +        }  +#endif  +    }  +#endif  /* HAVE_MEMRCHR */  +    p = s + n;  +    while (p > s) {  +        p--;  +        if (*p == ch)  +            return (p - s);  +    }  +    return -1;  +}  +  +#undef MEMCHR_CUT_OFF  +  +Py_LOCAL_INLINE(Py_ssize_t)  +FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,  +           const STRINGLIB_CHAR* p, Py_ssize_t m,  +           Py_ssize_t maxcount, int mode)  +{  +    unsigned long mask;  +    Py_ssize_t skip, count = 0;  +    Py_ssize_t i, j, mlast, w;  +  +    w = n - m;  +  +    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))  +        return -1;  +  +    /* look for special cases */  +    if (m <= 1) {  +        if (m <= 0)  +            return -1;  +        /* use special case for 1-character strings */  +        if (mode == FAST_SEARCH)  +            return STRINGLIB(find_char)(s, n, p[0]);  +        else if (mode == FAST_RSEARCH)  +            return STRINGLIB(rfind_char)(s, n, p[0]);  +        else {  /* FAST_COUNT */  +            for (i = 0; i < n; i++)  +                if (s[i] == p[0]) {  +                    count++;  +                    if (count == maxcount)  +                        return maxcount;  +                }  +            return count;  +        }  +    }  +  +    mlast = m - 1;  +    skip = mlast - 1;  +    mask = 0;  +  +    if (mode != FAST_RSEARCH) {  +        const STRINGLIB_CHAR *ss = s + m - 1;  +        const STRINGLIB_CHAR *pp = p + m - 1;  +  +        /* create compressed boyer-moore delta 1 table */  +  +        /* process pattern[:-1] */  +        for (i = 0; i < mlast; i++) {  +            STRINGLIB_BLOOM_ADD(mask, p[i]);  +            if (p[i] == p[mlast])  +                skip = mlast - i - 1;  +        }  +        /* process pattern[-1] outside the loop */  +        STRINGLIB_BLOOM_ADD(mask, p[mlast]);  +  +        for (i = 0; i <= w; i++) {  +            /* note: using mlast in the skip path slows things down on x86 */  +            if (ss[i] == pp[0]) {  +                /* candidate match */  +                for (j = 0; j < mlast; j++)  +                    if (s[i+j] != p[j])  +                        break;  +                if (j == mlast) {  +                    /* got a match! */  +                    if (mode != FAST_COUNT)  +                        return i;  +                    count++;  +                    if (count == maxcount)  +                        return maxcount;  +                    i = i + mlast;  +                    continue;  +                }  +                /* miss: check if next character is part of pattern */  +                if (!STRINGLIB_BLOOM(mask, ss[i+1]))  +                    i = i + m;  +                else  +                    i = i + skip;  +            } else {  +                /* skip: check if next character is part of pattern */  +                if (!STRINGLIB_BLOOM(mask, ss[i+1]))  +                    i = i + m;  +            }  +        }  +    } else {    /* FAST_RSEARCH */  +  +        /* create compressed boyer-moore delta 1 table */  +  +        /* process pattern[0] outside the loop */  +        STRINGLIB_BLOOM_ADD(mask, p[0]);  +        /* process pattern[:0:-1] */  +        for (i = mlast; i > 0; i--) {  +            STRINGLIB_BLOOM_ADD(mask, p[i]);  +            if (p[i] == p[0])  +                skip = i - 1;  +        }  +  +        for (i = w; i >= 0; i--) {  +            if (s[i] == p[0]) {  +                /* candidate match */  +                for (j = mlast; j > 0; j--)  +                    if (s[i+j] != p[j])  +                        break;  +                if (j == 0)  +                    /* got a match! */  +                    return i;  +                /* miss: check if previous character is part of pattern */  +                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))  +                    i = i - m;  +                else  +                    i = i - skip;  +            } else {  +                /* skip: check if previous character is part of pattern */  +                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))  +                    i = i - m;  +            }  +        }  +    }  +  +    if (mode != FAST_COUNT)  +        return -1;  +    return count;  +}  +  diff --git a/contrib/tools/python3/src/Objects/stringlib/find.h b/contrib/tools/python3/src/Objects/stringlib/find.h index 509b9297396..9ca4256e406 100644 --- a/contrib/tools/python3/src/Objects/stringlib/find.h +++ b/contrib/tools/python3/src/Objects/stringlib/find.h @@ -1,119 +1,119 @@ -/* stringlib: find/index implementation */ - -#ifndef STRINGLIB_FASTSEARCH_H -#error must include "stringlib/fastsearch.h" before including this module -#endif - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, -               const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, -               Py_ssize_t offset) -{ -    Py_ssize_t pos; - -    assert(str_len >= 0); -    if (sub_len == 0) -        return offset; - -    pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH); - -    if (pos >= 0) -        pos += offset; - -    return pos; -} - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, -                Py_ssize_t offset) -{ -    Py_ssize_t pos; - -    assert(str_len >= 0); -    if (sub_len == 0) -        return str_len + offset; - -    pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH); - -    if (pos >= 0) -        pos += offset; - -    return pos; -} - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                     const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, -                     Py_ssize_t start, Py_ssize_t end) -{ -    return STRINGLIB(find)(str + start, end - start, sub, sub_len, start); -} - -Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                      const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, -                      Py_ssize_t start, Py_ssize_t end) -{ -    return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start); -} - -#ifdef STRINGLIB_WANT_CONTAINS_OBJ - -Py_LOCAL_INLINE(int) -STRINGLIB(contains_obj)(PyObject* str, PyObject* sub) -{ -    return STRINGLIB(find)( -        STRINGLIB_STR(str), STRINGLIB_LEN(str), -        STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0 -        ) != -1; -} - -#endif /* STRINGLIB_WANT_CONTAINS_OBJ */ - -/* -This function is a helper for the "find" family (find, rfind, index, -rindex) and for count, startswith and endswith, because they all have -the same behaviour for the arguments. - -It does not touch the variables received until it knows everything -is ok. -*/ - -#define FORMAT_BUFFER_SIZE 50 - -Py_LOCAL_INLINE(int) -STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args, -                           PyObject **subobj, -                           Py_ssize_t *start, Py_ssize_t *end) -{ -    PyObject *tmp_subobj; -    Py_ssize_t tmp_start = 0; -    Py_ssize_t tmp_end = PY_SSIZE_T_MAX; -    PyObject *obj_start=Py_None, *obj_end=Py_None; -    char format[FORMAT_BUFFER_SIZE] = "O|OO:"; -    size_t len = strlen(format); - -    strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1); -    format[FORMAT_BUFFER_SIZE - 1] = '\0'; - -    if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end)) -        return 0; - -    /* To support None in "start" and "end" arguments, meaning -       the same as if they were not passed. -    */ -    if (obj_start != Py_None) -        if (!_PyEval_SliceIndex(obj_start, &tmp_start)) -            return 0; -    if (obj_end != Py_None) -        if (!_PyEval_SliceIndex(obj_end, &tmp_end)) -            return 0; - -    *start = tmp_start; -    *end = tmp_end; -    *subobj = tmp_subobj; -    return 1; -} - -#undef FORMAT_BUFFER_SIZE +/* stringlib: find/index implementation */  +  +#ifndef STRINGLIB_FASTSEARCH_H  +#error must include "stringlib/fastsearch.h" before including this module  +#endif  +  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +               const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,  +               Py_ssize_t offset)  +{  +    Py_ssize_t pos;  +  +    assert(str_len >= 0);  +    if (sub_len == 0)  +        return offset;  +  +    pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH);  +  +    if (pos >= 0)  +        pos += offset;  +  +    return pos;  +}  +  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,  +                Py_ssize_t offset)  +{  +    Py_ssize_t pos;  +  +    assert(str_len >= 0);  +    if (sub_len == 0)  +        return str_len + offset;  +  +    pos = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH);  +  +    if (pos >= 0)  +        pos += offset;  +  +    return pos;  +}  +  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                     const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,  +                     Py_ssize_t start, Py_ssize_t end)  +{  +    return STRINGLIB(find)(str + start, end - start, sub, sub_len, start);  +}  +  +Py_LOCAL_INLINE(Py_ssize_t)  +STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                      const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,  +                      Py_ssize_t start, Py_ssize_t end)  +{  +    return STRINGLIB(rfind)(str + start, end - start, sub, sub_len, start);  +}  +  +#ifdef STRINGLIB_WANT_CONTAINS_OBJ  +  +Py_LOCAL_INLINE(int)  +STRINGLIB(contains_obj)(PyObject* str, PyObject* sub)  +{  +    return STRINGLIB(find)(  +        STRINGLIB_STR(str), STRINGLIB_LEN(str),  +        STRINGLIB_STR(sub), STRINGLIB_LEN(sub), 0  +        ) != -1;  +}  +  +#endif /* STRINGLIB_WANT_CONTAINS_OBJ */  +  +/*  +This function is a helper for the "find" family (find, rfind, index,  +rindex) and for count, startswith and endswith, because they all have  +the same behaviour for the arguments.  +  +It does not touch the variables received until it knows everything  +is ok.  +*/  +  +#define FORMAT_BUFFER_SIZE 50  +  +Py_LOCAL_INLINE(int)  +STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,  +                           PyObject **subobj,  +                           Py_ssize_t *start, Py_ssize_t *end)  +{  +    PyObject *tmp_subobj;  +    Py_ssize_t tmp_start = 0;  +    Py_ssize_t tmp_end = PY_SSIZE_T_MAX;  +    PyObject *obj_start=Py_None, *obj_end=Py_None;  +    char format[FORMAT_BUFFER_SIZE] = "O|OO:";  +    size_t len = strlen(format);  +  +    strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1);  +    format[FORMAT_BUFFER_SIZE - 1] = '\0';  +  +    if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end))  +        return 0;  +  +    /* To support None in "start" and "end" arguments, meaning  +       the same as if they were not passed.  +    */  +    if (obj_start != Py_None)  +        if (!_PyEval_SliceIndex(obj_start, &tmp_start))  +            return 0;  +    if (obj_end != Py_None)  +        if (!_PyEval_SliceIndex(obj_end, &tmp_end))  +            return 0;  +  +    *start = tmp_start;  +    *end = tmp_end;  +    *subobj = tmp_subobj;  +    return 1;  +}  +  +#undef FORMAT_BUFFER_SIZE  diff --git a/contrib/tools/python3/src/Objects/stringlib/find_max_char.h b/contrib/tools/python3/src/Objects/stringlib/find_max_char.h index f4e0a7761d3..608bc37a43d 100644 --- a/contrib/tools/python3/src/Objects/stringlib/find_max_char.h +++ b/contrib/tools/python3/src/Objects/stringlib/find_max_char.h @@ -1,134 +1,134 @@ -/* Finding the optimal width of unicode characters in a buffer */ - -#if !STRINGLIB_IS_UNICODE -# error "find_max_char.h is specific to Unicode" -#endif - -/* Mask to quickly check whether a C 'long' contains a -   non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL -#elif (SIZEOF_LONG == 4) -# define UCS1_ASCII_CHAR_MASK 0x80808080UL -#else -# error C 'long' size should be either 4 or 8! -#endif - -#if STRINGLIB_SIZEOF_CHAR == 1 - -Py_LOCAL_INLINE(Py_UCS4) -STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) -{ -    const unsigned char *p = (const unsigned char *) begin; -    const unsigned char *aligned_end = -            (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); - -    while (p < end) { -        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { -            /* Help register allocation */ -            const unsigned char *_p = p; -            while (_p < aligned_end) { +/* Finding the optimal width of unicode characters in a buffer */  +  +#if !STRINGLIB_IS_UNICODE  +# error "find_max_char.h is specific to Unicode"  +#endif  +  +/* Mask to quickly check whether a C 'long' contains a  +   non-ASCII, UTF8-encoded char. */  +#if (SIZEOF_LONG == 8)  +# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL  +#elif (SIZEOF_LONG == 4)  +# define UCS1_ASCII_CHAR_MASK 0x80808080UL  +#else  +# error C 'long' size should be either 4 or 8!  +#endif  +  +#if STRINGLIB_SIZEOF_CHAR == 1  +  +Py_LOCAL_INLINE(Py_UCS4)  +STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)  +{  +    const unsigned char *p = (const unsigned char *) begin;  +    const unsigned char *aligned_end =  +            (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);  +  +    while (p < end) {  +        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {  +            /* Help register allocation */  +            const unsigned char *_p = p;  +            while (_p < aligned_end) {                   unsigned long value = *(const unsigned long *) _p; -                if (value & UCS1_ASCII_CHAR_MASK) -                    return 255; -                _p += SIZEOF_LONG; -            } -            p = _p; -            if (p == end) -                break; -        } -        if (*p++ & 0x80) -            return 255; -    } -    return 127; -} - -#undef ASCII_CHAR_MASK - -#else /* STRINGLIB_SIZEOF_CHAR == 1 */ - -#define MASK_ASCII 0xFFFFFF80 -#define MASK_UCS1 0xFFFFFF00 -#define MASK_UCS2 0xFFFF0000 - -#define MAX_CHAR_ASCII 0x7f -#define MAX_CHAR_UCS1  0xff -#define MAX_CHAR_UCS2  0xffff -#define MAX_CHAR_UCS4  0x10ffff - -Py_LOCAL_INLINE(Py_UCS4) -STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) -{ -#if STRINGLIB_SIZEOF_CHAR == 2 -    const Py_UCS4 mask_limit = MASK_UCS1; -    const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; -#elif STRINGLIB_SIZEOF_CHAR == 4 -    const Py_UCS4 mask_limit = MASK_UCS2; -    const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; -#else -#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) -#endif -    Py_UCS4 mask; -    Py_ssize_t n = end - begin; -    const STRINGLIB_CHAR *p = begin; -    const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); -    Py_UCS4 max_char; - -    max_char = MAX_CHAR_ASCII; -    mask = MASK_ASCII; -    while (p < unrolled_end) { -        STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; -        if (bits & mask) { -            if (mask == mask_limit) { -                /* Limit reached */ -                return max_char_limit; -            } -            if (mask == MASK_ASCII) { -                max_char = MAX_CHAR_UCS1; -                mask = MASK_UCS1; -            } -            else { -                /* mask can't be MASK_UCS2 because of mask_limit above */ -                assert(mask == MASK_UCS1); -                max_char = MAX_CHAR_UCS2; -                mask = MASK_UCS2; -            } -            /* We check the new mask on the same chars in the next iteration */ -            continue; -        } -        p += 4; -    } -    while (p < end) { -        if (p[0] & mask) { -            if (mask == mask_limit) { -                /* Limit reached */ -                return max_char_limit; -            } -            if (mask == MASK_ASCII) { -                max_char = MAX_CHAR_UCS1; -                mask = MASK_UCS1; -            } -            else { -                /* mask can't be MASK_UCS2 because of mask_limit above */ -                assert(mask == MASK_UCS1); -                max_char = MAX_CHAR_UCS2; -                mask = MASK_UCS2; -            } -            /* We check the new mask on the same chars in the next iteration */ -            continue; -        } -        p++; -    } -    return max_char; -} - -#undef MASK_ASCII -#undef MASK_UCS1 -#undef MASK_UCS2 -#undef MAX_CHAR_ASCII -#undef MAX_CHAR_UCS1 -#undef MAX_CHAR_UCS2 -#undef MAX_CHAR_UCS4 - -#endif /* STRINGLIB_SIZEOF_CHAR == 1 */ - +                if (value & UCS1_ASCII_CHAR_MASK)  +                    return 255;  +                _p += SIZEOF_LONG;  +            }  +            p = _p;  +            if (p == end)  +                break;  +        }  +        if (*p++ & 0x80)  +            return 255;  +    }  +    return 127;  +}  +  +#undef ASCII_CHAR_MASK  +  +#else /* STRINGLIB_SIZEOF_CHAR == 1 */  +  +#define MASK_ASCII 0xFFFFFF80  +#define MASK_UCS1 0xFFFFFF00  +#define MASK_UCS2 0xFFFF0000  +  +#define MAX_CHAR_ASCII 0x7f  +#define MAX_CHAR_UCS1  0xff  +#define MAX_CHAR_UCS2  0xffff  +#define MAX_CHAR_UCS4  0x10ffff  +  +Py_LOCAL_INLINE(Py_UCS4)  +STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)  +{  +#if STRINGLIB_SIZEOF_CHAR == 2  +    const Py_UCS4 mask_limit = MASK_UCS1;  +    const Py_UCS4 max_char_limit = MAX_CHAR_UCS2;  +#elif STRINGLIB_SIZEOF_CHAR == 4  +    const Py_UCS4 mask_limit = MASK_UCS2;  +    const Py_UCS4 max_char_limit = MAX_CHAR_UCS4;  +#else  +#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)  +#endif  +    Py_UCS4 mask;  +    Py_ssize_t n = end - begin;  +    const STRINGLIB_CHAR *p = begin;  +    const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);  +    Py_UCS4 max_char;  +  +    max_char = MAX_CHAR_ASCII;  +    mask = MASK_ASCII;  +    while (p < unrolled_end) {  +        STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3];  +        if (bits & mask) {  +            if (mask == mask_limit) {  +                /* Limit reached */  +                return max_char_limit;  +            }  +            if (mask == MASK_ASCII) {  +                max_char = MAX_CHAR_UCS1;  +                mask = MASK_UCS1;  +            }  +            else {  +                /* mask can't be MASK_UCS2 because of mask_limit above */  +                assert(mask == MASK_UCS1);  +                max_char = MAX_CHAR_UCS2;  +                mask = MASK_UCS2;  +            }  +            /* We check the new mask on the same chars in the next iteration */  +            continue;  +        }  +        p += 4;  +    }  +    while (p < end) {  +        if (p[0] & mask) {  +            if (mask == mask_limit) {  +                /* Limit reached */  +                return max_char_limit;  +            }  +            if (mask == MASK_ASCII) {  +                max_char = MAX_CHAR_UCS1;  +                mask = MASK_UCS1;  +            }  +            else {  +                /* mask can't be MASK_UCS2 because of mask_limit above */  +                assert(mask == MASK_UCS1);  +                max_char = MAX_CHAR_UCS2;  +                mask = MASK_UCS2;  +            }  +            /* We check the new mask on the same chars in the next iteration */  +            continue;  +        }  +        p++;  +    }  +    return max_char;  +}  +  +#undef MASK_ASCII  +#undef MASK_UCS1  +#undef MASK_UCS2  +#undef MAX_CHAR_ASCII  +#undef MAX_CHAR_UCS1  +#undef MAX_CHAR_UCS2  +#undef MAX_CHAR_UCS4  +  +#endif /* STRINGLIB_SIZEOF_CHAR == 1 */  +  diff --git a/contrib/tools/python3/src/Objects/stringlib/join.h b/contrib/tools/python3/src/Objects/stringlib/join.h index 53bcbdea7ad..acb39b497ff 100644 --- a/contrib/tools/python3/src/Objects/stringlib/join.h +++ b/contrib/tools/python3/src/Objects/stringlib/join.h @@ -1,73 +1,73 @@ -/* stringlib: bytes joining implementation */ - -#if STRINGLIB_IS_UNICODE -#error join.h only compatible with byte-wise strings -#endif - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) -{ +/* stringlib: bytes joining implementation */  +  +#if STRINGLIB_IS_UNICODE  +#error join.h only compatible with byte-wise strings  +#endif  +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)  +{       const char *sepstr = STRINGLIB_STR(sep);      Py_ssize_t seplen = STRINGLIB_LEN(sep); -    PyObject *res = NULL; -    char *p; -    Py_ssize_t seqlen = 0; -    Py_ssize_t sz = 0; -    Py_ssize_t i, nbufs; -    PyObject *seq, *item; -    Py_buffer *buffers = NULL; -#define NB_STATIC_BUFFERS 10 -    Py_buffer static_buffers[NB_STATIC_BUFFERS]; +    PyObject *res = NULL;  +    char *p;  +    Py_ssize_t seqlen = 0;  +    Py_ssize_t sz = 0;  +    Py_ssize_t i, nbufs;  +    PyObject *seq, *item;  +    Py_buffer *buffers = NULL;  +#define NB_STATIC_BUFFERS 10  +    Py_buffer static_buffers[NB_STATIC_BUFFERS];   #define GIL_THRESHOLD 1048576      int drop_gil = 1;      PyThreadState *save = NULL; - -    seq = PySequence_Fast(iterable, "can only join an iterable"); -    if (seq == NULL) { -        return NULL; -    } - -    seqlen = PySequence_Fast_GET_SIZE(seq); -    if (seqlen == 0) { -        Py_DECREF(seq); -        return STRINGLIB_NEW(NULL, 0); -    } -#ifndef STRINGLIB_MUTABLE -    if (seqlen == 1) { -        item = PySequence_Fast_GET_ITEM(seq, 0); -        if (STRINGLIB_CHECK_EXACT(item)) { -            Py_INCREF(item); -            Py_DECREF(seq); -            return item; -        } -    } -#endif -    if (seqlen > NB_STATIC_BUFFERS) { -        buffers = PyMem_NEW(Py_buffer, seqlen); -        if (buffers == NULL) { -            Py_DECREF(seq); -            PyErr_NoMemory(); -            return NULL; -        } -    } -    else { -        buffers = static_buffers; -    } - -    /* Here is the general case.  Do a pre-pass to figure out the total -     * amount of space we'll need (sz), and see whether all arguments are -     * bytes-like. -     */ -    for (i = 0, nbufs = 0; i < seqlen; i++) { -        Py_ssize_t itemlen; -        item = PySequence_Fast_GET_ITEM(seq, i); -        if (PyBytes_CheckExact(item)) { -            /* Fast path. */ -            Py_INCREF(item); -            buffers[i].obj = item; -            buffers[i].buf = PyBytes_AS_STRING(item); -            buffers[i].len = PyBytes_GET_SIZE(item); -        } +  +    seq = PySequence_Fast(iterable, "can only join an iterable");  +    if (seq == NULL) {  +        return NULL;  +    }  +  +    seqlen = PySequence_Fast_GET_SIZE(seq);  +    if (seqlen == 0) {  +        Py_DECREF(seq);  +        return STRINGLIB_NEW(NULL, 0);  +    }  +#ifndef STRINGLIB_MUTABLE  +    if (seqlen == 1) {  +        item = PySequence_Fast_GET_ITEM(seq, 0);  +        if (STRINGLIB_CHECK_EXACT(item)) {  +            Py_INCREF(item);  +            Py_DECREF(seq);  +            return item;  +        }  +    }  +#endif  +    if (seqlen > NB_STATIC_BUFFERS) {  +        buffers = PyMem_NEW(Py_buffer, seqlen);  +        if (buffers == NULL) {  +            Py_DECREF(seq);  +            PyErr_NoMemory();  +            return NULL;  +        }  +    }  +    else {  +        buffers = static_buffers;  +    }  +  +    /* Here is the general case.  Do a pre-pass to figure out the total  +     * amount of space we'll need (sz), and see whether all arguments are  +     * bytes-like.  +     */  +    for (i = 0, nbufs = 0; i < seqlen; i++) {  +        Py_ssize_t itemlen;  +        item = PySequence_Fast_GET_ITEM(seq, i);  +        if (PyBytes_CheckExact(item)) {  +            /* Fast path. */  +            Py_INCREF(item);  +            buffers[i].obj = item;  +            buffers[i].buf = PyBytes_AS_STRING(item);  +            buffers[i].len = PyBytes_GET_SIZE(item);  +        }           else {              if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {                  PyErr_Format(PyExc_TypeError, @@ -83,52 +83,52 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)               * changing the behaviour of that data race.               */              drop_gil = 0; -        } -        nbufs = i + 1;  /* for error cleanup */ -        itemlen = buffers[i].len; -        if (itemlen > PY_SSIZE_T_MAX - sz) { -            PyErr_SetString(PyExc_OverflowError, -                            "join() result is too long"); -            goto error; -        } -        sz += itemlen; -        if (i != 0) { -            if (seplen > PY_SSIZE_T_MAX - sz) { -                PyErr_SetString(PyExc_OverflowError, -                                "join() result is too long"); -                goto error; -            } -            sz += seplen; -        } -        if (seqlen != PySequence_Fast_GET_SIZE(seq)) { -            PyErr_SetString(PyExc_RuntimeError, -                            "sequence changed size during iteration"); -            goto error; -        } -    } - -    /* Allocate result space. */ -    res = STRINGLIB_NEW(NULL, sz); -    if (res == NULL) -        goto error; - -    /* Catenate everything. */ -    p = STRINGLIB_STR(res); +        }  +        nbufs = i + 1;  /* for error cleanup */  +        itemlen = buffers[i].len;  +        if (itemlen > PY_SSIZE_T_MAX - sz) {  +            PyErr_SetString(PyExc_OverflowError,  +                            "join() result is too long");  +            goto error;  +        }  +        sz += itemlen;  +        if (i != 0) {  +            if (seplen > PY_SSIZE_T_MAX - sz) {  +                PyErr_SetString(PyExc_OverflowError,  +                                "join() result is too long");  +                goto error;  +            }  +            sz += seplen;  +        }  +        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {  +            PyErr_SetString(PyExc_RuntimeError,  +                            "sequence changed size during iteration");  +            goto error;  +        }  +    }  +  +    /* Allocate result space. */  +    res = STRINGLIB_NEW(NULL, sz);  +    if (res == NULL)  +        goto error;  +  +    /* Catenate everything. */  +    p = STRINGLIB_STR(res);       if (sz < GIL_THRESHOLD) {          drop_gil = 0;   /* Benefits are likely outweighed by the overheads */      }      if (drop_gil) {          save = PyEval_SaveThread();      } -    if (!seplen) { -        /* fast path */ -        for (i = 0; i < nbufs; i++) { -            Py_ssize_t n = buffers[i].len; -            char *q = buffers[i].buf; -            memcpy(p, q, n); -            p += n; -        } -    } +    if (!seplen) {  +        /* fast path */  +        for (i = 0; i < nbufs; i++) {  +            Py_ssize_t n = buffers[i].len;  +            char *q = buffers[i].buf;  +            memcpy(p, q, n);  +            p += n;  +        }  +    }       else {          for (i = 0; i < nbufs; i++) {              Py_ssize_t n; @@ -141,23 +141,23 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)              q = buffers[i].buf;              memcpy(p, q, n);              p += n; -        } -    } +        }  +    }       if (drop_gil) {          PyEval_RestoreThread(save);      } -    goto done; - -error: -    res = NULL; -done: -    Py_DECREF(seq); -    for (i = 0; i < nbufs; i++) -        PyBuffer_Release(&buffers[i]); -    if (buffers != static_buffers) -        PyMem_FREE(buffers); -    return res; -} - -#undef NB_STATIC_BUFFERS +    goto done;  +  +error:  +    res = NULL;  +done:  +    Py_DECREF(seq);  +    for (i = 0; i < nbufs; i++)  +        PyBuffer_Release(&buffers[i]);  +    if (buffers != static_buffers)  +        PyMem_FREE(buffers);  +    return res;  +}  +  +#undef NB_STATIC_BUFFERS   #undef GIL_THRESHOLD diff --git a/contrib/tools/python3/src/Objects/stringlib/localeutil.h b/contrib/tools/python3/src/Objects/stringlib/localeutil.h index bd16e0a1728..de48a62f1f4 100644 --- a/contrib/tools/python3/src/Objects/stringlib/localeutil.h +++ b/contrib/tools/python3/src/Objects/stringlib/localeutil.h @@ -1,82 +1,82 @@ -/* _PyUnicode_InsertThousandsGrouping() helper functions */ - -typedef struct { -    const char *grouping; -    char previous; -    Py_ssize_t i; /* Where we're currently pointing in grouping. */ -} GroupGenerator; - - -static void -GroupGenerator_init(GroupGenerator *self, const char *grouping) -{ -    self->grouping = grouping; -    self->i = 0; -    self->previous = 0; -} - - -/* Returns the next grouping, or 0 to signify end. */ -static Py_ssize_t -GroupGenerator_next(GroupGenerator *self) -{ -    /* Note that we don't really do much error checking here. If a -       grouping string contains just CHAR_MAX, for example, then just -       terminate the generator. That shouldn't happen, but at least we -       fail gracefully. */ -    switch (self->grouping[self->i]) { -    case 0: -        return self->previous; -    case CHAR_MAX: -        /* Stop the generator. */ -        return 0; -    default: { -        char ch = self->grouping[self->i]; -        self->previous = ch; -        self->i++; -        return (Py_ssize_t)ch; -    } -    } -} - - -/* Fill in some digits, leading zeros, and thousands separator. All -   are optional, depending on when we're called. */ -static void -InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, -                             PyObject *digits, Py_ssize_t *digits_pos, -                             Py_ssize_t n_chars, Py_ssize_t n_zeros, -                             PyObject *thousands_sep, Py_ssize_t thousands_sep_len, -                             Py_UCS4 *maxchar) -{ -    if (!writer) { -        /* if maxchar > 127, maxchar is already set */ -        if (*maxchar == 127 && thousands_sep) { -            Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep); -            *maxchar = Py_MAX(*maxchar, maxchar2); -        } -        return; -    } - -    if (thousands_sep) { -        *buffer_pos -= thousands_sep_len; - -        /* Copy the thousands_sep chars into the buffer. */ -        _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, -                                      thousands_sep, 0, -                                      thousands_sep_len); -    } - -    *buffer_pos -= n_chars; -    *digits_pos -= n_chars; -    _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, -                                  digits, *digits_pos, -                                  n_chars); - -    if (n_zeros) { -        *buffer_pos -= n_zeros; -        enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer); -        void *data = PyUnicode_DATA(writer->buffer); +/* _PyUnicode_InsertThousandsGrouping() helper functions */  +  +typedef struct {  +    const char *grouping;  +    char previous;  +    Py_ssize_t i; /* Where we're currently pointing in grouping. */  +} GroupGenerator;  +  +  +static void  +GroupGenerator_init(GroupGenerator *self, const char *grouping)  +{  +    self->grouping = grouping;  +    self->i = 0;  +    self->previous = 0;  +}  +  +  +/* Returns the next grouping, or 0 to signify end. */  +static Py_ssize_t  +GroupGenerator_next(GroupGenerator *self)  +{  +    /* Note that we don't really do much error checking here. If a  +       grouping string contains just CHAR_MAX, for example, then just  +       terminate the generator. That shouldn't happen, but at least we  +       fail gracefully. */  +    switch (self->grouping[self->i]) {  +    case 0:  +        return self->previous;  +    case CHAR_MAX:  +        /* Stop the generator. */  +        return 0;  +    default: {  +        char ch = self->grouping[self->i];  +        self->previous = ch;  +        self->i++;  +        return (Py_ssize_t)ch;  +    }  +    }  +}  +  +  +/* Fill in some digits, leading zeros, and thousands separator. All  +   are optional, depending on when we're called. */  +static void  +InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,  +                             PyObject *digits, Py_ssize_t *digits_pos,  +                             Py_ssize_t n_chars, Py_ssize_t n_zeros,  +                             PyObject *thousands_sep, Py_ssize_t thousands_sep_len,  +                             Py_UCS4 *maxchar)  +{  +    if (!writer) {  +        /* if maxchar > 127, maxchar is already set */  +        if (*maxchar == 127 && thousands_sep) {  +            Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);  +            *maxchar = Py_MAX(*maxchar, maxchar2);  +        }  +        return;  +    }  +  +    if (thousands_sep) {  +        *buffer_pos -= thousands_sep_len;  +  +        /* Copy the thousands_sep chars into the buffer. */  +        _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,  +                                      thousands_sep, 0,  +                                      thousands_sep_len);  +    }  +  +    *buffer_pos -= n_chars;  +    *digits_pos -= n_chars;  +    _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,  +                                  digits, *digits_pos,  +                                  n_chars);  +  +    if (n_zeros) {  +        *buffer_pos -= n_zeros;  +        enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);  +        void *data = PyUnicode_DATA(writer->buffer);           unicode_fill(kind, data, '0', *buffer_pos, n_zeros); -    } -} +    }  +}  diff --git a/contrib/tools/python3/src/Objects/stringlib/partition.h b/contrib/tools/python3/src/Objects/stringlib/partition.h index ed32a6f2b38..d47ac35217f 100644 --- a/contrib/tools/python3/src/Objects/stringlib/partition.h +++ b/contrib/tools/python3/src/Objects/stringlib/partition.h @@ -1,116 +1,116 @@ -/* stringlib: partition implementation */ - -#ifndef STRINGLIB_FASTSEARCH_H -#error must include "stringlib/fastsearch.h" before including this module -#endif - -Py_LOCAL_INLINE(PyObject*) -STRINGLIB(partition)(PyObject* str_obj, -                    const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                    PyObject* sep_obj, -                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len) -{ -    PyObject* out; -    Py_ssize_t pos; - -    if (sep_len == 0) { -        PyErr_SetString(PyExc_ValueError, "empty separator"); -        return NULL; -    } - -    out = PyTuple_New(3); -    if (!out) -        return NULL; - -    pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_SEARCH); - -    if (pos < 0) { -#if STRINGLIB_MUTABLE -        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len)); -        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); -        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0)); - -        if (PyErr_Occurred()) { -            Py_DECREF(out); -            return NULL; -        } -#else -        Py_INCREF(str_obj); -        PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); -        Py_INCREF(STRINGLIB_EMPTY); -        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); -        Py_INCREF(STRINGLIB_EMPTY); -        PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY); -#endif -        return out; -    } - -    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); -    Py_INCREF(sep_obj); -    PyTuple_SET_ITEM(out, 1, sep_obj); -    pos += sep_len; -    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); - -    if (PyErr_Occurred()) { -        Py_DECREF(out); -        return NULL; -    } - -    return out; -} - -Py_LOCAL_INLINE(PyObject*) -STRINGLIB(rpartition)(PyObject* str_obj, -                     const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                     PyObject* sep_obj, -                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len) -{ -    PyObject* out; -    Py_ssize_t pos; - -    if (sep_len == 0) { -        PyErr_SetString(PyExc_ValueError, "empty separator"); -        return NULL; -    } - -    out = PyTuple_New(3); -    if (!out) -        return NULL; - -    pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_RSEARCH); - -    if (pos < 0) { -#if STRINGLIB_MUTABLE -        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0)); -        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); -        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len)); - -        if (PyErr_Occurred()) { -            Py_DECREF(out); -            return NULL; -        } -#else -        Py_INCREF(STRINGLIB_EMPTY); -        PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); -        Py_INCREF(STRINGLIB_EMPTY); -        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY); -        Py_INCREF(str_obj); -        PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj); -#endif -        return out; -    } - -    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); -    Py_INCREF(sep_obj); -    PyTuple_SET_ITEM(out, 1, sep_obj); -    pos += sep_len; -    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); - -    if (PyErr_Occurred()) { -        Py_DECREF(out); -        return NULL; -    } - -    return out; -} - +/* stringlib: partition implementation */  +  +#ifndef STRINGLIB_FASTSEARCH_H  +#error must include "stringlib/fastsearch.h" before including this module  +#endif  +  +Py_LOCAL_INLINE(PyObject*)  +STRINGLIB(partition)(PyObject* str_obj,  +                    const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                    PyObject* sep_obj,  +                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)  +{  +    PyObject* out;  +    Py_ssize_t pos;  +  +    if (sep_len == 0) {  +        PyErr_SetString(PyExc_ValueError, "empty separator");  +        return NULL;  +    }  +  +    out = PyTuple_New(3);  +    if (!out)  +        return NULL;  +  +    pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_SEARCH);  +  +    if (pos < 0) {  +#if STRINGLIB_MUTABLE  +        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));  +        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));  +        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));  +  +        if (PyErr_Occurred()) {  +            Py_DECREF(out);  +            return NULL;  +        }  +#else  +        Py_INCREF(str_obj);  +        PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);  +        Py_INCREF(STRINGLIB_EMPTY);  +        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);  +        Py_INCREF(STRINGLIB_EMPTY);  +        PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);  +#endif  +        return out;  +    }  +  +    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));  +    Py_INCREF(sep_obj);  +    PyTuple_SET_ITEM(out, 1, sep_obj);  +    pos += sep_len;  +    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));  +  +    if (PyErr_Occurred()) {  +        Py_DECREF(out);  +        return NULL;  +    }  +  +    return out;  +}  +  +Py_LOCAL_INLINE(PyObject*)  +STRINGLIB(rpartition)(PyObject* str_obj,  +                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                     PyObject* sep_obj,  +                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)  +{  +    PyObject* out;  +    Py_ssize_t pos;  +  +    if (sep_len == 0) {  +        PyErr_SetString(PyExc_ValueError, "empty separator");  +        return NULL;  +    }  +  +    out = PyTuple_New(3);  +    if (!out)  +        return NULL;  +  +    pos = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_RSEARCH);  +  +    if (pos < 0) {  +#if STRINGLIB_MUTABLE  +        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));  +        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));  +        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));  +  +        if (PyErr_Occurred()) {  +            Py_DECREF(out);  +            return NULL;  +        }  +#else  +        Py_INCREF(STRINGLIB_EMPTY);  +        PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);  +        Py_INCREF(STRINGLIB_EMPTY);  +        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);  +        Py_INCREF(str_obj);  +        PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);  +#endif  +        return out;  +    }  +  +    PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos));  +    Py_INCREF(sep_obj);  +    PyTuple_SET_ITEM(out, 1, sep_obj);  +    pos += sep_len;  +    PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos));  +  +    if (PyErr_Occurred()) {  +        Py_DECREF(out);  +        return NULL;  +    }  +  +    return out;  +}  +  diff --git a/contrib/tools/python3/src/Objects/stringlib/replace.h b/contrib/tools/python3/src/Objects/stringlib/replace.h index ef318ed6dd5..dcd2fb8ff4e 100644 --- a/contrib/tools/python3/src/Objects/stringlib/replace.h +++ b/contrib/tools/python3/src/Objects/stringlib/replace.h @@ -1,53 +1,53 @@ -/* stringlib: replace implementation */ - -#ifndef STRINGLIB_FASTSEARCH_H -#error must include "stringlib/fastsearch.h" before including this module -#endif - -Py_LOCAL_INLINE(void) -STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end, -                                 Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) -{ -    *s = u2; -    while (--maxcount && ++s != end) { -        /* Find the next character to be replaced. - -           If it occurs often, it is faster to scan for it using an inline -           loop.  If it occurs seldom, it is faster to scan for it using a -           function call; the overhead of the function call is amortized -           across the many characters that call covers.  We start with an -           inline loop and use a heuristic to determine whether to fall back -           to a function call. */ -        if (*s != u1) { -            int attempts = 10; -            /* search u1 in a dummy loop */ -            while (1) { -                if (++s == end) -                    return; -                if (*s == u1) -                    break; -                if (!--attempts) { -                    /* if u1 was not found for attempts iterations, -                       use FASTSEARCH() or memchr() */ -#if STRINGLIB_SIZEOF_CHAR == 1 -                    s++; -                    s = memchr(s, u1, end - s); -                    if (s == NULL) -                        return; -#else -                    Py_ssize_t i; -                    STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1; -                    s++; -                    i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH); -                    if (i < 0) -                        return; -                    s += i; -#endif -                    /* restart the dummy loop */ -                    break; -                } -            } -        } -        *s = u2; -    } -} +/* stringlib: replace implementation */  +  +#ifndef STRINGLIB_FASTSEARCH_H  +#error must include "stringlib/fastsearch.h" before including this module  +#endif  +  +Py_LOCAL_INLINE(void)  +STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,  +                                 Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)  +{  +    *s = u2;  +    while (--maxcount && ++s != end) {  +        /* Find the next character to be replaced.  +  +           If it occurs often, it is faster to scan for it using an inline  +           loop.  If it occurs seldom, it is faster to scan for it using a  +           function call; the overhead of the function call is amortized  +           across the many characters that call covers.  We start with an  +           inline loop and use a heuristic to determine whether to fall back  +           to a function call. */  +        if (*s != u1) {  +            int attempts = 10;  +            /* search u1 in a dummy loop */  +            while (1) {  +                if (++s == end)  +                    return;  +                if (*s == u1)  +                    break;  +                if (!--attempts) {  +                    /* if u1 was not found for attempts iterations,  +                       use FASTSEARCH() or memchr() */  +#if STRINGLIB_SIZEOF_CHAR == 1  +                    s++;  +                    s = memchr(s, u1, end - s);  +                    if (s == NULL)  +                        return;  +#else  +                    Py_ssize_t i;  +                    STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;  +                    s++;  +                    i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);  +                    if (i < 0)  +                        return;  +                    s += i;  +#endif  +                    /* restart the dummy loop */  +                    break;  +                }  +            }  +        }  +        *s = u2;  +    }  +}  diff --git a/contrib/tools/python3/src/Objects/stringlib/split.h b/contrib/tools/python3/src/Objects/stringlib/split.h index 068047f9874..23666a8a851 100644 --- a/contrib/tools/python3/src/Objects/stringlib/split.h +++ b/contrib/tools/python3/src/Objects/stringlib/split.h @@ -1,390 +1,390 @@ -/* stringlib: split implementation */ - -#ifndef STRINGLIB_FASTSEARCH_H -#error must include "stringlib/fastsearch.h" before including this module -#endif - -/* Overallocate the initial list to reduce the number of reallocs for small -   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three -   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human -   text (roughly 11 words per line) and field delimited data (usually 1-10 -   fields).  For large strings the split algorithms are bandwidth limited -   so increasing the preallocation likely will not improve things.*/ - -#define MAX_PREALLOC 12 - -/* 5 splits gives 6 elements */ -#define PREALLOC_SIZE(maxsplit) \ -    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) - -#define SPLIT_APPEND(data, left, right)         \ -    sub = STRINGLIB_NEW((data) + (left),        \ -                        (right) - (left));      \ -    if (sub == NULL)                            \ -        goto onError;                           \ -    if (PyList_Append(list, sub)) {             \ -        Py_DECREF(sub);                         \ -        goto onError;                           \ -    }                                           \ -    else                                        \ -        Py_DECREF(sub); - -#define SPLIT_ADD(data, left, right) {          \ -    sub = STRINGLIB_NEW((data) + (left),        \ -                        (right) - (left));      \ -    if (sub == NULL)                            \ -        goto onError;                           \ -    if (count < MAX_PREALLOC) {                 \ -        PyList_SET_ITEM(list, count, sub);      \ -    } else {                                    \ -        if (PyList_Append(list, sub)) {         \ -            Py_DECREF(sub);                     \ -            goto onError;                       \ -        }                                       \ -        else                                    \ -            Py_DECREF(sub);                     \ -    }                                           \ -    count++; } - - -/* Always force the list to the expected size. */ +/* stringlib: split implementation */  +  +#ifndef STRINGLIB_FASTSEARCH_H  +#error must include "stringlib/fastsearch.h" before including this module  +#endif  +  +/* Overallocate the initial list to reduce the number of reallocs for small  +   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three  +   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human  +   text (roughly 11 words per line) and field delimited data (usually 1-10  +   fields).  For large strings the split algorithms are bandwidth limited  +   so increasing the preallocation likely will not improve things.*/  +  +#define MAX_PREALLOC 12  +  +/* 5 splits gives 6 elements */  +#define PREALLOC_SIZE(maxsplit) \  +    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)  +  +#define SPLIT_APPEND(data, left, right)         \  +    sub = STRINGLIB_NEW((data) + (left),        \  +                        (right) - (left));      \  +    if (sub == NULL)                            \  +        goto onError;                           \  +    if (PyList_Append(list, sub)) {             \  +        Py_DECREF(sub);                         \  +        goto onError;                           \  +    }                                           \  +    else                                        \  +        Py_DECREF(sub);  +  +#define SPLIT_ADD(data, left, right) {          \  +    sub = STRINGLIB_NEW((data) + (left),        \  +                        (right) - (left));      \  +    if (sub == NULL)                            \  +        goto onError;                           \  +    if (count < MAX_PREALLOC) {                 \  +        PyList_SET_ITEM(list, count, sub);      \  +    } else {                                    \  +        if (PyList_Append(list, sub)) {         \  +            Py_DECREF(sub);                     \  +            goto onError;                       \  +        }                                       \  +        else                                    \  +            Py_DECREF(sub);                     \  +    }                                           \  +    count++; }  +  +  +/* Always force the list to the expected size. */   #define FIX_PREALLOC_SIZE(list) Py_SET_SIZE(list, count) - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(split_whitespace)(PyObject* str_obj, -                           const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                           Py_ssize_t maxcount) -{ -    Py_ssize_t i, j, count=0; -    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); -    PyObject *sub; - -    if (list == NULL) -        return NULL; - -    i = j = 0; -    while (maxcount-- > 0) { -        while (i < str_len && STRINGLIB_ISSPACE(str[i])) -            i++; -        if (i == str_len) break; -        j = i; i++; -        while (i < str_len && !STRINGLIB_ISSPACE(str[i])) -            i++; -#ifndef STRINGLIB_MUTABLE -        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { -            /* No whitespace in str_obj, so just use it as list[0] */ -            Py_INCREF(str_obj); -            PyList_SET_ITEM(list, 0, (PyObject *)str_obj); -            count++; -            break; -        } -#endif -        SPLIT_ADD(str, j, i); -    } - -    if (i < str_len) { -        /* Only occurs when maxcount was reached */ -        /* Skip any remaining whitespace and copy to end of string */ -        while (i < str_len && STRINGLIB_ISSPACE(str[i])) -            i++; -        if (i != str_len) -            SPLIT_ADD(str, i, str_len); -    } -    FIX_PREALLOC_SIZE(list); -    return list; - -  onError: -    Py_DECREF(list); -    return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(split_char)(PyObject* str_obj, -                     const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                     const STRINGLIB_CHAR ch, -                     Py_ssize_t maxcount) -{ -    Py_ssize_t i, j, count=0; -    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); -    PyObject *sub; - -    if (list == NULL) -        return NULL; - -    i = j = 0; -    while ((j < str_len) && (maxcount-- > 0)) { -        for(; j < str_len; j++) { -            /* I found that using memchr makes no difference */ -            if (str[j] == ch) { -                SPLIT_ADD(str, i, j); -                i = j = j + 1; -                break; -            } -        } -    } -#ifndef STRINGLIB_MUTABLE -    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { -        /* ch not in str_obj, so just use str_obj as list[0] */ -        Py_INCREF(str_obj); -        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); -        count++; -    } else -#endif -    if (i <= str_len) { -        SPLIT_ADD(str, i, str_len); -    } -    FIX_PREALLOC_SIZE(list); -    return list; - -  onError: -    Py_DECREF(list); -    return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(split)(PyObject* str_obj, -                const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, -                Py_ssize_t maxcount) -{ -    Py_ssize_t i, j, pos, count=0; -    PyObject *list, *sub; - -    if (sep_len == 0) { -        PyErr_SetString(PyExc_ValueError, "empty separator"); -        return NULL; -    } -    else if (sep_len == 1) -        return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount); - -    list = PyList_New(PREALLOC_SIZE(maxcount)); -    if (list == NULL) -        return NULL; - -    i = j = 0; -    while (maxcount-- > 0) { -        pos = FASTSEARCH(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH); -        if (pos < 0) -            break; -        j = i + pos; -        SPLIT_ADD(str, i, j); -        i = j + sep_len; -    } -#ifndef STRINGLIB_MUTABLE -    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { -        /* No match in str_obj, so just use it as list[0] */ -        Py_INCREF(str_obj); -        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); -        count++; -    } else -#endif -    { -        SPLIT_ADD(str, i, str_len); -    } -    FIX_PREALLOC_SIZE(list); -    return list; - -  onError: -    Py_DECREF(list); -    return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(rsplit_whitespace)(PyObject* str_obj, -                            const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                            Py_ssize_t maxcount) -{ -    Py_ssize_t i, j, count=0; -    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); -    PyObject *sub; - -    if (list == NULL) -        return NULL; - -    i = j = str_len - 1; -    while (maxcount-- > 0) { -        while (i >= 0 && STRINGLIB_ISSPACE(str[i])) -            i--; -        if (i < 0) break; -        j = i; i--; -        while (i >= 0 && !STRINGLIB_ISSPACE(str[i])) -            i--; -#ifndef STRINGLIB_MUTABLE -        if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) { -            /* No whitespace in str_obj, so just use it as list[0] */ -            Py_INCREF(str_obj); -            PyList_SET_ITEM(list, 0, (PyObject *)str_obj); -            count++; -            break; -        } -#endif -        SPLIT_ADD(str, i + 1, j + 1); -    } - -    if (i >= 0) { -        /* Only occurs when maxcount was reached */ -        /* Skip any remaining whitespace and copy to beginning of string */ -        while (i >= 0 && STRINGLIB_ISSPACE(str[i])) -            i--; -        if (i >= 0) -            SPLIT_ADD(str, 0, i + 1); -    } -    FIX_PREALLOC_SIZE(list); -    if (PyList_Reverse(list) < 0) -        goto onError; -    return list; - -  onError: -    Py_DECREF(list); -    return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(rsplit_char)(PyObject* str_obj, -                      const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                      const STRINGLIB_CHAR ch, -                      Py_ssize_t maxcount) -{ -    Py_ssize_t i, j, count=0; -    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); -    PyObject *sub; - -    if (list == NULL) -        return NULL; - -    i = j = str_len - 1; -    while ((i >= 0) && (maxcount-- > 0)) { -        for(; i >= 0; i--) { -            if (str[i] == ch) { -                SPLIT_ADD(str, i + 1, j + 1); -                j = i = i - 1; -                break; -            } -        } -    } -#ifndef STRINGLIB_MUTABLE -    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { -        /* ch not in str_obj, so just use str_obj as list[0] */ -        Py_INCREF(str_obj); -        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); -        count++; -    } else -#endif -    if (j >= -1) { -        SPLIT_ADD(str, 0, j + 1); -    } -    FIX_PREALLOC_SIZE(list); -    if (PyList_Reverse(list) < 0) -        goto onError; -    return list; - -  onError: -    Py_DECREF(list); -    return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(rsplit)(PyObject* str_obj, -                 const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, -                 Py_ssize_t maxcount) -{ -    Py_ssize_t j, pos, count=0; -    PyObject *list, *sub; - -    if (sep_len == 0) { -        PyErr_SetString(PyExc_ValueError, "empty separator"); -        return NULL; -    } -    else if (sep_len == 1) -        return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount); - -    list = PyList_New(PREALLOC_SIZE(maxcount)); -    if (list == NULL) -        return NULL; - -    j = str_len; -    while (maxcount-- > 0) { -        pos = FASTSEARCH(str, j, sep, sep_len, -1, FAST_RSEARCH); -        if (pos < 0) -            break; -        SPLIT_ADD(str, pos + sep_len, j); -        j = pos; -    } -#ifndef STRINGLIB_MUTABLE -    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { -        /* No match in str_obj, so just use it as list[0] */ -        Py_INCREF(str_obj); -        PyList_SET_ITEM(list, 0, (PyObject *)str_obj); -        count++; -    } else -#endif -    { -        SPLIT_ADD(str, 0, j); -    } -    FIX_PREALLOC_SIZE(list); -    if (PyList_Reverse(list) < 0) -        goto onError; -    return list; - -  onError: -    Py_DECREF(list); -    return NULL; -} - -Py_LOCAL_INLINE(PyObject *) -STRINGLIB(splitlines)(PyObject* str_obj, -                     const STRINGLIB_CHAR* str, Py_ssize_t str_len, -                     int keepends) -{ -    /* This does not use the preallocated list because splitlines is -       usually run with hundreds of newlines.  The overhead of -       switching between PyList_SET_ITEM and append causes about a -       2-3% slowdown for that common case.  A smarter implementation -       could move the if check out, so the SET_ITEMs are done first -       and the appends only done when the prealloc buffer is full. -       That's too much work for little gain.*/ - -    Py_ssize_t i; -    Py_ssize_t j; -    PyObject *list = PyList_New(0); -    PyObject *sub; - -    if (list == NULL) -        return NULL; - -    for (i = j = 0; i < str_len; ) { -        Py_ssize_t eol; - -        /* Find a line and append it */ -        while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i])) -            i++; - -        /* Skip the line break reading CRLF as one line break */ -        eol = i; -        if (i < str_len) { -            if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n') -                i += 2; -            else -                i++; -            if (keepends) -                eol = i; -        } -#ifndef STRINGLIB_MUTABLE -        if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { -            /* No linebreak in str_obj, so just use it as list[0] */ -            if (PyList_Append(list, str_obj)) -                goto onError; -            break; -        } -#endif -        SPLIT_APPEND(str, j, eol); -        j = i; -    } -    return list; - -  onError: -    Py_DECREF(list); -    return NULL; -} - +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(split_whitespace)(PyObject* str_obj,  +                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                           Py_ssize_t maxcount)  +{  +    Py_ssize_t i, j, count=0;  +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));  +    PyObject *sub;  +  +    if (list == NULL)  +        return NULL;  +  +    i = j = 0;  +    while (maxcount-- > 0) {  +        while (i < str_len && STRINGLIB_ISSPACE(str[i]))  +            i++;  +        if (i == str_len) break;  +        j = i; i++;  +        while (i < str_len && !STRINGLIB_ISSPACE(str[i]))  +            i++;  +#ifndef STRINGLIB_MUTABLE  +        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {  +            /* No whitespace in str_obj, so just use it as list[0] */  +            Py_INCREF(str_obj);  +            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);  +            count++;  +            break;  +        }  +#endif  +        SPLIT_ADD(str, j, i);  +    }  +  +    if (i < str_len) {  +        /* Only occurs when maxcount was reached */  +        /* Skip any remaining whitespace and copy to end of string */  +        while (i < str_len && STRINGLIB_ISSPACE(str[i]))  +            i++;  +        if (i != str_len)  +            SPLIT_ADD(str, i, str_len);  +    }  +    FIX_PREALLOC_SIZE(list);  +    return list;  +  +  onError:  +    Py_DECREF(list);  +    return NULL;  +}  +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(split_char)(PyObject* str_obj,  +                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                     const STRINGLIB_CHAR ch,  +                     Py_ssize_t maxcount)  +{  +    Py_ssize_t i, j, count=0;  +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));  +    PyObject *sub;  +  +    if (list == NULL)  +        return NULL;  +  +    i = j = 0;  +    while ((j < str_len) && (maxcount-- > 0)) {  +        for(; j < str_len; j++) {  +            /* I found that using memchr makes no difference */  +            if (str[j] == ch) {  +                SPLIT_ADD(str, i, j);  +                i = j = j + 1;  +                break;  +            }  +        }  +    }  +#ifndef STRINGLIB_MUTABLE  +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {  +        /* ch not in str_obj, so just use str_obj as list[0] */  +        Py_INCREF(str_obj);  +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);  +        count++;  +    } else  +#endif  +    if (i <= str_len) {  +        SPLIT_ADD(str, i, str_len);  +    }  +    FIX_PREALLOC_SIZE(list);  +    return list;  +  +  onError:  +    Py_DECREF(list);  +    return NULL;  +}  +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(split)(PyObject* str_obj,  +                const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,  +                Py_ssize_t maxcount)  +{  +    Py_ssize_t i, j, pos, count=0;  +    PyObject *list, *sub;  +  +    if (sep_len == 0) {  +        PyErr_SetString(PyExc_ValueError, "empty separator");  +        return NULL;  +    }  +    else if (sep_len == 1)  +        return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount);  +  +    list = PyList_New(PREALLOC_SIZE(maxcount));  +    if (list == NULL)  +        return NULL;  +  +    i = j = 0;  +    while (maxcount-- > 0) {  +        pos = FASTSEARCH(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);  +        if (pos < 0)  +            break;  +        j = i + pos;  +        SPLIT_ADD(str, i, j);  +        i = j + sep_len;  +    }  +#ifndef STRINGLIB_MUTABLE  +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {  +        /* No match in str_obj, so just use it as list[0] */  +        Py_INCREF(str_obj);  +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);  +        count++;  +    } else  +#endif  +    {  +        SPLIT_ADD(str, i, str_len);  +    }  +    FIX_PREALLOC_SIZE(list);  +    return list;  +  +  onError:  +    Py_DECREF(list);  +    return NULL;  +}  +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(rsplit_whitespace)(PyObject* str_obj,  +                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                            Py_ssize_t maxcount)  +{  +    Py_ssize_t i, j, count=0;  +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));  +    PyObject *sub;  +  +    if (list == NULL)  +        return NULL;  +  +    i = j = str_len - 1;  +    while (maxcount-- > 0) {  +        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))  +            i--;  +        if (i < 0) break;  +        j = i; i--;  +        while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))  +            i--;  +#ifndef STRINGLIB_MUTABLE  +        if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {  +            /* No whitespace in str_obj, so just use it as list[0] */  +            Py_INCREF(str_obj);  +            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);  +            count++;  +            break;  +        }  +#endif  +        SPLIT_ADD(str, i + 1, j + 1);  +    }  +  +    if (i >= 0) {  +        /* Only occurs when maxcount was reached */  +        /* Skip any remaining whitespace and copy to beginning of string */  +        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))  +            i--;  +        if (i >= 0)  +            SPLIT_ADD(str, 0, i + 1);  +    }  +    FIX_PREALLOC_SIZE(list);  +    if (PyList_Reverse(list) < 0)  +        goto onError;  +    return list;  +  +  onError:  +    Py_DECREF(list);  +    return NULL;  +}  +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(rsplit_char)(PyObject* str_obj,  +                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                      const STRINGLIB_CHAR ch,  +                      Py_ssize_t maxcount)  +{  +    Py_ssize_t i, j, count=0;  +    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));  +    PyObject *sub;  +  +    if (list == NULL)  +        return NULL;  +  +    i = j = str_len - 1;  +    while ((i >= 0) && (maxcount-- > 0)) {  +        for(; i >= 0; i--) {  +            if (str[i] == ch) {  +                SPLIT_ADD(str, i + 1, j + 1);  +                j = i = i - 1;  +                break;  +            }  +        }  +    }  +#ifndef STRINGLIB_MUTABLE  +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {  +        /* ch not in str_obj, so just use str_obj as list[0] */  +        Py_INCREF(str_obj);  +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);  +        count++;  +    } else  +#endif  +    if (j >= -1) {  +        SPLIT_ADD(str, 0, j + 1);  +    }  +    FIX_PREALLOC_SIZE(list);  +    if (PyList_Reverse(list) < 0)  +        goto onError;  +    return list;  +  +  onError:  +    Py_DECREF(list);  +    return NULL;  +}  +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(rsplit)(PyObject* str_obj,  +                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,  +                 Py_ssize_t maxcount)  +{  +    Py_ssize_t j, pos, count=0;  +    PyObject *list, *sub;  +  +    if (sep_len == 0) {  +        PyErr_SetString(PyExc_ValueError, "empty separator");  +        return NULL;  +    }  +    else if (sep_len == 1)  +        return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount);  +  +    list = PyList_New(PREALLOC_SIZE(maxcount));  +    if (list == NULL)  +        return NULL;  +  +    j = str_len;  +    while (maxcount-- > 0) {  +        pos = FASTSEARCH(str, j, sep, sep_len, -1, FAST_RSEARCH);  +        if (pos < 0)  +            break;  +        SPLIT_ADD(str, pos + sep_len, j);  +        j = pos;  +    }  +#ifndef STRINGLIB_MUTABLE  +    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {  +        /* No match in str_obj, so just use it as list[0] */  +        Py_INCREF(str_obj);  +        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);  +        count++;  +    } else  +#endif  +    {  +        SPLIT_ADD(str, 0, j);  +    }  +    FIX_PREALLOC_SIZE(list);  +    if (PyList_Reverse(list) < 0)  +        goto onError;  +    return list;  +  +  onError:  +    Py_DECREF(list);  +    return NULL;  +}  +  +Py_LOCAL_INLINE(PyObject *)  +STRINGLIB(splitlines)(PyObject* str_obj,  +                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,  +                     int keepends)  +{  +    /* This does not use the preallocated list because splitlines is  +       usually run with hundreds of newlines.  The overhead of  +       switching between PyList_SET_ITEM and append causes about a  +       2-3% slowdown for that common case.  A smarter implementation  +       could move the if check out, so the SET_ITEMs are done first  +       and the appends only done when the prealloc buffer is full.  +       That's too much work for little gain.*/  +  +    Py_ssize_t i;  +    Py_ssize_t j;  +    PyObject *list = PyList_New(0);  +    PyObject *sub;  +  +    if (list == NULL)  +        return NULL;  +  +    for (i = j = 0; i < str_len; ) {  +        Py_ssize_t eol;  +  +        /* Find a line and append it */  +        while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))  +            i++;  +  +        /* Skip the line break reading CRLF as one line break */  +        eol = i;  +        if (i < str_len) {  +            if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')  +                i += 2;  +            else  +                i++;  +            if (keepends)  +                eol = i;  +        }  +#ifndef STRINGLIB_MUTABLE  +        if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {  +            /* No linebreak in str_obj, so just use it as list[0] */  +            if (PyList_Append(list, str_obj))  +                goto onError;  +            break;  +        }  +#endif  +        SPLIT_APPEND(str, j, eol);  +        j = i;  +    }  +    return list;  +  +  onError:  +    Py_DECREF(list);  +    return NULL;  +}  +  diff --git a/contrib/tools/python3/src/Objects/stringlib/stringdefs.h b/contrib/tools/python3/src/Objects/stringlib/stringdefs.h index ce27f3e4081..7628f538c99 100644 --- a/contrib/tools/python3/src/Objects/stringlib/stringdefs.h +++ b/contrib/tools/python3/src/Objects/stringlib/stringdefs.h @@ -1,28 +1,28 @@ -#ifndef STRINGLIB_STRINGDEFS_H -#define STRINGLIB_STRINGDEFS_H - -/* this is sort of a hack.  there's at least one place (formatting -   floats) where some stringlib code takes a different path if it's -   compiled as unicode. */ -#define STRINGLIB_IS_UNICODE     0 - -#define FASTSEARCH fastsearch -#define STRINGLIB(F) stringlib_##F -#define STRINGLIB_OBJECT         PyBytesObject -#define STRINGLIB_SIZEOF_CHAR    1 -#define STRINGLIB_CHAR           char -#define STRINGLIB_TYPE_NAME      "string" -#define STRINGLIB_PARSE_CODE     "S" -#define STRINGLIB_EMPTY          nullstring -#define STRINGLIB_ISSPACE        Py_ISSPACE -#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r')) -#define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9')) -#define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1) -#define STRINGLIB_STR            PyBytes_AS_STRING -#define STRINGLIB_LEN            PyBytes_GET_SIZE -#define STRINGLIB_NEW            PyBytes_FromStringAndSize -#define STRINGLIB_CHECK          PyBytes_Check -#define STRINGLIB_CHECK_EXACT    PyBytes_CheckExact -#define STRINGLIB_TOSTR          PyObject_Str -#define STRINGLIB_TOASCII        PyObject_Repr -#endif /* !STRINGLIB_STRINGDEFS_H */ +#ifndef STRINGLIB_STRINGDEFS_H  +#define STRINGLIB_STRINGDEFS_H  +  +/* this is sort of a hack.  there's at least one place (formatting  +   floats) where some stringlib code takes a different path if it's  +   compiled as unicode. */  +#define STRINGLIB_IS_UNICODE     0  +  +#define FASTSEARCH fastsearch  +#define STRINGLIB(F) stringlib_##F  +#define STRINGLIB_OBJECT         PyBytesObject  +#define STRINGLIB_SIZEOF_CHAR    1  +#define STRINGLIB_CHAR           char  +#define STRINGLIB_TYPE_NAME      "string"  +#define STRINGLIB_PARSE_CODE     "S"  +#define STRINGLIB_EMPTY          nullstring  +#define STRINGLIB_ISSPACE        Py_ISSPACE  +#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))  +#define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9'))  +#define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)  +#define STRINGLIB_STR            PyBytes_AS_STRING  +#define STRINGLIB_LEN            PyBytes_GET_SIZE  +#define STRINGLIB_NEW            PyBytes_FromStringAndSize  +#define STRINGLIB_CHECK          PyBytes_Check  +#define STRINGLIB_CHECK_EXACT    PyBytes_CheckExact  +#define STRINGLIB_TOSTR          PyObject_Str  +#define STRINGLIB_TOASCII        PyObject_Repr  +#endif /* !STRINGLIB_STRINGDEFS_H */  diff --git a/contrib/tools/python3/src/Objects/stringlib/transmogrify.h b/contrib/tools/python3/src/Objects/stringlib/transmogrify.h index e1165ea38e8..33c6abba095 100644 --- a/contrib/tools/python3/src/Objects/stringlib/transmogrify.h +++ b/contrib/tools/python3/src/Objects/stringlib/transmogrify.h @@ -1,10 +1,10 @@ -#if STRINGLIB_IS_UNICODE -# error "transmogrify.h only compatible with byte-wise strings" -#endif - -/* the more complicated methods.  parts of these should be pulled out into the -   shared code in bytes_methods.c to cut down on duplicate code bloat.  */ - +#if STRINGLIB_IS_UNICODE  +# error "transmogrify.h only compatible with byte-wise strings"  +#endif  +  +/* the more complicated methods.  parts of these should be pulled out into the  +   shared code in bytes_methods.c to cut down on duplicate code bloat.  */  +   /*[clinic input]  class B "PyObject *" "&PyType_Type"  [clinic start generated code]*/ @@ -12,18 +12,18 @@ class B "PyObject *" "&PyType_Type"  #include "clinic/transmogrify.h.h" -static inline PyObject * -return_self(PyObject *self) -{ -#if !STRINGLIB_MUTABLE -    if (STRINGLIB_CHECK_EXACT(self)) { -        Py_INCREF(self); -        return self; -    } -#endif -    return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -} - +static inline PyObject *  +return_self(PyObject *self)  +{  +#if !STRINGLIB_MUTABLE  +    if (STRINGLIB_CHECK_EXACT(self)) {  +        Py_INCREF(self);  +        return self;  +    }  +#endif  +    return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));  +}  +   /*[clinic input]  B.expandtabs as stringlib_expandtabs @@ -37,100 +37,100 @@ If tabsize is not given, a tab size of 8 characters is assumed.  static PyObject *  stringlib_expandtabs_impl(PyObject *self, int tabsize)  /*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/ -{ -    const char *e, *p; -    char *q; -    Py_ssize_t i, j; -    PyObject *u; - -    /* First pass: determine size of output string */ -    i = j = 0; -    e = STRINGLIB_STR(self) + STRINGLIB_LEN(self); -    for (p = STRINGLIB_STR(self); p < e; p++) { -        if (*p == '\t') { -            if (tabsize > 0) { -                Py_ssize_t incr = tabsize - (j % tabsize); -                if (j > PY_SSIZE_T_MAX - incr) -                    goto overflow; -                j += incr; -            } -        } -        else { -            if (j > PY_SSIZE_T_MAX - 1) -                goto overflow; -            j++; -            if (*p == '\n' || *p == '\r') { -                if (i > PY_SSIZE_T_MAX - j) -                    goto overflow; -                i += j; -                j = 0; -            } -        } -    } - -    if (i > PY_SSIZE_T_MAX - j) -        goto overflow; - -    /* Second pass: create output string and fill it */ -    u = STRINGLIB_NEW(NULL, i + j); -    if (!u) -        return NULL; - -    j = 0; -    q = STRINGLIB_STR(u); - -    for (p = STRINGLIB_STR(self); p < e; p++) { -        if (*p == '\t') { -            if (tabsize > 0) { -                i = tabsize - (j % tabsize); -                j += i; -                while (i--) -                    *q++ = ' '; -            } -        } -        else { -            j++; -            *q++ = *p; -            if (*p == '\n' || *p == '\r') -                j = 0; -        } -    } - -    return u; -  overflow: -    PyErr_SetString(PyExc_OverflowError, "result too long"); -    return NULL; -} - -static inline PyObject * -pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill) -{ -    PyObject *u; - -    if (left < 0) -        left = 0; -    if (right < 0) -        right = 0; - -    if (left == 0 && right == 0) { -        return return_self(self); -    } - -    u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right); -    if (u) { -        if (left) -            memset(STRINGLIB_STR(u), fill, left); -        memcpy(STRINGLIB_STR(u) + left, -               STRINGLIB_STR(self), -               STRINGLIB_LEN(self)); -        if (right) -            memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), -                   fill, right); -    } - -    return u; -} - +{  +    const char *e, *p;  +    char *q;  +    Py_ssize_t i, j;  +    PyObject *u;  +  +    /* First pass: determine size of output string */  +    i = j = 0;  +    e = STRINGLIB_STR(self) + STRINGLIB_LEN(self);  +    for (p = STRINGLIB_STR(self); p < e; p++) {  +        if (*p == '\t') {  +            if (tabsize > 0) {  +                Py_ssize_t incr = tabsize - (j % tabsize);  +                if (j > PY_SSIZE_T_MAX - incr)  +                    goto overflow;  +                j += incr;  +            }  +        }  +        else {  +            if (j > PY_SSIZE_T_MAX - 1)  +                goto overflow;  +            j++;  +            if (*p == '\n' || *p == '\r') {  +                if (i > PY_SSIZE_T_MAX - j)  +                    goto overflow;  +                i += j;  +                j = 0;  +            }  +        }  +    }  +  +    if (i > PY_SSIZE_T_MAX - j)  +        goto overflow;  +  +    /* Second pass: create output string and fill it */  +    u = STRINGLIB_NEW(NULL, i + j);  +    if (!u)  +        return NULL;  +  +    j = 0;  +    q = STRINGLIB_STR(u);  +  +    for (p = STRINGLIB_STR(self); p < e; p++) {  +        if (*p == '\t') {  +            if (tabsize > 0) {  +                i = tabsize - (j % tabsize);  +                j += i;  +                while (i--)  +                    *q++ = ' ';  +            }  +        }  +        else {  +            j++;  +            *q++ = *p;  +            if (*p == '\n' || *p == '\r')  +                j = 0;  +        }  +    }  +  +    return u;  +  overflow:  +    PyErr_SetString(PyExc_OverflowError, "result too long");  +    return NULL;  +}  +  +static inline PyObject *  +pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)  +{  +    PyObject *u;  +  +    if (left < 0)  +        left = 0;  +    if (right < 0)  +        right = 0;  +  +    if (left == 0 && right == 0) {  +        return return_self(self);  +    }  +  +    u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right);  +    if (u) {  +        if (left)  +            memset(STRINGLIB_STR(u), fill, left);  +        memcpy(STRINGLIB_STR(u) + left,  +               STRINGLIB_STR(self),  +               STRINGLIB_LEN(self));  +        if (right)  +            memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self),  +                   fill, right);  +    }  +  +    return u;  +}  +   /*[clinic input]  B.ljust as stringlib_ljust @@ -143,18 +143,18 @@ Return a left-justified string of length width.  Padding is done using the specified fill character.  [clinic start generated code]*/ -static PyObject * +static PyObject *   stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)  /*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/ -{ -    if (STRINGLIB_LEN(self) >= width) { -        return return_self(self); -    } - -    return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); -} - - +{  +    if (STRINGLIB_LEN(self) >= width) {  +        return return_self(self);  +    }  +  +    return pad(self, 0, width - STRINGLIB_LEN(self), fillchar);  +}  +  +   /*[clinic input]  B.rjust as stringlib_rjust @@ -167,18 +167,18 @@ Return a right-justified string of length width.  Padding is done using the specified fill character.  [clinic start generated code]*/ -static PyObject * +static PyObject *   stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)  /*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/ -{ -    if (STRINGLIB_LEN(self) >= width) { -        return return_self(self); -    } - -    return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); -} - - +{  +    if (STRINGLIB_LEN(self) >= width) {  +        return return_self(self);  +    }  +  +    return pad(self, width - STRINGLIB_LEN(self), 0, fillchar);  +}  +  +   /*[clinic input]  B.center as stringlib_center @@ -191,22 +191,22 @@ Return a centered string of length width.  Padding is done using the specified fill character.  [clinic start generated code]*/ -static PyObject * +static PyObject *   stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)  /*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/ -{ -    Py_ssize_t marg, left; - -    if (STRINGLIB_LEN(self) >= width) { -        return return_self(self); -    } - -    marg = width - STRINGLIB_LEN(self); -    left = marg / 2 + (marg & width & 1); - -    return pad(self, left, marg - left, fillchar); -} - +{  +    Py_ssize_t marg, left;  +  +    if (STRINGLIB_LEN(self) >= width) {  +        return return_self(self);  +    }  +  +    marg = width - STRINGLIB_LEN(self);  +    left = marg / 2 + (marg & width & 1);  +  +    return pad(self, left, marg - left, fillchar);  +}  +   /*[clinic input]  B.zfill as stringlib_zfill @@ -218,523 +218,523 @@ Pad a numeric string with zeros on the left, to fill a field of the given width.  The original string is never truncated.  [clinic start generated code]*/ -static PyObject * +static PyObject *   stringlib_zfill_impl(PyObject *self, Py_ssize_t width)  /*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/ -{ -    Py_ssize_t fill; -    PyObject *s; -    char *p; - -    if (STRINGLIB_LEN(self) >= width) { -        return return_self(self); -    } - -    fill = width - STRINGLIB_LEN(self); - -    s = pad(self, fill, 0, '0'); - -    if (s == NULL) -        return NULL; - -    p = STRINGLIB_STR(s); -    if (p[fill] == '+' || p[fill] == '-') { -        /* move sign to beginning of string */ -        p[0] = p[fill]; -        p[fill] = '0'; -    } - -    return s; -} - - -/* find and count characters and substrings */ - -#define findchar(target, target_len, c)                         \ -  ((char *)memchr((const void *)(target), c, target_len)) - - -static Py_ssize_t -countchar(const char *target, Py_ssize_t target_len, char c, -          Py_ssize_t maxcount) -{ -    Py_ssize_t count = 0; -    const char *start = target; -    const char *end = target + target_len; - -    while ((start = findchar(start, end - start, c)) != NULL) { -        count++; -        if (count >= maxcount) -            break; -        start += 1; -    } -    return count; -} - - -/* Algorithms for different cases of string replacement */ - -/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ -static PyObject * -stringlib_replace_interleave(PyObject *self, -                             const char *to_s, Py_ssize_t to_len, -                             Py_ssize_t maxcount) -{ -    const char *self_s; -    char *result_s; -    Py_ssize_t self_len, result_len; -    Py_ssize_t count, i; -    PyObject *result; - -    self_len = STRINGLIB_LEN(self); - -    /* 1 at the end plus 1 after every character; -       count = min(maxcount, self_len + 1) */ -    if (maxcount <= self_len) { -        count = maxcount; -    } -    else { -        /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ -        count = self_len + 1; -    } - -    /* Check for overflow */ -    /*   result_len = count * to_len + self_len; */ -    assert(count > 0); -    if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { -        PyErr_SetString(PyExc_OverflowError, -                        "replace bytes is too long"); -        return NULL; -    } -    result_len = count * to_len + self_len; -    result = STRINGLIB_NEW(NULL, result_len); -    if (result == NULL) { -        return NULL; -    } - -    self_s = STRINGLIB_STR(self); -    result_s = STRINGLIB_STR(result); - -    if (to_len > 1) { -        /* Lay the first one down (guaranteed this will occur) */ -        memcpy(result_s, to_s, to_len); -        result_s += to_len; -        count -= 1; - -        for (i = 0; i < count; i++) { -            *result_s++ = *self_s++; -            memcpy(result_s, to_s, to_len); -            result_s += to_len; -        } -    } -    else { -        result_s[0] = to_s[0]; -        result_s += to_len; -        count -= 1; -        for (i = 0; i < count; i++) { -            *result_s++ = *self_s++; -            result_s[0] = to_s[0]; -            result_s += to_len; -        } -    } - -    /* Copy the rest of the original string */ -    memcpy(result_s, self_s, self_len - i); - -    return result; -} - -/* Special case for deleting a single character */ -/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ -static PyObject * -stringlib_replace_delete_single_character(PyObject *self, -                                          char from_c, Py_ssize_t maxcount) -{ -    const char *self_s, *start, *next, *end; -    char *result_s; -    Py_ssize_t self_len, result_len; -    Py_ssize_t count; -    PyObject *result; - -    self_len = STRINGLIB_LEN(self); -    self_s = STRINGLIB_STR(self); - -    count = countchar(self_s, self_len, from_c, maxcount); -    if (count == 0) { -        return return_self(self); -    } - -    result_len = self_len - count;  /* from_len == 1 */ -    assert(result_len>=0); - -    result = STRINGLIB_NEW(NULL, result_len); -    if (result == NULL) { -        return NULL; -    } -    result_s = STRINGLIB_STR(result); - -    start = self_s; -    end = self_s + self_len; -    while (count-- > 0) { -        next = findchar(start, end - start, from_c); -        if (next == NULL) -            break; -        memcpy(result_s, start, next - start); -        result_s += (next - start); -        start = next + 1; -    } -    memcpy(result_s, start, end - start); - -    return result; -} - -/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ - -static PyObject * -stringlib_replace_delete_substring(PyObject *self, -                                   const char *from_s, Py_ssize_t from_len, -                                   Py_ssize_t maxcount) -{ -    const char *self_s, *start, *next, *end; -    char *result_s; -    Py_ssize_t self_len, result_len; -    Py_ssize_t count, offset; -    PyObject *result; - -    self_len = STRINGLIB_LEN(self); -    self_s = STRINGLIB_STR(self); - -    count = stringlib_count(self_s, self_len, -                            from_s, from_len, -                            maxcount); - -    if (count == 0) { -        /* no matches */ -        return return_self(self); -    } - -    result_len = self_len - (count * from_len); -    assert (result_len>=0); - -    result = STRINGLIB_NEW(NULL, result_len); -    if (result == NULL) { -        return NULL; -    } -    result_s = STRINGLIB_STR(result); - -    start = self_s; -    end = self_s + self_len; -    while (count-- > 0) { -        offset = stringlib_find(start, end - start, -                                from_s, from_len, -                                0); -        if (offset == -1) -            break; -        next = start + offset; - -        memcpy(result_s, start, next - start); - -        result_s += (next - start); -        start = next + from_len; -    } -    memcpy(result_s, start, end - start); -    return result; -} - -/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ -static PyObject * -stringlib_replace_single_character_in_place(PyObject *self, -                                            char from_c, char to_c, -                                            Py_ssize_t maxcount) -{ -    const char *self_s, *end; -    char *result_s, *start, *next; -    Py_ssize_t self_len; -    PyObject *result; - -    /* The result string will be the same size */ -    self_s = STRINGLIB_STR(self); -    self_len = STRINGLIB_LEN(self); - -    next = findchar(self_s, self_len, from_c); - -    if (next == NULL) { -        /* No matches; return the original bytes */ -        return return_self(self); -    } - -    /* Need to make a new bytes */ -    result = STRINGLIB_NEW(NULL, self_len); -    if (result == NULL) { -        return NULL; -    } -    result_s = STRINGLIB_STR(result); -    memcpy(result_s, self_s, self_len); - -    /* change everything in-place, starting with this one */ -    start =  result_s + (next - self_s); -    *start = to_c; -    start++; -    end = result_s + self_len; - -    while (--maxcount > 0) { -        next = findchar(start, end - start, from_c); -        if (next == NULL) -            break; -        *next = to_c; -        start = next + 1; -    } - -    return result; -} - -/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ -static PyObject * -stringlib_replace_substring_in_place(PyObject *self, -                                     const char *from_s, Py_ssize_t from_len, -                                     const char *to_s, Py_ssize_t to_len, -                                     Py_ssize_t maxcount) -{ -    const char *self_s, *end; -    char *result_s, *start; -    Py_ssize_t self_len, offset; -    PyObject *result; - -    /* The result bytes will be the same size */ - -    self_s = STRINGLIB_STR(self); -    self_len = STRINGLIB_LEN(self); - -    offset = stringlib_find(self_s, self_len, -                            from_s, from_len, -                            0); -    if (offset == -1) { -        /* No matches; return the original bytes */ -        return return_self(self); -    } - -    /* Need to make a new bytes */ -    result = STRINGLIB_NEW(NULL, self_len); -    if (result == NULL) { -        return NULL; -    } -    result_s = STRINGLIB_STR(result); -    memcpy(result_s, self_s, self_len); - -    /* change everything in-place, starting with this one */ -    start =  result_s + offset; -    memcpy(start, to_s, from_len); -    start += from_len; -    end = result_s + self_len; - -    while ( --maxcount > 0) { -        offset = stringlib_find(start, end - start, -                                from_s, from_len, -                                0); -        if (offset == -1) -            break; -        memcpy(start + offset, to_s, from_len); -        start += offset + from_len; -    } - -    return result; -} - -/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ -static PyObject * -stringlib_replace_single_character(PyObject *self, -                                   char from_c, -                                   const char *to_s, Py_ssize_t to_len, -                                   Py_ssize_t maxcount) -{ -    const char *self_s, *start, *next, *end; -    char *result_s; -    Py_ssize_t self_len, result_len; -    Py_ssize_t count; -    PyObject *result; - -    self_s = STRINGLIB_STR(self); -    self_len = STRINGLIB_LEN(self); - -    count = countchar(self_s, self_len, from_c, maxcount); -    if (count == 0) { -        /* no matches, return unchanged */ -        return return_self(self); -    } - -    /* use the difference between current and new, hence the "-1" */ -    /*   result_len = self_len + count * (to_len-1)  */ -    assert(count > 0); -    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { -        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); -        return NULL; -    } -    result_len = self_len + count * (to_len - 1); - -    result = STRINGLIB_NEW(NULL, result_len); -    if (result == NULL) { -        return NULL; -    } -    result_s = STRINGLIB_STR(result); - -    start = self_s; -    end = self_s + self_len; -    while (count-- > 0) { -        next = findchar(start, end - start, from_c); -        if (next == NULL) -            break; - -        if (next == start) { -            /* replace with the 'to' */ -            memcpy(result_s, to_s, to_len); -            result_s += to_len; -            start += 1; -        } else { -            /* copy the unchanged old then the 'to' */ -            memcpy(result_s, start, next - start); -            result_s += (next - start); -            memcpy(result_s, to_s, to_len); -            result_s += to_len; -            start = next + 1; -        } -    } -    /* Copy the remainder of the remaining bytes */ -    memcpy(result_s, start, end - start); - -    return result; -} - -/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ -static PyObject * -stringlib_replace_substring(PyObject *self, -                            const char *from_s, Py_ssize_t from_len, -                            const char *to_s, Py_ssize_t to_len, -                            Py_ssize_t maxcount) -{ -    const char *self_s, *start, *next, *end; -    char *result_s; -    Py_ssize_t self_len, result_len; -    Py_ssize_t count, offset; -    PyObject *result; - -    self_s = STRINGLIB_STR(self); -    self_len = STRINGLIB_LEN(self); - -    count = stringlib_count(self_s, self_len, -                            from_s, from_len, -                            maxcount); - -    if (count == 0) { -        /* no matches, return unchanged */ -        return return_self(self); -    } - -    /* Check for overflow */ -    /*    result_len = self_len + count * (to_len-from_len) */ -    assert(count > 0); -    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { -        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); -        return NULL; -    } -    result_len = self_len + count * (to_len - from_len); - -    result = STRINGLIB_NEW(NULL, result_len); -    if (result == NULL) { -        return NULL; -    } -    result_s = STRINGLIB_STR(result); - -    start = self_s; -    end = self_s + self_len; -    while (count-- > 0) { -        offset = stringlib_find(start, end - start, -                                from_s, from_len, -                                0); -        if (offset == -1) -            break; -        next = start + offset; -        if (next == start) { -            /* replace with the 'to' */ -            memcpy(result_s, to_s, to_len); -            result_s += to_len; -            start += from_len; -        } else { -            /* copy the unchanged old then the 'to' */ -            memcpy(result_s, start, next - start); -            result_s += (next - start); -            memcpy(result_s, to_s, to_len); -            result_s += to_len; -            start = next + from_len; -        } -    } -    /* Copy the remainder of the remaining bytes */ -    memcpy(result_s, start, end - start); - -    return result; -} - - -static PyObject * -stringlib_replace(PyObject *self, -                  const char *from_s, Py_ssize_t from_len, -                  const char *to_s, Py_ssize_t to_len, -                  Py_ssize_t maxcount) -{ +{  +    Py_ssize_t fill;  +    PyObject *s;  +    char *p;  +  +    if (STRINGLIB_LEN(self) >= width) {  +        return return_self(self);  +    }  +  +    fill = width - STRINGLIB_LEN(self);  +  +    s = pad(self, fill, 0, '0');  +  +    if (s == NULL)  +        return NULL;  +  +    p = STRINGLIB_STR(s);  +    if (p[fill] == '+' || p[fill] == '-') {  +        /* move sign to beginning of string */  +        p[0] = p[fill];  +        p[fill] = '0';  +    }  +  +    return s;  +}  +  +  +/* find and count characters and substrings */  +  +#define findchar(target, target_len, c)                         \  +  ((char *)memchr((const void *)(target), c, target_len))  +  +  +static Py_ssize_t  +countchar(const char *target, Py_ssize_t target_len, char c,  +          Py_ssize_t maxcount)  +{  +    Py_ssize_t count = 0;  +    const char *start = target;  +    const char *end = target + target_len;  +  +    while ((start = findchar(start, end - start, c)) != NULL) {  +        count++;  +        if (count >= maxcount)  +            break;  +        start += 1;  +    }  +    return count;  +}  +  +  +/* Algorithms for different cases of string replacement */  +  +/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */  +static PyObject *  +stringlib_replace_interleave(PyObject *self,  +                             const char *to_s, Py_ssize_t to_len,  +                             Py_ssize_t maxcount)  +{  +    const char *self_s;  +    char *result_s;  +    Py_ssize_t self_len, result_len;  +    Py_ssize_t count, i;  +    PyObject *result;  +  +    self_len = STRINGLIB_LEN(self);  +  +    /* 1 at the end plus 1 after every character;  +       count = min(maxcount, self_len + 1) */  +    if (maxcount <= self_len) {  +        count = maxcount;  +    }  +    else {  +        /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */  +        count = self_len + 1;  +    }  +  +    /* Check for overflow */  +    /*   result_len = count * to_len + self_len; */  +    assert(count > 0);  +    if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {  +        PyErr_SetString(PyExc_OverflowError,  +                        "replace bytes is too long");  +        return NULL;  +    }  +    result_len = count * to_len + self_len;  +    result = STRINGLIB_NEW(NULL, result_len);  +    if (result == NULL) {  +        return NULL;  +    }  +  +    self_s = STRINGLIB_STR(self);  +    result_s = STRINGLIB_STR(result);  +  +    if (to_len > 1) {  +        /* Lay the first one down (guaranteed this will occur) */  +        memcpy(result_s, to_s, to_len);  +        result_s += to_len;  +        count -= 1;  +  +        for (i = 0; i < count; i++) {  +            *result_s++ = *self_s++;  +            memcpy(result_s, to_s, to_len);  +            result_s += to_len;  +        }  +    }  +    else {  +        result_s[0] = to_s[0];  +        result_s += to_len;  +        count -= 1;  +        for (i = 0; i < count; i++) {  +            *result_s++ = *self_s++;  +            result_s[0] = to_s[0];  +            result_s += to_len;  +        }  +    }  +  +    /* Copy the rest of the original string */  +    memcpy(result_s, self_s, self_len - i);  +  +    return result;  +}  +  +/* Special case for deleting a single character */  +/* len(self)>=1, len(from)==1, to="", maxcount>=1 */  +static PyObject *  +stringlib_replace_delete_single_character(PyObject *self,  +                                          char from_c, Py_ssize_t maxcount)  +{  +    const char *self_s, *start, *next, *end;  +    char *result_s;  +    Py_ssize_t self_len, result_len;  +    Py_ssize_t count;  +    PyObject *result;  +  +    self_len = STRINGLIB_LEN(self);  +    self_s = STRINGLIB_STR(self);  +  +    count = countchar(self_s, self_len, from_c, maxcount);  +    if (count == 0) {  +        return return_self(self);  +    }  +  +    result_len = self_len - count;  /* from_len == 1 */  +    assert(result_len>=0);  +  +    result = STRINGLIB_NEW(NULL, result_len);  +    if (result == NULL) {  +        return NULL;  +    }  +    result_s = STRINGLIB_STR(result);  +  +    start = self_s;  +    end = self_s + self_len;  +    while (count-- > 0) {  +        next = findchar(start, end - start, from_c);  +        if (next == NULL)  +            break;  +        memcpy(result_s, start, next - start);  +        result_s += (next - start);  +        start = next + 1;  +    }  +    memcpy(result_s, start, end - start);  +  +    return result;  +}  +  +/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */  +  +static PyObject *  +stringlib_replace_delete_substring(PyObject *self,  +                                   const char *from_s, Py_ssize_t from_len,  +                                   Py_ssize_t maxcount)  +{  +    const char *self_s, *start, *next, *end;  +    char *result_s;  +    Py_ssize_t self_len, result_len;  +    Py_ssize_t count, offset;  +    PyObject *result;  +  +    self_len = STRINGLIB_LEN(self);  +    self_s = STRINGLIB_STR(self);  +  +    count = stringlib_count(self_s, self_len,  +                            from_s, from_len,  +                            maxcount);  +  +    if (count == 0) {  +        /* no matches */  +        return return_self(self);  +    }  +  +    result_len = self_len - (count * from_len);  +    assert (result_len>=0);  +  +    result = STRINGLIB_NEW(NULL, result_len);  +    if (result == NULL) {  +        return NULL;  +    }  +    result_s = STRINGLIB_STR(result);  +  +    start = self_s;  +    end = self_s + self_len;  +    while (count-- > 0) {  +        offset = stringlib_find(start, end - start,  +                                from_s, from_len,  +                                0);  +        if (offset == -1)  +            break;  +        next = start + offset;  +  +        memcpy(result_s, start, next - start);  +  +        result_s += (next - start);  +        start = next + from_len;  +    }  +    memcpy(result_s, start, end - start);  +    return result;  +}  +  +/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */  +static PyObject *  +stringlib_replace_single_character_in_place(PyObject *self,  +                                            char from_c, char to_c,  +                                            Py_ssize_t maxcount)  +{  +    const char *self_s, *end;  +    char *result_s, *start, *next;  +    Py_ssize_t self_len;  +    PyObject *result;  +  +    /* The result string will be the same size */  +    self_s = STRINGLIB_STR(self);  +    self_len = STRINGLIB_LEN(self);  +  +    next = findchar(self_s, self_len, from_c);  +  +    if (next == NULL) {  +        /* No matches; return the original bytes */  +        return return_self(self);  +    }  +  +    /* Need to make a new bytes */  +    result = STRINGLIB_NEW(NULL, self_len);  +    if (result == NULL) {  +        return NULL;  +    }  +    result_s = STRINGLIB_STR(result);  +    memcpy(result_s, self_s, self_len);  +  +    /* change everything in-place, starting with this one */  +    start =  result_s + (next - self_s);  +    *start = to_c;  +    start++;  +    end = result_s + self_len;  +  +    while (--maxcount > 0) {  +        next = findchar(start, end - start, from_c);  +        if (next == NULL)  +            break;  +        *next = to_c;  +        start = next + 1;  +    }  +  +    return result;  +}  +  +/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */  +static PyObject *  +stringlib_replace_substring_in_place(PyObject *self,  +                                     const char *from_s, Py_ssize_t from_len,  +                                     const char *to_s, Py_ssize_t to_len,  +                                     Py_ssize_t maxcount)  +{  +    const char *self_s, *end;  +    char *result_s, *start;  +    Py_ssize_t self_len, offset;  +    PyObject *result;  +  +    /* The result bytes will be the same size */  +  +    self_s = STRINGLIB_STR(self);  +    self_len = STRINGLIB_LEN(self);  +  +    offset = stringlib_find(self_s, self_len,  +                            from_s, from_len,  +                            0);  +    if (offset == -1) {  +        /* No matches; return the original bytes */  +        return return_self(self);  +    }  +  +    /* Need to make a new bytes */  +    result = STRINGLIB_NEW(NULL, self_len);  +    if (result == NULL) {  +        return NULL;  +    }  +    result_s = STRINGLIB_STR(result);  +    memcpy(result_s, self_s, self_len);  +  +    /* change everything in-place, starting with this one */  +    start =  result_s + offset;  +    memcpy(start, to_s, from_len);  +    start += from_len;  +    end = result_s + self_len;  +  +    while ( --maxcount > 0) {  +        offset = stringlib_find(start, end - start,  +                                from_s, from_len,  +                                0);  +        if (offset == -1)  +            break;  +        memcpy(start + offset, to_s, from_len);  +        start += offset + from_len;  +    }  +  +    return result;  +}  +  +/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */  +static PyObject *  +stringlib_replace_single_character(PyObject *self,  +                                   char from_c,  +                                   const char *to_s, Py_ssize_t to_len,  +                                   Py_ssize_t maxcount)  +{  +    const char *self_s, *start, *next, *end;  +    char *result_s;  +    Py_ssize_t self_len, result_len;  +    Py_ssize_t count;  +    PyObject *result;  +  +    self_s = STRINGLIB_STR(self);  +    self_len = STRINGLIB_LEN(self);  +  +    count = countchar(self_s, self_len, from_c, maxcount);  +    if (count == 0) {  +        /* no matches, return unchanged */  +        return return_self(self);  +    }  +  +    /* use the difference between current and new, hence the "-1" */  +    /*   result_len = self_len + count * (to_len-1)  */  +    assert(count > 0);  +    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {  +        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");  +        return NULL;  +    }  +    result_len = self_len + count * (to_len - 1);  +  +    result = STRINGLIB_NEW(NULL, result_len);  +    if (result == NULL) {  +        return NULL;  +    }  +    result_s = STRINGLIB_STR(result);  +  +    start = self_s;  +    end = self_s + self_len;  +    while (count-- > 0) {  +        next = findchar(start, end - start, from_c);  +        if (next == NULL)  +            break;  +  +        if (next == start) {  +            /* replace with the 'to' */  +            memcpy(result_s, to_s, to_len);  +            result_s += to_len;  +            start += 1;  +        } else {  +            /* copy the unchanged old then the 'to' */  +            memcpy(result_s, start, next - start);  +            result_s += (next - start);  +            memcpy(result_s, to_s, to_len);  +            result_s += to_len;  +            start = next + 1;  +        }  +    }  +    /* Copy the remainder of the remaining bytes */  +    memcpy(result_s, start, end - start);  +  +    return result;  +}  +  +/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */  +static PyObject *  +stringlib_replace_substring(PyObject *self,  +                            const char *from_s, Py_ssize_t from_len,  +                            const char *to_s, Py_ssize_t to_len,  +                            Py_ssize_t maxcount)  +{  +    const char *self_s, *start, *next, *end;  +    char *result_s;  +    Py_ssize_t self_len, result_len;  +    Py_ssize_t count, offset;  +    PyObject *result;  +  +    self_s = STRINGLIB_STR(self);  +    self_len = STRINGLIB_LEN(self);  +  +    count = stringlib_count(self_s, self_len,  +                            from_s, from_len,  +                            maxcount);  +  +    if (count == 0) {  +        /* no matches, return unchanged */  +        return return_self(self);  +    }  +  +    /* Check for overflow */  +    /*    result_len = self_len + count * (to_len-from_len) */  +    assert(count > 0);  +    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {  +        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");  +        return NULL;  +    }  +    result_len = self_len + count * (to_len - from_len);  +  +    result = STRINGLIB_NEW(NULL, result_len);  +    if (result == NULL) {  +        return NULL;  +    }  +    result_s = STRINGLIB_STR(result);  +  +    start = self_s;  +    end = self_s + self_len;  +    while (count-- > 0) {  +        offset = stringlib_find(start, end - start,  +                                from_s, from_len,  +                                0);  +        if (offset == -1)  +            break;  +        next = start + offset;  +        if (next == start) {  +            /* replace with the 'to' */  +            memcpy(result_s, to_s, to_len);  +            result_s += to_len;  +            start += from_len;  +        } else {  +            /* copy the unchanged old then the 'to' */  +            memcpy(result_s, start, next - start);  +            result_s += (next - start);  +            memcpy(result_s, to_s, to_len);  +            result_s += to_len;  +            start = next + from_len;  +        }  +    }  +    /* Copy the remainder of the remaining bytes */  +    memcpy(result_s, start, end - start);  +  +    return result;  +}  +  +  +static PyObject *  +stringlib_replace(PyObject *self,  +                  const char *from_s, Py_ssize_t from_len,  +                  const char *to_s, Py_ssize_t to_len,  +                  Py_ssize_t maxcount)  +{       if (STRINGLIB_LEN(self) < from_len) {          /* nothing to do; return the original bytes */          return return_self(self);      } -    if (maxcount < 0) { -        maxcount = PY_SSIZE_T_MAX; +    if (maxcount < 0) {  +        maxcount = PY_SSIZE_T_MAX;       } else if (maxcount == 0) { -        /* nothing to do; return the original bytes */ -        return return_self(self); -    } - -    /* Handle zero-length special cases */ -    if (from_len == 0) { -        if (to_len == 0) { -            /* nothing to do; return the original bytes */ -            return return_self(self); -        } -        /* insert the 'to' bytes everywhere.    */ -        /*    >>> b"Python".replace(b"", b".")  */ -        /*    b'.P.y.t.h.o.n.'                  */ -        return stringlib_replace_interleave(self, to_s, to_len, maxcount); -    } - -    if (to_len == 0) { -        /* delete all occurrences of 'from' bytes */ -        if (from_len == 1) { -            return stringlib_replace_delete_single_character( -                self, from_s[0], maxcount); -        } else { -            return stringlib_replace_delete_substring( -                self, from_s, from_len, maxcount); -        } -    } - -    /* Handle special case where both bytes have the same length */ - -    if (from_len == to_len) { -        if (from_len == 1) { -            return stringlib_replace_single_character_in_place( -                self, from_s[0], to_s[0], maxcount); -        } else { -            return stringlib_replace_substring_in_place( -                self, from_s, from_len, to_s, to_len, maxcount); -        } -    } - -    /* Otherwise use the more generic algorithms */ -    if (from_len == 1) { -        return stringlib_replace_single_character( -            self, from_s[0], to_s, to_len, maxcount); -    } else { -        /* len('from')>=2, len('to')>=1 */ -        return stringlib_replace_substring( -            self, from_s, from_len, to_s, to_len, maxcount); -    } -} - -#undef findchar +        /* nothing to do; return the original bytes */  +        return return_self(self);  +    }  +  +    /* Handle zero-length special cases */  +    if (from_len == 0) {  +        if (to_len == 0) {  +            /* nothing to do; return the original bytes */  +            return return_self(self);  +        }  +        /* insert the 'to' bytes everywhere.    */  +        /*    >>> b"Python".replace(b"", b".")  */  +        /*    b'.P.y.t.h.o.n.'                  */  +        return stringlib_replace_interleave(self, to_s, to_len, maxcount);  +    }  +  +    if (to_len == 0) {  +        /* delete all occurrences of 'from' bytes */  +        if (from_len == 1) {  +            return stringlib_replace_delete_single_character(  +                self, from_s[0], maxcount);  +        } else {  +            return stringlib_replace_delete_substring(  +                self, from_s, from_len, maxcount);  +        }  +    }  +  +    /* Handle special case where both bytes have the same length */  +  +    if (from_len == to_len) {  +        if (from_len == 1) {  +            return stringlib_replace_single_character_in_place(  +                self, from_s[0], to_s[0], maxcount);  +        } else {  +            return stringlib_replace_substring_in_place(  +                self, from_s, from_len, to_s, to_len, maxcount);  +        }  +    }  +  +    /* Otherwise use the more generic algorithms */  +    if (from_len == 1) {  +        return stringlib_replace_single_character(  +            self, from_s[0], to_s, to_len, maxcount);  +    } else {  +        /* len('from')>=2, len('to')>=1 */  +        return stringlib_replace_substring(  +            self, from_s, from_len, to_s, to_len, maxcount);  +    }  +}  +  +#undef findchar  diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h index bc4b104f112..e1776bd840e 100644 --- a/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h +++ b/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h @@ -1,26 +1,26 @@ -/* this is sort of a hack.  there's at least one place (formatting -   floats) where some stringlib code takes a different path if it's -   compiled as unicode. */ -#define STRINGLIB_IS_UNICODE     1 - -#define FASTSEARCH               ucs1lib_fastsearch -#define STRINGLIB(F)             ucs1lib_##F -#define STRINGLIB_OBJECT         PyUnicodeObject -#define STRINGLIB_SIZEOF_CHAR    1 -#define STRINGLIB_MAX_CHAR       0xFFu -#define STRINGLIB_CHAR           Py_UCS1 -#define STRINGLIB_TYPE_NAME      "unicode" -#define STRINGLIB_PARSE_CODE     "U" -#define STRINGLIB_EMPTY          unicode_empty -#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE -#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK -#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL -#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL -#define STRINGLIB_STR            PyUnicode_1BYTE_DATA -#define STRINGLIB_LEN            PyUnicode_GET_LENGTH -#define STRINGLIB_NEW            _PyUnicode_FromUCS1 -#define STRINGLIB_CHECK          PyUnicode_Check -#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact - -#define STRINGLIB_TOSTR          PyObject_Str -#define STRINGLIB_TOASCII        PyObject_ASCII +/* this is sort of a hack.  there's at least one place (formatting  +   floats) where some stringlib code takes a different path if it's  +   compiled as unicode. */  +#define STRINGLIB_IS_UNICODE     1  +  +#define FASTSEARCH               ucs1lib_fastsearch  +#define STRINGLIB(F)             ucs1lib_##F  +#define STRINGLIB_OBJECT         PyUnicodeObject  +#define STRINGLIB_SIZEOF_CHAR    1  +#define STRINGLIB_MAX_CHAR       0xFFu  +#define STRINGLIB_CHAR           Py_UCS1  +#define STRINGLIB_TYPE_NAME      "unicode"  +#define STRINGLIB_PARSE_CODE     "U"  +#define STRINGLIB_EMPTY          unicode_empty  +#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE  +#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK  +#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL  +#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL  +#define STRINGLIB_STR            PyUnicode_1BYTE_DATA  +#define STRINGLIB_LEN            PyUnicode_GET_LENGTH  +#define STRINGLIB_NEW            _PyUnicode_FromUCS1  +#define STRINGLIB_CHECK          PyUnicode_Check  +#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact  +  +#define STRINGLIB_TOSTR          PyObject_Str  +#define STRINGLIB_TOASCII        PyObject_ASCII  diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h index 86a1dff1b56..36efc362686 100644 --- a/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h +++ b/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h @@ -1,26 +1,26 @@ -/* this is sort of a hack.  there's at least one place (formatting -   floats) where some stringlib code takes a different path if it's -   compiled as unicode. */ -#define STRINGLIB_IS_UNICODE     1 - -#define FASTSEARCH               ucs2lib_fastsearch -#define STRINGLIB(F)             ucs2lib_##F -#define STRINGLIB_OBJECT         PyUnicodeObject -#define STRINGLIB_SIZEOF_CHAR    2 -#define STRINGLIB_MAX_CHAR       0xFFFFu -#define STRINGLIB_CHAR           Py_UCS2 -#define STRINGLIB_TYPE_NAME      "unicode" -#define STRINGLIB_PARSE_CODE     "U" -#define STRINGLIB_EMPTY          unicode_empty -#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE -#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK -#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL -#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL -#define STRINGLIB_STR            PyUnicode_2BYTE_DATA -#define STRINGLIB_LEN            PyUnicode_GET_LENGTH -#define STRINGLIB_NEW            _PyUnicode_FromUCS2 -#define STRINGLIB_CHECK          PyUnicode_Check -#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact - -#define STRINGLIB_TOSTR          PyObject_Str -#define STRINGLIB_TOASCII        PyObject_ASCII +/* this is sort of a hack.  there's at least one place (formatting  +   floats) where some stringlib code takes a different path if it's  +   compiled as unicode. */  +#define STRINGLIB_IS_UNICODE     1  +  +#define FASTSEARCH               ucs2lib_fastsearch  +#define STRINGLIB(F)             ucs2lib_##F  +#define STRINGLIB_OBJECT         PyUnicodeObject  +#define STRINGLIB_SIZEOF_CHAR    2  +#define STRINGLIB_MAX_CHAR       0xFFFFu  +#define STRINGLIB_CHAR           Py_UCS2  +#define STRINGLIB_TYPE_NAME      "unicode"  +#define STRINGLIB_PARSE_CODE     "U"  +#define STRINGLIB_EMPTY          unicode_empty  +#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE  +#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK  +#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL  +#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL  +#define STRINGLIB_STR            PyUnicode_2BYTE_DATA  +#define STRINGLIB_LEN            PyUnicode_GET_LENGTH  +#define STRINGLIB_NEW            _PyUnicode_FromUCS2  +#define STRINGLIB_CHECK          PyUnicode_Check  +#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact  +  +#define STRINGLIB_TOSTR          PyObject_Str  +#define STRINGLIB_TOASCII        PyObject_ASCII  diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h index 3c32a93c96a..86e7fa630f3 100644 --- a/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h +++ b/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h @@ -1,27 +1,27 @@ -/* this is sort of a hack.  there's at least one place (formatting -   floats) where some stringlib code takes a different path if it's -   compiled as unicode. */ -#define STRINGLIB_IS_UNICODE     1 - -#define FASTSEARCH               ucs4lib_fastsearch -#define STRINGLIB(F)             ucs4lib_##F -#define STRINGLIB_OBJECT         PyUnicodeObject -#define STRINGLIB_SIZEOF_CHAR    4 -#define STRINGLIB_MAX_CHAR       0x10FFFFu -#define STRINGLIB_CHAR           Py_UCS4 -#define STRINGLIB_TYPE_NAME      "unicode" -#define STRINGLIB_PARSE_CODE     "U" -#define STRINGLIB_EMPTY          unicode_empty -#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE -#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK -#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL -#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL -#define STRINGLIB_STR            PyUnicode_4BYTE_DATA -#define STRINGLIB_LEN            PyUnicode_GET_LENGTH -#define STRINGLIB_NEW            _PyUnicode_FromUCS4 -#define STRINGLIB_CHECK          PyUnicode_Check -#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact - -#define STRINGLIB_TOSTR          PyObject_Str -#define STRINGLIB_TOASCII        PyObject_ASCII - +/* this is sort of a hack.  there's at least one place (formatting  +   floats) where some stringlib code takes a different path if it's  +   compiled as unicode. */  +#define STRINGLIB_IS_UNICODE     1  +  +#define FASTSEARCH               ucs4lib_fastsearch  +#define STRINGLIB(F)             ucs4lib_##F  +#define STRINGLIB_OBJECT         PyUnicodeObject  +#define STRINGLIB_SIZEOF_CHAR    4  +#define STRINGLIB_MAX_CHAR       0x10FFFFu  +#define STRINGLIB_CHAR           Py_UCS4  +#define STRINGLIB_TYPE_NAME      "unicode"  +#define STRINGLIB_PARSE_CODE     "U"  +#define STRINGLIB_EMPTY          unicode_empty  +#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE  +#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK  +#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL  +#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL  +#define STRINGLIB_STR            PyUnicode_4BYTE_DATA  +#define STRINGLIB_LEN            PyUnicode_GET_LENGTH  +#define STRINGLIB_NEW            _PyUnicode_FromUCS4  +#define STRINGLIB_CHECK          PyUnicode_Check  +#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact  +  +#define STRINGLIB_TOSTR          PyObject_Str  +#define STRINGLIB_TOASCII        PyObject_ASCII  +  diff --git a/contrib/tools/python3/src/Objects/stringlib/undef.h b/contrib/tools/python3/src/Objects/stringlib/undef.h index c41e254fde6..53f74b4371b 100644 --- a/contrib/tools/python3/src/Objects/stringlib/undef.h +++ b/contrib/tools/python3/src/Objects/stringlib/undef.h @@ -1,10 +1,10 @@ -#undef  FASTSEARCH -#undef  STRINGLIB -#undef  STRINGLIB_SIZEOF_CHAR -#undef  STRINGLIB_MAX_CHAR -#undef  STRINGLIB_CHAR -#undef  STRINGLIB_STR -#undef  STRINGLIB_LEN -#undef  STRINGLIB_NEW -#undef STRINGLIB_IS_UNICODE - +#undef  FASTSEARCH  +#undef  STRINGLIB  +#undef  STRINGLIB_SIZEOF_CHAR  +#undef  STRINGLIB_MAX_CHAR  +#undef  STRINGLIB_CHAR  +#undef  STRINGLIB_STR  +#undef  STRINGLIB_LEN  +#undef  STRINGLIB_NEW  +#undef STRINGLIB_IS_UNICODE  +  diff --git a/contrib/tools/python3/src/Objects/stringlib/unicode_format.h b/contrib/tools/python3/src/Objects/stringlib/unicode_format.h index b526ad21b82..96b820d0a4c 100644 --- a/contrib/tools/python3/src/Objects/stringlib/unicode_format.h +++ b/contrib/tools/python3/src/Objects/stringlib/unicode_format.h @@ -1,445 +1,445 @@ -/* -    unicode_format.h -- implementation of str.format(). -*/ - -/************************************************************************/ -/***********   Global data structures and forward declarations  *********/ -/************************************************************************/ - -/* -   A SubString consists of the characters between two string or -   unicode pointers. -*/ -typedef struct { -    PyObject *str; /* borrowed reference */ -    Py_ssize_t start, end; -} SubString; - - -typedef enum { -    ANS_INIT, -    ANS_AUTO, -    ANS_MANUAL -} AutoNumberState;   /* Keep track if we're auto-numbering fields */ - -/* Keeps track of our auto-numbering state, and which number field we're on */ -typedef struct { -    AutoNumberState an_state; -    int an_field_number; -} AutoNumber; - - -/* forward declaration for recursion */ -static PyObject * -build_string(SubString *input, PyObject *args, PyObject *kwargs, -             int recursion_depth, AutoNumber *auto_number); - - - -/************************************************************************/ -/**************************  Utility  functions  ************************/ -/************************************************************************/ - -static void -AutoNumber_Init(AutoNumber *auto_number) -{ -    auto_number->an_state = ANS_INIT; -    auto_number->an_field_number = 0; -} - -/* fill in a SubString from a pointer and length */ -Py_LOCAL_INLINE(void) -SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end) -{ -    str->str = s; -    str->start = start; -    str->end = end; -} - -/* return a new string.  if str->str is NULL, return None */ -Py_LOCAL_INLINE(PyObject *) -SubString_new_object(SubString *str) -{ -    if (str->str == NULL) -        Py_RETURN_NONE; -    return PyUnicode_Substring(str->str, str->start, str->end); -} - -/* return a new string.  if str->str is NULL, return a new empty string */ -Py_LOCAL_INLINE(PyObject *) -SubString_new_object_or_empty(SubString *str) -{ -    if (str->str == NULL) { -        return PyUnicode_New(0, 0); -    } -    return SubString_new_object(str); -} - -/* Return 1 if an error has been detected switching between automatic -   field numbering and manual field specification, else return 0. Set -   ValueError on error. */ -static int -autonumber_state_error(AutoNumberState state, int field_name_is_empty) -{ -    if (state == ANS_MANUAL) { -        if (field_name_is_empty) { -            PyErr_SetString(PyExc_ValueError, "cannot switch from " -                            "manual field specification to " -                            "automatic field numbering"); -            return 1; -        } -    } -    else { -        if (!field_name_is_empty) { -            PyErr_SetString(PyExc_ValueError, "cannot switch from " -                            "automatic field numbering to " -                            "manual field specification"); -            return 1; -        } -    } -    return 0; -} - - -/************************************************************************/ -/***********  Format string parsing -- integers and identifiers *********/ -/************************************************************************/ - -static Py_ssize_t -get_integer(const SubString *str) -{ -    Py_ssize_t accumulator = 0; -    Py_ssize_t digitval; -    Py_ssize_t i; - -    /* empty string is an error */ -    if (str->start >= str->end) -        return -1; - -    for (i = str->start; i < str->end; i++) { -        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i)); -        if (digitval < 0) -            return -1; -        /* -           Detect possible overflow before it happens: - -              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if -              accumulator > (PY_SSIZE_T_MAX - digitval) / 10. -        */ -        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) { -            PyErr_Format(PyExc_ValueError, -                         "Too many decimal digits in format string"); -            return -1; -        } -        accumulator = accumulator * 10 + digitval; -    } -    return accumulator; -} - -/************************************************************************/ -/******** Functions to get field objects and specification strings ******/ -/************************************************************************/ - -/* do the equivalent of obj.name */ -static PyObject * -getattr(PyObject *obj, SubString *name) -{ -    PyObject *newobj; -    PyObject *str = SubString_new_object(name); -    if (str == NULL) -        return NULL; -    newobj = PyObject_GetAttr(obj, str); -    Py_DECREF(str); -    return newobj; -} - -/* do the equivalent of obj[idx], where obj is a sequence */ -static PyObject * -getitem_sequence(PyObject *obj, Py_ssize_t idx) -{ -    return PySequence_GetItem(obj, idx); -} - -/* do the equivalent of obj[idx], where obj is not a sequence */ -static PyObject * -getitem_idx(PyObject *obj, Py_ssize_t idx) -{ -    PyObject *newobj; -    PyObject *idx_obj = PyLong_FromSsize_t(idx); -    if (idx_obj == NULL) -        return NULL; -    newobj = PyObject_GetItem(obj, idx_obj); -    Py_DECREF(idx_obj); -    return newobj; -} - -/* do the equivalent of obj[name] */ -static PyObject * -getitem_str(PyObject *obj, SubString *name) -{ -    PyObject *newobj; -    PyObject *str = SubString_new_object(name); -    if (str == NULL) -        return NULL; -    newobj = PyObject_GetItem(obj, str); -    Py_DECREF(str); -    return newobj; -} - -typedef struct { -    /* the entire string we're parsing.  we assume that someone else -       is managing its lifetime, and that it will exist for the -       lifetime of the iterator.  can be empty */ -    SubString str; - -    /* index to where we are inside field_name */ -    Py_ssize_t index; -} FieldNameIterator; - - -static int -FieldNameIterator_init(FieldNameIterator *self, PyObject *s, -                       Py_ssize_t start, Py_ssize_t end) -{ -    SubString_init(&self->str, s, start, end); -    self->index = start; -    return 1; -} - -static int -_FieldNameIterator_attr(FieldNameIterator *self, SubString *name) -{ -    Py_UCS4 c; - -    name->str = self->str.str; -    name->start = self->index; - -    /* return everything until '.' or '[' */ -    while (self->index < self->str.end) { -        c = PyUnicode_READ_CHAR(self->str.str, self->index++); -        switch (c) { -        case '[': -        case '.': -            /* backup so that we this character will be seen next time */ -            self->index--; -            break; -        default: -            continue; -        } -        break; -    } -    /* end of string is okay */ -    name->end = self->index; -    return 1; -} - -static int -_FieldNameIterator_item(FieldNameIterator *self, SubString *name) -{ -    int bracket_seen = 0; -    Py_UCS4 c; - -    name->str = self->str.str; -    name->start = self->index; - -    /* return everything until ']' */ -    while (self->index < self->str.end) { -        c = PyUnicode_READ_CHAR(self->str.str, self->index++); -        switch (c) { -        case ']': -            bracket_seen = 1; -            break; -        default: -            continue; -        } -        break; -    } -    /* make sure we ended with a ']' */ -    if (!bracket_seen) { -        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string"); -        return 0; -    } - -    /* end of string is okay */ -    /* don't include the ']' */ -    name->end = self->index-1; -    return 1; -} - -/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ -static int -FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, -                       Py_ssize_t *name_idx, SubString *name) -{ -    /* check at end of input */ -    if (self->index >= self->str.end) -        return 1; - -    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) { -    case '.': -        *is_attribute = 1; -        if (_FieldNameIterator_attr(self, name) == 0) -            return 0; -        *name_idx = -1; -        break; -    case '[': -        *is_attribute = 0; -        if (_FieldNameIterator_item(self, name) == 0) -            return 0; -        *name_idx = get_integer(name); -        if (*name_idx == -1 && PyErr_Occurred()) -            return 0; -        break; -    default: -        /* Invalid character follows ']' */ -        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may " -                        "follow ']' in format field specifier"); -        return 0; -    } - -    /* empty string is an error */ -    if (name->start == name->end) { -        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); -        return 0; -    } - -    return 2; -} - - -/* input: field_name -   output: 'first' points to the part before the first '[' or '.' -           'first_idx' is -1 if 'first' is not an integer, otherwise -                       it's the value of first converted to an integer -           'rest' is an iterator to return the rest -*/ -static int -field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first, -                 Py_ssize_t *first_idx, FieldNameIterator *rest, -                 AutoNumber *auto_number) -{ -    Py_UCS4 c; -    Py_ssize_t i = start; -    int field_name_is_empty; -    int using_numeric_index; - -    /* find the part up until the first '.' or '[' */ -    while (i < end) { -        switch (c = PyUnicode_READ_CHAR(str, i++)) { -        case '[': -        case '.': -            /* backup so that we this character is available to the -               "rest" iterator */ -            i--; -            break; -        default: -            continue; -        } -        break; -    } - -    /* set up the return values */ -    SubString_init(first, str, start, i); -    FieldNameIterator_init(rest, str, i, end); - -    /* see if "first" is an integer, in which case it's used as an index */ -    *first_idx = get_integer(first); -    if (*first_idx == -1 && PyErr_Occurred()) -        return 0; - -    field_name_is_empty = first->start >= first->end; - -    /* If the field name is omitted or if we have a numeric index -       specified, then we're doing numeric indexing into args. */ -    using_numeric_index = field_name_is_empty || *first_idx != -1; - -    /* We always get here exactly one time for each field we're -       processing. And we get here in field order (counting by left -       braces). So this is the perfect place to handle automatic field -       numbering if the field name is omitted. */ - -    /* Check if we need to do the auto-numbering. It's not needed if -       we're called from string.Format routines, because it's handled -       in that class by itself. */ -    if (auto_number) { -        /* Initialize our auto numbering state if this is the first -           time we're either auto-numbering or manually numbering. */ -        if (auto_number->an_state == ANS_INIT && using_numeric_index) -            auto_number->an_state = field_name_is_empty ? -                ANS_AUTO : ANS_MANUAL; - -        /* Make sure our state is consistent with what we're doing -           this time through. Only check if we're using a numeric -           index. */ -        if (using_numeric_index) -            if (autonumber_state_error(auto_number->an_state, -                                       field_name_is_empty)) -                return 0; -        /* Zero length field means we want to do auto-numbering of the -           fields. */ -        if (field_name_is_empty) -            *first_idx = (auto_number->an_field_number)++; -    } - -    return 1; -} - - -/* -    get_field_object returns the object inside {}, before the -    format_spec.  It handles getindex and getattr lookups and consumes -    the entire input string. -*/ -static PyObject * -get_field_object(SubString *input, PyObject *args, PyObject *kwargs, -                 AutoNumber *auto_number) -{ -    PyObject *obj = NULL; -    int ok; -    int is_attribute; -    SubString name; -    SubString first; -    Py_ssize_t index; -    FieldNameIterator rest; - -    if (!field_name_split(input->str, input->start, input->end, &first, -                          &index, &rest, auto_number)) { -        goto error; -    } - -    if (index == -1) { -        /* look up in kwargs */ -        PyObject *key = SubString_new_object(&first); -        if (key == NULL) { -            goto error; -        } -        if (kwargs == NULL) { -            PyErr_SetObject(PyExc_KeyError, key); -            Py_DECREF(key); -            goto error; -        } -        /* Use PyObject_GetItem instead of PyDict_GetItem because this -           code is no longer just used with kwargs. It might be passed -           a non-dict when called through format_map. */ -        obj = PyObject_GetItem(kwargs, key); -        Py_DECREF(key); -        if (obj == NULL) { -            goto error; -        } -    } -    else { -        /* If args is NULL, we have a format string with a positional field -           with only kwargs to retrieve it from. This can only happen when -           used with format_map(), where positional arguments are not -           allowed. */ -        if (args == NULL) { -            PyErr_SetString(PyExc_ValueError, "Format string contains " -                            "positional fields"); -            goto error; -        } - -        /* look up in args */ -        obj = PySequence_GetItem(args, index); +/*  +    unicode_format.h -- implementation of str.format().  +*/  +  +/************************************************************************/  +/***********   Global data structures and forward declarations  *********/  +/************************************************************************/  +  +/*  +   A SubString consists of the characters between two string or  +   unicode pointers.  +*/  +typedef struct {  +    PyObject *str; /* borrowed reference */  +    Py_ssize_t start, end;  +} SubString;  +  +  +typedef enum {  +    ANS_INIT,  +    ANS_AUTO,  +    ANS_MANUAL  +} AutoNumberState;   /* Keep track if we're auto-numbering fields */  +  +/* Keeps track of our auto-numbering state, and which number field we're on */  +typedef struct {  +    AutoNumberState an_state;  +    int an_field_number;  +} AutoNumber;  +  +  +/* forward declaration for recursion */  +static PyObject *  +build_string(SubString *input, PyObject *args, PyObject *kwargs,  +             int recursion_depth, AutoNumber *auto_number);  +  +  +  +/************************************************************************/  +/**************************  Utility  functions  ************************/  +/************************************************************************/  +  +static void  +AutoNumber_Init(AutoNumber *auto_number)  +{  +    auto_number->an_state = ANS_INIT;  +    auto_number->an_field_number = 0;  +}  +  +/* fill in a SubString from a pointer and length */  +Py_LOCAL_INLINE(void)  +SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)  +{  +    str->str = s;  +    str->start = start;  +    str->end = end;  +}  +  +/* return a new string.  if str->str is NULL, return None */  +Py_LOCAL_INLINE(PyObject *)  +SubString_new_object(SubString *str)  +{  +    if (str->str == NULL)  +        Py_RETURN_NONE;  +    return PyUnicode_Substring(str->str, str->start, str->end);  +}  +  +/* return a new string.  if str->str is NULL, return a new empty string */  +Py_LOCAL_INLINE(PyObject *)  +SubString_new_object_or_empty(SubString *str)  +{  +    if (str->str == NULL) {  +        return PyUnicode_New(0, 0);  +    }  +    return SubString_new_object(str);  +}  +  +/* Return 1 if an error has been detected switching between automatic  +   field numbering and manual field specification, else return 0. Set  +   ValueError on error. */  +static int  +autonumber_state_error(AutoNumberState state, int field_name_is_empty)  +{  +    if (state == ANS_MANUAL) {  +        if (field_name_is_empty) {  +            PyErr_SetString(PyExc_ValueError, "cannot switch from "  +                            "manual field specification to "  +                            "automatic field numbering");  +            return 1;  +        }  +    }  +    else {  +        if (!field_name_is_empty) {  +            PyErr_SetString(PyExc_ValueError, "cannot switch from "  +                            "automatic field numbering to "  +                            "manual field specification");  +            return 1;  +        }  +    }  +    return 0;  +}  +  +  +/************************************************************************/  +/***********  Format string parsing -- integers and identifiers *********/  +/************************************************************************/  +  +static Py_ssize_t  +get_integer(const SubString *str)  +{  +    Py_ssize_t accumulator = 0;  +    Py_ssize_t digitval;  +    Py_ssize_t i;  +  +    /* empty string is an error */  +    if (str->start >= str->end)  +        return -1;  +  +    for (i = str->start; i < str->end; i++) {  +        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));  +        if (digitval < 0)  +            return -1;  +        /*  +           Detect possible overflow before it happens:  +  +              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if  +              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.  +        */  +        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {  +            PyErr_Format(PyExc_ValueError,  +                         "Too many decimal digits in format string");  +            return -1;  +        }  +        accumulator = accumulator * 10 + digitval;  +    }  +    return accumulator;  +}  +  +/************************************************************************/  +/******** Functions to get field objects and specification strings ******/  +/************************************************************************/  +  +/* do the equivalent of obj.name */  +static PyObject *  +getattr(PyObject *obj, SubString *name)  +{  +    PyObject *newobj;  +    PyObject *str = SubString_new_object(name);  +    if (str == NULL)  +        return NULL;  +    newobj = PyObject_GetAttr(obj, str);  +    Py_DECREF(str);  +    return newobj;  +}  +  +/* do the equivalent of obj[idx], where obj is a sequence */  +static PyObject *  +getitem_sequence(PyObject *obj, Py_ssize_t idx)  +{  +    return PySequence_GetItem(obj, idx);  +}  +  +/* do the equivalent of obj[idx], where obj is not a sequence */  +static PyObject *  +getitem_idx(PyObject *obj, Py_ssize_t idx)  +{  +    PyObject *newobj;  +    PyObject *idx_obj = PyLong_FromSsize_t(idx);  +    if (idx_obj == NULL)  +        return NULL;  +    newobj = PyObject_GetItem(obj, idx_obj);  +    Py_DECREF(idx_obj);  +    return newobj;  +}  +  +/* do the equivalent of obj[name] */  +static PyObject *  +getitem_str(PyObject *obj, SubString *name)  +{  +    PyObject *newobj;  +    PyObject *str = SubString_new_object(name);  +    if (str == NULL)  +        return NULL;  +    newobj = PyObject_GetItem(obj, str);  +    Py_DECREF(str);  +    return newobj;  +}  +  +typedef struct {  +    /* the entire string we're parsing.  we assume that someone else  +       is managing its lifetime, and that it will exist for the  +       lifetime of the iterator.  can be empty */  +    SubString str;  +  +    /* index to where we are inside field_name */  +    Py_ssize_t index;  +} FieldNameIterator;  +  +  +static int  +FieldNameIterator_init(FieldNameIterator *self, PyObject *s,  +                       Py_ssize_t start, Py_ssize_t end)  +{  +    SubString_init(&self->str, s, start, end);  +    self->index = start;  +    return 1;  +}  +  +static int  +_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)  +{  +    Py_UCS4 c;  +  +    name->str = self->str.str;  +    name->start = self->index;  +  +    /* return everything until '.' or '[' */  +    while (self->index < self->str.end) {  +        c = PyUnicode_READ_CHAR(self->str.str, self->index++);  +        switch (c) {  +        case '[':  +        case '.':  +            /* backup so that we this character will be seen next time */  +            self->index--;  +            break;  +        default:  +            continue;  +        }  +        break;  +    }  +    /* end of string is okay */  +    name->end = self->index;  +    return 1;  +}  +  +static int  +_FieldNameIterator_item(FieldNameIterator *self, SubString *name)  +{  +    int bracket_seen = 0;  +    Py_UCS4 c;  +  +    name->str = self->str.str;  +    name->start = self->index;  +  +    /* return everything until ']' */  +    while (self->index < self->str.end) {  +        c = PyUnicode_READ_CHAR(self->str.str, self->index++);  +        switch (c) {  +        case ']':  +            bracket_seen = 1;  +            break;  +        default:  +            continue;  +        }  +        break;  +    }  +    /* make sure we ended with a ']' */  +    if (!bracket_seen) {  +        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");  +        return 0;  +    }  +  +    /* end of string is okay */  +    /* don't include the ']' */  +    name->end = self->index-1;  +    return 1;  +}  +  +/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */  +static int  +FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,  +                       Py_ssize_t *name_idx, SubString *name)  +{  +    /* check at end of input */  +    if (self->index >= self->str.end)  +        return 1;  +  +    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {  +    case '.':  +        *is_attribute = 1;  +        if (_FieldNameIterator_attr(self, name) == 0)  +            return 0;  +        *name_idx = -1;  +        break;  +    case '[':  +        *is_attribute = 0;  +        if (_FieldNameIterator_item(self, name) == 0)  +            return 0;  +        *name_idx = get_integer(name);  +        if (*name_idx == -1 && PyErr_Occurred())  +            return 0;  +        break;  +    default:  +        /* Invalid character follows ']' */  +        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "  +                        "follow ']' in format field specifier");  +        return 0;  +    }  +  +    /* empty string is an error */  +    if (name->start == name->end) {  +        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");  +        return 0;  +    }  +  +    return 2;  +}  +  +  +/* input: field_name  +   output: 'first' points to the part before the first '[' or '.'  +           'first_idx' is -1 if 'first' is not an integer, otherwise  +                       it's the value of first converted to an integer  +           'rest' is an iterator to return the rest  +*/  +static int  +field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,  +                 Py_ssize_t *first_idx, FieldNameIterator *rest,  +                 AutoNumber *auto_number)  +{  +    Py_UCS4 c;  +    Py_ssize_t i = start;  +    int field_name_is_empty;  +    int using_numeric_index;  +  +    /* find the part up until the first '.' or '[' */  +    while (i < end) {  +        switch (c = PyUnicode_READ_CHAR(str, i++)) {  +        case '[':  +        case '.':  +            /* backup so that we this character is available to the  +               "rest" iterator */  +            i--;  +            break;  +        default:  +            continue;  +        }  +        break;  +    }  +  +    /* set up the return values */  +    SubString_init(first, str, start, i);  +    FieldNameIterator_init(rest, str, i, end);  +  +    /* see if "first" is an integer, in which case it's used as an index */  +    *first_idx = get_integer(first);  +    if (*first_idx == -1 && PyErr_Occurred())  +        return 0;  +  +    field_name_is_empty = first->start >= first->end;  +  +    /* If the field name is omitted or if we have a numeric index  +       specified, then we're doing numeric indexing into args. */  +    using_numeric_index = field_name_is_empty || *first_idx != -1;  +  +    /* We always get here exactly one time for each field we're  +       processing. And we get here in field order (counting by left  +       braces). So this is the perfect place to handle automatic field  +       numbering if the field name is omitted. */  +  +    /* Check if we need to do the auto-numbering. It's not needed if  +       we're called from string.Format routines, because it's handled  +       in that class by itself. */  +    if (auto_number) {  +        /* Initialize our auto numbering state if this is the first  +           time we're either auto-numbering or manually numbering. */  +        if (auto_number->an_state == ANS_INIT && using_numeric_index)  +            auto_number->an_state = field_name_is_empty ?  +                ANS_AUTO : ANS_MANUAL;  +  +        /* Make sure our state is consistent with what we're doing  +           this time through. Only check if we're using a numeric  +           index. */  +        if (using_numeric_index)  +            if (autonumber_state_error(auto_number->an_state,  +                                       field_name_is_empty))  +                return 0;  +        /* Zero length field means we want to do auto-numbering of the  +           fields. */  +        if (field_name_is_empty)  +            *first_idx = (auto_number->an_field_number)++;  +    }  +  +    return 1;  +}  +  +  +/*  +    get_field_object returns the object inside {}, before the  +    format_spec.  It handles getindex and getattr lookups and consumes  +    the entire input string.  +*/  +static PyObject *  +get_field_object(SubString *input, PyObject *args, PyObject *kwargs,  +                 AutoNumber *auto_number)  +{  +    PyObject *obj = NULL;  +    int ok;  +    int is_attribute;  +    SubString name;  +    SubString first;  +    Py_ssize_t index;  +    FieldNameIterator rest;  +  +    if (!field_name_split(input->str, input->start, input->end, &first,  +                          &index, &rest, auto_number)) {  +        goto error;  +    }  +  +    if (index == -1) {  +        /* look up in kwargs */  +        PyObject *key = SubString_new_object(&first);  +        if (key == NULL) {  +            goto error;  +        }  +        if (kwargs == NULL) {  +            PyErr_SetObject(PyExc_KeyError, key);  +            Py_DECREF(key);  +            goto error;  +        }  +        /* Use PyObject_GetItem instead of PyDict_GetItem because this  +           code is no longer just used with kwargs. It might be passed  +           a non-dict when called through format_map. */  +        obj = PyObject_GetItem(kwargs, key);  +        Py_DECREF(key);  +        if (obj == NULL) {  +            goto error;  +        }  +    }  +    else {  +        /* If args is NULL, we have a format string with a positional field  +           with only kwargs to retrieve it from. This can only happen when  +           used with format_map(), where positional arguments are not  +           allowed. */  +        if (args == NULL) {  +            PyErr_SetString(PyExc_ValueError, "Format string contains "  +                            "positional fields");  +            goto error;  +        }  +  +        /* look up in args */  +        obj = PySequence_GetItem(args, index);           if (obj == NULL) {              PyErr_Format(PyExc_IndexError,                           "Replacement index %zd out of range for positional " @@ -447,845 +447,845 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,                           index);               goto error;          } -    } - -    /* iterate over the rest of the field_name */ -    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, -                                        &name)) == 2) { -        PyObject *tmp; - -        if (is_attribute) -            /* getattr lookup "." */ -            tmp = getattr(obj, &name); -        else -            /* getitem lookup "[]" */ -            if (index == -1) -                tmp = getitem_str(obj, &name); -            else -                if (PySequence_Check(obj)) -                    tmp = getitem_sequence(obj, index); -                else -                    /* not a sequence */ -                    tmp = getitem_idx(obj, index); -        if (tmp == NULL) -            goto error; - -        /* assign to obj */ -        Py_DECREF(obj); -        obj = tmp; -    } -    /* end of iterator, this is the non-error case */ -    if (ok == 1) -        return obj; -error: -    Py_XDECREF(obj); -    return NULL; -} - -/************************************************************************/ -/*****************  Field rendering functions  **************************/ -/************************************************************************/ - -/* -    render_field() is the main function in this section.  It takes the -    field object and field specification string generated by -    get_field_and_spec, and renders the field into the output string. - -    render_field calls fieldobj.__format__(format_spec) method, and -    appends to the output. -*/ -static int -render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) -{ -    int ok = 0; -    PyObject *result = NULL; -    PyObject *format_spec_object = NULL; -    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; -    int err; - -    /* If we know the type exactly, skip the lookup of __format__ and just -       call the formatter directly. */ -    if (PyUnicode_CheckExact(fieldobj)) -        formatter = _PyUnicode_FormatAdvancedWriter; -    else if (PyLong_CheckExact(fieldobj)) -        formatter = _PyLong_FormatAdvancedWriter; -    else if (PyFloat_CheckExact(fieldobj)) -        formatter = _PyFloat_FormatAdvancedWriter; -    else if (PyComplex_CheckExact(fieldobj)) -        formatter = _PyComplex_FormatAdvancedWriter; - -    if (formatter) { -        /* we know exactly which formatter will be called when __format__ is -           looked up, so call it directly, instead. */ -        err = formatter(writer, fieldobj, format_spec->str, -                        format_spec->start, format_spec->end); -        return (err == 0); -    } -    else { -        /* We need to create an object out of the pointers we have, because -           __format__ takes a string/unicode object for format_spec. */ -        if (format_spec->str) -            format_spec_object = PyUnicode_Substring(format_spec->str, -                                                     format_spec->start, -                                                     format_spec->end); -        else -            format_spec_object = PyUnicode_New(0, 0); -        if (format_spec_object == NULL) -            goto done; - -        result = PyObject_Format(fieldobj, format_spec_object); -    } -    if (result == NULL) -        goto done; - -    if (_PyUnicodeWriter_WriteStr(writer, result) == -1) -        goto done; -    ok = 1; - -done: -    Py_XDECREF(format_spec_object); -    Py_XDECREF(result); -    return ok; -} - -static int -parse_field(SubString *str, SubString *field_name, SubString *format_spec, -            int *format_spec_needs_expanding, Py_UCS4 *conversion) -{ -    /* Note this function works if the field name is zero length, -       which is good.  Zero length field names are handled later, in -       field_name_split. */ - -    Py_UCS4 c = 0; - -    /* initialize these, as they may be empty */ -    *conversion = '\0'; -    SubString_init(format_spec, NULL, 0, 0); - -    /* Search for the field name.  it's terminated by the end of -       the string, or a ':' or '!' */ -    field_name->str = str->str; -    field_name->start = str->start; -    while (str->start < str->end) { -        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { -        case '{': -            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); -            return 0; -        case '[': -            for (; str->start < str->end; str->start++) -                if (PyUnicode_READ_CHAR(str->str, str->start) == ']') -                    break; -            continue; -        case '}': -        case ':': -        case '!': -            break; -        default: -            continue; -        } -        break; -    } - -    field_name->end = str->start - 1; -    if (c == '!' || c == ':') { -        Py_ssize_t count; -        /* we have a format specifier and/or a conversion */ -        /* don't include the last character */ - -        /* see if there's a conversion specifier */ -        if (c == '!') { -            /* there must be another character present */ -            if (str->start >= str->end) { -                PyErr_SetString(PyExc_ValueError, -                                "end of string while looking for conversion " -                                "specifier"); -                return 0; -            } -            *conversion = PyUnicode_READ_CHAR(str->str, str->start++); - -            if (str->start < str->end) { -                c = PyUnicode_READ_CHAR(str->str, str->start++); -                if (c == '}') -                    return 1; -                if (c != ':') { -                    PyErr_SetString(PyExc_ValueError, -                                    "expected ':' after conversion specifier"); -                    return 0; -                } -            } -        } -        format_spec->str = str->str; -        format_spec->start = str->start; -        count = 1; -        while (str->start < str->end) { -            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { -            case '{': -                *format_spec_needs_expanding = 1; -                count++; -                break; -            case '}': -                count--; -                if (count == 0) { -                    format_spec->end = str->start - 1; -                    return 1; -                } -                break; -            default: -                break; -            } -        } - -        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); -        return 0; -    } -    else if (c != '}') { -        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); -        return 0; -    } - -    return 1; -} - -/************************************************************************/ -/******* Output string allocation and escape-to-markup processing  ******/ -/************************************************************************/ - -/* MarkupIterator breaks the string into pieces of either literal -   text, or things inside {} that need to be marked up.  it is -   designed to make it easy to wrap a Python iterator around it, for -   use with the Formatter class */ - -typedef struct { -    SubString str; -} MarkupIterator; - -static int -MarkupIterator_init(MarkupIterator *self, PyObject *str, -                    Py_ssize_t start, Py_ssize_t end) -{ -    SubString_init(&self->str, str, start, end); -    return 1; -} - -/* returns 0 on error, 1 on non-error termination, and 2 if it got a -   string (or something to be expanded) */ -static int -MarkupIterator_next(MarkupIterator *self, SubString *literal, -                    int *field_present, SubString *field_name, -                    SubString *format_spec, Py_UCS4 *conversion, -                    int *format_spec_needs_expanding) -{ -    int at_end; -    Py_UCS4 c = 0; -    Py_ssize_t start; -    Py_ssize_t len; -    int markup_follows = 0; - -    /* initialize all of the output variables */ -    SubString_init(literal, NULL, 0, 0); -    SubString_init(field_name, NULL, 0, 0); -    SubString_init(format_spec, NULL, 0, 0); -    *conversion = '\0'; -    *format_spec_needs_expanding = 0; -    *field_present = 0; - -    /* No more input, end of iterator.  This is the normal exit -       path. */ -    if (self->str.start >= self->str.end) -        return 1; - -    start = self->str.start; - -    /* First read any literal text. Read until the end of string, an -       escaped '{' or '}', or an unescaped '{'.  In order to never -       allocate memory and so I can just pass pointers around, if -       there's an escaped '{' or '}' then we'll return the literal -       including the brace, but no format object.  The next time -       through, we'll return the rest of the literal, skipping past -       the second consecutive brace. */ -    while (self->str.start < self->str.end) { -        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { -        case '{': -        case '}': -            markup_follows = 1; -            break; -        default: -            continue; -        } -        break; -    } - -    at_end = self->str.start >= self->str.end; -    len = self->str.start - start; - -    if ((c == '}') && (at_end || -                       (c != PyUnicode_READ_CHAR(self->str.str, -                                                 self->str.start)))) { -        PyErr_SetString(PyExc_ValueError, "Single '}' encountered " -                        "in format string"); -        return 0; -    } -    if (at_end && c == '{') { -        PyErr_SetString(PyExc_ValueError, "Single '{' encountered " -                        "in format string"); -        return 0; -    } -    if (!at_end) { -        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) { -            /* escaped } or {, skip it in the input.  there is no -               markup object following us, just this literal text */ -            self->str.start++; -            markup_follows = 0; -        } -        else -            len--; -    } - -    /* record the literal text */ -    literal->str = self->str.str; -    literal->start = start; -    literal->end = start + len; - -    if (!markup_follows) -        return 2; - -    /* this is markup; parse the field */ -    *field_present = 1; -    if (!parse_field(&self->str, field_name, format_spec, -                     format_spec_needs_expanding, conversion)) -        return 0; -    return 2; -} - - -/* do the !r or !s conversion on obj */ -static PyObject * -do_conversion(PyObject *obj, Py_UCS4 conversion) -{ -    /* XXX in pre-3.0, do we need to convert this to unicode, since it -       might have returned a string? */ -    switch (conversion) { -    case 'r': -        return PyObject_Repr(obj); -    case 's': -        return PyObject_Str(obj); -    case 'a': -        return PyObject_ASCII(obj); -    default: -        if (conversion > 32 && conversion < 127) { -                /* It's the ASCII subrange; casting to char is safe -                   (assuming the execution character set is an ASCII -                   superset). */ -                PyErr_Format(PyExc_ValueError, -                     "Unknown conversion specifier %c", -                     (char)conversion); -        } else -                PyErr_Format(PyExc_ValueError, -                     "Unknown conversion specifier \\x%x", -                     (unsigned int)conversion); -        return NULL; -    } -} - -/* given: - -   {field_name!conversion:format_spec} - -   compute the result and write it to output. -   format_spec_needs_expanding is an optimization.  if it's false, -   just output the string directly, otherwise recursively expand the -   format_spec string. - -   field_name is allowed to be zero length, in which case we -   are doing auto field numbering. -*/ - -static int -output_markup(SubString *field_name, SubString *format_spec, -              int format_spec_needs_expanding, Py_UCS4 conversion, -              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, -              int recursion_depth, AutoNumber *auto_number) -{ -    PyObject *tmp = NULL; -    PyObject *fieldobj = NULL; -    SubString expanded_format_spec; -    SubString *actual_format_spec; -    int result = 0; - -    /* convert field_name to an object */ -    fieldobj = get_field_object(field_name, args, kwargs, auto_number); -    if (fieldobj == NULL) -        goto done; - -    if (conversion != '\0') { -        tmp = do_conversion(fieldobj, conversion); -        if (tmp == NULL || PyUnicode_READY(tmp) == -1) -            goto done; - -        /* do the assignment, transferring ownership: fieldobj = tmp */ -        Py_DECREF(fieldobj); -        fieldobj = tmp; -        tmp = NULL; -    } - +    }  +  +    /* iterate over the rest of the field_name */  +    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,  +                                        &name)) == 2) {  +        PyObject *tmp;  +  +        if (is_attribute)  +            /* getattr lookup "." */  +            tmp = getattr(obj, &name);  +        else  +            /* getitem lookup "[]" */  +            if (index == -1)  +                tmp = getitem_str(obj, &name);  +            else  +                if (PySequence_Check(obj))  +                    tmp = getitem_sequence(obj, index);  +                else  +                    /* not a sequence */  +                    tmp = getitem_idx(obj, index);  +        if (tmp == NULL)  +            goto error;  +  +        /* assign to obj */  +        Py_DECREF(obj);  +        obj = tmp;  +    }  +    /* end of iterator, this is the non-error case */  +    if (ok == 1)  +        return obj;  +error:  +    Py_XDECREF(obj);  +    return NULL;  +}  +  +/************************************************************************/  +/*****************  Field rendering functions  **************************/  +/************************************************************************/  +  +/*  +    render_field() is the main function in this section.  It takes the  +    field object and field specification string generated by  +    get_field_and_spec, and renders the field into the output string.  +  +    render_field calls fieldobj.__format__(format_spec) method, and  +    appends to the output.  +*/  +static int  +render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)  +{  +    int ok = 0;  +    PyObject *result = NULL;  +    PyObject *format_spec_object = NULL;  +    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;  +    int err;  +  +    /* If we know the type exactly, skip the lookup of __format__ and just  +       call the formatter directly. */  +    if (PyUnicode_CheckExact(fieldobj))  +        formatter = _PyUnicode_FormatAdvancedWriter;  +    else if (PyLong_CheckExact(fieldobj))  +        formatter = _PyLong_FormatAdvancedWriter;  +    else if (PyFloat_CheckExact(fieldobj))  +        formatter = _PyFloat_FormatAdvancedWriter;  +    else if (PyComplex_CheckExact(fieldobj))  +        formatter = _PyComplex_FormatAdvancedWriter;  +  +    if (formatter) {  +        /* we know exactly which formatter will be called when __format__ is  +           looked up, so call it directly, instead. */  +        err = formatter(writer, fieldobj, format_spec->str,  +                        format_spec->start, format_spec->end);  +        return (err == 0);  +    }  +    else {  +        /* We need to create an object out of the pointers we have, because  +           __format__ takes a string/unicode object for format_spec. */  +        if (format_spec->str)  +            format_spec_object = PyUnicode_Substring(format_spec->str,  +                                                     format_spec->start,  +                                                     format_spec->end);  +        else  +            format_spec_object = PyUnicode_New(0, 0);  +        if (format_spec_object == NULL)  +            goto done;  +  +        result = PyObject_Format(fieldobj, format_spec_object);  +    }  +    if (result == NULL)  +        goto done;  +  +    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)  +        goto done;  +    ok = 1;  +  +done:  +    Py_XDECREF(format_spec_object);  +    Py_XDECREF(result);  +    return ok;  +}  +  +static int  +parse_field(SubString *str, SubString *field_name, SubString *format_spec,  +            int *format_spec_needs_expanding, Py_UCS4 *conversion)  +{  +    /* Note this function works if the field name is zero length,  +       which is good.  Zero length field names are handled later, in  +       field_name_split. */  +  +    Py_UCS4 c = 0;  +  +    /* initialize these, as they may be empty */  +    *conversion = '\0';  +    SubString_init(format_spec, NULL, 0, 0);  +  +    /* Search for the field name.  it's terminated by the end of  +       the string, or a ':' or '!' */  +    field_name->str = str->str;  +    field_name->start = str->start;  +    while (str->start < str->end) {  +        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {  +        case '{':  +            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");  +            return 0;  +        case '[':  +            for (; str->start < str->end; str->start++)  +                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')  +                    break;  +            continue;  +        case '}':  +        case ':':  +        case '!':  +            break;  +        default:  +            continue;  +        }  +        break;  +    }  +  +    field_name->end = str->start - 1;  +    if (c == '!' || c == ':') {  +        Py_ssize_t count;  +        /* we have a format specifier and/or a conversion */  +        /* don't include the last character */  +  +        /* see if there's a conversion specifier */  +        if (c == '!') {  +            /* there must be another character present */  +            if (str->start >= str->end) {  +                PyErr_SetString(PyExc_ValueError,  +                                "end of string while looking for conversion "  +                                "specifier");  +                return 0;  +            }  +            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);  +  +            if (str->start < str->end) {  +                c = PyUnicode_READ_CHAR(str->str, str->start++);  +                if (c == '}')  +                    return 1;  +                if (c != ':') {  +                    PyErr_SetString(PyExc_ValueError,  +                                    "expected ':' after conversion specifier");  +                    return 0;  +                }  +            }  +        }  +        format_spec->str = str->str;  +        format_spec->start = str->start;  +        count = 1;  +        while (str->start < str->end) {  +            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {  +            case '{':  +                *format_spec_needs_expanding = 1;  +                count++;  +                break;  +            case '}':  +                count--;  +                if (count == 0) {  +                    format_spec->end = str->start - 1;  +                    return 1;  +                }  +                break;  +            default:  +                break;  +            }  +        }  +  +        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");  +        return 0;  +    }  +    else if (c != '}') {  +        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");  +        return 0;  +    }  +  +    return 1;  +}  +  +/************************************************************************/  +/******* Output string allocation and escape-to-markup processing  ******/  +/************************************************************************/  +  +/* MarkupIterator breaks the string into pieces of either literal  +   text, or things inside {} that need to be marked up.  it is  +   designed to make it easy to wrap a Python iterator around it, for  +   use with the Formatter class */  +  +typedef struct {  +    SubString str;  +} MarkupIterator;  +  +static int  +MarkupIterator_init(MarkupIterator *self, PyObject *str,  +                    Py_ssize_t start, Py_ssize_t end)  +{  +    SubString_init(&self->str, str, start, end);  +    return 1;  +}  +  +/* returns 0 on error, 1 on non-error termination, and 2 if it got a  +   string (or something to be expanded) */  +static int  +MarkupIterator_next(MarkupIterator *self, SubString *literal,  +                    int *field_present, SubString *field_name,  +                    SubString *format_spec, Py_UCS4 *conversion,  +                    int *format_spec_needs_expanding)  +{  +    int at_end;  +    Py_UCS4 c = 0;  +    Py_ssize_t start;  +    Py_ssize_t len;  +    int markup_follows = 0;  +  +    /* initialize all of the output variables */  +    SubString_init(literal, NULL, 0, 0);  +    SubString_init(field_name, NULL, 0, 0);  +    SubString_init(format_spec, NULL, 0, 0);  +    *conversion = '\0';  +    *format_spec_needs_expanding = 0;  +    *field_present = 0;  +  +    /* No more input, end of iterator.  This is the normal exit  +       path. */  +    if (self->str.start >= self->str.end)  +        return 1;  +  +    start = self->str.start;  +  +    /* First read any literal text. Read until the end of string, an  +       escaped '{' or '}', or an unescaped '{'.  In order to never  +       allocate memory and so I can just pass pointers around, if  +       there's an escaped '{' or '}' then we'll return the literal  +       including the brace, but no format object.  The next time  +       through, we'll return the rest of the literal, skipping past  +       the second consecutive brace. */  +    while (self->str.start < self->str.end) {  +        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {  +        case '{':  +        case '}':  +            markup_follows = 1;  +            break;  +        default:  +            continue;  +        }  +        break;  +    }  +  +    at_end = self->str.start >= self->str.end;  +    len = self->str.start - start;  +  +    if ((c == '}') && (at_end ||  +                       (c != PyUnicode_READ_CHAR(self->str.str,  +                                                 self->str.start)))) {  +        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "  +                        "in format string");  +        return 0;  +    }  +    if (at_end && c == '{') {  +        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "  +                        "in format string");  +        return 0;  +    }  +    if (!at_end) {  +        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {  +            /* escaped } or {, skip it in the input.  there is no  +               markup object following us, just this literal text */  +            self->str.start++;  +            markup_follows = 0;  +        }  +        else  +            len--;  +    }  +  +    /* record the literal text */  +    literal->str = self->str.str;  +    literal->start = start;  +    literal->end = start + len;  +  +    if (!markup_follows)  +        return 2;  +  +    /* this is markup; parse the field */  +    *field_present = 1;  +    if (!parse_field(&self->str, field_name, format_spec,  +                     format_spec_needs_expanding, conversion))  +        return 0;  +    return 2;  +}  +  +  +/* do the !r or !s conversion on obj */  +static PyObject *  +do_conversion(PyObject *obj, Py_UCS4 conversion)  +{  +    /* XXX in pre-3.0, do we need to convert this to unicode, since it  +       might have returned a string? */  +    switch (conversion) {  +    case 'r':  +        return PyObject_Repr(obj);  +    case 's':  +        return PyObject_Str(obj);  +    case 'a':  +        return PyObject_ASCII(obj);  +    default:  +        if (conversion > 32 && conversion < 127) {  +                /* It's the ASCII subrange; casting to char is safe  +                   (assuming the execution character set is an ASCII  +                   superset). */  +                PyErr_Format(PyExc_ValueError,  +                     "Unknown conversion specifier %c",  +                     (char)conversion);  +        } else  +                PyErr_Format(PyExc_ValueError,  +                     "Unknown conversion specifier \\x%x",  +                     (unsigned int)conversion);  +        return NULL;  +    }  +}  +  +/* given:  +  +   {field_name!conversion:format_spec}  +  +   compute the result and write it to output.  +   format_spec_needs_expanding is an optimization.  if it's false,  +   just output the string directly, otherwise recursively expand the  +   format_spec string.  +  +   field_name is allowed to be zero length, in which case we  +   are doing auto field numbering.  +*/  +  +static int  +output_markup(SubString *field_name, SubString *format_spec,  +              int format_spec_needs_expanding, Py_UCS4 conversion,  +              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,  +              int recursion_depth, AutoNumber *auto_number)  +{  +    PyObject *tmp = NULL;  +    PyObject *fieldobj = NULL;  +    SubString expanded_format_spec;  +    SubString *actual_format_spec;  +    int result = 0;  +  +    /* convert field_name to an object */  +    fieldobj = get_field_object(field_name, args, kwargs, auto_number);  +    if (fieldobj == NULL)  +        goto done;  +  +    if (conversion != '\0') {  +        tmp = do_conversion(fieldobj, conversion);  +        if (tmp == NULL || PyUnicode_READY(tmp) == -1)  +            goto done;  +  +        /* do the assignment, transferring ownership: fieldobj = tmp */  +        Py_DECREF(fieldobj);  +        fieldobj = tmp;  +        tmp = NULL;  +    }  +       /* if needed, recursively compute the format_spec */ -    if (format_spec_needs_expanding) { -        tmp = build_string(format_spec, args, kwargs, recursion_depth-1, -                           auto_number); -        if (tmp == NULL || PyUnicode_READY(tmp) == -1) -            goto done; - -        /* note that in the case we're expanding the format string, -           tmp must be kept around until after the call to -           render_field. */ -        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp)); -        actual_format_spec = &expanded_format_spec; -    } -    else -        actual_format_spec = format_spec; - -    if (render_field(fieldobj, actual_format_spec, writer) == 0) -        goto done; - -    result = 1; - -done: -    Py_XDECREF(fieldobj); -    Py_XDECREF(tmp); - -    return result; -} - -/* -    do_markup is the top-level loop for the format() method.  It -    searches through the format string for escapes to markup codes, and -    calls other functions to move non-markup text to the output, -    and to perform the markup to the output. -*/ -static int -do_markup(SubString *input, PyObject *args, PyObject *kwargs, -          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) -{ -    MarkupIterator iter; -    int format_spec_needs_expanding; -    int result; -    int field_present; -    SubString literal; -    SubString field_name; -    SubString format_spec; -    Py_UCS4 conversion; - -    MarkupIterator_init(&iter, input->str, input->start, input->end); -    while ((result = MarkupIterator_next(&iter, &literal, &field_present, -                                         &field_name, &format_spec, -                                         &conversion, -                                         &format_spec_needs_expanding)) == 2) { -        if (literal.end != literal.start) { -            if (!field_present && iter.str.start == iter.str.end) -                writer->overallocate = 0; -            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, -                                                literal.start, literal.end) < 0) -                return 0; -        } - -        if (field_present) { -            if (iter.str.start == iter.str.end) -                writer->overallocate = 0; -            if (!output_markup(&field_name, &format_spec, -                               format_spec_needs_expanding, conversion, writer, -                               args, kwargs, recursion_depth, auto_number)) -                return 0; -        } -    } -    return result; -} - - -/* -    build_string allocates the output string and then -    calls do_markup to do the heavy lifting. -*/ -static PyObject * -build_string(SubString *input, PyObject *args, PyObject *kwargs, -             int recursion_depth, AutoNumber *auto_number) -{ -    _PyUnicodeWriter writer; - -    /* check the recursion level */ -    if (recursion_depth <= 0) { -        PyErr_SetString(PyExc_ValueError, -                        "Max string recursion exceeded"); -        return NULL; -    } - -    _PyUnicodeWriter_Init(&writer); -    writer.overallocate = 1; -    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; - -    if (!do_markup(input, args, kwargs, &writer, recursion_depth, -                   auto_number)) { -        _PyUnicodeWriter_Dealloc(&writer); -        return NULL; -    } - -    return _PyUnicodeWriter_Finish(&writer); -} - -/************************************************************************/ -/*********** main routine ***********************************************/ -/************************************************************************/ - -/* this is the main entry point */ -static PyObject * -do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) -{ -    SubString input; - -    /* PEP 3101 says only 2 levels, so that -       "{0:{1}}".format('abc', 's')            # works -       "{0:{1:{2}}}".format('abc', 's', '')    # fails -    */ -    int recursion_depth = 2; - -    AutoNumber auto_number; - -    if (PyUnicode_READY(self) == -1) -        return NULL; - -    AutoNumber_Init(&auto_number); -    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self)); -    return build_string(&input, args, kwargs, recursion_depth, &auto_number); -} - -static PyObject * -do_string_format_map(PyObject *self, PyObject *obj) -{ -    return do_string_format(self, NULL, obj); -} - - -/************************************************************************/ -/*********** formatteriterator ******************************************/ -/************************************************************************/ - -/* This is used to implement string.Formatter.vparse().  It exists so -   Formatter can share code with the built in unicode.format() method. -   It's really just a wrapper around MarkupIterator that is callable -   from Python. */ - -typedef struct { -    PyObject_HEAD -    PyObject *str; -    MarkupIterator it_markup; -} formatteriterobject; - -static void -formatteriter_dealloc(formatteriterobject *it) -{ -    Py_XDECREF(it->str); -    PyObject_FREE(it); -} - -/* returns a tuple: -   (literal, field_name, format_spec, conversion) - -   literal is any literal text to output.  might be zero length -   field_name is the string before the ':'.  might be None -   format_spec is the string after the ':'.  mibht be None -   conversion is either None, or the string after the '!' -*/ -static PyObject * -formatteriter_next(formatteriterobject *it) -{ -    SubString literal; -    SubString field_name; -    SubString format_spec; -    Py_UCS4 conversion; -    int format_spec_needs_expanding; -    int field_present; -    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, -                                     &field_name, &format_spec, &conversion, -                                     &format_spec_needs_expanding); - -    /* all of the SubString objects point into it->str, so no -       memory management needs to be done on them */ -    assert(0 <= result && result <= 2); -    if (result == 0 || result == 1) -        /* if 0, error has already been set, if 1, iterator is empty */ -        return NULL; -    else { -        PyObject *literal_str = NULL; -        PyObject *field_name_str = NULL; -        PyObject *format_spec_str = NULL; -        PyObject *conversion_str = NULL; -        PyObject *tuple = NULL; - -        literal_str = SubString_new_object(&literal); -        if (literal_str == NULL) -            goto done; - -        field_name_str = SubString_new_object(&field_name); -        if (field_name_str == NULL) -            goto done; - -        /* if field_name is non-zero length, return a string for -           format_spec (even if zero length), else return None */ -        format_spec_str = (field_present ? -                           SubString_new_object_or_empty : -                           SubString_new_object)(&format_spec); -        if (format_spec_str == NULL) -            goto done; - -        /* if the conversion is not specified, return a None, -           otherwise create a one length string with the conversion -           character */ -        if (conversion == '\0') { -            conversion_str = Py_None; -            Py_INCREF(conversion_str); -        } -        else -            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, -                                                       &conversion, 1); -        if (conversion_str == NULL) -            goto done; - -        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, -                             conversion_str); -    done: -        Py_XDECREF(literal_str); -        Py_XDECREF(field_name_str); -        Py_XDECREF(format_spec_str); -        Py_XDECREF(conversion_str); -        return tuple; -    } -} - -static PyMethodDef formatteriter_methods[] = { -    {NULL,              NULL}           /* sentinel */ -}; - -static PyTypeObject PyFormatterIter_Type = { -    PyVarObject_HEAD_INIT(&PyType_Type, 0) -    "formatteriterator",                /* tp_name */ -    sizeof(formatteriterobject),        /* tp_basicsize */ -    0,                                  /* tp_itemsize */ -    /* methods */ -    (destructor)formatteriter_dealloc,  /* tp_dealloc */ +    if (format_spec_needs_expanding) {  +        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,  +                           auto_number);  +        if (tmp == NULL || PyUnicode_READY(tmp) == -1)  +            goto done;  +  +        /* note that in the case we're expanding the format string,  +           tmp must be kept around until after the call to  +           render_field. */  +        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));  +        actual_format_spec = &expanded_format_spec;  +    }  +    else  +        actual_format_spec = format_spec;  +  +    if (render_field(fieldobj, actual_format_spec, writer) == 0)  +        goto done;  +  +    result = 1;  +  +done:  +    Py_XDECREF(fieldobj);  +    Py_XDECREF(tmp);  +  +    return result;  +}  +  +/*  +    do_markup is the top-level loop for the format() method.  It  +    searches through the format string for escapes to markup codes, and  +    calls other functions to move non-markup text to the output,  +    and to perform the markup to the output.  +*/  +static int  +do_markup(SubString *input, PyObject *args, PyObject *kwargs,  +          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)  +{  +    MarkupIterator iter;  +    int format_spec_needs_expanding;  +    int result;  +    int field_present;  +    SubString literal;  +    SubString field_name;  +    SubString format_spec;  +    Py_UCS4 conversion;  +  +    MarkupIterator_init(&iter, input->str, input->start, input->end);  +    while ((result = MarkupIterator_next(&iter, &literal, &field_present,  +                                         &field_name, &format_spec,  +                                         &conversion,  +                                         &format_spec_needs_expanding)) == 2) {  +        if (literal.end != literal.start) {  +            if (!field_present && iter.str.start == iter.str.end)  +                writer->overallocate = 0;  +            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,  +                                                literal.start, literal.end) < 0)  +                return 0;  +        }  +  +        if (field_present) {  +            if (iter.str.start == iter.str.end)  +                writer->overallocate = 0;  +            if (!output_markup(&field_name, &format_spec,  +                               format_spec_needs_expanding, conversion, writer,  +                               args, kwargs, recursion_depth, auto_number))  +                return 0;  +        }  +    }  +    return result;  +}  +  +  +/*  +    build_string allocates the output string and then  +    calls do_markup to do the heavy lifting.  +*/  +static PyObject *  +build_string(SubString *input, PyObject *args, PyObject *kwargs,  +             int recursion_depth, AutoNumber *auto_number)  +{  +    _PyUnicodeWriter writer;  +  +    /* check the recursion level */  +    if (recursion_depth <= 0) {  +        PyErr_SetString(PyExc_ValueError,  +                        "Max string recursion exceeded");  +        return NULL;  +    }  +  +    _PyUnicodeWriter_Init(&writer);  +    writer.overallocate = 1;  +    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;  +  +    if (!do_markup(input, args, kwargs, &writer, recursion_depth,  +                   auto_number)) {  +        _PyUnicodeWriter_Dealloc(&writer);  +        return NULL;  +    }  +  +    return _PyUnicodeWriter_Finish(&writer);  +}  +  +/************************************************************************/  +/*********** main routine ***********************************************/  +/************************************************************************/  +  +/* this is the main entry point */  +static PyObject *  +do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)  +{  +    SubString input;  +  +    /* PEP 3101 says only 2 levels, so that  +       "{0:{1}}".format('abc', 's')            # works  +       "{0:{1:{2}}}".format('abc', 's', '')    # fails  +    */  +    int recursion_depth = 2;  +  +    AutoNumber auto_number;  +  +    if (PyUnicode_READY(self) == -1)  +        return NULL;  +  +    AutoNumber_Init(&auto_number);  +    SubString_init(&input, self, 0, PyUnicode_GET_LENGTH(self));  +    return build_string(&input, args, kwargs, recursion_depth, &auto_number);  +}  +  +static PyObject *  +do_string_format_map(PyObject *self, PyObject *obj)  +{  +    return do_string_format(self, NULL, obj);  +}  +  +  +/************************************************************************/  +/*********** formatteriterator ******************************************/  +/************************************************************************/  +  +/* This is used to implement string.Formatter.vparse().  It exists so  +   Formatter can share code with the built in unicode.format() method.  +   It's really just a wrapper around MarkupIterator that is callable  +   from Python. */  +  +typedef struct {  +    PyObject_HEAD  +    PyObject *str;  +    MarkupIterator it_markup;  +} formatteriterobject;  +  +static void  +formatteriter_dealloc(formatteriterobject *it)  +{  +    Py_XDECREF(it->str);  +    PyObject_FREE(it);  +}  +  +/* returns a tuple:  +   (literal, field_name, format_spec, conversion)  +  +   literal is any literal text to output.  might be zero length  +   field_name is the string before the ':'.  might be None  +   format_spec is the string after the ':'.  mibht be None  +   conversion is either None, or the string after the '!'  +*/  +static PyObject *  +formatteriter_next(formatteriterobject *it)  +{  +    SubString literal;  +    SubString field_name;  +    SubString format_spec;  +    Py_UCS4 conversion;  +    int format_spec_needs_expanding;  +    int field_present;  +    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,  +                                     &field_name, &format_spec, &conversion,  +                                     &format_spec_needs_expanding);  +  +    /* all of the SubString objects point into it->str, so no  +       memory management needs to be done on them */  +    assert(0 <= result && result <= 2);  +    if (result == 0 || result == 1)  +        /* if 0, error has already been set, if 1, iterator is empty */  +        return NULL;  +    else {  +        PyObject *literal_str = NULL;  +        PyObject *field_name_str = NULL;  +        PyObject *format_spec_str = NULL;  +        PyObject *conversion_str = NULL;  +        PyObject *tuple = NULL;  +  +        literal_str = SubString_new_object(&literal);  +        if (literal_str == NULL)  +            goto done;  +  +        field_name_str = SubString_new_object(&field_name);  +        if (field_name_str == NULL)  +            goto done;  +  +        /* if field_name is non-zero length, return a string for  +           format_spec (even if zero length), else return None */  +        format_spec_str = (field_present ?  +                           SubString_new_object_or_empty :  +                           SubString_new_object)(&format_spec);  +        if (format_spec_str == NULL)  +            goto done;  +  +        /* if the conversion is not specified, return a None,  +           otherwise create a one length string with the conversion  +           character */  +        if (conversion == '\0') {  +            conversion_str = Py_None;  +            Py_INCREF(conversion_str);  +        }  +        else  +            conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,  +                                                       &conversion, 1);  +        if (conversion_str == NULL)  +            goto done;  +  +        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,  +                             conversion_str);  +    done:  +        Py_XDECREF(literal_str);  +        Py_XDECREF(field_name_str);  +        Py_XDECREF(format_spec_str);  +        Py_XDECREF(conversion_str);  +        return tuple;  +    }  +}  +  +static PyMethodDef formatteriter_methods[] = {  +    {NULL,              NULL}           /* sentinel */  +};  +  +static PyTypeObject PyFormatterIter_Type = {  +    PyVarObject_HEAD_INIT(&PyType_Type, 0)  +    "formatteriterator",                /* tp_name */  +    sizeof(formatteriterobject),        /* tp_basicsize */  +    0,                                  /* tp_itemsize */  +    /* methods */  +    (destructor)formatteriter_dealloc,  /* tp_dealloc */       0,                                  /* tp_vectorcall_offset */ -    0,                                  /* tp_getattr */ -    0,                                  /* tp_setattr */ +    0,                                  /* tp_getattr */  +    0,                                  /* tp_setattr */       0,                                  /* tp_as_async */ -    0,                                  /* tp_repr */ -    0,                                  /* tp_as_number */ -    0,                                  /* tp_as_sequence */ -    0,                                  /* tp_as_mapping */ -    0,                                  /* tp_hash */ -    0,                                  /* tp_call */ -    0,                                  /* tp_str */ -    PyObject_GenericGetAttr,            /* tp_getattro */ -    0,                                  /* tp_setattro */ -    0,                                  /* tp_as_buffer */ -    Py_TPFLAGS_DEFAULT,                 /* tp_flags */ -    0,                                  /* tp_doc */ -    0,                                  /* tp_traverse */ -    0,                                  /* tp_clear */ -    0,                                  /* tp_richcompare */ -    0,                                  /* tp_weaklistoffset */ -    PyObject_SelfIter,                  /* tp_iter */ -    (iternextfunc)formatteriter_next,   /* tp_iternext */ -    formatteriter_methods,              /* tp_methods */ -    0, -}; - -/* unicode_formatter_parser is used to implement -   string.Formatter.vformat.  it parses a string and returns tuples -   describing the parsed elements.  It's a wrapper around -   stringlib/string_format.h's MarkupIterator */ -static PyObject * -formatter_parser(PyObject *ignored, PyObject *self) -{ -    formatteriterobject *it; - -    if (!PyUnicode_Check(self)) { -        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); -        return NULL; -    } - -    if (PyUnicode_READY(self) == -1) -        return NULL; - -    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); -    if (it == NULL) -        return NULL; - -    /* take ownership, give the object to the iterator */ -    Py_INCREF(self); -    it->str = self; - -    /* initialize the contained MarkupIterator */ -    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self)); -    return (PyObject *)it; -} - - -/************************************************************************/ -/*********** fieldnameiterator ******************************************/ -/************************************************************************/ - - -/* This is used to implement string.Formatter.vparse().  It parses the -   field name into attribute and item values.  It's a Python-callable -   wrapper around FieldNameIterator */ - -typedef struct { -    PyObject_HEAD -    PyObject *str; -    FieldNameIterator it_field; -} fieldnameiterobject; - -static void -fieldnameiter_dealloc(fieldnameiterobject *it) -{ -    Py_XDECREF(it->str); -    PyObject_FREE(it); -} - -/* returns a tuple: -   (is_attr, value) -   is_attr is true if we used attribute syntax (e.g., '.foo') -              false if we used index syntax (e.g., '[foo]') -   value is an integer or string -*/ -static PyObject * -fieldnameiter_next(fieldnameiterobject *it) -{ -    int result; -    int is_attr; -    Py_ssize_t idx; -    SubString name; - -    result = FieldNameIterator_next(&it->it_field, &is_attr, -                                    &idx, &name); -    if (result == 0 || result == 1) -        /* if 0, error has already been set, if 1, iterator is empty */ -        return NULL; -    else { -        PyObject* result = NULL; -        PyObject* is_attr_obj = NULL; -        PyObject* obj = NULL; - -        is_attr_obj = PyBool_FromLong(is_attr); -        if (is_attr_obj == NULL) -            goto done; - -        /* either an integer or a string */ -        if (idx != -1) -            obj = PyLong_FromSsize_t(idx); -        else -            obj = SubString_new_object(&name); -        if (obj == NULL) -            goto done; - -        /* return a tuple of values */ -        result = PyTuple_Pack(2, is_attr_obj, obj); - -    done: -        Py_XDECREF(is_attr_obj); -        Py_XDECREF(obj); -        return result; -    } -} - -static PyMethodDef fieldnameiter_methods[] = { -    {NULL,              NULL}           /* sentinel */ -}; - -static PyTypeObject PyFieldNameIter_Type = { -    PyVarObject_HEAD_INIT(&PyType_Type, 0) -    "fieldnameiterator",                /* tp_name */ -    sizeof(fieldnameiterobject),        /* tp_basicsize */ -    0,                                  /* tp_itemsize */ -    /* methods */ -    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */ +    0,                                  /* tp_repr */  +    0,                                  /* tp_as_number */  +    0,                                  /* tp_as_sequence */  +    0,                                  /* tp_as_mapping */  +    0,                                  /* tp_hash */  +    0,                                  /* tp_call */  +    0,                                  /* tp_str */  +    PyObject_GenericGetAttr,            /* tp_getattro */  +    0,                                  /* tp_setattro */  +    0,                                  /* tp_as_buffer */  +    Py_TPFLAGS_DEFAULT,                 /* tp_flags */  +    0,                                  /* tp_doc */  +    0,                                  /* tp_traverse */  +    0,                                  /* tp_clear */  +    0,                                  /* tp_richcompare */  +    0,                                  /* tp_weaklistoffset */  +    PyObject_SelfIter,                  /* tp_iter */  +    (iternextfunc)formatteriter_next,   /* tp_iternext */  +    formatteriter_methods,              /* tp_methods */  +    0,  +};  +  +/* unicode_formatter_parser is used to implement  +   string.Formatter.vformat.  it parses a string and returns tuples  +   describing the parsed elements.  It's a wrapper around  +   stringlib/string_format.h's MarkupIterator */  +static PyObject *  +formatter_parser(PyObject *ignored, PyObject *self)  +{  +    formatteriterobject *it;  +  +    if (!PyUnicode_Check(self)) {  +        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);  +        return NULL;  +    }  +  +    if (PyUnicode_READY(self) == -1)  +        return NULL;  +  +    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);  +    if (it == NULL)  +        return NULL;  +  +    /* take ownership, give the object to the iterator */  +    Py_INCREF(self);  +    it->str = self;  +  +    /* initialize the contained MarkupIterator */  +    MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));  +    return (PyObject *)it;  +}  +  +  +/************************************************************************/  +/*********** fieldnameiterator ******************************************/  +/************************************************************************/  +  +  +/* This is used to implement string.Formatter.vparse().  It parses the  +   field name into attribute and item values.  It's a Python-callable  +   wrapper around FieldNameIterator */  +  +typedef struct {  +    PyObject_HEAD  +    PyObject *str;  +    FieldNameIterator it_field;  +} fieldnameiterobject;  +  +static void  +fieldnameiter_dealloc(fieldnameiterobject *it)  +{  +    Py_XDECREF(it->str);  +    PyObject_FREE(it);  +}  +  +/* returns a tuple:  +   (is_attr, value)  +   is_attr is true if we used attribute syntax (e.g., '.foo')  +              false if we used index syntax (e.g., '[foo]')  +   value is an integer or string  +*/  +static PyObject *  +fieldnameiter_next(fieldnameiterobject *it)  +{  +    int result;  +    int is_attr;  +    Py_ssize_t idx;  +    SubString name;  +  +    result = FieldNameIterator_next(&it->it_field, &is_attr,  +                                    &idx, &name);  +    if (result == 0 || result == 1)  +        /* if 0, error has already been set, if 1, iterator is empty */  +        return NULL;  +    else {  +        PyObject* result = NULL;  +        PyObject* is_attr_obj = NULL;  +        PyObject* obj = NULL;  +  +        is_attr_obj = PyBool_FromLong(is_attr);  +        if (is_attr_obj == NULL)  +            goto done;  +  +        /* either an integer or a string */  +        if (idx != -1)  +            obj = PyLong_FromSsize_t(idx);  +        else  +            obj = SubString_new_object(&name);  +        if (obj == NULL)  +            goto done;  +  +        /* return a tuple of values */  +        result = PyTuple_Pack(2, is_attr_obj, obj);  +  +    done:  +        Py_XDECREF(is_attr_obj);  +        Py_XDECREF(obj);  +        return result;  +    }  +}  +  +static PyMethodDef fieldnameiter_methods[] = {  +    {NULL,              NULL}           /* sentinel */  +};  +  +static PyTypeObject PyFieldNameIter_Type = {  +    PyVarObject_HEAD_INIT(&PyType_Type, 0)  +    "fieldnameiterator",                /* tp_name */  +    sizeof(fieldnameiterobject),        /* tp_basicsize */  +    0,                                  /* tp_itemsize */  +    /* methods */  +    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */       0,                                  /* tp_vectorcall_offset */ -    0,                                  /* tp_getattr */ -    0,                                  /* tp_setattr */ +    0,                                  /* tp_getattr */  +    0,                                  /* tp_setattr */       0,                                  /* tp_as_async */ -    0,                                  /* tp_repr */ -    0,                                  /* tp_as_number */ -    0,                                  /* tp_as_sequence */ -    0,                                  /* tp_as_mapping */ -    0,                                  /* tp_hash */ -    0,                                  /* tp_call */ -    0,                                  /* tp_str */ -    PyObject_GenericGetAttr,            /* tp_getattro */ -    0,                                  /* tp_setattro */ -    0,                                  /* tp_as_buffer */ -    Py_TPFLAGS_DEFAULT,                 /* tp_flags */ -    0,                                  /* tp_doc */ -    0,                                  /* tp_traverse */ -    0,                                  /* tp_clear */ -    0,                                  /* tp_richcompare */ -    0,                                  /* tp_weaklistoffset */ -    PyObject_SelfIter,                  /* tp_iter */ -    (iternextfunc)fieldnameiter_next,   /* tp_iternext */ -    fieldnameiter_methods,              /* tp_methods */ -    0}; - -/* unicode_formatter_field_name_split is used to implement -   string.Formatter.vformat.  it takes a PEP 3101 "field name", and -   returns a tuple of (first, rest): "first", the part before the -   first '.' or '['; and "rest", an iterator for the rest of the field -   name.  it's a wrapper around stringlib/string_format.h's -   field_name_split.  The iterator it returns is a -   FieldNameIterator */ -static PyObject * -formatter_field_name_split(PyObject *ignored, PyObject *self) -{ -    SubString first; -    Py_ssize_t first_idx; -    fieldnameiterobject *it; - -    PyObject *first_obj = NULL; -    PyObject *result = NULL; - -    if (!PyUnicode_Check(self)) { -        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name); -        return NULL; -    } - -    if (PyUnicode_READY(self) == -1) -        return NULL; - -    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); -    if (it == NULL) -        return NULL; - -    /* take ownership, give the object to the iterator.  this is -       just to keep the field_name alive */ -    Py_INCREF(self); -    it->str = self; - -    /* Pass in auto_number = NULL. We'll return an empty string for -       first_obj in that case. */ -    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self), -                          &first, &first_idx, &it->it_field, NULL)) -        goto done; - -    /* first becomes an integer, if possible; else a string */ -    if (first_idx != -1) -        first_obj = PyLong_FromSsize_t(first_idx); -    else -        /* convert "first" into a string object */ -        first_obj = SubString_new_object(&first); -    if (first_obj == NULL) -        goto done; - -    /* return a tuple of values */ -    result = PyTuple_Pack(2, first_obj, it); - -done: -    Py_XDECREF(it); -    Py_XDECREF(first_obj); -    return result; -} +    0,                                  /* tp_repr */  +    0,                                  /* tp_as_number */  +    0,                                  /* tp_as_sequence */  +    0,                                  /* tp_as_mapping */  +    0,                                  /* tp_hash */  +    0,                                  /* tp_call */  +    0,                                  /* tp_str */  +    PyObject_GenericGetAttr,            /* tp_getattro */  +    0,                                  /* tp_setattro */  +    0,                                  /* tp_as_buffer */  +    Py_TPFLAGS_DEFAULT,                 /* tp_flags */  +    0,                                  /* tp_doc */  +    0,                                  /* tp_traverse */  +    0,                                  /* tp_clear */  +    0,                                  /* tp_richcompare */  +    0,                                  /* tp_weaklistoffset */  +    PyObject_SelfIter,                  /* tp_iter */  +    (iternextfunc)fieldnameiter_next,   /* tp_iternext */  +    fieldnameiter_methods,              /* tp_methods */  +    0};  +  +/* unicode_formatter_field_name_split is used to implement  +   string.Formatter.vformat.  it takes a PEP 3101 "field name", and  +   returns a tuple of (first, rest): "first", the part before the  +   first '.' or '['; and "rest", an iterator for the rest of the field  +   name.  it's a wrapper around stringlib/string_format.h's  +   field_name_split.  The iterator it returns is a  +   FieldNameIterator */  +static PyObject *  +formatter_field_name_split(PyObject *ignored, PyObject *self)  +{  +    SubString first;  +    Py_ssize_t first_idx;  +    fieldnameiterobject *it;  +  +    PyObject *first_obj = NULL;  +    PyObject *result = NULL;  +  +    if (!PyUnicode_Check(self)) {  +        PyErr_Format(PyExc_TypeError, "expected str, got %s", Py_TYPE(self)->tp_name);  +        return NULL;  +    }  +  +    if (PyUnicode_READY(self) == -1)  +        return NULL;  +  +    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);  +    if (it == NULL)  +        return NULL;  +  +    /* take ownership, give the object to the iterator.  this is  +       just to keep the field_name alive */  +    Py_INCREF(self);  +    it->str = self;  +  +    /* Pass in auto_number = NULL. We'll return an empty string for  +       first_obj in that case. */  +    if (!field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),  +                          &first, &first_idx, &it->it_field, NULL))  +        goto done;  +  +    /* first becomes an integer, if possible; else a string */  +    if (first_idx != -1)  +        first_obj = PyLong_FromSsize_t(first_idx);  +    else  +        /* convert "first" into a string object */  +        first_obj = SubString_new_object(&first);  +    if (first_obj == NULL)  +        goto done;  +  +    /* return a tuple of values */  +    result = PyTuple_Pack(2, first_obj, it);  +  +done:  +    Py_XDECREF(it);  +    Py_XDECREF(first_obj);  +    return result;  +}  diff --git a/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h b/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h index 3db5629e11f..f9ab32796e6 100644 --- a/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h +++ b/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h @@ -1,32 +1,32 @@ -#ifndef STRINGLIB_UNICODEDEFS_H -#define STRINGLIB_UNICODEDEFS_H - -/* this is sort of a hack.  there's at least one place (formatting -   floats) where some stringlib code takes a different path if it's -   compiled as unicode. */ -#define STRINGLIB_IS_UNICODE     1 - -#define FASTSEARCH               fastsearch -#define STRINGLIB(F)             stringlib_##F -#define STRINGLIB_OBJECT         PyUnicodeObject -#define STRINGLIB_SIZEOF_CHAR    Py_UNICODE_SIZE -#define STRINGLIB_CHAR           Py_UNICODE -#define STRINGLIB_TYPE_NAME      "unicode" -#define STRINGLIB_PARSE_CODE     "U" -#define STRINGLIB_EMPTY          unicode_empty -#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE -#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK -#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL -#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL -#define STRINGLIB_STR            PyUnicode_AS_UNICODE -#define STRINGLIB_LEN            PyUnicode_GET_SIZE -#define STRINGLIB_NEW            PyUnicode_FromUnicode -#define STRINGLIB_CHECK          PyUnicode_Check -#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact - -#define STRINGLIB_TOSTR          PyObject_Str -#define STRINGLIB_TOASCII        PyObject_ASCII - -#define STRINGLIB_WANT_CONTAINS_OBJ 1 - -#endif /* !STRINGLIB_UNICODEDEFS_H */ +#ifndef STRINGLIB_UNICODEDEFS_H  +#define STRINGLIB_UNICODEDEFS_H  +  +/* this is sort of a hack.  there's at least one place (formatting  +   floats) where some stringlib code takes a different path if it's  +   compiled as unicode. */  +#define STRINGLIB_IS_UNICODE     1  +  +#define FASTSEARCH               fastsearch  +#define STRINGLIB(F)             stringlib_##F  +#define STRINGLIB_OBJECT         PyUnicodeObject  +#define STRINGLIB_SIZEOF_CHAR    Py_UNICODE_SIZE  +#define STRINGLIB_CHAR           Py_UNICODE  +#define STRINGLIB_TYPE_NAME      "unicode"  +#define STRINGLIB_PARSE_CODE     "U"  +#define STRINGLIB_EMPTY          unicode_empty  +#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE  +#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK  +#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL  +#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL  +#define STRINGLIB_STR            PyUnicode_AS_UNICODE  +#define STRINGLIB_LEN            PyUnicode_GET_SIZE  +#define STRINGLIB_NEW            PyUnicode_FromUnicode  +#define STRINGLIB_CHECK          PyUnicode_Check  +#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact  +  +#define STRINGLIB_TOSTR          PyObject_Str  +#define STRINGLIB_TOASCII        PyObject_ASCII  +  +#define STRINGLIB_WANT_CONTAINS_OBJ 1  +  +#endif /* !STRINGLIB_UNICODEDEFS_H */  | 
