diff options
author | shadchin <shadchin@yandex-team.com> | 2024-02-12 07:53:52 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@ydb.tech> | 2024-02-14 14:26:16 +0000 |
commit | 31f2a419764a8ba77c2a970cfc80056c6cd06756 (patch) | |
tree | c1995d239eba8571cefc640f6648e1d5dd4ce9e2 /contrib/tools/python3/src/Objects/stringlib | |
parent | fe2ef02b38d9c85d80060963b265a1df9f38c3bb (diff) | |
download | ydb-31f2a419764a8ba77c2a970cfc80056c6cd06756.tar.gz |
Update Python from 3.11.8 to 3.12.2
Diffstat (limited to 'contrib/tools/python3/src/Objects/stringlib')
16 files changed, 86 insertions, 76 deletions
diff --git a/contrib/tools/python3/src/Objects/stringlib/asciilib.h b/contrib/tools/python3/src/Objects/stringlib/asciilib.h index eebe888e41..b3016bfbbb 100644 --- a/contrib/tools/python3/src/Objects/stringlib/asciilib.h +++ b/contrib/tools/python3/src/Objects/stringlib/asciilib.h @@ -21,6 +21,7 @@ #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_MUTABLE 0 +#define STRINGLIB_FAST_MEMCHR memchr #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII diff --git a/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h b/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h index b88517bd36..49388cf043 100644 --- a/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h +++ b/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h @@ -2,6 +2,12 @@ preserve [clinic start generated code]*/ +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif + + PyDoc_STRVAR(stringlib_expandtabs__doc__, "expandtabs($self, /, tabsize=8)\n" "--\n" @@ -20,8 +26,31 @@ static PyObject * stringlib_expandtabs(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(tabsize), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + static const char * const _keywords[] = {"tabsize", NULL}; - static _PyArg_Parser _parser = {NULL, _keywords, "expandtabs", 0}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "expandtabs", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE PyObject *argsbuf[1]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; int tabsize = 8; @@ -249,4 +278,4 @@ stringlib_zfill(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=46d058103bffedf7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d44a269805f6739e input=a9049054013a1b77]*/ diff --git a/contrib/tools/python3/src/Objects/stringlib/count.h b/contrib/tools/python3/src/Objects/stringlib/count.h index f48500bf56..e20edcd104 100644 --- a/contrib/tools/python3/src/Objects/stringlib/count.h +++ b/contrib/tools/python3/src/Objects/stringlib/count.h @@ -4,6 +4,11 @@ #error must include "stringlib/fastsearch.h" before including this module #endif +// gh-97982: Implementing asciilib_count() is not worth it, FASTSEARCH() does +// not specialize the code for ASCII strings. Use ucs1lib_count() for ASCII and +// UCS1 strings: it's the same than asciilib_count(). +#if !STRINGLIB_IS_UNICODE || STRINGLIB_MAX_CHAR > 0x7Fu + Py_LOCAL_INLINE(Py_ssize_t) STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, const STRINGLIB_CHAR* sub, Py_ssize_t sub_len, @@ -24,4 +29,4 @@ STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len, return count; } - +#endif diff --git a/contrib/tools/python3/src/Objects/stringlib/eq.h b/contrib/tools/python3/src/Objects/stringlib/eq.h index 9c1058b86c..2eac4baf5c 100644 --- a/contrib/tools/python3/src/Objects/stringlib/eq.h +++ b/contrib/tools/python3/src/Objects/stringlib/eq.h @@ -4,16 +4,8 @@ * unicode_eq() is called when the hash of two unicode objects is equal. */ Py_LOCAL_INLINE(int) -unicode_eq(PyObject *aa, PyObject *bb) +unicode_eq(PyObject *a, PyObject *b) { - assert(PyUnicode_Check(aa)); - assert(PyUnicode_Check(bb)); - assert(PyUnicode_IS_READY(aa)); - assert(PyUnicode_IS_READY(bb)); - - PyUnicodeObject *a = (PyUnicodeObject *)aa; - PyUnicodeObject *b = (PyUnicodeObject *)bb; - if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b)) return 0; if (PyUnicode_GET_LENGTH(a) == 0) diff --git a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h index 7403d8a3f7..257b7bd678 100644 --- a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h +++ b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h @@ -18,7 +18,8 @@ algorithm, which has worst-case O(n) runtime and best-case O(n/k). Also compute a table of shifts to achieve O(n/k) in more cases, and often (data dependent) deduce larger shifts than pure C&P can - deduce. */ + deduce. See stringlib_find_two_way_notes.txt in this folder for a + detailed explanation. */ #define FAST_COUNT 0 #define FAST_SEARCH 1 @@ -39,7 +40,7 @@ #define STRINGLIB_BLOOM(mask, ch) \ ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) -#if STRINGLIB_SIZEOF_CHAR == 1 +#ifdef STRINGLIB_FAST_MEMCHR # define MEMCHR_CUT_OFF 15 #else # define MEMCHR_CUT_OFF 40 @@ -53,8 +54,8 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) p = s; e = s + n; if (n > MEMCHR_CUT_OFF) { -#if STRINGLIB_SIZEOF_CHAR == 1 - p = memchr(s, ch, n); +#ifdef STRINGLIB_FAST_MEMCHR + p = STRINGLIB_FAST_MEMCHR(s, ch, n); if (p != NULL) return (p - s); return -1; @@ -102,16 +103,26 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) return -1; } +#undef MEMCHR_CUT_OFF + +#if STRINGLIB_SIZEOF_CHAR == 1 +# define MEMRCHR_CUT_OFF 15 +#else +# define MEMRCHR_CUT_OFF 40 +#endif + + Py_LOCAL_INLINE(Py_ssize_t) STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) { const STRINGLIB_CHAR *p; #ifdef HAVE_MEMRCHR - /* memrchr() is a GNU extension, available since glibc 2.1.91. - it doesn't seem as optimized as memchr(), but is still quite - faster than our hand-written loop below */ + /* memrchr() is a GNU extension, available since glibc 2.1.91. it + doesn't seem as optimized as memchr(), but is still quite + faster than our hand-written loop below. There is no wmemrchr + for 4-byte chars. */ - if (n > MEMCHR_CUT_OFF) { + if (n > MEMRCHR_CUT_OFF) { #if STRINGLIB_SIZEOF_CHAR == 1 p = memrchr(s, ch, n); if (p != NULL) @@ -139,11 +150,11 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) if (*p == ch) return n; /* False positive */ - if (n1 - n > MEMCHR_CUT_OFF) + if (n1 - n > MEMRCHR_CUT_OFF) continue; - if (n <= MEMCHR_CUT_OFF) + if (n <= MEMRCHR_CUT_OFF) break; - s1 = p - MEMCHR_CUT_OFF; + s1 = p - MEMRCHR_CUT_OFF; while (p > s1) { p--; if (*p == ch) @@ -151,7 +162,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) } n = p - s; } - while (n > MEMCHR_CUT_OFF); + while (n > MEMRCHR_CUT_OFF); } #endif } @@ -165,7 +176,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) return -1; } -#undef MEMCHR_CUT_OFF +#undef MEMRCHR_CUT_OFF /* Change to a 1 to see logging comments walk through the algorithm. */ #if 0 && STRINGLIB_SIZEOF_CHAR == 1 @@ -388,7 +399,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, if (window_last >= haystack_end) { return -1; } - LOG("Horspool skip"); + LOG("Horspool skip\n"); } no_shift: window = window_last - len_needle + 1; @@ -447,7 +458,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, if (window_last >= haystack_end) { return -1; } - LOG("Horspool skip"); + LOG("Horspool skip\n"); } window = window_last - len_needle + 1; assert((window[len_needle - 1] & TABLE_MASK) == diff --git a/contrib/tools/python3/src/Objects/stringlib/join.h b/contrib/tools/python3/src/Objects/stringlib/join.h index bb011f7db7..de6bd83ffe 100644 --- a/contrib/tools/python3/src/Objects/stringlib/join.h +++ b/contrib/tools/python3/src/Objects/stringlib/join.h @@ -63,8 +63,7 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) item = PySequence_Fast_GET_ITEM(seq, i); if (PyBytes_CheckExact(item)) { /* Fast path. */ - Py_INCREF(item); - buffers[i].obj = item; + buffers[i].obj = Py_NewRef(item); buffers[i].buf = PyBytes_AS_STRING(item); buffers[i].len = PyBytes_GET_SIZE(item); } diff --git a/contrib/tools/python3/src/Objects/stringlib/localeutil.h b/contrib/tools/python3/src/Objects/stringlib/localeutil.h index bd16e0a172..d77715ec0d 100644 --- a/contrib/tools/python3/src/Objects/stringlib/localeutil.h +++ b/contrib/tools/python3/src/Objects/stringlib/localeutil.h @@ -75,7 +75,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, if (n_zeros) { *buffer_pos -= n_zeros; - enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer); + int kind = PyUnicode_KIND(writer->buffer); void *data = PyUnicode_DATA(writer->buffer); unicode_fill(kind, data, '0', *buffer_pos, n_zeros); } diff --git a/contrib/tools/python3/src/Objects/stringlib/replace.h b/contrib/tools/python3/src/Objects/stringlib/replace.h index ef318ed6dd..123c9f850f 100644 --- a/contrib/tools/python3/src/Objects/stringlib/replace.h +++ b/contrib/tools/python3/src/Objects/stringlib/replace.h @@ -29,9 +29,9 @@ STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end, if (!--attempts) { /* if u1 was not found for attempts iterations, use FASTSEARCH() or memchr() */ -#if STRINGLIB_SIZEOF_CHAR == 1 +#ifdef STRINGLIB_FAST_MEMCHR s++; - s = memchr(s, u1, end - s); + s = STRINGLIB_FAST_MEMCHR(s, u1, end - s); if (s == NULL) return; #else diff --git a/contrib/tools/python3/src/Objects/stringlib/stringdefs.h b/contrib/tools/python3/src/Objects/stringlib/stringdefs.h index 88641b25d4..484b98b729 100644 --- a/contrib/tools/python3/src/Objects/stringlib/stringdefs.h +++ b/contrib/tools/python3/src/Objects/stringlib/stringdefs.h @@ -24,4 +24,5 @@ #define STRINGLIB_CHECK_EXACT PyBytes_CheckExact #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_Repr +#define STRINGLIB_FAST_MEMCHR memchr #endif /* !STRINGLIB_STRINGDEFS_H */ diff --git a/contrib/tools/python3/src/Objects/stringlib/transmogrify.h b/contrib/tools/python3/src/Objects/stringlib/transmogrify.h index e1165ea38e..71099bb586 100644 --- a/contrib/tools/python3/src/Objects/stringlib/transmogrify.h +++ b/contrib/tools/python3/src/Objects/stringlib/transmogrify.h @@ -17,8 +17,7 @@ return_self(PyObject *self) { #if !STRINGLIB_MUTABLE if (STRINGLIB_CHECK_EXACT(self)) { - Py_INCREF(self); - return self; + return Py_NewRef(self); } #endif return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h index 026ab11f1f..1b9b65ecba 100644 --- a/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h +++ b/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h @@ -20,6 +20,7 @@ #define STRINGLIB_NEW _PyUnicode_FromUCS1 #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact +#define STRINGLIB_FAST_MEMCHR memchr #define STRINGLIB_MUTABLE 0 #define STRINGLIB_TOSTR PyObject_Str diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h index 75f11bc290..4b49bbb31d 100644 --- a/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h +++ b/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h @@ -21,6 +21,10 @@ #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_MUTABLE 0 +#if SIZEOF_WCHAR_T == 2 +#define STRINGLIB_FAST_MEMCHR(s, c, n) \ + (Py_UCS2 *)wmemchr((const wchar_t *)(s), c, n) +#endif #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h index 57344f235b..def4ca5d17 100644 --- a/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h +++ b/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h @@ -21,6 +21,10 @@ #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact #define STRINGLIB_MUTABLE 0 +#if SIZEOF_WCHAR_T == 4 +#define STRINGLIB_FAST_MEMCHR(s, c, n) \ + (Py_UCS4 *)wmemchr((const wchar_t *)(s), c, n) +#endif #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII diff --git a/contrib/tools/python3/src/Objects/stringlib/undef.h b/contrib/tools/python3/src/Objects/stringlib/undef.h index bf32298505..cc873a2ec4 100644 --- a/contrib/tools/python3/src/Objects/stringlib/undef.h +++ b/contrib/tools/python3/src/Objects/stringlib/undef.h @@ -8,3 +8,4 @@ #undef STRINGLIB_NEW #undef STRINGLIB_IS_UNICODE #undef STRINGLIB_MUTABLE +#undef STRINGLIB_FAST_MEMCHR diff --git a/contrib/tools/python3/src/Objects/stringlib/unicode_format.h b/contrib/tools/python3/src/Objects/stringlib/unicode_format.h index a4eea7b919..ccd7c77c0a 100644 --- a/contrib/tools/python3/src/Objects/stringlib/unicode_format.h +++ b/contrib/tools/python3/src/Objects/stringlib/unicode_format.h @@ -473,8 +473,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs, goto error; /* assign to obj */ - Py_DECREF(obj); - obj = tmp; + Py_SETREF(obj, tmp); } /* end of iterator, this is the non-error case */ if (ok == 1) @@ -825,8 +824,7 @@ output_markup(SubString *field_name, SubString *format_spec, goto done; /* do the assignment, transferring ownership: fieldobj = tmp */ - Py_DECREF(fieldobj); - fieldobj = tmp; + Py_SETREF(fieldobj, tmp); tmp = NULL; } @@ -1042,8 +1040,7 @@ formatteriter_next(formatteriterobject *it) otherwise create a one length string with the conversion character */ if (conversion == '\0') { - conversion_str = Py_None; - Py_INCREF(conversion_str); + conversion_str = Py_NewRef(Py_None); } else conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, @@ -1121,8 +1118,7 @@ formatter_parser(PyObject *ignored, PyObject *self) return NULL; /* take ownership, give the object to the iterator */ - Py_INCREF(self); - it->str = self; + it->str = Py_NewRef(self); /* initialize the contained MarkupIterator */ MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self)); @@ -1265,8 +1261,7 @@ formatter_field_name_split(PyObject *ignored, PyObject *self) /* take ownership, give the object to the iterator. this is just to keep the field_name alive */ - Py_INCREF(self); - it->str = self; + it->str = Py_NewRef(self); /* Pass in auto_number = NULL. We'll return an empty string for first_obj in that case. */ diff --git a/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h b/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h deleted file mode 100644 index ba2ce0aeea..0000000000 --- a/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef STRINGLIB_UNICODEDEFS_H -#define STRINGLIB_UNICODEDEFS_H - -/* this is sort of a hack. there's at least one place (formatting - floats) where some stringlib code takes a different path if it's - compiled as unicode. */ -#define STRINGLIB_IS_UNICODE 1 - -#define FASTSEARCH fastsearch -#define STRINGLIB(F) stringlib_##F -#define STRINGLIB_OBJECT PyUnicodeObject -#define STRINGLIB_SIZEOF_CHAR Py_UNICODE_SIZE -#define STRINGLIB_CHAR Py_UNICODE -#define STRINGLIB_TYPE_NAME "unicode" -#define STRINGLIB_PARSE_CODE "U" -#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE -#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK -#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL -#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL -#define STRINGLIB_STR PyUnicode_AS_UNICODE -#define STRINGLIB_LEN PyUnicode_GET_SIZE -#define STRINGLIB_NEW PyUnicode_FromUnicode -#define STRINGLIB_CHECK PyUnicode_Check -#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#define STRINGLIB_MUTABLE 0 - -#define STRINGLIB_TOSTR PyObject_Str -#define STRINGLIB_TOASCII PyObject_ASCII - -#define STRINGLIB_WANT_CONTAINS_OBJ 1 - -#endif /* !STRINGLIB_UNICODEDEFS_H */ |