about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/tools/python3/src/Objects/stringlib
diff options
context:
space:
mode:
author shadchin <shadchin@yandex-team.com> 2024-02-12 07:53:52 +0300
committer Daniil Cherednik <dcherednik@ydb.tech> 2024-02-14 14:26:16 +0000
commit 31f2a419764a8ba77c2a970cfc80056c6cd06756 (patch)
tree c1995d239eba8571cefc640f6648e1d5dd4ce9e2 /contrib/tools/python3/src/Objects/stringlib
parent fe2ef02b38d9c85d80060963b265a1df9f38c3bb (diff)
download ydb-31f2a419764a8ba77c2a970cfc80056c6cd06756.tar.gz
Update Python from 3.11.8 to 3.12.2
Diffstat (limited to 'contrib/tools/python3/src/Objects/stringlib')
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/asciilib.h | 1
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h | 33
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/count.h | 7
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/eq.h | 10
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/fastsearch.h | 41
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/join.h | 3
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/localeutil.h | 2
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/replace.h | 4
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/stringdefs.h | 1
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/transmogrify.h | 3
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/ucs1lib.h | 1
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/ucs2lib.h | 4
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/ucs4lib.h | 4
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/undef.h | 1
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/unicode_format.h | 15
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/unicodedefs.h | 32
16 files changed, 86 insertions, 76 deletions
diff --git a/contrib/tools/python3/src/Objects/stringlib/asciilib.h b/contrib/tools/python3/src/Objects/stringlib/asciilib.h
index eebe888e41..b3016bfbbb 100644
--- a/contrib/tools/python3/src/Objects/stringlib/asciilib.h
+++ b/contrib/tools/python3/src/Objects/stringlib/asciilib.h
@@ -21,6 +21,7 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
+#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
diff --git a/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h b/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h
index b88517bd36..49388cf043 100644
--- a/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h
+++ b/contrib/tools/python3/src/Objects/stringlib/clinic/transmogrify.h.h
@@ -2,6 +2,12 @@
preserve
[clinic start generated code]*/
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+# include "pycore_gc.h" // PyGC_Head
+# include "pycore_runtime.h" // _Py_ID()
+#endif
+
+
PyDoc_STRVAR(stringlib_expandtabs__doc__,
"expandtabs($self, /, tabsize=8)\n"
"--\n"
@@ -20,8 +26,31 @@ static PyObject *
stringlib_expandtabs(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(tabsize), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
static const char * const _keywords[] = {"tabsize", NULL};
- static _PyArg_Parser _parser = {NULL, _keywords, "expandtabs", 0};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "expandtabs",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
PyObject *argsbuf[1];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
int tabsize = 8;
@@ -249,4 +278,4 @@ stringlib_zfill(PyObject *self, PyObject *arg)
exit:
return return_value;
}
-/*[clinic end generated code: output=46d058103bffedf7 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=d44a269805f6739e input=a9049054013a1b77]*/
diff --git a/contrib/tools/python3/src/Objects/stringlib/count.h b/contrib/tools/python3/src/Objects/stringlib/count.h
index f48500bf56..e20edcd104 100644
--- a/contrib/tools/python3/src/Objects/stringlib/count.h
+++ b/contrib/tools/python3/src/Objects/stringlib/count.h
@@ -4,6 +4,11 @@
#error must include "stringlib/fastsearch.h" before including this module
#endif
+// gh-97982: Implementing asciilib_count() is not worth it, FASTSEARCH() does
+// not specialize the code for ASCII strings. Use ucs1lib_count() for ASCII and
+// UCS1 strings: it's the same as asciilib_count().
+#if !STRINGLIB_IS_UNICODE || STRINGLIB_MAX_CHAR > 0x7Fu
+
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
@@ -24,4 +29,4 @@ STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
return count;
}
-
+#endif
diff --git a/contrib/tools/python3/src/Objects/stringlib/eq.h b/contrib/tools/python3/src/Objects/stringlib/eq.h
index 9c1058b86c..2eac4baf5c 100644
--- a/contrib/tools/python3/src/Objects/stringlib/eq.h
+++ b/contrib/tools/python3/src/Objects/stringlib/eq.h
@@ -4,16 +4,8 @@
* unicode_eq() is called when the hash of two unicode objects is equal.
*/
Py_LOCAL_INLINE(int)
-unicode_eq(PyObject *aa, PyObject *bb)
+unicode_eq(PyObject *a, PyObject *b)
{
- assert(PyUnicode_Check(aa));
- assert(PyUnicode_Check(bb));
- assert(PyUnicode_IS_READY(aa));
- assert(PyUnicode_IS_READY(bb));
-
- PyUnicodeObject *a = (PyUnicodeObject *)aa;
- PyUnicodeObject *b = (PyUnicodeObject *)bb;
-
if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b))
return 0;
if (PyUnicode_GET_LENGTH(a) == 0)
diff --git a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
index 7403d8a3f7..257b7bd678 100644
--- a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
+++ b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
@@ -18,7 +18,8 @@
algorithm, which has worst-case O(n) runtime and best-case O(n/k).
Also compute a table of shifts to achieve O(n/k) in more cases,
and often (data dependent) deduce larger shifts than pure C&P can
- deduce. */
+ deduce. See stringlib_find_two_way_notes.txt in this folder for a
+ detailed explanation. */
#define FAST_COUNT 0
#define FAST_SEARCH 1
@@ -39,7 +40,7 @@
#define STRINGLIB_BLOOM(mask, ch) \
((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-#if STRINGLIB_SIZEOF_CHAR == 1
+#ifdef STRINGLIB_FAST_MEMCHR
# define MEMCHR_CUT_OFF 15
#else
# define MEMCHR_CUT_OFF 40
@@ -53,8 +54,8 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
p = s;
e = s + n;
if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memchr(s, ch, n);
+#ifdef STRINGLIB_FAST_MEMCHR
+ p = STRINGLIB_FAST_MEMCHR(s, ch, n);
if (p != NULL)
return (p - s);
return -1;
@@ -102,16 +103,26 @@ STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1;
}
+#undef MEMCHR_CUT_OFF
+
+#if STRINGLIB_SIZEOF_CHAR == 1
+# define MEMRCHR_CUT_OFF 15
+#else
+# define MEMRCHR_CUT_OFF 40
+#endif
+
+
Py_LOCAL_INLINE(Py_ssize_t)
STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
{
const STRINGLIB_CHAR *p;
#ifdef HAVE_MEMRCHR
- /* memrchr() is a GNU extension, available since glibc 2.1.91.
- it doesn't seem as optimized as memchr(), but is still quite
- faster than our hand-written loop below */
+ /* memrchr() is a GNU extension, available since glibc 2.1.91. It
+ doesn't seem as optimized as memchr(), but is still considerably
+ faster than our hand-written loop below. There is no wmemrchr
+ for 4-byte chars. */
- if (n > MEMCHR_CUT_OFF) {
+ if (n > MEMRCHR_CUT_OFF) {
#if STRINGLIB_SIZEOF_CHAR == 1
p = memrchr(s, ch, n);
if (p != NULL)
@@ -139,11 +150,11 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
if (*p == ch)
return n;
/* False positive */
- if (n1 - n > MEMCHR_CUT_OFF)
+ if (n1 - n > MEMRCHR_CUT_OFF)
continue;
- if (n <= MEMCHR_CUT_OFF)
+ if (n <= MEMRCHR_CUT_OFF)
break;
- s1 = p - MEMCHR_CUT_OFF;
+ s1 = p - MEMRCHR_CUT_OFF;
while (p > s1) {
p--;
if (*p == ch)
@@ -151,7 +162,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
}
n = p - s;
}
- while (n > MEMCHR_CUT_OFF);
+ while (n > MEMRCHR_CUT_OFF);
}
#endif
}
@@ -165,7 +176,7 @@ STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
return -1;
}
-#undef MEMCHR_CUT_OFF
+#undef MEMRCHR_CUT_OFF
/* Change to a 1 to see logging comments walk through the algorithm. */
#if 0 && STRINGLIB_SIZEOF_CHAR == 1
@@ -388,7 +399,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
if (window_last >= haystack_end) {
return -1;
}
- LOG("Horspool skip");
+ LOG("Horspool skip\n");
}
no_shift:
window = window_last - len_needle + 1;
@@ -447,7 +458,7 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
if (window_last >= haystack_end) {
return -1;
}
- LOG("Horspool skip");
+ LOG("Horspool skip\n");
}
window = window_last - len_needle + 1;
assert((window[len_needle - 1] & TABLE_MASK) ==
diff --git a/contrib/tools/python3/src/Objects/stringlib/join.h b/contrib/tools/python3/src/Objects/stringlib/join.h
index bb011f7db7..de6bd83ffe 100644
--- a/contrib/tools/python3/src/Objects/stringlib/join.h
+++ b/contrib/tools/python3/src/Objects/stringlib/join.h
@@ -63,8 +63,7 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
item = PySequence_Fast_GET_ITEM(seq, i);
if (PyBytes_CheckExact(item)) {
/* Fast path. */
- Py_INCREF(item);
- buffers[i].obj = item;
+ buffers[i].obj = Py_NewRef(item);
buffers[i].buf = PyBytes_AS_STRING(item);
buffers[i].len = PyBytes_GET_SIZE(item);
}
diff --git a/contrib/tools/python3/src/Objects/stringlib/localeutil.h b/contrib/tools/python3/src/Objects/stringlib/localeutil.h
index bd16e0a172..d77715ec0d 100644
--- a/contrib/tools/python3/src/Objects/stringlib/localeutil.h
+++ b/contrib/tools/python3/src/Objects/stringlib/localeutil.h
@@ -75,7 +75,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
if (n_zeros) {
*buffer_pos -= n_zeros;
- enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
+ int kind = PyUnicode_KIND(writer->buffer);
void *data = PyUnicode_DATA(writer->buffer);
unicode_fill(kind, data, '0', *buffer_pos, n_zeros);
}
diff --git a/contrib/tools/python3/src/Objects/stringlib/replace.h b/contrib/tools/python3/src/Objects/stringlib/replace.h
index ef318ed6dd..123c9f850f 100644
--- a/contrib/tools/python3/src/Objects/stringlib/replace.h
+++ b/contrib/tools/python3/src/Objects/stringlib/replace.h
@@ -29,9 +29,9 @@ STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
if (!--attempts) {
/* if u1 was not found for attempts iterations,
use FASTSEARCH() or memchr() */
-#if STRINGLIB_SIZEOF_CHAR == 1
+#ifdef STRINGLIB_FAST_MEMCHR
s++;
- s = memchr(s, u1, end - s);
+ s = STRINGLIB_FAST_MEMCHR(s, u1, end - s);
if (s == NULL)
return;
#else
diff --git a/contrib/tools/python3/src/Objects/stringlib/stringdefs.h b/contrib/tools/python3/src/Objects/stringlib/stringdefs.h
index 88641b25d4..484b98b729 100644
--- a/contrib/tools/python3/src/Objects/stringlib/stringdefs.h
+++ b/contrib/tools/python3/src/Objects/stringlib/stringdefs.h
@@ -24,4 +24,5 @@
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_Repr
+#define STRINGLIB_FAST_MEMCHR memchr
#endif /* !STRINGLIB_STRINGDEFS_H */
diff --git a/contrib/tools/python3/src/Objects/stringlib/transmogrify.h b/contrib/tools/python3/src/Objects/stringlib/transmogrify.h
index e1165ea38e..71099bb586 100644
--- a/contrib/tools/python3/src/Objects/stringlib/transmogrify.h
+++ b/contrib/tools/python3/src/Objects/stringlib/transmogrify.h
@@ -17,8 +17,7 @@ return_self(PyObject *self)
{
#if !STRINGLIB_MUTABLE
if (STRINGLIB_CHECK_EXACT(self)) {
- Py_INCREF(self);
- return self;
+ return Py_NewRef(self);
}
#endif
return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h
index 026ab11f1f..1b9b65ecba 100644
--- a/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h
+++ b/contrib/tools/python3/src/Objects/stringlib/ucs1lib.h
@@ -20,6 +20,7 @@
#define STRINGLIB_NEW _PyUnicode_FromUCS1
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
+#define STRINGLIB_FAST_MEMCHR memchr
#define STRINGLIB_MUTABLE 0
#define STRINGLIB_TOSTR PyObject_Str
diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h
index 75f11bc290..4b49bbb31d 100644
--- a/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h
+++ b/contrib/tools/python3/src/Objects/stringlib/ucs2lib.h
@@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
+#if SIZEOF_WCHAR_T == 2
+#define STRINGLIB_FAST_MEMCHR(s, c, n) \
+ (Py_UCS2 *)wmemchr((const wchar_t *)(s), c, n)
+#endif
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
diff --git a/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h b/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h
index 57344f235b..def4ca5d17 100644
--- a/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h
+++ b/contrib/tools/python3/src/Objects/stringlib/ucs4lib.h
@@ -21,6 +21,10 @@
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
#define STRINGLIB_MUTABLE 0
+#if SIZEOF_WCHAR_T == 4
+#define STRINGLIB_FAST_MEMCHR(s, c, n) \
+ (Py_UCS4 *)wmemchr((const wchar_t *)(s), c, n)
+#endif
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
diff --git a/contrib/tools/python3/src/Objects/stringlib/undef.h b/contrib/tools/python3/src/Objects/stringlib/undef.h
index bf32298505..cc873a2ec4 100644
--- a/contrib/tools/python3/src/Objects/stringlib/undef.h
+++ b/contrib/tools/python3/src/Objects/stringlib/undef.h
@@ -8,3 +8,4 @@
#undef STRINGLIB_NEW
#undef STRINGLIB_IS_UNICODE
#undef STRINGLIB_MUTABLE
+#undef STRINGLIB_FAST_MEMCHR
diff --git a/contrib/tools/python3/src/Objects/stringlib/unicode_format.h b/contrib/tools/python3/src/Objects/stringlib/unicode_format.h
index a4eea7b919..ccd7c77c0a 100644
--- a/contrib/tools/python3/src/Objects/stringlib/unicode_format.h
+++ b/contrib/tools/python3/src/Objects/stringlib/unicode_format.h
@@ -473,8 +473,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
goto error;
/* assign to obj */
- Py_DECREF(obj);
- obj = tmp;
+ Py_SETREF(obj, tmp);
}
/* end of iterator, this is the non-error case */
if (ok == 1)
@@ -825,8 +824,7 @@ output_markup(SubString *field_name, SubString *format_spec,
goto done;
/* do the assignment, transferring ownership: fieldobj = tmp */
- Py_DECREF(fieldobj);
- fieldobj = tmp;
+ Py_SETREF(fieldobj, tmp);
tmp = NULL;
}
@@ -1042,8 +1040,7 @@ formatteriter_next(formatteriterobject *it)
otherwise create a one length string with the conversion
character */
if (conversion == '\0') {
- conversion_str = Py_None;
- Py_INCREF(conversion_str);
+ conversion_str = Py_NewRef(Py_None);
}
else
conversion_str = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
@@ -1121,8 +1118,7 @@ formatter_parser(PyObject *ignored, PyObject *self)
return NULL;
/* take ownership, give the object to the iterator */
- Py_INCREF(self);
- it->str = self;
+ it->str = Py_NewRef(self);
/* initialize the contained MarkupIterator */
MarkupIterator_init(&it->it_markup, (PyObject*)self, 0, PyUnicode_GET_LENGTH(self));
@@ -1265,8 +1261,7 @@ formatter_field_name_split(PyObject *ignored, PyObject *self)
/* take ownership, give the object to the iterator. this is
just to keep the field_name alive */
- Py_INCREF(self);
- it->str = self;
+ it->str = Py_NewRef(self);
/* Pass in auto_number = NULL. We'll return an empty string for
first_obj in that case. */
diff --git a/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h b/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h
deleted file mode 100644
index ba2ce0aeea..0000000000
--- a/contrib/tools/python3/src/Objects/stringlib/unicodedefs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef STRINGLIB_UNICODEDEFS_H
-#define STRINGLIB_UNICODEDEFS_H
-
-/* this is sort of a hack. there's at least one place (formatting
- floats) where some stringlib code takes a different path if it's
- compiled as unicode. */
-#define STRINGLIB_IS_UNICODE 1
-
-#define FASTSEARCH fastsearch
-#define STRINGLIB(F) stringlib_##F
-#define STRINGLIB_OBJECT PyUnicodeObject
-#define STRINGLIB_SIZEOF_CHAR Py_UNICODE_SIZE
-#define STRINGLIB_CHAR Py_UNICODE
-#define STRINGLIB_TYPE_NAME "unicode"
-#define STRINGLIB_PARSE_CODE "U"
-#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
-#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
-#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
-#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
-#define STRINGLIB_STR PyUnicode_AS_UNICODE
-#define STRINGLIB_LEN PyUnicode_GET_SIZE
-#define STRINGLIB_NEW PyUnicode_FromUnicode
-#define STRINGLIB_CHECK PyUnicode_Check
-#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
-#define STRINGLIB_MUTABLE 0
-
-#define STRINGLIB_TOSTR PyObject_Str
-#define STRINGLIB_TOASCII PyObject_ASCII
-
-#define STRINGLIB_WANT_CONTAINS_OBJ 1
-
-#endif /* !STRINGLIB_UNICODEDEFS_H */