summary | refs | log | tree | commit | diff | stats
path: root/contrib/tools/python3/src/Objects
diff options
context:
space:
mode:
author: arcadia-devtools <[email protected]> 2022-06-09 19:02:01 +0300
committer: arcadia-devtools <[email protected]> 2022-06-09 19:02:01 +0300
commit: 4a29d649866ff133e0b8f8a1009e1000a44d7279 (patch)
tree: 547229aded91b3760628c646a144af604f1c3e2b /contrib/tools/python3/src/Objects
parent: 782f2445a283aed9a66e699137b3349af1689c29 (diff)
intermediate changes
ref:478170c7a5a1c0788ddd0d6513ce4ed86d7d7c99
Diffstat (limited to 'contrib/tools/python3/src/Objects')
-rw-r--r-- contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h | 34
-rw-r--r-- contrib/tools/python3/src/Objects/exceptions.c | 9
-rw-r--r-- contrib/tools/python3/src/Objects/floatobject.c | 2
-rw-r--r-- contrib/tools/python3/src/Objects/frameobject.c | 5
-rw-r--r-- contrib/tools/python3/src/Objects/genericaliasobject.c | 10
-rw-r--r-- contrib/tools/python3/src/Objects/genobject.c | 14
-rw-r--r-- contrib/tools/python3/src/Objects/listobject.c | 33
-rw-r--r-- contrib/tools/python3/src/Objects/picklebufobject.c | 2
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/codecs.h | 15
-rw-r--r-- contrib/tools/python3/src/Objects/stringlib/fastsearch.h | 3
-rw-r--r-- contrib/tools/python3/src/Objects/typeobject.c | 20
-rw-r--r-- contrib/tools/python3/src/Objects/unicodeobject.c | 90
-rw-r--r-- contrib/tools/python3/src/Objects/unionobject.c | 4
13 files changed, 146 insertions, 95 deletions
diff --git a/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h b/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h
index 9ef8ce2e353..803b5f2353f 100644
--- a/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h
+++ b/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h
@@ -858,15 +858,21 @@ PyDoc_STRVAR(unicode_split__doc__,
"split($self, /, sep=None, maxsplit=-1)\n"
"--\n"
"\n"
-"Return a list of the words in the string, using sep as the delimiter string.\n"
+"Return a list of the substrings in the string, using sep as the separator string.\n"
"\n"
" sep\n"
-" The delimiter according which to split the string.\n"
-" None (the default value) means split according to any whitespace,\n"
-" and discard empty strings from the result.\n"
+" The separator used to split the string.\n"
+"\n"
+" When set to None (the default value), will split on any whitespace\n"
+" character (including \\\\n \\\\r \\\\t \\\\f and spaces) and will discard\n"
+" empty strings from the result.\n"
" maxsplit\n"
-" Maximum number of splits to do.\n"
-" -1 (the default value) means no limit.");
+" Maximum number of splits (starting from the left).\n"
+" -1 (the default value) means no limit.\n"
+"\n"
+"Note, str.split() is mainly useful for data that has been intentionally\n"
+"delimited. With natural text that includes punctuation, consider using\n"
+"the regular expression module.");
#define UNICODE_SPLIT_METHODDEF \
{"split", (PyCFunction)(void(*)(void))unicode_split, METH_FASTCALL|METH_KEYWORDS, unicode_split__doc__},
@@ -953,17 +959,19 @@ PyDoc_STRVAR(unicode_rsplit__doc__,
"rsplit($self, /, sep=None, maxsplit=-1)\n"
"--\n"
"\n"
-"Return a list of the words in the string, using sep as the delimiter string.\n"
+"Return a list of the substrings in the string, using sep as the separator string.\n"
"\n"
" sep\n"
-" The delimiter according which to split the string.\n"
-" None (the default value) means split according to any whitespace,\n"
-" and discard empty strings from the result.\n"
+" The separator used to split the string.\n"
+"\n"
+" When set to None (the default value), will split on any whitespace\n"
+" character (including \\\\n \\\\r \\\\t \\\\f and spaces) and will discard\n"
+" empty strings from the result.\n"
" maxsplit\n"
-" Maximum number of splits to do.\n"
+" Maximum number of splits (starting from the left).\n"
" -1 (the default value) means no limit.\n"
"\n"
-"Splits are done starting at the end of the string and working to the front.");
+"Splitting starts at the end of the string and works to the front.");
#define UNICODE_RSPLIT_METHODDEF \
{"rsplit", (PyCFunction)(void(*)(void))unicode_rsplit, METH_FASTCALL|METH_KEYWORDS, unicode_rsplit__doc__},
@@ -1327,4 +1335,4 @@ skip_optional_pos:
exit:
return return_value;
}
-/*[clinic end generated code: output=f10cf85d3935b3b7 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=c494bed46209961d input=a9049054013a1b77]*/
diff --git a/contrib/tools/python3/src/Objects/exceptions.c b/contrib/tools/python3/src/Objects/exceptions.c
index 6537a7ccd1e..9639b4436a0 100644
--- a/contrib/tools/python3/src/Objects/exceptions.c
+++ b/contrib/tools/python3/src/Objects/exceptions.c
@@ -847,14 +847,7 @@ oserror_parse_args(PyObject **p_args,
winerrcode = PyLong_AsLong(*winerror);
if (winerrcode == -1 && PyErr_Occurred())
return -1;
- /* Set errno to the corresponding POSIX errno (overriding
- first argument). Windows Socket error codes (>= 10000)
- have the same value as their POSIX counterparts.
- */
- if (winerrcode < 10000)
- errcode = winerror_to_errno(winerrcode);
- else
- errcode = winerrcode;
+ errcode = winerror_to_errno(winerrcode);
*myerrno = PyLong_FromLong(errcode);
if (!*myerrno)
return -1;
diff --git a/contrib/tools/python3/src/Objects/floatobject.c b/contrib/tools/python3/src/Objects/floatobject.c
index 2e02f37f4a5..5af26787731 100644
--- a/contrib/tools/python3/src/Objects/floatobject.c
+++ b/contrib/tools/python3/src/Objects/floatobject.c
@@ -64,7 +64,7 @@ static PyStructSequence_Field floatinfo_fields[] = {
{"min_exp", "DBL_MIN_EXP -- minimum int e such that radix**(e-1) "
"is a normalized float"},
{"min_10_exp", "DBL_MIN_10_EXP -- minimum int e such that 10**e is "
- "a normalized"},
+ "a normalized float"},
{"dig", "DBL_DIG -- maximum number of decimal digits that "
"can be faithfully represented in a float"},
{"mant_dig", "DBL_MANT_DIG -- mantissa digits"},
diff --git a/contrib/tools/python3/src/Objects/frameobject.c b/contrib/tools/python3/src/Objects/frameobject.c
index d02cf9d3ba9..be84d33bf52 100644
--- a/contrib/tools/python3/src/Objects/frameobject.c
+++ b/contrib/tools/python3/src/Objects/frameobject.c
@@ -195,7 +195,10 @@ markblocks(PyCodeObject *code_obj, int len)
break;
case GET_ITER:
case GET_AITER:
- block_stack = push_block(block_stack, Loop);
+ // For-loops get a Loop block, but comprehensions do not.
+ if (_Py_OPCODE(code[i + 1]) != CALL_FUNCTION) {
+ block_stack = push_block(block_stack, Loop);
+ }
blocks[i+1] = block_stack;
break;
case FOR_ITER:
diff --git a/contrib/tools/python3/src/Objects/genericaliasobject.c b/contrib/tools/python3/src/Objects/genericaliasobject.c
index dbe5d89b739..f52bc974f4d 100644
--- a/contrib/tools/python3/src/Objects/genericaliasobject.c
+++ b/contrib/tools/python3/src/Objects/genericaliasobject.c
@@ -349,6 +349,11 @@ _Py_subs_parameters(PyObject *self, PyObject *args, PyObject *parameters, PyObje
return newargs;
}
+PyDoc_STRVAR(genericalias__doc__,
+"Represent a PEP 585 generic type\n"
+"\n"
+"E.g. for t = list[int], t.__origin__ is list and t.__args__ is (int,).");
+
static PyObject *
ga_getitem(PyObject *self, PyObject *item)
{
@@ -628,14 +633,11 @@ static PyNumberMethods ga_as_number = {
// TODO:
// - argument clinic?
-// - __doc__?
// - cache?
PyTypeObject Py_GenericAliasType = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "types.GenericAlias",
- .tp_doc = "Represent a PEP 585 generic type\n"
- "\n"
- "E.g. for t = list[int], t.__origin__ is list and t.__args__ is (int,).",
+ .tp_doc = genericalias__doc__,
.tp_basicsize = sizeof(gaobject),
.tp_dealloc = ga_dealloc,
.tp_repr = ga_repr,
diff --git a/contrib/tools/python3/src/Objects/genobject.c b/contrib/tools/python3/src/Objects/genobject.c
index 33fc4a59249..123c17aae7e 100644
--- a/contrib/tools/python3/src/Objects/genobject.c
+++ b/contrib/tools/python3/src/Objects/genobject.c
@@ -403,8 +403,11 @@ gen_close(PyGenObject *gen, PyObject *args)
PyDoc_STRVAR(throw_doc,
-"throw(typ[,val[,tb]]) -> raise exception in generator,\n\
-return next yielded value or raise StopIteration.");
+"throw(value)\n\
+throw(type[,value[,tb]])\n\
+\n\
+Raise exception in generator, return next yielded value or raise\n\
+StopIteration.");
static PyObject *
_gen_throw(PyGenObject *gen, int close_on_genexit,
@@ -1001,8 +1004,11 @@ PyDoc_STRVAR(coro_send_doc,
return next iterated value or raise StopIteration.");
PyDoc_STRVAR(coro_throw_doc,
-"throw(typ[,val[,tb]]) -> raise exception in coroutine,\n\
-return next iterated value or raise StopIteration.");
+"throw(value)\n\
+throw(type[,value[,traceback]])\n\
+\n\
+Raise exception in coroutine, return next iterated value or raise\n\
+StopIteration.");
PyDoc_STRVAR(coro_close_doc,
"close() -> raise GeneratorExit inside coroutine.");
diff --git a/contrib/tools/python3/src/Objects/listobject.c b/contrib/tools/python3/src/Objects/listobject.c
index 533ee7436d3..7f37b738605 100644
--- a/contrib/tools/python3/src/Objects/listobject.c
+++ b/contrib/tools/python3/src/Objects/listobject.c
@@ -863,7 +863,6 @@ list_extend(PyListObject *self, PyObject *iterable)
PyObject *it; /* iter(v) */
Py_ssize_t m; /* size of self */
Py_ssize_t n; /* guess for size of iterable */
- Py_ssize_t mn; /* m + n */
Py_ssize_t i;
PyObject *(*iternext)(PyObject *);
@@ -887,7 +886,13 @@ list_extend(PyListObject *self, PyObject *iterable)
/* It should not be possible to allocate a list large enough to cause
an overflow on any relevant platform */
assert(m < PY_SSIZE_T_MAX - n);
- if (list_resize(self, m + n) < 0) {
+ if (self->ob_item == NULL) {
+ if (list_preallocate_exact(self, n) < 0) {
+ return NULL;
+ }
+ Py_SET_SIZE(self, n);
+ }
+ else if (list_resize(self, m + n) < 0) {
Py_DECREF(iterable);
return NULL;
}
@@ -926,10 +931,13 @@ list_extend(PyListObject *self, PyObject *iterable)
* eventually run out of memory during the loop.
*/
}
+ else if (self->ob_item == NULL) {
+ if (n && list_preallocate_exact(self, n) < 0)
+ goto error;
+ }
else {
- mn = m + n;
/* Make room. */
- if (list_resize(self, mn) < 0)
+ if (list_resize(self, m + n) < 0)
goto error;
/* Make the list sane again. */
Py_SET_SIZE(self, m);
@@ -1548,8 +1556,10 @@ static void
merge_freemem(MergeState *ms)
{
assert(ms != NULL);
- if (ms->a.keys != ms->temparray)
+ if (ms->a.keys != ms->temparray) {
PyMem_Free(ms->a.keys);
+ ms->a.keys = NULL;
+ }
}
/* Ensure enough temp memory for 'need' array slots is available.
@@ -2715,19 +2725,6 @@ list___init___impl(PyListObject *self, PyObject *iterable)
(void)_list_clear(self);
}
if (iterable != NULL) {
- if (_PyObject_HasLen(iterable)) {
- Py_ssize_t iter_len = PyObject_Size(iterable);
- if (iter_len == -1) {
- if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
- return -1;
- }
- PyErr_Clear();
- }
- if (iter_len > 0 && self->ob_item == NULL
- && list_preallocate_exact(self, iter_len)) {
- return -1;
- }
- }
PyObject *rv = list_extend(self, iterable);
if (rv == NULL)
return -1;
diff --git a/contrib/tools/python3/src/Objects/picklebufobject.c b/contrib/tools/python3/src/Objects/picklebufobject.c
index a135e5575e2..aaa852cfbb0 100644
--- a/contrib/tools/python3/src/Objects/picklebufobject.c
+++ b/contrib/tools/python3/src/Objects/picklebufobject.c
@@ -206,7 +206,7 @@ static PyMethodDef picklebuf_methods[] = {
PyTypeObject PyPickleBuffer_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "pickle.PickleBuffer",
- .tp_doc = "Wrapper for potentially out-of-band buffers",
+ .tp_doc = PyDoc_STR("Wrapper for potentially out-of-band buffers"),
.tp_basicsize = sizeof(PyPickleBufferObject),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
.tp_new = picklebuf_new,
diff --git a/contrib/tools/python3/src/Objects/stringlib/codecs.h b/contrib/tools/python3/src/Objects/stringlib/codecs.h
index b17cda18f54..958cc861478 100644
--- a/contrib/tools/python3/src/Objects/stringlib/codecs.h
+++ b/contrib/tools/python3/src/Objects/stringlib/codecs.h
@@ -387,8 +387,19 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
if (!rep)
goto error;
- /* subtract preallocated bytes */
- writer->min_size -= max_char_size * (newpos - startpos);
+ if (newpos < startpos) {
+ writer->overallocate = 1;
+ p = _PyBytesWriter_Prepare(writer, p,
+ max_char_size * (startpos - newpos));
+ if (p == NULL)
+ goto error;
+ }
+ else {
+ /* subtract preallocated bytes */
+ writer->min_size -= max_char_size * (newpos - startpos);
+ /* Only overallocate the buffer if it's not the last write */
+ writer->overallocate = (newpos < size);
+ }
if (PyBytes_Check(rep)) {
p = _PyBytesWriter_WriteBytes(writer, p,
diff --git a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
index 6574720b609..7b8be5d6492 100644
--- a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
+++ b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
@@ -4,7 +4,8 @@
/* fast search/count implementation, based on a mix between boyer-
moore and horspool, with a few more bells and whistles on the top.
- for some more background, see: http://effbot.org/zone/stringlib.htm */
+ for some more background, see:
+ https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
/* note: fastsearch may access s[n], which isn't a problem when using
Python's ordinary string types, but may cause problems if you're
diff --git a/contrib/tools/python3/src/Objects/typeobject.c b/contrib/tools/python3/src/Objects/typeobject.c
index b3ba1208eb2..50f2742f676 100644
--- a/contrib/tools/python3/src/Objects/typeobject.c
+++ b/contrib/tools/python3/src/Objects/typeobject.c
@@ -369,22 +369,26 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
Py_ssize_t i, n;
int custom = !Py_IS_TYPE(type, &PyType_Type);
int unbound;
- PyObject *mro_meth = NULL;
- PyObject *type_mro_meth = NULL;
if (custom) {
+ PyObject *mro_meth, *type_mro_meth;
mro_meth = lookup_maybe_method(
(PyObject *)type, &PyId_mro, &unbound);
- if (mro_meth == NULL)
+ if (mro_meth == NULL) {
goto clear;
+ }
type_mro_meth = lookup_maybe_method(
(PyObject *)&PyType_Type, &PyId_mro, &unbound);
- if (type_mro_meth == NULL)
+ if (type_mro_meth == NULL) {
+ Py_DECREF(mro_meth);
goto clear;
- if (mro_meth != type_mro_meth)
+ }
+ int custom_mro = (mro_meth != type_mro_meth);
+ Py_DECREF(mro_meth);
+ Py_DECREF(type_mro_meth);
+ if (custom_mro) {
goto clear;
- Py_XDECREF(mro_meth);
- Py_XDECREF(type_mro_meth);
+ }
}
n = PyTuple_GET_SIZE(bases);
for (i = 0; i < n; i++) {
@@ -400,8 +404,6 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
}
return;
clear:
- Py_XDECREF(mro_meth);
- Py_XDECREF(type_mro_meth);
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; /* 0 is not a valid version tag */
}
diff --git a/contrib/tools/python3/src/Objects/unicodeobject.c b/contrib/tools/python3/src/Objects/unicodeobject.c
index 077cf8d7f45..b7ec1f28d7e 100644
--- a/contrib/tools/python3/src/Objects/unicodeobject.c
+++ b/contrib/tools/python3/src/Objects/unicodeobject.c
@@ -5373,7 +5373,7 @@ _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
/* Note: size will always be longer than the resulting Unicode
character count */
- if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1)) {
+ if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1 < size) {
return -1;
}
@@ -5959,7 +5959,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
pos = 0;
while (pos < len) {
- Py_ssize_t repsize, moreunits;
+ Py_ssize_t newpos, repsize, moreunits;
if (kind == PyUnicode_2BYTE_KIND) {
pos += ucs2lib_utf32_encode((const Py_UCS2 *)data + pos, len - pos,
@@ -5976,7 +5976,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
rep = unicode_encode_call_errorhandler(
errors, &errorHandler,
encoding, "surrogates not allowed",
- str, &exc, pos, pos + 1, &pos);
+ str, &exc, pos, pos + 1, &newpos);
if (!rep)
goto error;
@@ -5984,7 +5984,7 @@ _PyUnicode_EncodeUTF32(PyObject *str,
repsize = PyBytes_GET_SIZE(rep);
if (repsize & 3) {
raise_encode_exception(&exc, encoding,
- str, pos - 1, pos,
+ str, pos, pos + 1,
"surrogates not allowed");
goto error;
}
@@ -5997,28 +5997,30 @@ _PyUnicode_EncodeUTF32(PyObject *str,
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
if (!PyUnicode_IS_ASCII(rep)) {
raise_encode_exception(&exc, encoding,
- str, pos - 1, pos,
+ str, pos, pos + 1,
"surrogates not allowed");
goto error;
}
}
+ moreunits += pos - newpos;
+ pos = newpos;
/* four bytes are reserved for each surrogate */
- if (moreunits > 1) {
+ if (moreunits > 0) {
Py_ssize_t outpos = out - (uint32_t*) PyBytes_AS_STRING(v);
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 4) {
/* integer overflow */
PyErr_NoMemory();
goto error;
}
- if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * (moreunits - 1)) < 0)
+ if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * moreunits) < 0)
goto error;
out = (uint32_t*) PyBytes_AS_STRING(v) + outpos;
}
if (PyBytes_Check(rep)) {
memcpy(out, PyBytes_AS_STRING(rep), repsize);
- out += moreunits;
+ out += repsize / 4;
} else /* rep is unicode */ {
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize,
@@ -6311,7 +6313,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
pos = 0;
while (pos < len) {
- Py_ssize_t repsize, moreunits;
+ Py_ssize_t newpos, repsize, moreunits;
if (kind == PyUnicode_2BYTE_KIND) {
pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
@@ -6328,7 +6330,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
rep = unicode_encode_call_errorhandler(
errors, &errorHandler,
encoding, "surrogates not allowed",
- str, &exc, pos, pos + 1, &pos);
+ str, &exc, pos, pos + 1, &newpos);
if (!rep)
goto error;
@@ -6336,7 +6338,7 @@ _PyUnicode_EncodeUTF16(PyObject *str,
repsize = PyBytes_GET_SIZE(rep);
if (repsize & 1) {
raise_encode_exception(&exc, encoding,
- str, pos - 1, pos,
+ str, pos, pos + 1,
"surrogates not allowed");
goto error;
}
@@ -6349,28 +6351,30 @@ _PyUnicode_EncodeUTF16(PyObject *str,
moreunits = repsize = PyUnicode_GET_LENGTH(rep);
if (!PyUnicode_IS_ASCII(rep)) {
raise_encode_exception(&exc, encoding,
- str, pos - 1, pos,
+ str, pos, pos + 1,
"surrogates not allowed");
goto error;
}
}
+ moreunits += pos - newpos;
+ pos = newpos;
/* two bytes are reserved for each surrogate */
- if (moreunits > 1) {
+ if (moreunits > 0) {
Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 2) {
/* integer overflow */
PyErr_NoMemory();
goto error;
}
- if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * (moreunits - 1)) < 0)
+ if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * moreunits) < 0)
goto error;
out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
}
if (PyBytes_Check(rep)) {
memcpy(out, PyBytes_AS_STRING(rep), repsize);
- out += moreunits;
+ out += repsize / 2;
} else /* rep is unicode */ {
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
@@ -7297,8 +7301,19 @@ unicode_encode_ucs1(PyObject *unicode,
if (rep == NULL)
goto onError;
- /* subtract preallocated bytes */
- writer.min_size -= newpos - collstart;
+ if (newpos < collstart) {
+ writer.overallocate = 1;
+ str = _PyBytesWriter_Prepare(&writer, str,
+ collstart - newpos);
+ if (str == NULL)
+ goto onError;
+ }
+ else {
+ /* subtract preallocated bytes */
+ writer.min_size -= newpos - collstart;
+ /* Only overallocate the buffer if it's not the last write */
+ writer.overallocate = (newpos < size);
+ }
if (PyBytes_Check(rep)) {
/* Directly copy bytes result to output. */
@@ -8104,13 +8119,14 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
pos, pos + 1, &newpos);
if (rep == NULL)
goto error;
- pos = newpos;
+ Py_ssize_t morebytes = pos - newpos;
if (PyBytes_Check(rep)) {
outsize = PyBytes_GET_SIZE(rep);
- if (outsize != 1) {
+ morebytes += outsize;
+ if (morebytes > 0) {
Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
- newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
+ newoutsize = PyBytes_GET_SIZE(*outbytes) + morebytes;
if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
Py_DECREF(rep);
goto error;
@@ -8131,9 +8147,10 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
}
outsize = PyUnicode_GET_LENGTH(rep);
- if (outsize != 1) {
+ morebytes += outsize;
+ if (morebytes > 0) {
Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes);
- newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1);
+ newoutsize = PyBytes_GET_SIZE(*outbytes) + morebytes;
if (_PyBytes_Resize(outbytes, newoutsize) < 0) {
Py_DECREF(rep);
goto error;
@@ -8156,6 +8173,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
out++;
}
}
+ pos = newpos;
Py_DECREF(rep);
}
/* write a NUL byte */
@@ -13478,19 +13496,26 @@ PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
str.split as unicode_split
sep: object = None
- The delimiter according which to split the string.
- None (the default value) means split according to any whitespace,
- and discard empty strings from the result.
+ The separator used to split the string.
+
+ When set to None (the default value), will split on any whitespace
+ character (including \\n \\r \\t \\f and spaces) and will discard
+ empty strings from the result.
maxsplit: Py_ssize_t = -1
- Maximum number of splits to do.
+ Maximum number of splits (starting from the left).
-1 (the default value) means no limit.
-Return a list of the words in the string, using sep as the delimiter string.
+Return a list of the substrings in the string, using sep as the separator string.
+
+Note, str.split() is mainly useful for data that has been intentionally
+delimited. With natural text that includes punctuation, consider using
+the regular expression module.
+
[clinic start generated code]*/
static PyObject *
unicode_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
-/*[clinic end generated code: output=3a65b1db356948dc input=606e750488a82359]*/
+/*[clinic end generated code: output=3a65b1db356948dc input=906d953b44efc43b]*/
{
if (sep == Py_None)
return split(self, NULL, maxsplit);
@@ -13661,14 +13686,14 @@ PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit)
/*[clinic input]
str.rsplit as unicode_rsplit = str.split
-Return a list of the words in the string, using sep as the delimiter string.
+Return a list of the substrings in the string, using sep as the separator string.
-Splits are done starting at the end of the string and working to the front.
+Splitting starts at the end of the string and works to the front.
[clinic start generated code]*/
static PyObject *
unicode_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
-/*[clinic end generated code: output=c2b815c63bcabffc input=12ad4bf57dd35f15]*/
+/*[clinic end generated code: output=c2b815c63bcabffc input=ea78406060fce33c]*/
{
if (sep == Py_None)
return rsplit(self, NULL, maxsplit);
@@ -16352,6 +16377,9 @@ _PyUnicode_Fini(PyInterpreterState *interp)
if (_Py_IsMainInterpreter(interp)) {
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(interned == NULL);
+ // bpo-47182: force a unicodedata CAPI capsule re-import on
+ // subsequent initialization of main interpreter.
+ ucnhash_capi = NULL;
}
_PyUnicode_FiniEncodings(&state->fs_codec);
diff --git a/contrib/tools/python3/src/Objects/unionobject.c b/contrib/tools/python3/src/Objects/unionobject.c
index 80c70389ab3..6d8bb021421 100644
--- a/contrib/tools/python3/src/Objects/unionobject.c
+++ b/contrib/tools/python3/src/Objects/unionobject.c
@@ -447,9 +447,9 @@ union_getattro(PyObject *self, PyObject *name)
PyTypeObject _PyUnion_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "types.UnionType",
- .tp_doc = "Represent a PEP 604 union type\n"
+ .tp_doc = PyDoc_STR("Represent a PEP 604 union type\n"
"\n"
- "E.g. for int | str",
+ "E.g. for int | str"),
.tp_basicsize = sizeof(unionobject),
.tp_dealloc = unionobject_dealloc,
.tp_alloc = PyType_GenericAlloc,