diff options
author | arcadia-devtools <[email protected]> | 2022-06-09 19:02:01 +0300 |
---|---|---|
committer | arcadia-devtools <[email protected]> | 2022-06-09 19:02:01 +0300 |
commit | 4a29d649866ff133e0b8f8a1009e1000a44d7279 (patch) | |
tree | 547229aded91b3760628c646a144af604f1c3e2b /contrib/tools/python3/src/Objects | |
parent | 782f2445a283aed9a66e699137b3349af1689c29 (diff) |
intermediate changes
ref:478170c7a5a1c0788ddd0d6513ce4ed86d7d7c99
Diffstat (limited to 'contrib/tools/python3/src/Objects')
-rw-r--r-- | contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h | 34 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/exceptions.c | 9 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/floatobject.c | 2 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/frameobject.c | 5 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/genericaliasobject.c | 10 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/genobject.c | 14 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/listobject.c | 33 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/picklebufobject.c | 2 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/stringlib/codecs.h | 15 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/stringlib/fastsearch.h | 3 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/typeobject.c | 20 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/unicodeobject.c | 90 | ||||
-rw-r--r-- | contrib/tools/python3/src/Objects/unionobject.c | 4 |
13 files changed, 146 insertions, 95 deletions
diff --git a/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h b/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h index 9ef8ce2e353..803b5f2353f 100644 --- a/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h +++ b/contrib/tools/python3/src/Objects/clinic/unicodeobject.c.h @@ -858,15 +858,21 @@ PyDoc_STRVAR(unicode_split__doc__, "split($self, /, sep=None, maxsplit=-1)\n" "--\n" "\n" -"Return a list of the words in the string, using sep as the delimiter string.\n" +"Return a list of the substrings in the string, using sep as the separator string.\n" "\n" " sep\n" -" The delimiter according which to split the string.\n" -" None (the default value) means split according to any whitespace,\n" -" and discard empty strings from the result.\n" +" The separator used to split the string.\n" +"\n" +" When set to None (the default value), will split on any whitespace\n" +" character (including \\\\n \\\\r \\\\t \\\\f and spaces) and will discard\n" +" empty strings from the result.\n" " maxsplit\n" -" Maximum number of splits to do.\n" -" -1 (the default value) means no limit."); +" Maximum number of splits (starting from the left).\n" +" -1 (the default value) means no limit.\n" +"\n" +"Note, str.split() is mainly useful for data that has been intentionally\n" +"delimited. With natural text that includes punctuation, consider using\n" +"the regular expression module."); #define UNICODE_SPLIT_METHODDEF \ {"split", (PyCFunction)(void(*)(void))unicode_split, METH_FASTCALL|METH_KEYWORDS, unicode_split__doc__}, @@ -953,17 +959,19 @@ PyDoc_STRVAR(unicode_rsplit__doc__, "rsplit($self, /, sep=None, maxsplit=-1)\n" "--\n" "\n" -"Return a list of the words in the string, using sep as the delimiter string.\n" +"Return a list of the substrings in the string, using sep as the separator string.\n" "\n" " sep\n" -" The delimiter according which to split the string.\n" -" None (the default value) means split according to any whitespace,\n" -" and discard empty strings from the result.\n" +" The separator used to split the string.\n" +"\n" +" When set to None (the default value), will split on any whitespace\n" +" character (including \\\\n \\\\r \\\\t \\\\f and spaces) and will discard\n" +" empty strings from the result.\n" " maxsplit\n" -" Maximum number of splits to do.\n" +" Maximum number of splits (starting from the left).\n" " -1 (the default value) means no limit.\n" "\n" -"Splits are done starting at the end of the string and working to the front."); +"Splitting starts at the end of the string and works to the front."); #define UNICODE_RSPLIT_METHODDEF \ {"rsplit", (PyCFunction)(void(*)(void))unicode_rsplit, METH_FASTCALL|METH_KEYWORDS, unicode_rsplit__doc__}, @@ -1327,4 +1335,4 @@ skip_optional_pos: exit: return return_value; } -/*[clinic end generated code: output=f10cf85d3935b3b7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c494bed46209961d input=a9049054013a1b77]*/ diff --git a/contrib/tools/python3/src/Objects/exceptions.c b/contrib/tools/python3/src/Objects/exceptions.c index 6537a7ccd1e..9639b4436a0 100644 --- a/contrib/tools/python3/src/Objects/exceptions.c +++ b/contrib/tools/python3/src/Objects/exceptions.c @@ -847,14 +847,7 @@ oserror_parse_args(PyObject **p_args, winerrcode = PyLong_AsLong(*winerror); if (winerrcode == -1 && PyErr_Occurred()) return -1; - /* Set errno to the corresponding POSIX errno (overriding - first argument). Windows Socket error codes (>= 10000) - have the same value as their POSIX counterparts. - */ - if (winerrcode < 10000) - errcode = winerror_to_errno(winerrcode); - else - errcode = winerrcode; + errcode = winerror_to_errno(winerrcode); *myerrno = PyLong_FromLong(errcode); if (!*myerrno) return -1; diff --git a/contrib/tools/python3/src/Objects/floatobject.c b/contrib/tools/python3/src/Objects/floatobject.c index 2e02f37f4a5..5af26787731 100644 --- a/contrib/tools/python3/src/Objects/floatobject.c +++ b/contrib/tools/python3/src/Objects/floatobject.c @@ -64,7 +64,7 @@ static PyStructSequence_Field floatinfo_fields[] = { {"min_exp", "DBL_MIN_EXP -- minimum int e such that radix**(e-1) " "is a normalized float"}, {"min_10_exp", "DBL_MIN_10_EXP -- minimum int e such that 10**e is " - "a normalized"}, + "a normalized float"}, {"dig", "DBL_DIG -- maximum number of decimal digits that " "can be faithfully represented in a float"}, {"mant_dig", "DBL_MANT_DIG -- mantissa digits"}, diff --git a/contrib/tools/python3/src/Objects/frameobject.c b/contrib/tools/python3/src/Objects/frameobject.c index d02cf9d3ba9..be84d33bf52 100644 --- a/contrib/tools/python3/src/Objects/frameobject.c +++ b/contrib/tools/python3/src/Objects/frameobject.c @@ -195,7 +195,10 @@ markblocks(PyCodeObject *code_obj, int len) break; case GET_ITER: case GET_AITER: - block_stack = push_block(block_stack, Loop); + // For-loops get a Loop block, but comprehensions do not. + if (_Py_OPCODE(code[i + 1]) != CALL_FUNCTION) { + block_stack = push_block(block_stack, Loop); + } blocks[i+1] = block_stack; break; case FOR_ITER: diff --git a/contrib/tools/python3/src/Objects/genericaliasobject.c b/contrib/tools/python3/src/Objects/genericaliasobject.c index dbe5d89b739..f52bc974f4d 100644 --- a/contrib/tools/python3/src/Objects/genericaliasobject.c +++ b/contrib/tools/python3/src/Objects/genericaliasobject.c @@ -349,6 +349,11 @@ _Py_subs_parameters(PyObject *self, PyObject *args, PyObject *parameters, PyObje return newargs; } +PyDoc_STRVAR(genericalias__doc__, +"Represent a PEP 585 generic type\n" +"\n" +"E.g. for t = list[int], t.__origin__ is list and t.__args__ is (int,)."); + static PyObject * ga_getitem(PyObject *self, PyObject *item) { @@ -628,14 +633,11 @@ static PyNumberMethods ga_as_number = { // TODO: // - argument clinic? -// - __doc__? // - cache? PyTypeObject Py_GenericAliasType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "types.GenericAlias", - .tp_doc = "Represent a PEP 585 generic type\n" - "\n" - "E.g. for t = list[int], t.__origin__ is list and t.__args__ is (int,).", + .tp_doc = genericalias__doc__, .tp_basicsize = sizeof(gaobject), .tp_dealloc = ga_dealloc, .tp_repr = ga_repr, diff --git a/contrib/tools/python3/src/Objects/genobject.c b/contrib/tools/python3/src/Objects/genobject.c index 33fc4a59249..123c17aae7e 100644 --- a/contrib/tools/python3/src/Objects/genobject.c +++ b/contrib/tools/python3/src/Objects/genobject.c @@ -403,8 +403,11 @@ gen_close(PyGenObject *gen, PyObject *args) PyDoc_STRVAR(throw_doc, -"throw(typ[,val[,tb]]) -> raise exception in generator,\n\ -return next yielded value or raise StopIteration."); +"throw(value)\n\ +throw(type[,value[,tb]])\n\ +\n\ +Raise exception in generator, return next yielded value or raise\n\ +StopIteration."); static PyObject * _gen_throw(PyGenObject *gen, int close_on_genexit, @@ -1001,8 +1004,11 @@ PyDoc_STRVAR(coro_send_doc, return next iterated value or raise StopIteration."); PyDoc_STRVAR(coro_throw_doc, -"throw(typ[,val[,tb]]) -> raise exception in coroutine,\n\ -return next iterated value or raise StopIteration."); +"throw(value)\n\ +throw(type[,value[,traceback]])\n\ +\n\ +Raise exception in coroutine, return next iterated value or raise\n\ +StopIteration."); PyDoc_STRVAR(coro_close_doc, "close() -> raise GeneratorExit inside coroutine."); diff --git a/contrib/tools/python3/src/Objects/listobject.c b/contrib/tools/python3/src/Objects/listobject.c index 533ee7436d3..7f37b738605 100644 --- a/contrib/tools/python3/src/Objects/listobject.c +++ b/contrib/tools/python3/src/Objects/listobject.c @@ -863,7 +863,6 @@ list_extend(PyListObject *self, PyObject *iterable) PyObject *it; /* iter(v) */ Py_ssize_t m; /* size of self */ Py_ssize_t n; /* guess for size of iterable */ - Py_ssize_t mn; /* m + n */ Py_ssize_t i; PyObject *(*iternext)(PyObject *); @@ -887,7 +886,13 @@ list_extend(PyListObject *self, PyObject *iterable) /* It should not be possible to allocate a list large enough to cause an overflow on any relevant platform */ assert(m < PY_SSIZE_T_MAX - n); - if (list_resize(self, m + n) < 0) { + if (self->ob_item == NULL) { + if (list_preallocate_exact(self, n) < 0) { + return NULL; + } + Py_SET_SIZE(self, n); + } + else if (list_resize(self, m + n) < 0) { Py_DECREF(iterable); return NULL; } @@ -926,10 +931,13 @@ list_extend(PyListObject *self, PyObject *iterable) * eventually run out of memory during the loop. */ } + else if (self->ob_item == NULL) { + if (n && list_preallocate_exact(self, n) < 0) + goto error; + } else { - mn = m + n; /* Make room. */ - if (list_resize(self, mn) < 0) + if (list_resize(self, m + n) < 0) goto error; /* Make the list sane again. */ Py_SET_SIZE(self, m); @@ -1548,8 +1556,10 @@ static void merge_freemem(MergeState *ms) { assert(ms != NULL); - if (ms->a.keys != ms->temparray) + if (ms->a.keys != ms->temparray) { PyMem_Free(ms->a.keys); + ms->a.keys = NULL; + } } /* Ensure enough temp memory for 'need' array slots is available. @@ -2715,19 +2725,6 @@ list___init___impl(PyListObject *self, PyObject *iterable) (void)_list_clear(self); } if (iterable != NULL) { - if (_PyObject_HasLen(iterable)) { - Py_ssize_t iter_len = PyObject_Size(iterable); - if (iter_len == -1) { - if (!PyErr_ExceptionMatches(PyExc_TypeError)) { - return -1; - } - PyErr_Clear(); - } - if (iter_len > 0 && self->ob_item == NULL - && list_preallocate_exact(self, iter_len)) { - return -1; - } - } PyObject *rv = list_extend(self, iterable); if (rv == NULL) return -1; diff --git a/contrib/tools/python3/src/Objects/picklebufobject.c b/contrib/tools/python3/src/Objects/picklebufobject.c index a135e5575e2..aaa852cfbb0 100644 --- a/contrib/tools/python3/src/Objects/picklebufobject.c +++ b/contrib/tools/python3/src/Objects/picklebufobject.c @@ -206,7 +206,7 @@ static PyMethodDef picklebuf_methods[] = { PyTypeObject PyPickleBuffer_Type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "pickle.PickleBuffer", - .tp_doc = "Wrapper for potentially out-of-band buffers", + .tp_doc = PyDoc_STR("Wrapper for potentially out-of-band buffers"), .tp_basicsize = sizeof(PyPickleBufferObject), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, .tp_new = picklebuf_new, diff --git a/contrib/tools/python3/src/Objects/stringlib/codecs.h b/contrib/tools/python3/src/Objects/stringlib/codecs.h index b17cda18f54..958cc861478 100644 --- a/contrib/tools/python3/src/Objects/stringlib/codecs.h +++ b/contrib/tools/python3/src/Objects/stringlib/codecs.h @@ -387,8 +387,19 @@ STRINGLIB(utf8_encoder)(_PyBytesWriter *writer, if (!rep) goto error; - /* subtract preallocated bytes */ - writer->min_size -= max_char_size * (newpos - startpos); + if (newpos < startpos) { + writer->overallocate = 1; + p = _PyBytesWriter_Prepare(writer, p, + max_char_size * (startpos - newpos)); + if (p == NULL) + goto error; + } + else { + /* subtract preallocated bytes */ + writer->min_size -= max_char_size * (newpos - startpos); + /* Only overallocate the buffer if it's not the last write */ + writer->overallocate = (newpos < size); + } if (PyBytes_Check(rep)) { p = _PyBytesWriter_WriteBytes(writer, p, diff --git a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h index 6574720b609..7b8be5d6492 100644 --- a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h +++ b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h @@ -4,7 +4,8 @@ /* fast search/count implementation, based on a mix between boyer- moore and horspool, with a few more bells and whistles on the top. - for some more background, see: http://effbot.org/zone/stringlib.htm */ + for some more background, see: + https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */ /* note: fastsearch may access s[n], which isn't a problem when using Python's ordinary string types, but may cause problems if you're diff --git a/contrib/tools/python3/src/Objects/typeobject.c b/contrib/tools/python3/src/Objects/typeobject.c index b3ba1208eb2..50f2742f676 100644 --- a/contrib/tools/python3/src/Objects/typeobject.c +++ b/contrib/tools/python3/src/Objects/typeobject.c @@ -369,22 +369,26 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { Py_ssize_t i, n; int custom = !Py_IS_TYPE(type, &PyType_Type); int unbound; - PyObject *mro_meth = NULL; - PyObject *type_mro_meth = NULL; if (custom) { + PyObject *mro_meth, *type_mro_meth; mro_meth = lookup_maybe_method( (PyObject *)type, &PyId_mro, &unbound); - if (mro_meth == NULL) + if (mro_meth == NULL) { goto clear; + } type_mro_meth = lookup_maybe_method( (PyObject *)&PyType_Type, &PyId_mro, &unbound); - if (type_mro_meth == NULL) + if (type_mro_meth == NULL) { + Py_DECREF(mro_meth); goto clear; - if (mro_meth != type_mro_meth) + } + int custom_mro = (mro_meth != type_mro_meth); + Py_DECREF(mro_meth); + Py_DECREF(type_mro_meth); + if (custom_mro) { goto clear; - Py_XDECREF(mro_meth); - Py_XDECREF(type_mro_meth); + } } n = PyTuple_GET_SIZE(bases); for (i = 0; i < n; i++) { @@ -400,8 +404,6 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { } return; clear: - Py_XDECREF(mro_meth); - Py_XDECREF(type_mro_meth); type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_version_tag = 0; /* 0 is not a valid version tag */ } diff --git a/contrib/tools/python3/src/Objects/unicodeobject.c b/contrib/tools/python3/src/Objects/unicodeobject.c index 077cf8d7f45..b7ec1f28d7e 100644 --- a/contrib/tools/python3/src/Objects/unicodeobject.c +++ b/contrib/tools/python3/src/Objects/unicodeobject.c @@ -5373,7 +5373,7 @@ _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen, /* Note: size will always be longer than the resulting Unicode character count */ - if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1)) { + if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1 < size) { return -1; } @@ -5959,7 +5959,7 @@ _PyUnicode_EncodeUTF32(PyObject *str, pos = 0; while (pos < len) { - Py_ssize_t repsize, moreunits; + Py_ssize_t newpos, repsize, moreunits; if (kind == PyUnicode_2BYTE_KIND) { pos += ucs2lib_utf32_encode((const Py_UCS2 *)data + pos, len - pos, @@ -5976,7 +5976,7 @@ _PyUnicode_EncodeUTF32(PyObject *str, rep = unicode_encode_call_errorhandler( errors, &errorHandler, encoding, "surrogates not allowed", - str, &exc, pos, pos + 1, &pos); + str, &exc, pos, pos + 1, &newpos); if (!rep) goto error; @@ -5984,7 +5984,7 @@ _PyUnicode_EncodeUTF32(PyObject *str, repsize = PyBytes_GET_SIZE(rep); if (repsize & 3) { raise_encode_exception(&exc, encoding, - str, pos - 1, pos, + str, pos, pos + 1, "surrogates not allowed"); goto error; } @@ -5997,28 +5997,30 @@ _PyUnicode_EncodeUTF32(PyObject *str, moreunits = repsize = PyUnicode_GET_LENGTH(rep); if (!PyUnicode_IS_ASCII(rep)) { raise_encode_exception(&exc, encoding, - str, pos - 1, pos, + str, pos, pos + 1, "surrogates not allowed"); goto error; } } + moreunits += pos - newpos; + pos = newpos; /* four bytes are reserved for each surrogate */ - if (moreunits > 1) { + if (moreunits > 0) { Py_ssize_t outpos = out - (uint32_t*) PyBytes_AS_STRING(v); if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 4) { /* integer overflow */ PyErr_NoMemory(); goto error; } - if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * (moreunits - 1)) < 0) + if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * moreunits) < 0) goto error; out = (uint32_t*) PyBytes_AS_STRING(v) + outpos; } if (PyBytes_Check(rep)) { memcpy(out, PyBytes_AS_STRING(rep), repsize); - out += moreunits; + out += repsize / 4; } else /* rep is unicode */ { assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); ucs1lib_utf32_encode(PyUnicode_1BYTE_DATA(rep), repsize, @@ -6311,7 +6313,7 @@ _PyUnicode_EncodeUTF16(PyObject *str, pos = 0; while (pos < len) { - Py_ssize_t repsize, moreunits; + Py_ssize_t newpos, repsize, moreunits; if (kind == PyUnicode_2BYTE_KIND) { pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos, @@ -6328,7 +6330,7 @@ _PyUnicode_EncodeUTF16(PyObject *str, rep = unicode_encode_call_errorhandler( errors, &errorHandler, encoding, "surrogates not allowed", - str, &exc, pos, pos + 1, &pos); + str, &exc, pos, pos + 1, &newpos); if (!rep) goto error; @@ -6336,7 +6338,7 @@ _PyUnicode_EncodeUTF16(PyObject *str, repsize = PyBytes_GET_SIZE(rep); if (repsize & 1) { raise_encode_exception(&exc, encoding, - str, pos - 1, pos, + str, pos, pos + 1, "surrogates not allowed"); goto error; } @@ -6349,28 +6351,30 @@ _PyUnicode_EncodeUTF16(PyObject *str, moreunits = repsize = PyUnicode_GET_LENGTH(rep); if (!PyUnicode_IS_ASCII(rep)) { raise_encode_exception(&exc, encoding, - str, pos - 1, pos, + str, pos, pos + 1, "surrogates not allowed"); goto error; } } + moreunits += pos - newpos; + pos = newpos; /* two bytes are reserved for each surrogate */ - if (moreunits > 1) { + if (moreunits > 0) { Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v); if (moreunits >= (PY_SSIZE_T_MAX - PyBytes_GET_SIZE(v)) / 2) { /* integer overflow */ PyErr_NoMemory(); goto error; } - if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * (moreunits - 1)) < 0) + if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * moreunits) < 0) goto error; out = (unsigned short*) PyBytes_AS_STRING(v) + outpos; } if (PyBytes_Check(rep)) { memcpy(out, PyBytes_AS_STRING(rep), repsize); - out += moreunits; + out += repsize / 2; } else /* rep is unicode */ { assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND); ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize, @@ -7297,8 +7301,19 @@ unicode_encode_ucs1(PyObject *unicode, if (rep == NULL) goto onError; - /* subtract preallocated bytes */ - writer.min_size -= newpos - collstart; + if (newpos < collstart) { + writer.overallocate = 1; + str = _PyBytesWriter_Prepare(&writer, str, + collstart - newpos); + if (str == NULL) + goto onError; + } + else { + /* subtract preallocated bytes */ + writer.min_size -= newpos - collstart; + /* Only overallocate the buffer if it's not the last write */ + writer.overallocate = (newpos < size); + } if (PyBytes_Check(rep)) { /* Directly copy bytes result to output. */ @@ -8104,13 +8119,14 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, pos, pos + 1, &newpos); if (rep == NULL) goto error; - pos = newpos; + Py_ssize_t morebytes = pos - newpos; if (PyBytes_Check(rep)) { outsize = PyBytes_GET_SIZE(rep); - if (outsize != 1) { + morebytes += outsize; + if (morebytes > 0) { Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); - newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); + newoutsize = PyBytes_GET_SIZE(*outbytes) + morebytes; if (_PyBytes_Resize(outbytes, newoutsize) < 0) { Py_DECREF(rep); goto error; @@ -8131,9 +8147,10 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, } outsize = PyUnicode_GET_LENGTH(rep); - if (outsize != 1) { + morebytes += outsize; + if (morebytes > 0) { Py_ssize_t offset = out - PyBytes_AS_STRING(*outbytes); - newoutsize = PyBytes_GET_SIZE(*outbytes) + (outsize - 1); + newoutsize = PyBytes_GET_SIZE(*outbytes) + morebytes; if (_PyBytes_Resize(outbytes, newoutsize) < 0) { Py_DECREF(rep); goto error; @@ -8156,6 +8173,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, out++; } } + pos = newpos; Py_DECREF(rep); } /* write a NUL byte */ @@ -13478,19 +13496,26 @@ PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) str.split as unicode_split sep: object = None - The delimiter according which to split the string. - None (the default value) means split according to any whitespace, - and discard empty strings from the result. + The separator used to split the string. + + When set to None (the default value), will split on any whitespace + character (including \\n \\r \\t \\f and spaces) and will discard + empty strings from the result. maxsplit: Py_ssize_t = -1 - Maximum number of splits to do. + Maximum number of splits (starting from the left). -1 (the default value) means no limit. -Return a list of the words in the string, using sep as the delimiter string. +Return a list of the substrings in the string, using sep as the separator string. + +Note, str.split() is mainly useful for data that has been intentionally +delimited. With natural text that includes punctuation, consider using +the regular expression module. + [clinic start generated code]*/ static PyObject * unicode_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) -/*[clinic end generated code: output=3a65b1db356948dc input=606e750488a82359]*/ +/*[clinic end generated code: output=3a65b1db356948dc input=906d953b44efc43b]*/ { if (sep == Py_None) return split(self, NULL, maxsplit); @@ -13661,14 +13686,14 @@ PyUnicode_RSplit(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) /*[clinic input] str.rsplit as unicode_rsplit = str.split -Return a list of the words in the string, using sep as the delimiter string. +Return a list of the substrings in the string, using sep as the separator string. -Splits are done starting at the end of the string and working to the front. +Splitting starts at the end of the string and works to the front. [clinic start generated code]*/ static PyObject * unicode_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit) -/*[clinic end generated code: output=c2b815c63bcabffc input=12ad4bf57dd35f15]*/ +/*[clinic end generated code: output=c2b815c63bcabffc input=ea78406060fce33c]*/ { if (sep == Py_None) return rsplit(self, NULL, maxsplit); @@ -16352,6 +16377,9 @@ _PyUnicode_Fini(PyInterpreterState *interp) if (_Py_IsMainInterpreter(interp)) { // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() assert(interned == NULL); + // bpo-47182: force a unicodedata CAPI capsule re-import on + // subsequent initialization of main interpreter. + ucnhash_capi = NULL; } _PyUnicode_FiniEncodings(&state->fs_codec); diff --git a/contrib/tools/python3/src/Objects/unionobject.c b/contrib/tools/python3/src/Objects/unionobject.c index 80c70389ab3..6d8bb021421 100644 --- a/contrib/tools/python3/src/Objects/unionobject.c +++ b/contrib/tools/python3/src/Objects/unionobject.c @@ -447,9 +447,9 @@ union_getattro(PyObject *self, PyObject *name) PyTypeObject _PyUnion_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "types.UnionType", - .tp_doc = "Represent a PEP 604 union type\n" + .tp_doc = PyDoc_STR("Represent a PEP 604 union type\n" "\n" - "E.g. for int | str", + "E.g. for int | str"), .tp_basicsize = sizeof(unionobject), .tp_dealloc = unionobject_dealloc, .tp_alloc = PyType_GenericAlloc, |