summaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/src/Objects/unicodeobject.c
diff options
context:
space:
mode:
authorshadchin <[email protected]>2022-02-10 16:44:39 +0300
committerDaniil Cherednik <[email protected]>2022-02-10 16:44:39 +0300
commite9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/tools/python3/src/Objects/unicodeobject.c
parent2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Objects/unicodeobject.c')
-rw-r--r--contrib/tools/python3/src/Objects/unicodeobject.c3380
1 files changed, 1690 insertions, 1690 deletions
diff --git a/contrib/tools/python3/src/Objects/unicodeobject.c b/contrib/tools/python3/src/Objects/unicodeobject.c
index 6ee20925e9a..7767d140e6c 100644
--- a/contrib/tools/python3/src/Objects/unicodeobject.c
+++ b/contrib/tools/python3/src/Objects/unicodeobject.c
@@ -40,15 +40,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#define PY_SSIZE_T_CLEAN
#include "Python.h"
-#include "pycore_abstract.h" // _PyIndex_Check()
-#include "pycore_bytes_methods.h"
-#include "pycore_fileutils.h"
-#include "pycore_initconfig.h"
-#include "pycore_interp.h" // PyInterpreterState.fs_codec
-#include "pycore_object.h"
-#include "pycore_pathconfig.h"
-#include "pycore_pylifecycle.h"
-#include "pycore_pystate.h" // _PyInterpreterState_GET()
+#include "pycore_abstract.h" // _PyIndex_Check()
+#include "pycore_bytes_methods.h"
+#include "pycore_fileutils.h"
+#include "pycore_initconfig.h"
+#include "pycore_interp.h" // PyInterpreterState.fs_codec
+#include "pycore_object.h"
+#include "pycore_pathconfig.h"
+#include "pycore_pylifecycle.h"
+#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "ucnhash.h"
#include "stringlib/eq.h"
@@ -56,15 +56,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <windows.h>
#endif
-#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
-#include "pycore_fileutils.h" // _Py_LocaleUsesNonUnicodeWchar()
-#endif
-
-/* Uncomment to display statistics on interned strings at exit when
- using Valgrind or Insecure++. */
-/* #define INTERNED_STATS 1 */
-
-
+#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+#include "pycore_fileutils.h" // _Py_LocaleUsesNonUnicodeWchar()
+#endif
+
+/* Uncomment to display statistics on interned strings at exit when
+ using Valgrind or Insecure++. */
+/* #define INTERNED_STATS 1 */
+
+
/*[clinic input]
class str "PyObject *" "&PyUnicode_Type"
[clinic start generated code]*/
@@ -97,8 +97,8 @@ NOTE: In the interpreter's initialization phase, some globals are currently
extern "C" {
#endif
-// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
-// The value must be the same in fileutils.c.
+// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
+// The value must be the same in fileutils.c.
#define MAX_UNICODE 0x10ffff
#ifdef Py_DEBUG
@@ -125,13 +125,13 @@ extern "C" {
_PyUnicode_UTF8_LENGTH(op))
#define _PyUnicode_WSTR(op) \
(((PyASCIIObject*)(op))->wstr)
-
-/* Don't use deprecated macro of unicodeobject.h */
-#undef PyUnicode_WSTR_LENGTH
-#define PyUnicode_WSTR_LENGTH(op) \
- (PyUnicode_IS_COMPACT_ASCII(op) ? \
- ((PyASCIIObject*)op)->length : \
- ((PyCompactUnicodeObject*)op)->wstr_length)
+
+/* Don't use deprecated macro of unicodeobject.h */
+#undef PyUnicode_WSTR_LENGTH
+#define PyUnicode_WSTR_LENGTH(op) \
+ (PyUnicode_IS_COMPACT_ASCII(op) ? \
+ ((PyASCIIObject*)op)->length : \
+ ((PyCompactUnicodeObject*)op)->wstr_length)
#define _PyUnicode_WSTR_LENGTH(op) \
(((PyCompactUnicodeObject*)(op))->wstr_length)
#define _PyUnicode_LENGTH(op) \
@@ -186,8 +186,8 @@ extern "C" {
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
do { \
to_type *_to = (to_type *)(to); \
- const from_type *_iter = (const from_type *)(begin);\
- const from_type *_end = (const from_type *)(end);\
+ const from_type *_iter = (const from_type *)(begin);\
+ const from_type *_end = (const from_type *)(end);\
Py_ssize_t n = (_end) - (_iter); \
const from_type *_unrolled_end = \
_iter + _Py_SIZE_ROUND_DOWN(n, 4); \
@@ -210,8 +210,8 @@ extern "C" {
# define OVERALLOCATE_FACTOR 4
#endif
-#define INTERNED_STRINGS
-
+#define INTERNED_STRINGS
+
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
@@ -220,9 +220,9 @@ extern "C" {
Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
-#ifdef INTERNED_STRINGS
+#ifdef INTERNED_STRINGS
static PyObject *interned = NULL;
-#endif
+#endif
/* The empty Unicode object is shared to improve performance. */
static PyObject *unicode_empty = NULL;
@@ -246,64 +246,64 @@ static PyObject *unicode_empty = NULL;
return unicode_empty; \
} while (0)
-static inline void
-unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
- Py_ssize_t start, Py_ssize_t length)
-{
- assert(0 <= start);
- assert(kind != PyUnicode_WCHAR_KIND);
- switch (kind) {
- case PyUnicode_1BYTE_KIND: {
- assert(value <= 0xff);
- Py_UCS1 ch = (unsigned char)value;
- Py_UCS1 *to = (Py_UCS1 *)data + start;
- memset(to, ch, length);
- break;
- }
- case PyUnicode_2BYTE_KIND: {
- assert(value <= 0xffff);
- Py_UCS2 ch = (Py_UCS2)value;
- Py_UCS2 *to = (Py_UCS2 *)data + start;
- const Py_UCS2 *end = to + length;
- for (; to < end; ++to) *to = ch;
- break;
- }
- case PyUnicode_4BYTE_KIND: {
- assert(value <= MAX_UNICODE);
- Py_UCS4 ch = value;
- Py_UCS4 * to = (Py_UCS4 *)data + start;
- const Py_UCS4 *end = to + length;
- for (; to < end; ++to) *to = ch;
- break;
- }
- default: Py_UNREACHABLE();
- }
-}
+static inline void
+unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
+ Py_ssize_t start, Py_ssize_t length)
+{
+ assert(0 <= start);
+ assert(kind != PyUnicode_WCHAR_KIND);
+ switch (kind) {
+ case PyUnicode_1BYTE_KIND: {
+ assert(value <= 0xff);
+ Py_UCS1 ch = (unsigned char)value;
+ Py_UCS1 *to = (Py_UCS1 *)data + start;
+ memset(to, ch, length);
+ break;
+ }
+ case PyUnicode_2BYTE_KIND: {
+ assert(value <= 0xffff);
+ Py_UCS2 ch = (Py_UCS2)value;
+ Py_UCS2 *to = (Py_UCS2 *)data + start;
+ const Py_UCS2 *end = to + length;
+ for (; to < end; ++to) *to = ch;
+ break;
+ }
+ case PyUnicode_4BYTE_KIND: {
+ assert(value <= MAX_UNICODE);
+ Py_UCS4 ch = value;
+ Py_UCS4 * to = (Py_UCS4 *)data + start;
+ const Py_UCS4 *end = to + length;
+ for (; to < end; ++to) *to = ch;
+ break;
+ }
+ default: Py_UNREACHABLE();
+ }
+}
/* Forward declaration */
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
-static inline void
-_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
-static PyObject *
-unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
- const char *errors);
-static PyObject *
-unicode_decode_utf8(const char *s, Py_ssize_t size,
- _Py_error_handler error_handler, const char *errors,
- Py_ssize_t *consumed);
+static inline void
+_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+ const char *errors);
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+ _Py_error_handler error_handler, const char *errors,
+ Py_ssize_t *consumed);
/* List of static strings. */
static _Py_Identifier *static_strings = NULL;
-#define LATIN1_SINGLETONS
-
-#ifdef LATIN1_SINGLETONS
+#define LATIN1_SINGLETONS
+
+#ifdef LATIN1_SINGLETONS
/* Single character Unicode strings in the Latin-1 range are being
shared as well. */
static PyObject *unicode_latin1[256] = {NULL};
-#endif
+#endif
/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
@@ -394,8 +394,8 @@ static int convert_uc(PyObject *obj, void *addr);
#include "clinic/unicodeobject.c.h"
-_Py_error_handler
-_Py_GetErrorHandler(const char *errors)
+_Py_error_handler
+_Py_GetErrorHandler(const char *errors)
{
if (errors == NULL || strcmp(errors, "strict") == 0) {
return _Py_ERROR_STRICT;
@@ -421,83 +421,83 @@ _Py_GetErrorHandler(const char *errors)
return _Py_ERROR_OTHER;
}
-
-static _Py_error_handler
-get_error_handler_wide(const wchar_t *errors)
-{
- if (errors == NULL || wcscmp(errors, L"strict") == 0) {
- return _Py_ERROR_STRICT;
- }
- if (wcscmp(errors, L"surrogateescape") == 0) {
- return _Py_ERROR_SURROGATEESCAPE;
- }
- if (wcscmp(errors, L"replace") == 0) {
- return _Py_ERROR_REPLACE;
- }
- if (wcscmp(errors, L"ignore") == 0) {
- return _Py_ERROR_IGNORE;
- }
- if (wcscmp(errors, L"backslashreplace") == 0) {
- return _Py_ERROR_BACKSLASHREPLACE;
- }
- if (wcscmp(errors, L"surrogatepass") == 0) {
- return _Py_ERROR_SURROGATEPASS;
- }
- if (wcscmp(errors, L"xmlcharrefreplace") == 0) {
- return _Py_ERROR_XMLCHARREFREPLACE;
- }
- return _Py_ERROR_OTHER;
-}
-
-
-static inline int
-unicode_check_encoding_errors(const char *encoding, const char *errors)
-{
- if (encoding == NULL && errors == NULL) {
- return 0;
- }
-
- PyInterpreterState *interp = _PyInterpreterState_GET();
-#ifndef Py_DEBUG
- /* In release mode, only check in development mode (-X dev) */
- if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
- return 0;
- }
-#else
- /* Always check in debug mode */
-#endif
-
- /* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the
- codec registry is ready: before_PyUnicode_InitEncodings() is called. */
- if (!interp->unicode.fs_codec.encoding) {
- return 0;
- }
-
- /* Disable checks during Python finalization. For example, it allows to
- call _PyObject_Dump() during finalization for debugging purpose. */
- if (interp->finalizing) {
- return 0;
- }
-
- if (encoding != NULL) {
- PyObject *handler = _PyCodec_Lookup(encoding);
- if (handler == NULL) {
- return -1;
- }
- Py_DECREF(handler);
- }
-
- if (errors != NULL) {
- PyObject *handler = PyCodec_LookupError(errors);
- if (handler == NULL) {
- return -1;
- }
- Py_DECREF(handler);
- }
- return 0;
-}
-
-
+
+static _Py_error_handler
+get_error_handler_wide(const wchar_t *errors)
+{
+ if (errors == NULL || wcscmp(errors, L"strict") == 0) {
+ return _Py_ERROR_STRICT;
+ }
+ if (wcscmp(errors, L"surrogateescape") == 0) {
+ return _Py_ERROR_SURROGATEESCAPE;
+ }
+ if (wcscmp(errors, L"replace") == 0) {
+ return _Py_ERROR_REPLACE;
+ }
+ if (wcscmp(errors, L"ignore") == 0) {
+ return _Py_ERROR_IGNORE;
+ }
+ if (wcscmp(errors, L"backslashreplace") == 0) {
+ return _Py_ERROR_BACKSLASHREPLACE;
+ }
+ if (wcscmp(errors, L"surrogatepass") == 0) {
+ return _Py_ERROR_SURROGATEPASS;
+ }
+ if (wcscmp(errors, L"xmlcharrefreplace") == 0) {
+ return _Py_ERROR_XMLCHARREFREPLACE;
+ }
+ return _Py_ERROR_OTHER;
+}
+
+
+static inline int
+unicode_check_encoding_errors(const char *encoding, const char *errors)
+{
+ if (encoding == NULL && errors == NULL) {
+ return 0;
+ }
+
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+#ifndef Py_DEBUG
+ /* In release mode, only check in development mode (-X dev) */
+ if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
+ return 0;
+ }
+#else
+ /* Always check in debug mode */
+#endif
+
+ /* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the
+ codec registry is ready: before_PyUnicode_InitEncodings() is called. */
+ if (!interp->unicode.fs_codec.encoding) {
+ return 0;
+ }
+
+ /* Disable checks during Python finalization. For example, it allows to
+ call _PyObject_Dump() during finalization for debugging purpose. */
+ if (interp->finalizing) {
+ return 0;
+ }
+
+ if (encoding != NULL) {
+ PyObject *handler = _PyCodec_Lookup(encoding);
+ if (handler == NULL) {
+ return -1;
+ }
+ Py_DECREF(handler);
+ }
+
+ if (errors != NULL) {
+ PyObject *handler = PyCodec_LookupError(errors);
+ if (handler == NULL) {
+ return -1;
+ }
+ Py_DECREF(handler);
+ }
+ return 0;
+}
+
+
/* The max unicode value is always 0x10FFFF while using the PEP-393 API.
This function is kept for backward compatibility with the old API. */
Py_UNICODE
@@ -515,21 +515,21 @@ PyUnicode_GetMax(void)
int
_PyUnicode_CheckConsistency(PyObject *op, int check_content)
{
-#define CHECK(expr) \
- do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
-
+#define CHECK(expr) \
+ do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
+
PyASCIIObject *ascii;
unsigned int kind;
- assert(op != NULL);
- CHECK(PyUnicode_Check(op));
+ assert(op != NULL);
+ CHECK(PyUnicode_Check(op));
ascii = (PyASCIIObject *)op;
kind = ascii->state.kind;
if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
- CHECK(kind == PyUnicode_1BYTE_KIND);
- CHECK(ascii->state.ready == 1);
+ CHECK(kind == PyUnicode_1BYTE_KIND);
+ CHECK(ascii->state.ready == 1);
}
else {
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
@@ -537,41 +537,41 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
if (ascii->state.compact == 1) {
data = compact + 1;
- CHECK(kind == PyUnicode_1BYTE_KIND
- || kind == PyUnicode_2BYTE_KIND
- || kind == PyUnicode_4BYTE_KIND);
- CHECK(ascii->state.ascii == 0);
- CHECK(ascii->state.ready == 1);
- CHECK(compact->utf8 != data);
+ CHECK(kind == PyUnicode_1BYTE_KIND
+ || kind == PyUnicode_2BYTE_KIND
+ || kind == PyUnicode_4BYTE_KIND);
+ CHECK(ascii->state.ascii == 0);
+ CHECK(ascii->state.ready == 1);
+ CHECK(compact->utf8 != data);
}
else {
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
data = unicode->data.any;
if (kind == PyUnicode_WCHAR_KIND) {
- CHECK(ascii->length == 0);
- CHECK(ascii->hash == -1);
- CHECK(ascii->state.compact == 0);
- CHECK(ascii->state.ascii == 0);
- CHECK(ascii->state.ready == 0);
- CHECK(ascii->state.interned == SSTATE_NOT_INTERNED);
- CHECK(ascii->wstr != NULL);
- CHECK(data == NULL);
- CHECK(compact->utf8 == NULL);
+ CHECK(ascii->length == 0);
+ CHECK(ascii->hash == -1);
+ CHECK(ascii->state.compact == 0);
+ CHECK(ascii->state.ascii == 0);
+ CHECK(ascii->state.ready == 0);
+ CHECK(ascii->state.interned == SSTATE_NOT_INTERNED);
+ CHECK(ascii->wstr != NULL);
+ CHECK(data == NULL);
+ CHECK(compact->utf8 == NULL);
}
else {
- CHECK(kind == PyUnicode_1BYTE_KIND
- || kind == PyUnicode_2BYTE_KIND
- || kind == PyUnicode_4BYTE_KIND);
- CHECK(ascii->state.compact == 0);
- CHECK(ascii->state.ready == 1);
- CHECK(data != NULL);
+ CHECK(kind == PyUnicode_1BYTE_KIND
+ || kind == PyUnicode_2BYTE_KIND
+ || kind == PyUnicode_4BYTE_KIND);
+ CHECK(ascii->state.compact == 0);
+ CHECK(ascii->state.ready == 1);
+ CHECK(data != NULL);
if (ascii->state.ascii) {
- CHECK(compact->utf8 == data);
- CHECK(compact->utf8_length == ascii->length);
+ CHECK(compact->utf8 == data);
+ CHECK(compact->utf8_length == ascii->length);
}
else
- CHECK(compact->utf8 != data);
+ CHECK(compact->utf8 != data);
}
}
if (kind != PyUnicode_WCHAR_KIND) {
@@ -583,23 +583,23 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
#endif
)
{
- CHECK(ascii->wstr == data);
- CHECK(compact->wstr_length == ascii->length);
+ CHECK(ascii->wstr == data);
+ CHECK(compact->wstr_length == ascii->length);
} else
- CHECK(ascii->wstr != data);
+ CHECK(ascii->wstr != data);
}
if (compact->utf8 == NULL)
- CHECK(compact->utf8_length == 0);
+ CHECK(compact->utf8_length == 0);
if (ascii->wstr == NULL)
- CHECK(compact->wstr_length == 0);
+ CHECK(compact->wstr_length == 0);
}
-
- /* check that the best kind is used: O(n) operation */
- if (check_content && kind != PyUnicode_WCHAR_KIND) {
+
+ /* check that the best kind is used: O(n) operation */
+ if (check_content && kind != PyUnicode_WCHAR_KIND) {
Py_ssize_t i;
Py_UCS4 maxchar = 0;
- const void *data;
+ const void *data;
Py_UCS4 ch;
data = PyUnicode_DATA(ascii);
@@ -611,28 +611,28 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
}
if (kind == PyUnicode_1BYTE_KIND) {
if (ascii->state.ascii == 0) {
- CHECK(maxchar >= 128);
- CHECK(maxchar <= 255);
+ CHECK(maxchar >= 128);
+ CHECK(maxchar <= 255);
}
else
- CHECK(maxchar < 128);
+ CHECK(maxchar < 128);
}
else if (kind == PyUnicode_2BYTE_KIND) {
- CHECK(maxchar >= 0x100);
- CHECK(maxchar <= 0xFFFF);
+ CHECK(maxchar >= 0x100);
+ CHECK(maxchar <= 0xFFFF);
}
else {
- CHECK(maxchar >= 0x10000);
- CHECK(maxchar <= MAX_UNICODE);
+ CHECK(maxchar >= 0x10000);
+ CHECK(maxchar <= MAX_UNICODE);
}
- CHECK(PyUnicode_READ(kind, data, ascii->length) == 0);
+ CHECK(PyUnicode_READ(kind, data, ascii->length) == 0);
}
return 1;
-
-#undef CHECK
+
+#undef CHECK
}
-
+
static PyObject*
unicode_result_wchar(PyObject *unicode)
{
@@ -682,9 +682,9 @@ unicode_result_ready(PyObject *unicode)
return unicode_empty;
}
-#ifdef LATIN1_SINGLETONS
+#ifdef LATIN1_SINGLETONS
if (length == 1) {
- const void *data = PyUnicode_DATA(unicode);
+ const void *data = PyUnicode_DATA(unicode);
int kind = PyUnicode_KIND(unicode);
Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
if (ch < 256) {
@@ -704,7 +704,7 @@ unicode_result_ready(PyObject *unicode)
}
}
}
-#endif
+#endif
assert(_PyUnicode_CheckConsistency(unicode, 1));
return unicode;
@@ -743,7 +743,7 @@ backslashreplace(_PyBytesWriter *writer, char *str,
Py_ssize_t size, i;
Py_UCS4 ch;
enum PyUnicode_Kind kind;
- const void *data;
+ const void *data;
assert(PyUnicode_IS_READY(unicode));
kind = PyUnicode_KIND(unicode);
@@ -810,7 +810,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
Py_ssize_t size, i;
Py_UCS4 ch;
enum PyUnicode_Kind kind;
- const void *data;
+ const void *data;
assert(PyUnicode_IS_READY(unicode));
kind = PyUnicode_KIND(unicode);
@@ -852,11 +852,11 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
/* generate replacement */
for (i = collstart; i < collend; ++i) {
- size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
- if (size < 0) {
- return NULL;
- }
- str += size;
+ size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
+ if (size < 0) {
+ return NULL;
+ }
+ str += size;
}
return str;
}
@@ -890,7 +890,7 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
(BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))
static inline BLOOM_MASK
-make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
+make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
{
#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN) \
do { \
@@ -980,14 +980,14 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find_max_char.h"
#include "stringlib/undef.h"
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
#include "stringlib/unicodedefs.h"
#include "stringlib/fastsearch.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/undef.h"
-_Py_COMP_DIAG_POP
+_Py_COMP_DIAG_POP
/* --- Unicode Object ----------------------------------------------------- */
@@ -1001,21 +1001,21 @@ findchar(const void *s, int kind,
if ((Py_UCS1) ch != ch)
return -1;
if (direction > 0)
- return ucs1lib_find_char((const Py_UCS1 *) s, size, (Py_UCS1) ch);
+ return ucs1lib_find_char((const Py_UCS1 *) s, size, (Py_UCS1) ch);
else
- return ucs1lib_rfind_char((const Py_UCS1 *) s, size, (Py_UCS1) ch);
+ return ucs1lib_rfind_char((const Py_UCS1 *) s, size, (Py_UCS1) ch);
case PyUnicode_2BYTE_KIND:
if ((Py_UCS2) ch != ch)
return -1;
if (direction > 0)
- return ucs2lib_find_char((const Py_UCS2 *) s, size, (Py_UCS2) ch);
+ return ucs2lib_find_char((const Py_UCS2 *) s, size, (Py_UCS2) ch);
else
- return ucs2lib_rfind_char((const Py_UCS2 *) s, size, (Py_UCS2) ch);
+ return ucs2lib_rfind_char((const Py_UCS2 *) s, size, (Py_UCS2) ch);
case PyUnicode_4BYTE_KIND:
if (direction > 0)
- return ucs4lib_find_char((const Py_UCS4 *) s, size, ch);
+ return ucs4lib_find_char((const Py_UCS4 *) s, size, ch);
else
- return ucs4lib_rfind_char((const Py_UCS4 *) s, size, ch);
+ return ucs4lib_rfind_char((const Py_UCS4 *) s, size, ch);
default:
Py_UNREACHABLE();
}
@@ -1074,12 +1074,12 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
_PyUnicode_UTF8(unicode) = NULL;
_PyUnicode_UTF8_LENGTH(unicode) = 0;
}
-#ifdef Py_REF_DEBUG
- _Py_RefTotal--;
-#endif
-#ifdef Py_TRACE_REFS
+#ifdef Py_REF_DEBUG
+ _Py_RefTotal--;
+#endif
+#ifdef Py_TRACE_REFS
_Py_ForgetReference(unicode);
-#endif
+#endif
new_unicode = (PyObject *)PyObject_REALLOC(unicode, new_size);
if (new_unicode == NULL) {
@@ -1332,18 +1332,18 @@ unicode_kind_name(PyObject *unicode)
#ifdef Py_DEBUG
/* Functions wrapping macros for use in debugger */
-const char *_PyUnicode_utf8(void *unicode_raw){
- PyObject *unicode = _PyObject_CAST(unicode_raw);
+const char *_PyUnicode_utf8(void *unicode_raw){
+ PyObject *unicode = _PyObject_CAST(unicode_raw);
return PyUnicode_UTF8(unicode);
}
-const void *_PyUnicode_compact_data(void *unicode_raw) {
- PyObject *unicode = _PyObject_CAST(unicode_raw);
+const void *_PyUnicode_compact_data(void *unicode_raw) {
+ PyObject *unicode = _PyObject_CAST(unicode_raw);
return _PyUnicode_COMPACT_DATA(unicode);
}
-const void *_PyUnicode_data(void *unicode_raw) {
- PyObject *unicode = _PyObject_CAST(unicode_raw);
- printf("obj %p\n", (void*)unicode);
+const void *_PyUnicode_data(void *unicode_raw) {
+ PyObject *unicode = _PyObject_CAST(unicode_raw);
+ printf("obj %p\n", (void*)unicode);
printf("compact %d\n", PyUnicode_IS_COMPACT(unicode));
printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(unicode));
printf("ascii op %p\n", ((void*)((PyASCIIObject*)(unicode) + 1)));
@@ -1358,7 +1358,7 @@ _PyUnicode_Dump(PyObject *op)
PyASCIIObject *ascii = (PyASCIIObject *)op;
PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
PyUnicodeObject *unicode = (PyUnicodeObject *)op;
- const void *data;
+ const void *data;
if (ascii->state.compact)
{
@@ -1374,14 +1374,14 @@ _PyUnicode_Dump(PyObject *op)
if (ascii->wstr == data)
printf("shared ");
- printf("wstr=%p", (void *)ascii->wstr);
+ printf("wstr=%p", (void *)ascii->wstr);
if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) {
printf(" (%" PY_FORMAT_SIZE_T "u), ", compact->wstr_length);
if (!ascii->state.compact && compact->utf8 == unicode->data.any)
printf("shared ");
printf("utf8=%p (%" PY_FORMAT_SIZE_T "u)",
- (void *)compact->utf8, compact->utf8_length);
+ (void *)compact->utf8, compact->utf8_length);
}
printf(", data=%p\n", data);
}
@@ -1558,8 +1558,8 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
Py_ssize_t how_many, int check_maxchar)
{
unsigned int from_kind, to_kind;
- const void *from_data;
- void *to_data;
+ const void *from_data;
+ void *to_data;
assert(0 <= how_many);
assert(0 <= from_start);
@@ -1584,7 +1584,7 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
if (!check_maxchar
&& PyUnicode_MAX_CHAR_VALUE(from) > PyUnicode_MAX_CHAR_VALUE(to))
{
- Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
+ Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
Py_UCS4 ch;
Py_ssize_t i;
for (i=0; i < how_many; i++) {
@@ -1602,12 +1602,12 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
check that all written characters are pure ASCII */
Py_UCS4 max_char;
max_char = ucs1lib_find_max_char(from_data,
- (const Py_UCS1*)from_data + how_many);
+ (const Py_UCS1*)from_data + how_many);
if (max_char >= 128)
return -1;
}
memcpy((char*)to_data + to_kind * to_start,
- (const char*)from_data + from_kind * from_start,
+ (const char*)from_data + from_kind * from_start,
to_kind * how_many);
}
else if (from_kind == PyUnicode_1BYTE_KIND
@@ -1794,8 +1794,8 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
*maxchar = ch;
if (*maxchar > MAX_UNICODE) {
PyErr_Format(PyExc_ValueError,
- "character U+%x is not in range [U+0000; U+%x]",
- ch, MAX_UNICODE);
+ "character U+%x is not in range [U+0000; U+%x]",
+ ch, MAX_UNICODE);
return -1;
}
}
@@ -1891,7 +1891,7 @@ _PyUnicode_Ready(PyObject *unicode)
_PyUnicode_WSTR_LENGTH(unicode) = 0;
#endif
}
- /* maxchar exceeds 16 bit, wee need 4 bytes for unicode characters */
+ /* maxchar exceeds 16 bit, wee need 4 bytes for unicode characters */
else {
#if SIZEOF_WCHAR_T == 2
/* in case the native representation is 2-bytes, we need to allocate a
@@ -1941,32 +1941,32 @@ unicode_dealloc(PyObject *unicode)
case SSTATE_INTERNED_MORTAL:
/* revive dead object temporarily for DelItem */
- Py_SET_REFCNT(unicode, 3);
-#ifdef INTERNED_STRINGS
- if (PyDict_DelItem(interned, unicode) != 0) {
- _PyErr_WriteUnraisableMsg("deletion of interned string failed",
- NULL);
- }
-#endif
+ Py_SET_REFCNT(unicode, 3);
+#ifdef INTERNED_STRINGS
+ if (PyDict_DelItem(interned, unicode) != 0) {
+ _PyErr_WriteUnraisableMsg("deletion of interned string failed",
+ NULL);
+ }
+#endif
break;
case SSTATE_INTERNED_IMMORTAL:
- _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
- break;
+ _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
+ break;
default:
- Py_UNREACHABLE();
+ Py_UNREACHABLE();
}
- if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
+ if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
PyObject_DEL(_PyUnicode_WSTR(unicode));
- }
- if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+ }
+ if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
PyObject_DEL(_PyUnicode_UTF8(unicode));
- }
- if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) {
+ }
+ if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) {
PyObject_DEL(_PyUnicode_DATA_ANY(unicode));
- }
+ }
Py_TYPE(unicode)->tp_free(unicode);
}
@@ -1975,10 +1975,10 @@ unicode_dealloc(PyObject *unicode)
static int
unicode_is_singleton(PyObject *unicode)
{
- if (unicode == unicode_empty) {
- return 1;
- }
-#ifdef LATIN1_SINGLETONS
+ if (unicode == unicode_empty) {
+ return 1;
+ }
+#ifdef LATIN1_SINGLETONS
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
{
@@ -1986,7 +1986,7 @@ unicode_is_singleton(PyObject *unicode)
if (ch < 256 && unicode_latin1[ch] == unicode)
return 1;
}
-#endif
+#endif
return 0;
}
#endif
@@ -2083,10 +2083,10 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
const char *str, Py_ssize_t len)
{
enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
- const void *data = PyUnicode_DATA(unicode);
+ const void *data = PyUnicode_DATA(unicode);
const char *end = str + len;
- assert(index + len <= PyUnicode_GET_LENGTH(unicode));
+ assert(index + len <= PyUnicode_GET_LENGTH(unicode));
switch (kind) {
case PyUnicode_1BYTE_KIND: {
#ifdef Py_DEBUG
@@ -2110,7 +2110,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
break;
}
- case PyUnicode_4BYTE_KIND: {
+ case PyUnicode_4BYTE_KIND: {
Py_UCS4 *start = (Py_UCS4 *)data + index;
Py_UCS4 *ucs4 = start;
@@ -2118,38 +2118,38 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
*ucs4 = (Py_UCS4)*str;
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
- break;
+ break;
}
- default:
- Py_UNREACHABLE();
+ default:
+ Py_UNREACHABLE();
}
}
static PyObject*
get_latin1_char(unsigned char ch)
{
- PyObject *unicode;
-
-#ifdef LATIN1_SINGLETONS
- unicode = unicode_latin1[ch];
- if (unicode) {
- Py_INCREF(unicode);
- return unicode;
- }
-#endif
-
- unicode = PyUnicode_New(1, ch);
+ PyObject *unicode;
+
+#ifdef LATIN1_SINGLETONS
+ unicode = unicode_latin1[ch];
+ if (unicode) {
+ Py_INCREF(unicode);
+ return unicode;
+ }
+#endif
+
+ unicode = PyUnicode_New(1, ch);
if (!unicode) {
- return NULL;
+ return NULL;
}
-
- PyUnicode_1BYTE_DATA(unicode)[0] = ch;
- assert(_PyUnicode_CheckConsistency(unicode, 1));
-
-#ifdef LATIN1_SINGLETONS
+
+ PyUnicode_1BYTE_DATA(unicode)[0] = ch;
+ assert(_PyUnicode_CheckConsistency(unicode, 1));
+
+#ifdef LATIN1_SINGLETONS
Py_INCREF(unicode);
- unicode_latin1[ch] = unicode;
-#endif
+ unicode_latin1[ch] = unicode;
+#endif
return unicode;
}
@@ -2215,20 +2215,20 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
if (size == 0)
_Py_RETURN_UNICODE_EMPTY();
-#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
- /* Oracle Solaris uses non-Unicode internal wchar_t form for
- non-Unicode locales and hence needs conversion to UCS-4 first. */
- if (_Py_LocaleUsesNonUnicodeWchar()) {
- wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size);
- if (!converted) {
- return NULL;
- }
- PyObject *unicode = _PyUnicode_FromUCS4(converted, size);
- PyMem_Free(converted);
- return unicode;
- }
-#endif
-
+#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+ /* Oracle Solaris uses non-Unicode internal wchar_t form for
+ non-Unicode locales and hence needs conversion to UCS-4 first. */
+ if (_Py_LocaleUsesNonUnicodeWchar()) {
+ wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size);
+ if (!converted) {
+ return NULL;
+ }
+ PyObject *unicode = _PyUnicode_FromUCS4(converted, size);
+ PyMem_Free(converted);
+ return unicode;
+ }
+#endif
+
/* Single character Unicode objects in the Latin-1 range are
shared when using this constructor */
if (size == 1 && (Py_UCS4)*u < 256)
@@ -2316,8 +2316,8 @@ _PyUnicode_FromId(_Py_Identifier *id)
return id->object;
}
-static void
-unicode_clear_static_strings(void)
+static void
+unicode_clear_static_strings(void)
{
_Py_Identifier *tmp, *s = static_strings;
while (s) {
@@ -2464,7 +2464,7 @@ Py_UCS4
_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
{
enum PyUnicode_Kind kind;
- const void *startptr, *endptr;
+ const void *startptr, *endptr;
assert(PyUnicode_IS_READY(unicode));
assert(0 <= start);
@@ -2527,15 +2527,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
if (max_char >= 256)
return;
}
- else if (kind == PyUnicode_4BYTE_KIND) {
+ else if (kind == PyUnicode_4BYTE_KIND) {
const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
max_char = ucs4lib_find_max_char(u, u + len);
if (max_char >= 0x10000)
return;
}
- else
- Py_UNREACHABLE();
-
+ else
+ Py_UNREACHABLE();
+
copy = PyUnicode_New(len, max_char);
if (copy != NULL)
_PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
@@ -2572,12 +2572,12 @@ _PyUnicode_Copy(PyObject *unicode)
/* Widen Unicode objects to larger buffers. Don't write terminating null
character. Return NULL on error. */
-static void*
-unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
+static void*
+unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
{
void *result;
- assert(skind < kind);
+ assert(skind < kind);
switch (kind) {
case PyUnicode_2BYTE_KIND:
result = PyMem_New(Py_UCS2, len);
@@ -2586,8 +2586,8 @@ unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned in
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS2,
- (const Py_UCS1 *)data,
- ((const Py_UCS1 *)data) + len,
+ (const Py_UCS1 *)data,
+ ((const Py_UCS1 *)data) + len,
result);
return result;
case PyUnicode_4BYTE_KIND:
@@ -2597,22 +2597,22 @@ unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned in
if (skind == PyUnicode_2BYTE_KIND) {
_PyUnicode_CONVERT_BYTES(
Py_UCS2, Py_UCS4,
- (const Py_UCS2 *)data,
- ((const Py_UCS2 *)data) + len,
+ (const Py_UCS2 *)data,
+ ((const Py_UCS2 *)data) + len,
result);
}
else {
assert(skind == PyUnicode_1BYTE_KIND);
_PyUnicode_CONVERT_BYTES(
Py_UCS1, Py_UCS4,
- (const Py_UCS1 *)data,
- ((const Py_UCS1 *)data) + len,
+ (const Py_UCS1 *)data,
+ ((const Py_UCS1 *)data) + len,
result);
}
return result;
default:
- Py_UNREACHABLE();
- return NULL;
+ Py_UNREACHABLE();
+ return NULL;
}
}
@@ -2621,7 +2621,7 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
int copy_null)
{
int kind;
- const void *data;
+ const void *data;
Py_ssize_t len, targetlen;
if (PyUnicode_READY(string) == -1)
return NULL;
@@ -2648,19 +2648,19 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
}
}
if (kind == PyUnicode_1BYTE_KIND) {
- const Py_UCS1 *start = (const Py_UCS1 *) data;
+ const Py_UCS1 *start = (const Py_UCS1 *) data;
_PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS4, start, start + len, target);
}
else if (kind == PyUnicode_2BYTE_KIND) {
- const Py_UCS2 *start = (const Py_UCS2 *) data;
+ const Py_UCS2 *start = (const Py_UCS2 *) data;
_PyUnicode_CONVERT_BYTES(Py_UCS2, Py_UCS4, start, start + len, target);
}
- else if (kind == PyUnicode_4BYTE_KIND) {
+ else if (kind == PyUnicode_4BYTE_KIND) {
memcpy(target, data, len * sizeof(Py_UCS4));
}
- else {
- Py_UNREACHABLE();
- }
+ else {
+ Py_UNREACHABLE();
+ }
if (copy_null)
target[len] = 0;
return target;
@@ -3126,83 +3126,83 @@ PyUnicode_FromFormat(const char *format, ...)
return ret;
}
-static Py_ssize_t
-unicode_get_widechar_size(PyObject *unicode)
-{
- Py_ssize_t res;
-
- assert(unicode != NULL);
- assert(_PyUnicode_CHECK(unicode));
-
- if (_PyUnicode_WSTR(unicode) != NULL) {
- return PyUnicode_WSTR_LENGTH(unicode);
- }
- assert(PyUnicode_IS_READY(unicode));
-
- res = _PyUnicode_LENGTH(unicode);
-#if SIZEOF_WCHAR_T == 2
- if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) {
- const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode);
- const Py_UCS4 *end = s + res;
- for (; s < end; ++s) {
- if (*s > 0xFFFF) {
- ++res;
- }
- }
- }
-#endif
- return res;
-}
-
-static void
-unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size)
-{
- const wchar_t *wstr;
-
- assert(unicode != NULL);
- assert(_PyUnicode_CHECK(unicode));
-
- wstr = _PyUnicode_WSTR(unicode);
- if (wstr != NULL) {
- memcpy(w, wstr, size * sizeof(wchar_t));
- return;
- }
- assert(PyUnicode_IS_READY(unicode));
-
- if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
- const Py_UCS1 *s = PyUnicode_1BYTE_DATA(unicode);
- for (; size--; ++s, ++w) {
- *w = *s;
- }
- }
- else {
-#if SIZEOF_WCHAR_T == 4
- assert(PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND);
- const Py_UCS2 *s = PyUnicode_2BYTE_DATA(unicode);
- for (; size--; ++s, ++w) {
- *w = *s;
- }
-#else
- assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
- const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode);
- for (; size--; ++s, ++w) {
- Py_UCS4 ch = *s;
- if (ch > 0xFFFF) {
- assert(ch <= MAX_UNICODE);
- /* encode surrogate pair in this case */
- *w++ = Py_UNICODE_HIGH_SURROGATE(ch);
- if (!size--)
- break;
- *w = Py_UNICODE_LOW_SURROGATE(ch);
- }
- else {
- *w = ch;
- }
- }
-#endif
- }
-}
-
+static Py_ssize_t
+unicode_get_widechar_size(PyObject *unicode)
+{
+ Py_ssize_t res;
+
+ assert(unicode != NULL);
+ assert(_PyUnicode_CHECK(unicode));
+
+ if (_PyUnicode_WSTR(unicode) != NULL) {
+ return PyUnicode_WSTR_LENGTH(unicode);
+ }
+ assert(PyUnicode_IS_READY(unicode));
+
+ res = _PyUnicode_LENGTH(unicode);
+#if SIZEOF_WCHAR_T == 2
+ if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) {
+ const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode);
+ const Py_UCS4 *end = s + res;
+ for (; s < end; ++s) {
+ if (*s > 0xFFFF) {
+ ++res;
+ }
+ }
+ }
+#endif
+ return res;
+}
+
+static void
+unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size)
+{
+ const wchar_t *wstr;
+
+ assert(unicode != NULL);
+ assert(_PyUnicode_CHECK(unicode));
+
+ wstr = _PyUnicode_WSTR(unicode);
+ if (wstr != NULL) {
+ memcpy(w, wstr, size * sizeof(wchar_t));
+ return;
+ }
+ assert(PyUnicode_IS_READY(unicode));
+
+ if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
+ const Py_UCS1 *s = PyUnicode_1BYTE_DATA(unicode);
+ for (; size--; ++s, ++w) {
+ *w = *s;
+ }
+ }
+ else {
+#if SIZEOF_WCHAR_T == 4
+ assert(PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND);
+ const Py_UCS2 *s = PyUnicode_2BYTE_DATA(unicode);
+ for (; size--; ++s, ++w) {
+ *w = *s;
+ }
+#else
+ assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+ const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode);
+ for (; size--; ++s, ++w) {
+ Py_UCS4 ch = *s;
+ if (ch > 0xFFFF) {
+ assert(ch <= MAX_UNICODE);
+ /* encode surrogate pair in this case */
+ *w++ = Py_UNICODE_HIGH_SURROGATE(ch);
+ if (!size--)
+ break;
+ *w = Py_UNICODE_LOW_SURROGATE(ch);
+ }
+ else {
+ *w = ch;
+ }
+ }
+#endif
+ }
+}
+
#ifdef HAVE_WCHAR_H
/* Convert a Unicode object to a wide character string.
@@ -3224,35 +3224,35 @@ PyUnicode_AsWideChar(PyObject *unicode,
PyErr_BadInternalCall();
return -1;
}
- if (!PyUnicode_Check(unicode)) {
- PyErr_BadArgument();
+ if (!PyUnicode_Check(unicode)) {
+ PyErr_BadArgument();
return -1;
- }
+ }
+
+ res = unicode_get_widechar_size(unicode);
+ if (w == NULL) {
+ return res + 1;
+ }
- res = unicode_get_widechar_size(unicode);
- if (w == NULL) {
- return res + 1;
+ if (size > res) {
+ size = res + 1;
}
-
- if (size > res) {
- size = res + 1;
- }
- else {
- res = size;
- }
- unicode_copy_as_widechar(unicode, w, size);
-
-#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
- /* Oracle Solaris uses non-Unicode internal wchar_t form for
- non-Unicode locales and hence needs conversion first. */
- if (_Py_LocaleUsesNonUnicodeWchar()) {
- if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) {
- return -1;
- }
- }
-#endif
-
- return res;
+ else {
+ res = size;
+ }
+ unicode_copy_as_widechar(unicode, w, size);
+
+#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+ /* Oracle Solaris uses non-Unicode internal wchar_t form for
+ non-Unicode locales and hence needs conversion first. */
+ if (_Py_LocaleUsesNonUnicodeWchar()) {
+ if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) {
+ return -1;
+ }
+ }
+#endif
+
+ return res;
}
wchar_t*
@@ -3266,38 +3266,38 @@ PyUnicode_AsWideCharString(PyObject *unicode,
PyErr_BadInternalCall();
return NULL;
}
- if (!PyUnicode_Check(unicode)) {
- PyErr_BadArgument();
+ if (!PyUnicode_Check(unicode)) {
+ PyErr_BadArgument();
return NULL;
}
- buflen = unicode_get_widechar_size(unicode);
- buffer = (wchar_t *) PyMem_NEW(wchar_t, (buflen + 1));
+ buflen = unicode_get_widechar_size(unicode);
+ buffer = (wchar_t *) PyMem_NEW(wchar_t, (buflen + 1));
if (buffer == NULL) {
PyErr_NoMemory();
return NULL;
}
- unicode_copy_as_widechar(unicode, buffer, buflen + 1);
-
-#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
- /* Oracle Solaris uses non-Unicode internal wchar_t form for
- non-Unicode locales and hence needs conversion first. */
- if (_Py_LocaleUsesNonUnicodeWchar()) {
- if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) {
- return NULL;
- }
- }
-#endif
-
- if (size != NULL) {
+ unicode_copy_as_widechar(unicode, buffer, buflen + 1);
+
+#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+ /* Oracle Solaris uses non-Unicode internal wchar_t form for
+ non-Unicode locales and hence needs conversion first. */
+ if (_Py_LocaleUsesNonUnicodeWchar()) {
+ if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) {
+ return NULL;
+ }
+ }
+#endif
+
+ if (size != NULL) {
*size = buflen;
- }
- else if (wcslen(buffer) != (size_t)buflen) {
- PyMem_FREE(buffer);
- PyErr_SetString(PyExc_ValueError,
- "embedded null character");
- return NULL;
- }
+ }
+ else if (wcslen(buffer) != (size_t)buflen) {
+ PyMem_FREE(buffer);
+ PyErr_SetString(PyExc_ValueError,
+ "embedded null character");
+ return NULL;
+ }
return buffer;
}
@@ -3352,13 +3352,13 @@ PyUnicode_FromEncodedObject(PyObject *obj,
/* Decoding bytes objects is the most common case and should be fast */
if (PyBytes_Check(obj)) {
- if (PyBytes_GET_SIZE(obj) == 0) {
- if (unicode_check_encoding_errors(encoding, errors) < 0) {
- return NULL;
- }
+ if (PyBytes_GET_SIZE(obj) == 0) {
+ if (unicode_check_encoding_errors(encoding, errors) < 0) {
+ return NULL;
+ }
_Py_RETURN_UNICODE_EMPTY();
- }
- return PyUnicode_Decode(
+ }
+ return PyUnicode_Decode(
PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
encoding, errors);
}
@@ -3379,9 +3379,9 @@ PyUnicode_FromEncodedObject(PyObject *obj,
if (buffer.len == 0) {
PyBuffer_Release(&buffer);
- if (unicode_check_encoding_errors(encoding, errors) < 0) {
- return NULL;
- }
+ if (unicode_check_encoding_errors(encoding, errors) < 0) {
+ return NULL;
+ }
_Py_RETURN_UNICODE_EMPTY();
}
@@ -3449,14 +3449,14 @@ PyUnicode_Decode(const char *s,
Py_buffer info;
char buflower[11]; /* strlen("iso-8859-1\0") == 11, longest shortcut */
- if (unicode_check_encoding_errors(encoding, errors) < 0) {
- return NULL;
- }
-
- if (size == 0) {
- _Py_RETURN_UNICODE_EMPTY();
- }
-
+ if (unicode_check_encoding_errors(encoding, errors) < 0) {
+ return NULL;
+ }
+
+ if (size == 0) {
+ _Py_RETURN_UNICODE_EMPTY();
+ }
+
if (encoding == NULL) {
return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
}
@@ -3639,7 +3639,7 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
static PyObject *
-unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
+unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
int current_locale)
{
Py_ssize_t wlen;
@@ -3658,7 +3658,7 @@ unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
size_t error_pos;
const char *reason;
int res = _Py_EncodeLocaleEx(wstr, &str, &error_pos, &reason,
- current_locale, error_handler);
+ current_locale, error_handler);
PyMem_Free(wstr);
if (res != 0) {
@@ -3674,9 +3674,9 @@ unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
Py_DECREF(exc);
}
}
- else if (res == -3) {
- PyErr_SetString(PyExc_ValueError, "unsupported error handler");
- }
+ else if (res == -3) {
+ PyErr_SetString(PyExc_ValueError, "unsupported error handler");
+ }
else {
PyErr_NoMemory();
}
@@ -3691,41 +3691,41 @@ unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{
- _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
- return unicode_encode_locale(unicode, error_handler, 1);
+ _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+ return unicode_encode_locale(unicode, error_handler, 1);
}
PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
- PyInterpreterState *interp = _PyInterpreterState_GET();
- struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
- if (fs_codec->utf8) {
- return unicode_encode_utf8(unicode,
- fs_codec->error_handler,
- fs_codec->errors);
- }
-#ifndef _Py_FORCE_UTF8_FS_ENCODING
- else if (fs_codec->encoding) {
- return PyUnicode_AsEncodedString(unicode,
- fs_codec->encoding,
- fs_codec->errors);
- }
-#endif
- else {
- /* Before _PyUnicode_InitEncodings() is called, the Python codec
- machinery is not ready and so cannot be used:
- use wcstombs() in this case. */
- const PyConfig *config = _PyInterpreterState_GetConfig(interp);
- const wchar_t *filesystem_errors = config->filesystem_errors;
- assert(filesystem_errors != NULL);
- _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
- assert(errors != _Py_ERROR_UNKNOWN);
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- return unicode_encode_utf8(unicode, errors, NULL);
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
+ if (fs_codec->utf8) {
+ return unicode_encode_utf8(unicode,
+ fs_codec->error_handler,
+ fs_codec->errors);
+ }
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+ else if (fs_codec->encoding) {
+ return PyUnicode_AsEncodedString(unicode,
+ fs_codec->encoding,
+ fs_codec->errors);
+ }
+#endif
+ else {
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
+ machinery is not ready and so cannot be used:
+ use wcstombs() in this case. */
+ const PyConfig *config = _PyInterpreterState_GetConfig(interp);
+ const wchar_t *filesystem_errors = config->filesystem_errors;
+ assert(filesystem_errors != NULL);
+ _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+ assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ return unicode_encode_utf8(unicode, errors, NULL);
#else
- return unicode_encode_locale(unicode, errors, 0);
-#endif
+ return unicode_encode_locale(unicode, errors, 0);
+#endif
}
}
@@ -3742,10 +3742,10 @@ PyUnicode_AsEncodedString(PyObject *unicode,
return NULL;
}
- if (unicode_check_encoding_errors(encoding, errors) < 0) {
- return NULL;
- }
-
+ if (unicode_check_encoding_errors(encoding, errors) < 0) {
+ return NULL;
+ }
+
if (encoding == NULL) {
return _PyUnicode_AsUTF8String(unicode, errors);
}
@@ -3869,8 +3869,8 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
}
static PyObject*
-unicode_decode_locale(const char *str, Py_ssize_t len,
- _Py_error_handler errors, int current_locale)
+unicode_decode_locale(const char *str, Py_ssize_t len,
+ _Py_error_handler errors, int current_locale)
{
if (str[len] != '\0' || (size_t)len != strlen(str)) {
PyErr_SetString(PyExc_ValueError, "embedded null byte");
@@ -3881,7 +3881,7 @@ unicode_decode_locale(const char *str, Py_ssize_t len,
size_t wlen;
const char *reason;
int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
- current_locale, errors);
+ current_locale, errors);
if (res != 0) {
if (res == -2) {
PyObject *exc;
@@ -3895,9 +3895,9 @@ unicode_decode_locale(const char *str, Py_ssize_t len,
Py_DECREF(exc);
}
}
- else if (res == -3) {
- PyErr_SetString(PyExc_ValueError, "unsupported error handler");
- }
+ else if (res == -3) {
+ PyErr_SetString(PyExc_ValueError, "unsupported error handler");
+ }
else {
PyErr_NoMemory();
}
@@ -3913,16 +3913,16 @@ PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
const char *errors)
{
- _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
- return unicode_decode_locale(str, len, error_handler, 1);
+ _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+ return unicode_decode_locale(str, len, error_handler, 1);
}
PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors)
{
Py_ssize_t size = (Py_ssize_t)strlen(str);
- _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
- return unicode_decode_locale(str, size, error_handler, 1);
+ _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+ return unicode_decode_locale(str, size, error_handler, 1);
}
@@ -3935,35 +3935,35 @@ PyUnicode_DecodeFSDefault(const char *s) {
PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
- PyInterpreterState *interp = _PyInterpreterState_GET();
- struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
- if (fs_codec->utf8) {
- return unicode_decode_utf8(s, size,
- fs_codec->error_handler,
- fs_codec->errors,
- NULL);
- }
-#ifndef _Py_FORCE_UTF8_FS_ENCODING
- else if (fs_codec->encoding) {
- return PyUnicode_Decode(s, size,
- fs_codec->encoding,
- fs_codec->errors);
- }
-#endif
- else {
- /* Before _PyUnicode_InitEncodings() is called, the Python codec
- machinery is not ready and so cannot be used:
- use mbstowcs() in this case. */
- const PyConfig *config = _PyInterpreterState_GetConfig(interp);
- const wchar_t *filesystem_errors = config->filesystem_errors;
- assert(filesystem_errors != NULL);
- _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
- assert(errors != _Py_ERROR_UNKNOWN);
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- return unicode_decode_utf8(s, size, errors, NULL, NULL);
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
+ if (fs_codec->utf8) {
+ return unicode_decode_utf8(s, size,
+ fs_codec->error_handler,
+ fs_codec->errors,
+ NULL);
+ }
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+ else if (fs_codec->encoding) {
+ return PyUnicode_Decode(s, size,
+ fs_codec->encoding,
+ fs_codec->errors);
+ }
+#endif
+ else {
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
+ machinery is not ready and so cannot be used:
+ use mbstowcs() in this case. */
+ const PyConfig *config = _PyInterpreterState_GetConfig(interp);
+ const wchar_t *filesystem_errors = config->filesystem_errors;
+ assert(filesystem_errors != NULL);
+ _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+ assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ return unicode_decode_utf8(s, size, errors, NULL, NULL);
#else
- return unicode_decode_locale(s, size, errors, 0);
-#endif
+ return unicode_decode_locale(s, size, errors, 0);
+#endif
}
}
@@ -3974,7 +3974,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
PyObject *path = NULL;
PyObject *output = NULL;
Py_ssize_t size;
- const char *data;
+ const char *data;
if (arg == NULL) {
Py_DECREF(*(PyObject**)addr);
*(PyObject**)addr = NULL;
@@ -4079,8 +4079,8 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
}
-static int unicode_fill_utf8(PyObject *unicode);
-
+static int unicode_fill_utf8(PyObject *unicode);
+
const char *
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
{
@@ -4092,7 +4092,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
return NULL;
if (PyUnicode_UTF8(unicode) == NULL) {
- if (unicode_fill_utf8(unicode) == -1) {
+ if (unicode_fill_utf8(unicode) == -1) {
return NULL;
}
}
@@ -4115,38 +4115,38 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
PyErr_BadArgument();
return NULL;
}
- Py_UNICODE *w = _PyUnicode_WSTR(unicode);
- if (w == NULL) {
+ Py_UNICODE *w = _PyUnicode_WSTR(unicode);
+ if (w == NULL) {
/* Non-ASCII compact unicode object */
- assert(_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
+ assert(_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
assert(PyUnicode_IS_READY(unicode));
- Py_ssize_t wlen = unicode_get_widechar_size(unicode);
- if ((size_t)wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
- PyErr_NoMemory();
+ Py_ssize_t wlen = unicode_get_widechar_size(unicode);
+ if ((size_t)wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
+ PyErr_NoMemory();
return NULL;
}
- w = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) * (wlen + 1));
- if (w == NULL) {
- PyErr_NoMemory();
- return NULL;
+ w = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) * (wlen + 1));
+ if (w == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ unicode_copy_as_widechar(unicode, w, wlen + 1);
+ _PyUnicode_WSTR(unicode) = w;
+ if (!PyUnicode_IS_COMPACT_ASCII(unicode)) {
+ _PyUnicode_WSTR_LENGTH(unicode) = wlen;
}
- unicode_copy_as_widechar(unicode, w, wlen + 1);
- _PyUnicode_WSTR(unicode) = w;
- if (!PyUnicode_IS_COMPACT_ASCII(unicode)) {
- _PyUnicode_WSTR_LENGTH(unicode) = wlen;
- }
}
if (size != NULL)
*size = PyUnicode_WSTR_LENGTH(unicode);
- return w;
+ return w;
}
-/* Deprecated APIs */
-
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
-
+/* Deprecated APIs */
+
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+
Py_UNICODE *
PyUnicode_AsUnicode(PyObject *unicode)
{
@@ -4185,8 +4185,8 @@ PyUnicode_GetSize(PyObject *unicode)
return -1;
}
-_Py_COMP_DIAG_POP
-
+_Py_COMP_DIAG_POP
+
Py_ssize_t
PyUnicode_GetLength(PyObject *unicode)
{
@@ -4202,7 +4202,7 @@ PyUnicode_GetLength(PyObject *unicode)
Py_UCS4
PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
{
- const void *data;
+ const void *data;
int kind;
if (!PyUnicode_Check(unicode)) {
@@ -4277,21 +4277,21 @@ onError:
}
#ifdef MS_WINDOWS
-static int
-widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize)
-{
- if (newsize > *size) {
- wchar_t *newbuf = *buf;
- if (PyMem_Resize(newbuf, wchar_t, newsize) == NULL) {
- PyErr_NoMemory();
- return -1;
- }
- *buf = newbuf;
- }
- *size = newsize;
- return 0;
-}
-
+static int
+widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize)
+{
+ if (newsize > *size) {
+ wchar_t *newbuf = *buf;
+ if (PyMem_Resize(newbuf, wchar_t, newsize) == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ *buf = newbuf;
+ }
+ *size = newsize;
+ return 0;
+}
+
/* error handling callback helper:
build arguments, call the callback and check the arguments,
if no exception occurred, copy the replacement to the output
@@ -4305,7 +4305,7 @@ unicode_decode_call_errorhandler_wchar(
const char *encoding, const char *reason,
const char **input, const char **inend, Py_ssize_t *startinpos,
Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
- wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos)
+ wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos)
{
static const char *argparse = "Un;decoding error handler must return (str, int) tuple";
@@ -4333,7 +4333,7 @@ unicode_decode_call_errorhandler_wchar(
if (*exceptionObject == NULL)
goto onError;
- restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
+ restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
if (restuple == NULL)
goto onError;
if (!PyTuple_Check(restuple)) {
@@ -4362,10 +4362,10 @@ unicode_decode_call_errorhandler_wchar(
goto onError;
}
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
-_Py_COMP_DIAG_POP
+_Py_COMP_DIAG_POP
if (repwstr == NULL)
goto onError;
/* need more space? (at least enough for what we
@@ -4379,15 +4379,15 @@ _Py_COMP_DIAG_POP
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
goto overflow;
requiredsize += insize - newpos;
- outsize = *bufsize;
+ outsize = *bufsize;
if (requiredsize > outsize) {
if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize;
- if (widechar_resize(buf, bufsize, requiredsize) < 0) {
+ if (widechar_resize(buf, bufsize, requiredsize) < 0) {
goto onError;
- }
+ }
}
- wcsncpy(*buf + *outpos, repwstr, repwlen);
+ wcsncpy(*buf + *outpos, repwstr, repwlen);
*outpos += repwlen;
*endinpos = newpos;
*inptr = *input + newpos;
@@ -4440,7 +4440,7 @@ unicode_decode_call_errorhandler_writer(
if (*exceptionObject == NULL)
goto onError;
- restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
+ restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
if (restuple == NULL)
goto onError;
if (!PyTuple_Check(restuple)) {
@@ -4714,11 +4714,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
goto onError;
}
- else if (s < e && !IS_BASE64(*s)) {
- s++;
- errmsg = "ill-formed sequence";
- goto utf7Error;
- }
+ else if (s < e && !IS_BASE64(*s)) {
+ s++;
+ errmsg = "ill-formed sequence";
+ goto utf7Error;
+ }
else { /* begin base64-encoded section */
inShift = 1;
surrogate = 0;
@@ -4807,7 +4807,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
const char *errors)
{
int kind;
- const void *data;
+ const void *data;
Py_ssize_t len;
PyObject *v;
int inShift = 0;
@@ -4815,7 +4815,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
unsigned int base64bits = 0;
unsigned long base64buffer = 0;
char * out;
- const char * start;
+ const char * start;
if (PyUnicode_READY(str) == -1)
return NULL;
@@ -5007,7 +5007,7 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
/* Help allocation */
const char *_p = p;
while (_p < aligned_end) {
- unsigned long value = *(const unsigned long *) _p;
+ unsigned long value = *(const unsigned long *) _p;
if (value & ASCII_CHAR_MASK)
break;
_p += SIZEOF_LONG;
@@ -5024,10 +5024,10 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
return p - start;
}
-static PyObject *
-unicode_decode_utf8(const char *s, Py_ssize_t size,
- _Py_error_handler error_handler, const char *errors,
- Py_ssize_t *consumed)
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+ _Py_error_handler error_handler, const char *errors,
+ Py_ssize_t *consumed)
{
if (size == 0) {
if (consumed)
@@ -5042,29 +5042,29 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
return get_latin1_char((unsigned char)s[0]);
}
- const char *starts = s;
- const char *end = s + size;
+ const char *starts = s;
+ const char *end = s + size;
+
+ // fast path: try ASCII string.
+ PyObject *u = PyUnicode_New(size, 127);
+ if (u == NULL) {
+ return NULL;
+ }
+ s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u));
+ if (s == end) {
+ return u;
+ }
+
+ // Use _PyUnicodeWriter after fast path is failed.
+ _PyUnicodeWriter writer;
+ _PyUnicodeWriter_InitWithBuffer(&writer, u);
+ writer.pos = s - starts;
+
+ Py_ssize_t startinpos, endinpos;
+ const char *errmsg = "";
+ PyObject *error_handler_obj = NULL;
+ PyObject *exc = NULL;
- // fast path: try ASCII string.
- PyObject *u = PyUnicode_New(size, 127);
- if (u == NULL) {
- return NULL;
- }
- s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u));
- if (s == end) {
- return u;
- }
-
- // Use _PyUnicodeWriter after fast path is failed.
- _PyUnicodeWriter writer;
- _PyUnicodeWriter_InitWithBuffer(&writer, u);
- writer.pos = s - starts;
-
- Py_ssize_t startinpos, endinpos;
- const char *errmsg = "";
- PyObject *error_handler_obj = NULL;
- PyObject *exc = NULL;
-
while (s < end) {
Py_UCS4 ch;
int kind = writer.kind;
@@ -5095,13 +5095,13 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
endinpos = startinpos + 1;
break;
case 2:
- if (consumed && (unsigned char)s[0] == 0xED && end - s == 2
- && (unsigned char)s[1] >= 0xA0 && (unsigned char)s[1] <= 0xBF)
- {
- /* Truncated surrogate code in range D800-DFFF */
- goto End;
- }
- /* fall through */
+ if (consumed && (unsigned char)s[0] == 0xED && end - s == 2
+ && (unsigned char)s[1] >= 0xA0 && (unsigned char)s[1] <= 0xBF)
+ {
+ /* Truncated surrogate code in range D800-DFFF */
+ goto End;
+ }
+ /* fall through */
case 3:
case 4:
errmsg = "invalid continuation byte";
@@ -5115,7 +5115,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
}
if (error_handler == _Py_ERROR_UNKNOWN)
- error_handler = _Py_GetErrorHandler(errors);
+ error_handler = _Py_GetErrorHandler(errors);
switch (error_handler) {
case _Py_ERROR_IGNORE:
@@ -5170,16 +5170,16 @@ onError:
}
-PyObject *
-PyUnicode_DecodeUTF8Stateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
-{
- return unicode_decode_utf8(s, size, _Py_ERROR_UNKNOWN, errors, consumed);
-}
-
-
+PyObject *
+PyUnicode_DecodeUTF8Stateful(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
+{
+ return unicode_decode_utf8(s, size, _Py_ERROR_UNKNOWN, errors, consumed);
+}
+
+
/* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
non-zero, use strict error handler otherwise.
@@ -5194,29 +5194,29 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
is not NULL, write the decoding error message into *reason. */
int
_Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
- const char **reason, _Py_error_handler errors)
+ const char **reason, _Py_error_handler errors)
{
const char *orig_s = s;
const char *e;
wchar_t *unicode;
Py_ssize_t outpos;
- int surrogateescape = 0;
- int surrogatepass = 0;
- switch (errors)
- {
- case _Py_ERROR_STRICT:
- break;
- case _Py_ERROR_SURROGATEESCAPE:
- surrogateescape = 1;
- break;
- case _Py_ERROR_SURROGATEPASS:
- surrogatepass = 1;
- break;
- default:
- return -3;
- }
-
+ int surrogateescape = 0;
+ int surrogatepass = 0;
+ switch (errors)
+ {
+ case _Py_ERROR_STRICT:
+ break;
+ case _Py_ERROR_SURROGATEESCAPE:
+ surrogateescape = 1;
+ break;
+ case _Py_ERROR_SURROGATEPASS:
+ surrogatepass = 1;
+ break;
+ default:
+ return -3;
+ }
+
/* Note: size will always be longer than the resulting Unicode
character count */
if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1)) {
@@ -5249,45 +5249,45 @@ _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
#endif
}
else {
- if (!ch && s == e) {
+ if (!ch && s == e) {
break;
- }
-
- if (surrogateescape) {
- unicode[outpos++] = 0xDC00 + (unsigned char)*s++;
- }
- else {
- /* Is it a valid three-byte code? */
- if (surrogatepass
- && (e - s) >= 3
- && (s[0] & 0xf0) == 0xe0
- && (s[1] & 0xc0) == 0x80
- && (s[2] & 0xc0) == 0x80)
- {
- ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
- s += 3;
- unicode[outpos++] = ch;
- }
- else {
- PyMem_RawFree(unicode );
- if (reason != NULL) {
- switch (ch) {
- case 0:
- *reason = "unexpected end of data";
- break;
- case 1:
- *reason = "invalid start byte";
- break;
- /* 2, 3, 4 */
- default:
- *reason = "invalid continuation byte";
- break;
- }
+ }
+
+ if (surrogateescape) {
+ unicode[outpos++] = 0xDC00 + (unsigned char)*s++;
+ }
+ else {
+ /* Is it a valid three-byte code? */
+ if (surrogatepass
+ && (e - s) >= 3
+ && (s[0] & 0xf0) == 0xe0
+ && (s[1] & 0xc0) == 0x80
+ && (s[2] & 0xc0) == 0x80)
+ {
+ ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
+ s += 3;
+ unicode[outpos++] = ch;
+ }
+ else {
+ PyMem_RawFree(unicode );
+ if (reason != NULL) {
+ switch (ch) {
+ case 0:
+ *reason = "unexpected end of data";
+ break;
+ case 1:
+ *reason = "invalid start byte";
+ break;
+ /* 2, 3, 4 */
+ default:
+ *reason = "invalid continuation byte";
+ break;
+ }
}
- if (wlen != NULL) {
- *wlen = s - orig_s;
- }
- return -2;
+ if (wlen != NULL) {
+ *wlen = s - orig_s;
+ }
+ return -2;
}
}
}
@@ -5300,21 +5300,21 @@ _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
return 0;
}
-
+
wchar_t*
-_Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen,
- size_t *wlen)
+_Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen,
+ size_t *wlen)
{
wchar_t *wstr;
- int res = _Py_DecodeUTF8Ex(arg, arglen,
- &wstr, wlen,
- NULL, _Py_ERROR_SURROGATEESCAPE);
+ int res = _Py_DecodeUTF8Ex(arg, arglen,
+ &wstr, wlen,
+ NULL, _Py_ERROR_SURROGATEESCAPE);
if (res != 0) {
- /* _Py_DecodeUTF8Ex() must support _Py_ERROR_SURROGATEESCAPE */
- assert(res != -3);
- if (wlen) {
- *wlen = (size_t)res;
- }
+ /* _Py_DecodeUTF8Ex() must support _Py_ERROR_SURROGATEESCAPE */
+ assert(res != -3);
+ if (wlen) {
+ *wlen = (size_t)res;
+ }
return NULL;
}
return wstr;
@@ -5333,29 +5333,29 @@ _Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen,
On memory allocation failure, return -1. */
int
_Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
- const char **reason, int raw_malloc, _Py_error_handler errors)
+ const char **reason, int raw_malloc, _Py_error_handler errors)
{
const Py_ssize_t max_char_size = 4;
Py_ssize_t len = wcslen(text);
assert(len >= 0);
- int surrogateescape = 0;
- int surrogatepass = 0;
- switch (errors)
- {
- case _Py_ERROR_STRICT:
- break;
- case _Py_ERROR_SURROGATEESCAPE:
- surrogateescape = 1;
- break;
- case _Py_ERROR_SURROGATEPASS:
- surrogatepass = 1;
- break;
- default:
- return -3;
- }
-
+ int surrogateescape = 0;
+ int surrogatepass = 0;
+ switch (errors)
+ {
+ case _Py_ERROR_STRICT:
+ break;
+ case _Py_ERROR_SURROGATEESCAPE:
+ surrogateescape = 1;
+ break;
+ case _Py_ERROR_SURROGATEPASS:
+ surrogatepass = 1;
+ break;
+ default:
+ return -3;
+ }
+
if (len > PY_SSIZE_T_MAX / max_char_size - 1) {
return -1;
}
@@ -5372,19 +5372,19 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
char *p = bytes;
Py_ssize_t i;
- for (i = 0; i < len; ) {
- Py_ssize_t ch_pos = i;
+ for (i = 0; i < len; ) {
+ Py_ssize_t ch_pos = i;
Py_UCS4 ch = text[i];
- i++;
-#if Py_UNICODE_SIZE == 2
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
- && i < len
- && Py_UNICODE_IS_LOW_SURROGATE(text[i]))
- {
- ch = Py_UNICODE_JOIN_SURROGATES(ch, text[i]);
- i++;
- }
-#endif
+ i++;
+#if Py_UNICODE_SIZE == 2
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+ && i < len
+ && Py_UNICODE_IS_LOW_SURROGATE(text[i]))
+ {
+ ch = Py_UNICODE_JOIN_SURROGATES(ch, text[i]);
+ i++;
+ }
+#endif
if (ch < 0x80) {
/* Encode ASCII */
@@ -5396,11 +5396,11 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
*p++ = (char)(0xc0 | (ch >> 6));
*p++ = (char)(0x80 | (ch & 0x3f));
}
- else if (Py_UNICODE_IS_SURROGATE(ch) && !surrogatepass) {
+ else if (Py_UNICODE_IS_SURROGATE(ch) && !surrogatepass) {
/* surrogateescape error handler */
if (!surrogateescape || !(0xDC80 <= ch && ch <= 0xDCFF)) {
if (error_pos != NULL) {
- *error_pos = (size_t)ch_pos;
+ *error_pos = (size_t)ch_pos;
}
if (reason != NULL) {
*reason = "encoding error";
@@ -5463,9 +5463,9 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
maximum possible needed (4 result bytes per Unicode character), and return
the excess memory at the end.
*/
-static PyObject *
-unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
- const char *errors)
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+ const char *errors)
{
if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument();
@@ -5479,96 +5479,96 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode),
PyUnicode_UTF8_LENGTH(unicode));
- enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
- const void *data = PyUnicode_DATA(unicode);
- Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+ enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+ const void *data = PyUnicode_DATA(unicode);
+ Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+
+ _PyBytesWriter writer;
+ char *end;
- _PyBytesWriter writer;
- char *end;
-
switch (kind) {
default:
Py_UNREACHABLE();
case PyUnicode_1BYTE_KIND:
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
assert(!PyUnicode_IS_ASCII(unicode));
- end = ucs1lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
- break;
+ end = ucs1lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+ break;
case PyUnicode_2BYTE_KIND:
- end = ucs2lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
- break;
+ end = ucs2lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+ break;
case PyUnicode_4BYTE_KIND:
- end = ucs4lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
- break;
+ end = ucs4lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+ break;
+ }
+
+ if (end == NULL) {
+ _PyBytesWriter_Dealloc(&writer);
+ return NULL;
}
-
- if (end == NULL) {
- _PyBytesWriter_Dealloc(&writer);
- return NULL;
- }
- return _PyBytesWriter_Finish(&writer, end);
+ return _PyBytesWriter_Finish(&writer, end);
+}
+
+static int
+unicode_fill_utf8(PyObject *unicode)
+{
+ /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
+ assert(!PyUnicode_IS_ASCII(unicode));
+
+ enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+ const void *data = PyUnicode_DATA(unicode);
+ Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+
+ _PyBytesWriter writer;
+ char *end;
+
+ switch (kind) {
+ default:
+ Py_UNREACHABLE();
+ case PyUnicode_1BYTE_KIND:
+ end = ucs1lib_utf8_encoder(&writer, unicode, data, size,
+ _Py_ERROR_STRICT, NULL);
+ break;
+ case PyUnicode_2BYTE_KIND:
+ end = ucs2lib_utf8_encoder(&writer, unicode, data, size,
+ _Py_ERROR_STRICT, NULL);
+ break;
+ case PyUnicode_4BYTE_KIND:
+ end = ucs4lib_utf8_encoder(&writer, unicode, data, size,
+ _Py_ERROR_STRICT, NULL);
+ break;
+ }
+ if (end == NULL) {
+ _PyBytesWriter_Dealloc(&writer);
+ return -1;
+ }
+
+ const char *start = writer.use_small_buffer ? writer.small_buffer :
+ PyBytes_AS_STRING(writer.buffer);
+ Py_ssize_t len = end - start;
+
+ char *cache = PyObject_MALLOC(len + 1);
+ if (cache == NULL) {
+ _PyBytesWriter_Dealloc(&writer);
+ PyErr_NoMemory();
+ return -1;
+ }
+ _PyUnicode_UTF8(unicode) = cache;
+ _PyUnicode_UTF8_LENGTH(unicode) = len;
+ memcpy(cache, start, len);
+ cache[len] = '\0';
+ _PyBytesWriter_Dealloc(&writer);
+ return 0;
}
-static int
-unicode_fill_utf8(PyObject *unicode)
-{
- /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
- assert(!PyUnicode_IS_ASCII(unicode));
-
- enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
- const void *data = PyUnicode_DATA(unicode);
- Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
-
- _PyBytesWriter writer;
- char *end;
-
- switch (kind) {
- default:
- Py_UNREACHABLE();
- case PyUnicode_1BYTE_KIND:
- end = ucs1lib_utf8_encoder(&writer, unicode, data, size,
- _Py_ERROR_STRICT, NULL);
- break;
- case PyUnicode_2BYTE_KIND:
- end = ucs2lib_utf8_encoder(&writer, unicode, data, size,
- _Py_ERROR_STRICT, NULL);
- break;
- case PyUnicode_4BYTE_KIND:
- end = ucs4lib_utf8_encoder(&writer, unicode, data, size,
- _Py_ERROR_STRICT, NULL);
- break;
- }
- if (end == NULL) {
- _PyBytesWriter_Dealloc(&writer);
- return -1;
- }
-
- const char *start = writer.use_small_buffer ? writer.small_buffer :
- PyBytes_AS_STRING(writer.buffer);
- Py_ssize_t len = end - start;
-
- char *cache = PyObject_MALLOC(len + 1);
- if (cache == NULL) {
- _PyBytesWriter_Dealloc(&writer);
- PyErr_NoMemory();
- return -1;
- }
- _PyUnicode_UTF8(unicode) = cache;
- _PyUnicode_UTF8_LENGTH(unicode) = len;
- memcpy(cache, start, len);
- cache[len] = '\0';
- _PyBytesWriter_Dealloc(&writer);
- return 0;
-}
-
PyObject *
-_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
-{
- return unicode_encode_utf8(unicode, _Py_ERROR_UNKNOWN, errors);
-}
-
-
-PyObject *
+_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
+{
+ return unicode_encode_utf8(unicode, _Py_ERROR_UNKNOWN, errors);
+}
+
+
+PyObject *
PyUnicode_EncodeUTF8(const Py_UNICODE *s,
Py_ssize_t size,
const char *errors)
@@ -5618,7 +5618,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
- q = (const unsigned char *)s;
+ q = (const unsigned char *)s;
e = q + size;
if (byteorder)
@@ -5943,7 +5943,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
PyObject *exc = NULL;
const char *encoding;
- q = (const unsigned char *)s;
+ q = (const unsigned char *)s;
e = q + size;
if (byteorder)
@@ -6271,10 +6271,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
PyObject *
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
Py_ssize_t size,
const char *errors,
- Py_ssize_t *consumed,
+ Py_ssize_t *consumed,
const char **first_invalid_escape)
{
const char *starts = s;
@@ -6287,9 +6287,9 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
*first_invalid_escape = NULL;
if (size == 0) {
- if (consumed) {
- *consumed = 0;
- }
+ if (consumed) {
+ *consumed = 0;
+ }
_Py_RETURN_UNICODE_EMPTY();
}
/* Escaped strings will always be longer than the resulting
@@ -6334,11 +6334,11 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
continue;
}
- Py_ssize_t startinpos = s - starts - 1;
+ Py_ssize_t startinpos = s - starts - 1;
/* \ - Escapes */
if (s >= end) {
message = "\\ at end of string";
- goto incomplete;
+ goto incomplete;
}
c = (unsigned char) *s++;
@@ -6392,10 +6392,10 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
count = 8;
message = "truncated \\UXXXXXXXX escape";
hexescape:
- for (ch = 0; count; ++s, --count) {
- if (s >= end) {
- goto incomplete;
- }
+ for (ch = 0; count; ++s, --count) {
+ if (s >= end) {
+ goto incomplete;
+ }
c = (unsigned char)*s;
ch <<= 4;
if (c >= '0' && c <= '9') {
@@ -6408,7 +6408,7 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
ch += c - ('A' - 10);
}
else {
- goto error;
+ goto error;
}
}
@@ -6437,20 +6437,20 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
}
message = "malformed \\N character escape";
- if (s >= end) {
- goto incomplete;
- }
- if (*s == '{') {
+ if (s >= end) {
+ goto incomplete;
+ }
+ if (*s == '{') {
const char *start = ++s;
size_t namelen;
/* look for the closing brace */
while (s < end && *s != '}')
s++;
- if (s >= end) {
- goto incomplete;
- }
+ if (s >= end) {
+ goto incomplete;
+ }
namelen = s - start;
- if (namelen) {
+ if (namelen) {
/* found a name. look it up in the unicode database */
s++;
ch = 0xffffffff; /* in case 'getcode' messes up */
@@ -6476,13 +6476,13 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
continue;
}
- incomplete:
- if (consumed) {
- *consumed = startinpos;
- break;
- }
- error:;
- Py_ssize_t endinpos = s-starts;
+ incomplete:
+ if (consumed) {
+ *consumed = startinpos;
+ break;
+ }
+ error:;
+ Py_ssize_t endinpos = s-starts;
writer.min_length = end - s + writer.pos;
if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
@@ -6509,14 +6509,14 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
}
PyObject *
-_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
+_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+ const char *errors,
+ Py_ssize_t *consumed)
{
const char *first_invalid_escape;
- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
- consumed,
+ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+ consumed,
&first_invalid_escape);
if (result == NULL)
return NULL;
@@ -6531,14 +6531,14 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
return result;
}
-PyObject *
-PyUnicode_DecodeUnicodeEscape(const char *s,
- Py_ssize_t size,
- const char *errors)
-{
- return _PyUnicode_DecodeUnicodeEscapeStateful(s, size, errors, NULL);
-}
-
+PyObject *
+PyUnicode_DecodeUnicodeEscape(const char *s,
+ Py_ssize_t size,
+ const char *errors)
+{
+ return _PyUnicode_DecodeUnicodeEscapeStateful(s, size, errors, NULL);
+}
+
/* Return a Unicode-Escape string version of the Unicode object. */
PyObject *
@@ -6548,7 +6548,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
PyObject *repr;
char *p;
enum PyUnicode_Kind kind;
- const void *data;
+ const void *data;
Py_ssize_t expandsize;
/* Initial allocation is based on the longest-possible character
@@ -6677,10 +6677,10 @@ PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
/* --- Raw Unicode Escape Codec ------------------------------------------- */
PyObject *
-_PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+_PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
{
const char *starts = s;
_PyUnicodeWriter writer;
@@ -6689,9 +6689,9 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
PyObject *exc = NULL;
if (size == 0) {
- if (consumed) {
- *consumed = 0;
- }
+ if (consumed) {
+ *consumed = 0;
+ }
_Py_RETURN_UNICODE_EMPTY();
}
@@ -6700,7 +6700,7 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
length after conversion to the true value. (But decoding error
handler might have to resize the string) */
_PyUnicodeWriter_Init(&writer);
- writer.min_length = size;
+ writer.min_length = size;
if (_PyUnicodeWriter_Prepare(&writer, size, 127) < 0) {
goto onError;
}
@@ -6724,21 +6724,21 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
} while(0)
/* Non-escape characters are interpreted as Unicode ordinals */
- if (c != '\\' || (s >= end && !consumed)) {
+ if (c != '\\' || (s >= end && !consumed)) {
WRITE_CHAR(c);
continue;
}
- Py_ssize_t startinpos = s - starts - 1;
- /* \ - Escapes */
- if (s >= end) {
- assert(consumed);
- // Set message to silent compiler warning.
- // Actually it is never used.
- message = "\\ at end of string";
- goto incomplete;
- }
-
+ Py_ssize_t startinpos = s - starts - 1;
+ /* \ - Escapes */
+ if (s >= end) {
+ assert(consumed);
+ // Set message to silent compiler warning.
+ // Actually it is never used.
+ message = "\\ at end of string";
+ goto incomplete;
+ }
+
c = (unsigned char) *s++;
if (c == 'u') {
count = 4;
@@ -6756,10 +6756,10 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
}
/* \uHHHH with 4 hex digits, \U00HHHHHH with 8 */
- for (ch = 0; count; ++s, --count) {
- if (s >= end) {
- goto incomplete;
- }
+ for (ch = 0; count; ++s, --count) {
+ if (s >= end) {
+ goto incomplete;
+ }
c = (unsigned char)*s;
ch <<= 4;
if (c >= '0' && c <= '9') {
@@ -6772,23 +6772,23 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
ch += c - ('A' - 10);
}
else {
- goto error;
+ goto error;
}
}
- if (ch > MAX_UNICODE) {
+ if (ch > MAX_UNICODE) {
message = "\\Uxxxxxxxx out of range";
- goto error;
+ goto error;
}
- WRITE_CHAR(ch);
- continue;
+ WRITE_CHAR(ch);
+ continue;
- incomplete:
- if (consumed) {
- *consumed = startinpos;
- break;
- }
- error:;
- Py_ssize_t endinpos = s-starts;
+ incomplete:
+ if (consumed) {
+ *consumed = startinpos;
+ break;
+ }
+ error:;
+ Py_ssize_t endinpos = s-starts;
writer.min_length = end - s + writer.pos;
if (unicode_decode_call_errorhandler_writer(
errors, &errorHandler,
@@ -6810,14 +6810,14 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return NULL;
-}
+}
-PyObject *
-PyUnicode_DecodeRawUnicodeEscape(const char *s,
- Py_ssize_t size,
- const char *errors)
-{
- return _PyUnicode_DecodeRawUnicodeEscapeStateful(s, size, errors, NULL);
+PyObject *
+PyUnicode_DecodeRawUnicodeEscape(const char *s,
+ Py_ssize_t size,
+ const char *errors)
+{
+ return _PyUnicode_DecodeRawUnicodeEscapeStateful(s, size, errors, NULL);
}
@@ -6828,7 +6828,7 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
char *p;
Py_ssize_t expandsize, pos;
int kind;
- const void *data;
+ const void *data;
Py_ssize_t len;
if (!PyUnicode_Check(unicode)) {
@@ -6868,7 +6868,7 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
if (ch < 0x100) {
*p++ = (char) ch;
}
- /* U+0100-U+ffff range: Map 16-bit characters to '\uHHHH' */
+ /* U+0100-U+ffff range: Map 16-bit characters to '\uHHHH' */
else if (ch < 0x10000) {
*p++ = '\\';
*p++ = 'u';
@@ -6921,7 +6921,7 @@ PyUnicode_DecodeLatin1(const char *s,
const char *errors)
{
/* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
- return _PyUnicode_FromUCS1((const unsigned char*)s, size);
+ return _PyUnicode_FromUCS1((const unsigned char*)s, size);
}
/* create or adjust a UnicodeEncodeError */
@@ -6996,7 +6996,7 @@ unicode_encode_call_errorhandler(const char *errors,
if (*exceptionObject == NULL)
return NULL;
- restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
+ restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
if (restuple == NULL)
return NULL;
if (!PyTuple_Check(restuple)) {
@@ -7034,7 +7034,7 @@ unicode_encode_ucs1(PyObject *unicode,
/* input state */
Py_ssize_t pos=0, size;
int kind;
- const void *data;
+ const void *data;
/* pointer into the output */
char *str;
const char *encoding = (limit == 256) ? "latin-1" : "ascii";
@@ -7085,7 +7085,7 @@ unicode_encode_ucs1(PyObject *unicode,
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (error_handler == _Py_ERROR_UNKNOWN)
- error_handler = _Py_GetErrorHandler(errors);
+ error_handler = _Py_GetErrorHandler(errors);
switch (error_handler) {
case _Py_ERROR_STRICT:
@@ -7245,7 +7245,7 @@ PyUnicode_DecodeASCII(const char *s,
const char *errors)
{
const char *starts = s;
- const char *e = s + size;
+ const char *e = s + size;
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
@@ -7257,25 +7257,25 @@ PyUnicode_DecodeASCII(const char *s,
if (size == 1 && (unsigned char)s[0] < 128)
return get_latin1_char((unsigned char)s[0]);
- // Shortcut for simple case
- PyObject *u = PyUnicode_New(size, 127);
- if (u == NULL) {
+ // Shortcut for simple case
+ PyObject *u = PyUnicode_New(size, 127);
+ if (u == NULL) {
return NULL;
- }
- Py_ssize_t outpos = ascii_decode(s, e, PyUnicode_1BYTE_DATA(u));
- if (outpos == size) {
- return u;
- }
+ }
+ Py_ssize_t outpos = ascii_decode(s, e, PyUnicode_1BYTE_DATA(u));
+ if (outpos == size) {
+ return u;
+ }
- _PyUnicodeWriter writer;
- _PyUnicodeWriter_InitWithBuffer(&writer, u);
+ _PyUnicodeWriter writer;
+ _PyUnicodeWriter_InitWithBuffer(&writer, u);
writer.pos = outpos;
- s += outpos;
- int kind = writer.kind;
- void *data = writer.data;
- Py_ssize_t startinpos, endinpos;
-
+ s += outpos;
+ int kind = writer.kind;
+ void *data = writer.data;
+ Py_ssize_t startinpos, endinpos;
+
while (s < e) {
unsigned char c = (unsigned char)*s;
if (c < 128) {
@@ -7288,7 +7288,7 @@ PyUnicode_DecodeASCII(const char *s,
/* byte outsize range 0x00..0x7f: call the error handler */
if (error_handler == _Py_ERROR_UNKNOWN)
- error_handler = _Py_GetErrorHandler(errors);
+ error_handler = _Py_GetErrorHandler(errors);
switch (error_handler)
{
@@ -7383,12 +7383,12 @@ PyUnicode_AsASCIIString(PyObject *unicode)
#define NEED_RETRY
#endif
-/* INT_MAX is the theoretical largest chunk (or INT_MAX / 2 when
- transcoding from UTF-16), but INT_MAX / 4 performs better in
- both cases also and avoids partial characters overrunning the
- length limit in MultiByteToWideChar on Windows */
-#define DECODING_CHUNK_SIZE (INT_MAX/4)
-
+/* INT_MAX is the theoretical largest chunk (or INT_MAX / 2 when
+ transcoding from UTF-16), but INT_MAX / 4 performs better in
+ both cases also and avoids partial characters overrunning the
+ length limit in MultiByteToWideChar on Windows */
+#define DECODING_CHUNK_SIZE (INT_MAX/4)
+
#ifndef WC_ERR_INVALID_CHARS
# define WC_ERR_INVALID_CHARS 0x0080
#endif
@@ -7430,33 +7430,33 @@ decode_code_page_flags(UINT code_page)
*/
static int
decode_code_page_strict(UINT code_page,
- wchar_t **buf,
- Py_ssize_t *bufsize,
+ wchar_t **buf,
+ Py_ssize_t *bufsize,
const char *in,
int insize)
{
- DWORD flags = MB_ERR_INVALID_CHARS;
+ DWORD flags = MB_ERR_INVALID_CHARS;
wchar_t *out;
DWORD outsize;
/* First get the size of the result */
assert(insize > 0);
- while ((outsize = MultiByteToWideChar(code_page, flags,
- in, insize, NULL, 0)) <= 0)
- {
- if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
- goto error;
- }
- /* For some code pages (e.g. UTF-7) flags must be set to 0. */
- flags = 0;
- }
+ while ((outsize = MultiByteToWideChar(code_page, flags,
+ in, insize, NULL, 0)) <= 0)
+ {
+ if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
+ goto error;
+ }
+ /* For some code pages (e.g. UTF-7) flags must be set to 0. */
+ flags = 0;
+ }
- /* Extend a wchar_t* buffer */
- Py_ssize_t n = *bufsize; /* Get the current length */
- if (widechar_resize(buf, bufsize, n + outsize) < 0) {
- return -1;
+ /* Extend a wchar_t* buffer */
+ Py_ssize_t n = *bufsize; /* Get the current length */
+ if (widechar_resize(buf, bufsize, n + outsize) < 0) {
+ return -1;
}
- out = *buf + n;
+ out = *buf + n;
/* Do the conversion */
outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
@@ -7480,14 +7480,14 @@ error:
*/
static int
decode_code_page_errors(UINT code_page,
- wchar_t **buf,
- Py_ssize_t *bufsize,
+ wchar_t **buf,
+ Py_ssize_t *bufsize,
const char *in, const int size,
const char *errors, int final)
{
const char *startin = in;
const char *endin = in + size;
- DWORD flags = MB_ERR_INVALID_CHARS;
+ DWORD flags = MB_ERR_INVALID_CHARS;
/* Ideally, we should get reason from FormatMessage. This is the Windows
2000 English version of the message. */
const char *reason = "No mapping for the Unicode character exists "
@@ -7521,16 +7521,16 @@ decode_code_page_errors(UINT code_page,
goto error;
}
- /* Extend a wchar_t* buffer */
- Py_ssize_t n = *bufsize; /* Get the current length */
- if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
- PyErr_NoMemory();
- goto error;
+ /* Extend a wchar_t* buffer */
+ Py_ssize_t n = *bufsize; /* Get the current length */
+ if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
+ PyErr_NoMemory();
+ goto error;
}
- if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) {
- goto error;
+ if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) {
+ goto error;
}
- out = *buf + n;
+ out = *buf + n;
/* Decode the byte string character per character */
while (in < endin)
@@ -7545,11 +7545,11 @@ decode_code_page_errors(UINT code_page,
if (outsize > 0)
break;
err = GetLastError();
- if (err == ERROR_INVALID_FLAGS && flags) {
- /* For some code pages (e.g. UTF-7) flags must be set to 0. */
- flags = 0;
- continue;
- }
+ if (err == ERROR_INVALID_FLAGS && flags) {
+ /* For some code pages (e.g. UTF-7) flags must be set to 0. */
+ flags = 0;
+ continue;
+ }
if (err != ERROR_NO_UNICODE_TRANSLATION
&& err != ERROR_INSUFFICIENT_BUFFER)
{
@@ -7570,16 +7570,16 @@ decode_code_page_errors(UINT code_page,
startinpos = in - startin;
endinpos = startinpos + 1;
- outpos = out - *buf;
+ outpos = out - *buf;
if (unicode_decode_call_errorhandler_wchar(
errors, &errorHandler,
encoding, reason,
&startin, &endin, &startinpos, &endinpos, &exc, &in,
- buf, bufsize, &outpos))
+ buf, bufsize, &outpos))
{
goto error;
}
- out = *buf + outpos;
+ out = *buf + outpos;
}
else {
in += insize;
@@ -7588,9 +7588,9 @@ decode_code_page_errors(UINT code_page,
}
}
- /* Shrink the buffer */
- assert(out - *buf <= *bufsize);
- *bufsize = out - *buf;
+ /* Shrink the buffer */
+ assert(out - *buf <= *bufsize);
+ *bufsize = out - *buf;
/* (in - startin) <= size and size is an int */
ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int);
@@ -7606,8 +7606,8 @@ decode_code_page_stateful(int code_page,
const char *s, Py_ssize_t size,
const char *errors, Py_ssize_t *consumed)
{
- wchar_t *buf = NULL;
- Py_ssize_t bufsize = 0;
+ wchar_t *buf = NULL;
+ Py_ssize_t bufsize = 0;
int chunk_size, final, converted, done;
if (code_page < 0) {
@@ -7625,8 +7625,8 @@ decode_code_page_stateful(int code_page,
do
{
#ifdef NEED_RETRY
- if (size > DECODING_CHUNK_SIZE) {
- chunk_size = DECODING_CHUNK_SIZE;
+ if (size > DECODING_CHUNK_SIZE) {
+ chunk_size = DECODING_CHUNK_SIZE;
final = 0;
done = 0;
}
@@ -7639,21 +7639,21 @@ decode_code_page_stateful(int code_page,
}
if (chunk_size == 0 && done) {
- if (buf != NULL)
+ if (buf != NULL)
break;
_Py_RETURN_UNICODE_EMPTY();
}
- converted = decode_code_page_strict(code_page, &buf, &bufsize,
+ converted = decode_code_page_strict(code_page, &buf, &bufsize,
s, chunk_size);
if (converted == -2)
- converted = decode_code_page_errors(code_page, &buf, &bufsize,
+ converted = decode_code_page_errors(code_page, &buf, &bufsize,
s, chunk_size,
errors, final);
assert(converted != 0 || done);
if (converted < 0) {
- PyMem_Free(buf);
+ PyMem_Free(buf);
return NULL;
}
@@ -7664,9 +7664,9 @@ decode_code_page_stateful(int code_page,
size -= converted;
} while (!done);
- PyObject *v = PyUnicode_FromWideChar(buf, bufsize);
- PyMem_Free(buf);
- return v;
+ PyObject *v = PyUnicode_FromWideChar(buf, bufsize);
+ PyMem_Free(buf);
+ return v;
}
PyObject *
@@ -7747,10 +7747,10 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
substring = PyUnicode_Substring(unicode, offset, offset+len);
if (substring == NULL)
return -1;
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
p = PyUnicode_AsUnicodeAndSize(substring, &size);
-_Py_COMP_DIAG_POP
+_Py_COMP_DIAG_POP
if (p == NULL) {
Py_DECREF(substring);
return -1;
@@ -7952,7 +7952,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
else {
Py_ssize_t i;
enum PyUnicode_Kind kind;
- const void *data;
+ const void *data;
if (PyUnicode_READY(rep) == -1) {
Py_DECREF(rep);
@@ -8033,8 +8033,8 @@ encode_code_page(int code_page,
do
{
#ifdef NEED_RETRY
- if (len > DECODING_CHUNK_SIZE) {
- chunk_len = DECODING_CHUNK_SIZE;
+ if (len > DECODING_CHUNK_SIZE) {
+ chunk_len = DECODING_CHUNK_SIZE;
done = 0;
}
else
@@ -8110,7 +8110,7 @@ charmap_decode_string(const char *s,
PyObject *errorHandler = NULL, *exc = NULL;
Py_ssize_t maplen;
enum PyUnicode_Kind mapkind;
- const void *mapdata;
+ const void *mapdata;
Py_UCS4 x;
unsigned char ch;
@@ -8127,7 +8127,7 @@ charmap_decode_string(const char *s,
/* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
* is disabled in encoding aliases, latin1 is preferred because
* its implementation is faster. */
- const Py_UCS1 *mapdata_ucs1 = (const Py_UCS1 *)mapdata;
+ const Py_UCS1 *mapdata_ucs1 = (const Py_UCS1 *)mapdata;
Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
Py_UCS4 maxchar = writer->maxchar;
@@ -8151,7 +8151,7 @@ charmap_decode_string(const char *s,
while (s < e) {
if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
enum PyUnicode_Kind outkind = writer->kind;
- const Py_UCS2 *mapdata_ucs2 = (const Py_UCS2 *)mapdata;
+ const Py_UCS2 *mapdata_ucs2 = (const Py_UCS2 *)mapdata;
if (outkind == PyUnicode_1BYTE_KIND) {
Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
Py_UCS4 maxchar = writer->maxchar;
@@ -8260,7 +8260,7 @@ charmap_decode_mapping(const char *s,
goto Undefined;
if (value < 0 || value > MAX_UNICODE) {
PyErr_Format(PyExc_TypeError,
- "character mapping must be in range(0x%x)",
+ "character mapping must be in range(0x%x)",
(unsigned long)MAX_UNICODE + 1);
goto onError;
}
@@ -8381,11 +8381,11 @@ static PyTypeObject EncodingMapType = {
sizeof(struct encoding_map), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
- 0, /*tp_dealloc*/
- 0, /*tp_vectorcall_offset*/
+ 0, /*tp_dealloc*/
+ 0, /*tp_vectorcall_offset*/
0, /*tp_getattr*/
0, /*tp_setattr*/
- 0, /*tp_as_async*/
+ 0, /*tp_as_async*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
@@ -8431,7 +8431,7 @@ PyUnicode_BuildEncodingMap(PyObject* string)
unsigned char *mlevel1, *mlevel2, *mlevel3;
int count2 = 0, count3 = 0;
int kind;
- const void *data;
+ const void *data;
Py_ssize_t length;
Py_UCS4 ch;
@@ -8599,7 +8599,7 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping)
/* wrong return value */
PyErr_Format(PyExc_TypeError,
"character mapping must return integer, bytes or None, not %.400s",
- Py_TYPE(x)->tp_name);
+ Py_TYPE(x)->tp_name);
Py_DECREF(x);
return NULL;
}
@@ -8634,7 +8634,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
char *outstart;
Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
- if (Py_IS_TYPE(mapping, &EncodingMapType)) {
+ if (Py_IS_TYPE(mapping, &EncodingMapType)) {
int res = encoding_map_lookup(c, mapping);
Py_ssize_t requiredsize = *outpos+1;
if (res == -1)
@@ -8695,7 +8695,7 @@ charmap_encoding_error(
Py_ssize_t size, repsize;
Py_ssize_t newpos;
enum PyUnicode_Kind kind;
- const void *data;
+ const void *data;
Py_ssize_t index;
/* startpos for collecting unencodable chars */
Py_ssize_t collstartpos = *inpos;
@@ -8713,7 +8713,7 @@ charmap_encoding_error(
/* find all unencodable characters */
while (collendpos < size) {
PyObject *rep;
- if (Py_IS_TYPE(mapping, &EncodingMapType)) {
+ if (Py_IS_TYPE(mapping, &EncodingMapType)) {
ch = PyUnicode_READ_CHAR(unicode, collendpos);
val = encoding_map_lookup(ch, mapping);
if (val != -1)
@@ -8736,7 +8736,7 @@ charmap_encoding_error(
/* cache callback name lookup
* (if not done yet, i.e. it's the first error) */
if (*error_handler == _Py_ERROR_UNKNOWN)
- *error_handler = _Py_GetErrorHandler(errors);
+ *error_handler = _Py_GetErrorHandler(errors);
switch (*error_handler) {
case _Py_ERROR_STRICT:
@@ -8845,7 +8845,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
PyObject *error_handler_obj = NULL;
PyObject *exc = NULL;
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
- const void *data;
+ const void *data;
int kind;
if (PyUnicode_READY(unicode) == -1)
@@ -8981,7 +8981,7 @@ unicode_translate_call_errorhandler(const char *errors,
if (*exceptionObject == NULL)
return NULL;
- restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
+ restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
if (restuple == NULL)
return NULL;
if (!PyTuple_Check(restuple)) {
@@ -9177,8 +9177,8 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
{
Py_UCS1 ascii_table[128], ch, ch2;
Py_ssize_t len;
- const Py_UCS1 *in, *end;
- Py_UCS1 *out;
+ const Py_UCS1 *in, *end;
+ Py_UCS1 *out;
int res = 0;
len = PyUnicode_GET_LENGTH(input);
@@ -9227,7 +9227,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
const char *errors)
{
/* input object */
- const void *data;
+ const void *data;
Py_ssize_t size, i;
int kind;
/* output buffer */
@@ -9246,7 +9246,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
if (PyUnicode_READY(input) == -1)
return NULL;
- data = PyUnicode_DATA(input);
+ data = PyUnicode_DATA(input);
kind = PyUnicode_KIND(input);
size = PyUnicode_GET_LENGTH(input);
@@ -9424,7 +9424,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
Py_ssize_t i;
Py_UCS4 maxchar;
enum PyUnicode_Kind kind;
- const void *data;
+ const void *data;
maxchar = 127;
for (i = 0; i < length; i++) {
@@ -9466,7 +9466,7 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
PyObject *unicode;
Py_ssize_t i;
enum PyUnicode_Kind kind;
- const void *data;
+ const void *data;
if (output == NULL) {
PyErr_BadArgument();
@@ -9544,7 +9544,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
int direction)
{
int kind1, kind2;
- const void *buf1, *buf2;
+ const void *buf1, *buf2;
Py_ssize_t len1, len2, result;
kind1 = PyUnicode_KIND(s1);
@@ -9571,7 +9571,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
}
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -2;
}
@@ -9613,9 +9613,9 @@ any_find_slice(PyObject* s1, PyObject* s2,
}
}
- assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(s2)));
+ assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(s2)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return result;
}
@@ -9774,7 +9774,7 @@ PyUnicode_Count(PyObject *str,
{
Py_ssize_t result;
int kind1, kind2;
- const void *buf1 = NULL, *buf2 = NULL;
+ const void *buf1 = NULL, *buf2 = NULL;
Py_ssize_t len1, len2;
if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
@@ -9794,7 +9794,7 @@ PyUnicode_Count(PyObject *str,
buf1 = PyUnicode_DATA(str);
buf2 = PyUnicode_DATA(substr);
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
goto onError;
}
@@ -9803,24 +9803,24 @@ PyUnicode_Count(PyObject *str,
case PyUnicode_1BYTE_KIND:
if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr))
result = asciilib_count(
- ((const Py_UCS1*)buf1) + start, end - start,
+ ((const Py_UCS1*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
else
result = ucs1lib_count(
- ((const Py_UCS1*)buf1) + start, end - start,
+ ((const Py_UCS1*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_2BYTE_KIND:
result = ucs2lib_count(
- ((const Py_UCS2*)buf1) + start, end - start,
+ ((const Py_UCS2*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_4BYTE_KIND:
result = ucs4lib_count(
- ((const Py_UCS4*)buf1) + start, end - start,
+ ((const Py_UCS4*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
@@ -9828,15 +9828,15 @@ PyUnicode_Count(PyObject *str,
Py_UNREACHABLE();
}
- assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
+ assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return result;
onError:
- assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
- if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
+ if (kind2 != kind1)
+ PyMem_Free((void *)buf2);
return -1;
}
@@ -9884,8 +9884,8 @@ tailmatch(PyObject *self,
{
int kind_self;
int kind_sub;
- const void *data_self;
- const void *data_sub;
+ const void *data_self;
+ const void *data_sub;
Py_ssize_t offset;
Py_ssize_t i;
Py_ssize_t end_sub;
@@ -9959,8 +9959,8 @@ static PyObject *
ascii_upper_or_lower(PyObject *self, int lower)
{
Py_ssize_t len = PyUnicode_GET_LENGTH(self);
- const char *data = PyUnicode_DATA(self);
- char *resdata;
+ const char *data = PyUnicode_DATA(self);
+ char *resdata;
PyObject *res;
res = PyUnicode_New(len, 127);
@@ -9975,7 +9975,7 @@ ascii_upper_or_lower(PyObject *self, int lower)
}
static Py_UCS4
-handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i)
+handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i)
{
Py_ssize_t j;
int final_sigma;
@@ -10004,7 +10004,7 @@ handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i
}
static int
-lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
+lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
Py_UCS4 c, Py_UCS4 *mapped)
{
/* Obscure special case. */
@@ -10016,14 +10016,14 @@ lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
}
static Py_ssize_t
-do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
+do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{
Py_ssize_t i, k = 0;
int n_res, j;
Py_UCS4 c, mapped[3];
c = PyUnicode_READ(kind, data, 0);
- n_res = _PyUnicode_ToTitleFull(c, mapped);
+ n_res = _PyUnicode_ToTitleFull(c, mapped);
for (j = 0; j < n_res; j++) {
*maxchar = Py_MAX(*maxchar, mapped[j]);
res[k++] = mapped[j];
@@ -10040,7 +10040,7 @@ do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UC
}
static Py_ssize_t
-do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) {
+do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) {
Py_ssize_t i, k = 0;
for (i = 0; i < length; i++) {
@@ -10065,7 +10065,7 @@ do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4
}
static Py_ssize_t
-do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
+do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
Py_UCS4 *maxchar, int lower)
{
Py_ssize_t i, k = 0;
@@ -10086,19 +10086,19 @@ do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
}
static Py_ssize_t
-do_upper(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
+do_upper(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{
return do_upper_or_lower(kind, data, length, res, maxchar, 0);
}
static Py_ssize_t
-do_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
+do_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{
return do_upper_or_lower(kind, data, length, res, maxchar, 1);
}
static Py_ssize_t
-do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
+do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{
Py_ssize_t i, k = 0;
@@ -10115,7 +10115,7 @@ do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4
}
static Py_ssize_t
-do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
+do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
{
Py_ssize_t i, k = 0;
int previous_is_cased;
@@ -10143,13 +10143,13 @@ do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *m
static PyObject *
case_operation(PyObject *self,
- Py_ssize_t (*perform)(int, const void *, Py_ssize_t, Py_UCS4 *, Py_UCS4 *))
+ Py_ssize_t (*perform)(int, const void *, Py_ssize_t, Py_UCS4 *, Py_UCS4 *))
{
PyObject *res = NULL;
Py_ssize_t length, newlength = 0;
int kind, outkind;
- const void *data;
- void *outdata;
+ const void *data;
+ void *outdata;
Py_UCS4 maxchar = 0, *tmp, *tmpend;
assert(PyUnicode_IS_READY(self));
@@ -10396,7 +10396,7 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
assert(start >= 0);
assert(start + length <= PyUnicode_GET_LENGTH(unicode));
- unicode_fill(kind, data, fill_char, start, length);
+ unicode_fill(kind, data, fill_char, start, length);
}
Py_ssize_t
@@ -10467,9 +10467,9 @@ pad(PyObject *self,
kind = PyUnicode_KIND(u);
data = PyUnicode_DATA(u);
if (left)
- unicode_fill(kind, data, fill, 0, left);
+ unicode_fill(kind, data, fill, 0, left);
if (right)
- unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
+ unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
_PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
assert(_PyUnicode_CheckConsistency(u, 1));
return u;
@@ -10516,7 +10516,7 @@ split(PyObject *self,
Py_ssize_t maxcount)
{
int kind1, kind2;
- const void *buf1, *buf2;
+ const void *buf1, *buf2;
Py_ssize_t len1, len2;
PyObject* out;
@@ -10571,7 +10571,7 @@ split(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -10596,9 +10596,9 @@ split(PyObject *self,
default:
out = NULL;
}
- assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring)));
+ assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return out;
}
@@ -10608,7 +10608,7 @@ rsplit(PyObject *self,
Py_ssize_t maxcount)
{
int kind1, kind2;
- const void *buf1, *buf2;
+ const void *buf1, *buf2;
Py_ssize_t len1, len2;
PyObject* out;
@@ -10663,7 +10663,7 @@ rsplit(PyObject *self,
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -10688,15 +10688,15 @@ rsplit(PyObject *self,
default:
out = NULL;
}
- assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring)));
+ assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return out;
}
static Py_ssize_t
-anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1,
- PyObject *str2, const void *buf2, Py_ssize_t len2, Py_ssize_t offset)
+anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1,
+ PyObject *str2, const void *buf2, Py_ssize_t len2, Py_ssize_t offset)
{
switch (kind) {
case PyUnicode_1BYTE_KIND:
@@ -10713,8 +10713,8 @@ anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1,
}
static Py_ssize_t
-anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen,
- PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount)
+anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen,
+ PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount)
{
switch (kind) {
case PyUnicode_1BYTE_KIND:
@@ -10760,9 +10760,9 @@ replace(PyObject *self, PyObject *str1,
PyObject *str2, Py_ssize_t maxcount)
{
PyObject *u;
- const char *sbuf = PyUnicode_DATA(self);
- const void *buf1 = PyUnicode_DATA(str1);
- const void *buf2 = PyUnicode_DATA(str2);
+ const char *sbuf = PyUnicode_DATA(self);
+ const void *buf1 = PyUnicode_DATA(str1);
+ const void *buf2 = PyUnicode_DATA(str2);
int srelease = 0, release1 = 0, release2 = 0;
int skind = PyUnicode_KIND(self);
int kind1 = PyUnicode_KIND(str1);
@@ -10773,12 +10773,12 @@ replace(PyObject *self, PyObject *str1,
int mayshrink;
Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
- if (slen < len1)
- goto nothing;
-
+ if (slen < len1)
+ goto nothing;
+
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
- else if (maxcount == 0)
+ else if (maxcount == 0)
goto nothing;
if (str1 == str2)
@@ -10823,7 +10823,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) {
/* widen substring */
- buf1 = unicode_askind(kind1, buf1, len1, rkind);
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@@ -10832,23 +10832,23 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (rkind > kind2) {
/* widen replacement */
- buf2 = unicode_askind(kind2, buf2, len2, rkind);
+ buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (rkind < kind2) {
/* widen self and buf1 */
rkind = kind2;
- if (release1) {
- assert(buf1 != PyUnicode_DATA(str1));
- PyMem_Free((void *)buf1);
- buf1 = PyUnicode_DATA(str1);
- release1 = 0;
- }
- sbuf = unicode_askind(skind, sbuf, slen, rkind);
+ if (release1) {
+ assert(buf1 != PyUnicode_DATA(str1));
+ PyMem_Free((void *)buf1);
+ buf1 = PyUnicode_DATA(str1);
+ release1 = 0;
+ }
+ sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
- buf1 = unicode_askind(kind1, buf1, len1, rkind);
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@@ -10886,7 +10886,7 @@ replace(PyObject *self, PyObject *str1,
if (kind1 < rkind) {
/* widen substring */
- buf1 = unicode_askind(kind1, buf1, len1, rkind);
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
@@ -10895,28 +10895,28 @@ replace(PyObject *self, PyObject *str1,
goto nothing;
if (kind2 < rkind) {
/* widen replacement */
- buf2 = unicode_askind(kind2, buf2, len2, rkind);
+ buf2 = unicode_askind(kind2, buf2, len2, rkind);
if (!buf2) goto error;
release2 = 1;
}
else if (kind2 > rkind) {
/* widen self and buf1 */
rkind = kind2;
- sbuf = unicode_askind(skind, sbuf, slen, rkind);
+ sbuf = unicode_askind(skind, sbuf, slen, rkind);
if (!sbuf) goto error;
srelease = 1;
- if (release1) {
- assert(buf1 != PyUnicode_DATA(str1));
- PyMem_Free((void *)buf1);
- buf1 = PyUnicode_DATA(str1);
- release1 = 0;
- }
- buf1 = unicode_askind(kind1, buf1, len1, rkind);
+ if (release1) {
+ assert(buf1 != PyUnicode_DATA(str1));
+ PyMem_Free((void *)buf1);
+ buf1 = PyUnicode_DATA(str1);
+ release1 = 0;
+ }
+ buf1 = unicode_askind(kind1, buf1, len1, rkind);
if (!buf1) goto error;
release1 = 1;
}
/* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
- PyUnicode_GET_LENGTH(str1)); */
+ PyUnicode_GET_LENGTH(str1)); */
if (len1 < len2 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
PyErr_SetString(PyExc_OverflowError,
"replace string is too long");
@@ -10999,41 +10999,41 @@ replace(PyObject *self, PyObject *str1,
}
done:
- assert(srelease == (sbuf != PyUnicode_DATA(self)));
- assert(release1 == (buf1 != PyUnicode_DATA(str1)));
- assert(release2 == (buf2 != PyUnicode_DATA(str2)));
+ assert(srelease == (sbuf != PyUnicode_DATA(self)));
+ assert(release1 == (buf1 != PyUnicode_DATA(str1)));
+ assert(release2 == (buf2 != PyUnicode_DATA(str2)));
if (srelease)
- PyMem_FREE((void *)sbuf);
+ PyMem_FREE((void *)sbuf);
if (release1)
- PyMem_FREE((void *)buf1);
+ PyMem_FREE((void *)buf1);
if (release2)
- PyMem_FREE((void *)buf2);
+ PyMem_FREE((void *)buf2);
assert(_PyUnicode_CheckConsistency(u, 1));
return u;
nothing:
/* nothing to replace; return original string (when possible) */
- assert(srelease == (sbuf != PyUnicode_DATA(self)));
- assert(release1 == (buf1 != PyUnicode_DATA(str1)));
- assert(release2 == (buf2 != PyUnicode_DATA(str2)));
+ assert(srelease == (sbuf != PyUnicode_DATA(self)));
+ assert(release1 == (buf1 != PyUnicode_DATA(str1)));
+ assert(release2 == (buf2 != PyUnicode_DATA(str2)));
if (srelease)
- PyMem_FREE((void *)sbuf);
+ PyMem_FREE((void *)sbuf);
if (release1)
- PyMem_FREE((void *)buf1);
+ PyMem_FREE((void *)buf1);
if (release2)
- PyMem_FREE((void *)buf2);
+ PyMem_FREE((void *)buf2);
return unicode_result_unchanged(self);
error:
- assert(srelease == (sbuf != PyUnicode_DATA(self)));
- assert(release1 == (buf1 != PyUnicode_DATA(str1)));
- assert(release2 == (buf2 != PyUnicode_DATA(str2)));
- if (srelease)
- PyMem_FREE((void *)sbuf);
- if (release1)
- PyMem_FREE((void *)buf1);
- if (release2)
- PyMem_FREE((void *)buf2);
+ assert(srelease == (sbuf != PyUnicode_DATA(self)));
+ assert(release1 == (buf1 != PyUnicode_DATA(str1)));
+ assert(release2 == (buf2 != PyUnicode_DATA(str2)));
+ if (srelease)
+ PyMem_FREE((void *)sbuf);
+ if (release1)
+ PyMem_FREE((void *)buf1);
+ if (release2)
+ PyMem_FREE((void *)buf2);
return NULL;
}
@@ -11170,7 +11170,7 @@ unicode_compare(PyObject *str1, PyObject *str2)
while (0)
int kind1, kind2;
- const void *data1, *data2;
+ const void *data1, *data2;
Py_ssize_t len1, len2, len;
kind1 = PyUnicode_KIND(str1);
@@ -11271,7 +11271,7 @@ static int
unicode_compare_eq(PyObject *str1, PyObject *str2)
{
int kind;
- const void *data1, *data2;
+ const void *data1, *data2;
Py_ssize_t len;
int cmp;
@@ -11305,8 +11305,8 @@ PyUnicode_Compare(PyObject *left, PyObject *right)
}
PyErr_Format(PyExc_TypeError,
"Can't compare %.100s and %.100s",
- Py_TYPE(left)->tp_name,
- Py_TYPE(right)->tp_name);
+ Py_TYPE(left)->tp_name,
+ Py_TYPE(right)->tp_name);
return -1;
}
@@ -11356,7 +11356,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
return 0;
}
else {
- const void *data = PyUnicode_DATA(uni);
+ const void *data = PyUnicode_DATA(uni);
/* Compare Unicode string and source character set string */
for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
if (chr != (unsigned char)str[i])
@@ -11447,12 +11447,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
if (PyUnicode_CHECK_INTERNED(left))
return 0;
-#ifdef INTERNED_STRINGS
+#ifdef INTERNED_STRINGS
assert(_PyUnicode_HASH(right_uni) != -1);
- Py_hash_t hash = _PyUnicode_HASH(left);
+ Py_hash_t hash = _PyUnicode_HASH(left);
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
return 0;
-#endif
+#endif
return unicode_compare_eq(left, right_uni);
}
@@ -11506,7 +11506,7 @@ int
PyUnicode_Contains(PyObject *str, PyObject *substr)
{
int kind1, kind2;
- const void *buf1, *buf2;
+ const void *buf1, *buf2;
Py_ssize_t len1, len2;
int result;
@@ -11537,7 +11537,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
return result;
}
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return -1;
}
@@ -11556,9 +11556,9 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
Py_UNREACHABLE();
}
- assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substr)));
+ assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substr)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return result;
}
@@ -11578,7 +11578,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
if (!PyUnicode_Check(right)) {
PyErr_Format(PyExc_TypeError,
"can only concatenate str (not \"%.200s\") to str",
- Py_TYPE(right)->tp_name);
+ Py_TYPE(right)->tp_name);
return NULL;
}
if (PyUnicode_READY(right) < 0)
@@ -11735,7 +11735,7 @@ unicode_count(PyObject *self, PyObject *args)
Py_ssize_t end = PY_SSIZE_T_MAX;
PyObject *result;
int kind1, kind2;
- const void *buf1, *buf2;
+ const void *buf1, *buf2;
Py_ssize_t len1, len2, iresult;
if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
@@ -11755,26 +11755,26 @@ unicode_count(PyObject *self, PyObject *args)
buf1 = PyUnicode_DATA(self);
buf2 = PyUnicode_DATA(substring);
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
switch (kind1) {
case PyUnicode_1BYTE_KIND:
iresult = ucs1lib_count(
- ((const Py_UCS1*)buf1) + start, end - start,
+ ((const Py_UCS1*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_2BYTE_KIND:
iresult = ucs2lib_count(
- ((const Py_UCS2*)buf1) + start, end - start,
+ ((const Py_UCS2*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
case PyUnicode_4BYTE_KIND:
iresult = ucs4lib_count(
- ((const Py_UCS4*)buf1) + start, end - start,
+ ((const Py_UCS4*)buf1) + start, end - start,
buf2, len2, PY_SSIZE_T_MAX
);
break;
@@ -11784,9 +11784,9 @@ unicode_count(PyObject *self, PyObject *args)
result = PyLong_FromSsize_t(iresult);
- assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring)));
+ assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return result;
}
@@ -11830,8 +11830,8 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
Py_ssize_t i, j, line_pos, src_len, incr;
Py_UCS4 ch;
PyObject *u;
- const void *src_data;
- void *dest_data;
+ const void *src_data;
+ void *dest_data;
int kind;
int found;
@@ -11882,7 +11882,7 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
if (tabsize > 0) {
incr = tabsize - (line_pos % tabsize);
line_pos += incr;
- unicode_fill(kind, dest_data, ' ', j, incr);
+ unicode_fill(kind, dest_data, ' ', j, incr);
j += incr;
}
}
@@ -11937,7 +11937,7 @@ unicode_find(PyObject *self, PyObject *args)
static PyObject *
unicode_getitem(PyObject *self, Py_ssize_t index)
{
- const void *data;
+ const void *data;
enum PyUnicode_Kind kind;
Py_UCS4 ch;
@@ -11972,7 +11972,7 @@ unicode_hash(PyObject *self)
return _PyUnicode_HASH(self);
if (PyUnicode_READY(self) == -1)
return -1;
-
+
x = _Py_HashBytes(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
_PyUnicode_HASH(self) = x;
@@ -11982,7 +11982,7 @@ unicode_hash(PyObject *self)
PyDoc_STRVAR(index__doc__,
"S.index(sub[, start[, end]]) -> int\n\
\n\
-Return the lowest index in S where substring sub is found,\n\
+Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
@@ -12050,7 +12050,7 @@ unicode_islower_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
int cased;
if (PyUnicode_READY(self) == -1)
@@ -12095,7 +12095,7 @@ unicode_isupper_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
int cased;
if (PyUnicode_READY(self) == -1)
@@ -12140,7 +12140,7 @@ unicode_istitle_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
int cased, previous_is_cased;
if (PyUnicode_READY(self) == -1)
@@ -12198,7 +12198,7 @@ unicode_isspace_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
if (PyUnicode_READY(self) == -1)
return NULL;
@@ -12238,7 +12238,7 @@ unicode_isalpha_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
if (PyUnicode_READY(self) == -1)
return NULL;
@@ -12276,7 +12276,7 @@ unicode_isalnum_impl(PyObject *self)
/*[clinic end generated code: output=a5a23490ffc3660c input=5c6579bf2e04758c]*/
{
int kind;
- const void *data;
+ const void *data;
Py_ssize_t len, i;
if (PyUnicode_READY(self) == -1)
@@ -12319,7 +12319,7 @@ unicode_isdecimal_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
if (PyUnicode_READY(self) == -1)
return NULL;
@@ -12358,7 +12358,7 @@ unicode_isdigit_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
if (PyUnicode_READY(self) == -1)
return NULL;
@@ -12398,7 +12398,7 @@ unicode_isnumeric_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
if (PyUnicode_READY(self) == -1)
return NULL;
@@ -12422,22 +12422,22 @@ unicode_isnumeric_impl(PyObject *self)
Py_RETURN_TRUE;
}
-Py_ssize_t
-_PyUnicode_ScanIdentifier(PyObject *self)
+Py_ssize_t
+_PyUnicode_ScanIdentifier(PyObject *self)
{
Py_ssize_t i;
- if (PyUnicode_READY(self) == -1)
- return -1;
+ if (PyUnicode_READY(self) == -1)
+ return -1;
- Py_ssize_t len = PyUnicode_GET_LENGTH(self);
- if (len == 0) {
- /* an empty string is not a valid identifier */
+ Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+ if (len == 0) {
+ /* an empty string is not a valid identifier */
return 0;
}
- int kind = PyUnicode_KIND(self);
- const void *data = PyUnicode_DATA(self);
- Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
+ int kind = PyUnicode_KIND(self);
+ const void *data = PyUnicode_DATA(self);
+ Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
/* PEP 3131 says that the first character must be in
XID_Start and subsequent characters in XID_Continue,
and for the ASCII range, the 2.x rules apply (i.e
@@ -12446,70 +12446,70 @@ _PyUnicode_ScanIdentifier(PyObject *self)
definition of XID_Start and XID_Continue, it is sufficient
to check just for these, except that _ must be allowed
as starting an identifier. */
- if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
+ if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
return 0;
- }
+ }
+
+ for (i = 1; i < len; i++) {
+ ch = PyUnicode_READ(kind, data, i);
+ if (!_PyUnicode_IsXidContinue(ch)) {
+ return i;
+ }
+ }
+ return i;
+}
- for (i = 1; i < len; i++) {
- ch = PyUnicode_READ(kind, data, i);
- if (!_PyUnicode_IsXidContinue(ch)) {
- return i;
- }
- }
- return i;
-}
-
-int
-PyUnicode_IsIdentifier(PyObject *self)
-{
- if (PyUnicode_IS_READY(self)) {
- Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
- Py_ssize_t len = PyUnicode_GET_LENGTH(self);
- /* an empty string is not a valid identifier */
- return len && i == len;
- }
- else {
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
- Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
- if (len == 0) {
- /* an empty string is not a valid identifier */
+int
+PyUnicode_IsIdentifier(PyObject *self)
+{
+ if (PyUnicode_IS_READY(self)) {
+ Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
+ Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+ /* an empty string is not a valid identifier */
+ return len && i == len;
+ }
+ else {
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+ Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
+ if (len == 0) {
+ /* an empty string is not a valid identifier */
+ return 0;
+ }
+
+ const wchar_t *wstr = _PyUnicode_WSTR(self);
+ Py_UCS4 ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+ && i < len
+ && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+ {
+ ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+ i++;
+ }
+#endif
+ if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
return 0;
- }
-
- const wchar_t *wstr = _PyUnicode_WSTR(self);
- Py_UCS4 ch = wstr[i++];
-#if SIZEOF_WCHAR_T == 2
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
- && i < len
- && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
- {
- ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
- i++;
- }
-#endif
- if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
- return 0;
- }
-
- while (i < len) {
- ch = wstr[i++];
-#if SIZEOF_WCHAR_T == 2
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
- && i < len
- && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
- {
- ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
- i++;
- }
-#endif
- if (!_PyUnicode_IsXidContinue(ch)) {
- return 0;
- }
- }
- return 1;
-_Py_COMP_DIAG_POP
- }
+ }
+
+ while (i < len) {
+ ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+ && i < len
+ && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+ {
+ ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+ i++;
+ }
+#endif
+ if (!_PyUnicode_IsXidContinue(ch)) {
+ return 0;
+ }
+ }
+ return 1;
+_Py_COMP_DIAG_POP
+ }
}
/*[clinic input]
@@ -12517,13 +12517,13 @@ str.isidentifier as unicode_isidentifier
Return True if the string is a valid Python identifier, False otherwise.
-Call keyword.iskeyword(s) to test whether string s is a reserved identifier,
-such as "def" or "class".
+Call keyword.iskeyword(s) to test whether string s is a reserved identifier,
+such as "def" or "class".
[clinic start generated code]*/
static PyObject *
unicode_isidentifier_impl(PyObject *self)
-/*[clinic end generated code: output=fe585a9666572905 input=2d807a104f21c0c5]*/
+/*[clinic end generated code: output=fe585a9666572905 input=2d807a104f21c0c5]*/
{
return PyBool_FromLong(PyUnicode_IsIdentifier(self));
}
@@ -12543,7 +12543,7 @@ unicode_isprintable_impl(PyObject *self)
{
Py_ssize_t i, length;
int kind;
- const void *data;
+ const void *data;
if (PyUnicode_READY(self) == -1)
return NULL;
@@ -12648,7 +12648,7 @@ static const char *stripfuncnames[] = {"lstrip", "rstrip", "strip"};
PyObject *
_PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
{
- const void *data;
+ const void *data;
int kind;
Py_ssize_t i, j, len;
BLOOM_MASK sepmask;
@@ -12698,7 +12698,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
PyObject*
PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
{
- const unsigned char *data;
+ const unsigned char *data;
int kind;
Py_ssize_t length;
@@ -12721,7 +12721,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
length = end - start;
if (PyUnicode_IS_ASCII(self)) {
data = PyUnicode_1BYTE_DATA(self);
- return _PyUnicode_FromASCII((const char*)(data + start), length);
+ return _PyUnicode_FromASCII((const char*)(data + start), length);
}
else {
kind = PyUnicode_KIND(self);
@@ -12743,7 +12743,7 @@ do_strip(PyObject *self, int striptype)
len = PyUnicode_GET_LENGTH(self);
if (PyUnicode_IS_ASCII(self)) {
- const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
+ const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
i = 0;
if (striptype != RIGHTSTRIP) {
@@ -12769,7 +12769,7 @@ do_strip(PyObject *self, int striptype)
}
else {
int kind = PyUnicode_KIND(self);
- const void *data = PyUnicode_DATA(self);
+ const void *data = PyUnicode_DATA(self);
i = 0;
if (striptype != RIGHTSTRIP) {
@@ -12801,7 +12801,7 @@ do_strip(PyObject *self, int striptype)
static PyObject *
do_argstrip(PyObject *self, int striptype, PyObject *sep)
{
- if (sep != Py_None) {
+ if (sep != Py_None) {
if (PyUnicode_Check(sep))
return _PyUnicode_XStrip(self, striptype, sep);
else {
@@ -12822,14 +12822,14 @@ str.strip as unicode_strip
chars: object = None
/
-Return a copy of the string with leading and trailing whitespace removed.
+Return a copy of the string with leading and trailing whitespace removed.
If chars is given and not None, remove characters in chars instead.
[clinic start generated code]*/
static PyObject *
unicode_strip_impl(PyObject *self, PyObject *chars)
-/*[clinic end generated code: output=ca19018454345d57 input=385289c6f423b954]*/
+/*[clinic end generated code: output=ca19018454345d57 input=385289c6f423b954]*/
{
return do_argstrip(self, BOTHSTRIP, chars);
}
@@ -12838,7 +12838,7 @@ unicode_strip_impl(PyObject *self, PyObject *chars)
/*[clinic input]
str.lstrip as unicode_lstrip
- chars: object = None
+ chars: object = None
/
Return a copy of the string with leading whitespace removed.
@@ -12848,7 +12848,7 @@ If chars is given and not None, remove characters in chars instead.
static PyObject *
unicode_lstrip_impl(PyObject *self, PyObject *chars)
-/*[clinic end generated code: output=3b43683251f79ca7 input=529f9f3834448671]*/
+/*[clinic end generated code: output=3b43683251f79ca7 input=529f9f3834448671]*/
{
return do_argstrip(self, LEFTSTRIP, chars);
}
@@ -12857,7 +12857,7 @@ unicode_lstrip_impl(PyObject *self, PyObject *chars)
/*[clinic input]
str.rstrip as unicode_rstrip
- chars: object = None
+ chars: object = None
/
Return a copy of the string with trailing whitespace removed.
@@ -12867,7 +12867,7 @@ If chars is given and not None, remove characters in chars instead.
static PyObject *
unicode_rstrip_impl(PyObject *self, PyObject *chars)
-/*[clinic end generated code: output=4a59230017cc3b7a input=62566c627916557f]*/
+/*[clinic end generated code: output=4a59230017cc3b7a input=62566c627916557f]*/
{
return do_argstrip(self, RIGHTSTRIP, chars);
}
@@ -12902,8 +12902,8 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
assert(PyUnicode_KIND(u) == PyUnicode_KIND(str));
if (PyUnicode_GET_LENGTH(str) == 1) {
- int kind = PyUnicode_KIND(str);
- Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0);
+ int kind = PyUnicode_KIND(str);
+ Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0);
if (kind == PyUnicode_1BYTE_KIND) {
void *to = PyUnicode_DATA(u);
memset(to, (unsigned char)fill_char, len);
@@ -12922,7 +12922,7 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
else {
/* number of characters copied this far */
Py_ssize_t done = PyUnicode_GET_LENGTH(str);
- Py_ssize_t char_size = PyUnicode_KIND(str);
+ Py_ssize_t char_size = PyUnicode_KIND(str);
char *to = (char *) PyUnicode_DATA(u);
memcpy(to, PyUnicode_DATA(str),
PyUnicode_GET_LENGTH(str) * char_size);
@@ -12975,62 +12975,62 @@ unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new,
return replace(self, old, new, count);
}
-/*[clinic input]
-str.removeprefix as unicode_removeprefix
-
- prefix: unicode
- /
-
-Return a str with the given prefix string removed if present.
-
-If the string starts with the prefix string, return string[len(prefix):].
-Otherwise, return a copy of the original string.
-[clinic start generated code]*/
-
+/*[clinic input]
+str.removeprefix as unicode_removeprefix
+
+ prefix: unicode
+ /
+
+Return a str with the given prefix string removed if present.
+
+If the string starts with the prefix string, return string[len(prefix):].
+Otherwise, return a copy of the original string.
+[clinic start generated code]*/
+
+static PyObject *
+unicode_removeprefix_impl(PyObject *self, PyObject *prefix)
+/*[clinic end generated code: output=f1e5945e9763bcb9 input=27ec40b99a37eb88]*/
+{
+ int match = tailmatch(self, prefix, 0, PY_SSIZE_T_MAX, -1);
+ if (match == -1) {
+ return NULL;
+ }
+ if (match) {
+ return PyUnicode_Substring(self, PyUnicode_GET_LENGTH(prefix),
+ PyUnicode_GET_LENGTH(self));
+ }
+ return unicode_result_unchanged(self);
+}
+
+/*[clinic input]
+str.removesuffix as unicode_removesuffix
+
+ suffix: unicode
+ /
+
+Return a str with the given suffix string removed if present.
+
+If the string ends with the suffix string and that suffix is not empty,
+return string[:-len(suffix)]. Otherwise, return a copy of the original
+string.
+[clinic start generated code]*/
+
+static PyObject *
+unicode_removesuffix_impl(PyObject *self, PyObject *suffix)
+/*[clinic end generated code: output=d36629e227636822 input=12cc32561e769be4]*/
+{
+ int match = tailmatch(self, suffix, 0, PY_SSIZE_T_MAX, +1);
+ if (match == -1) {
+ return NULL;
+ }
+ if (match) {
+ return PyUnicode_Substring(self, 0, PyUnicode_GET_LENGTH(self)
+ - PyUnicode_GET_LENGTH(suffix));
+ }
+ return unicode_result_unchanged(self);
+}
+
static PyObject *
-unicode_removeprefix_impl(PyObject *self, PyObject *prefix)
-/*[clinic end generated code: output=f1e5945e9763bcb9 input=27ec40b99a37eb88]*/
-{
- int match = tailmatch(self, prefix, 0, PY_SSIZE_T_MAX, -1);
- if (match == -1) {
- return NULL;
- }
- if (match) {
- return PyUnicode_Substring(self, PyUnicode_GET_LENGTH(prefix),
- PyUnicode_GET_LENGTH(self));
- }
- return unicode_result_unchanged(self);
-}
-
-/*[clinic input]
-str.removesuffix as unicode_removesuffix
-
- suffix: unicode
- /
-
-Return a str with the given suffix string removed if present.
-
-If the string ends with the suffix string and that suffix is not empty,
-return string[:-len(suffix)]. Otherwise, return a copy of the original
-string.
-[clinic start generated code]*/
-
-static PyObject *
-unicode_removesuffix_impl(PyObject *self, PyObject *suffix)
-/*[clinic end generated code: output=d36629e227636822 input=12cc32561e769be4]*/
-{
- int match = tailmatch(self, suffix, 0, PY_SSIZE_T_MAX, +1);
- if (match == -1) {
- return NULL;
- }
- if (match) {
- return PyUnicode_Substring(self, 0, PyUnicode_GET_LENGTH(self)
- - PyUnicode_GET_LENGTH(suffix));
- }
- return unicode_result_unchanged(self);
-}
-
-static PyObject *
unicode_repr(PyObject *unicode)
{
PyObject *repr;
@@ -13038,8 +13038,8 @@ unicode_repr(PyObject *unicode)
Py_ssize_t osize, squote, dquote, i, o;
Py_UCS4 max, quote;
int ikind, okind, unchanged;
- const void *idata;
- void *odata;
+ const void *idata;
+ void *odata;
if (PyUnicode_READY(unicode) == -1)
return NULL;
@@ -13332,7 +13332,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
{
PyObject* out;
int kind1, kind2;
- const void *buf1, *buf2;
+ const void *buf1, *buf2;
Py_ssize_t len1, len2;
if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
@@ -13355,7 +13355,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -13377,9 +13377,9 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
Py_UNREACHABLE();
}
- assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj)));
+ assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return out;
}
@@ -13390,7 +13390,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
{
PyObject* out;
int kind1, kind2;
- const void *buf1, *buf2;
+ const void *buf1, *buf2;
Py_ssize_t len1, len2;
if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
@@ -13413,7 +13413,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
if (kind2 != kind1) {
- buf2 = unicode_askind(kind2, buf2, len2, kind1);
+ buf2 = unicode_askind(kind2, buf2, len2, kind1);
if (!buf2)
return NULL;
}
@@ -13435,9 +13435,9 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
Py_UNREACHABLE();
}
- assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj)));
+ assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj)));
if (kind2 != kind1)
- PyMem_Free((void *)buf2);
+ PyMem_Free((void *)buf2);
return out;
}
@@ -13593,7 +13593,7 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z)
return NULL;
if (y != NULL) {
int x_kind, y_kind, z_kind;
- const void *x_data, *y_data, *z_data;
+ const void *x_data, *y_data, *z_data;
/* x must be a string too, of equal length */
if (!PyUnicode_Check(x)) {
@@ -13642,7 +13642,7 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z)
}
} else {
int kind;
- const void *data;
+ const void *data;
/* x must be a dict */
if (!PyDict_CheckExact(x)) {
@@ -13743,7 +13743,7 @@ unicode_zfill_impl(PyObject *self, Py_ssize_t width)
Py_ssize_t fill;
PyObject *u;
int kind;
- const void *data;
+ const void *data;
Py_UCS4 chr;
if (PyUnicode_READY(self) == -1)
@@ -13924,16 +13924,16 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
assert(writer->kind <= PyUnicode_1BYTE_KIND);
}
-// Initialize _PyUnicodeWriter with initial buffer
-static inline void
-_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
-{
- memset(writer, 0, sizeof(*writer));
- writer->buffer = buffer;
- _PyUnicodeWriter_Update(writer);
- writer->min_length = writer->size;
-}
-
+// Initialize _PyUnicodeWriter with initial buffer
+static inline void
+_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
+{
+ memset(writer, 0, sizeof(*writer));
+ writer->buffer = buffer;
+ _PyUnicodeWriter_Update(writer);
+ writer->min_length = writer->size;
+}
+
int
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
Py_ssize_t length, Py_UCS4 maxchar)
@@ -14024,7 +14024,7 @@ _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
{
case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
- case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break;
+ case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break;
default:
Py_UNREACHABLE();
}
@@ -14122,7 +14122,7 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
if (len == -1)
len = strlen(ascii);
- assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
+ assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
if (writer->buffer == NULL && !writer->overallocate) {
PyObject *str;
@@ -14181,7 +14181,7 @@ _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
{
Py_UCS4 maxchar;
- maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
+ maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
return -1;
unicode_write_cstr(writer->buffer, writer->pos, str, len);
@@ -14308,7 +14308,7 @@ unicode_sizeof_impl(PyObject *self)
}
static PyObject *
-unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
+unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
{
PyObject *copy = _PyUnicode_Copy(v);
if (!copy)
@@ -14346,8 +14346,8 @@ static PyMethodDef unicode_methods[] = {
UNICODE_UPPER_METHODDEF
{"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
{"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
- UNICODE_REMOVEPREFIX_METHODDEF
- UNICODE_REMOVESUFFIX_METHODDEF
+ UNICODE_REMOVEPREFIX_METHODDEF
+ UNICODE_REMOVESUFFIX_METHODDEF
UNICODE_ISASCII_METHODDEF
UNICODE_ISLOWER_METHODDEF
UNICODE_ISUPPER_METHODDEF
@@ -14361,7 +14361,7 @@ static PyMethodDef unicode_methods[] = {
UNICODE_ISIDENTIFIER_METHODDEF
UNICODE_ISPRINTABLE_METHODDEF
UNICODE_ZFILL_METHODDEF
- {"format", (PyCFunction)(void(*)(void)) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
+ {"format", (PyCFunction)(void(*)(void)) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
{"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__},
UNICODE___FORMAT___METHODDEF
UNICODE_MAKETRANS_METHODDEF
@@ -14371,7 +14371,7 @@ static PyMethodDef unicode_methods[] = {
{"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS},
#endif
- {"__getnewargs__", unicode_getnewargs, METH_NOARGS},
+ {"__getnewargs__", unicode_getnewargs, METH_NOARGS},
{NULL, NULL}
};
@@ -14407,7 +14407,7 @@ unicode_subscript(PyObject* self, PyObject* item)
if (PyUnicode_READY(self) == -1)
return NULL;
- if (_PyIndex_Check(item)) {
+ if (_PyIndex_Check(item)) {
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
if (i == -1 && PyErr_Occurred())
return NULL;
@@ -14415,11 +14415,11 @@ unicode_subscript(PyObject* self, PyObject* item)
i += PyUnicode_GET_LENGTH(self);
return unicode_getitem(self, i);
} else if (PySlice_Check(item)) {
- Py_ssize_t start, stop, step, slicelength, i;
- size_t cur;
+ Py_ssize_t start, stop, step, slicelength, i;
+ size_t cur;
PyObject *result;
- const void *src_data;
- void *dest_data;
+ const void *src_data;
+ void *dest_data;
int src_kind, dest_kind;
Py_UCS4 ch, max_char, kind_limit;
@@ -14490,7 +14490,7 @@ struct unicode_formatter_t {
enum PyUnicode_Kind fmtkind;
Py_ssize_t fmtcnt, fmtpos;
- const void *fmtdata;
+ const void *fmtdata;
PyObject *fmtstr;
_PyUnicodeWriter writer;
@@ -15164,7 +15164,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
{
Py_ssize_t len;
enum PyUnicode_Kind kind;
- const void *pbuf;
+ const void *pbuf;
Py_ssize_t pindex;
Py_UCS4 signchar;
Py_ssize_t buflen;
@@ -15274,7 +15274,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
/* Pad left with the fill character if needed */
if (arg->width > len && !(arg->flags & F_LJUST)) {
sublen = arg->width - len;
- unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen);
+ unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen);
writer->pos += sublen;
arg->width = len;
}
@@ -15306,7 +15306,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
/* Pad right with the fill character if needed */
if (arg->width > len) {
sublen = arg->width - len;
- unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen);
+ unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen);
writer->pos += sublen;
}
return 0;
@@ -15581,52 +15581,52 @@ static PyObject *unicode_iter(PyObject *seq);
PyTypeObject PyUnicode_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str", /* tp_name */
- sizeof(PyUnicodeObject), /* tp_basicsize */
- 0, /* tp_itemsize */
+ "str", /* tp_name */
+ sizeof(PyUnicodeObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
/* Slots */
- (destructor)unicode_dealloc, /* tp_dealloc */
- 0, /* tp_vectorcall_offset */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_as_async */
- unicode_repr, /* tp_repr */
- &unicode_as_number, /* tp_as_number */
- &unicode_as_sequence, /* tp_as_sequence */
- &unicode_as_mapping, /* tp_as_mapping */
- (hashfunc) unicode_hash, /* tp_hash*/
- 0, /* tp_call*/
- (reprfunc) unicode_str, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
+ (destructor)unicode_dealloc, /* tp_dealloc */
+ 0, /* tp_vectorcall_offset */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_as_async */
+ unicode_repr, /* tp_repr */
+ &unicode_as_number, /* tp_as_number */
+ &unicode_as_sequence, /* tp_as_sequence */
+ &unicode_as_mapping, /* tp_as_mapping */
+ (hashfunc) unicode_hash, /* tp_hash*/
+ 0, /* tp_call*/
+ (reprfunc) unicode_str, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
- Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
- unicode_doc, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- PyUnicode_RichCompare, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- unicode_iter, /* tp_iter */
- 0, /* tp_iternext */
- unicode_methods, /* tp_methods */
- 0, /* tp_members */
- 0, /* tp_getset */
- &PyBaseObject_Type, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- 0, /* tp_init */
- 0, /* tp_alloc */
- unicode_new, /* tp_new */
- PyObject_Del, /* tp_free */
+ Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
+ unicode_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ PyUnicode_RichCompare, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ unicode_iter, /* tp_iter */
+ 0, /* tp_iternext */
+ unicode_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ &PyBaseObject_Type, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ unicode_new, /* tp_new */
+ PyObject_Del, /* tp_free */
};
/* Initialize the Unicode implementation */
-PyStatus
-_PyUnicode_Init(void)
+PyStatus
+_PyUnicode_Init(void)
{
/* XXX - move this array to unicodectype.c ? */
Py_UCS2 linebreak[] = {
@@ -15642,30 +15642,30 @@ _PyUnicode_Init(void)
/* Init the implementation */
_Py_INCREF_UNICODE_EMPTY();
- if (!unicode_empty) {
- return _PyStatus_ERR("Can't create empty string");
- }
+ if (!unicode_empty) {
+ return _PyStatus_ERR("Can't create empty string");
+ }
Py_DECREF(unicode_empty);
- if (PyType_Ready(&PyUnicode_Type) < 0) {
- return _PyStatus_ERR("Can't initialize unicode type");
- }
+ if (PyType_Ready(&PyUnicode_Type) < 0) {
+ return _PyStatus_ERR("Can't initialize unicode type");
+ }
/* initialize the linebreak bloom filter */
bloom_linebreak = make_bloom_mask(
PyUnicode_2BYTE_KIND, linebreak,
Py_ARRAY_LENGTH(linebreak));
- if (PyType_Ready(&EncodingMapType) < 0) {
- return _PyStatus_ERR("Can't initialize encoding map type");
- }
- if (PyType_Ready(&PyFieldNameIter_Type) < 0) {
- return _PyStatus_ERR("Can't initialize field name iterator type");
- }
- if (PyType_Ready(&PyFormatterIter_Type) < 0) {
- return _PyStatus_ERR("Can't initialize formatter iter type");
- }
- return _PyStatus_OK();
+ if (PyType_Ready(&EncodingMapType) < 0) {
+ return _PyStatus_ERR("Can't initialize encoding map type");
+ }
+ if (PyType_Ready(&PyFieldNameIter_Type) < 0) {
+ return _PyStatus_ERR("Can't initialize field name iterator type");
+ }
+ if (PyType_Ready(&PyFormatterIter_Type) < 0) {
+ return _PyStatus_ERR("Can't initialize formatter iter type");
+ }
+ return _PyStatus_OK();
}
@@ -15677,22 +15677,22 @@ PyUnicode_InternInPlace(PyObject **p)
assert(s != NULL);
assert(_PyUnicode_CHECK(s));
#else
- if (s == NULL || !PyUnicode_Check(s)) {
+ if (s == NULL || !PyUnicode_Check(s)) {
return;
- }
+ }
#endif
-
+
/* If it's a subclass, we don't really know what putting
it in the interned dict might do. */
- if (!PyUnicode_CheckExact(s)) {
+ if (!PyUnicode_CheckExact(s)) {
return;
- }
-
- if (PyUnicode_CHECK_INTERNED(s)) {
+ }
+
+ if (PyUnicode_CHECK_INTERNED(s)) {
return;
- }
-
-#ifdef INTERNED_STRINGS
+ }
+
+#ifdef INTERNED_STRINGS
if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
@@ -15700,26 +15700,26 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}
}
-
- PyObject *t;
+
+ PyObject *t;
t = PyDict_SetDefault(interned, s, s);
-
+
if (t == NULL) {
PyErr_Clear();
return;
}
-
+
if (t != s) {
Py_INCREF(t);
Py_SETREF(*p, t);
return;
}
-
+
/* The two references in interned are not counted by refcnt.
The deallocator will take care of this */
- Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
+ Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
-#endif
+#endif
}
void
@@ -15742,67 +15742,67 @@ PyUnicode_InternFromString(const char *cp)
return s;
}
-
-#if defined(WITH_VALGRIND) || defined(__INSURE__)
-static void
-unicode_release_interned(void)
+
+#if defined(WITH_VALGRIND) || defined(__INSURE__)
+static void
+unicode_release_interned(void)
{
- if (interned == NULL || !PyDict_Check(interned)) {
+ if (interned == NULL || !PyDict_Check(interned)) {
return;
- }
- PyObject *keys = PyDict_Keys(interned);
+ }
+ PyObject *keys = PyDict_Keys(interned);
if (keys == NULL || !PyList_Check(keys)) {
PyErr_Clear();
return;
}
- /* Since unicode_release_interned() is intended to help a leak
+ /* Since unicode_release_interned() is intended to help a leak
detector, interned unicode strings are not forcibly deallocated;
rather, we give them their stolen references back, and then clear
and DECREF the interned dict. */
- Py_ssize_t n = PyList_GET_SIZE(keys);
-#ifdef INTERNED_STATS
+ Py_ssize_t n = PyList_GET_SIZE(keys);
+#ifdef INTERNED_STATS
fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
n);
-
- Py_ssize_t immortal_size = 0, mortal_size = 0;
-#endif
- for (Py_ssize_t i = 0; i < n; i++) {
- PyObject *s = PyList_GET_ITEM(keys, i);
+
+ Py_ssize_t immortal_size = 0, mortal_size = 0;
+#endif
+ for (Py_ssize_t i = 0; i < n; i++) {
+ PyObject *s = PyList_GET_ITEM(keys, i);
if (PyUnicode_READY(s) == -1) {
Py_UNREACHABLE();
}
switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL:
Py_REFCNT(s) += 1;
-#ifdef INTERNED_STATS
+#ifdef INTERNED_STATS
immortal_size += PyUnicode_GET_LENGTH(s);
-#endif
+#endif
break;
case SSTATE_INTERNED_MORTAL:
Py_REFCNT(s) += 2;
-#ifdef INTERNED_STATS
+#ifdef INTERNED_STATS
mortal_size += PyUnicode_GET_LENGTH(s);
-#endif
+#endif
break;
- case SSTATE_NOT_INTERNED:
- /* fall through */
+ case SSTATE_NOT_INTERNED:
+ /* fall through */
default:
- Py_UNREACHABLE();
+ Py_UNREACHABLE();
}
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
}
-#ifdef INTERNED_STATS
+#ifdef INTERNED_STATS
fprintf(stderr, "total size of all interned strings: "
"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
"mortal/immortal\n", mortal_size, immortal_size);
-#endif
+#endif
Py_DECREF(keys);
PyDict_Clear(interned);
Py_CLEAR(interned);
}
-#endif
+#endif
/********************* Unicode Iterator **************************/
@@ -15841,7 +15841,7 @@ unicodeiter_next(unicodeiterobject *it)
if (it->it_index < PyUnicode_GET_LENGTH(seq)) {
int kind = PyUnicode_KIND(seq);
- const void *data = PyUnicode_DATA(seq);
+ const void *data = PyUnicode_DATA(seq);
Py_UCS4 chr = PyUnicode_READ(kind, data, it->it_index);
item = PyUnicode_FromOrdinal(chr);
if (item != NULL)
@@ -15855,7 +15855,7 @@ unicodeiter_next(unicodeiterobject *it)
}
static PyObject *
-unicodeiter_len(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
+unicodeiter_len(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{
Py_ssize_t len = 0;
if (it->it_seq)
@@ -15866,17 +15866,17 @@ unicodeiter_len(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
static PyObject *
-unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
+unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{
- _Py_IDENTIFIER(iter);
+ _Py_IDENTIFIER(iter);
if (it->it_seq != NULL) {
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
+ return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
it->it_seq, it->it_index);
} else {
PyObject *u = (PyObject *)_PyUnicode_New(0);
if (u == NULL)
return NULL;
- return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u);
+ return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u);
}
}
@@ -15917,10 +15917,10 @@ PyTypeObject PyUnicodeIter_Type = {
0, /* tp_itemsize */
/* methods */
(destructor)unicodeiter_dealloc, /* tp_dealloc */
- 0, /* tp_vectorcall_offset */
+ 0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
- 0, /* tp_as_async */
+ 0, /* tp_as_async */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
@@ -16062,10 +16062,10 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
PyErr_BadArgument();
return NULL;
}
-_Py_COMP_DIAG_PUSH
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
u = PyUnicode_AsUnicodeAndSize(unicode, &len);
-_Py_COMP_DIAG_POP
+_Py_COMP_DIAG_POP
if (u == NULL)
return NULL;
/* Ensure we won't overflow the size. */
@@ -16084,242 +16084,242 @@ _Py_COMP_DIAG_POP
return copy;
}
-
-static int
-encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
-{
- int res;
- res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
- if (res == -2) {
- PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name);
- return -1;
- }
- if (res < 0) {
- PyErr_NoMemory();
- return -1;
- }
- return 0;
-}
-
-
-static int
-config_get_codec_name(wchar_t **config_encoding)
-{
- char *encoding;
- if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) {
- return -1;
- }
-
- PyObject *name_obj = NULL;
- PyObject *codec = _PyCodec_Lookup(encoding);
- PyMem_RawFree(encoding);
-
- if (!codec)
- goto error;
-
- name_obj = PyObject_GetAttrString(codec, "name");
- Py_CLEAR(codec);
- if (!name_obj) {
- goto error;
- }
-
- wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL);
- Py_DECREF(name_obj);
- if (wname == NULL) {
- goto error;
- }
-
- wchar_t *raw_wname = _PyMem_RawWcsdup(wname);
- if (raw_wname == NULL) {
- PyMem_Free(wname);
- PyErr_NoMemory();
- goto error;
- }
-
- PyMem_RawFree(*config_encoding);
- *config_encoding = raw_wname;
-
- PyMem_Free(wname);
- return 0;
-
-error:
- Py_XDECREF(codec);
- Py_XDECREF(name_obj);
- return -1;
-}
-
-
-static PyStatus
-init_stdio_encoding(PyThreadState *tstate)
-{
- /* Update the stdio encoding to the normalized Python codec name. */
- PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(tstate->interp);
- if (config_get_codec_name(&config->stdio_encoding) < 0) {
- return _PyStatus_ERR("failed to get the Python codec name "
- "of the stdio encoding");
- }
- return _PyStatus_OK();
-}
-
-
-static int
-init_fs_codec(PyInterpreterState *interp)
-{
- const PyConfig *config = _PyInterpreterState_GetConfig(interp);
-
- _Py_error_handler error_handler;
- error_handler = get_error_handler_wide(config->filesystem_errors);
- if (error_handler == _Py_ERROR_UNKNOWN) {
- PyErr_SetString(PyExc_RuntimeError, "unknown filesystem error handler");
- return -1;
- }
-
- char *encoding, *errors;
- if (encode_wstr_utf8(config->filesystem_encoding,
- &encoding,
- "filesystem_encoding") < 0) {
- return -1;
- }
-
- if (encode_wstr_utf8(config->filesystem_errors,
- &errors,
- "filesystem_errors") < 0) {
- PyMem_RawFree(encoding);
- return -1;
- }
-
- struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
- PyMem_RawFree(fs_codec->encoding);
- fs_codec->encoding = encoding;
- /* encoding has been normalized by init_fs_encoding() */
- fs_codec->utf8 = (strcmp(encoding, "utf-8") == 0);
- PyMem_RawFree(fs_codec->errors);
- fs_codec->errors = errors;
- fs_codec->error_handler = error_handler;
-
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- assert(fs_codec->utf8 == 1);
-#endif
-
- /* At this point, PyUnicode_EncodeFSDefault() and
- PyUnicode_DecodeFSDefault() can now use the Python codec rather than
- the C implementation of the filesystem encoding. */
-
- /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
- global configuration variables. */
- if (_Py_SetFileSystemEncoding(fs_codec->encoding,
- fs_codec->errors) < 0) {
- PyErr_NoMemory();
- return -1;
- }
- return 0;
-}
-
-
-static PyStatus
-init_fs_encoding(PyThreadState *tstate)
-{
- PyInterpreterState *interp = tstate->interp;
-
- /* Update the filesystem encoding to the normalized Python codec name.
- For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
- (Python codec name). */
- PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(interp);
- if (config_get_codec_name(&config->filesystem_encoding) < 0) {
- _Py_DumpPathConfig(tstate);
- return _PyStatus_ERR("failed to get the Python codec "
- "of the filesystem encoding");
- }
-
- if (init_fs_codec(interp) < 0) {
- return _PyStatus_ERR("cannot initialize filesystem codec");
- }
- return _PyStatus_OK();
-}
-
-
-PyStatus
-_PyUnicode_InitEncodings(PyThreadState *tstate)
-{
- PyStatus status = init_fs_encoding(tstate);
- if (_PyStatus_EXCEPTION(status)) {
- return status;
- }
-
- return init_stdio_encoding(tstate);
-}
-
-
-static void
-_PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec)
-{
- PyMem_RawFree(fs_codec->encoding);
- fs_codec->encoding = NULL;
- fs_codec->utf8 = 0;
- PyMem_RawFree(fs_codec->errors);
- fs_codec->errors = NULL;
- fs_codec->error_handler = _Py_ERROR_UNKNOWN;
-}
-
-
-#ifdef MS_WINDOWS
-int
-_PyUnicode_EnableLegacyWindowsFSEncoding(void)
-{
- PyInterpreterState *interp = _PyInterpreterState_GET();
- PyConfig *config = (PyConfig *)_PyInterpreterState_GetConfig(interp);
-
- /* Set the filesystem encoding to mbcs/replace (PEP 529) */
- wchar_t *encoding = _PyMem_RawWcsdup(L"mbcs");
- wchar_t *errors = _PyMem_RawWcsdup(L"replace");
- if (encoding == NULL || errors == NULL) {
- PyMem_RawFree(encoding);
- PyMem_RawFree(errors);
- PyErr_NoMemory();
- return -1;
- }
-
- PyMem_RawFree(config->filesystem_encoding);
- config->filesystem_encoding = encoding;
- PyMem_RawFree(config->filesystem_errors);
- config->filesystem_errors = errors;
-
- return init_fs_codec(interp);
-}
-#endif
-
-
-void
-_PyUnicode_Fini(PyThreadState *tstate)
-{
- if (_Py_IsMainInterpreter(tstate)) {
-#if defined(WITH_VALGRIND) || defined(__INSURE__)
- /* Insure++ is a memory analysis tool that aids in discovering
- * memory leaks and other memory problems. On Python exit, the
- * interned string dictionaries are flagged as being in use at exit
- * (which it is). Under normal circumstances, this is fine because
- * the memory will be automatically reclaimed by the system. Under
- * memory debugging, it's a huge source of useless noise, so we
- * trade off slower shutdown for less distraction in the memory
- * reports. -baw
- */
- unicode_release_interned();
-#endif /* __INSURE__ */
-
- Py_CLEAR(unicode_empty);
-
-#ifdef LATIN1_SINGLETONS
- for (Py_ssize_t i = 0; i < 256; i++) {
- Py_CLEAR(unicode_latin1[i]);
- }
-#endif
- unicode_clear_static_strings();
- }
-
- _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
-}
-
-
+
+static int
+encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
+{
+ int res;
+ res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
+ if (res == -2) {
+ PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name);
+ return -1;
+ }
+ if (res < 0) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ return 0;
+}
+
+
+static int
+config_get_codec_name(wchar_t **config_encoding)
+{
+ char *encoding;
+ if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) {
+ return -1;
+ }
+
+ PyObject *name_obj = NULL;
+ PyObject *codec = _PyCodec_Lookup(encoding);
+ PyMem_RawFree(encoding);
+
+ if (!codec)
+ goto error;
+
+ name_obj = PyObject_GetAttrString(codec, "name");
+ Py_CLEAR(codec);
+ if (!name_obj) {
+ goto error;
+ }
+
+ wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL);
+ Py_DECREF(name_obj);
+ if (wname == NULL) {
+ goto error;
+ }
+
+ wchar_t *raw_wname = _PyMem_RawWcsdup(wname);
+ if (raw_wname == NULL) {
+ PyMem_Free(wname);
+ PyErr_NoMemory();
+ goto error;
+ }
+
+ PyMem_RawFree(*config_encoding);
+ *config_encoding = raw_wname;
+
+ PyMem_Free(wname);
+ return 0;
+
+error:
+ Py_XDECREF(codec);
+ Py_XDECREF(name_obj);
+ return -1;
+}
+
+
+static PyStatus
+init_stdio_encoding(PyThreadState *tstate)
+{
+ /* Update the stdio encoding to the normalized Python codec name. */
+ PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(tstate->interp);
+ if (config_get_codec_name(&config->stdio_encoding) < 0) {
+ return _PyStatus_ERR("failed to get the Python codec name "
+ "of the stdio encoding");
+ }
+ return _PyStatus_OK();
+}
+
+
+static int
+init_fs_codec(PyInterpreterState *interp)
+{
+ const PyConfig *config = _PyInterpreterState_GetConfig(interp);
+
+ _Py_error_handler error_handler;
+ error_handler = get_error_handler_wide(config->filesystem_errors);
+ if (error_handler == _Py_ERROR_UNKNOWN) {
+ PyErr_SetString(PyExc_RuntimeError, "unknown filesystem error handler");
+ return -1;
+ }
+
+ char *encoding, *errors;
+ if (encode_wstr_utf8(config->filesystem_encoding,
+ &encoding,
+ "filesystem_encoding") < 0) {
+ return -1;
+ }
+
+ if (encode_wstr_utf8(config->filesystem_errors,
+ &errors,
+ "filesystem_errors") < 0) {
+ PyMem_RawFree(encoding);
+ return -1;
+ }
+
+ struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
+ PyMem_RawFree(fs_codec->encoding);
+ fs_codec->encoding = encoding;
+ /* encoding has been normalized by init_fs_encoding() */
+ fs_codec->utf8 = (strcmp(encoding, "utf-8") == 0);
+ PyMem_RawFree(fs_codec->errors);
+ fs_codec->errors = errors;
+ fs_codec->error_handler = error_handler;
+
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ assert(fs_codec->utf8 == 1);
+#endif
+
+ /* At this point, PyUnicode_EncodeFSDefault() and
+ PyUnicode_DecodeFSDefault() can now use the Python codec rather than
+ the C implementation of the filesystem encoding. */
+
+ /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+ global configuration variables. */
+ if (_Py_SetFileSystemEncoding(fs_codec->encoding,
+ fs_codec->errors) < 0) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ return 0;
+}
+
+
+static PyStatus
+init_fs_encoding(PyThreadState *tstate)
+{
+ PyInterpreterState *interp = tstate->interp;
+
+ /* Update the filesystem encoding to the normalized Python codec name.
+ For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
+ (Python codec name). */
+ PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(interp);
+ if (config_get_codec_name(&config->filesystem_encoding) < 0) {
+ _Py_DumpPathConfig(tstate);
+ return _PyStatus_ERR("failed to get the Python codec "
+ "of the filesystem encoding");
+ }
+
+ if (init_fs_codec(interp) < 0) {
+ return _PyStatus_ERR("cannot initialize filesystem codec");
+ }
+ return _PyStatus_OK();
+}
+
+
+PyStatus
+_PyUnicode_InitEncodings(PyThreadState *tstate)
+{
+ PyStatus status = init_fs_encoding(tstate);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
+ }
+
+ return init_stdio_encoding(tstate);
+}
+
+
+static void
+_PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec)
+{
+ PyMem_RawFree(fs_codec->encoding);
+ fs_codec->encoding = NULL;
+ fs_codec->utf8 = 0;
+ PyMem_RawFree(fs_codec->errors);
+ fs_codec->errors = NULL;
+ fs_codec->error_handler = _Py_ERROR_UNKNOWN;
+}
+
+
+#ifdef MS_WINDOWS
+int
+_PyUnicode_EnableLegacyWindowsFSEncoding(void)
+{
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ PyConfig *config = (PyConfig *)_PyInterpreterState_GetConfig(interp);
+
+ /* Set the filesystem encoding to mbcs/replace (PEP 529) */
+ wchar_t *encoding = _PyMem_RawWcsdup(L"mbcs");
+ wchar_t *errors = _PyMem_RawWcsdup(L"replace");
+ if (encoding == NULL || errors == NULL) {
+ PyMem_RawFree(encoding);
+ PyMem_RawFree(errors);
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ PyMem_RawFree(config->filesystem_encoding);
+ config->filesystem_encoding = encoding;
+ PyMem_RawFree(config->filesystem_errors);
+ config->filesystem_errors = errors;
+
+ return init_fs_codec(interp);
+}
+#endif
+
+
+void
+_PyUnicode_Fini(PyThreadState *tstate)
+{
+ if (_Py_IsMainInterpreter(tstate)) {
+#if defined(WITH_VALGRIND) || defined(__INSURE__)
+ /* Insure++ is a memory analysis tool that aids in discovering
+ * memory leaks and other memory problems. On Python exit, the
+ * interned string dictionaries are flagged as being in use at exit
+ * (which it is). Under normal circumstances, this is fine because
+ * the memory will be automatically reclaimed by the system. Under
+ * memory debugging, it's a huge source of useless noise, so we
+ * trade off slower shutdown for less distraction in the memory
+ * reports. -baw
+ */
+ unicode_release_interned();
+#endif /* __INSURE__ */
+
+ Py_CLEAR(unicode_empty);
+
+#ifdef LATIN1_SINGLETONS
+ for (Py_ssize_t i = 0; i < 256; i++) {
+ Py_CLEAR(unicode_latin1[i]);
+ }
+#endif
+ unicode_clear_static_strings();
+ }
+
+ _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
+}
+
+
/* A _string module, to export formatter_parser and formatter_field_name_split
to the string.Formatter class implemented in Python. */