diff options
author | orivej <orivej@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
commit | 718c552901d703c502ccbefdfc3c9028d608b947 (patch) | |
tree | 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/tools/python3/src/Objects/stringlib/find_max_char.h | |
parent | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff) | |
download | ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz |
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Objects/stringlib/find_max_char.h')
-rw-r--r-- | contrib/tools/python3/src/Objects/stringlib/find_max_char.h | 266 |
1 files changed, 133 insertions, 133 deletions
diff --git a/contrib/tools/python3/src/Objects/stringlib/find_max_char.h b/contrib/tools/python3/src/Objects/stringlib/find_max_char.h index f4e0a7761d..608bc37a43 100644 --- a/contrib/tools/python3/src/Objects/stringlib/find_max_char.h +++ b/contrib/tools/python3/src/Objects/stringlib/find_max_char.h @@ -1,134 +1,134 @@ -/* Finding the optimal width of unicode characters in a buffer */ - -#if !STRINGLIB_IS_UNICODE -# error "find_max_char.h is specific to Unicode" -#endif - -/* Mask to quickly check whether a C 'long' contains a - non-ASCII, UTF8-encoded char. */ -#if (SIZEOF_LONG == 8) -# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL -#elif (SIZEOF_LONG == 4) -# define UCS1_ASCII_CHAR_MASK 0x80808080UL -#else -# error C 'long' size should be either 4 or 8! -#endif - -#if STRINGLIB_SIZEOF_CHAR == 1 - -Py_LOCAL_INLINE(Py_UCS4) -STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) -{ - const unsigned char *p = (const unsigned char *) begin; - const unsigned char *aligned_end = - (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); - - while (p < end) { - if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { - /* Help register allocation */ - const unsigned char *_p = p; - while (_p < aligned_end) { +/* Finding the optimal width of unicode characters in a buffer */ + +#if !STRINGLIB_IS_UNICODE +# error "find_max_char.h is specific to Unicode" +#endif + +/* Mask to quickly check whether a C 'long' contains a + non-ASCII, UTF8-encoded char. */ +#if (SIZEOF_LONG == 8) +# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL +#elif (SIZEOF_LONG == 4) +# define UCS1_ASCII_CHAR_MASK 0x80808080UL +#else +# error C 'long' size should be either 4 or 8! +#endif + +#if STRINGLIB_SIZEOF_CHAR == 1 + +Py_LOCAL_INLINE(Py_UCS4) +STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) +{ + const unsigned char *p = (const unsigned char *) begin; + const unsigned char *aligned_end = + (const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG); + + while (p < end) { + if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { + /* Help register allocation */ + const unsigned char *_p = p; + while (_p < aligned_end) { unsigned long value = *(const unsigned long *) _p; - if (value & UCS1_ASCII_CHAR_MASK) - return 255; - _p += SIZEOF_LONG; - } - p = _p; - if (p == end) - break; - } - if (*p++ & 0x80) - return 255; - } - return 127; -} - -#undef ASCII_CHAR_MASK - -#else /* STRINGLIB_SIZEOF_CHAR == 1 */ - -#define MASK_ASCII 0xFFFFFF80 -#define MASK_UCS1 0xFFFFFF00 -#define MASK_UCS2 0xFFFF0000 - -#define MAX_CHAR_ASCII 0x7f -#define MAX_CHAR_UCS1 0xff -#define MAX_CHAR_UCS2 0xffff -#define MAX_CHAR_UCS4 0x10ffff - -Py_LOCAL_INLINE(Py_UCS4) -STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) -{ -#if STRINGLIB_SIZEOF_CHAR == 2 - const Py_UCS4 mask_limit = MASK_UCS1; - const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; -#elif STRINGLIB_SIZEOF_CHAR == 4 - const Py_UCS4 mask_limit = MASK_UCS2; - const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; -#else -#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) -#endif - Py_UCS4 mask; - Py_ssize_t n = end - begin; - const STRINGLIB_CHAR *p = begin; - const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); - Py_UCS4 max_char; - - max_char = MAX_CHAR_ASCII; - mask = MASK_ASCII; - while (p < unrolled_end) { - STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; - if (bits & mask) { - if (mask == mask_limit) { - /* Limit reached */ - return max_char_limit; - } - if (mask == MASK_ASCII) { - max_char = MAX_CHAR_UCS1; - mask = MASK_UCS1; - } - else { - /* mask can't be MASK_UCS2 because of mask_limit above */ - assert(mask == MASK_UCS1); - max_char = MAX_CHAR_UCS2; - mask = MASK_UCS2; - } - /* We check the new mask on the same chars in the next iteration */ - continue; - } - p += 4; - } - while (p < end) { - if (p[0] & mask) { - if (mask == mask_limit) { - /* Limit reached */ - return max_char_limit; - } - if (mask == MASK_ASCII) { - max_char = MAX_CHAR_UCS1; - mask = MASK_UCS1; - } - else { - /* mask can't be MASK_UCS2 because of mask_limit above */ - assert(mask == MASK_UCS1); - max_char = MAX_CHAR_UCS2; - mask = MASK_UCS2; - } - /* We check the new mask on the same chars in the next iteration */ - continue; - } - p++; - } - return max_char; -} - -#undef MASK_ASCII -#undef MASK_UCS1 -#undef MASK_UCS2 -#undef MAX_CHAR_ASCII -#undef MAX_CHAR_UCS1 -#undef MAX_CHAR_UCS2 -#undef MAX_CHAR_UCS4 - -#endif /* STRINGLIB_SIZEOF_CHAR == 1 */ - + if (value & UCS1_ASCII_CHAR_MASK) + return 255; + _p += SIZEOF_LONG; + } + p = _p; + if (p == end) + break; + } + if (*p++ & 0x80) + return 255; + } + return 127; +} + +#undef ASCII_CHAR_MASK + +#else /* STRINGLIB_SIZEOF_CHAR == 1 */ + +#define MASK_ASCII 0xFFFFFF80 +#define MASK_UCS1 0xFFFFFF00 +#define MASK_UCS2 0xFFFF0000 + +#define MAX_CHAR_ASCII 0x7f +#define MAX_CHAR_UCS1 0xff +#define MAX_CHAR_UCS2 0xffff +#define MAX_CHAR_UCS4 0x10ffff + +Py_LOCAL_INLINE(Py_UCS4) +STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) +{ +#if STRINGLIB_SIZEOF_CHAR == 2 + const Py_UCS4 mask_limit = MASK_UCS1; + const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; +#elif STRINGLIB_SIZEOF_CHAR == 4 + const Py_UCS4 mask_limit = MASK_UCS2; + const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; +#else +#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) +#endif + Py_UCS4 mask; + Py_ssize_t n = end - begin; + const STRINGLIB_CHAR *p = begin; + const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); + Py_UCS4 max_char; + + max_char = MAX_CHAR_ASCII; + mask = MASK_ASCII; + while (p < unrolled_end) { + STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; + if (bits & mask) { + if (mask == mask_limit) { + /* Limit reached */ + return max_char_limit; + } + if (mask == MASK_ASCII) { + max_char = MAX_CHAR_UCS1; + mask = MASK_UCS1; + } + else { + /* mask can't be MASK_UCS2 because of mask_limit above */ + assert(mask == MASK_UCS1); + max_char = MAX_CHAR_UCS2; + mask = MASK_UCS2; + } + /* We check the new mask on the same chars in the next iteration */ + continue; + } + p += 4; + } + while (p < end) { + if (p[0] & mask) { + if (mask == mask_limit) { + /* Limit reached */ + return max_char_limit; + } + if (mask == MASK_ASCII) { + max_char = MAX_CHAR_UCS1; + mask = MASK_UCS1; + } + else { + /* mask can't be MASK_UCS2 because of mask_limit above */ + assert(mask == MASK_UCS1); + max_char = MAX_CHAR_UCS2; + mask = MASK_UCS2; + } + /* We check the new mask on the same chars in the next iteration */ + continue; + } + p++; + } + return max_char; +} + +#undef MASK_ASCII +#undef MASK_UCS1 +#undef MASK_UCS2 +#undef MAX_CHAR_ASCII +#undef MAX_CHAR_UCS1 +#undef MAX_CHAR_UCS2 +#undef MAX_CHAR_UCS4 + +#endif /* STRINGLIB_SIZEOF_CHAR == 1 */ + |